├── .gitignore ├── doc └── image │ ├── Lenna.png │ └── Lenna-150x150-bilinear.png ├── test ├── CMakeLists.txt └── test-omp.lua ├── Minus.lua ├── SparseCriterion.lua ├── DistMarginCriterion.lua ├── Tic.lua ├── Toc.lua ├── generic ├── Template.c ├── SpatialDownSampling.c ├── SpatialUpSampling.c ├── SpatialLinear.c ├── SpatialRadialMatching.c ├── DataSetLabelMe.c ├── DistMarginCriterion.c ├── SpatialMaxSampling.c ├── SpatialMatching.c ├── SpatialReSampling.c ├── SpatialGraph.c ├── SpatialReSamplingEx.c └── SoftMaxTree.c ├── FunctionWrapper.lua ├── SpatialMaxSampling.lua ├── CMakeLists.txt ├── nnx-0.1-1.rockspec ├── SaturatedLU.lua ├── TreeNLLCriterion.lua ├── LA.lua ├── LICENSE ├── SpatialMatching.lua ├── SuperCriterion.lua ├── SpatialRadialMatching.lua ├── SpatialDownSampling.lua ├── SoftMaxForest.lua ├── MultiSoftMax.lua ├── Probe.lua ├── SpatialLinear.lua ├── SpatialReSampling.lua ├── PushTable.lua ├── Balance.lua ├── PullTable.lua ├── init.c ├── SpatialGraph.lua ├── QDRiemaNNLinear.lua ├── SpatialSparseCriterion.lua ├── SpatialUpSampling.lua ├── DistNLLCriterion.lua ├── DataList.lua ├── CTCCriterion.lua ├── init.lua ├── SpatialReSamplingEx.lua ├── SpatialClassifier.lua ├── SpatialPadding.lua ├── SpatialColorTransform.lua ├── PixelSort.lua ├── SpatialPyramid.lua ├── SpatialNormalization.lua ├── DataSet.lua ├── README.md └── SpatialFovea.lua /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /doc/image/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clementfarabet/lua---nnx/HEAD/doc/image/Lenna.png -------------------------------------------------------------------------------- /doc/image/Lenna-150x150-bilinear.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clementfarabet/lua---nnx/HEAD/doc/image/Lenna-150x150-bilinear.png -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | install_files(${INSTALL_PREFIX} test-all.lua) 3 | install_files(${INSTALL_PREFIX} test-omp.lua) 4 | install_files(${INSTALL_PREFIX} test_lbfgs.lua) 5 | install_files(${INSTALL_PREFIX} test_cg.lua) 6 | install_files(${INSTALL_PREFIX} rosenbrock.lua) 7 | -------------------------------------------------------------------------------- /Minus.lua: -------------------------------------------------------------------------------- 1 | local Minus, parent = torch.class('nn.Minus', 'nn.Module') 2 | 3 | function Minus:updateOutput(input) 4 | self.output:resizeAs(input):copy(input):mul(-1) 5 | return self.output 6 | end 7 | 8 | function Minus:updateGradInput(input, gradOutput) 9 | self.gradInput:resizeAs(input):copy(gradOutput):mul(-1) 10 | return self.gradInput 11 | end 12 | -------------------------------------------------------------------------------- /SparseCriterion.lua: -------------------------------------------------------------------------------- 1 | local SparseCriterion, parent = torch.class('nn.SparseCriterion', 'nn.Criterion') 2 | 3 | function SparseCriterion:__init() 4 | parent.__init(self) 5 | self.sizeAverage = true 6 | end 7 | 8 | function SparseCriterion:updateOutput(input) 9 | input.nn.SparseCriterion_updateOutput(self, input) 10 | return self.output 11 | end 12 | 13 | function SparseCriterion:updateGradInput(input) 14 | input.nn.SparseCriterion_updateGradInput(self, input) 15 | return self.gradInput 16 | end 17 | -------------------------------------------------------------------------------- /DistMarginCriterion.lua: -------------------------------------------------------------------------------- 1 | local DistMarginCriterion, parent = 
torch.class('nn.DistMarginCriterion', 'nn.Criterion') 2 | 3 | function DistMarginCriterion:__init() 4 | parent.__init(self) 5 | self.sizeAverage = true 6 | end 7 | 8 | function DistMarginCriterion:updateOutput(input, target) 9 | return input.nn.DistMarginCriterion_updateOutput(self, input, target) 10 | end 11 | 12 | function DistMarginCriterion:updateGradInput(input, target) 13 | return input.nn.DistMarginCriterion_updateGradInput(self, input, target) 14 | end 15 | -------------------------------------------------------------------------------- /Tic.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | local Tic, parent = torch.class('nn.Tic', 'nn.Module') 3 | 4 | function Tic:__init(name) 5 | parent.__init(self) 6 | self.name = name or 'default' 7 | tic_modules = tic_modules or {} 8 | tic_modules[self.name] = torch.Timer() 9 | end 10 | 11 | function Tic:updateOutput(input) 12 | tic_modules[self.name]:reset() 13 | self.output = input 14 | return self.output 15 | end 16 | 17 | function Tic:updateGradInput(input, gradOutput) 18 | self.gradInput = gradOutput 19 | return self.gradInput 20 | end 21 | -------------------------------------------------------------------------------- /Toc.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | local Toc, parent = torch.class('nn.Toc', 'nn.Module') 3 | 4 | function Toc:__init(name, comment) 5 | parent.__init(self) 6 | self.name = name or 'default' 7 | self.comment = comment or '' 8 | end 9 | 10 | function Toc:updateOutput(input) 11 | print("Toc '"..self.name.."' ("..self.comment..") : "..tic_modules[self.name]:time()['real']) 12 | self.output = input 13 | return self.output 14 | end 15 | 16 | function Toc:updateGradInput(input, gradOutput) 17 | self.gradInput = gradOutput 18 | return self.gradInput 19 | end 20 | -------------------------------------------------------------------------------- /generic/Template.c: 
-------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/Template.c" 3 | #else 4 | 5 | static int nn_(Template_updateOutput)(lua_State *L) 6 | { 7 | 8 | } 9 | 10 | static int nn_(Template_updateGradInput)(lua_State *L) 11 | { 12 | 13 | } 14 | 15 | static const struct luaL_Reg nn_(Template__) [] = { 16 | {"Template_updateOutput", nn_(Template_updateOutput)}, 17 | {"Template_updateGradInput", nn_(Template_updateGradInput)}, 18 | {NULL, NULL} 19 | }; 20 | 21 | static void nn_(Template_init)(lua_State *L) 22 | { 23 | luaT_pushmetatable(L, torch_Tensor); 24 | luaT_registeratname(L, nn_(Template__), "nn"); 25 | lua_pop(L,1); 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /FunctionWrapper.lua: -------------------------------------------------------------------------------- 1 | local FunctionWrapper, parent = torch.class('nn.FunctionWrapper', 'nn.Module') 2 | 3 | local help_desc = [[ 4 | Dummy module that takes a forward and a backward function as argument. 5 | ]] 6 | 7 | function FunctionWrapper:__init(init, updateOutput, updateGradInput) 8 | init(self) 9 | self.fn_updateOutput = updateOutput 10 | self.fn_updateGradInput = updateGradInput 11 | end 12 | 13 | function FunctionWrapper:updateOutput(input) 14 | self.output = self.fn_updateOutput(self, input) 15 | return self.output 16 | end 17 | 18 | function FunctionWrapper:updateGradInput(input, gradOutput) 19 | self.gradInput = self.fn_updateGradInput(self, input, gradOutput) 20 | return self.gradInput 21 | end -------------------------------------------------------------------------------- /SpatialMaxSampling.lua: -------------------------------------------------------------------------------- 1 | local SpatialMaxSampling, parent = torch.class('nn.SpatialMaxSampling', 'nn.Module') 2 | 3 | function SpatialMaxSampling:__init(...) 
4 | parent.__init(self) 5 | xlua.unpack_class( 6 | self, {...}, 'nn.SpatialMaxSampling', 7 | 'resample an image using max selection', 8 | {arg='owidth', type='number', help='output width'}, 9 | {arg='oheight', type='number', help='output height'} 10 | ) 11 | self.indices = torch.Tensor() 12 | end 13 | 14 | function SpatialMaxSampling:updateOutput(input) 15 | input.nn.SpatialMaxSampling_updateOutput(self, input) 16 | return self.output 17 | end 18 | 19 | function SpatialMaxSampling:updateGradInput(input, gradOutput) 20 | input.nn.SpatialMaxSampling_updateGradInput(self, input, gradOutput) 21 | return self.gradInput 22 | end 23 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 3 | CMAKE_POLICY(VERSION 2.6) 4 | IF(LUAROCKS_PREFIX) 5 | MESSAGE(STATUS "Installing Torch through Luarocks") 6 | STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX "${LUAROCKS_PREFIX}") 7 | MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}") 8 | ENDIF() 9 | FIND_PACKAGE(Torch REQUIRED) 10 | FIND_PACKAGE(OpenMP) 11 | 12 | IF (OPENMP_FOUND) 13 | MESSAGE (STATUS "OpenMP Found with compiler flag : ${OpenMP_C_FLAGS}") 14 | SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 15 | ENDIF (OPENMP_FOUND) 16 | 17 | SET(src init.c) 18 | FILE(GLOB luasrc *.lua) 19 | SET(luasrc ${luasrc} test/test-all.lua) 20 | SET(luasrc ${luasrc} test/test-omp.lua) 21 | ADD_TORCH_PACKAGE(nnx "${src}" "${luasrc}" "Image Processing") 22 | TARGET_LINK_LIBRARIES(nnx luaT TH) 23 | IF(LUALIB) 24 | TARGET_LINK_LIBRARIES(nnx ${LUALIB}) 25 | ENDIF() 26 | -------------------------------------------------------------------------------- /nnx-0.1-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "nnx" 2 | version = "0.1-1" 3 | 4 | source = { 5 | 
url = "git://github.com/clementfarabet/lua---nnx", 6 | tag = "master" 7 | } 8 | 9 | description = { 10 | summary = "A completely unstable and experimental package that extends Torch's builtin nn library", 11 | detailed = [[ 12 | This is an experimental package that extends nn. You've be warned! 13 | ]], 14 | homepage = "https://github.com/clementfarabet/lua---nnx", 15 | license = "BSD" 16 | } 17 | 18 | dependencies = { 19 | "torch >= 7.0", 20 | "xlua >= 1.0" 21 | } 22 | 23 | build = { 24 | type = "command", 25 | build_command = [[ 26 | cmake -E make_directory build && cd build && cmake .. -DLUALIB=$(LUALIB) -DLUA_INCDIR="$(LUA_INCDIR)" -DLUA_LIBDIR="$(LUA_LIBDIR)" -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUAROCKS_PREFIX)" -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) 27 | ]], 28 | install_command = "cd build && $(MAKE) install" 29 | } 30 | -------------------------------------------------------------------------------- /SaturatedLU.lua: -------------------------------------------------------------------------------- 1 | local SaturatedLU, parent = torch.class('nn.SaturatedLU','nn.Module') 2 | 3 | function SaturatedLU:__init(th,v,th2,v2) 4 | parent.__init(self) 5 | self.threshold = th or -1.0 6 | self.val = v or -1.0 7 | self.threshold2 = th2 or 1.0 8 | self.val2 = v2 or 1.0 9 | if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') 10 | or (th2 and type(th2) ~= 'number') or (v2 and type(v2) ~= 'number') then 11 | error('nn.SaturatedLU(lower-bound, value, upper-bound, value2)') 12 | end 13 | end 14 | 15 | function SaturatedLU:updateOutput(input) 16 | self.output = input:clone() 17 | self.output[self.output:lt(self.threshold)] = self.val 18 | self.output[self.output:gt(self.threshold2)] = self.val2 19 | return self.output 20 | end 21 | 22 | function SaturatedLU:updateGradInput(input, gradOutput) 23 | self.gradInput = gradOutput:clone() 24 | self.gradInput[input:lt(self.threshold)] = 0 25 | self.gradInput[input:gt(self.threshold2)] = 0 26 | return 
self.gradInput 27 | end -------------------------------------------------------------------------------- /TreeNLLCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ TreeNLLCriterion ]]-- 3 | -- Negative Log Likelihood for SoftMaxTrees. 4 | -- Used for maximizing the likelihood of SoftMaxTree outputs. 5 | -- SoftMaxTree outputs a column tensor representing the log likelihood 6 | -- of each target in the batch. Thus SoftMaxTree requires the targets. 7 | -- So this Criterion only computes the negative of those outputs, as 8 | -- well as its corresponding gradients. 9 | ------------------------------------------------------------------------ 10 | local TreeNLLCriterion, parent = torch.class("nn.TreeNLLCriterion", "nn.Criterion") 11 | 12 | function TreeNLLCriterion:__init() 13 | self._module = nn.Mean() 14 | parent.__init(self) 15 | self._output_grad = torch.Tensor{-1} 16 | end 17 | 18 | function TreeNLLCriterion:updateOutput(input, target) 19 | return -self._module:forward(input)[1] 20 | end 21 | 22 | function TreeNLLCriterion:updateGradInput(input, target) 23 | return self._module:backward(input, self._output_grad) 24 | end 25 | -------------------------------------------------------------------------------- /LA.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | 3 | 4 | --Based on: http://arxiv.org/pdf/1412.6830v1.pdf 5 | --If input dimension is larger than 1, a reshape is needed after usage. 
6 | --Usage: 7 | ------------------------------------ 8 | -- model:add(LA(4, 3 * 32 * 32)) 9 | -- model:add(nn.Reshape(3,32,32)) 10 | ------------------------------------ 11 | 12 | 13 | function LA(s, inputSize) 14 | local module = nn.Sequential() 15 | local maxmodules = {} 16 | for i = 1,s do 17 | maxmodules[i] = nn.Sequential() 18 | maxmodules[i]:add(nn.MulConstant(-1.0)) 19 | maxmodules[i]:add(nn.Add(inputSize,true)) 20 | maxmodules[i]:add(nn.ReLU()) 21 | maxmodules[i]:add(nn.CMul(inputSize)) 22 | end 23 | maxmodules[s+1] = nn.Sequential() 24 | maxmodules[s+1]:add(nn.ReLU()) 25 | 26 | local catmodule = nn.ConcatTable() 27 | print('number of modules is: '.. #maxmodules) 28 | for i=1,#maxmodules do 29 | catmodule:add(maxmodules[i]) 30 | end 31 | 32 | module:add(catmodule) 33 | 34 | 35 | module:add(nn.JoinTable(1)) 36 | module:add(nn.Reshape(s + 1,inputSize)) 37 | 38 | module:add(nn.Sum(1)) 39 | 40 | 41 | return module 42 | end 43 | 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | ---------------------------------------------------------------------- 3 | -- 4 | -- Copyright (c) 2011 Clement Farabet, Marco Scoffier, 5 | -- Koray Kavukcuoglu, Benoit Corda 6 | -- 7 | -- 8 | -- Permission is hereby granted, free of charge, to any person obtaining 9 | -- a copy of this software and associated documentation files (the 10 | -- "Software"), to deal in the Software without restriction, including 11 | -- without limitation the rights to use, copy, modify, merge, publish, 12 | -- distribute, sublicense, and/or sell copies of the Software, and to 13 | -- permit persons to whom the Software is furnished to do so, subject to 14 | -- the following conditions: 15 | -- 16 | -- The above copyright notice and this permission notice shall be 17 | -- included in all copies or substantial portions of the Software. 
18 | -- 19 | -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | -- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | -- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 | -- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 23 | -- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | -- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 | -- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | -- 27 | ---------------------------------------------------------------------- 28 | -------------------------------------------------------------------------------- /SpatialMatching.lua: -------------------------------------------------------------------------------- 1 | local SpatialMatching, parent = torch.class('nn.SpatialMatching', 'nn.Module') 2 | 3 | function SpatialMatching:__init(maxh, maxw, full_output) 4 | -- If full_output is false, output is computed on elements of the first input 5 | -- for which all the possible corresponding elements exist in the second input 6 | -- In addition, if full_output is set to false, the pixel (1,1) of the first input 7 | -- is supposed to correspond to the pixel (maxh/2, maxw/2) of the second one 8 | parent.__init(self) 9 | self.maxw = maxw or 11 10 | self.maxh = maxh or 11 11 | if full_output == nil then 12 | full_output = false 13 | end 14 | self.full_output = full_output 15 | self.gradInput1 = torch.Tensor() 16 | self.gradInput2 = torch.Tensor() 17 | end 18 | 19 | function SpatialMatching:updateOutput(input) 20 | -- input is a table of 2 inputs, each one being KxHxW 21 | -- if not full_output, the 1st one is KxH1xW1 where H1 <= H-maxh+1, W1 <= W-maxw+1 22 | self.output:resize(input[1]:size(2), input[1]:size(3), self.maxh, self.maxw) 23 | input[1].nn.SpatialMatching_updateOutput(self, input[1], input[2]) 24 | return self.output 25 | end 26 | 27 | function SpatialMatching:updateGradInput(input, 
gradOutput)
   self.gradInput1:resize(input[1]:size()):zero()
   self.gradInput2:resize(input[2]:size()):zero()
   input[1].nn.SpatialMatching_updateGradInput(self, input[1], input[2], gradOutput)
   self.gradInput = {self.gradInput1, self.gradInput2}
   return self.gradInput
end
--------------------------------------------------------------------------------
/SuperCriterion.lua:
--------------------------------------------------------------------------------
local SuperCriterion, parent = torch.class('nn.SuperCriterion', 'nn.Criterion')

-- Weighted sum of several criterions. Targets may be a table (one target
-- per criterion) or a single target shared by all criterions.
function SuperCriterion:__init()
   parent.__init(self)
   self.criterions = {}
   self.weights = {}
   self.gradInput = {}
end

-- Register a criterion with an optional weight (default 1).
function SuperCriterion:add(criterion, weight)
   weight = weight or 1
   table.insert(self.criterions, criterion)
   table.insert(self.weights, weight)
end

function SuperCriterion:updateOutput(input, target)
   self.output = 0
   if type(target) == 'table' then
      for i,criterion in ipairs(self.criterions) do
         self.output = self.output + self.weights[i]*criterion:updateOutput(input[i],target[i])
      end
   else
      for i,criterion in ipairs(self.criterions) do
         self.output = self.output + self.weights[i]*criterion:updateOutput(input[i],target)
      end
   end
   return self.output
end

function SuperCriterion:updateGradInput(input, target)
   if type(target) == 'table' then
      for i,criterion in ipairs(self.criterions) do
         -- BUG FIX: was `torch.Tensor() or self.gradInput[i]`; a new tensor is
         -- always truthy in Lua, so the cached tensor was never reused and a
         -- fresh allocation happened on every backward pass.
         self.gradInput[i] = self.gradInput[i] or torch.Tensor()
         self.gradInput[i]:resizeAs(input[i]):zero()
         self.gradInput[i]:add(self.weights[i], criterion:updateGradInput(input[i],target[i]))
      end
   else
      for i,criterion in ipairs(self.criterions) do
         -- BUG FIX: same reversed `or` as above.
         self.gradInput[i] = self.gradInput[i] or torch.Tensor()
         self.gradInput[i]:resizeAs(input[i]):zero()
         self.gradInput[i]:add(self.weights[i], criterion:updateGradInput(input[i],target))
      end
   end
   return self.gradInput
end
--------------------------------------------------------------------------------
/SpatialRadialMatching.lua:
--------------------------------------------------------------------------------
local SpatialRadialMatching, parent = torch.class('nn.SpatialRadialMatching', 'nn.Module')

function SpatialRadialMatching:__init(maxh)
   -- If full_output is false, output is computed on elements of the first input
   -- for which all the possible corresponding elements exist in the second input
   -- In addition, if full_output is set to false, the pixel (1,1) of the first input
   -- is supposed to correspond to the pixel (maxh/2, maxw/2) of the second one
   parent.__init(self)
   self.maxh = maxh
   self.gradInput1 = torch.Tensor()
   self.gradInput2 = torch.Tensor()
end

function SpatialRadialMatching:updateOutput(input)
   -- input is a table of 2 inputs, each one being KxHxW
   -- if not full_output, the 1st one is KxH1xW1 where H1 <= H-maxh+1, W1 <= W-maxw+1
   self.output:resize(input[1]:size(2), input[1]:size(3), self.maxh)
   --if input[3] == nil then
   --   input[3] = torch.LongTensor(input[1]:size(2), input[1]:size(3)):fill(1)
   --end
   --input[1].nn.SpatialRadialMatching_updateOutput(self, input[1], input[2], input[3])
   input[1].nn.SpatialRadialMatching_updateOutput(self, input[1], input[2])
   return self.output
end

function SpatialRadialMatching:updateGradInput(input, gradOutput)
   self.gradInput1:resize(input[1]:size()):zero()
   self.gradInput2:resize(input[2]:size()):zero()
   --input[1].nn.SpatialRadialMatching_updateGradInput(self,input[1],input[2],gradOutput,input[3])
   input[1].nn.SpatialRadialMatching_updateGradInput(self,input[1],input[2],gradOutput)
   self.gradInput = {self.gradInput1, self.gradInput2}
   return self.gradInput
end
--------------------------------------------------------------------------------
/SpatialDownSampling.lua: -------------------------------------------------------------------------------- 1 | local SpatialDownSampling, parent = torch.class('nn.SpatialDownSampling', 'nn.Module') 2 | 3 | local help_desc = [[ 4 | Applies a 2D down-sampling over an input image composed of 5 | several input planes. The input tensor in forward(input) is 6 | expected to be a 3D tensor (nInputPlane x width x height). 7 | The number of output planes will be the same as nInputPlane. 8 | 9 | The downsampling is done using the simple average 10 | technique. For interpolated (bicubic) downsampling, use 11 | nn.SpatialReSampling(). 12 | 13 | If the input image is a 3D tensor nInputPlane x width x height, 14 | the output image size will be nInputPlane x owidth x oheight where 15 | 16 | owidth = floor(width/rW) 17 | oheight = floor(height/rH) ]] 18 | 19 | function SpatialDownSampling:__init(...) 20 | parent.__init(self) 21 | 22 | -- get args 23 | xlua.unpack_class(self, {...}, 'nn.SpatialDownSampling', help_desc, 24 | {arg='rW', type='number', help='ratio width', req=true}, 25 | {arg='rH', type='number', help='ratio height', req=true}) 26 | end 27 | 28 | function SpatialDownSampling:updateOutput(input) 29 | if (input:size(2) / self.rH) < 1 then 30 | error('input too small in dimension 2') 31 | elseif (input:size(3) / self.rW) < 1 then 32 | error('input too small in dimension 3') 33 | end 34 | self.output:resize(input:size(1), math.floor(input:size(2) / self.rH), 35 | math.floor(input:size(3) / self.rW)) 36 | input.nn.SpatialDownSampling_updateOutput(self, input) 37 | return self.output 38 | end 39 | 40 | function SpatialDownSampling:updateGradInput(input, gradOutput) 41 | self.gradInput:resizeAs(input) 42 | input.nn.SpatialDownSampling_updateGradInput(self, gradOutput) 43 | return self.gradInput 44 | end 45 | -------------------------------------------------------------------------------- /SoftMaxForest.lua: 
--------------------------------------------------------------------------------
local SoftMaxForest, parent = torch.class("nn.SoftMaxForest", "nn.Container")

-- A mixture of nn.SoftMaxTree experts combined by a learned softmax gater.
-- inputSize  : number of input features fed to each expert and the gater
-- trees      : table of hierarchy tables, one per nn.SoftMaxTree expert
-- rootIds    : table of root node ids, parallel to `trees`
-- gaterSize  : optional table of hidden-layer sizes for the gater MLP
-- gaterAct   : optional activation module cloned between gater layers (default nn.Tanh())
-- accUpdate  : forwarded to each nn.SoftMaxTree constructor
function SoftMaxForest:__init(inputSize, trees, rootIds, gaterSize, gaterAct, accUpdate)
   local gaterAct = gaterAct or nn.Tanh()
   local gaterSize = gaterSize or {}

   -- experts: one SoftMaxTree per hierarchy
   self.experts = nn.ConcatTable()
   self.smts = {}
   for i,tree in ipairs(trees) do
      local smt = nn.SoftMaxTree(inputSize, tree, rootIds[i], accUpdate)
      -- BUG FIX: was table.insert(self._smts, smt) — self._smts was never
      -- initialized (the constructor sets self.smts), so this raised an error.
      table.insert(self.smts, smt)
      self.experts:add(smt)
   end

   -- gater: MLP ending in a softmax over the experts
   self.gater = nn.Sequential()
   self.gater:add(nn.SelectTable(1)) -- ignore targets
   for i,hiddenSize in ipairs(gaterSize) do
      self.gater:add(nn.Linear(inputSize, hiddenSize))
      self.gater:add(gaterAct:clone())
      inputSize = hiddenSize
   end
   self.gater:add(nn.Linear(inputSize, self.experts:size()))
   self.gater:add(nn.SoftMax())

   -- mixture: gater output weights the expert outputs
   self.trunk = nn.ConcatTable()
   -- BUG FIX: was self.trunk:add(self._gater) / self.trunk:add(self._experts);
   -- those underscore-prefixed fields are never assigned (nil), so the trunk
   -- was built empty. Use the fields actually created above.
   self.trunk:add(self.gater)
   self.trunk:add(self.experts)
   self.mixture = nn.MixtureTable()
   self.module = nn.Sequential()
   self.module:add(self.trunk)
   self.module:add(self.mixture)
   parent.__init(self)
   self.modules[1] = self.module
end

function SoftMaxForest:updateOutput(input)
   self.output = self.module:updateOutput(input)
   return self.output
end

function SoftMaxForest:updateGradInput(input, gradOutput)
   self.gradInput = self.module:updateGradInput(input, gradOutput)
   return self.gradInput
end

function SoftMaxForest:accGradParameters(input, gradOutput, scale)
   self.module:accGradParameters(input, gradOutput, scale)
end

function SoftMaxForest:accUpdateGradParameters(input, gradOutput, lr)
   self.module:accUpdateGradParameters(input, gradOutput, lr)
end
-------------------------------------------------------------------------------- /MultiSoftMax.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MultiSoftMax ]]-- 3 | -- Takes 2D or 3D input and performs a softmax over the last dimension. 4 | ------------------------------------------------------------------------ 5 | local MultiSoftMax, parent = torch.class('nn.MultiSoftMax', 'nn.Module') 6 | 7 | function MultiSoftMax.__init(self) 8 | parent.__init(self) 9 | self._input = torch.Tensor() 10 | self._output = torch.Tensor() 11 | self._gradInput = torch.Tensor() 12 | self._gradOutput = torch.Tensor() 13 | end 14 | 15 | function MultiSoftMax:updateOutput(input) 16 | if input:dim() == 2 then 17 | return input.THNN.SoftMax_updateOutput(input:cdata(), self.output:cdata()) 18 | end 19 | if input:dim() ~= 3 then 20 | error"Only supports 2D or 3D inputs" 21 | end 22 | self._input:view(input, input:size(1)*input:size(2), input:size(3)) 23 | local output = self.output 24 | self.output = self._output 25 | input.THNN.SoftMax_updateOutput(self._input:cdata(), self.output:cdata()) 26 | output:viewAs(self.output, input) 27 | self.output = output 28 | return self.output 29 | end 30 | 31 | function MultiSoftMax:updateGradInput(input, gradOutput) 32 | if input:dim() == 2 then 33 | return input.THNN.SoftMax_updateGradInput(input:cdata(), gradOutput:cdata(), 34 | self.gradInput:cdata(), self.output:cdata()) 35 | end 36 | self._gradOutput:view(gradOutput, input:size(1)*input:size(2), input:size(3)) 37 | local gradInput = self.gradInput 38 | self.gradInput = self._gradInput 39 | local output = self.output 40 | self.output = self._output 41 | input.THNN.SoftMax_updateGradInput(self._input:cdata(), self._gradOutput:cdata(), 42 | self.gradInput:cdata(), self.output:cdata()) 43 | self.gradInput = gradInput:viewAs(self.gradInput, input) 44 | self.output = output 45 | return 
self.gradInput 46 | end 47 | -------------------------------------------------------------------------------- /Probe.lua: -------------------------------------------------------------------------------- 1 | local Probe, parent = torch.class('nn.Probe', 'nn.Module') 2 | 3 | function Probe:__init(name,display) 4 | parent.__init(self) 5 | self.name = name or 'unnamed' 6 | self.display = display 7 | nn._ProbeTimer = nn._ProbeTimer or torch.Timer() 8 | end 9 | 10 | function Probe:updateOutput(input) 11 | self.output = input 12 | local legend = '<' .. self.name .. '>.output' 13 | local size = {} 14 | for i = 1,input:dim() do 15 | size[i] = input:size(i) 16 | end 17 | size = table.concat(size,'x') 18 | local diff = nn._ProbeTimer:time().real - (nn._ProbeLast or 0) 19 | nn._ProbeLast = nn._ProbeTimer:time().real 20 | print('') 21 | print(legend) 22 | print(' + size = ' .. size) 23 | print(' + mean = ' .. input:mean()) 24 | print(' + std = ' .. input:std()) 25 | print(' + min = ' .. input:min()) 26 | print(' + max = ' .. input:max()) 27 | print(' + time since last probe = ' .. string.format('%0.1f',diff*1000) .. 'ms') 28 | if self.display then 29 | self.winf = image.display{image=input, win=self.winf, legend=legend} 30 | end 31 | return self.output 32 | end 33 | 34 | function Probe:updateGradInput(input, gradOutput) 35 | self.gradInput = gradOutput 36 | local legend = 'layer<' .. self.name .. '>.gradInput' 37 | local size = {} 38 | for i = 1,gradOutput:dim() do 39 | size[i] = gradOutput:size(i) 40 | end 41 | size = table.concat(size,'x') 42 | local diff = nn._ProbeTimer:time().real - (nn._ProbeLast or 0) 43 | nn._ProbeLast = nn._ProbeTimer:time().real 44 | print('') 45 | print(legend) 46 | print(' + size = ' .. size) 47 | print(' + mean = ' .. gradOutput:mean()) 48 | print(' + std = ' .. gradOutput:std()) 49 | print(' + min = ' .. gradOutput:min()) 50 | print(' + max = ' .. gradOutput:max()) 51 | print(' + time since last probe = ' .. string.format('%0.1f',diff*1000) .. 
'ms') 52 | if self.display then 53 | self.winb = image.display{image=gradOutput, win=self.winb, legend=legend} 54 | end 55 | return self.gradInput 56 | end 57 | -------------------------------------------------------------------------------- /SpatialLinear.lua: -------------------------------------------------------------------------------- 1 | local SpatialLinear, parent = torch.class('nn.SpatialLinear', 'nn.Module') 2 | 3 | function SpatialLinear:__init(fanin, fanout) 4 | parent.__init(self) 5 | 6 | self.fanin = fanin or 1 7 | self.fanout = fanout or 1 8 | 9 | self.weightDecay = 0 10 | self.weight = torch.Tensor(self.fanout, self.fanin) 11 | self.bias = torch.Tensor(self.fanout) 12 | self.gradWeight = torch.Tensor(self.fanout, self.fanin) 13 | self.gradBias = torch.Tensor(self.fanout) 14 | 15 | self.output = torch.Tensor(fanout,1,1) 16 | self.gradInput = torch.Tensor(fanin,1,1) 17 | 18 | self:reset() 19 | end 20 | 21 | function SpatialLinear:reset(stdv) 22 | if stdv then 23 | stdv = stdv * math.sqrt(3) 24 | else 25 | stdv = 1./math.sqrt(self.weight:size(1)) 26 | end 27 | for i=1,self.weight:size(1) do 28 | self.weight:select(1, i):apply(function() 29 | return torch.uniform(-stdv, stdv) 30 | end) 31 | self.bias[i] = torch.uniform(-stdv, stdv) 32 | end 33 | end 34 | 35 | function SpatialLinear:zeroGradParameters(momentum) 36 | if momentum then 37 | self.gradWeight:mul(momentum) 38 | self.gradBias:mul(momentum) 39 | else 40 | self.gradWeight:zero() 41 | self.gradBias:zero() 42 | end 43 | end 44 | 45 | function SpatialLinear:updateParameters(learningRate) 46 | self.weight:add(-learningRate, self.gradWeight) 47 | self.bias:add(-learningRate, self.gradBias) 48 | end 49 | 50 | function SpatialLinear:decayParameters(decay) 51 | self.weight:add(-decay, self.weight) 52 | self.bias:add(-decay, self.bias) 53 | end 54 | 55 | function SpatialLinear:updateOutput(input) 56 | self.output:resize(self.fanout, input:size(2), input:size(3)) 57 | 
input.nn.SpatialLinear_updateOutput(self, input) 58 | return self.output 59 | end 60 | 61 | function SpatialLinear:updateGradInput(input, gradOutput) 62 | self.gradInput:resize(self.fanin, input:size(2), input:size(3)) 63 | input.nn.SpatialLinear_updateGradInput(self, input, gradOutput) 64 | return self.gradInput 65 | end 66 | -------------------------------------------------------------------------------- /SpatialReSampling.lua: -------------------------------------------------------------------------------- 1 | local SpatialReSampling, parent = torch.class('nn.SpatialReSampling', 'nn.Module') 2 | 3 | local help_desc = 4 | [[Applies a 2D re-sampling over an input image composed of 5 | several input planes. The input tensor in forward(input) is 6 | expected to be a 3D or 4D tensor ([batchSize x nInputPlane x width x height). 7 | The number of output planes will be the same as the nb of input 8 | planes. 9 | 10 | The re-sampling is done using bilinear interpolation. For a 11 | simple nearest-neihbor upsampling, use nn.SpatialUpSampling(), 12 | and for a simple average-based down-sampling, use 13 | nn.SpatialDownSampling(). 14 | 15 | If the input image is a 3D tensor nInputPlane x height x width, 16 | the output image size will be nInputPlane x oheight x owidth where 17 | owidth and oheight are given to the constructor. 18 | 19 | Instead of owidth & oheight, one can provide rwidth & rheight, 20 | such that owidth = iwidth*rwidth & oheight = iheight*rheight. ]] 21 | 22 | function SpatialReSampling:__init(...) 
23 | parent.__init(self) 24 | xlua.unpack_class( 25 | self, {...}, 'nn.SpatialReSampling', help_desc, 26 | {arg='rwidth', type='number', help='ratio: owidth/iwidth'}, 27 | {arg='rheight', type='number', help='ratio: oheight/iheight'}, 28 | {arg='owidth', type='number', help='output width'}, 29 | {arg='oheight', type='number', help='output height'} 30 | ) 31 | end 32 | 33 | function SpatialReSampling:updateOutput(input) 34 | assert(input:dim() == 3 or input:dim() == 4, 35 | 'input to SpatialReSampling must be 3D or 4D, received: [' .. 36 | table.concat(input:size():totable(), ', ') .. ']') 37 | local hDim, wDim = 2, 3 38 | if input:dim() == 4 then 39 | hDim, wDim = 3, 4 40 | end 41 | self.oheight = self.oheight or self.rheight*input:size(hDim) 42 | self.owidth = self.owidth or self.rwidth*input:size(wDim) 43 | input.nn.SpatialReSampling_updateOutput(self, input) 44 | return self.output 45 | end 46 | 47 | function SpatialReSampling:updateGradInput(input, gradOutput) 48 | input.nn.SpatialReSampling_updateGradInput(self, input, gradOutput) 49 | return self.gradInput 50 | end 51 | -------------------------------------------------------------------------------- /PushTable.lua: -------------------------------------------------------------------------------- 1 | local PushTable, parent = torch.class("nn.PushTable", "nn.Module") 2 | 3 | function PushTable:__init(index) 4 | self._index = index 5 | self._pulls = {} 6 | self.output = {} 7 | self._gradInput = torch.Tensor() 8 | self.gradInput = {} 9 | self._forward = false 10 | end 11 | 12 | function PushTable:pull(index) 13 | local pull = nn.PullTable(self, index) 14 | table.insert(self._pulls, pull) 15 | return pull 16 | end 17 | 18 | function PushTable:updateOutput(inputTable) 19 | for i, input in ipairs(inputTable) do 20 | if i < self._index then 21 | self.output[i] = input 22 | elseif i > self._index then 23 | self.output[i-1] = input 24 | end 25 | end 26 | 27 | local input = inputTable[self._index] 28 | for i,pull in 
ipairs(self._pulls) do 29 | pull:_updateOutput(input) 30 | end 31 | 32 | self._forward = true 33 | return self.output 34 | end 35 | 36 | -- Receives the gradient flowing back from a PullTable; called once per pull. The first call after a forward copies, later calls accumulate. 37 | function PushTable:_updateGradInput(gradOutput) 38 | if self._forward then 39 | if torch.type(self._gradInput) ~= torch.type(gradOutput) then -- fix: compare the tensor buffer (self._gradInput), not self.gradInput (a table), which made this always true and reallocated every call 40 | self._gradInput = gradOutput.new() 41 | end 42 | self._gradInput:resizeAs(gradOutput) 43 | self._gradInput:copy(gradOutput) 44 | else 45 | self._gradInput:add(gradOutput) 46 | end 47 | self._forward = false 48 | end 49 | 50 | function PushTable:updateGradInput(inputTable, gradOutputTable) 51 | for i, gradOutput in ipairs(gradOutputTable) do 52 | if i < self._index then 53 | self.gradInput[i] = gradOutput 54 | elseif i >= self._index then -- fix: was i > self._index, which skipped gradOutputTable[self._index] (it maps to inputTable[self._index+1]) and tripped the size assert below 55 | self.gradInput[i+1] = gradOutput 56 | end 57 | end 58 | self.gradInput[self._index] = self._gradInput 59 | assert(#inputTable == #self.gradInput, "tables size mismatch") 60 | return self.gradInput 61 | end 62 | 63 | 64 | function PushTable:type(type, tensorCache) 65 | assert(type, 'PushTable: must provide a type to convert to') 66 | 67 | tensorCache = tensorCache or {} 68 | 69 | -- find all tensors and convert them (the _pulls list holds modules, not tensors, so it is skipped) 70 | for key,param in pairs(self) do 71 | if(key ~= "_pulls") then 72 | self[key] = nn.utils.recursiveType(param, type, tensorCache) 73 | end 74 | end 75 | return self 76 | end 77 | 78 | 79 | -------------------------------------------------------------------------------- /Balance.lua: -------------------------------------------------------------------------------- 1 | local Balance, parent = torch.class('nn.Balance', 'nn.Module') 2 | ------------------------------------------------------------------------ 3 | --[[ Balance ]]-- 4 | -- Constrains the distribution of a preceding SoftMax to have equal 5 | -- probability of category over examples. So each category has a 6 | -- mean probability of 1/nCategory.
7 | ------------------------------------------------------------------------ 8 | 9 | function Balance:__init(nBatch) 10 | parent.__init(self) 11 | self.nBatch = nBatch or 10 12 | self.inputCache = torch.Tensor() 13 | self.prob = torch.Tensor() 14 | self.sum = torch.Tensor() 15 | self.batchSize = 0 16 | self.startIdx = 1 17 | self.train = true 18 | end 19 | 20 | function Balance:updateOutput(input) 21 | assert(input:dim() == 2, "Only works with 2D inputs (batches)") 22 | if self.batchSize ~= input:size(1) then 23 | self.inputCache:resize(input:size(1)*self.nBatch, input:size(2)):zero() 24 | self.batchSize = input:size(1) 25 | self.startIdx = 1 26 | end 27 | 28 | self.output:resizeAs(input):copy(input) 29 | if not self.train then 30 | return self.output 31 | end 32 | -- keep track of previous batches of P(Y|X) 33 | self.inputCache:narrow(1, self.startIdx, input:size(1)):copy(input) 34 | 35 | -- P(X) is uniform for all X, i.e. P(X) = 1/c where c is a constant 36 | -- P(Y) = sum_x( P(Y|X)*P(X) ) 37 | self.prob:sum(self.inputCache, 1):div(self.prob:sum()) 38 | -- P(X|Y) = P(Y|X)*P(X)/P(Y) 39 | self.output:cdiv(self.prob:resize(1,input:size(2)):expandAs(input))--:div(input:size(2)) 40 | -- P(Z|X) = P(X|Y)*sum_y( P(X|Y) ) where P(Z) = 1/d where d is a constant 41 | self.sum:sum(self.output, 2) 42 | self.output:cdiv(self.sum:resize(input:size(1),1):expandAs(self.output)) 43 | 44 | self.startIdx = self.startIdx + self.batchSize 45 | if self.startIdx > self.inputCache:size(1) then 46 | self.startIdx = 1 47 | end 48 | 49 | return self.output 50 | end 51 | 52 | function Balance:updateGradInput(input, gradOutput) 53 | self.gradInput:resizeAs(gradOutput) 54 | self.gradInput:copy(gradOutput) 55 | self.gradInput:cdiv(self.sum:resize(input:size(1),1):expandAs(self.output)) 56 | self.gradInput:cdiv(self.prob:resize(1,input:size(2)):expandAs(input)) 57 | return self.gradInput 58 | end 59 | -------------------------------------------------------------------------------- 
/PullTable.lua: -------------------------------------------------------------------------------- 1 | local PullTable, parent = torch.class("nn.PullTable", "nn.Module") 2 | 3 | function PullTable:__init(push, index) 4 | self._push = push 5 | self._index = index 6 | self.output = {} 7 | self.gradInput = {} 8 | end 9 | 10 | function PullTable:_updateOutput(output) 11 | self._output = output 12 | end 13 | 14 | function PullTable:updateOutput(inputTable) 15 | if torch.type(inputTable) == 'table' then 16 | for i, input in ipairs(inputTable) do 17 | if i < self._index then 18 | self.output[i] = input 19 | else 20 | self.output[i+1] = input 21 | end 22 | end 23 | self.output[self._index] = self._output 24 | else 25 | if self._index == 1 then 26 | self.output[2] = inputTable 27 | self.output[1] = self._output 28 | else 29 | assert(self._index == 2, "table index out of range") 30 | self.output[1] = inputTable 31 | self.output[2] = self._output 32 | end 33 | end 34 | return self.output 35 | end 36 | 37 | function PullTable:updateGradInput(inputTable, gradOutputTable) 38 | self._push:_updateGradInput(gradOutputTable[self._index]) 39 | 40 | if torch.type(inputTable) == 'table' then 41 | if torch.type(self.gradInput) ~= 'table' then 42 | self.gradInput = {} 43 | end 44 | for i, gradOutput in ipairs(gradOutputTable) do 45 | if i < self._index then 46 | self.gradInput[i] = gradOutput 47 | elseif i > self._index then 48 | self.gradInput[i-1] = gradOutput 49 | end 50 | end 51 | assert(#inputTable == #self.gradInput, "tables size mismatch") 52 | else 53 | if self._index == 1 then 54 | self.gradInput = gradOutputTable[2] 55 | else 56 | self.gradInput = gradOutputTable[1] 57 | end 58 | end 59 | return self.gradInput 60 | end 61 | 62 | 63 | function PullTable:type(type, tensorCache) 64 | assert(type, 'PullTable: must provide a type to convert to') 65 | 66 | tensorCache = tensorCache or {} 67 | 68 | -- find all tensors and convert them 69 | for key,param in pairs(self) do 70 | if(key ~= 
"_push") then 71 | self[key] = nn.utils.recursiveType(param, type, tensorCache) 72 | end 73 | end 74 | 75 | return self 76 | end 77 | 78 | -------------------------------------------------------------------------------- /init.c: -------------------------------------------------------------------------------- 1 | #include "TH.h" 2 | #include "luaT.h" 3 | 4 | #ifdef _OPENMP 5 | #include "omp.h" 6 | #endif 7 | 8 | #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) 9 | #define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor) 10 | #define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) 11 | 12 | #include "generic/SpatialReSamplingEx.c" 13 | #include "THGenerateFloatTypes.h" 14 | 15 | #include "generic/SpatialLinear.c" 16 | #include "THGenerateFloatTypes.h" 17 | 18 | #include "generic/SpatialUpSampling.c" 19 | #include "THGenerateFloatTypes.h" 20 | 21 | #include "generic/SpatialDownSampling.c" 22 | #include "THGenerateFloatTypes.h" 23 | 24 | #include "generic/SpatialReSampling.c" 25 | #include "THGenerateFloatTypes.h" 26 | 27 | #include "generic/SpatialMaxSampling.c" 28 | #include "THGenerateFloatTypes.h" 29 | 30 | #include "generic/DistMarginCriterion.c" 31 | #include "THGenerateFloatTypes.h" 32 | 33 | #include "generic/SpatialGraph.c" 34 | #include "THGenerateFloatTypes.h" 35 | 36 | #include "generic/SpatialMatching.c" 37 | #include "THGenerateFloatTypes.h" 38 | 39 | #include "generic/SpatialRadialMatching.c" 40 | #include "THGenerateFloatTypes.h" 41 | 42 | #include "generic/DataSetLabelMe.c" 43 | #include "THGenerateFloatTypes.h" 44 | 45 | #include "generic/SoftMaxTree.c" 46 | #include "THGenerateFloatTypes.h" 47 | 48 | DLL_EXPORT int luaopen_libnnx(lua_State *L) 49 | { 50 | nn_FloatSpatialLinear_init(L); 51 | nn_FloatSpatialReSamplingEx_init(L); 52 | nn_FloatSpatialUpSampling_init(L); 53 | nn_FloatSpatialDownSampling_init(L); 54 | nn_FloatSpatialReSampling_init(L); 55 | nn_FloatSpatialMaxSampling_init(L); 56 | nn_FloatDistMarginCriterion_init(L); 57 | 
nn_FloatSpatialGraph_init(L); 58 | nn_FloatSpatialMatching_init(L); 59 | nn_FloatSpatialRadialMatching_init(L); 60 | nn_FloatDataSetLabelMe_init(L); 61 | nn_FloatSoftMaxTree_init(L); 62 | 63 | nn_DoubleSpatialLinear_init(L); 64 | nn_DoubleSpatialReSamplingEx_init(L); 65 | nn_DoubleSpatialUpSampling_init(L); 66 | nn_DoubleSpatialDownSampling_init(L); 67 | nn_DoubleSpatialReSampling_init(L); 68 | nn_DoubleSpatialMaxSampling_init(L); 69 | nn_DoubleDistMarginCriterion_init(L); 70 | nn_DoubleSpatialGraph_init(L); 71 | nn_DoubleSpatialMatching_init(L); 72 | nn_DoubleSpatialRadialMatching_init(L); 73 | nn_DoubleDataSetLabelMe_init(L); 74 | nn_DoubleSoftMaxTree_init(L); 75 | 76 | return 1; 77 | } 78 | -------------------------------------------------------------------------------- /SpatialGraph.lua: -------------------------------------------------------------------------------- 1 | local SpatialGraph, parent = torch.class('nn.SpatialGraph', 'nn.Module') 2 | 3 | local help_desc = 4 | [[Creates an edge-weighted graph from a set of N feature 5 | maps. 6 | 7 | The input is a 3D tensor width x height x nInputPlane, the 8 | output is a 3D tensor width x height x 2. The first slice 9 | of the output contains horizontal edges, the second vertical 10 | edges. 11 | 12 | The input features are assumed to be >= 0. 13 | More precisely: 14 | + dist == 'euclid' and norm == true: the input features should 15 | also be <= 1, to produce properly normalized distances (btwn 0 and 1); 16 | + dist == 'cosine': the input features do not need to be bounded, 17 | as the cosine dissimilarity normalizes with respect to each vector. 18 | An epsilon is automatically added, so that components that are == 0 19 | are properly considered as being similar. 20 | ]] 21 | 22 | function SpatialGraph:__init(...) 
23 | parent.__init(self) 24 | 25 | xlua.unpack_class( 26 | self, {...}, 27 | 'nn.SpatialGraph', help_desc, 28 | {arg='dist', type='string', help='distance metric to use', default='euclid'}, 29 | {arg='normalize', type='boolean', help='normalize euclidean distances btwn 0 and 1 (assumes input range to be btwn 0 and 1)', default=true}, 30 | {arg='connex', type='number', help='connexity', default=4} 31 | ) 32 | 33 | if self.connex ~= 4 then 34 | xlua.error('4 is the only connexity supported, for now', 'nn.SpatialGraph',self.usage) 35 | end 36 | self.dist = ((self.dist == 'euclid') and 0) or ((self.dist == 'cosine') and 1) 37 | or xerror('euclid is the only distance supported, for now','nn.SpatialGraph',self.usage) 38 | self.normalize = (self.normalize and 1) or 0 39 | if self.dist == 'cosine' and self.normalize == 1 then 40 | xerror('normalized cosine is not supported for now [just because I couldnt figure out the gradient :-)]', 41 | 'nn.SpatialGraph', self.usage) 42 | end 43 | end 44 | 45 | function SpatialGraph:updateOutput(input) 46 | self.output:resize(self.connex / 2, input:size(2), input:size(3)) 47 | input.nn.SpatialGraph_updateOutput(self, input) 48 | return self.output 49 | end 50 | 51 | function SpatialGraph:updateGradInput(input, gradOutput) 52 | self.gradInput:resizeAs(input) 53 | input.nn.SpatialGraph_updateGradInput(self, input, gradOutput) 54 | return self.gradInput 55 | end 56 | -------------------------------------------------------------------------------- /QDRiemaNNLinear.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr) and Yann Ollivier 3 | -- Description: Implementation of the quasi-diagonal reduction 4 | -- based on the Practical Riemannian Neural Networks paper (http://arxiv.org/abs/1602.08007) 5 | -- 6 | local QDRiemaNNLinear, parent = torch.class('nnx.QDRiemaNNLinear', 'nn.Linear') 7 | 8 | function QDRiemaNNLinear:__init(inputSize, 
outputSize, gamma, qdFlag) 9 | parent.__init(self,inputSize, outputSize) 10 | if qdFlag == nil then -- Flag for choosing between diagonal or quasi-diagonal reductions 11 | self.qdFlag = true 12 | else 13 | self.qdFlag = qdFlag 14 | end 15 | self.gamma = gamma or 0.01 -- update rate of the metric 16 | self.matReg = 1e-12 -- numerical regularization 17 | self.initMetric = true -- flag for first update 18 | self.Mii = torch.Tensor(outputSize, inputSize) 19 | if self.qdFlag then self.M0i = torch.Tensor(outputSize, inputSize) end 20 | self.M00 = torch.Tensor(outputSize) 21 | end 22 | 23 | function QDRiemaNNLinear:accGradParameters(input, gradOutput) 24 | parent.accGradParameters(self,input,gradOutput) 25 | 26 | local gradOutputSqT = torch.pow(gradOutput,2):t() 27 | 28 | if self.initMetric then 29 | self.Mii:mm(gradOutputSqT,torch.pow(input,2)) 30 | self.M00:mv(gradOutputSqT,self.addBuffer) 31 | if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end 32 | self.initMetric = false 33 | else 34 | self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2)) 35 | if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end 36 | self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer) 37 | end 38 | 39 | if self.qdFlag then 40 | local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i))) 41 | local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25) 42 | self.gradWeight:copy(numerator:cdiv(denominator)) 43 | 44 | local temp = torch.cmul(self.M0i,self.gradWeight):sum(2) 45 | self.gradBias:add(-1.,temp):cdiv(torch.add(self.M00,self.matReg)) 46 | 47 | else 48 | self.gradWeight:cdiv(self.Mii:add(self.matReg)) 49 | self.gradBias:cdiv(self.M00:add(self.matReg)) 50 | end 51 | end 52 | 53 | function QDRiemaNNLinear:reset() 54 
| self.initMetric = true 55 | local stdv = 1./math.sqrt(self.weight:size(2)) -- fix: was missing 'local', leaking stdv into the global environment 56 | self.weight:normal(0, stdv) 57 | self.bias:zero() 58 | return self 59 | end 60 | -------------------------------------------------------------------------------- /SpatialSparseCriterion.lua: -------------------------------------------------------------------------------- 1 | local SpatialSparseCriterion, parent = torch.class('nn.SpatialSparseCriterion', 'nn.SparseCriterion') 2 | 3 | function SpatialSparseCriterion:__init(...) 4 | parent.__init(self) 5 | 6 | xlua.unpack_class(self, {...}, 7 | 'nn.SpatialSparseCriterion', 8 | 'A spatial extension of the SparseCriterion class.\n' 9 | ..' Provides a set of parameters to deal with spatial mini-batch training.', 10 | {arg='nbGradients', type='number', help='number of gradients to backpropagate (-1:all, >=1:nb)', default=-1}, 11 | {arg='sizeAverage', type='number', help='if true, forward() returns an average instead of a sum of errors', default=true} 12 | ) 13 | end 14 | 15 | function SpatialSparseCriterion:updateOutput(input) 16 | self.fullOutput = self.fullOutput or torch.Tensor() 17 | self.fullOutput:resize(input:size(2), input:size(3)) 18 | input.nn.SpatialSparseCriterion_updateOutput(self, input) 19 | if self.sizeAverage then 20 | self.output = self.fullOutput:mean() 21 | else 22 | self.output = self.fullOutput:sum() 23 | end 24 | return self.output 25 | end 26 | 27 | function SpatialSparseCriterion:updateGradInput(input,target) 28 | -- (1) retrieve adjusted target 29 | target = self.target 30 | -- (2) resize input gradient map 31 | self.gradInput:resizeAs(input):zero() 32 | -- (3) compute input gradients, based on the nbGradients param 33 | if self.nbGradients == -1 then 34 | -- dense gradients 35 | input.nn.SpatialSparseCriterion_updateGradInput(self, input, self.gradInput) 36 | elseif self.nbGradients == 1 then 37 | -- only 1 gradient is computed, sampled in the center 38 | self.fullGradInput = self.fullGradInput or torch.Tensor() -- fix: operands were reversed (torch.Tensor() or ...), allocating a fresh tensor on every call and defeating the cache 39 |
self.fullGradInput:resizeAs(input):zero() 40 | input.nn.SpatialSparseCriterion_updateGradInput(self, input, self.fullGradInput) 41 | local y = math.ceil(self.gradInput:size(2)/2) 42 | local x = math.ceil(self.gradInput:size(3)/2) 43 | self.gradInput:select(3,x):select(2,y):copy(self.fullGradInput:select(3,x):select(2,y)) 44 | else 45 | -- only N gradients are computed, sampled in random locations 46 | self.fullGradInput = self.fullGradInput or torch.Tensor() -- fix: operands were reversed (torch.Tensor() or ...), allocating a fresh tensor on every call and defeating the cache 47 | self.fullGradInput:resizeAs(input):zero() 48 | input.nn.SpatialSparseCriterion_updateGradInput(self, input, self.fullGradInput) 49 | for i = 1,self.nbGradients do 50 | local x = math.random(1,self.gradInput:size(3)) -- fix: was size(1) (the plane dim); x indexes dim 3 (width), matching the center-sample branch above 51 | local y = math.random(1,self.gradInput:size(2)) 52 | self.gradInput:select(3,x):select(2,y):copy(self.fullGradInput:select(3,x):select(2,y)) 53 | end 54 | end 55 | return self.gradInput 56 | end 57 | -------------------------------------------------------------------------------- /SpatialUpSampling.lua: -------------------------------------------------------------------------------- 1 | local SpatialUpSampling, parent = torch.class('nn.SpatialUpSampling', 'nn.Module') 2 | 3 | local help_desc = [[ 4 | Applies a 2D up-sampling over an input image composed of 5 | several input planes. The input tensor in forward(input) is 6 | expected to be a 3D tensor (nInputPlane x width x height). 7 | The number of output planes will be the same as nInputPlane. 8 | 9 | The upsampling is done using the simple nearest neighbor 10 | technique. For interpolated (bicubic) upsampling, use 11 | nn.SpatialReSampling(). 12 | 13 | If the input image is a 3D tensor nInputPlane x width x height, 14 | the output image size will be nInputPlane x owidth x oheight where 15 | 16 | owidth = width*dW 17 | oheight = height*dH ]] 18 | 19 | function SpatialUpSampling:__init(...)
20 | parent.__init(self) 21 | 22 | -- get args 23 | xlua.unpack_class(self, {...}, 'nn.SpatialUpSampling', help_desc, 24 | {arg='dW', type='number', help='stride width', req=true}, 25 | {arg='dH', type='number', help='stride height', req=true}, 26 | {arg='yDim', type='number', help='image y dimension', default=2}, 27 | {arg='xDim', type='number', help='image x dimension', default=3} 28 | ) 29 | if self.yDim+1 ~= self.xDim then 30 | error('nn.SpatialUpSampling: yDim must be equals to xDim-1') 31 | end 32 | self.outputSize = torch.LongStorage(4) 33 | self.inputSize = torch.LongStorage(4) 34 | end 35 | 36 | function SpatialUpSampling:updateOutput(input) 37 | self.inputSize:fill(1) 38 | for i = 1,self.yDim-1 do 39 | self.inputSize[1] = self.inputSize[1] * input:size(i) 40 | end 41 | self.inputSize[2] = input:size(self.yDim) 42 | self.inputSize[3] = input:size(self.xDim) 43 | for i = self.xDim+1,input:nDimension() do 44 | self.inputSize[4] = self.inputSize[4] * input:size(i) 45 | end 46 | self.outputSize[1] = self.inputSize[1] 47 | self.outputSize[2] = self.inputSize[2] * self.dH 48 | self.outputSize[3] = self.inputSize[3] * self.dW 49 | self.outputSize[4] = self.inputSize[4] 50 | self.output:resize(self.outputSize) 51 | input.nn.SpatialUpSampling_updateOutput(self, input:reshape(self.inputSize)) 52 | local outputSize2 = input:size() 53 | outputSize2[self.yDim] = outputSize2[self.yDim] * self.dH 54 | outputSize2[self.xDim] = outputSize2[self.xDim] * self.dW 55 | self.output = self.output:reshape(outputSize2) 56 | return self.output 57 | end 58 | 59 | function SpatialUpSampling:updateGradInput(input, gradOutput) 60 | self.gradInput:resize(self.inputSize) 61 | input.nn.SpatialUpSampling_updateGradInput(self, input, 62 | gradOutput:reshape(self.outputSize)) 63 | self.gradInput = self.gradInput:reshape(input:size()) 64 | return self.gradInput 65 | end 66 | -------------------------------------------------------------------------------- /DistNLLCriterion.lua: 
-------------------------------------------------------------------------------- 1 | local DistNLLCriterion, parent = torch.class('nn.DistNLLCriterion', 'nn.Criterion') 2 | 3 | function DistNLLCriterion:__init(opts) 4 | parent.__init(self) 5 | -- user options 6 | opts = opts or {} 7 | self.inputIsADistance = opts.inputIsADistance or false 8 | self.inputIsProbability = opts.inputIsProbability or false 9 | self.inputIsLogProbability = opts.inputIsLogProbability or false 10 | self.targetIsProbability = opts.targetIsProbability 11 | if self.targetIsProbability == nil then self.targetIsProbability = true end 12 | -- internal 13 | self.targetSoftMax = nn.SoftMax() 14 | self.inputLogSoftMax = nn.LogSoftMax() 15 | self.inputLog = nn.Log() 16 | self.gradLogInput = torch.Tensor() 17 | self.input = torch.Tensor() 18 | end 19 | 20 | function DistNLLCriterion:normalize(input, target) 21 | -- normalize target 22 | if not self.targetIsProbability then 23 | self.probTarget = self.targetSoftMax:updateOutput(target) 24 | else 25 | self.probTarget = target 26 | end 27 | 28 | -- flip input if a distance 29 | if self.inputIsADistance then 30 | self.input:resizeAs(input):copy(input):mul(-1) 31 | else 32 | self.input = input 33 | end 34 | 35 | -- normalize input 36 | if not self.inputIsLogProbability and not self.inputIsProbability then 37 | self.logProbInput = self.inputLogSoftMax:updateOutput(self.input) 38 | elseif not self.inputIsLogProbability then 39 | self.logProbInput = self.inputLog:updateOutput(self.input) 40 | else 41 | self.logProbInput = self.input 42 | end 43 | end 44 | 45 | function DistNLLCriterion:denormalize() 46 | -- denormalize gradients 47 | if not self.inputIsLogProbability and not self.inputIsProbability then 48 | self.gradInput = self.inputLogSoftMax:updateGradInput(self.input, self.gradLogInput) 49 | elseif not self.inputIsLogProbability then 50 | self.gradInput = self.inputLog:updateGradInput(self.input, self.gradLogInput) 51 | else 52 | self.gradInput = 
self.gradLogInput 53 | end 54 | 55 | -- if input is a distance, then flip gradients back 56 | if self.inputIsADistance then 57 | self.gradInput:mul(-1) 58 | end 59 | end 60 | 61 | function DistNLLCriterion:updateOutput(input, target) 62 | self:normalize(input, target) 63 | self.output = 0 64 | for i = 1,input:size(1) do 65 | self.output = self.output - self.logProbInput[i] * self.probTarget[i] 66 | end 67 | return self.output 68 | end 69 | 70 | function DistNLLCriterion:updateGradInput(input, target) 71 | self:normalize(input, target) 72 | self.gradLogInput:resizeAs(input) 73 | for i = 1,input:size(1) do 74 | self.gradLogInput[i] = -self.probTarget[i] 75 | end 76 | self:denormalize() 77 | return self.gradInput 78 | end 79 | -------------------------------------------------------------------------------- /DataList.lua: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 2 | -- DataList: a container for plain DataSets. 3 | -- Each sub dataset represents elements from only one class. 4 | -- 5 | -- Authors: Corda, Farabet 6 | -------------------------------------------------------------------------------- 7 | 8 | local DataList, parent = torch.class('nn.DataList','nn.DataSet') 9 | 10 | function DataList:__init() 11 | parent.__init(self) 12 | self.datasets = {} 13 | self.nbClass = 0 14 | self.ClassName = {} 15 | self.ClassMax = 0 16 | self.nbSamples = 0 17 | self.targetIsProbability = false 18 | self.spatialTarget = false 19 | end 20 | 21 | function DataList:__tostring__() 22 | str = 'DataList:\n' 23 | str = str .. ' + nb samples : '..self.nbSamples..'\n' 24 | str = str .. 
' + nb classes : '..self.nbClass 25 | return str 26 | end 27 | 28 | function DataList:__index__(key) 29 | if type(key)=='number' and self.nbClass>0 and key <= self.nbSamples then 30 | local class = ((key-1) % self.nbClass) + 1 31 | local classSize = self.datasets[class]:size() 32 | local elmt = math.floor((key-1)/self.nbClass) + 1 33 | elmt = ((elmt-1) % classSize) + 1 34 | 35 | -- create target vector on the fly 36 | if self.spatialTarget then 37 | if self.targetIsProbability then 38 | self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):zero() 39 | else 40 | self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):fill(-1) 41 | end 42 | self.datasets[class][elmt][2][class][1][1] = 1 43 | else 44 | if self.targetIsProbability then 45 | self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):zero() 46 | else 47 | self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):fill(-1) 48 | end 49 | self.datasets[class][elmt][2][class] = 1 50 | end 51 | 52 | -- apply hook on sample 53 | local sample = self.datasets[class][elmt] 54 | if self.hookOnSample then 55 | sample = self.hookOnSample(self,sample) 56 | end 57 | 58 | -- auto conversion to CUDA 59 | if torch.getdefaulttensortype() == 'torch.CudaTensor' then 60 | sample[1] = torch.Tensor(sample[1]:size()):copy(sample[1]) 61 | end 62 | 63 | return sample,true 64 | end 65 | -- if key is not a number this should return nil 66 | return rawget(self, key) 67 | end 68 | 69 | function DataList:appendDataSet(dataSet,className) 70 | table.insert(self.datasets,dataSet) 71 | -- you can append the same class several times with this mechanism 72 | if self.ClassName[className] then 73 | self.ClassName[className] = self.ClassName[className] + dataSet:size() 74 | else 75 | self.ClassName[className] = dataSet:size() 76 | self.nbClass = self.nbClass + 1 77 | table.insert(self.ClassName,self.nbClass,className) 78 | end 79 | self.ClassMax = 80 | math.floor(math.max(self.ClassMax,self.ClassName[className])) 81 | 
self.nbSamples = self.ClassMax * self.nbClass 82 | end 83 | -------------------------------------------------------------------------------- /test/test-omp.lua: -------------------------------------------------------------------------------- 1 | 2 | function nnx.test_omp(nThread) 3 | 4 | require 'lunit' 5 | require 'sys' 6 | 7 | nnx._test_all_ = nil 8 | module("nnx._test_omp_", lunit.testcase, package.seeall) 9 | 10 | math.randomseed(os.time()) 11 | 12 | if openmp then 13 | nThread = nThread or openmp.getNumThreads() 14 | else 15 | nThread = nThread or error('please specify number of threads') 16 | end 17 | 18 | -- test dimensions 19 | width = 100 20 | height = 100 21 | maps = 64 22 | maps2 = 16 23 | 24 | -- accumulate times 25 | times = {} 26 | 27 | -- precision 28 | precision = 1e-10 29 | 30 | -- generic test function 31 | local function forward(name) 32 | n.nThread = 1 33 | res = n:forward(vec) 34 | res1 = torch.Tensor():resizeAs(res) 35 | res2 = torch.Tensor():resizeAs(res) 36 | 37 | t=sys.clock() 38 | res1:copy( n:forward(vec) ) 39 | ts.c = sys.clock()-t 40 | 41 | res:zero() 42 | 43 | t=sys.clock() 44 | n.nThread = nThread 45 | res2:copy( n:forward(vec) ) 46 | ts.omp = sys.clock()-t 47 | 48 | err = (res1-res2):abs():max() 49 | assert_equal((err < precision), true, name .. ": error = " .. err) 50 | end 51 | 52 | -- generic test function 53 | local function backward(name) 54 | n.nThread = 1 55 | n:forward(vec) 56 | res = n:backward(vec,vecb) 57 | res1 = torch.Tensor():resizeAs(res) 58 | res2 = torch.Tensor():resizeAs(res) 59 | 60 | t=sys.clock() 61 | res1:copy( n:backward(vec,vecb) ) 62 | tsb.c = sys.clock()-t 63 | 64 | res:zero() 65 | 66 | t=sys.clock() 67 | n.nThread = nThread 68 | res2:copy( n:backward(vec,vecb) ) 69 | tsb.omp = sys.clock()-t 70 | 71 | err = (res1-res2):abs():max() 72 | assert_equal((err < precision), true, name .. ": error = " .. 
err) 73 | end 74 | 75 | -- tests 76 | function test_SpatialMaxPooling() 77 | ts = {} 78 | times['SpatialMaxPooling_forward'] = ts 79 | n = nn.SpatialMaxPooling(4,4) 80 | vec = lab.randn(maps,height,width) 81 | forward('SpatialMaxPooling_forward') 82 | 83 | -- ts = {} 84 | -- times['SpatialMaxPooling_backward'] = ts 85 | -- local tbl = nn.tables.random(maps,maps2,math.min(maps,8)) 86 | -- vec = lab.randn(maps,height,width) 87 | -- vecb = lab.randn(maps,height/4,width/4) 88 | -- backward('SpatialMaxPooling_backward') 89 | end 90 | 91 | -- run all tests 92 | lunit.main() 93 | 94 | -- report 95 | print '\nTiming report:' 96 | ntests = 0 97 | glob_speedup = 0 98 | for module,times in pairs(times) do 99 | local speedup = (times.c/times.omp) 100 | print(module .. ' in C: ' .. times.c .. ', with OMP: ' .. times.omp 101 | .. ', speedup: ' .. speedup .. 'x') 102 | glob_speedup = glob_speedup + speedup 103 | ntests = ntests + 1 104 | end 105 | print('Average speedup: ' .. (glob_speedup/ntests) .. 'x') 106 | 107 | end 108 | -------------------------------------------------------------------------------- /CTCCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ CTCCriterion ]] -- 3 | -- CTC Alignment for sequence data where input and labels do not align. 4 | -- Useful for speech recognition on a phoneme/character level basis. 5 | -- Inputs assumed are in the form of seqLength x batch x inputDim. 6 | -- If batchFirst = true then input in the form of batch x seqLength x inputDim. 7 | -- Targets assumed in the form of {{1,2},{3,4}} where {1,2} is for the first 8 | -- element and so forth. 
9 | ------------------------------------------------------------------------ 10 | local CTCCriterion, parent = torch.class('nn.CTCCriterion', 'nn.Criterion') 11 | 12 | function CTCCriterion:__init(batchFirst) 13 | require 'warp_ctc' 14 | parent.__init(self) 15 | self.acts = torch.Tensor() 16 | self.batchFirst = batchFirst or false 17 | end 18 | 19 | function CTCCriterion:forward(input, target, sizes) 20 | return self:updateOutput(input, target, sizes) 21 | end 22 | 23 | function CTCCriterion:updateOutput(input, target, sizes) 24 | assert(sizes, 25 | "You must pass the size of each sequence in the batch as a tensor") 26 | local acts = self.acts 27 | acts:resizeAs(input):copy(input) 28 | if input:dim() == 3 then 29 | if self.batchFirst then 30 | acts = acts:transpose(1, 2) 31 | acts = self:makeContiguous(acts) 32 | end 33 | acts:view(acts, acts:size(1) * acts:size(2), -1) 34 | end 35 | assert(acts:nDimension() == 2) 36 | self.sizes = torch.totable(sizes) 37 | self.gradInput = acts.new():resizeAs(acts):zero() 38 | if input:type() == 'torch.CudaTensor' then 39 | self.output = sumCosts(gpu_ctc(acts, self.gradInput, target, self.sizes)) 40 | else 41 | acts = acts:float() 42 | self.gradInput = self.gradInput:float() 43 | self.output = sumCosts(cpu_ctc(acts, self.gradInput, target, self.sizes)) 44 | end 45 | return self.output / sizes:size(1) 46 | end 47 | 48 | function CTCCriterion:updateGradInput(input, target) 49 | if input:dim() == 2 then -- (seqLen * batchSize) x outputDim 50 | return self.gradInput 51 | end 52 | if self.batchFirst then -- batchSize x seqLen x outputDim 53 | self.gradInput = self.gradInput:view(input:size(2), input:size(1), -1):transpose(1, 2) 54 | else -- seqLen x batchSize x outputDim 55 | self.gradInput:view(self.gradInput, input:size(1), input:size(2), -1) 56 | end 57 | return self.gradInput 58 | end 59 | 60 | function CTCCriterion:makeContiguous(input) 61 | if not input:isContiguous() then 62 | self._input = self._input or input.new() 63 | 
self._input:typeAs(input):resizeAs(input):copy(input) 64 | input = self._input 65 | end 66 | return input 67 | end 68 | 69 | --If batching occurs multiple costs are returned. We sum the costs and return. 70 | function sumCosts(list) 71 | local acc 72 | for k, v in ipairs(list) do 73 | if 1 == k then 74 | acc = v 75 | else 76 | acc = acc + v 77 | end 78 | end 79 | return acc 80 | end -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------------------------- 2 | -- 3 | -- Copyright (c) 2011 Clement Farabet, Marco Scoffier, 4 | -- Koray Kavukcuoglu, Benoit Corda 5 | -- 6 | -- 7 | -- Permission is hereby granted, free of charge, to any person obtaining 8 | -- a copy of this software and associated documentation files (the 9 | -- "Software"), to deal in the Software without restriction, including 10 | -- without limitation the rights to use, copy, modify, merge, publish, 11 | -- distribute, sublicense, and/or sell copies of the Software, and to 12 | -- permit persons to whom the Software is furnished to do so, subject to 13 | -- the following conditions: 14 | -- 15 | -- The above copyright notice and this permission notice shall be 16 | -- included in all copies or substantial portions of the Software. 17 | -- 18 | -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | -- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | -- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | -- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 22 | -- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | -- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 | -- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 | -- 26 | ---------------------------------------------------------------------- 27 | 28 | require 'torch' 29 | require 'xlua' 30 | require 'nn' 31 | 32 | -- create global nnx table: 33 | nnx = {} 34 | 35 | -- c lib: 36 | require 'libnnx' 37 | 38 | -- for testing: 39 | require('nnx.test-all') 40 | require('nnx.test-omp') 41 | 42 | -- tools: 43 | require('nnx.Probe') 44 | require('nnx.Tic') 45 | require('nnx.Toc') 46 | 47 | -- spatial (images) operators: 48 | require('nnx.SpatialLinear') 49 | require('nnx.SpatialClassifier') 50 | require('nnx.SpatialNormalization') 51 | require('nnx.SpatialPadding') 52 | require('nnx.SpatialReSamplingEx') 53 | require('nnx.SpatialUpSampling') 54 | require('nnx.SpatialDownSampling') 55 | require('nnx.SpatialReSampling') 56 | require('nnx.SpatialRecursiveFovea') 57 | require('nnx.SpatialFovea') 58 | require('nnx.SpatialPyramid') 59 | require('nnx.SpatialGraph') 60 | require('nnx.SpatialMatching') 61 | require('nnx.SpatialRadialMatching') 62 | require('nnx.SpatialMaxSampling') 63 | require('nnx.SpatialColorTransform') 64 | require('nnx.PixelSort') 65 | 66 | -- other modules 67 | require('nnx.FunctionWrapper') 68 | 69 | -- misc 70 | require('nnx.SaturatedLU') 71 | require('nnx.Minus') 72 | require('nnx.SoftMaxTree') 73 | require('nnx.SoftMaxForest') 74 | require('nnx.MultiSoftMax') 75 | require('nnx.Balance') 76 | require('nnx.PushTable') 77 | require('nnx.PullTable') 78 | require('nnx.QDRiemaNNLinear') 79 | 80 | -- criterions: 81 | require('nnx.SuperCriterion') 82 | require('nnx.DistNLLCriterion') 83 | require('nnx.DistMarginCriterion') 84 | require('nnx.TreeNLLCriterion') 85 | require('nnx.CTCCriterion') 86 | 87 | -- datasets: 88 | require('nnx.DataSet') 89 | require('nnx.DataList') 90 | require('nnx.DataSetLabelMe') 91 | require('nnx.DataSetSamplingPascal') 92 | -------------------------------------------------------------------------------- /generic/SpatialDownSampling.c: 
-------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/SpatialDownSampling.c" 3 | #else 4 | 5 | static int nn_(SpatialDownSampling_updateOutput)(lua_State *L) { 6 | // get all params 7 | THTensor *input = luaT_checkudata(L, 2, torch_Tensor); 8 | int rW = luaT_getfieldcheckint(L, 1, "rW"); 9 | int rH = luaT_getfieldcheckint(L, 1, "rH"); 10 | THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); 11 | 12 | // dims 13 | int iwidth = input->size[2]; 14 | int iheight = input->size[1]; 15 | int ichannels = input->size[0]; 16 | int owidth = floor(iwidth / rW); 17 | int oheight = floor(iheight / rH); 18 | 19 | // get strides 20 | long *is = input->stride; 21 | long *os = output->stride; 22 | 23 | // get raw pointers 24 | real *input_data = THTensor_(data)(input); 25 | real *output_data = THTensor_(data)(output); 26 | 27 | // resample each plane 28 | real avg; 29 | real *input_p = input_data, *output_p = output_data; 30 | int k, x, y, i, j; 31 | for (k = 0; k < ichannels; ++k, input_p += is[0], output_p += os[0]) 32 | for (y = 0; y < oheight; ++y) 33 | for (x = 0; x < owidth; ++x) { 34 | avg = 0.0; 35 | for (i = y*rH; i < (y+1)*rH; ++i) 36 | for (j = x*rW; j < (x+1)*rW; ++j) 37 | avg += input_p[i*is[1]+j*is[2]]; 38 | output_p[y*os[1] + x*os[2]] = avg; 39 | } 40 | THTensor_(mul)(output, output, 1.0f/(rH*rW)); 41 | return 1; 42 | } 43 | 44 | static int nn_(SpatialDownSampling_updateGradInput)(lua_State *L) { 45 | // get all params 46 | THTensor *gradOutput = luaT_checkudata(L, 2, torch_Tensor); 47 | THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); 48 | int rW = luaT_getfieldcheckint(L, 1, "rW"); 49 | int rH = luaT_getfieldcheckint(L, 1, "rH"); 50 | 51 | THArgCheck(gradOutput->nDimension == 3, 2, "gradOutput must be 3D Tensor"); 52 | 53 | // dims 54 | int owidth = gradOutput->size[2]; 55 | int oheight = gradOutput->size[1]; 56 | int 
ochannels = gradOutput->size[0]; 57 | 58 | // get strides 59 | long *gis = gradInput->stride; 60 | long *gos = gradOutput->stride; 61 | 62 | THTensor_(zero)(gradInput); 63 | 64 | // get raw pointers 65 | real *gradInput_data = THTensor_(data)(gradInput); 66 | real *gradOutput_data = THTensor_(data)(gradOutput); 67 | 68 | // compute gradients for each plane 69 | real *gradInput_p = gradInput_data, *gradOutput_p = gradOutput_data; 70 | int k, x, y, i, j; 71 | for (k = 0; k < ochannels; ++k, gradInput_p += gis[0], gradOutput_p += gos[0]) 72 | for (y = 0; y < oheight; ++y) 73 | for (x = 0; x < owidth; ++x) 74 | for (i = y*rH; i < (y+1)*rH; ++i) 75 | for (j = x*rW; j < (x+1)*rW; ++j) 76 | gradInput_p[i*gis[1]+j*gis[2]] += gradOutput_p[y*gos[1]+x*gos[2]]; 77 | THTensor_(mul)(gradInput, gradInput, 1.0f/(rH*rW)); 78 | 79 | return 1; 80 | } 81 | 82 | static const struct luaL_Reg nn_(SpatialDownSampling__) [] = { 83 | {"SpatialDownSampling_updateOutput", nn_(SpatialDownSampling_updateOutput)}, 84 | {"SpatialDownSampling_updateGradInput", nn_(SpatialDownSampling_updateGradInput)}, 85 | {NULL, NULL} 86 | }; 87 | 88 | static void nn_(SpatialDownSampling_init)(lua_State *L) 89 | { 90 | luaT_pushmetatable(L, torch_Tensor); 91 | luaT_registeratname(L, nn_(SpatialDownSampling__), "nn"); 92 | lua_pop(L,1); 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /SpatialReSamplingEx.lua: -------------------------------------------------------------------------------- 1 | local SpatialReSamplingEx, parent = torch.class('nn.SpatialReSamplingEx', 'nn.Module') 2 | 3 | local help_desc = [[ 4 | Extended spatial resampling. 5 | ]] 6 | function SpatialReSamplingEx:__init(...) 
7 | parent.__init(self) 8 | 9 | -- get args 10 | xlua.unpack_class( 11 | self, {...}, 'nn.SpatialReSampling', help_desc, 12 | {arg='rwidth', type='number', help='ratio: owidth/iwidth'}, 13 | {arg='rheight', type='number', help='ratio: oheight/iheight'}, 14 | {arg='owidth', type='number', help='output width'}, 15 | {arg='oheight', type='number', help='output height'}, 16 | {arg='mode', type='string', help='Mode : simple | average (only for downsampling) | bilinear', default = 'simple'}, 17 | {arg='yDim', type='number', help='image y dimension', default=2}, 18 | {arg='xDim', type='number', help='image x dimension', default=3} 19 | ) 20 | if self.yDim+1 ~= self.xDim then 21 | error('nn.SpatialReSamplingEx: yDim must be equals to xDim-1') 22 | end 23 | self.outputSize = torch.LongStorage(4) 24 | self.inputSize = torch.LongStorage(4) 25 | if self.mode == 'simple' then self.mode_c = 0 end 26 | if self.mode == 'average' then self.mode_c = 1 end 27 | if self.mode == 'bilinear' then self.mode_c = 2 end 28 | if not self.mode_c then 29 | error('SpatialReSampling: mode must be simple | average | bilinear') 30 | end 31 | end 32 | 33 | local function round(a) 34 | return math.floor(a+0.5) 35 | end 36 | 37 | function SpatialReSamplingEx:updateOutput(input) 38 | -- compute iheight, iwidth, oheight and owidth 39 | self.iheight = input:size(self.yDim) 40 | self.iwidth = input:size(self.xDim) 41 | self.oheightCurrent = self.oheight or round(self.rheight*self.iheight) 42 | self.owidthCurrent = self.owidth or round(self.rwidth*self.iwidth) 43 | if not ((self.oheightCurrent>=self.iheight) == (self.owidthCurrent>=self.iwidth)) then 44 | error('SpatialReSamplingEx: Cannot upsample one dimension while downsampling the other') 45 | end 46 | 47 | -- resize input into K1 x iheight x iwidth x K2 tensor 48 | self.inputSize:fill(1) 49 | for i = 1,self.yDim-1 do 50 | self.inputSize[1] = self.inputSize[1] * input:size(i) 51 | end 52 | self.inputSize[2] = self.iheight 53 | self.inputSize[3] = 
self.iwidth 54 | for i = self.xDim+1,input:nDimension() do 55 | self.inputSize[4] = self.inputSize[4] * input:size(i) 56 | end 57 | local reshapedInput = input:reshape(self.inputSize) 58 | 59 | -- prepare output of size K1 x oheight x owidth x K2 60 | self.outputSize[1] = self.inputSize[1] 61 | self.outputSize[2] = self.oheightCurrent 62 | self.outputSize[3] = self.owidthCurrent 63 | self.outputSize[4] = self.inputSize[4] 64 | self.output:resize(self.outputSize) 65 | 66 | -- resample over dims 2 and 3 67 | input.nn.SpatialReSamplingEx_updateOutput(self, input:reshape(self.inputSize)) 68 | 69 | --resize output into the same shape as input 70 | local outputSize2 = input:size() 71 | outputSize2[self.yDim] = self.oheightCurrent 72 | outputSize2[self.xDim] = self.owidthCurrent 73 | self.output = self.output:reshape(outputSize2) 74 | return self.output 75 | end 76 | 77 | function SpatialReSamplingEx:updateGradInput(input, gradOutput) 78 | self.gradInput:resize(self.inputSize) 79 | input.nn.SpatialReSamplingEx_updateGradInput(self, gradOutput:reshape(self.outputSize)) 80 | self.gradInput = self.gradInput:reshape(input:size()) 81 | return self.gradInput 82 | end 83 | -------------------------------------------------------------------------------- /SpatialClassifier.lua: -------------------------------------------------------------------------------- 1 | local Classifier, parent = torch.class('nn.SpatialClassifier', 'nn.Module') 2 | 3 | function Classifier:__init(classifier) 4 | parent.__init(self) 5 | -- public: 6 | self.classifier = classifier or nn.Sequential() 7 | self.spatialOutput = true 8 | -- private: 9 | self.inputF = torch.Tensor() 10 | self.inputT = torch.Tensor() 11 | self.outputF = torch.Tensor() 12 | self.output = torch.Tensor() 13 | self.gradOutputF = torch.Tensor() 14 | self.gradOutputT = torch.Tensor() 15 | self.gradInputF = torch.Tensor() 16 | self.gradInput = torch.Tensor() 17 | -- compat: 18 | self.modules = {self.classifier} 19 | end 20 | 21 | function 
Classifier:add(module) 22 | self.classifier:add(module) 23 | end 24 | 25 | function Classifier:updateOutput(input) 26 | -- get dims: 27 | if input:nDimension() ~= 3 then 28 | error(' input should be 3D: KxHxW') 29 | end 30 | local K = input:size(1) 31 | local H = input:size(2) 32 | local W = input:size(3) 33 | local HW = H*W 34 | 35 | -- transpose input: 36 | self.inputF:set(input):resize(K, HW) 37 | self.inputT:resize(HW, K):copy(self.inputF:t()) 38 | 39 | -- classify all locations: 40 | self.outputT = self.classifier:updateOutput(self.inputT) 41 | 42 | if self.spatialOutput then 43 | -- transpose output: 44 | local N = self.outputT:size(2) 45 | self.outputF:resize(N, HW):copy(self.outputT:t()) 46 | self.output:set(self.outputF):resize(N,H,W) 47 | else 48 | -- leave output flat: 49 | self.output = self.outputT 50 | end 51 | return self.output 52 | end 53 | 54 | function Classifier:updateGradInput(input, gradOutput) 55 | -- get dims: 56 | local K = input:size(1) 57 | local H = input:size(2) 58 | local W = input:size(3) 59 | local HW = H*W 60 | local N = gradOutput:size(1) 61 | 62 | -- transpose input 63 | self.inputF:set(input):resize(K, HW) 64 | self.inputT:resize(HW, K):copy(self.inputF:t()) 65 | 66 | if self.spatialOutput then 67 | -- transpose gradOutput 68 | self.gradOutputF:set(gradOutput):resize(N, HW) 69 | self.gradOutputT:resize(HW, N):copy(self.gradOutputF:t()) 70 | else 71 | self.gradOutputT = gradOutput 72 | end 73 | 74 | -- backward through classifier: 75 | self.gradInputT = self.classifier:updateGradInput(self.inputT, self.gradOutputT) 76 | 77 | -- transpose gradInput 78 | self.gradInputF:resize(K, HW):copy(self.gradInputT:t()) 79 | self.gradInput:set(self.gradInputF):resize(K,H,W) 80 | return self.gradInput 81 | end 82 | 83 | function Classifier:accGradParameters(input, gradOutput, scale) 84 | -- get dims: 85 | local K = input:size(1) 86 | local H = input:size(2) 87 | local W = input:size(3) 88 | local HW = H*W 89 | local N = gradOutput:size(1) 90 | 
91 | -- transpose input 92 | self.inputF:set(input):resize(K, HW) 93 | self.inputT:resize(HW, K):copy(self.inputF:t()) 94 | 95 | if self.spatialOutput then 96 | -- transpose gradOutput 97 | self.gradOutputF:set(gradOutput):resize(N, HW) 98 | self.gradOutputT:resize(HW, N):copy(self.gradOutputF:t()) 99 | else 100 | self.gradOutputT = gradOutput 101 | end 102 | 103 | -- backward through classifier: 104 | self.classifier:accGradParameters(self.inputT, self.gradOutputT, scale) 105 | end 106 | 107 | function Classifier:zeroGradParameters() 108 | self.classifier:zeroGradParameters() 109 | end 110 | 111 | function Classifier:updateParameters(learningRate) 112 | self.classifier:updateParameters(learningRate) 113 | end 114 | -------------------------------------------------------------------------------- /generic/SpatialUpSampling.c: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/SpatialUpSampling.c" 3 | #else 4 | 5 | static int nn_(SpatialUpSampling_updateOutput)(lua_State *L) 6 | { 7 | // get all params 8 | THTensor *input = luaT_checkudata(L, 2, torch_Tensor); 9 | int dW = luaT_getfieldcheckint(L, 1, "dW"); 10 | int dH = luaT_getfieldcheckint(L, 1, "dH"); 11 | THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); 12 | 13 | // dims 14 | int iwidth = input->size[2]; 15 | int iheight = input->size[1]; 16 | int owidth = iwidth * dW; 17 | int oheight = iheight * dH; 18 | int channels1 = input->size[0]; 19 | int channels2 = input->size[3]; 20 | 21 | // get strides 22 | long *is = input->stride; 23 | long *os = output->stride; 24 | 25 | // get raw pointers 26 | real *input_data = THTensor_(data)(input); 27 | real *output_data = THTensor_(data)(output); 28 | 29 | // resample each plane 30 | int k1, k2, x, y; 31 | for (k1 = 0; k1 < channels1; k1++) { 32 | for (k2 = 0; k2 < channels2; k2++) { 33 | // get planes 34 | real *input_p = input_data + k1*is[0] + 
k2*is[3]; 35 | real *output_p = output_data + k1*os[0] + k2*os[3]; 36 | 37 | // for each plane, resample 38 | for (y=0; ysize[2]; 64 | int oheight = gradOutput->size[1]; 65 | int channels1 = gradOutput->size[0]; 66 | int channels2 = gradOutput->size[3]; 67 | 68 | // resize gradInput 69 | THTensor_(zero)(gradInput); 70 | 71 | // get strides 72 | long *gis = gradInput->stride; 73 | long *gos = gradOutput->stride; 74 | 75 | 76 | // get raw pointers 77 | real *gradInput_data = THTensor_(data)(gradInput); 78 | real *gradOutput_data = THTensor_(data)(gradOutput); 79 | 80 | // compute gradients for each plane 81 | int k1, k2, x, y; 82 | for (k1 = 0; k1 < channels1; k1++) { 83 | for (k2 = 0; k2 < channels2; k2++) { 84 | // get planes 85 | real *gradInput_p = gradInput_data + k1*gis[0] + k2*gis[3]; 86 | real *gradOutput_p = gradOutput_data + k1*gos[0] + k2*gos[3]; 87 | 88 | // for each plane, resample 89 | for (y=0; ysize[0]; 15 | int ochannels = output->size[0]; 16 | 17 | // planes 18 | THTensor *outputPlane = THTensor_(new)(); 19 | THTensor *inputPlane = THTensor_(new)(); 20 | 21 | // process each plane 22 | int ok,ik; 23 | for (ok=0; oksize[2]; 55 | int oheight = gradOutput->size[1]; 56 | 57 | // resize gradInput 58 | THTensor_(zero)(gradInput); 59 | 60 | // select planes 61 | THTensor *gradOutput_xy = THTensor_(new)(); 62 | THTensor *gradOutput_y = THTensor_(new)(); 63 | THTensor *gradInput_xy = THTensor_(new)(); 64 | THTensor *gradInput_y = THTensor_(new)(); 65 | THTensor *input_xy = THTensor_(new)(); 66 | THTensor *input_y = THTensor_(new)(); 67 | 68 | // transpose weight 69 | THTensor *weight_t = THTensor_(newTranspose)(weight,0,1); 70 | 71 | // compute gradient 72 | int x,y; 73 | for (y=0; y 0 then c_output = c_output:narrow(self.y_dim, 1 + self.pad_t, c_output:size(self.y_dim) - self.pad_t) end 53 | if self.pad_b > 0 then c_output = c_output:narrow(self.y_dim, 1, c_output:size(self.y_dim) - self.pad_b) end 54 | if self.pad_l > 0 then c_output = 
c_output:narrow(self.x_dim, 1 + self.pad_l, c_output:size(self.x_dim) - self.pad_l) end
55 |    if self.pad_r > 0 then c_output = c_output:narrow(self.x_dim, 1, c_output:size(self.x_dim) - self.pad_r) end
56 |    -- copy input to output
57 |    c_output:copy(c_input)
58 |    return self.output
59 | end
60 | 
-- Backward pass of nn.SpatialPadding: produces a gradInput of input's shape.
-- Mirrors updateOutput: negative pads mean the output was cropped, so only the
-- surviving interior view of gradInput receives gradient (the rest stays at
-- the zero written below); positive pads mean the output was enlarged, so the
-- padded margins of gradOutput are cropped away before the copy-through.
61 | function SpatialPadding:updateGradInput(input, gradOutput)
62 |    --if input:dim() ~= 3 then error('input must be 3-dimensional') end
63 |    self.gradInput:resizeAs(input):zero()
64 |    -- crop gradInput if necessary (negative padding: input larger than output)
65 |    local cg_input = self.gradInput
66 |    if self.pad_t < 0 then cg_input = cg_input:narrow(self.y_dim, 1 - self.pad_t, cg_input:size(self.y_dim) + self.pad_t) end
67 |    if self.pad_b < 0 then cg_input = cg_input:narrow(self.y_dim, 1, cg_input:size(self.y_dim) + self.pad_b) end
68 |    if self.pad_l < 0 then cg_input = cg_input:narrow(self.x_dim, 1 - self.pad_l, cg_input:size(self.x_dim) + self.pad_l) end
69 |    if self.pad_r < 0 then cg_input = cg_input:narrow(self.x_dim, 1, cg_input:size(self.x_dim) + self.pad_r) end
70 |    -- crop gradOutput if necessary (positive padding: output larger than input)
71 |    local cg_output = gradOutput
72 |    if self.pad_t > 0 then cg_output = cg_output:narrow(self.y_dim, 1 + self.pad_t, cg_output:size(self.y_dim) - self.pad_t) end
73 |    if self.pad_b > 0 then cg_output = cg_output:narrow(self.y_dim, 1, cg_output:size(self.y_dim) - self.pad_b) end
74 |    if self.pad_l > 0 then cg_output = cg_output:narrow(self.x_dim, 1 + self.pad_l, cg_output:size(self.x_dim) - self.pad_l) end
75 |    if self.pad_r > 0 then cg_output = cg_output:narrow(self.x_dim, 1, cg_output:size(self.x_dim) - self.pad_r) end
76 |    -- copy gradOutput to gradInput
77 |    cg_input:copy(cg_output)
78 |    return self.gradInput
79 | end
80 | 
-------------------------------------------------------------------------------- /generic/SpatialRadialMatching.c: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/SpatialRadialMatching.c" 3 | #else
4 | 5 | #define square(x) ((x)*(x)) 6 | #define max(x,y) (((x)>(y)) ? (x) : (y)) 7 | #define min(x,y) (((x)>(y)) ? (y) : (x)) 8 | 9 | static int nn_(SpatialRadialMatching_updateOutput)(lua_State *L) 10 | { 11 | // get all params 12 | THTensor *input1 = luaT_checkudata(L, 2, torch_Tensor); 13 | THTensor *input2 = luaT_checkudata(L, 3, torch_Tensor); 14 | //THLongTensor *mask= luaT_checkudata(L, 4, luaT_checktypename2id(L, "torch.LongTensor")); 15 | int maxh = luaT_getfieldcheckint(L, 1, "maxh"); 16 | THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); 17 | 18 | // dims 19 | int iwidth = input1->size[2]; 20 | int iheight = input1->size[1]; 21 | int ichannels = input1->size[0]; 22 | 23 | // get strides 24 | long *i1s = input1->stride; 25 | long *i2s = input2->stride; 26 | //long *ms = mask ->stride; 27 | long *os = output->stride; 28 | 29 | // get pointers 30 | real *input1_p = THTensor_(data)(input1); 31 | real *input2_p = THTensor_(data)(input2); 32 | //long *mask_p = THLongTensor_data(mask); 33 | real *output_p = THTensor_(data)(output); 34 | 35 | // compute output 36 | int x1,y1,y2,k; 37 | real dist; 38 | #pragma omp parallel for private(y1,x1,y2,k,dist) 39 | for (y1 = 0; y1 < iheight; y1++) { 40 | for (x1 = 0; x1 < iwidth; x1++) { 41 | //if (mask_p[y1*ms[0] + x1*ms[1]]) { 42 | for (y2 = y1; y2 < y1+maxh; y2++) { 43 | dist = 0.0f; 44 | for (k = 0; k < ichannels; k++) 45 | dist += square( input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] 46 | - input2_p[k*i2s[0] + y2*i2s[1] + x1*i2s[2]]); 47 | output_p[(y2-y1)*os[2] + y1*os[0] + x1*os[1]] = dist; 48 | } 49 | //} 50 | } 51 | } 52 | 53 | // done 54 | return 0; 55 | } 56 | 57 | static int nn_(SpatialRadialMatching_updateGradInput)(lua_State *L) 58 | { 59 | // get all params 60 | THTensor* input1 = luaT_checkudata(L, 2, torch_Tensor); 61 | THTensor* input2 = luaT_checkudata(L, 3, torch_Tensor); 62 | THTensor* gradOutput = luaT_checkudata(L, 4, torch_Tensor); 63 | //THLongTensor* mask = luaT_checkudata(L, 
5, luaT_checktypename2id(L, "torch.LongTensor")); 64 | THTensor* gradInput1 = luaT_getfieldcheckudata(L, 1, "gradInput1", torch_Tensor); 65 | THTensor* gradInput2 = luaT_getfieldcheckudata(L, 1, "gradInput2", torch_Tensor); 66 | int maxh = luaT_getfieldcheckint(L, 1, "maxh"); 67 | 68 | // dims 69 | int iwidth = input1->size[2]; 70 | int iheight = input1->size[1]; 71 | int ichannels = input1->size[0]; 72 | 73 | // get strides 74 | long* i1s = input1->stride; 75 | long* i2s = input2->stride; 76 | long* gi1s = gradInput1->stride; 77 | long* gi2s = gradInput2->stride; 78 | long* gos = gradOutput->stride; 79 | //long* ms = mask->stride; 80 | 81 | // get pointers 82 | real* input1_p = THTensor_(data)(input1); 83 | real* input2_p = THTensor_(data)(input2); 84 | real* gradInput1_p = THTensor_(data)(gradInput1); 85 | real* gradInput2_p = THTensor_(data)(gradInput2); 86 | real* gradOutput_p = THTensor_(data)(gradOutput); 87 | //long* mask_p = THLongTensor_data(mask); 88 | 89 | // compute gradients 90 | int x1, y1, y2, k; 91 | real partial_d; 92 | for (y1 = 0; y1 < iheight; y1++) { 93 | for (x1 = 0; x1 < iwidth; x1++) { 94 | // if (mask_p[y1*ms[0] + x1*ms[1]]) { 95 | for (y2 = y1; y2 < y1+maxh; y2++) { 96 | for (k = 0; k < ichannels; k++) { 97 | partial_d = 2.0f*( input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] 98 | - input2_p[k*i2s[0] + y2*i2s[1] + x1*i2s[2]]); 99 | partial_d *= gradOutput_p[(y2-y1)*gos[2]+y1*gos[0]+x1*gos[1]]; 100 | gradInput1_p[k*gi1s[0] + y1*gi1s[1] + x1*gi1s[2]] += partial_d; 101 | gradInput2_p[k*gi2s[0] + y2*gi2s[1] + x1*gi2s[2]] -= partial_d; 102 | } 103 | } 104 | //} 105 | } 106 | } 107 | 108 | // done 109 | return 0; 110 | } 111 | 112 | static const struct luaL_Reg nn_(SpatialRadialMatching__) [] = { 113 | {"SpatialRadialMatching_updateOutput", nn_(SpatialRadialMatching_updateOutput)}, 114 | {"SpatialRadialMatching_updateGradInput", nn_(SpatialRadialMatching_updateGradInput)}, 115 | {NULL, NULL} 116 | }; 117 | 118 | static void 
nn_(SpatialRadialMatching_init)(lua_State *L) 119 | { 120 | luaT_pushmetatable(L, torch_Tensor); 121 | luaT_registeratname(L, nn_(SpatialRadialMatching__), "nn"); 122 | lua_pop(L,1); 123 | } 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /SpatialColorTransform.lua: -------------------------------------------------------------------------------- 1 | local SpatialColorTransform, parent = torch.class('nn.SpatialColorTransform', 'nn.Module') 2 | 3 | local help_desc = 4 | [[Provides a set of widely used/known color space transforms, 5 | for images: RGB->YUV, YUV->RGB, RGB->Y transforms, and 6 | more exotic transforms such as RGB->Normed-RGB]] 7 | 8 | local help_example = 9 | [[-- transforms an RGB image into a YUV image: 10 | converter = nn.SpatialColorTransform('rgb2yuv') 11 | rgb = image.lena() 12 | yuv = converter:forward(rgb) 13 | image.display(yuv) ]] 14 | 15 | function SpatialColorTransform:__init(type) 16 | -- parent init 17 | parent.__init(self) 18 | 19 | -- require the image package 20 | xlua.require('image',true) 21 | 22 | -- usage 23 | self.usage = xlua.usage( 24 | 'nn.SpatialColorTransform', help_desc, help_example, 25 | {type='string', req=true, 26 | help='transform = yuv2rgb | rgb2yuv | rgb2y | hsl2rgb | hsv2rgb | rgb2hsl | rgb2hsv | rgb2nrgb | rgb2y+nrgb'} 27 | ) 28 | 29 | -- transform type 30 | self.transform = type 31 | if type == 'yuv2rgb' then 32 | self.islinear = true 33 | self.linear = nn.SpatialLinear(3,3) 34 | -- R 35 | self.linear.weight[1][1] = 1 36 | self.linear.weight[1][2] = 0 37 | self.linear.weight[1][3] = 1.13983 38 | self.linear.bias[1] = 0 39 | -- G 40 | self.linear.weight[2][1] = 1 41 | self.linear.weight[2][2] = -0.39465 42 | self.linear.weight[2][3] = -0.58060 43 | self.linear.bias[2] = 0 44 | -- B 45 | self.linear.weight[3][1] = 1 46 | self.linear.weight[3][2] = 2.03211 47 | self.linear.weight[3][3] = 0 48 | self.linear.bias[3] = 0 49 | elseif type == 'rgb2yuv' then 50 | 
self.islinear = true
51 |       self.linear = nn.SpatialLinear(3,3)
52 |       -- Y
53 |       self.linear.weight[1][1] = 0.299
54 |       self.linear.weight[1][2] = 0.587
55 |       self.linear.weight[1][3] = 0.114
56 |       self.linear.bias[1] = 0
57 |       -- U
58 |       self.linear.weight[2][1] = -0.14713
59 |       self.linear.weight[2][2] = -0.28886
60 |       self.linear.weight[2][3] = 0.436
61 |       self.linear.bias[2] = 0
62 |       -- V
63 |       self.linear.weight[3][1] = 0.615
64 |       self.linear.weight[3][2] = -0.51499
65 |       self.linear.weight[3][3] = -0.10001
66 |       self.linear.bias[3] = 0
67 |    elseif type == 'rgb2y' then
68 |       self.islinear = true
69 |       self.linear = nn.SpatialLinear(3,1)
70 |       -- Y
71 |       self.linear.weight[1][1] = 0.299
72 |       self.linear.weight[1][2] = 0.587
73 |       self.linear.weight[1][3] = 0.114
74 |       self.linear.bias[1] = 0
75 |    elseif type == 'hsl2rgb' then
76 |       self.islinear = false
77 |    elseif type == 'hsv2rgb' then
78 |       self.islinear = false
79 |    elseif type == 'rgb2hsl' then
80 |       self.islinear = false
81 |    elseif type == 'rgb2hsv' then
82 |       self.islinear = false
83 |    elseif type == 'rgb2nrgb' then
84 |       self.islinear = false
85 |    elseif type == 'rgb2y+nrgb' then
86 |       self.islinear = false
87 |    else
88 |       xlua.error('transform required','nn.SpatialColorTransform',self.usage)
89 |    end
90 | end
91 | 
-- Forward pass: linear color transforms go through the internal
-- nn.SpatialLinear; the non-linear conversions are delegated to the image
-- package. FIX(review): the chain previously tested 'rgb2hsv' twice (the
-- second branch was unreachable) while 'hsv2rgb' -- accepted by __init and
-- advertised in the usage string -- was never dispatched, silently returning
-- a stale self.output; it now calls image.hsv2rgb.
92 | function SpatialColorTransform:updateOutput(input)
93 |    if self.islinear then
94 |       self.output = self.linear:updateOutput(input)
95 |    else
96 |       if self.transform == 'rgb2hsl' then
97 |          self.output = image.rgb2hsl(input, self.output)
98 |       elseif self.transform == 'rgb2hsv' then
99 |          self.output = image.rgb2hsv(input, self.output)
100 |       elseif self.transform == 'hsl2rgb' then
101 |          self.output = image.hsl2rgb(input, self.output)
102 |       elseif self.transform == 'hsv2rgb' then
103 |          self.output = image.hsv2rgb(input, self.output)
104 |       elseif self.transform == 'rgb2nrgb' then
105 |          self.output = image.rgb2nrgb(input, self.output)
106 |       elseif self.transform == 'rgb2y+nrgb' then
107 |          self.output:resize(4,
input:size(2), input:size(3)) 108 | image.rgb2y(input, self.output:narrow(1,1,1)) 109 | image.rgb2nrgb(input, self.output:narrow(1,2,3)) 110 | end 111 | end 112 | return self.output 113 | end 114 | 115 | function SpatialColorTransform:updateGradInput(input, gradOutput) 116 | if self.islinear then 117 | self.gradInput = self.linear:updateGradInput(input, gradOutput) 118 | else 119 | xlua.error('updateGradInput not implemented for non-linear transforms', 120 | 'SpatialColorTransform.updateGradInput') 121 | end 122 | return self.gradInput 123 | end 124 | 125 | function SpatialColorTransform:type(type) 126 | parent.type(self,type) 127 | if self.islinear then 128 | self.linear:type(type) 129 | end 130 | end 131 | -------------------------------------------------------------------------------- /PixelSort.lua: -------------------------------------------------------------------------------- 1 | local PixelSort, parent = torch.class("nn.PixelSort", "nn.Module") 2 | 3 | -- Reverse pixel shuffle, based on the torch nn.PixelShuffle module (i'd attribute code, but not sure who wrote that) 4 | -- Converts a [batch x channel x m x p] tensor to [batch x channel*r^2 x m/r x p/r] 5 | -- tensor, where r is the downscaling factor. 6 | -- Useful as an alternative to pooling & strided convolutions, as it doesn't discard information 7 | -- if used with bottleneck convolution, you can discard half of the information, as opposed to 3/4 in pooling 8 | -- also avoids the 'checkerboard' sampling issues found with strided convolutions. 9 | -- @param downscaleFactor - the downscaling factor to use 10 | function PixelSort:__init(downscaleFactor) 11 | parent.__init(self) 12 | self.downscaleFactor = downscaleFactor 13 | self.downscaleFactorSquared = self.downscaleFactor * self.downscaleFactor 14 | end 15 | 16 | -- Computes the forward pass of the layer i.e. Converts a 17 | -- [batch x channel x m x p] tensor to [batch x channel*r^2 x m/r x p/r] tensor. 
18 | -- @param input - the input tensor to be sorted of size [b x c x m x p]
19 | -- @return output - the sorted tensor of size [b x c*r^2 x m/r x p/r]
20 | function PixelSort:updateOutput(input)
21 |    self._intermediateShape = self._intermediateShape or torch.LongStorage(6)
-- FIX(review): this previously read 'self.outShape' (no underscore), a field
-- assigned nowhere, so a fresh LongStorage was allocated on every forward and
-- the '_outShape' buffer managed by clearState() was never actually reused.
22 |    self._outShape = self._outShape or torch.LongStorage()
23 |    self._shuffleOut = self._shuffleOut or input.new()
24 | 
25 |    local batched = false   -- NOTE(review): written but never read below
26 |    local batchSize = 1
27 |    local inputStartIdx = 1
28 |    local outShapeIdx = 1
29 |    if input:nDimension() == 4 then
30 |       batched = true
31 |       batchSize = input:size(1)
32 |       inputStartIdx = 2
33 |       outShapeIdx = 2
34 |       self._outShape:resize(4)
35 |       self._outShape[1] = batchSize
36 |    else
37 |       self._outShape:resize(3)
38 |    end
39 | 
40 |    local channels = input:size(inputStartIdx)
41 |    local inHeight = input:size(inputStartIdx + 1)
42 |    local inWidth = input:size(inputStartIdx + 2)
43 | 
-- Factor each spatial dim into (size/r, r): b x c x m/r x r x p/r x r.
44 |    self._intermediateShape[1] = batchSize
45 |    self._intermediateShape[2] = channels
46 |    self._intermediateShape[3] = inHeight / self.downscaleFactor
47 |    self._intermediateShape[4] = self.downscaleFactor
48 |    self._intermediateShape[5] = inWidth / self.downscaleFactor
49 |    self._intermediateShape[6] = self.downscaleFactor
50 | 
51 |    self._outShape[outShapeIdx] = channels * self.downscaleFactorSquared
52 |    self._outShape[outShapeIdx + 1] = inHeight / self.downscaleFactor
53 |    self._outShape[outShapeIdx + 2] = inWidth / self.downscaleFactor
54 | 
55 |    local inputView = torch.view(input, self._intermediateShape)
56 | 
-- Permute the two r-sized axes ahead of the spatial axes, then the final
-- view folds them into the channel dimension (c -> c*r^2).
57 |    self._shuffleOut:resize(inputView:size(1), inputView:size(2), inputView:size(4),
58 |       inputView:size(6), inputView:size(3), inputView:size(5))
59 |    self._shuffleOut:copy(inputView:permute(1, 2, 4, 6, 3, 5))
60 | 
61 |    self.output = torch.view(self._shuffleOut, self._outShape)
62 | 
63 |    return self.output
64 | end
65 | 
66 | -- Computes the backward pass of the layer, given the gradient w.r.t. the output
67 | -- this function computes the gradient w.r.t.
the input. 68 | -- @param input - the input tensor of shape [b x c x m x p] 69 | -- @param gradOutput - the tensor with the gradients w.r.t. output of shape [b x c*r^2 x m/r x p/r] 70 | -- @return gradInput - a tensor of the same shape as input, representing the gradient w.r.t. input. 71 | function PixelSort:updateGradInput(input, gradOutput) 72 | self._intermediateShape = self._intermediateShape or torch.LongStorage(6) 73 | self._shuffleIn = self._shuffleIn or input.new() 74 | 75 | local batchSize = 1 76 | local inputStartIdx = 1 77 | if input:nDimension() == 4 then 78 | batchSize = input:size(1) 79 | inputStartIdx = 2 80 | end 81 | local channels = input:size(inputStartIdx) 82 | local height = input:size(inputStartIdx + 1) 83 | local width = input:size(inputStartIdx + 2) 84 | 85 | self._intermediateShape[1] = batchSize 86 | self._intermediateShape[2] = channels 87 | self._intermediateShape[3] = self.downscaleFactor 88 | self._intermediateShape[4] = self.downscaleFactor 89 | self._intermediateShape[5] = height /self.downscaleFactor 90 | self._intermediateShape[6] = width /self.downscaleFactor 91 | 92 | local gradOutputView = torch.view(gradOutput, self._intermediateShape) 93 | 94 | self._shuffleIn:resize(gradOutputView:size(1), gradOutputView:size(2), gradOutputView:size(5), 95 | gradOutputView:size(4), gradOutputView:size(6), gradOutputView:size(3)) 96 | self._shuffleIn:copy(gradOutputView:permute(1, 2, 5, 3, 6, 4)) 97 | 98 | self.gradInput = torch.view(self._shuffleIn, input:size()) 99 | 100 | return self.gradInput 101 | end 102 | 103 | 104 | function PixelSort:clearState() 105 | nn.utils.clear(self, { 106 | "_intermediateShape", 107 | "_outShape", 108 | "_shuffleIn", 109 | "_shuffleOut", 110 | }) 111 | return parent.clearState(self) 112 | end 113 | -------------------------------------------------------------------------------- /generic/DataSetLabelMe.c: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 
/* Extracts sample coordinates from a LabelMe segmentation mask.
 *
 * Lua stack:
 *   1: tags       - table indexed by class label; each entry is a table with
 *                   fields 'size' (number of shorts used) and 'data'
 *                   (a torch.ShortStorage).
 *                   NOTE(review): assumes tags[label] exists for every label
 *                   appearing in the mask; a missing entry would make the
 *                   lua_rawget calls below operate on a non-table -- confirm
 *                   the Lua caller pre-populates all labels.
 *   2: mask       - 2D tensor of per-pixel class labels
 *   3-6: x_start, x_end, y_start, y_end - inclusive 1-based window bounds
 *   7: idx        - index of the current image
 *   8-10: filter_ratio, filter_size, filter_step - optional local-purity
 *                   filter; disabled when filter_ratio <= 0
 *
 * For each accepted pixel, appends the triplet (x, y, idx) to
 * tags[label].data and advances tags[label].size by 3. Returns 0.
 */
static int nn_(DataSetLabelMe_extract)(lua_State *L)
{
  int tags = 1;
  THTensor *mask = luaT_checkudata(L, 2, torch_Tensor);
  int x_start = lua_tonumber(L, 3);
  int x_end = lua_tonumber(L, 4);
  int y_start = lua_tonumber(L, 5);
  int y_end = lua_tonumber(L, 6);
  int idx = lua_tonumber(L, 7);
  float filter_ratio = lua_tonumber(L, 8);
  int filter_size = lua_tonumber(L, 9);
  int filter_step = lua_tonumber(L, 10);

  float ratio = 1;
  int x,y,label,tag,size;
  THShortStorage *data;
  for (x=x_start; x<=x_end; x++) {
    for (y=y_start; y<=y_end; y++) {
      /* label = mask[y][x] (tensor indexing is row-major: row y, column x;
         -1 converts from Lua's 1-based coordinates) */
      label = THTensor_(get2d)(mask, y-1, x-1);

      /* optional filter: ensures that at least filter_ratio of the local
         pixels (a filter_size window sampled every filter_step pixels)
         belong to the same class */
      if (filter_ratio > 0) {
        int kx,ky,count=0,good=0;
        for (kx=MAX(1,x-filter_size/2); kx<=MIN(x_end,x+filter_size/2); kx+=filter_step) {
          for (ky=MAX(1,y-filter_size/2); ky<=MIN(y_end,y+filter_size/2); ky+=filter_step) {
            int other = THTensor_(get2d)(mask, ky-1, kx-1);
            if (other == label) good++;
            count++;
          }
        }
        ratio = (float)good/(float)count;
      }

      /* if filter(s) satisfied, append (x, y, idx) to tags[label] */
      if (ratio >= filter_ratio) {
        lua_rawgeti(L, tags, label);                                          /* tag = tags[label] */
        tag = lua_gettop(L);
        lua_pushstring(L, "size"); lua_rawget(L, tag);                        /* size = tag.size */
        size = lua_tonumber(L,-1); lua_pop(L,1);
        lua_pushstring(L, "size"); lua_pushnumber(L, size+3); lua_rawset(L, tag); /* tag.size = size + 3 */
        lua_pushstring(L, "data"); lua_rawget(L, tag);                        /* data = tag.data */
        data = luaT_checkudata(L, -1, "torch.ShortStorage"); lua_pop(L, 1);
        data->data[size] = x;     /* data[size+1] = x   (storage is 1-based on the Lua side) */
        data->data[size+1] = y;   /* data[size+2] = y */
        data->data[size+2] = idx; /* data[size+3] = idx */
        lua_pop(L, 1);
      }
    }
  }
  return 0;
}
/******************************************************/
/* Camille: same function as DataSetLabelMe_extract above, except that it
 * also records which masking segment each sample came from (idxSegm) and
 * supports a sampling stride (step) over the window.
 *
 * Lua stack:
 *   1: tags       - table indexed by class label; each entry has fields
 *                   'size' (number) and 'data' (torch.ShortStorage)
 *   2: mask       - 2D tensor of per-pixel class labels
 *   3-6: x_start, x_end, y_start, y_end - inclusive 1-based window bounds
 *   7: idx        - index of the current image
 *   8: idxSegm    - index of the segment used for masking
 *   9-11: filter_ratio, filter_size, filter_step - optional purity filter
 *   12: step      - stride between sampled pixels in both directions
 *
 * For each accepted pixel, appends (x, y, idx, idxSegm) to tags[label].data
 * and advances tags[label].size by 4. Returns 0.
 */
static int nn_(DataSetSegmentSampling_extract)(lua_State *L)
{
  int tags = 1;
  THTensor *mask = luaT_checkudata(L, 2, torch_Tensor);
  int x_start = lua_tonumber(L, 3);
  int x_end = lua_tonumber(L, 4);
  int y_start = lua_tonumber(L, 5);
  int y_end = lua_tonumber(L, 6);
  int idx = lua_tonumber(L, 7);
  int idxSegm = lua_tonumber(L, 8);
  float filter_ratio = lua_tonumber(L, 9);
  int filter_size = lua_tonumber(L, 10);
  int filter_step = lua_tonumber(L, 11);
  int step = lua_tonumber(L, 12);
  float ratio = 1;
  int x,y,label,tag,size;
  THShortStorage *data;
  for (x=x_start; x<=x_end; x=x+step) {
    for (y=y_start; y<=y_end; y=y+step) {
      /* label = mask[y][x] (row-major: row y, column x; -1 converts from
         Lua's 1-based coordinates) */
      label = THTensor_(get2d)(mask, y-1, x-1);

      /* optional filter: ensures that at least filter_ratio of the local
         pixels belong to the same class */
      if (filter_ratio > 0) {
        int kx,ky,count=0,good=0;
        for (kx=MAX(1,x-filter_size/2); kx<=MIN(x_end,x+filter_size/2); kx+=filter_step) {
          for (ky=MAX(1,y-filter_size/2); ky<=MIN(y_end,y+filter_size/2); ky+=filter_step) {
            int other = THTensor_(get2d)(mask, ky-1, kx-1);
            if (other == label) good++;
            count++;
          }
        }
        ratio = (float)good/(float)count;
      }

      /* if filter(s) satisfied, append (x, y, idx, idxSegm) to tags[label] */
      if (ratio >= filter_ratio) {
        lua_rawgeti(L, tags, label);                                          /* tag = tags[label] */
        tag = lua_gettop(L);
        lua_pushstring(L, "size"); lua_rawget(L, tag);                        /* size = tag.size */
        size = lua_tonumber(L,-1); lua_pop(L,1);
        lua_pushstring(L, "size"); lua_pushnumber(L, size+4); lua_rawset(L, tag); /* tag.size = size + 4 */
        lua_pushstring(L, "data"); lua_rawget(L, tag);                        /* data = tag.data */
        data = luaT_checkudata(L, -1, "torch.ShortStorage"); lua_pop(L, 1);
        data->data[size] = x;       /* data[size+1] = x   (1-based on the Lua side) */
        data->data[size+1] = y;     /* data[size+2] = y */
        data->data[size+2] = idx;   /* data[size+3] = idx */
        data->data[size+3] = idxSegm; /* data[size+4] = idxSegm */
        lua_pop(L, 1);
      }
    }
  }
  return 0;
}
nframe) && (target_->size[1] == dim), 31 | 3, "inconsistent target size"); 32 | target = THTensor_(newContiguous)(target_); 33 | } 34 | 35 | for(t = 0; t < nframe; t++) { 36 | for(d = 0; d < dim; d++) { 37 | real idx = THTensor_(get2d)(target, t, d); 38 | THArgCheck((idx >= 0) && (idx <= dim), 3, "target out of range"); 39 | } 40 | } 41 | 42 | input = THTensor_(newContiguous)(input); 43 | input_data = THTensor_(data)(input); 44 | target_data = THTensor_(data)(target); 45 | 46 | sum = 0; 47 | for(t = 0; t < nframe; t++) { 48 | real input_target = THInf; 49 | for (m = 0; m < dim; m++) { 50 | long target_idx = (long)(target_data[m]-1); 51 | if (target_idx == -1) break; 52 | if (input_target > input_data[target_idx]) input_target = input_data[target_idx]; 53 | } 54 | for(d = 0; d < dim; d++) { 55 | int isatarget = 0; 56 | for(m = 0; m < dim; m++) { 57 | long target_idx = (long)(target_data[m]-1); 58 | if (target_idx == -1) break; 59 | else if(d == target_idx) { 60 | isatarget = 1; 61 | break; 62 | } 63 | } 64 | if (isatarget) continue; 65 | 66 | real z = 1 - input_target + input_data[d]; 67 | if(z > 0) sum += z; 68 | } 69 | input_data += dim; 70 | target_data += dim; 71 | } 72 | 73 | if(sizeAverage) 74 | sum /= dim; 75 | 76 | lua_pushnumber(L, sum); 77 | lua_setfield(L, 1, "output"); 78 | 79 | THTensor_(free)(input); 80 | THTensor_(free)(target); 81 | lua_pushnumber(L, sum); 82 | return 1; 83 | } 84 | 85 | static int nn_(DistMarginCriterion_updateGradInput)(lua_State *L) 86 | { 87 | THTensor *input = luaT_checkudata(L, 2, torch_Tensor); 88 | int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); 89 | THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); 90 | real *input_data; 91 | real *gradInput_data; 92 | real *target_data; 93 | THTensor *target_; 94 | THTensor *target; 95 | long nframe, dim; 96 | long t, d, m; 97 | real g; 98 | 99 | THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); 
100 | 101 | if(input->nDimension == 1) { 102 | nframe = 1; 103 | dim = input->size[0]; 104 | target_ = luaT_checkudata(L, 3, torch_Tensor); 105 | target = THTensor_(new)(); 106 | THTensor_(set)(target, target_); 107 | THTensor_(resize2d)(target, 1, dim); 108 | } 109 | else { 110 | nframe = input->size[0]; 111 | dim = input->size[1]; 112 | target_ = luaT_checkudata(L, 3, torch_Tensor); 113 | THArgCheck((target_->nDimension == 2) && (target_->size[0] == nframe) && (target_->size[1] == dim), 114 | 3, "inconsistent target size"); 115 | target = THTensor_(newContiguous)(target_); 116 | } 117 | 118 | g = (sizeAverage ? 1./((real)dim) : 1.); 119 | 120 | input = THTensor_(newContiguous)(input); 121 | input_data = THTensor_(data)(input); 122 | 123 | THTensor_(resizeAs)(gradInput, input); 124 | gradInput_data = THTensor_(data)(gradInput); 125 | 126 | target_data = THTensor_(data)(target); 127 | 128 | for(t = 0; t < nframe; t++) { 129 | real input_target = THInf; 130 | int min_idx = -1; 131 | for (m = 0; m < dim; m++) { 132 | long target_idx = (long)(target_data[m]-1); 133 | if (target_idx == -1) break; 134 | if (input_target > input_data[target_idx]) { 135 | min_idx = target_idx; 136 | input_target = input_data[target_idx]; 137 | } 138 | } 139 | real gradInput_target = 0; 140 | for(d = 0; d < dim; d++) { 141 | int isatarget = 0; 142 | for(m = 0; m < dim; m++) { 143 | long target_idx = (long)(target_data[m]-1); 144 | if (target_idx == -1) break; 145 | else if(d == target_idx) { 146 | isatarget = 1; 147 | break; 148 | } 149 | } 150 | if (isatarget) continue; 151 | 152 | real z = 1 - input_target + input_data[d]; 153 | if(z > 0) { 154 | gradInput_target -= g; 155 | gradInput_data[d] = g; 156 | } 157 | else 158 | gradInput_data[d] = 0; 159 | } 160 | gradInput_data[min_idx] = gradInput_target; 161 | 162 | input_data += dim; 163 | gradInput_data += dim; 164 | target_data += dim; 165 | } 166 | 167 | 168 | THTensor_(free)(input); 169 | THTensor_(free)(target); 170 | return 1; 171 | 
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialMaxSampling.c"
#else

#ifndef MAX
#define MAX(a,b) ( ((a)>(b)) ? (a) : (b) )
#endif
#ifndef MIN
#define MIN(a,b) ( ((a)<(b)) ? (a) : (b) )
#endif

/* Max pooling to a fixed output size (owidth x oheight) with fractional,
 * per-output-pixel windows: output pixel (i,j) pools the input region
 * [i*dH, (i+1)*dH) x [j*dW, (j+1)*dW) where dW = iwidth/owidth,
 * dH = iheight/oheight (each window is at least 1x1).
 *
 * Stack: 1 = self (fields 'owidth', 'oheight', 'output', 'indices'),
 *        2 = input, a 3D tensor (channels x height x width).
 * Writes self.output (ochannels x oheight x owidth) and self.indices
 * (2 x ochannels x oheight x owidth): plane 0 holds the 1-based y offset of
 * each max within its window, plane 1 the 1-based x offset.
 */
static int nn_(SpatialMaxSampling_updateOutput)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int owidth = luaT_getfieldcheckint(L, 1, "owidth");
  int oheight = luaT_getfieldcheckint(L, 1, "oheight");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);

  // check dims
  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");

  // dims
  int ichannels = input->size[0];
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int ochannels = ichannels;
  float dW = (float)iwidth/owidth;
  float dH = (float)iheight/oheight;

  // get contiguous input
  input = THTensor_(newContiguous)(input);

  // resize output
  THTensor_(resize3d)(output, ochannels, oheight, owidth);

  // indices will contain the (y,x) location of the max for each output point
  THTensor_(resize4d)(indices, 2, ochannels, oheight, owidth);

  // get raw pointers
  real *input_data = THTensor_(data)(input);
  real *output_data = THTensor_(data)(output);
  real *indices_data = THTensor_(data)(indices);

  // compute max pooling for each input slice
  long k;
  for (k = 0; k < ochannels; k++) {
    // pointers to slices; indices plane 0 = y offsets, plane 1 = x offsets
    real *input_p = input_data + k*iwidth*iheight;
    real *output_p = output_data + k*owidth*oheight;
    real *indy_p = indices_data + k*owidth*oheight;
    real *indx_p = indices_data + (k+ochannels)*owidth*oheight;

    // loop over output
    int i,j;
    for(i = 0; i < oheight; i++) {
      for(j = 0; j < owidth; j++) {
        // window bounds in the input; MAX guarantees a window of >= 1 pixel
        long ixs = (long)(j*dW);
        long iys = (long)(i*dH);
        long ixe = MAX(ixs+1, (long)((j+1)*dW));
        long iye = MAX(iys+1, (long)((i+1)*dH));

        // local pointers
        real *op = output_p + i*owidth + j;
        real *indxp = indx_p + i*owidth + j;
        real *indyp = indy_p + i*owidth + j;

        // compute local max; tcntr is the flat index within the window
        long maxindex = -1;
        real maxval = -THInf;
        long tcntr = 0;
        int x,y;
        for(y = iys; y < iye; y++) {
          for(x = ixs; x < ixe; x++) {
            real val = *(input_p + y*iwidth + x);
            if (val > maxval) {
              maxval = val;
              maxindex = tcntr;
            }
            tcntr++;
          }
        }

        // set output to local max
        *op = maxval;

        // store 1-based (y,x) location of the max within the window
        long kW = ixe-ixs;
        *indyp = (int)(maxindex / kW)+1;
        *indxp = (maxindex % kW) +1;
      }
    }
  }

  // cleanup
  THTensor_(free)(input);
  return 1;
}

/* Backward pass: routes each gradOutput value to the input position that won
 * the max in the forward pass, recovered from self.indices plus the window
 * origin recomputed from the same dW/dH arithmetic.
 *
 * Stack: 1 = self, 2 = input (3D, for sizing), 3 = gradOutput.
 * NOTE(review): unlike updateOutput there is no explicit 3D dimension check
 * here; input is assumed to match the forward call -- confirm the Lua
 * wrapper enforces this.
 */
static int nn_(SpatialMaxSampling_updateGradInput)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  int owidth = luaT_getfieldcheckint(L, 1, "owidth");
  int oheight = luaT_getfieldcheckint(L, 1, "oheight");

  // sizes
  int ichannels = input->size[0];
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int ochannels = ichannels;
  float dW = (float)iwidth/owidth;
  float dH = (float)iheight/oheight;

  // get contiguous gradOutput
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // resize and zero gradInput (gradients are accumulated with += below)
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // get raw pointers
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  // backprop all
  long k;
  for (k = 0; k < ichannels; k++) {
    // pointers to slices
    real *gradOutput_p = gradOutput_data + k*owidth*oheight;
    real *gradInput_p = gradInput_data + k*iwidth*iheight;
    real *indy_p = indices_data + k*owidth*oheight;
    real *indx_p = indices_data + (k+ochannels)*owidth*oheight;

    // calculate max points
    int i,j;
    for(i = 0; i < oheight; i++) {
      for(j = 0; j < owidth; j++) {
        // recompute the window origin used in the forward pass
        long iys = (long)(i*dH);
        long ixs = (long)(j*dW);

        // retrieve absolute position of max: stored offsets are 1-based
        real *indyp = indy_p + i*owidth + j;
        real *indxp = indx_p + i*owidth + j;
        long maxi = (*indyp) - 1 + iys;
        long maxj = (*indxp) - 1 + ixs;

        // update gradient
        *(gradInput_p + maxi*iwidth + maxj) += *(gradOutput_p + i*owidth + j);
      }
    }
  }

  // cleanup
  THTensor_(free)(gradOutput);

  return 1;
}

static const struct luaL_Reg nn_(SpatialMaxSampling__) [] = {
  {"SpatialMaxSampling_updateOutput", nn_(SpatialMaxSampling_updateOutput)},
  {"SpatialMaxSampling_updateGradInput", nn_(SpatialMaxSampling_updateGradInput)},
  {NULL, NULL}
};

/* Registers both C functions on the tensor metatable under the 'nn' table. */
static void nn_(SpatialMaxSampling_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(SpatialMaxSampling__), "nn");
  lua_pop(L,1);
}

#endif
-- Builds the two processing pipelines (focused and global).
-- @param ratios      table of integer downscaling ratios, one per scale
-- @param processors  table of modules, one per scale (#ratios == #processors)
-- @param kW,kH,dW,dH kernel and step sizes of the processors (used to derive
--                    padding and, in focused mode, the crop window sizes)
-- @param xDimIn/yDimIn, xDimOut/yDimOut  dimension indices for the resampling
--                    modules on the input and output sides
-- @param prescaled_input  if true, forward expects a table of pre-downscaled
--                    3D tensors (global mode only)
-- NOTE: the focused pipeline's first module of each branch MUST remain the
-- SpatialPadding placeholder -- configureFocus() rewrites its pad_* fields
-- by indexing focused_pipeline.modules[i].modules[1].
function SpatialPyramid:__init(ratios, processors, kW, kH, dW, dH, xDimIn, yDimIn,
                               xDimOut, yDimOut, prescaled_input)
   parent.__init(self)
   self.prescaled_input = prescaled_input or false
   assert(#ratios == #processors)

   self.ratios = ratios
   self.kH = kH
   self.kW = kW
   self.dH = dH
   self.dW = dW
   self.focused = false
   self.x = 0
   self.y = 0
   -- NOTE(review): wFocus/hFocus are initialized here but not updated by any
   -- method visible in this file -- possibly vestigial; confirm before use.
   self.wFocus = 0
   self.hFocus = 0
   self.processors = processors

   -- 'same'-style padding so each processor's output keeps the spatial size
   local wPad = kW-dW
   local hPad = kH-dH
   local padLeft   = math.floor(wPad/2)
   local padRight  = math.ceil (wPad/2)
   local padTop    = math.floor(hPad/2)
   local padBottom = math.ceil (hPad/2)

   -- focused pipeline: placeholder padding (configured per focus point by
   -- configureFocus), then downscale, then the processor
   self.focused_pipeline = nn.ConcatTable()
   for i = 1,#self.ratios do
      local seq = nn.Sequential()
      seq:add(nn.SpatialPadding(0,0,0,0, yDimIn, xDimIn))
      seq:add(nn.SpatialReSamplingEx{rwidth=1.0/self.ratios[i], rheight=1.0/self.ratios[i],
                                     xDim = xDimIn, yDim = yDimIn, mode='average'})
      seq:add(processors[i])
      self.focused_pipeline:add(seq)
   end

   -- unfocused (global) pipeline: downscale + pad + processor + upscale back,
   -- so all scales produce maps of the same spatial size
   if prescaled_input then
      self.unfocused_pipeline = nn.ParallelTable()
   else
      self.unfocused_pipeline = nn.ConcatTable()
   end
   for i = 1,#self.ratios do
      local seq = nn.Sequential()
      if not prescaled_input then
         seq:add(nn.SpatialReSamplingEx{rwidth=1.0/self.ratios[i], rheight=1.0/self.ratios[i],
                                        xDim = xDimIn, yDim = yDimIn, mode='average'})
         seq:add(nn.SpatialPadding(padLeft, padRight, padTop, padBottom, yDimIn, xDimIn))
      end
      seq:add(processors[i])
      seq:add(nn.SpatialReSamplingEx{rwidth=self.ratios[i], rheight=self.ratios[i],
                                     xDim=xDimOut, yDim=yDimOut, mode='simple'})
      self.unfocused_pipeline:add(seq)
   end
end

-- Focuses the pyramid on point (x,y); w and h are the desired OUTPUT size of
-- the processors (default 1x1). The per-scale input crop size is derived
-- from the processors' kernel/step geometry. Calling focus(nil) returns the
-- module to global (unfocused) mode.
function SpatialPyramid:focus(x, y, w, h)
   w = w or 1
   h = h or 1
   if x and y then
      self.x = x
      self.y = y
      self.focused = true
      self.winWidth = {}
      self.winHeight = {}
      for i = 1,#self.ratios do
         -- input extent needed at scale i to produce a w x h processor output
         self.winWidth[i]  = self.ratios[i] * ((w-1) * self.dW + self.kW)
         self.winHeight[i] = self.ratios[i] * ((h-1) * self.dH + self.kH)
      end
   else
      self.focused = false
   end
end

-- Rewrites the padding of each focused branch so that padding/cropping
-- recenters the image on (self.x, self.y) with the window size computed by
-- focus(). Negative pads crop. wImg/hImg are the current input dimensions.
function SpatialPyramid:configureFocus(wImg, hImg)
   for i = 1,#self.ratios do
      -- relies on the SpatialPadding placeholder being module 1 (see __init)
      local padder = self.focused_pipeline.modules[i].modules[1]
      padder.pad_l = -self.x + math.ceil (self.winWidth[i] /2)
      padder.pad_r =  self.x + math.floor(self.winWidth[i] /2) - wImg
      padder.pad_t = -self.y + math.ceil (self.winHeight[i]/2)
      padder.pad_b =  self.y + math.floor(self.winHeight[i]/2) - hImg
   end
end

-- Rejects inputs whose height/width are not exact multiples of every ratio
-- (otherwise the down/up resampling round-trip would change sizes).
function SpatialPyramid:checkSize(input)
   for i = 1,#self.ratios do
      if (math.fmod(input:size(2), self.ratios[i]) ~= 0) or
         (math.fmod(input:size(3), self.ratios[i]) ~= 0) then
         error('SpatialPyramid: input sizes must be multiple of ratios')
      end
   end
end

-- Forward: dispatches to the focused pipeline (after reconfiguring the
-- padding for the current focus point) or to the global pipeline.
function SpatialPyramid:updateOutput(input)
   if not self.prescaled_input then
      self:checkSize(input)
   end
   if self.focused then
      self:configureFocus(input:size(3), input:size(2))
      self.output = self.focused_pipeline:updateOutput(input)
   else
      self.output = self.unfocused_pipeline:updateOutput(input)
   end
   return self.output
end

-- Backward: mirrors updateOutput's dispatch.
function SpatialPyramid:updateGradInput(input, gradOutput)
   if self.focused then
      self.gradInput = self.focused_pipeline:updateGradInput(input, gradOutput)
   else
      self.gradInput = self.unfocused_pipeline:updateGradInput(input, gradOutput)
   end
   return self.gradInput
end

-- Zeroes accumulated gradients in BOTH pipelines (they share the processor
-- modules, and either may have been used since the last update).
function SpatialPyramid:zeroGradParameters()
   self.focused_pipeline:zeroGradParameters()
   self.unfocused_pipeline:zeroGradParameters()
end

-- Accumulates parameter gradients through whichever pipeline ran forward.
function SpatialPyramid:accGradParameters(input, gradOutput, scale)
   if self.focused then
      self.focused_pipeline:accGradParameters(input, gradOutput, scale)
   else
      self.unfocused_pipeline:accGradParameters(input, gradOutput, scale)
   end
end
-- Selects the pipeline matching the current operating mode:
-- the focused pipeline when focus(x,y,...) is active, the global one otherwise.
local function activePipeline(self)
   if self.focused then
      return self.focused_pipeline
   end
   return self.unfocused_pipeline
end

-- Applies a parameter update to the pipeline currently in use.
function SpatialPyramid:updateParameters(learningRate)
   activePipeline(self):updateParameters(learningRate)
end

-- Casts this module and both pipelines to the given tensor type.
function SpatialPyramid:type(type)
   parent.type(self, type)
   self.focused_pipeline:type(type)
   self.unfocused_pipeline:type(type)
   return self
end

-- Returns the (weights, gradients) of the pipeline currently in use.
function SpatialPyramid:parameters()
   return activePipeline(self):parameters()
end

-- Pretty-prints the module, showing which mode it is in and the
-- corresponding pipeline, indented one level.
function SpatialPyramid:__tostring__()
   local mode = self.focused and 'focused' or 'unfocused'
   local dscr = tostring(activePipeline(self)):gsub('\n', '\n | ')
   return 'SpatialPyramid (' .. mode .. ')\n' .. dscr
end
/* Computes a dense patch-matching cost volume between two feature maps:
 * for each position (y1,x1) of input1 and each displacement (dy,dx) within a
 * maxh x maxw search window, the squared L2 distance across channels to the
 * corresponding position of input2.
 *
 * Stack: 1 = self (fields 'maxw', 'maxh', 'full_output', 'output'),
 *        2 = input1, 3 = input2 (both channels x height x width).
 * The output tensor is assumed to be pre-sized by the Lua wrapper; positions
 * whose window is clipped at the image border keep the 1e30 sentinel.
 */
static int nn_(SpatialMatching_updateOutput)(lua_State *L)
{
  // get all params
  THTensor *input1 = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *input2 = luaT_checkudata(L, 3, torch_Tensor);
  int maxw = luaT_getfieldcheckint(L, 1, "maxw");
  int maxh = luaT_getfieldcheckint(L, 1, "maxh");
  int full_output = luaT_getfieldcheckboolean(L, 1, "full_output");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  // dims
  int iwidth = input1->size[2];
  int iheight = input1->size[1];
  int ichannels = input1->size[0];

  // tensors are indexed through their strides below, so no contiguous copy
  // is required
  //input1 = THTensor_(newContiguous)(input1);
  //input2 = THTensor_(newContiguous)(input2);
  //output = THTensor_(newContiguous)(output);

  // initialize output to a large sentinel (NOT zero): entries whose
  // displacement falls outside the image are never written and must compare
  // as "infinitely bad" matches
  THTensor_(fill)(output, 1e30);

  // get strides
  long *i1s = input1->stride;
  long *i2s = input2->stride;
  long *os = output->stride;

  // get pointers
  real *input1_p = THTensor_(data)(input1);
  real *input2_p = THTensor_(data)(input2);
  real *output_p = THTensor_(data)(output);

  // compute output
  int x1,y1,x2,y2,k;
  real dist;
  if (full_output) {
    // search window centered on (y1,x1): displacements in
    // [-halfh1, halfh2) x [-halfw1, halfw2)
    int halfh1 = ceil((real)maxh/2)-1;
    int halfh2 = floor((real)maxh/2)+1;
    int halfw1 = ceil((real)maxw/2)-1;
    int halfw2 = floor((real)maxw/2)+1;

    long dy, dx;

    // y1 is the parallel loop index and therefore implicitly private
    #pragma omp parallel for private(x1,x2,y2,k,dist,dy,dx)
    for (y1 = 0; y1 < iheight; y1++) {
      for (x1 = 0; x1 < iwidth; x1++) {
        for (y2 = max(0,y1-halfh1); y2 < min(iheight,y1+halfh2); y2++) {
          for (x2 = max(0,(x1-halfw1)); x2 < min(iwidth,x1+halfw2); x2++) {
            dist = 0;
            for (k = 0; k < ichannels; k++) {
              dist += square(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]);
            }
            dy = y2-y1 + halfh1;
            dx = x2-x1 + halfw1;
            // output layout: dims (y1, x1, dy, dx) given the stride usage.
            // NOTE(review): an older pointer-iterator variant (removed dead
            // code, see VCS history) indexed output as (dy, dx, y1, x1) --
            // confirm against the Lua wrapper which layout is intended.
            output_p[dy*os[2] + dx*os[3] + y1*os[0] + x1*os[1]] = dist;
          }
        }
      }
    }
  } else {
    // one-sided window: displacements in [0, maxh) x [0, maxw).
    // NOTE(review): y2/x2 may run up to y1+maxh-1 / x1+maxw-1 without
    // clamping, so input2 is assumed to extend at least maxh-1 rows and
    // maxw-1 columns beyond input1's extent -- confirm with the caller.
    #pragma omp parallel for private(y1,x1,x2,y2,k,dist)
    for (y1 = 0; y1 < iheight; y1++) {
      for (x1 = 0; x1 < iwidth; x1++) {
        for (y2 = y1; y2 < y1+maxh; y2++) {
          for (x2 = x1; x2 < x1+maxw; x2++) {
            dist = 0;
            for (k = 0; k < ichannels; k++) {
              dist += square(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]);
            }
            output_p[(y2-y1)*os[2] + (x2-x1)*os[3] + y1*os[0] + x1*os[1]] = dist;
          }
        }
      }
    }
  }


  // done
  return 1;
}
luaT_getfieldcheckint(L, 1, "maxw"); 123 | int maxh = luaT_getfieldcheckint(L, 1, "maxh"); 124 | 125 | // dims 126 | int iwidth = input1->size[2]; 127 | int iheight = input1->size[1]; 128 | int ichannels = input1->size[0]; 129 | 130 | // get strides 131 | long *i1s = input1->stride; 132 | long *i2s = input2->stride; 133 | long *gi1s = gradInput1->stride; 134 | long *gi2s = gradInput2->stride; 135 | long *gos = gradOutput->stride; 136 | 137 | // get pointers 138 | real *input1_p = THTensor_(data)(input1); 139 | real *input2_p = THTensor_(data)(input2); 140 | real *gradInput1_p = THTensor_(data)(gradInput1); 141 | real *gradInput2_p = THTensor_(data)(gradInput2); 142 | real *gradOutput_p = THTensor_(data)(gradOutput); 143 | 144 | // compute gradients 145 | int x1, y1, x2, y2, k; 146 | real partial_d; 147 | if (full_output) { 148 | // get halves of window size 149 | int halfh1 = ceil((real)maxh/2)-1; 150 | int halfh2 = floor((real)maxh/2)+1; 151 | int halfw1 = ceil((real)maxw/2)-1; 152 | int halfw2 = floor((real)maxw/2)+1; 153 | 154 | long dy, dx; 155 | //#pragma omp parallel for private(x1,x2,y2,k,dy,dx,partial_d) NO! gradInput has += 156 | for (y1 = 0; y1 < iheight; y1++) { 157 | for (x1 = 0; x1 < iwidth; x1++) { 158 | for (y2 = max(0,y1-halfh1); y2 < min(iheight,y1+halfh2); y2++) { 159 | for (x2 = max(0,(x1-halfw1)); x2 < min(iwidth,x1+halfw2); x2++) { 160 | dy = y2-y1 + halfh1; 161 | dx = x2-x1 + halfw1; 162 | for (k=0; k(b)) ? (a) : (b) ) 7 | #endif 8 | #ifndef MIN 9 | #define MIN(a,b) ( ((a)<(b)) ? 
(a) : (b) ) 10 | #endif 11 | 12 | static int nn_(SpatialReSampling_updateOutput)(lua_State *L) 13 | { 14 | // get all params 15 | THTensor *input_ = luaT_checkudata(L, 2, torch_Tensor); 16 | int owidth = luaT_getfieldcheckint(L, 1, "owidth"); 17 | int oheight = luaT_getfieldcheckint(L, 1, "oheight"); 18 | THTensor *output_ = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); 19 | 20 | // check dims 21 | luaL_argcheck(L, (input_->nDimension == 3) || (input_->nDimension == 4), 2, "3D or 4D tensor expected"); 22 | 23 | // dims 24 | int channelDim = 0; 25 | int batchSize = 1; 26 | if (input_->nDimension == 4){ 27 | channelDim = 1; 28 | batchSize = input_->size[0]; 29 | } 30 | 31 | int iwidth = input_->size[channelDim + 2]; 32 | int iheight = input_->size[channelDim + 1]; 33 | int ochannels = input_->size[channelDim]; 34 | 35 | // resize output 36 | if (input_->nDimension == 3) 37 | THTensor_(resize3d)(output_, ochannels, oheight, owidth); 38 | else 39 | THTensor_(resize4d)(output_, batchSize, ochannels, oheight, owidth); 40 | 41 | // select example 42 | THTensor *output = THTensor_(newWithTensor)(output_); 43 | THTensor *input = THTensor_(newWithTensor)(input_); 44 | 45 | // select planes 46 | THTensor *outputPlane = THTensor_(new)(); 47 | THTensor *inputPlane = THTensor_(new)(); 48 | 49 | // mapping ratios 50 | float wratio = (float)(iwidth-1) / (owidth-1); 51 | float hratio = (float)(iheight-1) / (oheight-1); 52 | 53 | int b; 54 | for (b=0; bnDimension == 4) 56 | { 57 | THTensor_(select)(input, input_, 0, b); 58 | THTensor_(select)(output, output_, 0, b); 59 | } 60 | // resample each plane 61 | int k; 62 | for (k=0; knDimension == 4){ 123 | channelDim = 1; 124 | batchSize = input_->size[0]; 125 | } 126 | 127 | int iwidth = input_->size[channelDim+2]; 128 | int iheight = input_->size[channelDim+1]; 129 | int ichannels = input_->size[channelDim]; 130 | int owidth = gradOutput_->size[channelDim+2]; 131 | int oheight = gradOutput_->size[channelDim+1]; 132 | int 
ochannels = gradOutput_->size[channelDim]; 133 | 134 | // resize gradInput 135 | if (input_->nDimension == 3) 136 | THTensor_(resize3d)(gradInput_, ichannels, iheight, iwidth); 137 | else 138 | THTensor_(resize4d)(gradInput_, batchSize, ichannels, iheight, iwidth); 139 | THTensor_(zero)(gradInput_); 140 | 141 | // select example 142 | THTensor *gradOutput = THTensor_(newWithTensor)(gradOutput_); 143 | THTensor *gradInput = THTensor_(newWithTensor)(gradInput_); 144 | 145 | // select planes 146 | THTensor *gradOutputPlane = THTensor_(new)(); 147 | THTensor *gradInputPlane = THTensor_(new)(); 148 | 149 | // mapping ratios 150 | float wratio = (float)(iwidth-1) / (owidth-1); 151 | float hratio = (float)(iheight-1) / (oheight-1); 152 | 153 | int b; 154 | for (b=0; bnDimension == 4) 156 | { 157 | THTensor_(select)(gradInput, gradInput_, 0, b); 158 | THTensor_(select)(gradOutput, gradOutput_, 0, b); 159 | } 160 | // compute gradients for each plane 161 | int k; 162 | for (k=0; ksize[2]; 21 | int iheight = input->size[1]; 22 | int ichannels = input->size[0]; 23 | int owidth = iwidth; 24 | int oheight = iheight; 25 | int ochannels = connex / 2; 26 | 27 | // norm ? 28 | double normer = (norm == 1) ? 1/sqrt(ichannels) : 1; 29 | 30 | // zero output 31 | THTensor_(zero)(output); 32 | 33 | // Euclidean distance 34 | if (dist == 0) { 35 | // Sum[ (Xi - Xi+1)^2 ] 36 | int x,y,k; 37 | for (k=0; ksize[0], input->size[1], input->size[2]); 65 | THTensor_(copy)(inputb, input); 66 | THTensor_(add)(inputb, inputb, 1e-12); 67 | 68 | // Sum[ (Xi * Xi+1) ] 69 | int x,y,k; 70 | for (y=0; ysize[2]; 125 | //int iheight = input->size[1]; 126 | int ichannels = input->size[0]; 127 | int owidth = gradOutput->size[2]; 128 | int oheight = gradOutput->size[1]; 129 | //int ochannels = gradOutput->size[0]; 130 | 131 | // norm ? 132 | double normer = (norm == 1) ? 
1/sqrt(ichannels)/sqrt(ichannels) : 1; 133 | 134 | // resize gradInput 135 | THTensor_(zero)(gradInput); 136 | 137 | // compute derivatives, and backpropagate output error to input 138 | if (dist == 0) { 139 | int x,y,k; 140 | for (k=0; k WARNING: this module has been deprecated,') 31 | print(' please use SpatialContrastiveNormalization instead') 32 | 33 | -- get args 34 | local args, nf, ker, thres 35 | = xlua.unpack( 36 | {...}, 37 | 'nn.SpatialNormalization', 38 | help_desc .. '\n' .. help_example, 39 | {arg='nInputPlane', type='number', help='number of input maps', req=true}, 40 | {arg='kernel', type='torch.Tensor | table', help='a KxK filtering kernel or two {1xK, Kx1} 1D kernels'}, 41 | {arg='threshold', type='number', help='threshold, for division [default = adaptive]'} 42 | ) 43 | 44 | -- check args 45 | if not ker then 46 | xerror('please provide kernel(s)', 'nn.SpatialNormalization', args.usage) 47 | end 48 | self.kernel = ker 49 | local ker2 50 | if type(ker) == 'table' then 51 | ker2 = ker[2] 52 | ker = ker[1] 53 | end 54 | self.nfeatures = nf 55 | self.fixedThres = thres 56 | 57 | -- padding values 58 | self.padW = math.floor(ker:size(2)/2) 59 | self.padH = math.floor(ker:size(1)/2) 60 | self.kerWisPair = 0 61 | self.kerHisPair = 0 62 | 63 | -- padding values for 2nd kernel 64 | if ker2 then 65 | self.pad2W = math.floor(ker2:size(2)/2) 66 | self.pad2H = math.floor(ker2:size(1)/2) 67 | else 68 | self.pad2W = 0 69 | self.pad2H = 0 70 | end 71 | self.ker2WisPair = 0 72 | self.ker2HisPair = 0 73 | 74 | -- normalize kernel 75 | ker:div(ker:sum()) 76 | if ker2 then ker2:div(ker2:sum()) end 77 | 78 | -- manage the case where ker is even size (for padding issue) 79 | if (ker:size(2)/2 == math.floor(ker:size(2)/2)) then 80 | print ('Warning, kernel width is even -> not symetric padding') 81 | self.kerWisPair = 1 82 | end 83 | if (ker:size(1)/2 == math.floor(ker:size(1)/2)) then 84 | print ('Warning, kernel height is even -> not symetric padding') 85 | 
self.kerHisPair = 1 86 | end 87 | if (ker2 and ker2:size(2)/2 == math.floor(ker2:size(2)/2)) then 88 | print ('Warning, kernel width is even -> not symetric padding') 89 | self.ker2WisPair = 1 90 | end 91 | if (ker2 and ker2:size(1)/2 == math.floor(ker2:size(1)/2)) then 92 | print ('Warning, kernel height is even -> not symetric padding') 93 | self.ker2HisPair = 1 94 | end 95 | 96 | -- create convolution for computing the mean 97 | local convo1 = nn.Sequential() 98 | convo1:add(nn.SpatialPadding(self.padW,self.padW-self.kerWisPair, 99 | self.padH,self.padH-self.kerHisPair)) 100 | local ctable = nn.tables.oneToOne(nf) 101 | convo1:add(nn.SpatialConvolutionMap(ctable,ker:size(2),ker:size(1))) 102 | convo1:add(nn.Sum(1)) 103 | convo1:add(nn.Replicate(nf)) 104 | -- set kernel 105 | local fb = convo1.modules[2].weight 106 | for i=1,fb:size(1) do fb[i]:copy(ker) end 107 | -- set bias to 0 108 | convo1.modules[2].bias:zero() 109 | 110 | -- 2nd ker ? 111 | if ker2 then 112 | local convo2 = nn.Sequential() 113 | convo2:add(nn.SpatialPadding(self.pad2W,self.pad2W-self.ker2WisPair, 114 | self.pad2H,self.pad2H-self.ker2HisPair)) 115 | local ctable = nn.tables.oneToOne(nf) 116 | convo2:add(nn.SpatialConvolutionMap(ctable,ker2:size(2),ker2:size(1))) 117 | convo2:add(nn.Sum(1)) 118 | convo2:add(nn.Replicate(nf)) 119 | -- set kernel 120 | local fb = convo2.modules[2].weight 121 | for i=1,fb:size(1) do fb[i]:copy(ker2) end 122 | -- set bias to 0 123 | convo2.modules[2].bias:zero() 124 | -- convo is a double convo now: 125 | local convopack = nn.Sequential() 126 | convopack:add(convo1) 127 | convopack:add(convo2) 128 | self.convo = convopack 129 | else 130 | self.convo = convo1 131 | end 132 | 133 | -- create convolution for computing the meanstd 134 | local convostd1 = nn.Sequential() 135 | convostd1:add(nn.SpatialPadding(self.padW,self.padW-self.kerWisPair, 136 | self.padH,self.padH-self.kerHisPair)) 137 | convostd1:add(nn.SpatialConvolutionMap(ctable,ker:size(2),ker:size(1))) 138 
| convostd1:add(nn.Sum(1)) 139 | convostd1:add(nn.Replicate(nf)) 140 | -- set kernel 141 | local fb = convostd1.modules[2].weight 142 | for i=1,fb:size(1) do fb[i]:copy(ker) end 143 | -- set bias to 0 144 | convostd1.modules[2].bias:zero() 145 | 146 | -- 2nd ker ? 147 | if ker2 then 148 | local convostd2 = nn.Sequential() 149 | convostd2:add(nn.SpatialPadding(self.pad2W,self.pad2W-self.ker2WisPair, 150 | self.pad2H,self.pad2H-self.ker2HisPair)) 151 | convostd2:add(nn.SpatialConvolutionMap(ctable,ker2:size(2),ker2:size(1))) 152 | convostd2:add(nn.Sum(1)) 153 | convostd2:add(nn.Replicate(nf)) 154 | -- set kernel 155 | local fb = convostd2.modules[2].weight 156 | for i=1,fb:size(1) do fb[i]:copy(ker2) end 157 | -- set bias to 0 158 | convostd2.modules[2].bias:zero() 159 | -- convo is a double convo now: 160 | local convopack = nn.Sequential() 161 | convopack:add(convostd1) 162 | convopack:add(convostd2) 163 | self.convostd = convopack 164 | else 165 | self.convostd = convostd1 166 | end 167 | 168 | -- other operation 169 | self.squareMod = nn.Square() 170 | self.sqrtMod = nn.Sqrt() 171 | self.subtractMod = nn.CSubTable() 172 | self.meanDiviseMod = nn.CDivTable() 173 | self.stdDiviseMod = nn.CDivTable() 174 | self.diviseMod = nn.CDivTable() 175 | self.thresMod = nn.Threshold() 176 | -- some tempo states 177 | self.coef = torch.Tensor(1,1) 178 | self.inConvo = torch.Tensor() 179 | self.inMean = torch.Tensor() 180 | self.inputZeroMean = torch.Tensor() 181 | self.inputZeroMeanSq = torch.Tensor() 182 | self.inConvoVar = torch.Tensor() 183 | self.inVar = torch.Tensor() 184 | self.inStdDev = torch.Tensor() 185 | self.thstd = torch.Tensor() 186 | end 187 | 188 | function SpatialNormalization:updateOutput(input) 189 | -- auto switch to 3-channel 190 | self.input = input 191 | if (input:nDimension() == 2) then 192 | self.input = input:clone():resize(1,input:size(1),input:size(2)) 193 | end 194 | 195 | -- recompute coef only if necessary 196 | if (self.input:size(3) ~= 
self.coef:size(2)) or (self.input:size(2) ~= self.coef:size(1)) then 197 | local intVals = self.input.new(self.nfeatures,self.input:size(2),self.input:size(3)):fill(1) 198 | self.coef = self.convo:updateOutput(intVals) 199 | self.coef = self.coef:clone() 200 | end 201 | 202 | -- compute mean 203 | self.inConvo = self.convo:updateOutput(self.input) 204 | self.inMean = self.meanDiviseMod:updateOutput{self.inConvo,self.coef} 205 | self.inputZeroMean = self.subtractMod:updateOutput{self.input,self.inMean} 206 | 207 | -- compute std dev 208 | self.inputZeroMeanSq = self.squareMod:updateOutput(self.inputZeroMean) 209 | self.inConvoVar = self.convostd:updateOutput(self.inputZeroMeanSq) 210 | self.inStdDevNotUnit = self.sqrtMod:updateOutput(self.inConvoVar) 211 | self.inStdDev = self.stdDiviseMod:updateOutput({self.inStdDevNotUnit,self.coef}) 212 | local meanstd = self.inStdDev:mean() 213 | self.thresMod.threshold = self.fixedThres or math.max(meanstd,1e-3) 214 | self.thresMod.val = self.fixedThres or math.max(meanstd,1e-3) 215 | self.stdDev = self.thresMod:updateOutput(self.inStdDev) 216 | 217 | --remove std dev 218 | self.diviseMod:updateOutput{self.inputZeroMean,self.stdDev} 219 | self.output = self.diviseMod.output 220 | return self.output 221 | end 222 | 223 | function SpatialNormalization:updateGradInput(input, gradOutput) 224 | -- auto switch to 3-channel 225 | self.input = input 226 | if (input:nDimension() == 2) then 227 | self.input = input:clone():resize(1,input:size(1),input:size(2)) 228 | end 229 | self.gradInput:resizeAs(self.input):zero() 230 | 231 | -- backprop all 232 | local gradDiv = self.diviseMod:updateGradInput({self.inputZeroMean,self.stdDev},gradOutput) 233 | local gradThres = gradDiv[2] 234 | local gradZeroMean = gradDiv[1] 235 | local gradinStdDev = self.thresMod:updateGradInput(self.inStdDev,gradThres) 236 | local gradstdDiv = self.stdDiviseMod:updateGradInput({self.inStdDevNotUnit,self.coef},gradinStdDev) 237 | local gradinStdDevNotUnit = 
gradstdDiv[1] 238 | local gradinConvoVar = self.sqrtMod:updateGradInput(self.inConvoVar,gradinStdDevNotUnit) 239 | local gradinputZeroMeanSq = self.convostd:updateGradInput(self.inputZeroMeanSq,gradinConvoVar) 240 | gradZeroMean:add(self.squareMod:updateGradInput(self.inputZeroMean,gradinputZeroMeanSq)) 241 | local gradDiff = self.subtractMod:updateGradInput({self.input,self.inMean},gradZeroMean) 242 | local gradinMean = gradDiff[2] 243 | local gradinConvoNotUnit = self.meanDiviseMod:updateGradInput({self.inConvo,self.coef},gradinMean) 244 | local gradinConvo = gradinConvoNotUnit[1] 245 | -- first part of the gradInput 246 | self.gradInput:add(gradDiff[1]) 247 | -- second part of the gradInput 248 | self.gradInput:add(self.convo:updateGradInput(self.input,gradinConvo)) 249 | return self.gradInput 250 | end 251 | 252 | function SpatialNormalization:type(type) 253 | parent.type(self,type) 254 | self.convo:type(type) 255 | self.meanDiviseMod:type(type) 256 | self.subtractMod:type(type) 257 | self.squareMod:type(type) 258 | self.convostd:type(type) 259 | self.sqrtMod:type(type) 260 | self.stdDiviseMod:type(type) 261 | self.thresMod:type(type) 262 | self.diviseMod:type(type) 263 | return self 264 | end 265 | -------------------------------------------------------------------------------- /DataSet.lua: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 2 | -- DataSet: a class to handle standard datasets. 3 | -- 4 | -- Authors: Clement Farabet, Benoit Corda 5 | -------------------------------------------------------------------------------- 6 | 7 | local lDataSet = torch.class('nn.DataSet') 8 | 9 | function lDataSet:__init(...) 10 | xlua.require('image',true) 11 | self.nbSamples = 0 12 | if select('#',...) > 0 then 13 | self:load(...) 
14 | end 15 | end 16 | 17 | function lDataSet:size() 18 | return self.nbSamples 19 | end 20 | 21 | function lDataSet:__tostring__() 22 | str = 'DataSet:\n' 23 | if self.nbSamples then 24 | str = str .. ' + nb samples : '..self.nbSamples 25 | else 26 | str = str .. ' + empty set...' 27 | end 28 | return str 29 | end 30 | 31 | function lDataSet:load(...) 32 | -- parse args 33 | local args, dataSetFolder, nbSamplesRequired, cacheFile, channels, 34 | sampleSize,padding 35 | = xlua.unpack( 36 | {...}, 37 | 'DataSet.load', nil, 38 | {arg='dataSetFolder', type='string', help='path to dataset', req=true}, 39 | {arg='nbSamplesRequired', type='number', help='number of patches to load', default='all'}, 40 | {arg='cacheFile', type='string', help='path to file to cache files'}, 41 | {arg='channels', type='number', help='nb of channels', default=1}, 42 | {arg='sampleSize', type='table', help='resize all sample: {c,w,h}'}, 43 | {arg='padding', type='boolean', help='center sample in w,h dont rescale'} 44 | ) 45 | self.cacheFileName = cacheFile or self.cacheFileName 46 | 47 | -- Clear current dataset 48 | self:emptySet() 49 | 50 | -- Then try to find if cache file exists 51 | -- the base name of this file can be provided by useCacheFile() 52 | -- and the suffixe is the nb of samples needed, 'all' if not specified 53 | local fileName 54 | local datasetLoadedFromFile = false 55 | if (self.cacheFileName ~= nil) then 56 | fileName = self.cacheFileName .. '-' .. nbSamplesRequired 57 | if sys.filep(fileName) then 58 | -- File found 59 | print(' Loading samples from cached file ' .. 
fileName) 60 | f = torch.DiskFile(fileName, 'rw') 61 | f:binary() 62 | self:read(f) 63 | f.close(f) 64 | datasetLoadedFromFile = true 65 | end 66 | end 67 | 68 | -- If dataset couldn't be loaded from cache, load it 69 | if (datasetLoadedFromFile == false) then 70 | self:append{dataSetFolder=dataSetFolder, channels=channels, 71 | nbSamplesRequired=nbSamplesRequired, 72 | sampleSize=sampleSize} 73 | -- if cache name given, create it now 74 | if (fileName ~= nil) then 75 | print(' Dumping dataset to cache file ' .. fileName .. ' for fast retrieval') 76 | f = torch.DiskFile(fileName, 'rw') 77 | f:binary() 78 | self:write(f) 79 | f.close(f) 80 | end 81 | end 82 | end 83 | 84 | function lDataSet:emptySet(dataSetFolder) 85 | for i = 1,table.getn(self) do 86 | self[i] = nil 87 | end 88 | self.nbSamples = 0 89 | end 90 | 91 | function lDataSet:apply(toapply) 92 | print(' Applying function to dataset') 93 | for i=1,self.nbSamples do 94 | xlua.progress(i, self.nbSamples) 95 | self[i][1] = toapply(self[i][1]) 96 | end 97 | end 98 | 99 | function lDataSet:cropAndResize(side) 100 | for i=1,self.nbSamples do 101 | local newSample = torch.Tensor(1, side, side) 102 | local initSide = math.min(self[i][1]:size()[1], self[i][1]:size()[2]) 103 | local x1 = math.floor((self[i][1]:size(3) - initSide) / 2) 104 | local y1 = math.floor((self[i][1]:size(2) - initSide) / 2) 105 | local x2 = x1 + initSide 106 | local y2 = y1 + initSide 107 | image.crop(newSample,self[i][1],x1,y1,x2,y2) 108 | self[i][1] = newSample 109 | end 110 | end 111 | 112 | function lDataSet:add(args) 113 | local input = args.input 114 | local output = args.output 115 | self.nbSamples = self.nbSamples + 1 116 | self[self.nbSamples] = {input, output} 117 | end 118 | 119 | function lDataSet:append(...) 
120 | -- parse args 121 | local args, dataSetFolder, channels, nbSamplesRequired, useLabelPiped, 122 | useDirAsLabel, nbLabels, sampleSize, padding 123 | = xlua.unpack( 124 | {...}, 125 | 'DataSet:append', 'append a folder to the dataset object', 126 | {arg='dataSetFolder', type='string', help='path to dataset', req=true}, 127 | {arg='channels', type='number', help='number of channels for the image to load', default=3}, 128 | {arg='nbSamplesRequired', type='number', help='max number of samples to load'}, 129 | {arg='useLabelPiped', type='boolean', help='flag to use the filename as output value',default=false}, 130 | {arg='useDirAsLabel', type='boolean', help='flag to use the directory as label',default=false}, 131 | {arg='nbLabels', type='number', help='how many classes (goes with useDirAsLabel)', default=1}, 132 | {arg='sampleSize', type='table', help='resize all sample: {c,w,h}'}, 133 | {arg='padding',type='boolean',help='do we padd all the inputs in w,h'} 134 | ) 135 | -- parse args 136 | local files = sys.dir(dataSetFolder) 137 | 138 | print(' Loading samples from ' .. args.dataSetFolder .. 
'/') 139 | 140 | -- nb of samples to load: 141 | local toLoad = table.getn(files) 142 | if (nbSamplesRequired ~= nil and nbSamplesRequired ~= 'all') then 143 | toLoad = math.min(toLoad, nbSamplesRequired) 144 | end 145 | local loaded = 0 146 | 147 | for k,file in pairs(files) do 148 | local input, inputs, rawOutput 149 | 150 | -- disp progress 151 | xlua.progress(k, toLoad) 152 | 153 | if (string.find(file,'.png')) then 154 | -- load the PNG into a new Tensor 155 | pathToPng = sys.concat(dataSetFolder, file) 156 | input = image.loadPNG(pathToPng,channels) 157 | 158 | -- parse the file name and set the ouput from it 159 | rawOutput = sys.split(string.gsub(file, ".png", ""),'|') 160 | 161 | elseif (string.find(file,'.p[pgn]m')) then 162 | -- load the PPM into a new Tensor 163 | pathToPpm = sys.concat(dataSetFolder, file) 164 | input = image.loadPPM(pathToPpm,channels) 165 | 166 | -- parse the file name and set the ouput from it 167 | rawOutput = sys.split(string.gsub(file, ".p[pgn]m", ""),'|') 168 | 169 | elseif (string.find(file,'.jpg')) then 170 | -- load the JPG into a new Tensor 171 | pathToPpm = sys.concat(dataSetFolder, file) 172 | input = image.load(pathToPpm,channels) 173 | 174 | -- parse the file name and set the ouput from it 175 | rawOutput = sys.split(string.gsub(file, ".jpg", ""),'|') 176 | end 177 | 178 | -- if image loaded then add into the set 179 | if (input and rawOutput) then 180 | table.remove(rawOutput,1) --remove file ID 181 | 182 | -- put input in 3D tensor 183 | input:resize(channels, input:size(2), input:size(3)) 184 | 185 | -- rescale ? 
186 | if sampleSize then 187 | inputs = torch.Tensor(channels, sampleSize[2], sampleSize[3]) 188 | if padding then 189 | offw = math.floor((sampleSize[2] - input[2])*0.5) 190 | offh = math.floor((sampleSize[3] - input[3])*0.5) 191 | if offw >= 0 and offh >= 0 then 192 | inputs:narrow(2,offw,input[2]):narrow(3,offh,input[3]):copy(input) 193 | else 194 | print('reverse crop not implemented w,h must be larger than all data points') 195 | end 196 | else 197 | image.scale(input, inputs, 'bilinear') 198 | end 199 | else 200 | inputs = input 201 | end 202 | 203 | -- and generate output 204 | local output = torch.Tensor(table.getn(rawOutput), 1) 205 | for i,v in ipairs(rawOutput) do 206 | output[i][1]=v 207 | end 208 | 209 | -- add input/output in the set 210 | self.nbSamples = self.nbSamples + 1 211 | self[self.nbSamples] = {inputs, output} 212 | 213 | loaded = loaded + 1 214 | if (loaded == toLoad) then 215 | break 216 | end 217 | end 218 | 219 | -- some cleanup, for memory 220 | collectgarbage() 221 | end 222 | end 223 | 224 | function lDataSet:appendDataSet(dataset) 225 | print(" Merging dataset of size = "..dataset:size().. 226 | " into dataset of size = "..self:size()) 227 | for i = 1,dataset:size() do 228 | self.nbSamples = self.nbSamples + 1 229 | self[self.nbSamples] = {} 230 | self[self.nbSamples][1] = torch.Tensor(dataset[i][1]):copy(dataset[i][1]) 231 | if (dataset[i][2] ~= nil) then 232 | self[self.nbSamples][2] = torch.Tensor(dataset[i][2]):copy(dataset[i][2]) 233 | end 234 | end 235 | end 236 | 237 | function lDataSet:popSubset(args) 238 | -- parse args 239 | local nElement = args.nElement 240 | local ratio = args.ratio or 0.1 241 | local subset = args.outputSet or nn.DataSet() 242 | 243 | -- get nb of samples to pop 244 | local start_index 245 | if (nElement ~= nil) then 246 | start_index = self:size() - nElement + 1 247 | else 248 | start_index = math.floor((1-ratio)*self:size()) + 1 249 | end 250 | 251 | -- info 252 | print(' Popping ' .. 
self:size() - start_index + 1 .. ' samples dataset') 253 | 254 | -- extract samples 255 | for i = self:size(), start_index, -1 do 256 | subset.nbSamples = subset.nbSamples + 1 257 | subset[subset.nbSamples] = {} 258 | subset[subset.nbSamples][1] = torch.Tensor(self[i][1]):copy(self[i][1]) 259 | subset[subset.nbSamples][2] = torch.Tensor(self[i][2]):copy(self[i][2]) 260 | self[i] = nil 261 | self.nbSamples = self.nbSamples - 1 262 | end 263 | 264 | -- return network 265 | return subset 266 | end 267 | 268 | function lDataSet:resize(w,h) 269 | self.resized = true 270 | xlua.error('not implemented yet', 'DataSet') 271 | end 272 | 273 | function lDataSet:shuffle() 274 | if (self.nbSamples == 0) then 275 | print('Warning, trying to shuffle empty Dataset, no effect...') 276 | return 277 | end 278 | local n = self.nbSamples 279 | 280 | while n > 2 do 281 | local k = math.random(n) 282 | -- swap elements 283 | self[n], self[k] = self[k], self[n] 284 | n = n - 1 285 | end 286 | end 287 | 288 | function lDataSet:display(nSamples,legend) 289 | local samplesToShow = {} 290 | for i = 1,nSamples do 291 | table.insert(samplesToShow, self[i][1]) 292 | end 293 | image.display{image=samplesToShow,gui=false,legend=legend} 294 | end 295 | 296 | function lDataSet:useCacheFile(fileName) 297 | self.cacheFileName = fileName 298 | end 299 | 300 | function lDataSet:write(file) 301 | file:writeBool(self.resized) 302 | file:writeInt(self.nbSamples) 303 | -- write all the samples 304 | for i = 1,self.nbSamples do 305 | file:writeObject(self[i]) 306 | end 307 | end 308 | 309 | function lDataSet:read(file) 310 | self.resized = file:readBool() 311 | self.nbSamples = file:readInt() 312 | -- read all the samples 313 | for i = 1,self.nbSamples do 314 | self[i] = file:readObject() 315 | end 316 | end 317 | -------------------------------------------------------------------------------- /generic/SpatialReSamplingEx.c: -------------------------------------------------------------------------------- 1 | 
#ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/SpatialReSamplingEx.c" 3 | #else 4 | #include 5 | 6 | #ifndef MAX 7 | #define MAX(a,b) ( ((a)>(b)) ? (a) : (b) ) 8 | #endif 9 | #ifndef MIN 10 | #define MIN(a,b) ( ((a)<(b)) ? (a) : (b) ) 11 | #endif 12 | 13 | static int nn_(SpatialReSamplingEx_updateOutput)(lua_State *L) 14 | { 15 | // get all params 16 | THTensor *input = luaT_checkudata(L, 2, torch_Tensor); 17 | THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); 18 | int oheight = luaT_getfieldcheckint(L, 1, "oheightCurrent"); 19 | int owidth = luaT_getfieldcheckint(L, 1, "owidthCurrent"); 20 | int mode = luaT_getfieldcheckint(L, 1, "mode_c"); 21 | 22 | // dims 23 | int iwidth = input->size[2]; 24 | int iheight = input->size[1]; 25 | int channels1 = input->size[0]; 26 | int channels2 = input->size[3]; 27 | 28 | // get strides 29 | long *is = input->stride; 30 | long *os = output->stride; 31 | 32 | // get raw pointers 33 | real *input_data = THTensor_(data)(input); 34 | real *output_data = THTensor_(data)(output); 35 | 36 | if (mode == 2) { //bilinear 37 | 38 | // mapping ratios 39 | float wratio = (float)(iwidth-1) / (owidth-1); 40 | float hratio = (float)(iheight-1) / (oheight-1); 41 | 42 | // resample each plane 43 | int k1, k2, x, y; 44 | for (k1 = 0; k1 < channels1; ++k1) { 45 | for (k2 = 0; k2 < channels2; ++k2) { 46 | 47 | // get planes 48 | real* input_p = input_data + k1*is[0] + k2*is[3]; 49 | real *output_p = output_data + k1*os[0] + k2*os[3]; 50 | 51 | // for each plane, resample 52 | for (y = 0; y < oheight; ++y) { 53 | for (x = 0; x < owidth; ++x) { 54 | 55 | // subpixel position: 56 | const float ix = wratio*x; 57 | const float iy = hratio*y; 58 | 59 | // 4 nearest neighbors: 60 | const int ix_nw = floor(ix); 61 | const int iy_nw = floor(iy); 62 | const int ix_ne = ix_nw + 1; 63 | const int iy_ne = iy_nw; 64 | const int ix_sw = ix_nw; 65 | const int iy_sw = iy_nw + 1; 66 | const int ix_se = ix_nw + 1; 67 | const int 
iy_se = iy_nw + 1; 68 | 69 | // get surfaces to each neighbor: 70 | const float se = (ix-(float)ix_nw)*(iy-(float)iy_nw); 71 | const float sw = ((float)ix_ne-ix)*(iy-(float)iy_ne); 72 | const float ne = (ix-(float)ix_sw)*((float)iy_sw-iy); 73 | const float nw = ((float)ix_se-ix)*((float)iy_se-iy); 74 | 75 | // weighted sum of neighbors: 76 | output_p[y*os[1] + x*os[2]] = input_p[iy_nw*is[1] + ix_nw*is[2]] * nw 77 | + input_p[iy_ne*is[1] + MIN(ix_ne,iwidth-1)*is[2]] * ne 78 | + input_p[MIN(iy_sw,iheight-1)*is[1] + ix_sw*is[2]] * sw 79 | + input_p[MIN(iy_se,iheight-1)*is[1] + MIN(ix_se,iwidth-1)*is[2]] * se; 80 | } 81 | } 82 | } 83 | } 84 | 85 | } else { // simple or average 86 | assert((mode == 0) || (mode == 1)); 87 | 88 | // resample 89 | if (oheight >= iheight) { 90 | // upsampling (from lua check we know that owidth >= iwidth) 91 | // upsampling average mode is actually simple mode 92 | int dH = (oheight+iheight-1)/iheight; //=ceil((float)oheight/(float(iheight))) 93 | int dW = (owidth+iwidth-1)/iwidth; 94 | int k1, k2, x, y; 95 | for (k1 = 0; k1 < channels1; k1++) { 96 | for (k2 = 0; k2 < channels2; k2++) { 97 | // get planes 98 | real *input_p = input_data + k1*is[0] + k2*is[3]; 99 | real *output_p = output_data + k1*os[0] + k2*os[3]; 100 | 101 | // for each plane, resample 102 | for (y=0; ysize[2]; 180 | int oheight = gradOutput->size[1]; 181 | int channels1 = gradOutput->size[0]; 182 | int channels2 = gradOutput->size[3]; 183 | 184 | // zero gradInput 185 | THTensor_(zero)(gradInput); 186 | 187 | // get strides 188 | long *gis = gradInput->stride; 189 | long *gos = gradOutput->stride; 190 | 191 | // get raw pointers 192 | real *gradInput_data = THTensor_(data)(gradInput); 193 | real *gradOutput_data = THTensor_(data)(gradOutput); 194 | 195 | if (mode == 2) { //bilinear 196 | 197 | // mapping ratios 198 | float wratio = (float)(iwidth-1) / (owidth-1); 199 | float hratio = (float)(iheight-1) / (oheight-1); 200 | 201 | // compute gradients for each plane 202 | 
int k1, k2, x, y; 203 | for (k1 = 0; k1 < channels1; ++k1) { 204 | for (k2 = 0; k2 < channels2; ++k2) { 205 | 206 | // get planes 207 | real *gradInput_p = gradInput_data + k1*gis[0] + k2*gis[3]; 208 | real *gradOutput_p = gradOutput_data + k1*gos[0] + k2*gos[3]; 209 | 210 | // for each plane, resample 211 | for (y = 0; y < oheight; ++y) { 212 | for (x = 0; x < owidth; ++x) { 213 | 214 | // subpixel position: 215 | const float ix = wratio*x; 216 | const float iy = hratio*y; 217 | 218 | // 4 nearest neighbors: 219 | const int ix_nw = floor(ix); 220 | const int iy_nw = floor(iy); 221 | const int ix_ne = ix_nw + 1; 222 | const int iy_ne = iy_nw; 223 | const int ix_sw = ix_nw; 224 | const int iy_sw = iy_nw + 1; 225 | const int ix_se = ix_nw + 1; 226 | const int iy_se = iy_nw + 1; 227 | 228 | // get surfaces to each neighbor: 229 | const float se = (ix-(float)ix_nw)*(iy-(float)iy_nw); 230 | const float sw = ((float)ix_ne-ix)*(iy-(float)iy_ne); 231 | const float ne = (ix-(float)ix_sw)*((float)iy_sw-iy); 232 | const float nw = ((float)ix_se-ix)*((float)iy_se-iy); 233 | 234 | // output gradient 235 | const double ograd = gradOutput_p[y*gos[1] + x*gos[2]]; 236 | 237 | // accumulate gradient 238 | gradInput_p[iy_nw*gis[1] + ix_nw*gis[2]] += nw * ograd; 239 | gradInput_p[iy_ne*gis[1] + MIN(ix_ne,iwidth-1)*gis[2]] += ne * ograd; 240 | gradInput_p[MIN(iy_sw,iheight-1)*gis[1] + ix_sw*gis[2]] += sw * ograd; 241 | gradInput_p[MIN(iy_se,iheight-1)*gis[1] + MIN(ix_se,iwidth-1)*gis[2]] += se*ograd; 242 | } 243 | } 244 | } 245 | } 246 | 247 | } else { // simple or average 248 | assert((mode == 0) || (mode == 1)); 249 | 250 | // compute gradients 251 | if (oheight >= iheight) { 252 | // upsampling (from lua check we know that owidth >= iwidth) 253 | // upsampling average mode is actually simple mode 254 | int dH = (oheight+iheight-1)/iheight; //=ceil((float)oheight/(float(iheight))) 255 | int dW = (owidth+iwidth-1)/iwidth; 256 | int k1, k2, x, y; 257 | for (k1 = 0; k1 < channels1; 
k1++) { 258 | for (k2 = 0; k2 < channels2; k2++) { 259 | // get planes 260 | real *gradInput_p = gradInput_data + k1*gis[0] + k2*gis[3]; 261 | real *gradOutput_p = gradOutput_data + k1*gos[0] + k2*gos[3]; 262 | 263 | // for each plane, resample 264 | for (y=0; ynDimension == 2, 2, "2D(batch mode) tensor expected"); 30 | luaL_argcheck(L, input->size[1] == inputSize, 2, "invalid input size"); 31 | 32 | node = THIntTensor_new(); 33 | nodeWeight = THTensor_(new)(); 34 | nodeBias = THTensor_(new)(); 35 | nodeOutput = THTensor_(new)(); 36 | nodeInput = THTensor_(new)(); 37 | nodeInter = THTensor_(new)(); 38 | 39 | THTensor_(resize1d)(output, input->size[0]); 40 | 41 | for(i = 0; i < input->size[0]; i++) 42 | { 43 | long n = 0; 44 | long childId = (long)(THIntTensor_get1d(target, i)) - 1; 45 | accreal narrowsum = 0; 46 | THTensor_(select)(nodeInput, input, 0, i); 47 | while(1) 48 | { 49 | long parentId, parentIdx, childIdx, nChildren; 50 | /* get next Node in Tree */ 51 | THIntTensor_select(node, childParent, 0, childId); 52 | parentId = (long)(THIntTensor_get1d(node, 0)) - 1; 53 | childIdx = (long)(THIntTensor_get1d(node, 1)) - 1; 54 | 55 | luaL_argcheck(L, parentId != -2, 2, "Non-root node has no parent in tree."); 56 | 57 | THIntTensor_select(node, parentChildren, 0, parentId); 58 | parentIdx = (long)(THIntTensor_get1d(node, 0)) - 1; 59 | nChildren = (long)(THIntTensor_get1d(node, 1)); 60 | 61 | /* Linear */ 62 | THTensor_(narrow)(nodeWeight, weight, 0, parentIdx, nChildren); 63 | THTensor_(narrow)(nodeBias, bias, 0, parentIdx, nChildren); 64 | THTensor_(narrow)(nodeOutput, linearOutput, 0, 0, nChildren); 65 | 66 | THTensor_(addmv)(nodeOutput, 1, nodeBias, 1, nodeWeight, nodeInput); 67 | 68 | /* LogSoftMax */ 69 | THTensor_(set)(nodeInter, nodeOutput); 70 | THTensor_(narrow)(nodeOutput, logsoftOutput, 0, maxFamilyPath*i + n, nChildren); 71 | 72 | input_data = THTensor_(data)(nodeInter); 73 | output_data = THTensor_(data)(nodeOutput); 74 | 75 | accreal logsum = 0; 76 | 
real maxInput = -THInf; 77 | 78 | for(d = 0; d < nChildren; d++) 79 | maxInput = THMax(maxInput, input_data[d]); 80 | 81 | for(d = 0; d < nChildren; d++) 82 | logsum += THExpMinusApprox(maxInput-input_data[d]); 83 | logsum = maxInput + log(logsum); 84 | 85 | for(d = 0; d < nChildren; d++) 86 | output_data[d] = input_data[d] - logsum; 87 | 88 | /* Narrow */ 89 | THTensor_(set)(nodeInter, nodeOutput); 90 | THTensor_(narrow)(nodeOutput, nodeInter, 0, childIdx, 1); 91 | 92 | /* CAddTable (without log, would have been CMulTable) */ 93 | narrowsum += THTensor_(get1d)(nodeOutput, 0); 94 | n += nChildren; 95 | /* Break when root is reached */ 96 | if (parentId == rootId) 97 | { 98 | break; 99 | } 100 | childId = parentId; 101 | } 102 | THTensor_(set1d)(output, i, narrowsum); 103 | } 104 | 105 | THIntTensor_free(node); 106 | THTensor_(free)(nodeWeight); 107 | THTensor_(free)(nodeBias); 108 | THTensor_(free)(nodeOutput); 109 | THTensor_(free)(nodeInput); 110 | THTensor_(free)(nodeInter); 111 | return 1; 112 | } 113 | 114 | static int nn_(SoftMaxTree_updateGradInput)(lua_State *L) 115 | { 116 | THTensor *input = luaT_checkudata(L, 2, torch_Tensor); 117 | THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor); 118 | THIntTensor *target = (THIntTensor*)luaT_checkudata(L, 4, "torch.IntTensor"); 119 | int inputSize = luaT_getfieldcheckint(L, 1, "inputSize"); 120 | long rootId = (long)(luaT_getfieldcheckint(L, 1, "rootId") - 1); 121 | long maxFamilyPath = (long)luaT_getfieldcheckint(L, 1, "maxFamilyPath"); 122 | 123 | THIntTensor *childParent = (THIntTensor*)luaT_getfieldcheckudata(L, 1, "childParent", "torch.IntTensor"); 124 | THIntTensor *parentChildren = (THIntTensor*)luaT_getfieldcheckudata(L, 1, "parentChildren", "torch.IntTensor"); 125 | 126 | THTensor *logsoftOutput = luaT_getfieldcheckudata(L, 1, "_multiBuffer", torch_Tensor); 127 | 128 | THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); 129 | THTensor *gradInput = luaT_getfieldcheckudata(L, 1, 
"_gradInput", torch_Tensor); 130 | 131 | THIntTensor *node; 132 | THTensor *nodeWeight, *nodeOutput; 133 | THTensor *nodeGradInput, *weightTranspose; 134 | real *output_data; 135 | 136 | long i, d; 137 | 138 | luaL_argcheck(L, input->nDimension == 2, 2, "2D(batch mode) tensor expected"); 139 | luaL_argcheck(L, input->size[1] == inputSize, 2, "invalid input size"); 140 | 141 | luaL_argcheck(L, gradOutput->nDimension == 1, 2, "1D tensor expected"); 142 | 143 | node = THIntTensor_new(); 144 | nodeWeight = THTensor_(new)(); 145 | nodeOutput = THTensor_(new)(); 146 | nodeGradInput = THTensor_(new)(); 147 | weightTranspose = THTensor_(new)(); 148 | 149 | THTensor_(transpose)(weightTranspose, weight, 0, 1); 150 | THTensor_(resizeAs)(gradInput, input); 151 | THTensor_(zero)(gradInput); 152 | 153 | for(i = 0; i < input->size[0]; i++) 154 | { 155 | long n = 0; 156 | long childId = (long)(THIntTensor_get1d(target, i)) - 1; 157 | real grad = THTensor_(get1d)(gradOutput, i); 158 | 159 | THTensor_(select)(nodeGradInput, gradInput, 0, i); 160 | 161 | while(1) 162 | { 163 | long parentId, parentIdx, childIdx, nChildren; 164 | /* get next Node in Tree */ 165 | THIntTensor_select(node, childParent, 0, childId); 166 | parentId = (long)(THIntTensor_get1d(node, 0)) - 1; 167 | childIdx = (long)(THIntTensor_get1d(node, 1)) - 1; 168 | 169 | luaL_argcheck(L, parentId != -2, 2, "Non-root node has no parent in tree."); 170 | 171 | THIntTensor_select(node, parentChildren, 0, parentId); 172 | parentIdx = (long)(THIntTensor_get1d(node, 0)) - 1; 173 | nChildren = (long)(THIntTensor_get1d(node, 1)); 174 | 175 | luaL_argcheck(L, logsoftOutput->size[0] >= n+nChildren, 2, \ 176 | "Backward performed on different inputs than last forward"); 177 | 178 | /* CAddTable + Narrow + LogSoftMax */ 179 | THTensor_(narrow)(nodeOutput, logsoftOutput, 0, maxFamilyPath*i + n, nChildren); 180 | 181 | output_data = THTensor_(data)(nodeOutput); 182 | 183 | for(d = 0; d < nChildren; d++) 184 | output_data[d] = 
-exp(output_data[d])*grad; 185 | output_data[childIdx] += grad; 186 | 187 | 188 | /* Linear */ 189 | THTensor_(narrow)(nodeWeight, weightTranspose, 1, parentIdx, nChildren); 190 | 191 | THTensor_(addmv)(nodeGradInput, 1, nodeGradInput, 1, nodeWeight, nodeOutput); 192 | 193 | n += nChildren; 194 | /* Break when root is reached */ 195 | if (parentId == rootId) 196 | { 197 | break; 198 | } 199 | childId = parentId; 200 | } 201 | } 202 | 203 | THIntTensor_free(node); 204 | THTensor_(free)(nodeWeight); 205 | THTensor_(free)(nodeOutput); 206 | THTensor_(free)(nodeGradInput); 207 | THTensor_(free)(weightTranspose); 208 | return 1; 209 | } 210 | 211 | static int nn_(SoftMaxTree_accGradParameters)(lua_State *L) 212 | { 213 | THTensor *input = luaT_checkudata(L, 2, torch_Tensor); 214 | THIntTensor *target = (THIntTensor*)luaT_checkudata(L, 4, "torch.IntTensor"); 215 | real scale = luaL_optnumber(L, 5, 1); 216 | long rootId = (long)(luaT_getfieldcheckint(L, 1, "rootId") - 1); 217 | long maxFamilyPath = (long)luaT_getfieldcheckint(L, 1, "maxFamilyPath"); 218 | 219 | int inputSize = luaT_getfieldcheckint(L, 1, "inputSize"); 220 | THIntTensor *childParent = (THIntTensor*)luaT_getfieldcheckudata(L, 1, "childParent", "torch.IntTensor"); 221 | THIntTensor *parentChildren = (THIntTensor*)luaT_getfieldcheckudata(L, 1, "parentChildren", "torch.IntTensor"); 222 | 223 | THTensor *linearGradOutput = luaT_getfieldcheckudata(L, 1, "_multiBuffer", torch_Tensor);; 224 | 225 | THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); 226 | THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); 227 | 228 | lua_getfield(L, 1, "updates"); 229 | 230 | THIntTensor *node; 231 | THTensor *nodeGradWeight, *nodeGradBias, *nodeInput, *nodeGradOutput; 232 | 233 | long i; 234 | 235 | luaL_argcheck(L, input->nDimension == 2, 2, "2D(batch mode) tensor expected"); 236 | luaL_argcheck(L, input->size[1] == inputSize, 2, "invalid input size"); 237 | 238 | node = 
THIntTensor_new(); 239 | nodeGradWeight = THTensor_(new)(); 240 | nodeGradBias = THTensor_(new)(); 241 | nodeGradOutput = THTensor_(new)(); 242 | nodeInput = THTensor_(new)(); 243 | 244 | for(i = 0; i < input->size[0]; i++) 245 | { 246 | long n = 0; 247 | long childId = (long)(THIntTensor_get1d(target, i)) - 1; 248 | THTensor_(select)(nodeInput, input, 0, i); 249 | 250 | while(1) 251 | { 252 | long parentId, parentIdx, childIdx, nChildren; 253 | double count; 254 | /* get next Node in Tree */ 255 | THIntTensor_select(node, childParent, 0, childId); 256 | parentId = (long)(THIntTensor_get1d(node, 0)) - 1; 257 | childIdx = (long)(THIntTensor_get1d(node, 1)) - 1; 258 | 259 | luaL_argcheck(L, parentId != -2, 2, "Non-root node has no parent in tree."); 260 | 261 | THIntTensor_select(node, parentChildren, 0, parentId); 262 | parentIdx = (long)(THIntTensor_get1d(node, 0)) - 1; 263 | nChildren = (long)(THIntTensor_get1d(node, 1)); 264 | 265 | THTensor_(narrow)(nodeGradOutput, linearGradOutput, 0, maxFamilyPath*i + n, nChildren); 266 | THTensor_(narrow)(nodeGradWeight, gradWeight, 0, parentIdx, nChildren); 267 | THTensor_(narrow)(nodeGradBias, gradBias, 0, parentIdx, nChildren); 268 | 269 | THTensor_(addr)(nodeGradWeight, 1, nodeGradWeight, scale, nodeGradOutput, nodeInput); 270 | THTensor_(cadd)(nodeGradBias, nodeGradBias, scale, nodeGradOutput); 271 | 272 | /* updates will contain parentId (key) sum of scales (value)*/ 273 | lua_pushinteger(L, (int)(parentId+1)); 274 | lua_gettable(L, -2); 275 | count = lua_tonumber(L, -1) + scale; 276 | lua_pop(L, 1); 277 | 278 | lua_pushinteger(L, (int)(parentId+1)); /* key */ 279 | lua_pushnumber(L, count); /* value */ 280 | lua_settable(L, -3); 281 | 282 | n += nChildren; 283 | /* Break when root is reached */ 284 | if (parentId == rootId) 285 | { 286 | break; 287 | } 288 | childId = parentId; 289 | } 290 | } 291 | 292 | THIntTensor_free(node); 293 | THTensor_(free)(nodeGradWeight); 294 | THTensor_(free)(nodeGradBias); 295 | 
THTensor_(free)(nodeGradOutput); 296 | THTensor_(free)(nodeInput); 297 | 298 | return 0; 299 | } 300 | 301 | static const struct luaL_Reg nn_(SoftMaxTree__) [] = { 302 | {"SoftMaxTree_updateOutput", nn_(SoftMaxTree_updateOutput)}, 303 | {"SoftMaxTree_updateGradInput", nn_(SoftMaxTree_updateGradInput)}, 304 | {"SoftMaxTree_accGradParameters", nn_(SoftMaxTree_accGradParameters)}, 305 | {NULL, NULL} 306 | }; 307 | 308 | static void nn_(SoftMaxTree_init)(lua_State *L) 309 | { 310 | luaT_pushmetatable(L, torch_Tensor); 311 | luaT_registeratname(L, nn_(SoftMaxTree__), "nn"); 312 | lua_pop(L,1); 313 | } 314 | 315 | #endif 316 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nnx: experimental 'nn' components 2 | 3 | The original neural network from Torch7, [nn](https://github.com/torch/nn), contains stable and widely 4 | used modules. 'nnx' contains more experimental, unproven modules, and 5 | optimizations. Modules that become stable and which are proven useful make 6 | their way into 'nn' (some already have). 

## Library Documentation ##
This section includes documentation for the following objects:

* [SoftMaxTree](#nnx.SoftMaxTree) : a hierarchical log-softmax Module;
* [TreeNLLCriterion](#nnx.TreeNLLCriterion) : a negative log-likelihood Criterion for the SoftMaxTree;
* [CTCCriterion](#nnx.CTCCriterion) : a Connectionist Temporal Classification Criterion based on [warp-ctc](https://github.com/baidu-research/warp-ctc);
* [PushTable (and PullTable)](#nnx.PushTable) : extracts a table element and inserts it later in the network;
* [MultiSoftMax](#nnx.MultiSoftMax) : performs a softmax over the last dimension of a 2D or 3D input;
* [SpatialReSampling](#nnx.SpatialReSampling) : performs bilinear resampling of a 3D or 4D input image;
* [QDRiemaNNLinear](#nnx.QDRiemaNNLinear) : quasi-diagonal reduction for Riemannian gradient descent
* [Recurrent](#nnx.Recurrent) : a generalized recurrent neural network container;


### SoftMaxTree ###
A hierarchy of parameterized log-softmaxes. Used for computing the likelihood of a leaf class.
This Module should be used in conjunction with the [TreeNLLCriterion](#nnx.TreeNLLCriterion).
Using this for large vocabularies (100,000 and more) greatly accelerates training and evaluation
of neural network language models (NNLM).
A vocabulary hierarchy is provided via the [dp](https://github.com/nicholas-leonard/dp/blob/master/README.md) package's
[BillionWords](https://github.com/nicholas-leonard/dp/blob/master/doc/data.md#dp.BillionWords)
[DataSource](https://github.com/nicholas-leonard/dp/blob/master/doc/data.md#dp.DataSource).

The constructor takes 2 mandatory and 4 optional arguments:
* `inputSize` : the number of units in the input embedding representation;
* `hierarchy` : a Tensor mapping one `parent_id` to many `child_id` (a tree);
* `rootId` : a number identifying the root node in the hierarchy.
Defaults to `-1`;
* `accUpdate` : when the intent is to use `backwardUpdate` or `accUpdateGradParameters`, set this to true to save memory. Defaults to false;
* `static` : when true (the default), returns parameters with keys that don't change from batch to batch;
* `verbose` : prints some additional information concerning the hierarchy during construction.

The `forward` method returns an `output` Tensor of size 1D, while
`backward` returns a table `{gradInput, gradTarget}`. The second
variable is just a Tensor of zeros, such that the `targets` can be
propagated through [Containers](https://github.com/torch/nn/blob/master/doc/containers.md#nn.Containers)
like [ParallelTable](https://github.com/torch/nn/blob/master/doc/table.md#nn.ParallelTable).

```lua
> input = torch.randn(5,10)
> target = torch.IntTensor{20,24,27,10,12}
> gradOutput = torch.randn(5)
> root_id = 29
> input_size = 10
> hierarchy = {
>> [29]=torch.IntTensor{30,1,2}, [1]=torch.IntTensor{3,4,5},
>> [2]=torch.IntTensor{6,7,8}, [3]=torch.IntTensor{9,10,11},
>> [4]=torch.IntTensor{12,13,14}, [5]=torch.IntTensor{15,16,17},
>> [6]=torch.IntTensor{18,19,20}, [7]=torch.IntTensor{21,22,23},
>> [8]=torch.IntTensor{24,25,26,27,28}
>> }
> smt = nn.SoftMaxTree(input_size, hierarchy, root_id)
> smt:forward{input, target}
-3.5186
-3.8950
-3.7433
-3.3071
-3.0522
[torch.DoubleTensor of dimension 5]
> smt:backward({input, target}, gradOutput)
{
   1 : DoubleTensor - size: 5x10
   2 : IntTensor - size: 5
}

```

### TreeNLLCriterion ###
Measures the Negative log-likelihood (NLL) for [SoftMaxTrees](#nnx.SoftMaxTree).
Used for maximizing the likelihood of SoftMaxTree outputs.
The SoftMaxTree Module outputs a column Tensor representing the log likelihood
of each target in the batch.
Thus SoftMaxTree requires the targets. 79 | So this Criterion only computes the negative of those outputs, as 80 | well as its corresponding gradients. 81 | 82 | 83 | 84 | ### PushTable (and PullTable) ### 85 | PushTable and PullTable work together. The first can be put earlier 86 | in a digraph of Modules such that it can communicate with a 87 | PullTable located later in the graph. `PushTable:forward(input)` 88 | for an `input` table of Tensors to the output, excluding one, the index of which 89 | is specified by the `index` argument in the `PushTable(index)` constructor. 90 | The Tensor identified by this `index` is communicated to one or many 91 | PullTables created via the `PushTable:pull(index)` factory method. 92 | These can be inserted later in the digraph such that 93 | a call to `PushTable:forward(input)`, where `input` is a table or a Tensor, 94 | will output a table with the previously *pushed* Tensor inserted 95 | at index `index`. 96 | 97 | An example utilizing the above [SoftMaxTree](#nnx.SoftMaxTree) Module 98 | and a Linear Module demonstrates how the PushTable can be used to 99 | forward the `target` Tensor without any other 100 | [Table Modules](https://github.com/torch/nn/blob/master/doc/table.md#table-layers): 101 | ```lua 102 | > mlp = nn.Sequential() 103 | > linear = nn.Linear(50,100) 104 | > push = nn.PushTable(2) 105 | > pull = push:pull(2) 106 | > mlp:add(push) 107 | > mlp:add(nn.SelectTable(1)) 108 | > mlp:add(linear) 109 | > mlp:add(pull) 110 | > mlp:add(smt) --smt is a SoftMaxTree instance 111 | > mlp:forward{input, target} -- input and target are defined above 112 | -3.5186 113 | -3.8950 114 | -3.7433 115 | -3.3071 116 | -3.0522 117 | [torch.DoubleTensor of dimension 5] 118 | > mlp:backward({input, target}, gradOutput) -- so is gradOutput 119 | { 120 | 1 : DoubleTensor - size: 5x10 121 | 2 : IntTensor - size: 5 122 | } 123 | ``` 124 | The above code is equivalent to the following: 125 | ```lua 126 | > mlp2 = nn.Sequential() 127 | > 
para = nn.ParallelTable() 128 | > para:add(linear) 129 | > para:add(nn.Identity()) 130 | > mlp2:add(para) 131 | > mlp2:add(smt) 132 | > mlp2:forward{input, target} 133 | -3.5186 134 | -3.8950 135 | -3.7433 136 | -3.3071 137 | -3.0522 138 | [torch.DoubleTensor of dimension 5] 139 | > mlp2:backward({input, target}, gradOutput) 140 | { 141 | 1 : DoubleTensor - size: 5x10 142 | 2 : IntTensor - size: 5 143 | } 144 | ``` 145 | In some cases, this can simplify the digraph of Modules. Note that 146 | a PushTable can be associated to many PullTables, but each PullTable 147 | is associated to only one PushTable. 148 | 149 | 150 | ### CTCCriterion ### 151 | ``` 152 | criterion = nn.CTCCriterion() 153 | ``` 154 | Creates a Criterion based on Baidus' [warp-ctc](https://github.com/baidu-research/warp-ctc) implementation. 155 | This Module measures the loss between a 3D output of (batch x time x inputdim) and a target without needing alignment of inputs and labels. 156 | Must have installed warp-ctc which can be installed via luarocks: 157 | ``` 158 | luarocks install http://raw.githubusercontent.com/baidu-research/warp-ctc/master/torch_binding/rocks/warp-ctc-scm-1.rockspec 159 | ``` 160 | Supports cuda via: 161 | ``` 162 | criterion = nn.CTCCriterion():cuda() 163 | ``` 164 | Example: 165 | ``` 166 | output = torch.Tensor({{{1,2,3,4,5},{6,7,8,9,10}}}) -- Tensor of size 1x1x5 (batch x time x inputdim). 167 | label = {{1,3}} 168 | sizes = torch.Tensor({2}) -- Size of each sequence (sequence-length) in the batch as a tensor 169 | ctcCriterion = nn.CTCCriterion() 170 | 171 | err = ctcCriterion:forward(output,label,sizes) 172 | gradOut = ctcCriterion:backward(output,label) 173 | print("----CPU----") 174 | print("Error : " .. err) 175 | print("Gradients :") 176 | print(gradOut) 177 | 178 | ctcCriterion = ctcCriterion:cuda() -- Switch to cuda implementation. 
179 | output = output:cuda() 180 | 181 | err = ctcCriterion:forward(output,label,sizes) 182 | gradOut = ctcCriterion:backward(output,label) 183 | print("----GPU----") 184 | print("Error : " .. err) 185 | print("Gradients :") 186 | print(gradOut) 187 | ``` 188 | 189 | gives the output: 190 | ``` 191 | ----CPU---- 192 | Error : 4.9038286209106 193 | Gradients : 194 | (1,.,.) = 195 | 0.0117 -0.9683 0.0861 0.2341 0.6364 196 | 0.0117 0.0317 0.0861 -0.7659 0.6364 197 | [torch.FloatTensor of size 1x2x5] 198 | 199 | ----GPU---- 200 | Error : 4.9038290977478 201 | Gradients : 202 | (1,.,.) = 203 | 0.0117 -0.9683 0.0861 0.2341 0.6364 204 | 0.0117 0.0317 0.0861 -0.7659 0.6364 205 | [torch.CudaTensor of size 1x2x5] 206 | ``` 207 | 208 | ### MultiSoftMax ### 209 | This Module takes 2D or 3D input and performs a softmax over the last dimension. 210 | It uses the existing [SoftMax](https://github.com/torch/nn/blob/master/doc/transfer.md#nn.SoftMax) 211 | CUDA/C code to do so such that the Module can be used on both GPU and CPU. 212 | This can be useful for [keypoint detection](https://github.com/nicholas-leonard/dp/blob/master/doc/facialkeypointstutorial.md#multisoftmax). 213 | 214 | 215 | ### SpatialReSampling ### 216 | Applies a 2D re-sampling over an input image composed of 217 | several input planes (or channels, colors). The input tensor in `forward(input)` is 218 | expected to be a 3D or 4D tensor of size : `[batchSize x] nInputPlane x width x height`. 219 | The number of output planes will be the same as the number of input 220 | planes. 221 | 222 | The re-sampling is done using [bilinear interpolation](http://en.wikipedia.org/wiki/Bilinear_interpolation). 223 | For a simple nearest-neihbor upsampling, use `nn.SpatialUpSampling()`, 224 | and for a simple average-based down-sampling, use 225 | `nn.SpatialDownSampling()`. 
226 | 227 | If the input image is a 3D tensor of size `nInputPlane x height x width`, 228 | the output image size will be `nInputPlane x oheight x owidth` where 229 | `owidth` and `oheight` are given to the constructor. 230 | 231 | Instead of `owidth` and `oheight`, one can provide `rwidth` and `rheight`, 232 | such that `owidth = iwidth*rwidth` and `oheight = iheight*rheight`. 233 | 234 | As an example, we can run the following code on the famous Lenna image: 235 | ```lua 236 | require 'image' 237 | require 'nnx' 238 | input = image.loadPNG('doc/image/Lenna.png') 239 | l = nn.SpatialReSampling{owidth=150,oheight=150} 240 | output = l:forward(input) 241 | image.save('doc/image/Lenna-150x150-bilinear.png', output) 242 | ``` 243 | 244 | The input: 245 | 246 | ![Lenna](doc/image/Lenna.png) 247 | 248 | The re-sampled output: 249 | 250 | ![Lenna re-sampled](doc/image/Lenna-150x150-bilinear.png) 251 | 252 | 253 | ### QDRiemaNNLinear ### 254 | The Quasi-Diagonal Riemannian Neural Network Linear (QDRiemaNNLinear) module is an implementation 255 | of the quasi-diagonal reduction of metrics, used for Riemannian gradient descent. 256 | The algorithm is defined in Riemannian metrics for neural networks I: feedforward networks by Yann Ollivier (http://arxiv.org/abs/1303.0818) and an efficient implementation is described in Practical Riemannian Neural Networks by Yann Ollivier and Gaetan Marceau-Caron (http://arxiv.org/abs/1602.08007). 257 | To use this module, simply replace `nn.Linear(ninput,noutput)` with `nnx.QDRiemaNNLinear(ninput,noutput)`. 258 | As always, the step-size must be chosen accordingly. 259 | Two additional arguments are also possible: 260 | * gamma (default=0.01): determine the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma newMetric. Smaller minibatches require a smaller gamma. A default value depending on the size of the minibatches is `gamma = 1. 
- torch.pow(1.-1./nTraining,miniBatchSize)` where `nTraining` is the number of training examples of the dataset and `miniBatchSize` is the number of training examples per minibatch. 261 | * qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better. 262 | 263 | This module is a straightforward implementation of the outer product gradient descent. 264 | 265 | ## Requirements 266 | 267 | * Torch7 (www.torch.ch) 268 | 269 | ## Installation 270 | 271 | * Install Torch7 (refer to its own documentation). 272 | * clone this project into dev directory of Torch7. 273 | * Rebuild torch, it will include new projects too. 274 | 275 | ## Use the library 276 | 277 | First run torch, and load nnx: 278 | 279 | ``` sh 280 | $ torch 281 | ``` 282 | 283 | ``` lua 284 | > require 'nnx' 285 | ``` 286 | 287 | Once loaded, tab-completion will help you navigate through the 288 | library (note that most function are added directly to nn): 289 | 290 | ``` lua 291 | > nnx. + TAB 292 | ... 293 | > nn. + TAB 294 | ``` 295 | 296 | In particular, it's good to verify that all modules provided pass their 297 | tests: 298 | 299 | ``` lua 300 | > nnx.test_all() 301 | > nnx.test_omp() 302 | ``` 303 | 304 | 305 | ### Recurrent ### 306 | 307 | DEPRECATED July 6th, 2015. Use [rnn](https://github.com/Element-Research/rnn) instead. 308 | -------------------------------------------------------------------------------- /SpatialFovea.lua: -------------------------------------------------------------------------------- 1 | local SpatialFovea, parent = torch.class('nn.SpatialFovea', 'nn.Module') 2 | 3 | local help_desc = 4 | [[From a given image, generates a pyramid of scales, and process each scale 5 | with the given list of preprocessors and processors. 6 | The result of each module/scale is then 7 | upsampled to produce a homogenous list of 3D feature maps (4D tensor). 

The pipeline is the following:
input -> pyramid{ratios} -> preProcessors -> padding -> processors -> [alignment] -> output

There are two operating modes: focused [training], and global [inference].

In inference mode,
the entire input is processed, and an alignment step is performed at the end of
the pipeline, to be fed directly to a SpatialLinear module.

In sampling mode, the fovea is first focused on a particular (x,y) point, and no
alignment is performed at the end, as all scales should produce a 1x1 result.
To focus the fovea, simply call fovea:focus(x,y,winSize) before doing a forward.
A call to fovea:focus(nil) makes it unfocus (go back to global mode). ]]

-- Constructor: parses the named arguments (see help_desc above), allocates
-- the per-scale module lists and intermediate buffers, and resets processors.
function SpatialFovea:__init(...)
   parent.__init(self)
   -- check args
   xlua.unpack_class(
      self,
      {...},
      'nn.SpatialFovea',
      help_desc,
      {arg='nInputPlane', type='number', help='number of input planes', req=true},
      {arg='ratios', type='table', help='list of downsampling ratios', req=true},
      {arg='processors', type='table', help='list of processors (each processor sees a single scale)', req=true},
      {arg='preProcessors', type='table', help='list of preprocessors (applied before padding)'},
      {arg='fov', type='number', help='field of view (== processors\' receptive field)', default=1},
      {arg='sub', type='number', help='global subsampling (== processors\' subsampling ratio)', default=1},
      -- NOTE(review): declared type is 'number' but the value is used as a boolean
      {arg='bilinear', type='number', help='bilinear interpolation', default=false},
      {arg='cachePrePreproc', type='number', help='beta: cache preprocessed input based on input\' hash', default=false}
   )

   -- internal modules (one per scale, built lazily in configure()):
   self.downsamplers = {}
   self.padders = {}
   self.upsamplers = {}
   self.preProcessors = self.preProcessors or {}

   -- temporary results, kept for the backward pass:
   self.pyramid = {}
   self.preProcessed = {}
   self.padded = {}
   self.narrowed = {}
   self.processed = {}
   self.upsampled = {}

   self.gradUpsampled = {}
   self.gradProcessed = {}
   self.gradNarrowed = {}
   self.gradPadded = {}
   self.gradPreProcessed = {}
   self.gradPyramid = {}

   -- inferred params: total padding needed so each processor's receptive
   -- field fits around a focused point
   self.padding = self.fov - self.sub

   -- check processors
   if #self.processors ~= #self.ratios then
      xlua.error('the number of processors provided should == the number of ratios (scales): '
                 .. #self.ratios, 'nn.SpatialFovea')
   end

   -- to be compatible with classical container modules
   self.modules = self.processors

   -- reset
   self:reset()
end

-- Focus the fovea on point (x,y) with an optional window size; calling
-- focus(nil) unfocuses it (back to global/inference mode).
function SpatialFovea:focus(x,y,fov)
   self.x = x
   self.y = y
   self.fov = fov or self.fov
   if self.x and self.y and self.fov then
      self.focused = true
   else
      self.focused = false
   end
end

-- (Re)build the per-scale downsamplers, padders and upsamplers.
-- NOTE(review): width/height are currently unused by the body.
function SpatialFovea:configure(width,height)
   -- init modules
   for idx = 1,#self.ratios do
      -- down/up ratio
      local r = self.ratios[idx]

      -- downsamplers: bilinear resampling, or average pooling emulated
      -- with a SpatialSubSampling whose weights are fixed to 1/r^2
      if self.bilinear then
         self.downsamplers[idx] = nn.SpatialReSampling(1/r,1/r)
      else
         self.downsamplers[idx] = nn.SpatialSubSampling(self.nInputPlane, r, r, r, r)
         self.downsamplers[idx].weight:fill(1/(r^2))
         self.downsamplers[idx].bias:zero()
      end

      -- padders
      if self.padding == 0 then
         self.padders[idx] = nn.Identity()
      else
         local padl = math.floor(self.padding / 2)
         local padr = math.floor(self.padding / 2)
         self.padders[idx] = nn.SpatialPadding(padl, padr, padl, padr)
      end

      -- upsamplers
      if self.bilinear then
         self.upsamplers[idx] = nn.SpatialReSampling(r, r)
      else
         self.upsamplers[idx] = nn.SpatialUpSampling(r, r)
      end

      -- set correct types
      self.downsamplers[idx]:type(self.output:type())
      self.padders[idx]:type(self.output:type())
      self.upsamplers[idx]:type(self.output:type())
   end
end

-- Forward: pyramid -> preprocess -> pad -> [narrow around focus] ->
-- processors -> [upsample in global mode] -> concatenate into self.output.
function SpatialFovea:updateOutput(input)
   -- input must be 3D
   if input:nDimension() ~= 3 then
      xerror('input must be 3d','nn.SpatialFovea')
   end
   local width = input:size(3)
   local height = input:size(2)
   local nmaps = input:size(1)
   local nscales = #self.ratios
   if input:size(1) ~= self.nInputPlane then
      xerror('input must have ' .. self.nInputPlane .. ' input planes' ,'nn.SpatialFovea')
   end
   self:configure(width,height)

   -- (beta) cache preprocessed data based on a unique hash
   local retrieved = false
   local hash = 0
   if self.cachePrePreproc then
      -- create or reuse list of cached inputs
      self.cachedPreProcessed = self.cachedPreProcessed or {}

      -- compute an arbitrary hash, should be strong enough
      -- NOTE(review): sum+std is collision-prone in theory, unlikely in practice
      local tohash = input
      hash = tostring(tohash:sum())
      hash = hash .. tostring(tohash:std())

      -- check if input was seen before
      if self.cachedPreProcessed[hash] then
         for idx = 1,nscales do
            self.padded[idx] = self.cachedPreProcessed[hash][idx]
         end
         retrieved = true
      end
   end

   -- (beta) only compute input if it was not retrieved
   if not retrieved then
      -- (1) generate pyramid
      for idx = 1,nscales do
         self.pyramid[idx] = self.downsamplers[idx]:updateOutput(input)
      end

      -- (2) preprocess
      for idx = 1,nscales do
         if self.preProcessors[idx] then
            self.preProcessed[idx] = self.preProcessors[idx]:updateOutput(self.pyramid[idx])
         else
            self.preProcessed[idx] = self.pyramid[idx]
         end
      end

      -- (3) pad inputs
      for idx = 1,nscales do
         self.padded[idx] = self.padders[idx]:updateOutput(self.preProcessed[idx])
      end

      -- store preprocessed input for future use
      if self.cachePrePreproc then
         self.cachedPreProcessed[hash] = {}
         for idx = 1,nscales do
            self.cachedPreProcessed[hash][idx] = self.padded[idx]:clone()
         end
      end
   end

   -- (4) is fovea focused ? if so, extract an fov x fov window around the
   -- focus point, mapped into each scale's (padded, subsampled) coordinates
   if self.focused then
      for idx = 1,nscales do
         local fov = self.fov
         local ox = math.floor(math.floor((self.x-1) / self.ratios[idx]) / self.sub) * self.sub + 1
         local oy = math.floor(math.floor((self.y-1) / self.ratios[idx]) / self.sub) * self.sub + 1
         self.narrowed[idx] = self.padded[idx]:narrow(3,ox,fov):narrow(2,oy,fov)
      end
   else
      for idx = 1,nscales do
         self.narrowed[idx] = self.padded[idx]
      end
   end

   -- (5) apply processors to pyramid
   for idx = 1,nscales do
      self.processed[idx] = self.processors[idx]:updateOutput(self.narrowed[idx])
   end

   -- (6) upscale, only if fovea is not focused
   if self.focused then
      for idx = 1,nscales do
         self.upsampled[idx] = self.processed[idx]
      end
   else
      for idx = 1,nscales do
         self.upsampled[idx] = self.upsamplers[idx]:updateOutput(self.processed[idx])
      end
   end

   -- (7) concatenate all maps into a single 3D volume
   local currentslice = 1
   for idx = 1,nscales do
      currentslice = currentslice + self.processed[idx]:size(1)
   end
   self.output:resize(currentslice-1, self.upsampled[1]:size(2), self.upsampled[1]:size(3))
   currentslice = 1
   for idx = 1,nscales do
      local omap = self.output:narrow(1, currentslice, self.upsampled[idx]:size(1))
      omap:copy( self.upsampled[idx] )
      currentslice = currentslice + self.upsampled[idx]:size(1)
   end
   return self.output
end

-- Backward: runs the forward pipeline in reverse; when cachePrePreproc is
-- set, stops after the processors (returns gradNarrowed, not gradInput).
function SpatialFovea:updateGradInput(input, gradOutput)
   -- nb of scales
   local nscales = #self.ratios

   -- (7) extract different scales
   local currentslice = 1
   for idx = 1,nscales do
      -- processed[idx]:size(1) == upsampled[idx]:size(1) (same nb of planes)
      self.gradUpsampled[idx] = gradOutput:narrow(1, currentslice, self.processed[idx]:size(1))
      currentslice = currentslice + self.upsampled[idx]:size(1)
   end

   -- (6) bprop through upsamplers
   if self.focused then
      for idx = 1,nscales do
         self.gradProcessed[idx] = self.gradUpsampled[idx]
      end
   else
      for idx = 1,nscales do
         self.gradProcessed[idx] = self.upsamplers[idx]:updateGradInput(self.processed[idx], self.gradUpsampled[idx])
      end
   end

   -- (5) bprop through processors
   for idx = 1,nscales do
      self.gradNarrowed[idx] = self.processors[idx]:updateGradInput(self.narrowed[idx], self.gradProcessed[idx])
   end

   -- (beta) if caching preprocessed input, no need to compute
   -- backward past this point
   if self.cachePrePreproc then
      return self.gradNarrowed
   end

   -- (4) is fovea focused ? scatter the narrowed gradients back into a
   -- zeroed full-size buffer at the focus window's offsets
   if self.focused then
      for idx = 1,nscales do
         self.gradPadded[idx] = self.gradPadded[idx] or torch.Tensor():typeAs(self.output)
         self.gradPadded[idx]:resizeAs(self.padded[idx]):zero()
         local fov = self.fov
         local ox = math.floor(math.floor((self.x-1) / self.ratios[idx]) / self.sub) * self.sub + 1
         local oy = math.floor(math.floor((self.y-1) / self.ratios[idx]) / self.sub) * self.sub + 1
         self.gradPadded[idx]:narrow(3,ox,fov):narrow(2,oy,fov):copy(self.gradNarrowed[idx])
      end
   else
      for idx = 1,nscales do
         self.gradPadded[idx] = self.gradNarrowed[idx]
      end
   end

   -- (3) bprop through padders
   for idx = 1,nscales do
      self.gradPreProcessed[idx] = self.padders[idx]:updateGradInput(self.preProcessed[idx], self.gradPadded[idx])
   end

   -- (2) bprop through preProcessors
   for idx = 1,nscales do
      if self.preProcessors[idx] then
         self.gradPyramid[idx] = self.preProcessors[idx]:updateGradInput(self.pyramid[idx], self.gradPreProcessed[idx])
      else
         self.gradPyramid[idx] = self.gradPreProcessed[idx]
      end
   end

   -- (1) bprop through pyramid: sum the contributions of every scale
   self.gradInput:resizeAs(self.gradPyramid[1]):zero()
   for idx = 1,nscales do
      self.gradInput:add( self.downsamplers[idx]:updateGradInput(input, self.gradPyramid[idx]) )
   end
   return self.gradInput
end

-- Reset the parameters of every processor (downsamplers/padders have none
-- to re-randomize here).
function SpatialFovea:reset(stdv)
   for idx = 1,#self.processors do
      if self.processors[idx].reset then
         self.processors[idx]:reset(stdv)
      end
   end
end

-- Zero accumulated gradients of every processor.
function SpatialFovea:zeroGradParameters()
   for idx = 1,#self.processors do
      self.processors[idx]:zeroGradParameters()
   end
end

-- Accumulate parameter gradients; relies on the buffers filled by
-- updateOutput/updateGradInput (narrowed, gradProcessed).
function SpatialFovea:accGradParameters(input, gradOutput, scale)
   -- accumulate gradients for all processors
   for idx = 1,#self.processors do
      self.gradNarrowed[idx] = self.processors[idx]:accGradParameters(self.narrowed[idx], self.gradProcessed[idx], scale)
   end
end

-- SGD step on every processor's parameters.
function SpatialFovea:updateParameters(learningRate)
   for idx = 1,#self.processors do
      self.processors[idx]:updateParameters(learningRate)
   end
end

-- Cast all sub-modules to the given tensor type.
-- NOTE(review): indexes up/downsamplers/padders by #processors — assumes
-- configure() has run (or the lists are at least as long); confirm callers.
function SpatialFovea:type(type)
   parent.type(self,type)
   for idx = 1,#self.processors do
      self.processors[idx]:type(type)
      self.upsamplers[idx]:type(type)
      self.downsamplers[idx]:type(type)
      self.padders[idx]:type(type)
   end
   for idx = 1,#self.preProcessors do
      self.preProcessors[idx]:type(type)
   end
   return self
end

-- Collect the (flattened) weight and gradient tensors of all sub-modules.
function SpatialFovea:parameters()
   -- recursively flatten nested tables of tensors into `to`
   local function tinsert(to, from)
      if type(from) == 'table' then
         for i=1,#from do
            tinsert(to,from[i])
         end
      else
         table.insert(to,from)
      end
   end
   local w = {}
   local gw = {}
   for i=1,#self.modules do
      local mw,mgw = self.modules[i]:parameters()
      if mw then
         tinsert(w,mw)
         tinsert(gw,mgw)
      end
   end
   return w,gw
end

-- Pretty-printer: one line per (preProcessor +) processor pipeline.
function SpatialFovea:__tostring__()
   local tab = '  '
   local line = '\n'
   local next = '  |`-> '
   local ext = '  |    '
   local last = '   ... -> '
   local str = 'nn.SpatialFovea'
   str = str .. ' {' .. line .. tab .. 'input'
   for i=1,#self.processors do
      local pipeline = nn.Sequential()
      if self.preProcessors[i] then
         pipeline:add(self.preProcessors[i])
      end
      pipeline:add(self.processors[i])
      str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(pipeline):gsub(line, line .. tab .. ext)
   end
   str = str .. line .. tab .. last .. 'output'
   str = str .. line .. '}'
   return str
end
--------------------------------------------------------------------------------