├── .gitignore ├── utils.h ├── utils.c ├── init.lua ├── init.cu ├── ptnbhwd-scm-1.rockspec ├── init.c ├── LICENSE ├── README.md ├── CMakeLists.txt ├── PerspectiveGridGenerator.lua ├── BilinearSamplerPerspective.lua ├── generic └── BilinearSamplerPerspective.c └── BilinearSamplerPerspective.cu /.gitignore: -------------------------------------------------------------------------------- 1 | # Folders 2 | build/ 3 | -------------------------------------------------------------------------------- /utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CUNN_UTILS_H 2 | #define CUNN_UTILS_H 3 | 4 | #include 5 | #include "THCGeneral.h" 6 | 7 | THCState* getCutorchState(lua_State* L); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | THCState* getCutorchState(lua_State* L) 4 | { 5 | lua_getglobal(L, "cutorch"); 6 | lua_getfield(L, -1, "getState"); 7 | lua_call(L, 0, 1); 8 | THCState *state = (THCState*) lua_touserdata(L, -1); 9 | lua_pop(L, 2); 10 | return state; 11 | } 12 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | local withCuda = pcall(require, 'cutorch') 3 | 4 | require 'libptn' 5 | if withCuda then 6 | require 'libcuptn' 7 | end 8 | 9 | --require('ptn.AffineTransformMatrixGenerator') 10 | --require('ptn.AffineGridGeneratorBHWD') 11 | --require('ptn.BilinearSamplerBHWD') 12 | 13 | require('ptn.PerspectiveGridGenerator') 14 | require('ptn.BilinearSamplerPerspective') 15 | 16 | --require('ptn.test') 17 | 18 | return nn 19 | -------------------------------------------------------------------------------- /init.cu: -------------------------------------------------------------------------------- 1 | #include "luaT.h" 2 | #include "THC.h" 3 | 4 | #include "utils.c" 5 | 6 | //#include "BilinearSamplerBHWD.cu" 7 | #include "BilinearSamplerPerspective.cu" 8 | 9 | LUA_EXTERNC DLL_EXPORT int luaopen_libcuptn(lua_State *L); 10 | 11 | int luaopen_libcuptn(lua_State *L) 12 | { 13 | lua_newtable(L); 14 | //cunn_BilinearSamplerBHWD_init(L); 15 | 16 | //lua_newtable(L); 17 | cunn_BilinearSamplerPerspective_init(L); 18 | return 1; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /ptnbhwd-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "ptnbhwd" 2 | version = "scm-1" 3 | 4 | 5 | source = { 6 | url = "https://github.com/xcyan/ptnbhwd.git", 7 | } 8 | 9 | 10 | description = { 11 | summary = "Perspective Transformer Layer for Torch", 12 | detailed = [[ 13 | ]], 14 | homepage = "https://github.com/xcyan/ptnbhwd", 15 | license = "MIT" 16 | } 17 | 18 | 19 | dependencies = { 20 | "torch >= 7.0", 21 | "nn >= 1.0", 22 | } 23 | 24 | build = { 25 | type = "command", 26 | build_command = [[ 27 | cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) 28 | ]], 29 | install_command = "cd build && $(MAKE) install" 30 | } 31 | -------------------------------------------------------------------------------- /init.c: -------------------------------------------------------------------------------- 1 | #include "TH.h" 2 | #include "luaT.h" 3 | 4 | #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) 5 | #define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor) 6 | #define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) 7 | 8 | #include "generic/BilinearSamplerPerspective.c" 9 | //#include "generic/BilinearSamplerBHWD.c" 10 | #include "THGenerateFloatTypes.h" 11 | 12 | LUA_EXTERNC DLL_EXPORT int luaopen_libptn(lua_State *L); 13 | //LUA_EXTERNC DLL_EXPORT int luaopen_libptn_pp(lua_State *L); 14 | 15 | int luaopen_libptn(lua_State *L) 16 | { 17 | lua_newtable(L); 18 | lua_pushvalue(L, -1); 19 | lua_setglobal(L, "ptn"); 20 | 21 | //nn_FloatBilinearSamplerBHWD_init(L); 22 | 23 | //nn_DoubleBilinearSamplerBHWD_init(L); 24 | 25 | nn_FloatBilinearSamplerPerspective_init(L); 26 | 27 | nn_DoubleBilinearSamplerPerspective_init(L); 28 | 29 | return 1; 30 | } 31 | 32 | 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Xinchen Yan, Jimei Yang, Ersin Yumer, Yijie Guo and Honglak Lee 4 | Copyright (c) 2015 qassemoquab 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Perspective Transformer Layer 2 | 3 | This is the torch implementation of the [Perspective Transformer Layer](https://papers.nips.cc/paper/6206-perspective-transformer-nets-learning-single-view-3d-object-reconstruction-without-3d-supervision.pdf), which is built on top of the [STN torch implementation](https://github.com/qassemoquab/stnbhwd). 4 | 5 | ## Build 6 | To build the ptn libriary, run the following script. 7 | ``` 8 | luarocks make ptnbhwd-scm-1.rockspec 9 | ``` 10 | 11 | ## Usage 12 | 13 | ``` lua 14 | require 'ptn' 15 | 16 | nn.PerspectiveGridGenerator(depth, height, width, focal_length) 17 | -- takes B x 4 x 4 affine transform matrices as input, 18 | -- outputs a depth x height x width grid in normalized [dmin,dmax] x [-1,1] x [-1,1] coordinates, where dmin and dmax represent the minimal and maximal disparity. 19 | 20 | nn.BilinearSamplerPerspective() 21 | -- takes a table {inputImages, grids} as inputs 22 | -- outputs the interpolated images according to the grids 23 | -- inputImages is a batch of samples in BHWD layout 24 | -- grids is a batch of grids (output of PerspectiveGridGenerator) 25 | -- output is also BHWD 26 | ``` 27 | 28 | ## Citation 29 | If you find this useful, please cite our work as follows: 30 | ``` 31 | @incollection{NIPS2016_6206, 32 | title = {Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision}, 33 | author = {Yan, Xinchen and Yang, Jimei and Yumer, Ersin and Guo, Yijie and Lee, Honglak}, 34 | booktitle = {Advances in Neural Information Processing Systems 29}, 35 | editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett}, 36 | pages = {1696--1704}, 37 | year = {2016}, 38 | publisher = {Curran Associates, Inc.}, 39 | url = {http://papers.nips.cc/paper/6206-perspective-transformer-nets-learning-single-view-3d-object-reconstruction-without-3d-supervision.pdf} 40 | } 41 | ``` 42 | 43 | 44 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR) 2 | CMAKE_POLICY(VERSION 2.8) 3 | 4 | SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}") 5 | 6 | FIND_PACKAGE(Torch REQUIRED) 7 | 8 | # Flags 9 | # When using MSVC 10 | IF(MSVC) 11 | # we want to respect the standard, and we are bored of those **** . 12 | ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) 13 | ENDIF(MSVC) 14 | 15 | # OpenMP support? 16 | SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") 17 | IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) 18 | EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) 19 | STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) 20 | MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") 21 | IF (DARWIN_VERSION GREATER 9) 22 | SET(APPLE_OPENMP_SUCKS 1) 23 | ENDIF (DARWIN_VERSION GREATER 9) 24 | EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion 25 | OUTPUT_VARIABLE GCC_VERSION) 26 | IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) 27 | MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") 28 | MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") 29 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas") 30 | SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" FORCE) 31 | ENDIF () 32 | ENDIF () 33 | 34 | IF (WITH_OPENMP) 35 | FIND_PACKAGE(OpenMP) 36 | IF(OPENMP_FOUND) 37 | MESSAGE(STATUS "Compiling with OpenMP support") 38 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 39 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 40 | SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 41 | ENDIF(OPENMP_FOUND) 42 | ENDIF (WITH_OPENMP) 43 | 44 | LINK_DIRECTORIES("${Torch_INSTALL_LIB}") 45 | 46 | SET(src init.c) 47 | FILE(GLOB luasrc *.lua) 48 | ADD_TORCH_PACKAGE(ptn "${src}" "${luasrc}") 49 | TARGET_LINK_LIBRARIES(ptn luaT TH) 50 | 51 | 52 | FIND_PACKAGE(CUDA 5.5) 53 | 54 | IF (CUDA_FOUND) 55 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_20") 56 | 57 | INCLUDE_DIRECTORIES("${Torch_INSTALL_INCLUDE}/THC") 58 | SET(src-cuda init.cu) 59 | CUDA_ADD_LIBRARY(cuptn MODULE ${src-cuda}) 60 | TARGET_LINK_LIBRARIES(cuptn luaT THC TH) 61 | IF(APPLE) 62 | SET_TARGET_PROPERTIES(cuptn PROPERTIES 63 | LINK_FLAGS "-undefined dynamic_lookup") 64 | ENDIF() 65 | ### Torch packages supposes libraries prefix is "lib" 66 | SET_TARGET_PROPERTIES(cuptn PROPERTIES 67 | PREFIX "lib" 68 | IMPORT_PREFIX "lib") 69 | 70 | INSTALL(TARGETS cuptn 71 | RUNTIME DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}" 72 | LIBRARY DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}") 73 | ENDIF(CUDA_FOUND) 74 | -------------------------------------------------------------------------------- /PerspectiveGridGenerator.lua: -------------------------------------------------------------------------------- 1 | -- code adapted from github repo 2 | -- implemented by Yijie Guo (guoyijie@umich.edu) and Xinchen Yan (skywalkeryxc@gmail.com) 3 | 4 | local PGG, parent = torch.class('nn.PerspectiveGridGenerator', 'nn.Module') 5 | 6 | function PGG:__init(depth,height,width,focal_length) 7 | parent.__init(self) 8 | assert(depth > 1) 9 | assert(height > 1) 10 | assert(width > 1) 11 | self.depth = depth 12 | self.height = height 13 | self.width = width 14 | local dmin = 1/(focal_length + math.sqrt(3)) 15 | local dmax = 1/(focal_length) 16 | print(focal_length) 17 | print(dmin .. ' ' .. dmax) 18 | 19 | --zt = 1, xt, yt [-1, 1] 20 | self.baseGrid = torch.Tensor(depth, height, width, 4) 21 | 22 | for k=1,self.depth do 23 | for i=1,self.height do 24 | for j=1,self.width do 25 | local disf = dmin + (k-1)/(self.depth-1) * (dmax-dmin) 26 | --print(disf) 27 | self.baseGrid[k][i][j][1] = 1/disf 28 | self.baseGrid[k][i][j][2] = (-1 + (i-1)/(self.height-1) * 2)/disf 29 | self.baseGrid[k][i][j][3] = (-1 + (j-1)/(self.width-1) * 2)/disf 30 | self.baseGrid[k][i][j][4] = 1 31 | end 32 | end 33 | end 34 | self.batchGrid = torch.Tensor(1, depth, height, width, 4):copy(self.baseGrid) 35 | end 36 | 37 | local function addOuterDim(t) 38 | local sizes = t:size() 39 | local newsizes = torch.LongStorage(sizes:size()+1) 40 | newsizes[1]=1 41 | for i=1,sizes:size() do 42 | newsizes[i+1]=sizes[i] 43 | end 44 | return t:view(newsizes) 45 | end 46 | 47 | function PGG:updateOutput(_transformMatrix) 48 | local transformMatrix 49 | if _transformMatrix:nDimension()==2 then 50 | transformMatrix = addOuterDim(_transformMatrix) 51 | else 52 | transformMatrix = _transformMatrix 53 | end 54 | assert(transformMatrix:nDimension()==3 55 | and transformMatrix:size(2)==4 56 | and transformMatrix:size(3)==4 57 | , 'please input affine transform matrices (bx4x4)') 58 | local batchsize = transformMatrix:size(1) 59 | 60 | if self.batchGrid:size(1) ~= batchsize then 61 | self.batchGrid:resize(batchsize, self.depth, self.height, self.width, 4) 62 | for i=1,batchsize do 63 | self.batchGrid:select(1,i):copy(self.baseGrid) 64 | end 65 | end 66 | 67 | self.output:resize(batchsize, self.depth, self.height, self.width, 4) 68 | local flattenedBatchGrid = self.batchGrid:view(batchsize, self.depth*self.width*self.height, 4) 69 | local flattenedOutput = self.output:view(batchsize, self.depth*self.width*self.height, 4) 70 | torch.bmm(flattenedOutput, flattenedBatchGrid, transformMatrix:transpose(2,3)) 71 | if _transformMatrix:nDimension()==2 then 72 | self.output = self.output:select(1,1) 73 | end 74 | return self.output 75 | end 76 | 77 | function PGG:updateGradInput(_transformMatrix, _gradGrid) 78 | local transformMatrix, gradGrid 79 | if _transformMatrix:nDimension()==2 then 80 | transformMatrix = addOuterDim(_transformMatrix) 81 | gradGrid = addOuterDim(_gradGrid) 82 | else 83 | transformMatrix = _transformMatrix 84 | gradGrid = _gradGrid 85 | end 86 | 87 | local batchsize = transformMatrix:size(1) 88 | 89 | local flattenedGradGrid = gradGrid:view(batchsize, self.depth*self.width*self.height, 4) 90 | local flattenedBatchGrid = self.batchGrid:view(batchsize, self.depth*self.width*self.height, 4) 91 | self.gradInput:resizeAs(transformMatrix):zero() 92 | self.gradInput:baddbmm(flattenedGradGrid:transpose(2,3), flattenedBatchGrid) ---???? 93 | -- torch.baddbmm doesn't work on cudatensors for some reason 94 | 95 | if _transformMatrix:nDimension()==2 then 96 | self.gradInput = self.gradInput:select(1,1) 97 | end 98 | 99 | return self.gradInput 100 | end 101 | -------------------------------------------------------------------------------- /BilinearSamplerPerspective.lua: -------------------------------------------------------------------------------- 1 | -- code adapted from github 2 | -- implemented by Yijie Guo (guoyijie@umich.edu) and Xinchen Yan (skywalkeryxc@gmail.com) 3 | 4 | local BilinearSamplerPerspective, parent = torch.class('nn.BilinearSamplerPerspective', 'nn.Module') 5 | 6 | --[[ 7 | BilinearSamplerBHWD() : 8 | BilinearSamplerBHWD:updateOutput({inputImages, grids}) 9 | BilinearSamplerBHWD:updateGradInput({inputImages, grids}, gradOutput) 10 | 11 | BilinearSamplerBHWD will perform bilinear sampling of the input images according to the 12 | normalized coordinates provided in the grid. Output will be of same size as the grids, 13 | with as many features as the input images. 14 | 15 | - inputImages has to be in BDHWD layout 16 | 17 | - grids have to be in BDHWD layout, with dim(D)=4 18 | - grids contains, for each sample (first dim), the normalized coordinates of the output wrt the input sample 19 | - Z, Y, X coordinate 20 | - normalized coordinates : (-1,-1, -1) points to front top left, (-1, -1,1) points to front top right 21 | - if the normalized coordinates fall outside of the image, then output will be filled with zeros 22 | ]] 23 | 24 | function BilinearSamplerPerspective:__init(focal_length) 25 | parent.__init(self) 26 | self.gradInput={} 27 | self.focal_length = focal_length 28 | end 29 | 30 | function BilinearSamplerPerspective:check(input, gradOutput) 31 | local inputImages = input[1] 32 | local grids = input[2] 33 | 34 | assert(inputImages:isContiguous(), 'Input images have to be contiguous') 35 | assert(inputImages:nDimension()==5) 36 | assert(grids:nDimension()==5) 37 | assert(inputImages:size(1)==grids:size(1)) -- batch 38 | assert(grids:size(5)==4) -- coordinates 39 | 40 | if gradOutput then 41 | assert(grids:size(1)==gradOutput:size(1)) --batchsize 42 | assert(grids:size(2)==gradOutput:size(2)) --depth == dist 43 | assert(grids:size(3)==gradOutput:size(3)) --height 44 | assert(grids:size(4)==gradOutput:size(4)) --width 45 | end 46 | end 47 | 48 | local function addOuterDim(t) 49 | local sizes = t:size() 50 | local newsizes = torch.LongStorage(sizes:size()+1) 51 | newsizes[1]=1 52 | for i=1,sizes:size() do 53 | newsizes[i+1]=sizes[i] 54 | end 55 | return t:view(newsizes) 56 | end 57 | 58 | function BilinearSamplerPerspective:updateOutput(input) 59 | local _inputImages = input[1] 60 | local _grids = input[2] 61 | --print("D") 62 | --print(_grids) 63 | 64 | local inputImages, grids 65 | if _inputImages:nDimension()==4 then --image:size(4) = channel, image:size(1)=depth, image:size(2) = height, image:size(3)=width 66 | inputImages = addOuterDim(_inputImages) 67 | grids = addOuterDim(_grids) 68 | else 69 | inputImages = _inputImages 70 | grids = _grids 71 | end 72 | 73 | local input = {inputImages, grids} 74 | 75 | self:check(input) 76 | 77 | self.output:resize(inputImages:size(1), grids:size(2), grids:size(3), grids:size(4), inputImages:size(5)) 78 | inputImages.nn.BilinearSamplerPerspective_updateOutput(self, inputImages, grids, self.output, self.focal_length) 79 | 80 | if _inputImages:nDimension()==4 then 81 | self.output=self.output:select(1,1) 82 | end 83 | 84 | return self.output 85 | end 86 | 87 | function BilinearSamplerPerspective:updateGradInput(_input, _gradOutput) 88 | local _inputImages = _input[1] 89 | local _grids = _input[2] 90 | 91 | local inputImages, grids, gradOutput 92 | if _inputImages:nDimension()==4 then 93 | inputImages = addOuterDim(_inputImages) 94 | grids = addOuterDim(_grids) 95 | gradOutput = addOuterDim(_gradOutput) 96 | else 97 | inputImages = _inputImages 98 | grids = _grids 99 | gradOutput = _gradOutput 100 | end 101 | 102 | local input = {inputImages, grids} 103 | 104 | self:check(input, gradOutput) 105 | for i=1,#input do 106 | self.gradInput[i] = self.gradInput[i] or input[1].new() 107 | self.gradInput[i]:resizeAs(input[i]):zero() 108 | end 109 | 110 | local gradInputImages = self.gradInput[1] 111 | local gradGrids = self.gradInput[2] 112 | 113 | inputImages.nn.BilinearSamplerPerspective_updateGradInput(self, inputImages, grids, gradInputImages, gradGrids, gradOutput, self.focal_length) 114 | 115 | if _gradOutput:nDimension()==4 then 116 | self.gradInput[1]=self.gradInput[1]:select(1,1) 117 | self.gradInput[2]=self.gradInput[2]:select(1,1) 118 | end 119 | 120 | return self.gradInput 121 | end 122 | -------------------------------------------------------------------------------- /generic/BilinearSamplerPerspective.c: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/BilinearSamplerPerspective.c" 3 | #else 4 | 5 | #include 6 | #include 7 | // code adapted from the github repo 8 | // implemented by Yijie Guo (guoyijie@umich.edu) and Xinchen Yan (skywalkeryxc@gmail.com) 9 | 10 | // Affine Transformation 11 | static int nn_(BilinearSamplerBHWD_updateOutput)(lua_State *L) 12 | { 13 | THTensor *inputImages = luaT_checkudata(L, 2, torch_Tensor); 14 | THTensor *grids = luaT_checkudata(L, 3, torch_Tensor); 15 | THTensor *output = luaT_checkudata(L, 4, torch_Tensor); 16 | 17 | int batchsize = inputImages->size[0]; 18 | int inputImages_depth = inputImages->size[1]; 19 | int inputImages_height = inputImages->size[2]; 20 | int inputImages_width = inputImages->size[3]; 21 | int output_height = output->size[2]; 22 | int output_width = output->size[3]; 23 | int output_depth = output->size[1]; 24 | int inputImages_channels = inputImages->size[4]; 25 | 26 | int output_strideBatch = output->stride[0]; 27 | int output_strideHeight = output->stride[2]; 28 | int output_strideWidth = output->stride[3]; 29 | int output_strideDepth = output->stride[1]; 30 | 31 | int inputImages_strideBatch = inputImages->stride[0]; 32 | int inputImages_strideHeight = inputImages->stride[2]; 33 | int inputImages_strideWidth = inputImages->stride[3]; 34 | int inputImages_strideDepth = inputImages->stride[1]; 35 | 36 | int grids_strideBatch = grids->stride[0]; 37 | int grids_strideHeight = grids->stride[2]; 38 | int grids_strideWidth = grids->stride[3]; 39 | int grids_strideDepth = grids->stride[1]; 40 | 41 | real *inputImages_data, *output_data, *grids_data; 42 | inputImages_data = THTensor_(data)(inputImages); 43 | output_data = THTensor_(data)(output); 44 | grids_data = THTensor_(data)(grids); 45 | 46 | int b, yOut, xOut, zOut; 47 | 48 | for(b=0; b < batchsize; b++) 49 | { 50 | for(zOut = 0; zOut < output_depth; zOut++) 51 | { 52 | for(yOut=0; yOut < output_height; yOut++) 53 | { 54 | for(xOut=0; xOut < output_width; xOut++) 55 | { 56 | //read the grid 57 | real yf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth + 1]; 58 | real xf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth + 2]; 59 | real zf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth]; 60 | 61 | // get the weights for interpolation 62 | int yInTopLeftFront, xInTopLeftFront, zInTopLeftFront; 63 | real yWeightTopLeftFront, xWeightTopLeftFront, zWeightTopLeftFront; 64 | 65 | real xcoord = (xf + 1) * (inputImages_width - 1) / 2; 66 | xInTopLeftFront = floor(xcoord); 67 | xWeightTopLeftFront = 1 - (xcoord - xInTopLeftFront); 68 | 69 | real ycoord = (yf + 1) * (inputImages_height - 1) / 2; 70 | yInTopLeftFront = floor(ycoord); 71 | yWeightTopLeftFront = 1 - (ycoord - yInTopLeftFront); 72 | 73 | real zcoord = (zf + 1) * (inputImages_depth - 1) / 2; 74 | zInTopLeftFront = floor(zcoord); 75 | zWeightTopLeftFront = 1 - (zcoord - zInTopLeftFront); 76 | 77 | const int outAddress = output_strideBatch * b + output_strideHeight * yOut + output_strideWidth * xOut + output_strideDepth * zOut; 78 | const int inTopLeftFrontAddress = inputImages_strideBatch * b + inputImages_strideHeight * yInTopLeftFront 79 | + inputImages_strideWidth * xInTopLeftFront + inputImages_strideDepth * zInTopLeftFront; 80 | 81 | const int inTopLeftBackAddress = inTopLeftFrontAddress + inputImages_strideDepth; 82 | 83 | const int inTopRightFrontAddress = inTopLeftFrontAddress + inputImages_strideWidth; 84 | const int inTopRightBackAddress = inTopRightFrontAddress + inputImages_strideDepth; 85 | 86 | const int inBottomLeftFrontAddress = inTopLeftFrontAddress + inputImages_strideHeight; 87 | const int inBottomLeftBackAddress = inBottomLeftFrontAddress + inputImages_strideDepth; 88 | 89 | const int inBottomRightFrontAddress = inBottomLeftFrontAddress + inputImages_strideWidth; 90 | const int inBottomRightBackAddress = inBottomRightFrontAddress + inputImages_strideDepth; 91 | 92 | real v=0; 93 | real inTopLeftFront=0; 94 | real inTopLeftBack=0; 95 | real inTopRightFront=0; 96 | real inTopRightBack=0; 97 | real inBottomLeftFront=0; 98 | real inBottomLeftBack=0; 99 | real inBottomRightFront=0; 100 | real inBottomRightBack=0; 101 | 102 | // we are careful with the boundaries 103 | bool topLeftFrontIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 104 | && yInTopLeftFront >= 0 && yInTopLeftFront<= inputImages_height-1 105 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 106 | 107 | bool topLeftBackIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 108 | && yInTopLeftFront >= 0 && yInTopLeftFront<= inputImages_height-1 109 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1<= inputImages_depth-1); 110 | 111 | bool topRightFrontIsIn = xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 112 | && yInTopLeftFront >= 0 && yInTopLeftFront <= inputImages_height-1 113 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth - 1; 114 | 115 | bool topRightBackIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 116 | && yInTopLeftFront >= 0 && yInTopLeftFront <= inputImages_height-1 117 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1 <= inputImages_depth-1); 118 | 119 | bool bottomLeftFrontIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 120 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 121 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 122 | 123 | bool bottomLeftBackIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 124 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 125 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1 <= inputImages_depth-1); 126 | 127 | bool bottomRightFrontIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 128 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 129 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 130 | 131 | bool bottomRightBackIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 132 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 133 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1 <= inputImages_depth-1); 134 | 135 | int t; 136 | // interpolation happens here 137 | for(t=0; tsize[0]; 182 | int inputImages_height = inputImages->size[2]; 183 | int inputImages_width = inputImages->size[3]; 184 | int inputImages_depth = inputImages->size[1]; 185 | 186 | int gradOutput_height = gradOutput->size[2]; 187 | int gradOutput_width = gradOutput->size[3]; 188 | int gradOutput_depth = gradOutput->size[1]; 189 | int inputImages_channels = inputImages->size[4]; 190 | 191 | int gradOutput_strideBatch = gradOutput->stride[0]; 192 | int gradOutput_strideHeight = gradOutput->stride[2]; 193 | int gradOutput_strideWidth = gradOutput->stride[3]; 194 | int gradOutput_strideDepth = gradOutput->stride[1]; 195 | 196 | int inputImages_strideBatch = inputImages->stride[0]; 197 | int inputImages_strideHeight = inputImages->stride[2]; 198 | int inputImages_strideWidth = inputImages->stride[3]; 199 | int inputImages_strideDepth = inputImages->stride[1]; 200 | 201 | int gradInputImages_strideBatch = gradInputImages->stride[0]; 202 | int gradInputImages_strideHeight = gradInputImages->stride[2]; 203 | int gradInputImages_strideWidth = gradInputImages->stride[3]; 204 | int gradInputImages_strideDepth = gradInputImages->stride[1]; 205 | 206 | int grids_strideBatch = grids->stride[0]; 207 | int grids_strideHeight = grids->stride[2]; 208 | int grids_strideWidth = grids->stride[3]; 209 | int grids_strideDepth = grids->stride[1]; 210 | 211 | int gradGrids_strideBatch = gradGrids->stride[0]; 212 | int gradGrids_strideHeight = gradGrids->stride[2]; 213 | int gradGrids_strideWidth = gradGrids->stride[3]; 214 | int gradGrids_strideDepth = gradGrids->stride[1]; 215 | 216 | real *inputImages_data, *gradOutput_data, *grids_data, *gradGrids_data, *gradInputImages_data; 217 | inputImages_data = THTensor_(data)(inputImages); 218 | gradOutput_data = THTensor_(data)(gradOutput); 219 | grids_data = THTensor_(data)(grids); 220 | gradGrids_data = THTensor_(data)(gradGrids); 221 | gradInputImages_data = THTensor_(data)(gradInputImages); 222 | 223 | int b, yOut, xOut, zOut; 224 | 225 | for(b=0; b < batchsize; b++) 226 | { 227 | for(zOut=0; zOut < gradOutput_depth; zOut++) 228 | { 229 | for(yOut=0; yOut < gradOutput_height; yOut++) 230 | { 231 | for(xOut=0; xOut < gradOutput_width; xOut++) 232 | { 233 | //read the grid 234 | real yf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth + 1]; 235 | real xf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth + 2]; 236 | real zf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth]; 237 | 238 | // get the weights for interpolation 239 | int yInTopLeftFront, xInTopLeftFront, zInTopLeftFront; 240 | real yWeightTopLeftFront, xWeightTopLeftFront, zWeightTopLeftFront; 241 | 242 | real xcoord = (xf + 1) * (inputImages_width - 1) / 2; 243 | xInTopLeftFront = floor(xcoord); 244 | xWeightTopLeftFront = 1 - (xcoord - xInTopLeftFront); 245 | 246 | real ycoord = (yf + 1) * (inputImages_height - 1) / 2; 247 | yInTopLeftFront = floor(ycoord); 248 | yWeightTopLeftFront = 1 - (ycoord - yInTopLeftFront); 249 | 250 | real zcoord = (zf + 1) * (inputImages_depth - 1) / 2; 251 | zInTopLeftFront = floor(zcoord); 252 | zWeightTopLeftFront = 1 - (zcoord - zInTopLeftFront); 253 | 254 | const int inTopLeftFrontAddress = inputImages_strideBatch * b + inputImages_strideHeight * yInTopLeftFront 255 | + inputImages_strideWidth * xInTopLeftFront + inputImages_strideDepth * zInTopLeftFront; 256 | 257 | const int inTopLeftBackAddress = inTopLeftFrontAddress + inputImages_strideDepth; 258 | 259 | const int inTopRightFrontAddress = inTopLeftFrontAddress + inputImages_strideWidth; 260 | const int inTopRightBackAddress = inTopRightFrontAddress + inputImages_strideDepth; 261 | 262 | const int inBottomLeftFrontAddress = inTopLeftFrontAddress + inputImages_strideHeight; 263 | const int inBottomLeftBackAddress = inBottomLeftFrontAddress + inputImages_strideDepth; 264 | 265 | const int inBottomRightFrontAddress = inBottomLeftFrontAddress + inputImages_strideWidth; 266 | const int inBottomRightBackAddress = inBottomRightFrontAddress + inputImages_strideDepth; 267 | 268 | const int gradInputImagesTopLeftFrontAddress = gradInputImages_strideBatch * b + gradInputImages_strideHeight * yInTopLeftFront 269 | + gradInputImages_strideWidth * xInTopLeftFront + gradInputImages_strideDepth * zInTopLeftFront; 270 | const int gradInputImagesTopLeftBackAddress = gradInputImagesTopLeftFrontAddress + gradInputImages_strideDepth; 271 | 272 | const int gradInputImagesTopRightFrontAddress = gradInputImagesTopLeftFrontAddress + gradInputImages_strideWidth; 273 | const int gradInputImagesTopRightBackAddress = gradInputImagesTopRightFrontAddress + gradInputImages_strideDepth; 274 | 275 | const int gradInputImagesBottomLeftFrontAddress = gradInputImagesTopLeftFrontAddress + gradInputImages_strideHeight; 276 | const int gradInputImagesBottomLeftBackAddress = gradInputImagesBottomLeftFrontAddress +gradInputImages_strideDepth; 277 | 278 | const int gradInputImagesBottomRightFrontAddress = gradInputImagesBottomLeftFrontAddress + gradInputImages_strideWidth; 279 | const int gradInputImagesBottomRightBackAddress = gradInputImagesBottomRightFrontAddress + gradInputImages_strideDepth; 280 | 281 | const int gradOutputAddress = gradOutput_strideBatch * b + gradOutput_strideHeight * yOut 282 | + gradOutput_strideWidth * xOut + gradOutput_strideDepth * zOut; 283 | 284 | real topLeftFrontDotProduct = 0; 285 | real topLeftBackDotProduct = 0; 286 | real topRightFrontDotProduct = 0; 287 | real topRightBackDotProduct = 0; 288 | 289 | real bottomLeftFrontDotProduct = 0; 290 | real bottomLeftBackDotProduct = 0; 291 | real bottomRightFrontDotProduct = 0; 292 | real bottomRightBackDotProduct = 0; 293 | 294 | real v=0; 295 | real inTopLeftFront=0; 296 | real inTopLeftBack=0; 297 | real inTopRightFront=0; 298 | real inTopRightBack=0; 299 | 300 | real inBottomLeftFront=0; 301 | real inBottomLeftBack=0; 302 | real inBottomRightFront=0; 303 | real inBottomRightBack=0; 304 | 305 | // we are careful with the boundaries 306 | bool topLeftFrontIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 307 | && yInTopLeftFront >= 0 && yInTopLeftFront<= inputImages_height-1 308 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 309 | 310 | bool topLeftBackIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 311 | && yInTopLeftFront >= 0 && yInTopLeftFront<= inputImages_height-1 312 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1<= inputImages_depth-1); 313 | 314 | bool topRightFrontIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 315 | && yInTopLeftFront >= 0 && yInTopLeftFront <= inputImages_height-1 316 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 317 | 318 | bool topRightBackIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 319 | && yInTopLeftFront >= 0 && yInTopLeftFront <= inputImages_height-1 320 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1 <= inputImages_depth-1); 321 | 322 | bool bottomLeftFrontIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 323 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 324 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 325 | 326 | bool bottomLeftBackIsIn = (xInTopLeftFront >= 0 && xInTopLeftFront <= inputImages_width-1 327 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 328 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1 <= inputImages_depth-1); 329 | 330 | bool bottomRightFrontIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 331 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 332 | && zInTopLeftFront >=0 && zInTopLeftFront <= inputImages_depth-1); 333 | 334 | bool bottomRightBackIsIn = (xInTopLeftFront+1 >= 0 && xInTopLeftFront+1 <= inputImages_width-1 335 | && yInTopLeftFront+1 >= 0 && yInTopLeftFront+1 <= inputImages_height-1 336 | && zInTopLeftFront+1 >=0 && zInTopLeftFront+1 <= inputImages_depth-1); 337 | int t; 338 | 339 | for(t=0; tsize[0]; 452 | int inputImages_depth = inputImages->size[1]; 453 | int inputImages_height = inputImages->size[2]; 454 | int inputImages_width = inputImages->size[3]; 455 | 456 | int output_dist = output->size[1]; 457 | int output_height = output->size[2]; 458 | int output_width = output->size[3]; 459 | 460 | int inputImages_channels = inputImages->size[4]; 461 | 462 | int output_strideBatch = output->stride[0]; 463 | int output_strideDist = output->stride[1]; 464 | int output_strideHeight = output->stride[2]; 465 | int output_strideWidth = output->stride[3]; 466 | 467 | int inputImages_strideBatch = inputImages->stride[0]; 468 | int inputImages_strideDepth = inputImages->stride[1]; 469 | int inputImages_strideHeight = inputImages->stride[2]; 470 | int inputImages_strideWidth = inputImages->stride[3]; 471 | 472 | int grids_strideBatch = grids->stride[0]; 473 | int grids_strideDepth = grids->stride[1]; 474 | int grids_strideHeight = grids->stride[2]; 475 | int grids_strideWidth = grids->stride[3]; 476 | 477 | real *inputImages_data, *output_data, *grids_data; 478 | inputImages_data = THTensor_(data)(inputImages); 479 | output_data = THTensor_(data)(output); 480 | grids_data = THTensor_(data)(grids); 481 | 482 | int b, yOut, xOut, disOut; 483 | 484 | for(b=0; b < batchsize; b++) 485 | { 486 | for(disOut=0; disOut < output_dist; disOut++) 487 | { 488 | for(yOut=0; yOut < output_height; yOut++) 489 | { 490 | for(xOut=0; xOut < output_width; xOut++) 491 | { 492 | 493 | //read the grid 494 | real zf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth]; 495 | real yf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth + 1]; 496 | real xf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth + 2]; 497 | real disf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth + 3]; 498 | 499 | //printf("%.3f %.3f %.3f\n", zf, yf, xf); 500 | // normalize the coordinates (x^w, y^w, z^w, 1) 501 | //yf = yf / disf; 502 | //xf = xf / disf; 503 | //zf = zf / disf - (focal_length + 0.5); 504 | 505 | // get the weights for interpolation 506 | int zInFrontTopLeft, yInFrontTopLeft, xInFrontTopLeft; 507 | real zWeightFrontTopLeft, yWeightFrontTopLeft, xWeightFrontTopLeft; 508 | 509 | real xcoord = (xf + 1) * (inputImages_width - 1) / 2; 510 | xInFrontTopLeft = floor(xcoord); 511 | xWeightFrontTopLeft = 1 - (xcoord - xInFrontTopLeft); 512 | 513 | real ycoord = (yf + 1) * (inputImages_height - 1) / 2; 514 | yInFrontTopLeft = floor(ycoord); 515 | yWeightFrontTopLeft = 1 - (ycoord - yInFrontTopLeft); 516 | 517 | real zcoord = (zf + 1) * (inputImages_depth - 1) / 2; 518 | zInFrontTopLeft = floor(zcoord); 519 | zWeightFrontTopLeft = 1 - (zcoord - zInFrontTopLeft); 520 | 521 | const int outAddress = output_strideBatch * b + output_strideDist * disOut + output_strideHeight * yOut + output_strideWidth * xOut; 522 | 523 | const int inFrontTopLeftAddress = inputImages_strideBatch * b + inputImages_strideDepth * zInFrontTopLeft + inputImages_strideHeight * yInFrontTopLeft + inputImages_strideWidth * xInFrontTopLeft; 524 | const int inFrontTopRightAddress = inFrontTopLeftAddress + inputImages_strideWidth; 525 | const int inFrontBottomLeftAddress = inFrontTopLeftAddress + inputImages_strideHeight; 526 | const int inFrontBottomRightAddress = inFrontBottomLeftAddress + inputImages_strideWidth; 527 | 528 | const int inBackTopLeftAddress = inFrontTopLeftAddress + inputImages_strideDepth; 529 | const int inBackTopRightAddress = inBackTopLeftAddress + inputImages_strideWidth; 530 | const int inBackBottomLeftAddress = inBackTopLeftAddress + inputImages_strideHeight; 531 | const int inBackBottomRightAddress = inBackBottomLeftAddress + inputImages_strideWidth; 532 | 533 | real v=0; 534 | real inFrontTopLeft=0; 535 | real inFrontTopRight=0; 536 | real inFrontBottomLeft=0; 537 | real inFrontBottomRight=0; 538 | real inBackTopLeft=0; 539 | real inBackTopRight=0; 540 | real inBackBottomLeft=0; 541 | real inBackBottomRight=0; 542 | 543 | // we are careful with the boundaries 544 | bool frontTopLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 545 | bool frontTopRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 546 | bool frontBottomLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 547 | bool frontBottomRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 548 | 549 | bool backTopLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft+1>= 0 && zInFrontTopLeft+1<= inputImages_depth-1; 550 | bool backTopRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft+1 >= 0 && zInFrontTopLeft+1 <= inputImages_depth-1; 551 | bool backBottomLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft+1 >= 0 && zInFrontTopLeft+1 <= inputImages_depth-1; 552 | bool backBottomRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft+1 >= 0 && zInFrontTopLeft+1 <= inputImages_depth-1; 553 | 554 | int t; 555 | // interpolation happens here 556 | for(t=0; tsize[0]; 599 | int inputImages_depth = inputImages->size[1]; 600 | int inputImages_height = inputImages->size[2]; 601 | int inputImages_width = inputImages->size[3]; 602 | 603 | int gradOutput_dist = gradOutput->size[1]; 604 | int gradOutput_height = gradOutput->size[2]; 605 | int gradOutput_width = gradOutput->size[3]; 606 | 607 | int inputImages_channels = inputImages->size[4]; 608 | 609 | int gradOutput_strideBatch = gradOutput->stride[0]; 610 | int gradOutput_strideDist = gradOutput->stride[1]; 611 | int gradOutput_strideHeight = gradOutput->stride[2]; 612 | int gradOutput_strideWidth = gradOutput->stride[3]; 613 | 614 | int inputImages_strideBatch = inputImages->stride[0]; 615 | int inputImages_strideDepth = inputImages->stride[1]; 616 | int inputImages_strideHeight = inputImages->stride[2]; 617 | int inputImages_strideWidth = inputImages->stride[3]; 618 | 619 | int gradInputImages_strideBatch = gradInputImages->stride[0]; 620 | int gradInputImages_strideDepth = gradInputImages->stride[1]; 621 | int gradInputImages_strideHeight = gradInputImages->stride[2]; 622 | int gradInputImages_strideWidth = gradInputImages->stride[3]; 623 | 624 | int grids_strideBatch = grids->stride[0]; 625 | int grids_strideDepth = grids->stride[1]; 626 | int grids_strideHeight = grids->stride[2]; 627 | int grids_strideWidth = grids->stride[3]; 628 | 629 | int gradGrids_strideBatch = gradGrids->stride[0]; 630 | int gradGrids_strideDepth = gradGrids->stride[1]; 631 | int gradGrids_strideHeight = gradGrids->stride[2]; 632 | int gradGrids_strideWidth = gradGrids->stride[3]; 633 | 634 | real *inputImages_data, *gradOutput_data, *grids_data, *gradGrids_data, *gradInputImages_data; 635 | inputImages_data = THTensor_(data)(inputImages); 636 | gradOutput_data = THTensor_(data)(gradOutput); 637 | grids_data = THTensor_(data)(grids); 638 | gradGrids_data = THTensor_(data)(gradGrids); 639 | gradInputImages_data = THTensor_(data)(gradInputImages); 640 | 641 | int b, yOut, xOut, disOut; 642 | 643 | for(b=0; b < batchsize; b++) 644 | { 645 | for(disOut = 0; disOut < gradOutput_dist; disOut++) 646 | { 647 | for(yOut=0; yOut < gradOutput_height; yOut++) 648 | { 649 | for(xOut=0; xOut < gradOutput_width; xOut++) 650 | { 651 | 652 | //read the grid 653 | real zf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth]; 654 | real yf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth + 1]; 655 | real xf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth+ 2]; 656 | real disf = grids_data[b*grids_strideBatch + disOut*grids_strideDepth + yOut*grids_strideHeight + xOut*grids_strideWidth+ 3]; 657 | 658 | // yf = yf / disf; 659 | //xf = xf / disf; 660 | //zf = zf / disf; 661 | 662 | // get the weights for interpolation 663 | int zInFrontTopLeft, yInFrontTopLeft, xInFrontTopLeft; 664 | real zWeightFrontTopLeft, yWeightFrontTopLeft, xWeightFrontTopLeft; 665 | 666 | real xcoord = (xf + 1) * (inputImages_width - 1) / 2; 667 | xInFrontTopLeft = floor(xcoord); 668 | xWeightFrontTopLeft = 1 - (xcoord - xInFrontTopLeft); 669 | 670 | real ycoord = (yf + 1) * (inputImages_height - 1) / 2; 671 | yInFrontTopLeft = floor(ycoord); 672 | yWeightFrontTopLeft = 1 - (ycoord - yInFrontTopLeft); 673 | 674 | real zcoord = (zf + 1) * (inputImages_depth - 1) / 2; 675 | zInFrontTopLeft = floor(zcoord); 676 | zWeightFrontTopLeft = 1 - (zcoord - zInFrontTopLeft); 677 | 678 | const int inFrontTopLeftAddress = inputImages_strideBatch * b + inputImages_strideDepth * zInFrontTopLeft + inputImages_strideHeight * yInFrontTopLeft + inputImages_strideWidth * xInFrontTopLeft; 679 | const int inFrontTopRightAddress = inFrontTopLeftAddress + inputImages_strideWidth; 680 | const int inFrontBottomLeftAddress = inFrontTopLeftAddress + inputImages_strideHeight; 681 | const int inFrontBottomRightAddress = inFrontBottomLeftAddress + inputImages_strideWidth; 682 | 683 | const int inBackTopLeftAddress = inFrontTopLeftAddress + inputImages_strideDepth; 684 | const int inBackTopRightAddress = inBackTopLeftAddress + inputImages_strideWidth; 685 | const int inBackBottomLeftAddress = inBackTopLeftAddress + inputImages_strideHeight; 686 | const int inBackBottomRightAddress = inBackBottomLeftAddress + inputImages_strideWidth; 687 | 688 | const int gradInputImagesFrontTopLeftAddress = gradInputImages_strideBatch * b + gradInputImages_strideDepth * zInFrontTopLeft + gradInputImages_strideHeight * yInFrontTopLeft + gradInputImages_strideWidth * xInFrontTopLeft; 689 | const int gradInputImagesFrontTopRightAddress = gradInputImagesFrontTopLeftAddress + gradInputImages_strideWidth; 690 | const int gradInputImagesFrontBottomLeftAddress = gradInputImagesFrontTopLeftAddress + gradInputImages_strideHeight; 691 | const int gradInputImagesFrontBottomRightAddress = gradInputImagesFrontBottomLeftAddress + gradInputImages_strideWidth; 692 | 693 | const int gradInputImagesBackTopLeftAddress = gradInputImagesFrontTopLeftAddress + gradInputImages_strideDepth; 694 | const int gradInputImagesBackTopRightAddress = gradInputImagesBackTopLeftAddress + gradInputImages_strideWidth; 695 | const int gradInputImagesBackBottomLeftAddress = gradInputImagesBackTopLeftAddress + gradInputImages_strideHeight; 696 | const int gradInputImagesBackBottomRightAddress = gradInputImagesBackBottomLeftAddress + gradInputImages_strideWidth; 697 | 698 | const int gradOutputAddress = gradOutput_strideBatch * b + gradOutput_strideDist * disOut + gradOutput_strideHeight * yOut + gradOutput_strideWidth * xOut; 699 | 700 | real frontTopLeftDotProduct = 0; 701 | real frontTopRightDotProduct = 0; 702 | real frontBottomLeftDotProduct = 0; 703 | real frontBottomRightDotProduct = 0; 704 | real backTopLeftDotProduct = 0; 705 | real backTopRightDotProduct = 0; 706 | real backBottomLeftDotProduct = 0; 707 | real backBottomRightDotProduct = 0; 708 | 709 | real v=0; 710 | real inFrontTopLeft=0; 711 | real inFrontTopRight=0; 712 | real inFrontBottomLeft=0; 713 | real inFrontBottomRight=0; 714 | real inBackTopLeft=0; 715 | real inBackTopRight=0; 716 | real inBackBottomLeft=0; 717 | real inBackBottomRight=0; 718 | 719 | // we are careful with the boundaries 720 | bool frontTopLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 721 | bool frontTopRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 722 | bool frontBottomLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 723 | bool frontBottomRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft >= 0 && zInFrontTopLeft <= inputImages_depth-1; 724 | 725 | bool backTopLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft+1>= 0 && zInFrontTopLeft+1<= inputImages_depth-1; 726 | bool backTopRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft >= 0 && yInFrontTopLeft <= inputImages_height-1 && zInFrontTopLeft+1 >= 0 && zInFrontTopLeft+1 <= inputImages_depth-1; 727 | bool backBottomLeftIsIn = xInFrontTopLeft >= 0 && xInFrontTopLeft <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft+1 >= 0 && zInFrontTopLeft+1 <= inputImages_depth-1; 728 | bool backBottomRightIsIn = xInFrontTopLeft+1 >= 0 && xInFrontTopLeft+1 <= inputImages_width-1 && yInFrontTopLeft+1 >= 0 && yInFrontTopLeft+1 <= inputImages_height-1 && zInFrontTopLeft+1 >= 0 && zInFrontTopLeft+1 <= inputImages_depth-1; 729 | 730 | int t; 731 | 732 | for(t=0; t= lowerBound && value <= upperBound); 26 | } 27 | 28 | __device__ void sumReduceShMem(volatile float s[]) 29 | { 30 | /* obviously only works for 32 elements */ 31 | /* sums up a shared memory array of 32 elements, stores it in s[0] */ 32 | /* whole warp can then read first element (broadcasting) */ 33 | //if(threadIdx.x<32) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+32]; } 34 | if(threadIdx.x<16) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+16] + s[threadIdx.x+32]; } 35 | if(threadIdx.x<8) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+8]; } 36 | if(threadIdx.x<4) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+4]; } 37 | if(threadIdx.x<2) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+2]; } 38 | if(threadIdx.x<1) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+1]; } 39 | } 40 | 41 | __device__ void sumReduceShMemPerspective(volatile float s[]) 42 | { 43 | /* obviously only works for 32 elements */ 44 | /* sums up a shared memory array of 32 elements, stores it in s[0] */ 45 | /* whole warp can then read first element (broadcasting) */ 46 | if(threadIdx.x<16) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+16]; } 47 | if(threadIdx.x<8) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+8]; } 48 | if(threadIdx.x<4) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+4]; } 49 | if(threadIdx.x<2) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+2]; } 50 | if(threadIdx.x<1) { s[threadIdx.x] = s[threadIdx.x] + s[threadIdx.x+1]; } 51 | } 52 | 53 | 54 | // Affine Transformation 55 | 56 | __global__ void bilinearSamplingFromGrid(float* inputImages_data, int inputImages_strideBatch, int inputImages_strideChannels, int inputImages_strideDepth, int inputImages_strideHeight, int inputImages_strideWidth, 57 | float* grids_data, int grids_strideBatch, int grids_strideYX, int grids_strideDepth, int grids_strideHeight, int grids_strideWidth, 58 | float* output_data, int output_strideBatch, int output_strideChannels, int output_strideDepth, int output_strideHeight, int output_strideWidth, 59 | int inputImages_channels, int inputImages_depth, int inputImages_height, int inputImages_width, int output_depth, int output_width) 60 | { 61 | // each (32,16) block 16 output pixels (for coalescing the grid read) 62 | // x,y = coordinates (xOut = blockIdx.x*16+blockDim.y+threadIdx.y) 63 | // z = batch index 64 | // threadIdx.x : used for features (coalescing is trivial) 65 | 66 | const int xOut = blockIdx.x*blockDim.y+threadIdx.y; 67 | const bool withinImageBounds = xOut < output_width; 68 | const bool withinGridBounds = blockIdx.x*blockDim.y + threadIdx.x / 3 < output_width; 69 | const int yOut = blockIdx.y; 70 | const int width = inputImages_width; 71 | const int height = inputImages_height; 72 | const int depth = output_depth; 73 | 74 | const int b = blockIdx.z/depth; 75 | const int zOut = blockIdx.z%depth; 76 | 77 | float zf, yf,xf; 78 | 79 | 80 | __shared__ float gridData[48]; 81 | if (threadIdx.y==0 && withinGridBounds) 82 | { 83 | gridData[threadIdx.x] = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth + threadIdx.x]; 84 | } 85 | __syncthreads(); 86 | if(!withinImageBounds) return; 87 | zf = gridData[threadIdx.y*3]; 88 | yf = gridData[threadIdx.y*3+1]; 89 | xf = gridData[threadIdx.y*3+2]; 90 | //printf("%.3f %.3f %.3f\n", zf, yf, xf); 91 | 92 | int yInTopLeftFront, xInTopLeftFront, zInTopLeftFront; 93 | float yWeightTopLeftFront, xWeightTopLeftFront, zWeightTopLeftFront; 94 | getTopLeftFront(zf, inputImages_depth, zInTopLeftFront, zWeightTopLeftFront); 95 | getTopLeftFront(yf, inputImages_height, yInTopLeftFront, yWeightTopLeftFront); 96 | getTopLeftFront(xf, inputImages_width, xInTopLeftFront, xWeightTopLeftFront); 97 | 98 | //printf("GPU y[%.3f] x[%.3f] z[%.3f] WeightTopLeftFront\n",yWeightTopLeftFront, xWeightTopLeftFront, zWeightTopLeftFront); 99 | // printf("GPU y[%d] x[%d] z[%d] InTopLeftFront\n",yInTopLeftFront, xInTopLeftFront, zInTopLeftFront); 100 | 101 | const int outAddress = output_strideBatch * b + output_strideHeight * yOut + output_strideWidth * xOut + output_strideDepth * zOut; 102 | 103 | const int inTopLeftFrontAddress = inputImages_strideBatch * b + inputImages_strideHeight * yInTopLeftFront 104 | + inputImages_strideWidth * xInTopLeftFront + inputImages_strideDepth * zInTopLeftFront; 105 | 106 | const int inTopLeftBackAddress = inTopLeftFrontAddress + inputImages_strideDepth; 107 | 108 | const int inTopRightFrontAddress = inTopLeftFrontAddress + inputImages_strideWidth; 109 | const int inTopRightBackAddress = inTopRightFrontAddress + inputImages_strideDepth; 110 | 111 | const int inBottomLeftFrontAddress = inTopLeftFrontAddress + inputImages_strideHeight; 112 | const int inBottomLeftBackAddress = inBottomLeftFrontAddress + inputImages_strideDepth; 113 | 114 | const int inBottomRightFrontAddress = inBottomLeftFrontAddress + inputImages_strideWidth; 115 | const int inBottomRightBackAddress = inBottomRightFrontAddress + inputImages_strideDepth; 116 | 117 | float v=0; 118 | float inTopLeftFront=0; 119 | float inTopLeftBack=0; 120 | float inTopRightFront=0; 121 | float inTopRightBack=0; 122 | float inBottomLeftFront=0; 123 | float inBottomLeftBack=0; 124 | float inBottomRightFront=0; 125 | float inBottomRightBack=0; 126 | 127 | bool topLeftFrontIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 128 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 129 | 130 | bool topLeftBackIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 131 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 132 | 133 | bool topRightFrontIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 134 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 135 | 136 | bool topRightBackIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 137 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 138 | 139 | bool bottomLeftFrontIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 140 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 141 | 142 | bool bottomLeftBackIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 143 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 144 | 145 | bool bottomRightFrontIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 146 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 147 | 148 | bool bottomRightBackIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 149 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 150 | 151 | 152 | // interpolation happens here 153 | for(int t=threadIdx.x; tsize[3]+15)/16, output->size[2], output->size[0]*output->size[1]); 191 | dim3 threads(48,16); 192 | 193 | /* assume BHWD */ 194 | bilinearSamplingFromGrid <<< blocks, threads, 0, THCState_getCurrentStream(state) >>> (THCudaTensor_data(state, inputImages), 195 | THCudaTensor_stride(state, inputImages, 0), 196 | THCudaTensor_stride(state, inputImages, 4), 197 | THCudaTensor_stride(state, inputImages, 1), 198 | THCudaTensor_stride(state, inputImages, 2), 199 | THCudaTensor_stride(state, inputImages, 3), 200 | 201 | THCudaTensor_data(state, grids), 202 | THCudaTensor_stride(state, grids, 0), 203 | THCudaTensor_stride(state, grids, 4), 204 | THCudaTensor_stride(state, grids, 1), 205 | THCudaTensor_stride(state, grids, 2), 206 | THCudaTensor_stride(state, grids, 3), 207 | 208 | THCudaTensor_data(state, output), 209 | THCudaTensor_stride(state, output, 0), 210 | THCudaTensor_stride(state, output, 4), 211 | THCudaTensor_stride(state, output, 1), 212 | THCudaTensor_stride(state, output, 2), 213 | THCudaTensor_stride(state, output, 3), 214 | 215 | THCudaTensor_size(state, inputImages, 4), 216 | THCudaTensor_size(state, inputImages, 1), 217 | THCudaTensor_size(state, inputImages, 2), 218 | THCudaTensor_size(state, inputImages, 3), 219 | THCudaTensor_size(state, output, 1), 220 | THCudaTensor_size(state, output, 3)); 221 | 222 | 223 | // check for errors 224 | cudaError_t err = cudaGetLastError(); 225 | if (err != cudaSuccess) { 226 | printf("error in BilinearSampler.updateOutput: %s\n", cudaGetErrorString(err)); 227 | THError("aborting"); 228 | } 229 | //printf("GPU forward end!\n"); 230 | return 1; 231 | } 232 | 233 | template __global__ void backwardBilinearSampling(float* inputImages_data, int inputImages_strideBatch, int inputImages_strideChannels, int inputImages_strideDepth, int inputImages_strideHeight, int inputImages_strideWidth, 234 | float* gradInputImages_data, int gradInputImages_strideBatch, int gradInputImages_strideChannels, int gradInputImages_strideDepth, int gradInputImages_strideHeight, int gradInputImages_strideWidth, 235 | float* grids_data, int grids_strideBatch, int grids_strideYX, int grids_strideDepth, int grids_strideHeight, int grids_strideWidth, 236 | float* gradGrids_data, int gradGrids_strideBatch, int gradGrids_strideYX, int gradGrids_strideDepth, int gradGrids_strideHeight, int gradGrids_strideWidth, 237 | float* gradOutput_data, int gradOutput_strideBatch, int gradOutput_strideChannels, int gradOutput_strideDepth, int gradOutput_strideHeight, int gradOutput_strideWidth, 238 | int inputImages_channels, int inputImages_depth, int inputImages_height, int inputImages_width, int gradOutput_depth, int gradOutput_width) 239 | { 240 | // each (32,16) block 16 output pixels (for coalescing the grid read) 241 | // x,y = coordinates 242 | // z = batch index 243 | // threads : used for features 244 | 245 | const int xOut = blockIdx.x*blockDim.y+threadIdx.y; 246 | const bool withinImageBounds = xOut < gradOutput_width; 247 | const bool withinGridBounds = blockIdx.x*blockDim.y + threadIdx.x / 3 < gradOutput_width; 248 | 249 | const int yOut = blockIdx.y; 250 | const int width = inputImages_width; 251 | const int height = inputImages_height; 252 | 253 | const int depth = gradOutput_depth; 254 | 255 | const int b = blockIdx.z/depth; 256 | const int zOut = blockIdx.z%depth; 257 | 258 | float yf,xf, zf; 259 | 260 | __shared__ float gridData[48]; 261 | if (threadIdx.y==0 && withinGridBounds) 262 | { 263 | gridData[threadIdx.x] = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + zOut*grids_strideDepth + threadIdx.x]; 264 | } 265 | __syncthreads(); 266 | 267 | if(withinImageBounds) 268 | { 269 | zf = gridData[threadIdx.y*3]; 270 | yf = gridData[threadIdx.y*3+1]; 271 | xf = gridData[threadIdx.y*3+2]; 272 | 273 | int yInTopLeftFront, xInTopLeftFront, zInTopLeftFront; 274 | float yWeightTopLeftFront, xWeightTopLeftFront, zWeightTopLeftFront; 275 | getTopLeftFront(zf, inputImages_depth, zInTopLeftFront, zWeightTopLeftFront); 276 | getTopLeftFront(yf, inputImages_height, yInTopLeftFront, yWeightTopLeftFront); 277 | getTopLeftFront(xf, inputImages_width, xInTopLeftFront, xWeightTopLeftFront); 278 | 279 | const int inTopLeftFrontAddress = inputImages_strideBatch * b + inputImages_strideHeight * yInTopLeftFront 280 | + inputImages_strideWidth * xInTopLeftFront + inputImages_strideDepth * zInTopLeftFront; 281 | 282 | const int inTopLeftBackAddress = inTopLeftFrontAddress + inputImages_strideDepth; 283 | 284 | const int inTopRightFrontAddress = inTopLeftFrontAddress + inputImages_strideWidth; 285 | const int inTopRightBackAddress = inTopRightFrontAddress + inputImages_strideDepth; 286 | 287 | const int inBottomLeftFrontAddress = inTopLeftFrontAddress + inputImages_strideHeight; 288 | const int inBottomLeftBackAddress = inBottomLeftFrontAddress + inputImages_strideDepth; 289 | 290 | const int inBottomRightFrontAddress = inBottomLeftFrontAddress + inputImages_strideWidth; 291 | const int inBottomRightBackAddress = inBottomRightFrontAddress + inputImages_strideDepth; 292 | 293 | const int gradInputImagesTopLeftFrontAddress = gradInputImages_strideBatch * b + gradInputImages_strideHeight * yInTopLeftFront + gradInputImages_strideWidth * xInTopLeftFront + gradInputImages_strideDepth * zInTopLeftFront; 294 | const int gradInputImagesTopLeftBackAddress = gradInputImagesTopLeftFrontAddress + gradInputImages_strideDepth; 295 | const int gradInputImagesTopRightFrontAddress = gradInputImagesTopLeftFrontAddress + gradInputImages_strideWidth; 296 | const int gradInputImagesTopRightBackAddress = gradInputImagesTopRightFrontAddress + gradInputImages_strideDepth; 297 | 298 | const int gradInputImagesBottomLeftFrontAddress = gradInputImagesTopLeftFrontAddress + gradInputImages_strideHeight; 299 | const int gradInputImagesBottomLeftBackAddress = gradInputImagesBottomLeftFrontAddress + gradInputImages_strideDepth; 300 | const int gradInputImagesBottomRightFrontAddress = gradInputImagesBottomLeftFrontAddress + gradInputImages_strideWidth; 301 | const int gradInputImagesBottomRightBackAddress = gradInputImagesBottomRightFrontAddress + gradInputImages_strideDepth; 302 | 303 | 304 | const int gradOutputAddress = gradOutput_strideBatch * b + gradOutput_strideHeight * yOut + gradOutput_strideWidth * xOut + gradOutput_strideDepth * zOut; 305 | 306 | float topLeftFrontDotProduct = 0; 307 | float topLeftBackDotProduct = 0; 308 | float topRightFrontDotProduct = 0; 309 | float topRightBackDotProduct = 0; 310 | 311 | float bottomLeftFrontDotProduct = 0; 312 | float bottomLeftBackDotProduct = 0; 313 | float bottomRightFrontDotProduct = 0; 314 | float bottomRightBackDotProduct = 0; 315 | 316 | bool topLeftFrontIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 317 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 318 | bool topLeftBackIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 319 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 320 | 321 | bool topRightFrontIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 322 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 323 | bool topRightBackIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 324 | && between(yInTopLeftFront, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 325 | 326 | bool bottomLeftFrontIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 327 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 328 | bool bottomLeftBackIsIn = between(xInTopLeftFront, 0, inputImages_width-1) 329 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 330 | 331 | bool bottomRightFrontIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 332 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront, 0, inputImages_depth-1); 333 | bool bottomRightBackIsIn = between(xInTopLeftFront+1, 0, inputImages_width-1) 334 | && between(yInTopLeftFront+1, 0, inputImages_height-1) && between(zInTopLeftFront+1, 0, inputImages_depth-1); 335 | 336 | /* 337 | In that loop we accumulate 338 | - gradients into the gradInputImages array with atomic adds 339 | - we compute the dot product that we need for the grid gradient 340 | */ 341 | 342 | for(int t=threadIdx.x; tsize[3]+15)/16, gradOutput->size[2], gradOutput->size[0]*gradOutput->size[1]); 524 | dim3 threads(48,16); 525 | 526 | backwardBilinearSampling <<< blocks, threads, 0, THCState_getCurrentStream(state) >>> ( 527 | THCudaTensor_data(state, inputImages), 528 | THCudaTensor_stride(state, inputImages, 0), 529 | THCudaTensor_stride(state, inputImages, 4), 530 | THCudaTensor_stride(state, inputImages, 1), 531 | THCudaTensor_stride(state, inputImages, 2), 532 | THCudaTensor_stride(state, inputImages, 3), 533 | 534 | THCudaTensor_data(state, gradInputImages), 535 | THCudaTensor_stride(state, gradInputImages, 0), 536 | THCudaTensor_stride(state, gradInputImages, 4), 537 | THCudaTensor_stride(state, gradInputImages, 1), 538 | THCudaTensor_stride(state, gradInputImages, 2), 539 | THCudaTensor_stride(state, gradInputImages, 3), 540 | 541 | THCudaTensor_data(state, grids), 542 | THCudaTensor_stride(state, grids, 0), 543 | THCudaTensor_stride(state, grids, 4), 544 | THCudaTensor_stride(state, grids, 1), 545 | THCudaTensor_stride(state, grids, 2), 546 | THCudaTensor_stride(state, grids, 3), 547 | 548 | THCudaTensor_data(state, gradGrids), 549 | THCudaTensor_stride(state, gradGrids, 0), 550 | THCudaTensor_stride(state, gradGrids, 4), 551 | THCudaTensor_stride(state, gradGrids, 1), 552 | THCudaTensor_stride(state, gradGrids, 2), 553 | THCudaTensor_stride(state, gradGrids, 3), 554 | 555 | THCudaTensor_data(state, gradOutput), 556 | THCudaTensor_stride(state, gradOutput, 0), 557 | THCudaTensor_stride(state, gradOutput, 4), 558 | THCudaTensor_stride(state, gradOutput, 1), 559 | THCudaTensor_stride(state, gradOutput, 2), 560 | THCudaTensor_stride(state, gradOutput, 3), 561 | 562 | THCudaTensor_size(state, inputImages, 4), 563 | THCudaTensor_size(state, inputImages, 1), 564 | THCudaTensor_size(state, inputImages, 2), 565 | THCudaTensor_size(state, inputImages, 3), 566 | THCudaTensor_size(state, gradOutput, 1), 567 | THCudaTensor_size(state, gradOutput, 3)); 568 | 569 | 570 | 571 | // check for errors 572 | cudaError_t err = cudaGetLastError(); 573 | if (err != cudaSuccess) { 574 | printf("error in BilinearSampler.updateGradInput: %s\n", cudaGetErrorString(err)); 575 | THError("aborting"); 576 | } 577 | return 1; 578 | } 579 | 580 | 581 | // Perspective Transformation 582 | 583 | __global__ void bilinearSamplingFromGridPerspective(float* inputImages_data, int inputImages_strideBatch, int inputImages_strideChannels, int inputImages_strideDepth, int inputImages_strideHeight, int inputImages_strideWidth, 584 | float* grids_data, int grids_strideBatch, int grids_strideYX, int grids_strideDepth, int grids_strideHeight, int grids_strideWidth, 585 | float* output_data, int output_strideBatch, int output_strideChannels, int output_strideDist, int output_strideHeight, int output_strideWidth, 586 | int inputImages_channels, int inputImages_depth, int inputImages_height, int inputImages_width, int output_dist, int output_width, float focal_length) 587 | { 588 | // each (32,16) block 16 output pixels (for coalescing the grid read) 589 | // x,y = coordinates (xOut = blockIdx.x*16+blockDim.y+threadIdx.y) 590 | // z = batch index 591 | // threadIdx.x : used for features (coalescing is trivial) 592 | 593 | const int xOut = blockIdx.x*blockDim.y+threadIdx.y; 594 | const bool withinImageBounds = xOut < output_width; 595 | const bool withinGridBounds = blockIdx.x*blockDim.y + threadIdx.x / 4 < output_width; 596 | const int yOut = blockIdx.y; 597 | const int width = inputImages_width; 598 | const int height = inputImages_height; 599 | const int dist = output_dist; 600 | 601 | const int b = blockIdx.z/dist; 602 | const int disOut = blockIdx.z%dist; 603 | 604 | float zf,yf,xf, disf; 605 | 606 | __shared__ float gridData[32]; 607 | if (threadIdx.y==0 && withinGridBounds) 608 | { 609 | gridData[threadIdx.x] = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + disOut*grids_strideDepth + threadIdx.x]; 610 | } 611 | __syncthreads(); 612 | if(!withinImageBounds) return; 613 | zf = gridData[threadIdx.y*4]; 614 | yf = gridData[threadIdx.y*4+1]; 615 | xf = gridData[threadIdx.y*4+2]; 616 | disf = gridData[threadIdx.y*4+3]; 617 | 618 | //yf = yf / disf; 619 | //xf = xf / disf; 620 | //zf = zf / disf - (focal_length + 0.5); 621 | 622 | const int outAddress = output_strideBatch * b + output_strideDist * disOut + output_strideHeight * yOut + output_strideWidth * xOut; 623 | 624 | int zInFrontTopLeft, yInFrontTopLeft, xInFrontTopLeft; 625 | float yWeightFrontTopLeft, xWeightFrontTopLeft, zWeightFrontTopLeft; 626 | getTopLeftFront(zf, inputImages_depth, zInFrontTopLeft, zWeightFrontTopLeft); 627 | getTopLeftFront(yf, inputImages_height, yInFrontTopLeft, yWeightFrontTopLeft); 628 | getTopLeftFront(xf, inputImages_width, xInFrontTopLeft, xWeightFrontTopLeft); 629 | 630 | const int inFrontTopLeftAddress = inputImages_strideBatch * b + inputImages_strideDepth * zInFrontTopLeft + inputImages_strideHeight * yInFrontTopLeft + inputImages_strideWidth * xInFrontTopLeft; 631 | const int inFrontTopRightAddress = inFrontTopLeftAddress + inputImages_strideWidth; 632 | const int inFrontBottomLeftAddress = inFrontTopLeftAddress + inputImages_strideHeight; 633 | const int inFrontBottomRightAddress = inFrontBottomLeftAddress + inputImages_strideWidth; 634 | 635 | const int inBackTopLeftAddress = inFrontTopLeftAddress + inputImages_strideDepth; 636 | const int inBackTopRightAddress = inBackTopLeftAddress + inputImages_strideWidth; 637 | const int inBackBottomLeftAddress = inBackTopLeftAddress + inputImages_strideHeight; 638 | const int inBackBottomRightAddress = inBackBottomLeftAddress + inputImages_strideWidth; 639 | 640 | float v=0; 641 | float inFrontTopLeft=0; 642 | float inFrontTopRight=0; 643 | float inFrontBottomLeft=0; 644 | float inFrontBottomRight=0; 645 | float inBackTopLeft=0; 646 | float inBackTopRight=0; 647 | float inBackBottomLeft=0; 648 | float inBackBottomRight=0; 649 | 650 | bool frontTopLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 651 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 652 | 653 | bool backTopLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 654 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 655 | 656 | bool frontTopRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 657 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 658 | 659 | bool backTopRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 660 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 661 | 662 | bool frontBottomLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 663 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 664 | 665 | bool backBottomLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 666 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 667 | 668 | bool frontBottomRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 669 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 670 | 671 | bool backBottomRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 672 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 673 | 674 | // interpolation happens here 675 | for(int t=threadIdx.x; tsize[3]+7)/8, output->size[2], output->size[0]*output->size[1]); 710 | dim3 threads(32,8); 711 | 712 | /* assume BHWD */ 713 | bilinearSamplingFromGridPerspective <<< blocks, threads, 0, THCState_getCurrentStream(state) >>> (THCudaTensor_data(state, inputImages), 714 | 715 | THCudaTensor_stride(state, inputImages, 0), 716 | THCudaTensor_stride(state, inputImages, 4), 717 | THCudaTensor_stride(state, inputImages, 1), 718 | THCudaTensor_stride(state, inputImages, 2), 719 | THCudaTensor_stride(state, inputImages, 3), 720 | 721 | THCudaTensor_data(state, grids), 722 | THCudaTensor_stride(state, grids, 0), 723 | THCudaTensor_stride(state, grids, 4), 724 | THCudaTensor_stride(state, grids, 1), 725 | THCudaTensor_stride(state, grids, 2), 726 | THCudaTensor_stride(state, grids, 3), 727 | 728 | THCudaTensor_data(state, output), 729 | THCudaTensor_stride(state, output, 0), 730 | THCudaTensor_stride(state, output, 4), 731 | THCudaTensor_stride(state, output, 1), 732 | THCudaTensor_stride(state, output, 2), 733 | THCudaTensor_stride(state, output, 3), 734 | 735 | THCudaTensor_size(state, inputImages, 4), 736 | THCudaTensor_size(state, inputImages, 1), 737 | THCudaTensor_size(state, inputImages, 2), 738 | THCudaTensor_size(state, inputImages, 3), 739 | THCudaTensor_size(state, output, 1), 740 | THCudaTensor_size(state, output, 3), 741 | focal_length); 742 | 743 | // check for errors 744 | cudaError_t err = cudaGetLastError(); 745 | if (err != cudaSuccess) { 746 | printf("error in BilinearSampler.updateOutput: %s\n", cudaGetErrorString(err)); 747 | THError("aborting"); 748 | } 749 | return 1; 750 | } 751 | 752 | 753 | template __global__ void backwardBilinearSamplingPerspective(float* inputImages_data, int inputImages_strideBatch, int inputImages_strideChannels, int inputImages_strideDepth, int inputImages_strideHeight, int inputImages_strideWidth, 754 | float* gradInputImages_data, int gradInputImages_strideBatch, int gradInputImages_strideChannels, int gradInputImages_strideDepth, int gradInputImages_strideHeight, int gradInputImages_strideWidth, 755 | float* grids_data, int grids_strideBatch, int grids_strideYX, int grids_strideDepth, int grids_strideHeight, int grids_strideWidth, 756 | float* gradGrids_data, int gradGrids_strideBatch, int gradGrids_strideYX, int gradGrids_strideDepth, int gradGrids_strideHeight, int gradGrids_strideWidth, 757 | float* gradOutput_data, int gradOutput_strideBatch, int gradOutput_strideChannels, int gradOutput_strideDist, int gradOutput_strideHeight, int gradOutput_strideWidth, 758 | int inputImages_channels, int inputImages_depth, int inputImages_height, int inputImages_width, int gradOutput_dist, int gradOutput_width, float focal_length) 759 | { 760 | // each (32,16) block 16 output pixels (for coalescing the grid read) 761 | // x,y = coordinates 762 | // z = batch index 763 | // threads : used for features 764 | 765 | const int xOut = blockIdx.x*blockDim.y+threadIdx.y; 766 | const bool withinImageBounds = xOut < gradOutput_width; 767 | const bool withinGridBounds = blockIdx.x*blockDim.y + threadIdx.x / 4 < gradOutput_width; 768 | 769 | const int yOut = blockIdx.y; 770 | const int width = inputImages_width; 771 | const int height = inputImages_height; 772 | 773 | const int dist = gradOutput_dist; 774 | 775 | const int b = blockIdx.z/dist; 776 | const int disOut = blockIdx.z%dist; 777 | 778 | float zf,yf,xf, disf; 779 | 780 | __shared__ float gridData[32]; 781 | if (threadIdx.y==0 && withinGridBounds) 782 | { 783 | gridData[threadIdx.x] = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + disOut*grids_strideDepth + threadIdx.x]; 784 | } 785 | __syncthreads(); 786 | 787 | if(withinImageBounds) 788 | { 789 | 790 | zf = gridData[threadIdx.y*4]; 791 | yf = gridData[threadIdx.y*4+1]; 792 | xf = gridData[threadIdx.y*4+2]; 793 | disf = gridData[threadIdx.y*4+3]; 794 | 795 | //yf = yf / disf; 796 | //xf = xf / disf; 797 | //zf = zf / disf - (focal_length + 0.5); 798 | 799 | 800 | int zInFrontTopLeft, yInFrontTopLeft, xInFrontTopLeft; 801 | float yWeightFrontTopLeft, xWeightFrontTopLeft, zWeightFrontTopLeft; 802 | getTopLeftFront(zf, inputImages_depth, zInFrontTopLeft, zWeightFrontTopLeft); 803 | getTopLeftFront(yf, inputImages_height, yInFrontTopLeft, yWeightFrontTopLeft); 804 | getTopLeftFront(xf, inputImages_width, xInFrontTopLeft, xWeightFrontTopLeft); 805 | 806 | const int inFrontTopLeftAddress = inputImages_strideBatch * b + inputImages_strideDepth * zInFrontTopLeft + inputImages_strideHeight * yInFrontTopLeft + inputImages_strideWidth * xInFrontTopLeft; 807 | const int inFrontTopRightAddress = inFrontTopLeftAddress + inputImages_strideWidth; 808 | const int inFrontBottomLeftAddress = inFrontTopLeftAddress + inputImages_strideHeight; 809 | const int inFrontBottomRightAddress = inFrontBottomLeftAddress + inputImages_strideWidth; 810 | 811 | const int inBackTopLeftAddress = inFrontTopLeftAddress + inputImages_strideDepth; 812 | const int inBackTopRightAddress = inBackTopLeftAddress + inputImages_strideWidth; 813 | const int inBackBottomLeftAddress = inBackTopLeftAddress + inputImages_strideHeight; 814 | const int inBackBottomRightAddress = inBackBottomLeftAddress + inputImages_strideWidth; 815 | 816 | 817 | const int gradInputImagesFrontTopLeftAddress = gradInputImages_strideBatch * b + gradInputImages_strideDepth * zInFrontTopLeft + gradInputImages_strideHeight * yInFrontTopLeft + gradInputImages_strideWidth *xInFrontTopLeft; 818 | const int gradInputImagesFrontTopRightAddress = gradInputImagesFrontTopLeftAddress + gradInputImages_strideWidth; 819 | const int gradInputImagesFrontBottomLeftAddress = gradInputImagesFrontTopLeftAddress + gradInputImages_strideHeight; 820 | const int gradInputImagesFrontBottomRightAddress = gradInputImagesFrontBottomLeftAddress + gradInputImages_strideWidth; 821 | 822 | const int gradInputImagesBackTopLeftAddress = gradInputImagesFrontTopLeftAddress + gradInputImages_strideDepth; 823 | const int gradInputImagesBackTopRightAddress = gradInputImagesBackTopLeftAddress + gradInputImages_strideWidth; 824 | const int gradInputImagesBackBottomLeftAddress = gradInputImagesBackTopLeftAddress + gradInputImages_strideHeight; 825 | const int gradInputImagesBackBottomRightAddress = gradInputImagesBackBottomLeftAddress + gradInputImages_strideWidth; 826 | 827 | const int gradOutputAddress = gradOutput_strideBatch * b + gradOutput_strideDist * disOut + gradOutput_strideHeight * yOut + gradOutput_strideWidth * xOut; 828 | 829 | 830 | float frontTopLeftDotProduct = 0; 831 | float frontTopRightDotProduct = 0; 832 | float frontBottomLeftDotProduct = 0; 833 | float frontBottomRightDotProduct = 0; 834 | float backTopLeftDotProduct = 0; 835 | float backTopRightDotProduct = 0; 836 | float backBottomLeftDotProduct = 0; 837 | float backBottomRightDotProduct = 0; 838 | 839 | bool frontTopLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 840 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 841 | 842 | bool backTopLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 843 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 844 | 845 | bool frontTopRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 846 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 847 | 848 | bool backTopRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 849 | && between(yInFrontTopLeft, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 850 | 851 | bool frontBottomLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 852 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 853 | 854 | bool backBottomLeftIsIn = between(xInFrontTopLeft, 0, inputImages_width-1) 855 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 856 | 857 | bool frontBottomRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 858 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft, 0, inputImages_depth-1); 859 | 860 | bool backBottomRightIsIn = between(xInFrontTopLeft+1, 0, inputImages_width-1) 861 | && between(yInFrontTopLeft+1, 0, inputImages_height-1) && between(zInFrontTopLeft+1, 0, inputImages_depth-1); 862 | 863 | /* 864 | In that loop we accumulate 865 | - gradients into the gradInputImages array with atomic adds 866 | - we compute the dot product that we need for the grid gradient 867 | */ 868 | 869 | for(int t=threadIdx.x; tsize[3]+7)/8, gradOutput->size[2], gradOutput->size[0]*gradOutput->size[1]); 1030 | dim3 threads(32,8); 1031 | 1032 | backwardBilinearSamplingPerspective <<< blocks, threads, 0, THCState_getCurrentStream(state) >>> ( 1033 | 1034 | THCudaTensor_data(state, inputImages), 1035 | THCudaTensor_stride(state, inputImages, 0), 1036 | THCudaTensor_stride(state, inputImages, 4), 1037 | THCudaTensor_stride(state, inputImages, 1), 1038 | THCudaTensor_stride(state, inputImages, 2), 1039 | THCudaTensor_stride(state, inputImages, 3), 1040 | 1041 | THCudaTensor_data(state, gradInputImages), 1042 | THCudaTensor_stride(state, gradInputImages, 0), 1043 | THCudaTensor_stride(state, gradInputImages, 4), 1044 | THCudaTensor_stride(state, gradInputImages, 1), 1045 | THCudaTensor_stride(state, gradInputImages, 2), 1046 | THCudaTensor_stride(state, gradInputImages, 3), 1047 | 1048 | THCudaTensor_data(state, grids), 1049 | THCudaTensor_stride(state, grids, 0), 1050 | THCudaTensor_stride(state, grids, 4), 1051 | THCudaTensor_stride(state, grids, 1), 1052 | THCudaTensor_stride(state, grids, 2), 1053 | THCudaTensor_stride(state, grids, 3), 1054 | 1055 | THCudaTensor_data(state, gradGrids), 1056 | THCudaTensor_stride(state, gradGrids, 0), 1057 | THCudaTensor_stride(state, gradGrids, 4), 1058 | THCudaTensor_stride(state, gradGrids, 1), 1059 | THCudaTensor_stride(state, gradGrids, 2), 1060 | THCudaTensor_stride(state, gradGrids, 3), 1061 | 1062 | THCudaTensor_data(state, gradOutput), 1063 | THCudaTensor_stride(state, gradOutput, 0), 1064 | THCudaTensor_stride(state, gradOutput, 4), 1065 | THCudaTensor_stride(state, gradOutput, 1), 1066 | THCudaTensor_stride(state, gradOutput, 2), 1067 | THCudaTensor_stride(state, gradOutput, 3), 1068 | 1069 | THCudaTensor_size(state, inputImages, 4), 1070 | THCudaTensor_size(state, inputImages, 1), 1071 | THCudaTensor_size(state, inputImages, 2), 1072 | THCudaTensor_size(state, inputImages, 3), 1073 | THCudaTensor_size(state, gradOutput, 1), 1074 | THCudaTensor_size(state, gradOutput, 3), 1075 | focal_length); 1076 | 1077 | // check for errors 1078 | cudaError_t err = cudaGetLastError(); 1079 | if (err != cudaSuccess) { 1080 | printf("error in BilinearSampler.updateGradInput: %s\n", cudaGetErrorString(err)); 1081 | THError("aborting"); 1082 | } 1083 | return 1; 1084 | } 1085 | 1086 | static const struct luaL_Reg cunn_BilinearSamplerPerspective__ [] = { 1087 | {"BilinearSamplerBHWD_updateOutput", cunn_BilinearSamplerBHWD_updateOutput}, 1088 | {"BilinearSamplerBHWD_updateGradInput", cunn_BilinearSamplerBHWD_updateGradInput}, 1089 | {"BilinearSamplerPerspective_updateOutput", cunn_BilinearSamplerPerspective_updateOutput}, 1090 | {"BilinearSamplerPerspective_updateGradInput", cunn_BilinearSamplerPerspective_updateGradInput}, 1091 | //{"BilinearSamplerPerspective_updateGradInputOnlyGrid", cunn_BilinearSamplerBHWD_updateGradInputOnlyGrid}, 1092 | {NULL, NULL} 1093 | }; 1094 | 1095 | static void cunn_BilinearSamplerPerspective_init(lua_State *L) 1096 | { 1097 | luaT_pushmetatable(L, "torch.CudaTensor"); 1098 | luaT_registeratname(L, cunn_BilinearSamplerPerspective__, "nn"); 1099 | lua_pop(L,1); 1100 | } 1101 | --------------------------------------------------------------------------------