├── LICENSE ├── README.md └── src ├── common ├── KITTI_params.lua ├── NYU_params.lua └── file_utils.py ├── experiment_KITTI ├── DataLoader.lua ├── DataLoader_NYU_Full.lua ├── DataLoader_multi_res.lua ├── DataPointer.lua ├── criterion │ ├── absolute_depth_negative_cos.lua │ ├── normal_l2.lua │ ├── normal_neg_loss_fast.lua │ ├── normal_negative_cos.lua │ ├── normal_negative_cos_cpu.lua │ ├── normal_negative_cos_weighted.lua │ ├── relative_depth.lua │ ├── relative_depth_cpu.lua │ ├── relative_depth_margin.lua │ ├── relative_depth_margin_cpu.lua │ ├── relative_depth_margin_log_negative_cos.lua │ ├── relative_depth_margin_log_negative_cos_multi_res.lua │ ├── relative_depth_margin_log_normal_depth.lua │ ├── relative_depth_margin_log_normal_depth_multi_res.lua │ ├── relative_depth_margin_negative_cos.lua │ ├── relative_depth_margin_negative_cos_var.lua │ ├── relative_depth_margin_negative_cos_var_cpu.lua │ ├── relative_depth_margin_normal_depth.lua │ ├── relative_depth_negative_cos.lua │ ├── relative_depth_negative_cos_cpu.lua │ ├── relative_depth_negative_cos_focal.lua │ ├── relative_depth_negative_cos_focal_cpu.lua │ └── scale_inv_depth_loss.lua ├── load_data.lua ├── main.lua ├── measure.lua ├── models │ ├── get_theoretical_depth_from_normal_v2.lua │ ├── get_theoretical_depth_from_normal_v2_multi_res.lua │ ├── hourglass3.lua │ ├── hourglass3_softplus.lua │ ├── hourglass3_softplus_absolute_depth.lua │ ├── hourglass3_softplus_direct_normal_neg_cos.lua │ ├── hourglass3_softplus_margin.lua │ ├── hourglass3_softplus_margin_depth_from_normal.lua │ ├── hourglass3_softplus_margin_log.lua │ ├── hourglass3_softplus_margin_log_depth_from_normal.lua │ ├── hourglass3_softplus_margin_var.lua │ ├── img_coord_to_world_coord.lua │ ├── img_coord_to_world_coord_multi_res.lua │ ├── layers │ │ ├── Residual.lua │ │ └── inception_new.lua │ └── world_coord_to_normal.lua ├── test_model_on_KITTI.lua ├── validate.lua └── validation_crit │ ├── validate_crit1.lua │ └── validate_crit_NULL.lua └── 
experiment_NYU ├── DataLoader.lua ├── DataLoader_NYU_Full.lua ├── DataLoader_SNOW.lua ├── DataLoader_multi_res.lua ├── DataPointer.lua ├── criterion ├── absolute_depth_negative_cos.lua ├── depth_var_loss.lua ├── normal_l2.lua ├── normal_neg_loss_fast.lua ├── normal_negative_cos.lua ├── normal_negative_cos_cpu.lua ├── normal_negative_cos_weighted.lua ├── relative_depth.lua ├── relative_depth_cpu.lua ├── relative_depth_margin.lua ├── relative_depth_margin_cpu.lua ├── relative_depth_margin_log_negative_cos.lua ├── relative_depth_margin_log_negative_cos_multi_res.lua ├── relative_depth_margin_log_normal_depth.lua ├── relative_depth_margin_log_normal_depth_multi_res.lua ├── relative_depth_margin_negative_cos.lua ├── relative_depth_margin_negative_cos_var.lua ├── relative_depth_margin_negative_cos_var_cpu.lua ├── relative_depth_margin_normal_depth.lua ├── relative_depth_negative_cos.lua ├── relative_depth_negative_cos_cpu.lua └── scale_inv_depth_loss.lua ├── header.lua ├── hg.lua ├── load_data.lua ├── main.lua ├── measure.lua ├── models ├── get_theoretical_depth_from_normal_v2.lua ├── get_theoretical_depth_from_normal_v2_multi_res.lua ├── hourglass3.lua ├── hourglass3_softplus.lua ├── hourglass3_softplus_absolute_depth.lua ├── hourglass3_softplus_direct_normal_l2.lua ├── hourglass3_softplus_direct_normal_l2_SNOW.lua ├── hourglass3_softplus_direct_normal_neg_cos.lua ├── hourglass3_softplus_direct_normal_neg_cos_SNOW.lua ├── hourglass3_softplus_margin.lua ├── hourglass3_softplus_margin_depth_from_normal.lua ├── hourglass3_softplus_margin_log.lua ├── hourglass3_softplus_margin_log_depth_from_normal.lua ├── hourglass3_softplus_margin_log_depth_from_normal_multi_res.lua ├── hourglass3_softplus_margin_log_multi_res.lua ├── hourglass3_softplus_margin_var.lua ├── img_coord_to_world_coord.lua ├── img_coord_to_world_coord_focal.lua ├── img_coord_to_world_coord_multi_res.lua ├── layers │ ├── Residual.lua │ └── inception_new.lua ├── normal_neg_loss_fast.lua └── 
world_coord_to_normal.lua ├── test_model_on_NYU_NO_CROP.lua ├── test_model_on_SNOW.lua ├── validate.lua └── validation_crit ├── validate_crit1.lua ├── validate_crit_NULL.lua ├── validate_crit_SNOW.lua ├── validate_crit_SNOW_DIW.lua ├── validate_crit_direct_normal.lua └── validate_crit_direct_normal_SNOW.lua /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, University of Michigan 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Surface Normals in the Wild 2 | 3 | Code for reproducing the results in the following paper: 4 | 5 | 6 | Surface Normals in the Wild, 7 | Weifeng Chen, Donglai Xiang, Jia Deng 8 | International Conference on Computer Vision (ICCV), 2017. 9 | 10 | 11 | Please check out the [project site](http://www-personal.umich.edu/~wfchen/surface-normals-in-the-wild/) for more details. 12 | 13 | 14 | # Setup 15 | 16 | 1. Install the Torch 7 framework as described in http://torch.ch/docs/getting-started.html#_. Please make sure that you have the `cudnn`, `hdf5`, `mattorch` and `csvigo` modules installed. 17 | 18 | 2. Clone this repo: 19 | 20 | git clone https://github.com/wfchen-umich/surface_normals.git 21 | 22 | 23 | # Evaluating on pre-trained models 24 | 25 | ## Setup 26 | 27 | Please first download the [data files](https://drive.google.com/open?id=0B02I7-1fYj-ceXI4cGlSNDBPcm8) and [pre-trained models](https://drive.google.com/open?id=0B02I7-1fYj-cdmsza01XQ2pIV28) into the `surface_normals` folder. Download the SNOW dataset from the [project site](http://www-personal.umich.edu/~wfchen/surface-normals-in-the-wild/). 28 | 29 | Untar `data.tar.gz` into `surface_normals`. Untar `results.tar.gz` into `surface_normals/src`. Untar `SNOW_Toolkit.tar.gz` into `surface_normals/data`. Untar `SNOW_images.tar.gz` into `surface_normals/data/SNOW_Toolkit`. 30 | 31 | 32 | 33 | ## NYU Experiments 34 | 35 | Change directory into `/surface_normals/src/experiment_NYU`. 
36 | 37 | 38 | ### NYU Subset 39 | 40 | To evaluate the pre-trained models (trained on the NYU labeled training subset), run the following commands: 41 | 42 | * d_n_al: 43 | 44 | th test_model_on_NYU_NO_CROP.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log/wn1_n5000_d800/model_period2_100000.t7 -test_set 654_NYU_MITpaper_test_imgs_orig_size_points.csv -mode test 45 | 46 | * d_n_dl: 47 | 48 | th test_model_on_NYU_NO_CROP.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log_depth_from_normal/wn100_n5000_d800/model_period2_100000.t7 -test_set 654_NYU_MITpaper_test_imgs_orig_size_points.csv -mode test 49 | 50 | 51 | ### NYU Full 52 | 53 | To evaluate the pre-trained models (trained on the full NYU labeled training subset), run the following commands: 54 | 55 | * d_n_al_F: 56 | 57 | th test_model_on_NYU_NO_CROP.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log/wn1_n5000_d10000_fullNYU/model_period3_100000.t7 -test_set 654_NYU_MITpaper_test_imgs_orig_size_points.csv -mode test 58 | 59 | * d_n_dl_F: 60 | 61 | th test_model_on_NYU_NO_CROP.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log_depth_from_normal/wn100_n5000_d10000_fullNYU/model_period3_90000.t7 -test_set 654_NYU_MITpaper_test_imgs_orig_size_points.csv -mode test 62 | 63 | 64 | ## SNOW Experiments 65 | 66 | Normals from Predicted Depth: 67 | 68 | * d_n_al_F_SNOW: 69 | 70 | th test_model_on_SNOW.lua -num_iter 100000 -prev_model_file ../results/hourglass3_softplus_margin_log/SNOW12_from_n5000_d10000_1e-4/model_period3_100000.t7 -mode test 71 | 72 | 73 | ## KITTI Experiments 74 | 75 | Change directory into `/surface_normals/src/experiment_KITTI`. 
Run the following commands: 76 | 77 | * d: 78 | 79 | th test_model_on_KITTI.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log_depth_from_normal/KITTI_1e-4_n0_run2_1e-5/model_period10_200000.t7 -test_set eigen_test_files_combine.csv -mode test 80 | 81 | * d_n_al: 82 | 83 | th test_model_on_KITTI.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log/KITTI_1e-4_d5000_n5000_run2_1e-5/model_period7_150000.t7 -test_set eigen_test_files_combine.csv -mode test 84 | 85 | * d_n_dl: 86 | 87 | th test_model_on_KITTI.lua -num_iter 1000 -prev_model_file ../results/hourglass3_softplus_margin_log_depth_from_normal/KITTI_1e-4_n5000_run2_1e-5/model_period7_160000.t7 -test_set eigen_test_files_combine.csv -mode test 88 | -------------------------------------------------------------------------------- /src/common/KITTI_params.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'cunn' 3 | 4 | g_input_width = 416 5 | g_input_height = 128 6 | 7 | g_fx_rgb = 246.2849; 8 | g_fy_rgb = -241.6745; 9 | g_cx_rgb = 208.0629; 10 | g_cy_rgb = 57.8963; 11 | 12 | 13 | 14 | -- K = [ 15 | -- 246.2849 0 208.0629 16 | -- 0 241.6745 57.8963 17 | -- 0 0 1.0000 18 | -- ] -------------------------------------------------------------------------------- /src/common/NYU_params.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'cunn' 3 | 4 | g_input_width = 320 5 | g_input_height = 240 6 | 7 | g_fx_rgb = 5.1885790117450188e+02 / 2; 8 | g_fy_rgb = -5.1946961112127485e+02 / 2; 9 | g_cx_rgb = 3.2558244941119034e+02 / 2; 10 | g_cy_rgb = 2.5373616633400465e+02 / 2; 11 | -------------------------------------------------------------------------------- /src/common/file_utils.py: -------------------------------------------------------------------------------- 1 | # File utils 2 | # ydawei@umich.edu 3 | 4 | import os 5 | import pickle 6 | 7 | 8 
| def exec_print(cmd): 9 | print(cmd) 10 | os.system(cmd) 11 | 12 | 13 | def list_all_files_w_ext(folder, ext, recursive=False, cache=False): 14 | FILENAME_CACHE_DIR = 'cache_filename/' 15 | FILENAME_CACHE = FILENAME_CACHE_DIR + folder.replace('/', '_') 16 | if recursive: 17 | FILENAME_CACHE += '-R' 18 | FILENAME_CACHE += ext + '.bin' 19 | 20 | if cache: 21 | try: 22 | with open(FILENAME_CACHE, 'rb') as f: 23 | return pickle.load(f) 24 | except: 25 | pass 26 | 27 | filenames = [] 28 | if recursive: 29 | for root, ignored, fnames in os.walk(folder): 30 | for fname in fnames: 31 | if fname.endswith(ext): 32 | filenames.append(os.path.join(root, fname)) 33 | else: 34 | for fname in os.listdir(folder): 35 | if fname.endswith(ext): 36 | filenames.append(os.path.join(folder,fname)) 37 | 38 | if cache: 39 | if not os.path.exists(FILENAME_CACHE_DIR): 40 | os.makedirs(FILENAME_CACHE_DIR) 41 | with open(FILENAME_CACHE, 'wb') as f: 42 | pickle.dump(filenames, f, True) 43 | 44 | return filenames 45 | 46 | 47 | def load_everything_from(module_name): 48 | g = globals() 49 | m = __import__(module_name) 50 | names = getattr(m, '__all__', None) 51 | if names is None: 52 | names = [name for name in dir(m) if not name.startswith('_')] 53 | for name in names: 54 | g[name] = getattr(m, name) 55 | -------------------------------------------------------------------------------- /src/experiment_KITTI/DataPointer.lua: -------------------------------------------------------------------------------- 1 | require 'xlua' 2 | local DataPointer = torch.class('DataPointer') 3 | 4 | function DataPointer:__init(n_total) 5 | self.n_total = n_total 6 | if self.n_total > 0 then 7 | self.idx_perm = torch.randperm(self.n_total) 8 | self.current_pos = 1 9 | else 10 | self.idx_perm = nil 11 | self.current_pos = nil 12 | end 13 | end 14 | 15 | 16 | function DataPointer:load_next_batch(batch_size) 17 | if self.n_total <= 0 then 18 | return nil 19 | end 20 | 21 | if batch_size == 0 then 22 | return nil 23 | 
end

    -- get indices
    local indices
    if batch_size + self.current_pos - 1 <= self.n_total then
        -- the whole batch fits before the end of the permutation
        indices = self.idx_perm:narrow(1, self.current_pos, batch_size)
    else
        -- wrap around: take the tail of the permutation, then `rest`
        -- entries from its head
        -- NOTE(review): if batch_size is large enough that `rest` exceeds
        -- n_total the second narrow fails -- presumably callers keep
        -- batch_size <= n_total; confirm.
        local rest = batch_size + self.current_pos - 1 - self.n_total

        local part1 = self.idx_perm:narrow(1, self.current_pos, (self.n_total - self.current_pos + 1))
        local part2 = self.idx_perm:narrow(1, 1, rest)
        indices = torch.cat(part1, part2)
    end


    -- update pointer
    self.current_pos = self.current_pos + batch_size
    -- Reset only once the pointer has moved PAST the last index.  With `>=`
    -- the pointer was reset while it still pointed at the final entry that
    -- had not been served yet, so that sample was dropped from the epoch.
    if self.current_pos > self.n_total then
        -- reset to the initial position
        self.current_pos = 1

        -- reshuffle the images
        self.idx_perm = torch.randperm(self.n_total)
    end

    return indices
end

--------------------------------------------------------------------------------
/src/experiment_KITTI/criterion/absolute_depth_negative_cos.lua:
--------------------------------------------------------------------------------
-- require 'cunn'
require 'nn'

-- depth to normal
require '../models/world_coord_to_normal'
require '../models/img_coord_to_world_coord'

-- sub criterions
require './normal_negative_cos'

local absolute_depth_negative_cos, parent = torch.class('nn.absolute_depth_negative_cos', 'nn.Criterion')

-- Criterion combining an MSE loss on depth with a negative-cosine loss on the
-- normals obtained from the predicted depth.
--   w_normal: weight applied to the normal loss term.
-- NOTE(review): `:cuda()` is called although `require 'cunn'` is commented
-- out above -- presumably cunn is loaded by the caller; confirm.
function absolute_depth_negative_cos:__init(w_normal)
    print(string.format(">>>>>>>>>>>>>>>>>>>>>>Criterion: absolute_depth_negative_cos() "))
    parent.__init(self)
    self.depth_crit = nn.MSECriterion():cuda()
    self.normal_crit = nn.normal_negative_cos()
    -- depth map -> world coordinates -> normals
    self.depth_to_normal = nn.Sequential()
    self.depth_to_normal:add(nn.img_coord_to_world_coord())
    self.depth_to_normal:add(world_coord_to_normal())
    self.depth_to_normal = self.depth_to_normal:cuda()
    self.w_normal = w_normal

    -- last computed value of each loss term
    self.__loss_normal = 0
    self.__loss_absolute_depth = 0
end

function absolute_depth_negative_cos:updateOutput(input, target)
    -- the input is a tensor that represents the depth map
    -- the target is a table: target[1] carries `n_sample` and `full_metric_depth`
    -- (the depth supervision fed to the MSE criterion), target[2] carries
    -- `n_sample` plus the normal info consumed by the normal criterion.
    -- The first n_depth entries of the input are scored against the depth
    -- target, the remaining entries against the normal target.
    local n_depth = target[1].n_sample
    local n_normal = target[2].n_sample

    self.output = 0
    self.__loss_absolute_depth = 0
    self.__loss_normal = 0

    if n_depth > 0 then
        self.__loss_absolute_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1].full_metric_depth) -- to test
        self.output = self.output + self.__loss_absolute_depth
    end
    if n_normal > 0 then -- to test
        -- first go through the depth->normal transformation: ---- to test
        local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1))
        -- then go through the criterion
        self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2])
        self.output = self.output + self.__loss_normal
    end

    return self.output
end

function absolute_depth_negative_cos:updateGradInput(input, target)
    -- the input is a tensor that represents the depth map
    -- the target is a table: target[1] carries the depth supervision
    -- (`n_sample`, `full_metric_depth`), target[2] the normal supervision.
    -- Relies on a preceding updateOutput call: the normal branch reads
    -- self.depth_to_normal.output.

    -- validate before target is indexed (the check used to run afterwards)
    assert( torch.type(target) == 'table' );

    -- pre-allocate memory and reset gradient to 0
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
    end
    -- resizeAs does not clear the buffer; zero it so entries that neither
    -- branch below writes do not keep stale values
    self.gradInput:zero()

    local n_depth = target[1].n_sample
    local n_normal = target[2].n_sample

    if n_depth > 0 then
        self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1].full_metric_depth)) -- to test
    end
    if n_normal > 0 then -- to test
        -- back-propagate the normal criterion through the depth->normal transformation
        self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) )
        self.gradInput:sub(n_depth+1, -1):mul(self.w_normal)
    end

    return self.gradInput
end
--------------------------------------------------------------------------------
/src/experiment_KITTI/criterion/normal_l2.lua:
--------------------------------------------------------------------------------
require 'cunn'

local normal_l2, parent = torch.class('nn.normal_l2', 'nn.Criterion')


function normal_l2:__init()
    print(">>>>>>>>>>>>>>>>> normal loss = normal l2 loss")
    parent.__init(self)
    self.buffer = torch.Tensor()
end



function normal_l2:updateOutput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!
-- the loss is the squared L2 distance between the predicted and the ground-truth normals, averaged over all labelled points
    self.output = 0
    local n_point_total = 0
    local cpu_input = input

    for batch_idx = 1 , cpu_input:size(1) do
        n_point_total = n_point_total + target[batch_idx].n_point

        local x_arr = target[batch_idx].x -- to check: the length of x vary with each sample!!!!!
        local y_arr = target[batch_idx].y

        local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked

        -- pick the columns x_arr, then the row y_arr[i] of column i: one predicted normal per labelled point
        local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze()
        local ground_truth_arr = target[batch_idx].normal

        self.output = self.output + torch.sum( torch.pow(torch.csub(normal_arr, ground_truth_arr),2) ) -- sum of squared differences
    end

    -- keep self.output equal to the value returned (it used to hold the un-normalised sum)
    self.output = self.output / n_point_total
    return self.output
end



function normal_l2:updateGradInput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!

    -- the loss is the squared L2 distance, so the gradient at a labelled pixel is 2 * (prediction - ground truth)

    -- only accept one single point!!!!
    -- (only x[{1}], y[{1}] of each sample receive a gradient below)

    -- pre-allocate memory and reset gradient to 0
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
    end

    self.gradInput:zero()



    local n_point_total = 0
    local cpu_input = input

    for batch_idx = 1 , cpu_input:size(1) do

        n_point_total = n_point_total + target[batch_idx].n_point
        local x = target[batch_idx].x[{1}]
        local y = target[batch_idx].y[{1}]

        local batch_input = cpu_input[{batch_idx, {}}]

        local ground_truth_arr = target[batch_idx].normal

        self.gradInput[{batch_idx,{}, y, x}]:zero()
        self.gradInput[{batch_idx,{}, y, x}]:copy(batch_input[{{}, y, x}])
        self.gradInput[{batch_idx,{}, y, x}]:csub(ground_truth_arr)
        self.gradInput[{batch_idx,{}, y, x}]:mul(2)
    end
    -- io.read()
    return self.gradInput:div( n_point_total )
end
--------------------------------------------------------------------------------
/src/experiment_KITTI/criterion/normal_neg_loss_fast.lua:
--------------------------------------------------------------------------------
require 'cunn'

local normal_negative_cos_fast, parent = torch.class('nn.normal_negative_cos_fast', 'nn.Criterion')


function normal_negative_cos_fast:__init()
    parent.__init(self)
    self.buffer = torch.Tensor()
end



function normal_negative_cos_fast:updateOutput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}.
-- Each of the 3 components is a tensor
    -- NOTE(review): the code below multiplies `input` and `target` element-wise,
    -- i.e. it uses `target` as a dense tensor shaped like `input`, not as the
    -- table described above -- confirm against the caller.
    -- We assume that the input normal has all been normalized to be unit vector!!!!!

    -- the loss is the negative cos(angle)

    local n_point_total = input:size(1) * input:size(3) * input:size(4)

    self.output = - torch.sum( torch.cmul(input, target) ) -- dot product of normals , seems quite expensive move

    -- keep self.output equal to the value returned (it used to hold the un-normalised sum)
    self.output = self.output / n_point_total
    return self.output
end



function normal_negative_cos_fast:updateGradInput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- `target` is copied straight into gradInput, so it is used here as a
    -- dense tensor with the same number of elements as `input`.
    -- We assume that the input normal has all been normalized to be unit vector!!!!!

    -- the loss is the negative cos(angle), so the gradient is -target / n_point_total



    -- pre-allocate memory and reset gradient to 0
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
    end

    self.gradInput:zero()



    local n_point_total = input:size(1) * input:size(3) * input:size(4)

    self.gradInput:copy(target)

    return self.gradInput:div( -n_point_total )
end
--------------------------------------------------------------------------------
/src/experiment_KITTI/criterion/normal_negative_cos.lua:
--------------------------------------------------------------------------------
require 'cunn'

local normal_negative_cos, parent = torch.class('nn.normal_negative_cos', 'nn.Criterion')


function normal_negative_cos:__init()
    parent.__init(self)
    self.buffer = torch.Tensor()
end



function normal_negative_cos:updateOutput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!

    -- the loss is the negative cos(angle), averaged over all labelled points
    self.output = 0

    local n_point_total = 0
    local cpu_input = input

    for batch_idx = 1 , cpu_input:size(1) do

        n_point_total = n_point_total + target[batch_idx].n_point

        local x_arr = target[batch_idx].x -- to check: the length of x vary with each sample!!!!!
        local y_arr = target[batch_idx].y

        local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked

        -- pick the columns x_arr, then the row y_arr[i] of column i: one predicted normal per labelled point
        local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze()
        local ground_truth_arr = target[batch_idx].normal


        self.output = self.output - torch.sum( torch.cmul(normal_arr, ground_truth_arr) ) -- dot product of normals , seems quite expensive move
    end

    -- keep self.output equal to the value returned (it used to hold the un-normalised sum)
    self.output = self.output / n_point_total
    return self.output
end



function normal_negative_cos:updateGradInput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!
-- the loss is the negative cos(angle), so each labelled pixel receives -(ground-truth normal) / n_point_total



    -- pre-allocate memory and reset gradient to 0
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
    end

    self.gradInput:zero()



    local n_point_total = 0
    local cpu_input = input -- is this necessary? can it be gpu data?? to check

    for batch_idx = 1 , cpu_input:size(1) do

        n_point_total = n_point_total + target[batch_idx].n_point

        local x_arr = target[batch_idx].x
        local y_arr = target[batch_idx].y

        local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked

        -- ground-truth normals with a singleton 2nd dimension inserted, moved to the GPU
        local unsqueeze = nn.Unsqueeze(2):forward( target[batch_idx].normal:double() ):cuda()

        -- p2[:, y_i, i] = normal_i (scatter along the row dimension), then
        -- column i of p2 is accumulated into column x_i of p1
        local p2 = torch.Tensor(3, cpu_input:size()[3], target[batch_idx].n_point):zero():cuda()
        local p1 = torch.Tensor(batch_input:size(1), batch_input:size(2), batch_input:size(3)):zero():cuda()
        p2:scatter(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1), unsqueeze)
        p1:indexAdd(3, x_arr, p2)

        self.gradInput[{batch_idx,{}}]:copy(p1)
    end

    return self.gradInput:div( -n_point_total )
end
--------------------------------------------------------------------------------
/src/experiment_KITTI/criterion/normal_negative_cos_cpu.lua:
--------------------------------------------------------------------------------
require 'cunn'

local normal_negative_cos, parent = torch.class('nn.normal_negative_cos_cpu', 'nn.Criterion')


function normal_negative_cos:__init()
    parent.__init(self)
    self.buffer = torch.Tensor()
end



function normal_negative_cos:updateOutput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!

    -- the loss is the negative cos(angle), averaged over all labelled points
    self.output = 0

    local n_point_total = 0
    local cpu_input = input:double() -- is this necessary? can it be gpu data?? to check

    for batch_idx = 1 , cpu_input:size(1) do

        n_point_total = n_point_total + target[batch_idx].n_point

        local x_arr = target[batch_idx].x:long() -- to check: the length of x vary with each sample!!!!!
        local y_arr = target[batch_idx].y:long()

        local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked

        -- pick the columns x_arr, then the row y_arr[i] of column i: one predicted normal per labelled point
        local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze()
        local ground_truth_arr = target[batch_idx].normal


        self.output = self.output - torch.sum( torch.cmul(normal_arr, ground_truth_arr) ) -- dot product of normals , seems quite expensive move
    end

    -- keep self.output equal to the value returned (it used to hold the un-normalised sum)
    self.output = self.output / n_point_total
    return self.output
end



function normal_negative_cos:updateGradInput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!
-- the loss is the negative cos(angle), so each labelled pixel receives -(ground-truth normal) / n_point_total



    -- pre-allocate memory and reset gradient to 0
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
    end

    self.gradInput:zero()



    local n_point_total = 0
    local cpu_input = input:double() -- is this necessary? can it be gpu data?? to check

    for batch_idx = 1 , cpu_input:size(1) do

        n_point_total = n_point_total + target[batch_idx].n_point

        local x_arr = target[batch_idx].x:long() -- to check: the length of x vary with each sample!!!!!
        local y_arr = target[batch_idx].y:long()

        local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked

        -- ground-truth normals with a singleton 2nd dimension inserted
        local unsqueeze = nn.Unsqueeze(2):forward( target[batch_idx].normal )

        -- p2[:, y_i, i] = normal_i (scatter along the row dimension), then
        -- column i of p2 is accumulated into column x_i of p1
        local p2 = torch.Tensor(3, cpu_input:size()[3], target[batch_idx].n_point):zero()
        local p1 = torch.Tensor(batch_input:size(1), batch_input:size(2), batch_input:size(3)):zero()
        p2:scatter(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1), unsqueeze)
        p1:indexAdd(3, x_arr, p2)

        self.gradInput[{batch_idx,{}}]:copy(p1)
    end

    return self.gradInput:div( -n_point_total )
end
--------------------------------------------------------------------------------
/src/experiment_KITTI/criterion/normal_negative_cos_weighted.lua:
--------------------------------------------------------------------------------
require 'cunn'

local normal_negative_cos_weighted, parent = torch.class('nn.normal_negative_cos_weighted', 'nn.Criterion')


function normal_negative_cos_weighted:__init()
    parent.__init(self)
    self.buffer = torch.Tensor()
end



function normal_negative_cos_weighted:updateOutput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor
    -- We assume that the input normal has all been normalized to be unit vector!!!!!

    -- the loss is the negative cos(angle), with each point weighted by
    -- 10.1 - 10 * (3rd component of its ground-truth normal)
    self.output = 0

    local n_point_total = 0
    local cpu_input = input

    for batch_idx = 1 , cpu_input:size(1) do

        n_point_total = n_point_total + target[batch_idx].n_point

        local x_arr = target[batch_idx].x -- to check: the length of x vary with each sample!!!!!
        local y_arr = target[batch_idx].y

        local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked

        -- pick the columns x_arr, then the row y_arr[i] of column i: one predicted normal per labelled point
        local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze()
        local ground_truth_arr = target[batch_idx].normal

        local weight = ground_truth_arr[{3,{}}]:clone():mul(-10):add(10.1) -- is it the 3rd element?
        -- replicate the per-point weight over the three normal components
        local weight_3 = torch.Tensor(3, ground_truth_arr:size(2))
        weight_3[{1,{}}]:copy(weight)
        weight_3[{2,{}}]:copy(weight)
        weight_3[{3,{}}]:copy(weight)


        self.output = self.output - torch.sum( torch.cmul( torch.cmul( normal_arr, ground_truth_arr ), weight_3:cuda()) ) -- dot product of normals , seems quite expensive move
    end

    -- keep self.output equal to the value returned (it used to hold the un-normalised sum)
    self.output = self.output / n_point_total
    return self.output
end



function normal_negative_cos_weighted:updateGradInput(input, target)
    -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps
    -- The 1st channel is the x component, 2nd is the y component, 3rd is the z component!!

    -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}.
Each of the 3 components is a tensor 58 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 59 | 60 | -- the loss is the negative cos(angle) 61 | 62 | 63 | 64 | -- pre-allocate memory and reset gradient to 0 65 | if self.gradInput then 66 | local nElement = self.gradInput:nElement() 67 | if self.gradInput:type() ~= input:type() then 68 | self.gradInput = self.gradInput:typeAs(input); 69 | end 70 | self.gradInput:resizeAs(input) 71 | end 72 | 73 | self.gradInput:zero() 74 | 75 | 76 | 77 | local n_point_total = 0 78 | local cpu_input = input -- is this necessary? can it be gpu data?? to check 79 | 80 | for batch_idx = 1 , cpu_input:size(1) do 81 | 82 | n_point_total = n_point_total + target[batch_idx].n_point 83 | 84 | local x_arr = target[batch_idx].x 85 | local y_arr = target[batch_idx].y 86 | 87 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 88 | 89 | local ground_truth_arr = target[batch_idx].normal 90 | local weight = ground_truth_arr[{3,{}}]:clone():mul(-10):add(10.1) -- is it the 3rd element? 
91 | local weighted_gt_arr = ground_truth_arr:clone() 92 | weighted_gt_arr[{1,{}}]:cmul(weight) 93 | weighted_gt_arr[{2,{}}]:cmul(weight) 94 | weighted_gt_arr[{3,{}}]:cmul(weight) 95 | 96 | local unsqueeze = nn.Unsqueeze(2):forward( weighted_gt_arr:double() ):cuda() 97 | 98 | local p2 = torch.Tensor(3, cpu_input:size()[3], target[batch_idx].n_point):zero():cuda() 99 | local p1 = torch.Tensor(batch_input:size(1), batch_input:size(2), batch_input:size(3)):zero():cuda() 100 | p2:scatter(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1), unsqueeze) 101 | p1:indexAdd(3, x_arr, p2) 102 | 103 | 104 | self.gradInput[{batch_idx,{}}]:copy(p1) 105 | 106 | 107 | end 108 | 109 | return self.gradInput:div( -n_point_total ) 110 | end 111 | -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/relative_depth_margin_log_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth_margin' 10 | require './normal_negative_cos' 11 | 12 | local relative_depth_margin_log_negative_cos, parent = torch.class('nn.relative_depth_margin_log_negative_cos', 'nn.Criterion') 13 | 14 | function relative_depth_margin_log_negative_cos:__init(w_normal, margin) -- w_normal: weight applied to the normal loss; margin: forwarded to nn.relative_depth_crit 15 | print(string.format(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_log_negative_cos() w_normal:%f, margin:%f", w_normal, margin)) -- banner names this (log) criterion so logs are not confused with the non-log variant 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit(margin) 18 | self.normal_crit = nn.normal_negative_cos() 19 | self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.depth_to_normal = self.depth_to_normal:cuda() 23 | self.w_normal = w_normal 24 | 25 | 
self.__loss_normal = 0 26 | self.__loss_relative_depth = 0 27 | end 28 | 29 | function relative_depth_margin_log_negative_cos:updateOutput(input, target) 30 | -- the input is tensor taht represents the depth map 31 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 32 | local n_depth = target[1].n_sample 33 | local n_normal = target[2].n_sample 34 | 35 | assert( torch.type(target) == 'table' ); 36 | 37 | self.output = 0 38 | self.__loss_relative_depth = 0 39 | self.__loss_normal = 0 40 | 41 | if n_depth > 0 then 42 | self.__loss_relative_depth = self.depth_crit:forward(nn.Log():cuda():forward(input:sub(1, n_depth)), target[1]) -- to test 43 | self.output = self.output + self.__loss_relative_depth 44 | end 45 | if n_normal > 0 then -- to test 46 | -- first go through the depth->normal transormation: ---- to test 47 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 48 | -- then go through the criterion 49 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 50 | self.output = self.output + self.__loss_normal 51 | end 52 | 53 | return self.output 54 | end 55 | 56 | function relative_depth_margin_log_negative_cos:updateGradInput(input, target) 57 | -- the input is tensor taht represents the depth map 58 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
59 | 60 | -- pre-allocate memory and reset gradient to 0 61 | if self.gradInput then 62 | local nElement = self.gradInput:nElement() 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | end 68 | 69 | local n_depth = target[1].n_sample 70 | local n_normal = target[2].n_sample 71 | 72 | assert( torch.type(target) == 'table' ); 73 | 74 | if n_depth > 0 then 75 | self.gradInput:sub(1, n_depth):copy( self.depth_crit:backward(nn.Log():cuda():forward(input:sub(1, n_depth)), target[1]) ) -- to test 76 | self.gradInput:sub(1, n_depth):copy( nn.Log():cuda():backward(input:sub(1, n_depth), self.gradInput:sub(1, n_depth)) ) 77 | end 78 | if n_normal > 0 then -- to test 79 | -- then go through the criterion 80 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 81 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 82 | end 83 | 84 | return self.gradInput 85 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/relative_depth_margin_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth_margin' 10 | require './normal_negative_cos' 11 | 12 | local relative_depth_negative_cos, parent = torch.class('nn.relative_depth_negative_cos', 'nn.Criterion') 13 | 14 | function relative_depth_negative_cos:__init(w_normal, margin) 15 | print(string.format(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos() w_normal:%f, margin:%f", w_normal, margin)) 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit(margin) 18 | 
self.normal_crit = nn.normal_negative_cos() 19 | self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.depth_to_normal = self.depth_to_normal:cuda() 23 | self.w_normal = w_normal 24 | 25 | self.__loss_normal = 0 26 | self.__loss_relative_depth = 0 27 | end 28 | 29 | function relative_depth_negative_cos:updateOutput(input, target) 30 | -- the input is tensor taht represents the depth map 31 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 32 | local n_depth = target[1].n_sample 33 | local n_normal = target[2].n_sample 34 | 35 | assert( torch.type(target) == 'table' ); 36 | 37 | self.output = 0 38 | self.__loss_relative_depth = 0 39 | self.__loss_normal = 0 40 | 41 | if n_depth > 0 then 42 | self.__loss_relative_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 43 | self.output = self.output + self.__loss_relative_depth 44 | end 45 | if n_normal > 0 then -- to test 46 | -- first go through the depth->normal transormation: ---- to test 47 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 48 | -- then go through the criterion 49 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 50 | self.output = self.output + self.__loss_normal 51 | end 52 | 53 | return self.output 54 | end 55 | 56 | function relative_depth_negative_cos:updateGradInput(input, target) 57 | -- the input is tensor taht represents the depth map 58 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
59 | 60 | -- pre-allocate memory and reset gradient to 0 61 | if self.gradInput then 62 | local nElement = self.gradInput:nElement() 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | end 68 | 69 | local n_depth = target[1].n_sample 70 | local n_normal = target[2].n_sample 71 | 72 | assert( torch.type(target) == 'table' ); 73 | 74 | if n_depth > 0 then 75 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 76 | end 77 | if n_normal > 0 then -- to test 78 | -- then go through the criterion 79 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 80 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 81 | end 82 | 83 | return self.gradInput 84 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/relative_depth_margin_negative_cos_var.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- depth variance loss 9 | require './depth_var_loss' 10 | 11 | -- sub criterions 12 | require './relative_depth_margin' 13 | require './normal_negative_cos' 14 | 15 | 16 | local relative_depth_margin_negative_cos_var, parent = torch.class('nn.relative_depth_margin_negative_cos_var', 'nn.Criterion') 17 | 18 | function relative_depth_margin_negative_cos_var:__init(w_normal, margin, d_var_thresh) 19 | print("\n>>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos_var()") 20 | parent.__init(self) 21 | self.depth_crit = nn.relative_depth_crit(margin) 22 | self.normal_crit = nn.normal_negative_cos() 23 | self.depth_to_normal = nn.Sequential() 
24 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 25 | self.depth_to_normal:add(world_coord_to_normal()) 26 | self.depth_to_normal = self.depth_to_normal:cuda() 27 | self.w_normal = w_normal 28 | 29 | 30 | -- depth variance 31 | self.w_d_var = 1 32 | print(string.format("\t\tw_normal=%f, margin=%f, w_d_var=%f", w_normal, margin, self.w_d_var)) 33 | self.nn_depth_var = nn.depth_var_loss(d_var_thresh):cuda() 34 | 35 | 36 | self.__loss_normal = 0 37 | self.__loss_relative_depth = 0 38 | end 39 | 40 | function relative_depth_margin_negative_cos_var:updateOutput(input, target) 41 | -- the input is tensor taht represents the depth map 42 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 43 | local n_depth = target[1].n_sample 44 | local n_normal = target[2].n_sample 45 | 46 | assert( torch.type(target) == 'table' ); 47 | 48 | self.output = 0 49 | self.__loss_relative_depth = 0 50 | self.__loss_normal = 0 51 | 52 | if n_depth > 0 then 53 | self.__loss_relative_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 54 | self.output = self.output + self.__loss_relative_depth 55 | 56 | -- the depth variance loss 57 | self.output = self.output + self.w_d_var * self.nn_depth_var:forward(input:sub(1, n_depth), nil) 58 | end 59 | if n_normal > 0 then -- to test 60 | -- first go through the depth->normal transormation: ---- to test 61 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 62 | -- then go through the criterion 63 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 64 | self.output = self.output + self.__loss_normal 65 | end 66 | 67 | return self.output 68 | end 69 | 70 | function relative_depth_margin_negative_cos_var:updateGradInput(input, target) 71 | -- the input is tensor taht represents the depth map 72 | -- the target is a table, where the first component is a table 
that contains relative depth info, and the second component is a table that contains normal info. 73 | 74 | -- pre-allocate memory and reset gradient to 0 75 | if self.gradInput then 76 | local nElement = self.gradInput:nElement() 77 | if self.gradInput:type() ~= input:type() then 78 | self.gradInput = self.gradInput:typeAs(input); 79 | end 80 | self.gradInput:resizeAs(input) 81 | end 82 | 83 | local n_depth = target[1].n_sample 84 | local n_normal = target[2].n_sample 85 | 86 | assert( torch.type(target) == 'table' ); 87 | 88 | if n_depth > 0 then 89 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 90 | 91 | -- the depth variance loss 92 | local temp = self.nn_depth_var:backward(input:sub(1, n_depth), nil) 93 | temp:mul(self.w_d_var) 94 | self.gradInput:sub(1, n_depth):add(temp) 95 | end 96 | if n_normal > 0 then -- to test 97 | -- then go through the criterion 98 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 99 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 100 | end 101 | 102 | return self.gradInput 103 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/relative_depth_margin_negative_cos_var_cpu.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- --depth to normal 5 | -- require '../models/world_coord_to_normal' 6 | -- require '../models/img_coord_to_world_coord' 7 | 8 | -- -- depth variance loss 9 | -- require './depth_var_loss' 10 | 11 | -- sub criterions 12 | require './relative_depth_margin_cpu' 13 | require './normal_negative_cos_cpu' 14 | 15 | local relative_depth_margin_negative_cos_var_cpu, parent = torch.class('nn.relative_depth_margin_negative_cos_var_cpu', 'nn.Criterion') 16 | 17 | function 
relative_depth_margin_negative_cos_var_cpu:__init(w_normal, margin, d_var_thresh) 18 | print("\n>>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos_var_cpu()") 19 | 20 | parent.__init(self) 21 | self.depth_crit = nn.relative_depth_crit_cpu(margin) 22 | self.normal_crit = nn.normal_negative_cos_cpu() 23 | self.depth_to_normal = nn.Sequential() 24 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 25 | self.depth_to_normal:add(world_coord_to_normal()) 26 | self.w_normal = w_normal 27 | 28 | 29 | -- depth variance 30 | self.w_d_var = 1 31 | print(string.format("\t\tw_normal=%f, margin=%f, w_d_var=%f", w_normal, margin, self.w_d_var)) 32 | self.nn_depth_var = nn.depth_var_loss(d_var_thresh):cuda() 33 | 34 | 35 | end 36 | 37 | function relative_depth_margin_negative_cos_var_cpu:updateOutput(input, target) 38 | -- the input is tensor taht represents the depth map 39 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
40 | local n_depth = target[1].n_sample 41 | local n_normal = target[2].n_sample 42 | 43 | assert( torch.type(target) == 'table' ); 44 | 45 | self.output = 0 46 | if n_depth > 0 then 47 | self.output = self.output + self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 48 | 49 | -- the depth variance loss 50 | self.output = self.output + self.w_d_var * self.nn_depth_var:forward(input:sub(1, n_depth), nil) 51 | end 52 | if n_normal > 0 then -- to test 53 | -- first go through the depth->normal transormation: ---- to test 54 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 55 | -- then go through the criterion 56 | self.output = self.output + self.w_normal * self.normal_crit:forward( normal, target[2]) 57 | end 58 | 59 | return self.output 60 | end 61 | 62 | function relative_depth_margin_negative_cos_var_cpu:updateGradInput(input, target) 63 | -- the input is tensor taht represents the depth map 64 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
65 | 66 | -- pre-allocate memory and reset gradient to 0 67 | if self.gradInput then 68 | local nElement = self.gradInput:nElement() 69 | if self.gradInput:type() ~= input:type() then 70 | self.gradInput = self.gradInput:typeAs(input); 71 | end 72 | self.gradInput:resizeAs(input) 73 | end 74 | 75 | local n_depth = target[1].n_sample 76 | local n_normal = target[2].n_sample 77 | 78 | assert( torch.type(target) == 'table' ); 79 | 80 | if n_depth > 0 then 81 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 82 | 83 | -- the depth variance loss 84 | local temp = self.nn_depth_var:backward(input:sub(1, n_depth), nil) 85 | temp:mul(self.w_d_var) 86 | self.gradInput:sub(1, n_depth):add(temp) 87 | end 88 | if n_normal > 0 then -- to test 89 | -- then go through the criterion 90 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 91 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 92 | end 93 | 94 | return self.gradInput 95 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/relative_depth_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth' 10 | require './normal_negative_cos' 11 | 12 | local relative_depth_negative_cos, parent = torch.class('nn.relative_depth_negative_cos', 'nn.Criterion') 13 | 14 | function relative_depth_negative_cos:__init(w_normal) 15 | print(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_negative_cos()") 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit() 18 | self.normal_crit = nn.normal_negative_cos() 19 | 
self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.depth_to_normal = self.depth_to_normal:cuda() 23 | self.w_normal = w_normal 24 | 25 | self.__loss_normal = 0 26 | self.__loss_relative_depth = 0 27 | end 28 | 29 | function relative_depth_negative_cos:updateOutput(input, target) 30 | -- the input is tensor taht represents the depth map 31 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 32 | local n_depth = target[1].n_sample 33 | local n_normal = target[2].n_sample 34 | 35 | assert( torch.type(target) == 'table' ); 36 | 37 | self.output = 0 38 | self.__loss_relative_depth = 0 39 | self.__loss_normal = 0 40 | 41 | if n_depth > 0 then 42 | self.__loss_relative_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 43 | self.output = self.output + self.__loss_relative_depth 44 | end 45 | if n_normal > 0 then -- to test 46 | -- first go through the depth->normal transormation: ---- to test 47 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 48 | -- then go through the criterion 49 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 50 | self.output = self.output + self.__loss_normal 51 | end 52 | 53 | return self.output 54 | end 55 | 56 | function relative_depth_negative_cos:updateGradInput(input, target) 57 | -- the input is tensor taht represents the depth map 58 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
59 | 60 | -- pre-allocate memory and reset gradient to 0 61 | if self.gradInput then 62 | local nElement = self.gradInput:nElement() 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | end 68 | 69 | local n_depth = target[1].n_sample 70 | local n_normal = target[2].n_sample 71 | 72 | assert( torch.type(target) == 'table' ); 73 | 74 | if n_depth > 0 then 75 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 76 | end 77 | if n_normal > 0 then -- to test 78 | -- then go through the criterion 79 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 80 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 81 | end 82 | 83 | return self.gradInput 84 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/relative_depth_negative_cos_cpu.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | -- require '../models/world_coord_to_normal' 6 | -- require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth_cpu' 10 | require './normal_negative_cos_cpu' 11 | 12 | local relative_depth_negative_cos, parent = torch.class('nn.relative_depth_negative_cos_cpu', 'nn.Criterion') 13 | 14 | function relative_depth_negative_cos:__init(w_normal) 15 | print(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_negative_cos()") 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit_cpu() 18 | self.normal_crit = nn.normal_negative_cos_cpu() 19 | self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | 
self.w_normal = w_normal 23 | end 24 | 25 | function relative_depth_negative_cos:updateOutput(input, target) 26 | -- the input is tensor taht represents the depth map 27 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 28 | local n_depth = target[1].n_sample 29 | local n_normal = target[2].n_sample 30 | 31 | assert( torch.type(target) == 'table' ); 32 | 33 | self.output = 0 34 | if n_depth > 0 then 35 | self.output = self.output + self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 36 | end 37 | if n_normal > 0 then -- to test 38 | -- first go through the depth->normal transormation: ---- to test 39 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 40 | -- then go through the criterion 41 | self.output = self.output + self.w_normal * self.normal_crit:forward( normal, target[2]) 42 | end 43 | 44 | return self.output 45 | end 46 | 47 | function relative_depth_negative_cos:updateGradInput(input, target) 48 | -- the input is tensor taht represents the depth map 49 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
50 | 51 | -- pre-allocate memory and reset gradient to 0 52 | if self.gradInput then 53 | local nElement = self.gradInput:nElement() 54 | if self.gradInput:type() ~= input:type() then 55 | self.gradInput = self.gradInput:typeAs(input); 56 | end 57 | self.gradInput:resizeAs(input) 58 | end 59 | 60 | local n_depth = target[1].n_sample 61 | local n_normal = target[2].n_sample 62 | 63 | assert( torch.type(target) == 'table' ); 64 | 65 | if n_depth > 0 then 66 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 67 | end 68 | if n_normal > 0 then -- to test 69 | -- then go through the criterion 70 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 71 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 72 | end 73 | 74 | return self.gradInput 75 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/criterion/scale_inv_depth_loss.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local scale_inv_depth_loss, parent = torch.class('nn.scale_inv_depth_loss', 'nn.Criterion') 4 | 5 | 6 | function scale_inv_depth_loss:__init() 7 | parent.__init(self) 8 | self.buffer = torch.Tensor() 9 | end 10 | 11 | 12 | 13 | function scale_inv_depth_loss:updateOutput(input, target) 14 | -- The input[1] and target are both 4D tensors, [batchSize, 4, height, width], and represents the normal maps 15 | -- input[2] is a mask that denotes which locations are valid (1 is valid) [batchSize, 4, height, width] 16 | -- the loss is (d1 - d2)^2 / (d1 + d2)^2 17 | 18 | 19 | self.output = 0 20 | 21 | local denominator = torch.pow(input[1] - target, 2) 22 | local nominator = torch.pow(input[1] + target, 2) 23 | 24 | local zero_mask = nominator:eq(0) 25 | nominator[zero_mask] = 1e-7 26 | denominator:cdiv(nominator) 27 | 
28 | denominator:cmul(input[2]) 29 | self.output = torch.sum(denominator) 30 | 31 | 32 | return self.output / torch.sum(input[2]) 33 | end 34 | 35 | 36 | 37 | function scale_inv_depth_loss:updateGradInput(input, target) 38 | -- The input[1] and target are both 4D tensors, [batchSize, 4, height, width], and represents the normal maps 39 | -- input[2] is a mask that denotes which locations are valid (1 is valid) [batchSize, 4, height, width] 40 | -- the loss is (d1 - d2)^2 / (d1 + d2)^2 41 | 42 | -- pre-allocate memory and reset gradient to 0 43 | if self.gradInput then 44 | local nElement = self.gradInput:nElement() 45 | if self.gradInput:type() ~= input[1]:type() then 46 | self.gradInput = self.gradInput:typeAs(input[1]); 47 | end 48 | self.gradInput:resizeAs(input[1]) 49 | end 50 | 51 | self.gradInput:zero() 52 | 53 | 54 | self.gradInput:copy(input[1]) 55 | self.gradInput:csub(target) 56 | local temp_sum_3 = torch.pow(input[1] + target, 3) 57 | 58 | self.gradInput:cmul(target) 59 | self.gradInput:mul(4) 60 | 61 | 62 | 63 | local zero_mask = temp_sum_3:eq(0) 64 | temp_sum_3[zero_mask] = 1e-7 65 | self.gradInput:cdiv(temp_sum_3) 66 | 67 | self.gradInput:cmul(input[2]) 68 | 69 | -- print(self.gradInput[{1,2,1,1}]) 70 | -- print(target[{1,2,1,1}]) 71 | -- print(input[{1,2,1,1}]) 72 | -- io.read() 73 | 74 | return self.gradInput:div(torch.sum(input[2])) 75 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/load_data.lua: -------------------------------------------------------------------------------- 1 | local train_depth_path = nil 2 | local train_normal_path = nil 3 | 4 | local valid_depth_path = nil 5 | local valid_normal_path = nil 6 | 7 | local base_data_path = '../../data/' 8 | if g_args.t_depth_file ~= '' then 9 | train_depth_path = base_data_path .. g_args.t_depth_file 10 | end 11 | 12 | if g_args.t_normal_file ~= '' then 13 | train_normal_path = base_data_path .. 
g_args.t_normal_file 14 | end 15 | 16 | if g_args.v_depth_file ~= '' then 17 | valid_depth_path = base_data_path .. g_args.v_depth_file 18 | end 19 | 20 | if g_args.v_normal_file ~= '' then 21 | valid_normal_path = base_data_path .. g_args.v_normal_file 22 | end 23 | 24 | 25 | 26 | if train_depth_path == nil then 27 | print("Error: Missing training file for depth!") 28 | os.exit(1) -- non-zero status so the calling shell can detect the failure 29 | end 30 | 31 | if valid_depth_path == nil then 32 | print("Error: Missing validation file for depth!") 33 | os.exit(1) -- non-zero status so the calling shell can detect the failure 34 | end 35 | 36 | if train_normal_path == nil and train_depth_path == nil then 37 | print("Error: No training files at all.") 38 | os.exit(1) -- non-zero status so the calling shell can detect the failure 39 | end 40 | 41 | if (train_normal_path == nil and valid_normal_path ~= nil) or (train_normal_path ~= nil and valid_normal_path == nil) then 42 | print("Error: Either train_normal_path or valid_normal_path is not valid") 43 | os.exit(1) -- non-zero status so the calling shell can detect the failure 44 | end 45 | 46 | ------------------------------------------------------------------------------------------------------------------ 47 | 48 | 49 | function TrainDataLoader() -- builds the training DataLoader; a modality whose g_args.n_max_* is 0 is disabled by passing nil as its path 50 | local _train_depth_path = train_depth_path 51 | local _train_normal_path = train_normal_path 52 | if g_args.n_max_depth == 0 then 53 | _train_depth_path = nil 54 | print("\t\t>>>>>>>>>>>>Warning: No depth training data specified!") 55 | end 56 | 57 | if g_args.n_max_normal == 0 then 58 | _train_normal_path = nil 59 | print("\t\t>>>>>>>>>>>>Warning: No normal training data specified!") 60 | end 61 | 62 | if _train_depth_path == nil and _train_normal_path == nil then -- check the effective (possibly disabled) paths; the module-level train_depth_path is never nil here, so testing it made this guard dead code 63 | assert(false, ">>>>>>>>> Error: Both normal data and depth data are nil!") 64 | end 65 | 66 | return DataLoader(_train_depth_path, _train_normal_path, g_args.n_max_depth, g_args.n_max_normal) 67 | end 68 | 69 | function ValidDataLoader() 70 | return DataLoader(valid_depth_path, valid_normal_path) 71 | end 72 | 73 | function Train_During_Valid_DataLoader() 74 | local _n_max_depth = g_args.n_max_depth 75 | local _n_max_normal = g_args.n_max_normal 76 | if 
g_args.n_max_depth == 0 then 77 | _n_max_depth = 800 78 | end 79 | if g_args.n_max_normal == 0 then 80 | _n_max_normal = 5000 81 | end 82 | 83 | return DataLoader(train_depth_path, train_normal_path, _n_max_depth, _n_max_normal) 84 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/measure.lua: -------------------------------------------------------------------------------- 1 | function rmse(a, b) 2 | return torch.sqrt((a - b):pow(2):sum() / a:numel()) 3 | end 4 | 5 | function depth_rmse_linear(y1, y2) 6 | return rmse(y1, y2) 7 | end 8 | 9 | function depth_rmse_log(y1, y2) 10 | return rmse(torch.log(y1), torch.log(y2)) 11 | end 12 | 13 | function depth_scale_invariant_rmse_log(y1, y2) -- sqrt( sum((log y1 - log y2 + alpha)^2) / (2n) ), where alpha removes the global log-scale offset 14 | local ly1 = torch.log(y1) 15 | local ly2 = torch.log(y2) 16 | local d = ly1 - ly2 17 | local n = d:numel() 18 | local alpha = -(d:sum() / n) -- alpha = mean(log y2 - log y1); the previous expression was the variance of d, which is not an offset 19 | return torch.sqrt((ly1 - ly2 + alpha):pow(2):sum() / (2 * n)) 20 | end 21 | 22 | function threshold_delta(y1, y2, thres) 23 | local to_count = torch.cmax(torch.cdiv(y1, y2), torch.cdiv(y2, y1)) 24 | return torch.le(to_count, thres):sum() / to_count:numel() 25 | end 26 | 27 | function threshold_1(y1, y2) 28 | return threshold_delta(y1, y2, 1.25) 29 | end 30 | 31 | function threshold_2(y1 ,y2) 32 | return threshold_delta(y1, y2, 1.25^2) 33 | end 34 | 35 | function threshold_3(y1, y2) 36 | return threshold_delta(y1, y2, 1.25^3) 37 | end 38 | 39 | -- y2 is the groundtruth 40 | function abs_rel_diff(y1, y2) 41 | return torch.cdiv(torch.abs(y1 - y2), y2):sum() / y1:numel() 42 | end 43 | 44 | function sqr_rel_diff(y1, y2) 45 | return torch.cdiv((y1 - y2):pow(2), y2):sum() / y1:numel() 46 | end 47 | 48 | local depth_measures = { 49 | ['delta<1.25' ] = threshold_1, 50 | ['delta<1.25^2' ] = threshold_2, 51 | ['delta<1.25^3' ] = threshold_3, 52 | ['abs_rel_diff' ] = abs_rel_diff, 53 | ['sqr_rel_diff' ] = sqr_rel_diff, 54 | ['RMSE_linear' ] = depth_rmse_linear, 
55 | ['RMSE_log' ] = depth_rmse_log, 56 | ['RMSE_log(sc.inv)'] = depth_scale_invariant_rmse_log 57 | } 58 | 59 | function measure_depth(y1, y2, mask) 60 | local measures = {} 61 | for name, func in pairs(depth_measures) do 62 | measures[name] = func(y1:maskedSelect(mask), y2:maskedSelect(mask)) 63 | end 64 | return measures 65 | end 66 | 67 | 68 | function angle(n1, n2) 69 | assert(n1:size()[1] == 3) 70 | local n11 = torch.pow(n1, 2):sum(1) 71 | local n22 = torch.pow(n2, 2):sum(1) 72 | local n12 = torch.cmul(n1, n2):sum(1) 73 | 74 | return torch.acos(torch.cdiv(n12, torch.sqrt(torch.cmul(n11, n22))):clamp(-1, 1)) 75 | end 76 | 77 | 78 | function mean_angle_error(n1, n2) 79 | return angle(n1, n2):mean() / math.pi * 180 80 | end 81 | 82 | function median_angle_error(n1, n2) 83 | return torch.median(angle(n1, n2):view(-1):double())[1] / math.pi * 180 84 | end 85 | 86 | function rmse_error(n1, n2) 87 | return torch.sqrt(angle(n1, n2):pow(2):sum() / (n1:numel() / 3)) 88 | end 89 | 90 | function good_pixels(n1, n2, thres) 91 | return torch.le(angle(n1, n2), thres):sum() / (n1:numel() / 3) 92 | end 93 | 94 | function good_pixels_11_25(n1, n2) 95 | return good_pixels(n1, n2, 11.25 * math.pi / 180) 96 | end 97 | 98 | function good_pixels_22_5(n1, n2) 99 | return good_pixels(n1, n2, 22.5 * math.pi / 180) 100 | end 101 | 102 | function good_pixels_30(n1, n2) 103 | return good_pixels(n1, n2, 30 * math.pi / 180) 104 | end 105 | 106 | local normal_measures = { 107 | ['mean_angle_error' ] = mean_angle_error, 108 | ['median_angle_error'] = median_angle_error, 109 | ['within_11.25' ] = good_pixels_11_25, 110 | ['within_22.5' ] = good_pixels_22_5, 111 | ['within_30' ] = good_pixels_30 112 | } 113 | 114 | function measure_normal(n1, n2, mask) 115 | assert(n1:size()[1] == 1) 116 | 117 | local masked_n1 = torch.zeros(3, mask:sum()) 118 | local masked_n2 = torch.zeros(3, mask:sum()) 119 | for i =1,3 do 120 | masked_n1[i] = n1[{1, i}]:maskedSelect(mask) 121 | masked_n2[i] = n2[{1, 
i}]:maskedSelect(mask) 122 | end 123 | 124 | local measures = {} 125 | for name, func in pairs(normal_measures) do 126 | measures[name] = func(masked_n1, masked_n2) -- every metric receives the same pair of masked 3 x K tensors 127 | end 128 | 129 | return measures 130 | end 131 | -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() -- builds and returns the network as an nn.Sequential: a 7x7 convolution (3 -> 128 channels) with batch norm and ReLU, followed by nested two-branch ConcatTable / CAddTable stages that pool, apply inception modules and upsample 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) -- wrap: run both branches on the same input, then add their outputs element-wise 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 
8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 
122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | 141 | 142 | return model 143 | end 144 | 145 | 146 | require('../criterion/relative_depth_negative_cos') 147 | function get_criterion() 148 | print(g_args.w_n) 149 | return nn.relative_depth_negative_cos(g_args.w_n) 150 | end 151 | 152 | 153 | function f_depth_from_model_output() 154 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 155 | return ____get_depth_from_model_output 156 | end 157 | 158 | function ____get_depth_from_model_output(model_output) 159 | return model_output 160 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3_softplus.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, 
{7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 
256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output 
depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | 144 | return model 145 | end 146 | 147 | 148 | require('../criterion/relative_depth_negative_cos') 149 | function get_criterion() 150 | print(g_args.w_n) 151 | return nn.relative_depth_negative_cos(g_args.w_n) 152 | end 153 | 154 | 155 | function f_depth_from_model_output() 156 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 157 | return ____get_depth_from_model_output 158 | end 159 | 160 | function ____get_depth_from_model_output(model_output) 161 | return model_output 162 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3_softplus_absolute_depth.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = 
nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 
x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | 144 | return model 145 | end 146 | 147 | 148 | require('../criterion/absolute_depth_negative_cos') 149 | function get_criterion() 150 | return nn.absolute_depth_negative_cos(g_args.w_n) 151 | end 152 | 153 | 154 | function f_depth_from_model_output() 155 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 156 | return ____get_depth_from_model_output 157 | end 158 | 159 | function ____get_depth_from_model_output(model_output) 160 | return model_output 161 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3_softplus_margin.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | 
--model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 
256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output 
depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) -- NOTE(review): nn.SoftPlus's constructor argument appears to be beta rather than an in-place flag -- confirm that passing true is intended 142 | 143 | 144 | return model 145 | end 146 | 147 | 148 | require('../criterion/relative_depth_margin_negative_cos') 149 | function get_criterion() -- returns the training criterion, configured from the global g_args (w_n, margin) 150 | return nn.relative_depth_negative_cos(g_args.w_n, g_args.margin) -- NOTE(review): the file required above is relative_depth_margin_negative_cos, but the class instantiated here has no "margin" in its name; the other margin model files instantiate a class named after the file they require -- confirm which class name that criterion file registers 151 | end 152 | 153 | 154 | function f_depth_from_model_output() -- returns the function that converts the model output into depth, after printing which conversion is in use 155 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 156 | return ____get_depth_from_model_output 157 | end 158 | 159 | function ____get_depth_from_model_output(model_output) -- identity: the model output is used as the depth directly 160 | return model_output 161 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3_softplus_margin_log.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() -- builds and returns the network as an nn.Sequential; requires cudnn and cunn 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 
45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, 
{5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | return model 144 | end 145 | 146 | 147 | require('../criterion/relative_depth_margin_log_negative_cos') 148 | function get_criterion() 149 | return nn.relative_depth_margin_log_negative_cos(g_args.w_n, g_args.margin) 150 | end 151 | 152 | 153 | function f_depth_from_model_output() 154 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 155 | return ____get_depth_from_model_output 156 | end 157 | 158 | function ____get_depth_from_model_output(model_output) 159 | return model_output 160 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3_softplus_margin_log_depth_from_normal.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | 
--model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 
256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output 
depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | return model 144 | end 145 | 146 | 147 | require('../criterion/relative_depth_margin_log_normal_depth') 148 | function get_criterion() 149 | return nn.relative_depth_margin_log_normal_depth(g_args.w_n, g_args.margin) 150 | end 151 | 152 | 153 | function f_depth_from_model_output() 154 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 155 | return ____get_depth_from_model_output 156 | end 157 | 158 | function ____get_depth_from_model_output(model_output) 159 | return model_output 160 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/hourglass3_softplus_margin_var.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = 
nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 
x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | 144 | return model 145 | end 146 | 147 | 148 | require('../criterion/relative_depth_margin_negative_cos_var') 149 | function get_criterion() 150 | return nn.relative_depth_margin_negative_cos_var(g_args.w_n, g_args.margin, g_args.var_thresh) 151 | end 152 | 153 | 154 | function f_depth_from_model_output() 155 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 156 | return ____get_depth_from_model_output 157 | end 158 | 159 | function ____get_depth_from_model_output(model_output) 160 | return model_output 161 | end -------------------------------------------------------------------------------- /src/experiment_KITTI/models/img_coord_to_world_coord.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require('../../common/KITTI_params') 3 | 4 | -- -- for debug only 5 | -- local g_input_width = 640 6 | -- local g_input_height = 480 7 | 8 | -- local g_fx_rgb = 5.1885790117450188e+02; 9 | -- local g_fy_rgb = -5.1946961112127485e+02; 10 | -- local g_cx_rgb = 3.2558244941119034e+02; 11 | -- local g_cy_rgb = 
2.5373616633400465e+02; 12 | 13 | local img_coord_to_world_coord, parent = torch.class('nn.img_coord_to_world_coord', 'nn.Module') 14 | 15 | 16 | function img_coord_to_world_coord:__init() 17 | parent.__init(self) 18 | self.constant_x = torch.Tensor(g_input_height, g_input_width) -- this should be cuda tensor, maybe 19 | self.constant_y = torch.Tensor(g_input_height, g_input_width) 20 | for y = 1 , g_input_height do -- to test 21 | for x = 1 , g_input_width do 22 | self.constant_x[{y,x}] = (x - g_cx_rgb) / g_fx_rgb 23 | self.constant_y[{y,x}] = (y - g_cy_rgb) / g_fy_rgb 24 | end 25 | end 26 | end 27 | 28 | function img_coord_to_world_coord:updateOutput(input) -- the input is depth map, haven't checked the ouput though 29 | if self.output then 30 | if self.output:type() ~= input:type() then 31 | self.output = self.output:typeAs(input); 32 | end 33 | self.output:resize(input:size(1), 3, input:size(3), input:size(4)) 34 | 35 | if self.constant_x:type() ~= input:type() then 36 | self.constant_x = self.constant_x:typeAs(input); 37 | self.constant_y = self.constant_y:typeAs(input); 38 | end 39 | end 40 | 41 | self.output[{{}, 1, {}}]:copy(input) 42 | self.output[{{}, 2, {}}]:copy(input) 43 | self.output[{{}, 3, {}}]:copy(input) 44 | 45 | for batch_idx = 1 , input:size(1) do -- this might not be the fastest way to do it 46 | self.output[{batch_idx, 1, {}}]:cmul(self.constant_x) 47 | self.output[{batch_idx, 2, {}}]:cmul(self.constant_y) 48 | end 49 | 50 | return self.output 51 | end 52 | 53 | function img_coord_to_world_coord:updateGradInput(input, gradOutput) 54 | if self.gradInput then 55 | if self.gradInput:type() ~= input:type() then 56 | self.gradInput = self.gradInput:typeAs(input); 57 | end 58 | self.gradInput:resizeAs(input) 59 | self.gradInput:zero() 60 | end 61 | 62 | for batch_idx = 1 , input:size(1) do -- this might not be the fastest way to do it 63 | self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 1, {}}], self.constant_x) 64 | 
self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 2, {}}], self.constant_y) 65 | self.gradInput[{batch_idx, {}}]:add(gradOutput[{batch_idx, 3, {}}]) 66 | end 67 | 68 | return self.gradInput 69 | end 70 | -------------------------------------------------------------------------------- /src/experiment_KITTI/models/img_coord_to_world_coord_multi_res.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require('../../common/KITTI_params') 3 | 4 | -- -- for debug only 5 | -- local g_input_width = 640 6 | -- local g_input_height = 480 7 | 8 | -- local g_fx_rgb = 5.1885790117450188e+02; 9 | -- local g_fy_rgb = -5.1946961112127485e+02; 10 | -- local g_cx_rgb = 3.2558244941119034e+02; 11 | -- local g_cy_rgb = 2.5373616633400465e+02; 12 | 13 | local img_coord_to_world_coord_multi_res, parent = torch.class('nn.img_coord_to_world_coord_multi_res', 'nn.Module') 14 | 15 | 16 | function img_coord_to_world_coord_multi_res:__init(scale) 17 | local width = g_input_width / scale 18 | local height = g_input_height / scale 19 | local _cx_rgb = g_cx_rgb / scale 20 | local _cy_rgb = g_cy_rgb / scale 21 | local _fx_rgb = g_fx_rgb / scale 22 | local _fy_rgb = g_fy_rgb / scale 23 | 24 | parent.__init(self) 25 | self.constant_x = torch.Tensor(height, width) -- this should be cuda tensor, maybe 26 | self.constant_y = torch.Tensor(height, width) 27 | for y = 1 , height do -- to test 28 | for x = 1 , width do 29 | self.constant_x[{y,x}] = (x - _cx_rgb) / _fx_rgb 30 | self.constant_y[{y,x}] = (y - _cy_rgb) / _fy_rgb 31 | end 32 | end 33 | end 34 | 35 | function img_coord_to_world_coord_multi_res:updateOutput(input) -- the input is depth map, haven't checked the ouput though 36 | if self.output then 37 | if self.output:type() ~= input:type() then 38 | self.output = self.output:typeAs(input); 39 | end 40 | self.output:resize(input:size(1), 3, input:size(3), input:size(4)) 41 | 42 | if self.constant_x:type() ~= input:type() then 43 
| self.constant_x = self.constant_x:typeAs(input); 44 | self.constant_y = self.constant_y:typeAs(input); 45 | end 46 | end 47 | 48 | self.output[{{}, 1, {}}]:copy(input) 49 | self.output[{{}, 2, {}}]:copy(input) 50 | self.output[{{}, 3, {}}]:copy(input) 51 | 52 | 53 | for batch_idx = 1 , input:size(1) do -- this might not be the fastest way to do it 54 | self.output[{batch_idx, 1, {}}]:cmul(self.constant_x) 55 | self.output[{batch_idx, 2, {}}]:cmul(self.constant_y) 56 | end 57 | 58 | return self.output 59 | end 60 | 61 | function img_coord_to_world_coord_multi_res:updateGradInput(input, gradOutput) 62 | if self.gradInput then 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | self.gradInput:zero() 68 | end 69 | 70 | for batch_idx = 1 , input:size(1) do -- this might not be the fastest way to do it 71 | self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 1, {}}], self.constant_x) 72 | self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 2, {}}], self.constant_y) 73 | self.gradInput[{batch_idx, {}}]:add(gradOutput[{batch_idx, 3, {}}]) 74 | end 75 | 76 | return self.gradInput 77 | end 78 | -------------------------------------------------------------------------------- /src/experiment_KITTI/models/layers/Residual.lua: -------------------------------------------------------------------------------- 1 | local conv = cudnn.SpatialConvolution 2 | local batchnorm = nn.SpatialBatchNormalization 3 | local relu = cudnn.ReLU 4 | 5 | -- Main convolutional block 6 | local function convBlock(numIn,numOut) 7 | return nn.Sequential() 8 | :add(batchnorm(numIn)) 9 | :add(relu(true)) 10 | :add(conv(numIn,numOut/2,1,1)) 11 | :add(batchnorm(numOut/2)) 12 | :add(relu(true)) 13 | :add(conv(numOut/2,numOut/2,3,3,1,1,1,1)) 14 | :add(batchnorm(numOut/2)) 15 | :add(relu(true)) 16 | :add(conv(numOut/2,numOut,1,1)) 17 | end 18 | 19 | -- Skip layer 20 | local function 
skipLayer(numIn,numOut) 21 | if numIn == numOut then 22 | return nn.Identity() 23 | else 24 | return nn.Sequential() 25 | :add(conv(numIn,numOut,1,1)) 26 | end 27 | end 28 | 29 | -- Residual block 30 | function Residual(numIn,numOut) 31 | return nn.Sequential() 32 | :add(nn.ConcatTable() 33 | :add(convBlock(numIn,numOut)) 34 | :add(skipLayer(numIn,numOut))) 35 | :add(nn.CAddTable(true)) 36 | end 37 | 38 | -------------------------------------------------------------------------------- /src/experiment_KITTI/models/layers/inception_new.lua: -------------------------------------------------------------------------------- 1 | 2 | function inception(input_size, config) -- activations: input_resolution * (config[1][1] + (#config - 1) * (out_a + out_b)) 3 | 4 | local concat = nn.Concat(2) 5 | 6 | -- Base 1 x 1 conv layer 7 | local conv = nn.Sequential() 8 | conv:add(cudnn.SpatialConvolution(input_size,config[1][1],1,1)) 9 | conv:add(nn.SpatialBatchNormalization(config[1][1], nil, nil, false)) 10 | conv:add(cudnn.ReLU(true)) -- input_R * config[1][1] * N 11 | concat:add(conv) 12 | 13 | -- Additional layers 14 | local num_conv = table.getn(config) 15 | for i = 2,num_conv do 16 | conv = nn.Sequential() 17 | local filt = config[i][1] 18 | local pad = (filt - 1) / 2 19 | local out_a = config[i][2] 20 | local out_b = config[i][3] 21 | -- Reduction 22 | conv:add(cudnn.SpatialConvolution(input_size,out_a,1,1)) 23 | conv:add(nn.SpatialBatchNormalization(out_a,nil,nil,false)) 24 | conv:add(cudnn.ReLU(true)) -- input_R * out_a * N 25 | -- Spatial Convolution 26 | conv:add(cudnn.SpatialConvolution(out_a,out_b,filt,filt,1,1,pad,pad)) 27 | conv:add(nn.SpatialBatchNormalization(out_b,nil,nil,false)) 28 | conv:add(cudnn.ReLU(true)) -- input_R * out_b * N 29 | concat:add(conv) 30 | end 31 | 32 | return concat 33 | 34 | end 35 | 36 | -------------------------------------------------------------------------------- /src/experiment_KITTI/validation_crit/validate_crit_NULL.lua: 
--------------------------------------------------------------------------------
require 'image'


--- Null validation criterion: performs no evaluation at all.
-- The four arguments are accepted but never read.
-- The original note on this function lists the relative depth loss per point
-- pair, the error ratio (WKDR), the average normal loss, and the average angle
-- difference between predicted and ground-truth normals. Seven values are
-- returned; what the remaining slots stand for is not visible from this file.
-- @return seven zeros
function evaluate( data_loader, model, criterion, max_n_sample )
    return 0, 0, 0, 0, 0, 0, 0
end
--------------------------------------------------------------------------------
/src/experiment_NYU/DataPointer.lua:
--------------------------------------------------------------------------------
require 'xlua'
local DataPointer = torch.class('DataPointer')

--- Create a pointer that walks a random permutation of 1..n_total.
-- When n_total is not positive, no permutation is created and
-- load_next_batch always returns nil.
-- @param n_total number of items to iterate over
function DataPointer:__init(n_total)
    self.n_total = n_total
    if self.n_total > 0 then
        self.idx_perm = torch.randperm(self.n_total)
        self.current_pos = 1
    else
        self.idx_perm = nil
        self.current_pos = nil
    end
end


--- Return the next batch_size indices from the current permutation.
-- If the batch runs past the end of the permutation it wraps around to the
-- start of the same permutation (cycling through it more than once when
-- batch_size is larger than n_total). After a batch that consumes the last
-- entry, the pointer is reset and a fresh permutation is drawn.
-- @param batch_size number of indices requested
-- @return a 1-D tensor of batch_size indices, or nil when there is no data or
--         batch_size is 0. When no wrap-around is needed, the tensor is a
--         narrowed view of the internal permutation.
function DataPointer:load_next_batch(batch_size)
    if self.n_total <= 0 then
        return nil
    end

    if batch_size == 0 then
        return nil
    end

    -- get indices
    local indices
    if batch_size + self.current_pos - 1 <= self.n_total then
        indices = self.idx_perm:narrow(1, self.current_pos, batch_size)
    else
        -- Wrap around: take what is left of the permutation, then keep
        -- reading from its start until the batch is full. Collecting the
        -- pieces in a loop keeps narrow() within bounds even when batch_size
        -- exceeds n_total.
        local parts = {}
        local remaining = batch_size
        local pos = self.current_pos
        while remaining > 0 do
            local take = math.min(remaining, self.n_total - pos + 1)
            parts[#parts + 1] = self.idx_perm:narrow(1, pos, take)
            remaining = remaining - take
            pos = 1
        end
        indices = torch.cat(parts, 1)
    end


    -- update pointer
    self.current_pos = self.current_pos + batch_size
    -- Strictly greater: when current_pos == n_total the last entry of the
    -- permutation has not been served yet, so it must not be discarded.
    if self.current_pos > self.n_total then
        -- reset to the initial position
        self.current_pos = 1

        -- reshuffle the images
        self.idx_perm = torch.randperm(self.n_total)
    end

    return indices
end

--------------------------------------------------------------------------------
/src/experiment_NYU/criterion/absolute_depth_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './normal_negative_cos' 10 | 11 | local absolute_depth_negative_cos, parent = torch.class('nn.absolute_depth_negative_cos', 'nn.Criterion') 12 | 13 | function absolute_depth_negative_cos:__init(w_normal) 14 | print(string.format(">>>>>>>>>>>>>>>>>>>>>>Criterion: absolute_depth_negative_cos() ")) 15 | parent.__init(self) 16 | self.depth_crit = nn.MSECriterion():cuda() 17 | self.normal_crit = nn.normal_negative_cos() 18 | self.depth_to_normal = nn.Sequential() 19 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 20 | self.depth_to_normal:add(world_coord_to_normal()) 21 | self.depth_to_normal = self.depth_to_normal:cuda() 22 | self.w_normal = w_normal 23 | 24 | self.__loss_normal = 0 25 | self.__loss_absolute_depth = 0 26 | end 27 | 28 | function absolute_depth_negative_cos:updateOutput(input, target) 29 | -- the input is tensor taht represents the depth map 30 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
31 | local n_depth = target[1].n_sample 32 | local n_normal = target[2].n_sample 33 | 34 | self.output = 0 35 | self.__loss_absolute_depth = 0 36 | self.__loss_normal = 0 37 | 38 | if n_depth > 0 then 39 | self.__loss_absolute_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1].full_metric_depth) -- to test 40 | self.output = self.output + self.__loss_absolute_depth 41 | end 42 | if n_normal > 0 then -- to test 43 | -- first go through the depth->normal transormation: ---- to test 44 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 45 | -- then go through the criterion 46 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 47 | self.output = self.output + self.__loss_normal 48 | end 49 | 50 | return self.output 51 | end 52 | 53 | function absolute_depth_negative_cos:updateGradInput(input, target) 54 | -- the input is tensor taht represents the depth map 55 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
56 | 57 | -- pre-allocate memory and reset gradient to 0 58 | if self.gradInput then 59 | local nElement = self.gradInput:nElement() 60 | if self.gradInput:type() ~= input:type() then 61 | self.gradInput = self.gradInput:typeAs(input); 62 | end 63 | self.gradInput:resizeAs(input) 64 | end 65 | 66 | local n_depth = target[1].n_sample 67 | local n_normal = target[2].n_sample 68 | 69 | assert( torch.type(target) == 'table' ); 70 | 71 | if n_depth > 0 then 72 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1].full_metric_depth)) -- to test 73 | end 74 | if n_normal > 0 then -- to test 75 | -- then go through the criterion 76 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 77 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 78 | end 79 | 80 | return self.gradInput 81 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/normal_l2.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local normal_l2, parent = torch.class('nn.normal_l2', 'nn.Criterion') 4 | 5 | 6 | function normal_l2:__init() 7 | print(">>>>>>>>>>>>>>>>> normal loss = normal l2 loss") 8 | parent.__init(self) 9 | self.buffer = torch.Tensor() 10 | end 11 | 12 | 13 | 14 | function normal_l2:updateOutput(input, target) 15 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 16 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 17 | 18 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 19 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 
20 | 21 | -- the loss is the negative cos(angle) 22 | self.output = 0 23 | local n_point_total = 0 24 | local cpu_input = input 25 | 26 | for batch_idx = 1 , cpu_input:size(1) do 27 | n_point_total = n_point_total + target[batch_idx].n_point 28 | 29 | local x_arr = target[batch_idx].x -- to check: the length of x vary with each sample!!!!! 30 | local y_arr = target[batch_idx].y 31 | 32 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 33 | 34 | local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze() 35 | local ground_truth_arr = target[batch_idx].normal 36 | 37 | self.output = self.output + torch.sum( torch.pow(torch.csub(normal_arr, ground_truth_arr),2) ) -- dot product of normals , seems quite expensive move 38 | end 39 | 40 | return self.output / n_point_total 41 | end 42 | 43 | 44 | 45 | function normal_l2:updateGradInput(input, target) 46 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 47 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 48 | 49 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 50 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 51 | 52 | -- the loss is the negative cos(angle) 53 | 54 | -- only accept one single point!!!! 
55 | 56 | -- pre-allocate memory and reset gradient to 0 57 | if self.gradInput then 58 | local nElement = self.gradInput:nElement() 59 | if self.gradInput:type() ~= input:type() then 60 | self.gradInput = self.gradInput:typeAs(input); 61 | end 62 | self.gradInput:resizeAs(input) 63 | end 64 | 65 | self.gradInput:zero() 66 | 67 | 68 | 69 | local n_point_total = 0 70 | local cpu_input = input 71 | 72 | for batch_idx = 1 , cpu_input:size(1) do 73 | 74 | n_point_total = n_point_total + target[batch_idx].n_point 75 | local x = target[batch_idx].x[{1}] 76 | local y = target[batch_idx].y[{1}] 77 | 78 | local batch_input = cpu_input[{batch_idx, {}}] 79 | 80 | local ground_truth_arr = target[batch_idx].normal 81 | 82 | self.gradInput[{batch_idx,{}, y, x}]:zero() 83 | self.gradInput[{batch_idx,{}, y, x}]:copy(batch_input[{{}, y, x}]) 84 | self.gradInput[{batch_idx,{}, y, x}]:csub(ground_truth_arr) 85 | self.gradInput[{batch_idx,{}, y, x}]:mul(2) 86 | end 87 | -- io.read() 88 | return self.gradInput:div( n_point_total ) 89 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/normal_neg_loss_fast.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local normal_negative_cos_fast, parent = torch.class('nn.normal_negative_cos_fast', 'nn.Criterion') 4 | 5 | 6 | function normal_negative_cos_fast:__init() 7 | parent.__init(self) 8 | self.buffer = torch.Tensor() 9 | end 10 | 11 | 12 | 13 | function normal_negative_cos_fast:updateOutput(input, target) 14 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 15 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 16 | 17 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. 
Each of the 3 components is a tensor 18 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 19 | 20 | -- the loss is the negative cos(angle) 21 | 22 | local n_point_total = input:size(1) * input:size(3) * input:size(4) 23 | 24 | self.output = - torch.sum( torch.cmul(input, target) ) -- dot product of normals , seems quite expensive move 25 | 26 | return self.output / n_point_total 27 | end 28 | 29 | 30 | 31 | function normal_negative_cos_fast:updateGradInput(input, target) 32 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 33 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 34 | 35 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 36 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 37 | 38 | -- the loss is the negative cos(angle) 39 | 40 | 41 | 42 | -- pre-allocate memory and reset gradient to 0 43 | if self.gradInput then 44 | local nElement = self.gradInput:nElement() 45 | if self.gradInput:type() ~= input:type() then 46 | self.gradInput = self.gradInput:typeAs(input); 47 | end 48 | self.gradInput:resizeAs(input) 49 | end 50 | 51 | self.gradInput:zero() 52 | 53 | 54 | 55 | local n_point_total = input:size(1) * input:size(3) * input:size(4) 56 | 57 | self.gradInput:copy(target) 58 | 59 | return self.gradInput:div( -n_point_total ) 60 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/normal_negative_cos.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local normal_negative_cos, parent = torch.class('nn.normal_negative_cos', 'nn.Criterion') 4 | 5 | 6 | function normal_negative_cos:__init() 7 | parent.__init(self) 8 | self.buffer = torch.Tensor() 9 | end 10 | 11 | 12 | 13 | function 
normal_negative_cos:updateOutput(input, target) 14 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 15 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 16 | 17 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 18 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 19 | 20 | -- the loss is the negative cos(angle) 21 | self.output = 0 22 | 23 | local n_point_total = 0 24 | local cpu_input = input 25 | 26 | for batch_idx = 1 , cpu_input:size(1) do 27 | 28 | n_point_total = n_point_total + target[batch_idx].n_point 29 | 30 | local x_arr = target[batch_idx].x -- to check: the length of x vary with each sample!!!!! 31 | local y_arr = target[batch_idx].y 32 | 33 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 34 | 35 | local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze() 36 | local ground_truth_arr = target[batch_idx].normal 37 | 38 | 39 | self.output = self.output - torch.sum( torch.cmul(normal_arr, ground_truth_arr) ) -- dot product of normals , seems quite expensive move 40 | end 41 | 42 | return self.output / n_point_total 43 | end 44 | 45 | 46 | 47 | function normal_negative_cos:updateGradInput(input, target) 48 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 49 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 50 | 51 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 52 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 
53 | 54 | -- the loss is the negative cos(angle) 55 | 56 | 57 | 58 | -- pre-allocate memory and reset gradient to 0 59 | if self.gradInput then 60 | local nElement = self.gradInput:nElement() 61 | if self.gradInput:type() ~= input:type() then 62 | self.gradInput = self.gradInput:typeAs(input); 63 | end 64 | self.gradInput:resizeAs(input) 65 | end 66 | 67 | self.gradInput:zero() 68 | 69 | 70 | 71 | local n_point_total = 0 72 | local cpu_input = input -- is this necessary? can it be gpu data?? to check 73 | 74 | for batch_idx = 1 , cpu_input:size(1) do 75 | 76 | n_point_total = n_point_total + target[batch_idx].n_point 77 | 78 | local x_arr = target[batch_idx].x 79 | local y_arr = target[batch_idx].y 80 | 81 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 82 | 83 | local unsqueeze = nn.Unsqueeze(2):forward( target[batch_idx].normal:double() ):cuda() 84 | 85 | local p2 = torch.Tensor(3, cpu_input:size()[3], target[batch_idx].n_point):zero():cuda() 86 | local p1 = torch.Tensor(batch_input:size(1), batch_input:size(2), batch_input:size(3)):zero():cuda() 87 | p2:scatter(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1), unsqueeze) 88 | p1:indexAdd(3, x_arr, p2) 89 | 90 | self.gradInput[{batch_idx,{}}]:copy(p1) 91 | end 92 | 93 | return self.gradInput:div( -n_point_total ) 94 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/normal_negative_cos_cpu.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local normal_negative_cos, parent = torch.class('nn.normal_negative_cos_cpu', 'nn.Criterion') 4 | 5 | 6 | function normal_negative_cos:__init() 7 | parent.__init(self) 8 | self.buffer = torch.Tensor() 9 | end 10 | 11 | 12 | 13 | function normal_negative_cos:updateOutput(input, target) 14 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 15 | -- The 1st channle is 
the x component, 2nd is the y component, 3rd is the z component!! 16 | 17 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 18 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 19 | 20 | -- the loss is the negative cos(angle) 21 | self.output = 0 22 | 23 | local n_point_total = 0 24 | local cpu_input = input:double() -- is this necessary? can it be gpu data?? to check 25 | 26 | for batch_idx = 1 , cpu_input:size(1) do 27 | 28 | n_point_total = n_point_total + target[batch_idx].n_point 29 | 30 | local x_arr = target[batch_idx].x:long() -- to check: the length of x vary with each sample!!!!! 31 | local y_arr = target[batch_idx].y:long() 32 | 33 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 34 | 35 | local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze() 36 | local ground_truth_arr = target[batch_idx].normal 37 | 38 | 39 | self.output = self.output - torch.sum( torch.cmul(normal_arr, ground_truth_arr) ) -- dot product of normals , seems quite expensive move 40 | end 41 | 42 | return self.output / n_point_total 43 | end 44 | 45 | 46 | 47 | function normal_negative_cos:updateGradInput(input, target) 48 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 49 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 50 | 51 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 52 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 
53 | 54 | -- the loss is the negative cos(angle) 55 | 56 | 57 | 58 | -- pre-allocate memory and reset gradient to 0 59 | if self.gradInput then 60 | local nElement = self.gradInput:nElement() 61 | if self.gradInput:type() ~= input:type() then 62 | self.gradInput = self.gradInput:typeAs(input); 63 | end 64 | self.gradInput:resizeAs(input) 65 | end 66 | 67 | self.gradInput:zero() 68 | 69 | 70 | 71 | local n_point_total = 0 72 | local cpu_input = input:double() -- is this necessary? can it be gpu data?? to check 73 | 74 | for batch_idx = 1 , cpu_input:size(1) do 75 | 76 | n_point_total = n_point_total + target[batch_idx].n_point 77 | 78 | local x_arr = target[batch_idx].x:long() -- to check: the length of x vary with each sample!!!!! 79 | local y_arr = target[batch_idx].y:long() 80 | 81 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 82 | 83 | local unsqueeze = nn.Unsqueeze(2):forward( target[batch_idx].normal ) 84 | 85 | local p2 = torch.Tensor(3, cpu_input:size()[3], target[batch_idx].n_point):zero() 86 | local p1 = torch.Tensor(batch_input:size(1), batch_input:size(2), batch_input:size(3)):zero() 87 | p2:scatter(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1), unsqueeze) 88 | p1:indexAdd(3, x_arr, p2) 89 | 90 | self.gradInput[{batch_idx,{}}]:copy(p1) 91 | end 92 | 93 | return self.gradInput:div( -n_point_total ) 94 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/normal_negative_cos_weighted.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local normal_negative_cos_weighted, parent = torch.class('nn.normal_negative_cos_weighted', 'nn.Criterion') 4 | 5 | 6 | function normal_negative_cos_weighted:__init() 7 | parent.__init(self) 8 | self.buffer = torch.Tensor() 9 | end 10 | 11 | 12 | 13 | function normal_negative_cos_weighted:updateOutput(input, target) 14 | -- The input is 4D tensor, 
[batchSize, 3, height, width], and represents the normal maps 15 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 16 | 17 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. Each of the 3 components is a tensor 18 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 19 | 20 | -- the loss is the negative cos(angle) 21 | self.output = 0 22 | 23 | local n_point_total = 0 24 | local cpu_input = input 25 | 26 | for batch_idx = 1 , cpu_input:size(1) do 27 | 28 | n_point_total = n_point_total + target[batch_idx].n_point 29 | 30 | local x_arr = target[batch_idx].x -- to check: the length of x vary with each sample!!!!! 31 | local y_arr = target[batch_idx].y 32 | 33 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 34 | 35 | local normal_arr = batch_input:index(3, x_arr):gather(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1) ):squeeze() 36 | local ground_truth_arr = target[batch_idx].normal 37 | 38 | local weight = ground_truth_arr[{3,{}}]:clone():mul(-10):add(10.1) -- is it the 3rd element? 39 | local weight_3 = torch.Tensor(3, ground_truth_arr:size(2)) 40 | weight_3[{1,{}}]:copy(weight) 41 | weight_3[{2,{}}]:copy(weight) 42 | weight_3[{3,{}}]:copy(weight) 43 | 44 | 45 | self.output = self.output - torch.sum( torch.cmul( torch.cmul( normal_arr, ground_truth_arr ), weight_3:cuda()) ) -- dot product of normals , seems quite expensive move 46 | end 47 | 48 | return self.output / n_point_total 49 | end 50 | 51 | 52 | 53 | function normal_negative_cos_weighted:updateGradInput(input, target) 54 | -- The input is 4D tensor, [batchSize, 3, height, width], and represents the normal maps 55 | -- The 1st channle is the x component, 2nd is the y component, 3rd is the z component!! 56 | 57 | -- The target is a table of the form defined in DataLoader.lua, with 3 components {x, y, normal}. 
Each of the 3 components is a tensor 58 | -- We assume that the input normal has all been normalized to be unit vector!!!!! 59 | 60 | -- the loss is the negative cos(angle) 61 | 62 | 63 | 64 | -- pre-allocate memory and reset gradient to 0 65 | if self.gradInput then 66 | local nElement = self.gradInput:nElement() 67 | if self.gradInput:type() ~= input:type() then 68 | self.gradInput = self.gradInput:typeAs(input); 69 | end 70 | self.gradInput:resizeAs(input) 71 | end 72 | 73 | self.gradInput:zero() 74 | 75 | 76 | 77 | local n_point_total = 0 78 | local cpu_input = input -- is this necessary? can it be gpu data?? to check 79 | 80 | for batch_idx = 1 , cpu_input:size(1) do 81 | 82 | n_point_total = n_point_total + target[batch_idx].n_point 83 | 84 | local x_arr = target[batch_idx].x 85 | local y_arr = target[batch_idx].y 86 | 87 | local batch_input = cpu_input[{batch_idx, {}}] -- batch_input is 3 dimension -- checked 88 | 89 | local ground_truth_arr = target[batch_idx].normal 90 | local weight = ground_truth_arr[{3,{}}]:clone():mul(-10):add(10.1) -- is it the 3rd element? 
91 | local weighted_gt_arr = ground_truth_arr:clone() 92 | weighted_gt_arr[{1,{}}]:cmul(weight) 93 | weighted_gt_arr[{2,{}}]:cmul(weight) 94 | weighted_gt_arr[{3,{}}]:cmul(weight) 95 | 96 | local unsqueeze = nn.Unsqueeze(2):forward( weighted_gt_arr:double() ):cuda() 97 | 98 | local p2 = torch.Tensor(3, cpu_input:size()[3], target[batch_idx].n_point):zero():cuda() 99 | local p1 = torch.Tensor(batch_input:size(1), batch_input:size(2), batch_input:size(3)):zero():cuda() 100 | p2:scatter(2, torch.repeatTensor(y_arr:view(1,-1),3,1):view(3,1,-1), unsqueeze) 101 | p1:indexAdd(3, x_arr, p2) 102 | 103 | 104 | self.gradInput[{batch_idx,{}}]:copy(p1) 105 | 106 | 107 | end 108 | 109 | return self.gradInput:div( -n_point_total ) 110 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/relative_depth_margin_log_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth_margin' 10 | require './normal_negative_cos' 11 | 12 | local relative_depth_margin_log_negative_cos, parent = torch.class('nn.relative_depth_margin_log_negative_cos', 'nn.Criterion') 13 | 14 | function relative_depth_margin_log_negative_cos:__init(w_normal, margin) 15 | print(string.format(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos() w_normal:%f, margin:%f", w_normal, margin)) 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit(margin) 18 | self.normal_crit = nn.normal_negative_cos() 19 | self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.depth_to_normal = self.depth_to_normal:cuda() 23 | self.w_normal = w_normal 24 | 25 | self.__loss_normal = 
0 26 | self.__loss_relative_depth = 0 27 | end 28 | 29 | function relative_depth_margin_log_negative_cos:updateOutput(input, target) 30 | -- the input is tensor taht represents the depth map 31 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 32 | local n_depth = target[1].n_sample 33 | local n_normal = target[2].n_sample 34 | 35 | assert( torch.type(target) == 'table' ); 36 | 37 | self.output = 0 38 | self.__loss_relative_depth = 0 39 | self.__loss_normal = 0 40 | 41 | if n_depth > 0 then 42 | self.__loss_relative_depth = self.depth_crit:forward(nn.Log():cuda():forward(input:sub(1, n_depth)), target[1]) -- to test 43 | self.output = self.output + self.__loss_relative_depth 44 | end 45 | if n_normal > 0 then -- to test 46 | -- first go through the depth->normal transormation: ---- to test 47 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 48 | -- then go through the criterion 49 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 50 | self.output = self.output + self.__loss_normal 51 | end 52 | 53 | return self.output 54 | end 55 | 56 | function relative_depth_margin_log_negative_cos:updateGradInput(input, target) 57 | -- the input is tensor taht represents the depth map 58 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
59 | 60 | -- pre-allocate memory and reset gradient to 0 61 | if self.gradInput then 62 | local nElement = self.gradInput:nElement() 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | end 68 | 69 | local n_depth = target[1].n_sample 70 | local n_normal = target[2].n_sample 71 | 72 | assert( torch.type(target) == 'table' ); 73 | 74 | if n_depth > 0 then 75 | self.gradInput:sub(1, n_depth):copy( self.depth_crit:backward(nn.Log():cuda():forward(input:sub(1, n_depth)), target[1]) ) -- to test 76 | self.gradInput:sub(1, n_depth):copy( nn.Log():cuda():backward(input:sub(1, n_depth), self.gradInput:sub(1, n_depth)) ) 77 | end 78 | if n_normal > 0 then -- to test 79 | -- then go through the criterion 80 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 81 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 82 | end 83 | 84 | return self.gradInput 85 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/relative_depth_margin_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth_margin' 10 | require './normal_negative_cos' 11 | 12 | local relative_depth_negative_cos, parent = torch.class('nn.relative_depth_negative_cos', 'nn.Criterion') 13 | 14 | function relative_depth_negative_cos:__init(w_normal, margin) 15 | print(string.format(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos() w_normal:%f, margin:%f", w_normal, margin)) 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit(margin) 18 | 
self.normal_crit = nn.normal_negative_cos() 19 | self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.depth_to_normal = self.depth_to_normal:cuda() 23 | self.w_normal = w_normal 24 | 25 | self.__loss_normal = 0 26 | self.__loss_relative_depth = 0 27 | end 28 | 29 | function relative_depth_negative_cos:updateOutput(input, target) 30 | -- the input is tensor taht represents the depth map 31 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 32 | local n_depth = target[1].n_sample 33 | local n_normal = target[2].n_sample 34 | 35 | assert( torch.type(target) == 'table' ); 36 | 37 | self.output = 0 38 | self.__loss_relative_depth = 0 39 | self.__loss_normal = 0 40 | 41 | if n_depth > 0 then 42 | self.__loss_relative_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 43 | self.output = self.output + self.__loss_relative_depth 44 | end 45 | if n_normal > 0 then -- to test 46 | -- first go through the depth->normal transormation: ---- to test 47 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 48 | -- then go through the criterion 49 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 50 | self.output = self.output + self.__loss_normal 51 | end 52 | 53 | return self.output 54 | end 55 | 56 | function relative_depth_negative_cos:updateGradInput(input, target) 57 | -- the input is tensor taht represents the depth map 58 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
59 | 60 | -- pre-allocate memory and reset gradient to 0 61 | if self.gradInput then 62 | local nElement = self.gradInput:nElement() 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | end 68 | 69 | local n_depth = target[1].n_sample 70 | local n_normal = target[2].n_sample 71 | 72 | assert( torch.type(target) == 'table' ); 73 | 74 | if n_depth > 0 then 75 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 76 | end 77 | if n_normal > 0 then -- to test 78 | -- then go through the criterion 79 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 80 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 81 | end 82 | 83 | return self.gradInput 84 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/relative_depth_margin_negative_cos_var.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- depth variance loss 9 | require './depth_var_loss' 10 | 11 | -- sub criterions 12 | require './relative_depth_margin' 13 | require './normal_negative_cos' 14 | 15 | 16 | local relative_depth_margin_negative_cos_var, parent = torch.class('nn.relative_depth_margin_negative_cos_var', 'nn.Criterion') 17 | 18 | function relative_depth_margin_negative_cos_var:__init(w_normal, margin, d_var_thresh) 19 | print("\n>>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos_var()") 20 | parent.__init(self) 21 | self.depth_crit = nn.relative_depth_crit(margin) 22 | self.normal_crit = nn.normal_negative_cos() 23 | self.depth_to_normal = nn.Sequential() 24 
| self.depth_to_normal:add(nn.img_coord_to_world_coord()) 25 | self.depth_to_normal:add(world_coord_to_normal()) 26 | self.depth_to_normal = self.depth_to_normal:cuda() 27 | self.w_normal = w_normal 28 | 29 | 30 | -- depth variance 31 | self.w_d_var = 1 32 | print(string.format("\t\tw_normal=%f, margin=%f, w_d_var=%f", w_normal, margin, self.w_d_var)) 33 | self.nn_depth_var = nn.depth_var_loss(d_var_thresh):cuda() 34 | 35 | 36 | self.__loss_normal = 0 37 | self.__loss_relative_depth = 0 38 | end 39 | 40 | function relative_depth_margin_negative_cos_var:updateOutput(input, target) 41 | -- the input is tensor taht represents the depth map 42 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 43 | local n_depth = target[1].n_sample 44 | local n_normal = target[2].n_sample 45 | 46 | assert( torch.type(target) == 'table' ); 47 | 48 | self.output = 0 49 | self.__loss_relative_depth = 0 50 | self.__loss_normal = 0 51 | 52 | if n_depth > 0 then 53 | self.__loss_relative_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 54 | self.output = self.output + self.__loss_relative_depth 55 | 56 | -- the depth variance loss 57 | self.output = self.output + self.w_d_var * self.nn_depth_var:forward(input:sub(1, n_depth), nil) 58 | end 59 | if n_normal > 0 then -- to test 60 | -- first go through the depth->normal transormation: ---- to test 61 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 62 | -- then go through the criterion 63 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 64 | self.output = self.output + self.__loss_normal 65 | end 66 | 67 | return self.output 68 | end 69 | 70 | function relative_depth_margin_negative_cos_var:updateGradInput(input, target) 71 | -- the input is tensor taht represents the depth map 72 | -- the target is a table, where the first component is a table 
that contains relative depth info, and the second component is a table that contains normal info. 73 | 74 | -- pre-allocate memory and reset gradient to 0 75 | if self.gradInput then 76 | local nElement = self.gradInput:nElement() 77 | if self.gradInput:type() ~= input:type() then 78 | self.gradInput = self.gradInput:typeAs(input); 79 | end 80 | self.gradInput:resizeAs(input) 81 | end 82 | 83 | local n_depth = target[1].n_sample 84 | local n_normal = target[2].n_sample 85 | 86 | assert( torch.type(target) == 'table' ); 87 | 88 | if n_depth > 0 then 89 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 90 | 91 | -- the depth variance loss 92 | local temp = self.nn_depth_var:backward(input:sub(1, n_depth), nil) 93 | temp:mul(self.w_d_var) 94 | self.gradInput:sub(1, n_depth):add(temp) 95 | end 96 | if n_normal > 0 then -- to test 97 | -- then go through the criterion 98 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 99 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 100 | end 101 | 102 | return self.gradInput 103 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/relative_depth_margin_negative_cos_var_cpu.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- --depth to normal 5 | -- require '../models/world_coord_to_normal' 6 | -- require '../models/img_coord_to_world_coord' 7 | 8 | -- -- depth variance loss 9 | -- require './depth_var_loss' 10 | 11 | -- sub criterions 12 | require './relative_depth_margin_cpu' 13 | require './normal_negative_cos_cpu' 14 | 15 | local relative_depth_margin_negative_cos_var_cpu, parent = torch.class('nn.relative_depth_margin_negative_cos_var_cpu', 'nn.Criterion') 16 | 17 | function 
relative_depth_margin_negative_cos_var_cpu:__init(w_normal, margin, d_var_thresh) 18 | print("\n>>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_margin_negative_cos_var_cpu()") 19 | 20 | parent.__init(self) 21 | self.depth_crit = nn.relative_depth_crit_cpu(margin) 22 | self.normal_crit = nn.normal_negative_cos_cpu() 23 | self.depth_to_normal = nn.Sequential() 24 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 25 | self.depth_to_normal:add(world_coord_to_normal()) 26 | self.w_normal = w_normal 27 | 28 | 29 | -- depth variance 30 | self.w_d_var = 1 31 | print(string.format("\t\tw_normal=%f, margin=%f, w_d_var=%f", w_normal, margin, self.w_d_var)) 32 | self.nn_depth_var = nn.depth_var_loss(d_var_thresh):cuda() 33 | 34 | 35 | end 36 | 37 | function relative_depth_margin_negative_cos_var_cpu:updateOutput(input, target) 38 | -- the input is tensor taht represents the depth map 39 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
40 | local n_depth = target[1].n_sample 41 | local n_normal = target[2].n_sample 42 | 43 | assert( torch.type(target) == 'table' ); 44 | 45 | self.output = 0 46 | if n_depth > 0 then 47 | self.output = self.output + self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 48 | 49 | -- the depth variance loss 50 | self.output = self.output + self.w_d_var * self.nn_depth_var:forward(input:sub(1, n_depth), nil) 51 | end 52 | if n_normal > 0 then -- to test 53 | -- first go through the depth->normal transormation: ---- to test 54 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 55 | -- then go through the criterion 56 | self.output = self.output + self.w_normal * self.normal_crit:forward( normal, target[2]) 57 | end 58 | 59 | return self.output 60 | end 61 | 62 | function relative_depth_margin_negative_cos_var_cpu:updateGradInput(input, target) 63 | -- the input is tensor taht represents the depth map 64 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
65 | 66 | -- pre-allocate memory and reset gradient to 0 67 | if self.gradInput then 68 | local nElement = self.gradInput:nElement() 69 | if self.gradInput:type() ~= input:type() then 70 | self.gradInput = self.gradInput:typeAs(input); 71 | end 72 | self.gradInput:resizeAs(input) 73 | end 74 | 75 | local n_depth = target[1].n_sample 76 | local n_normal = target[2].n_sample 77 | 78 | assert( torch.type(target) == 'table' ); 79 | 80 | if n_depth > 0 then 81 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 82 | 83 | -- the depth variance loss 84 | local temp = self.nn_depth_var:backward(input:sub(1, n_depth), nil) 85 | temp:mul(self.w_d_var) 86 | self.gradInput:sub(1, n_depth):add(temp) 87 | end 88 | if n_normal > 0 then -- to test 89 | -- then go through the criterion 90 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 91 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 92 | end 93 | 94 | return self.gradInput 95 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/relative_depth_negative_cos.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | require '../models/world_coord_to_normal' 6 | require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth' 10 | require './normal_negative_cos' 11 | 12 | local relative_depth_negative_cos, parent = torch.class('nn.relative_depth_negative_cos', 'nn.Criterion') 13 | 14 | function relative_depth_negative_cos:__init(w_normal) 15 | print(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_negative_cos()") 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit() 18 | self.normal_crit = nn.normal_negative_cos() 19 | 
self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.depth_to_normal = self.depth_to_normal:cuda() 23 | self.w_normal = w_normal 24 | 25 | self.__loss_normal = 0 26 | self.__loss_relative_depth = 0 27 | end 28 | 29 | function relative_depth_negative_cos:updateOutput(input, target) 30 | -- the input is tensor taht represents the depth map 31 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 32 | local n_depth = target[1].n_sample 33 | local n_normal = target[2].n_sample 34 | 35 | assert( torch.type(target) == 'table' ); 36 | 37 | self.output = 0 38 | self.__loss_relative_depth = 0 39 | self.__loss_normal = 0 40 | 41 | if n_depth > 0 then 42 | self.__loss_relative_depth = self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 43 | self.output = self.output + self.__loss_relative_depth 44 | end 45 | if n_normal > 0 then -- to test 46 | -- first go through the depth->normal transormation: ---- to test 47 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 48 | -- then go through the criterion 49 | self.__loss_normal = self.w_normal * self.normal_crit:forward( normal, target[2]) 50 | self.output = self.output + self.__loss_normal 51 | end 52 | 53 | return self.output 54 | end 55 | 56 | function relative_depth_negative_cos:updateGradInput(input, target) 57 | -- the input is tensor taht represents the depth map 58 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
59 | 60 | -- pre-allocate memory and reset gradient to 0 61 | if self.gradInput then 62 | local nElement = self.gradInput:nElement() 63 | if self.gradInput:type() ~= input:type() then 64 | self.gradInput = self.gradInput:typeAs(input); 65 | end 66 | self.gradInput:resizeAs(input) 67 | end 68 | 69 | local n_depth = target[1].n_sample 70 | local n_normal = target[2].n_sample 71 | 72 | assert( torch.type(target) == 'table' ); 73 | 74 | if n_depth > 0 then 75 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 76 | end 77 | if n_normal > 0 then -- to test 78 | -- then go through the criterion 79 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 80 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 81 | end 82 | 83 | return self.gradInput 84 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/relative_depth_negative_cos_cpu.lua: -------------------------------------------------------------------------------- 1 | -- require 'cunn' 2 | require 'nn' 3 | 4 | -- depth to normal 5 | -- require '../models/world_coord_to_normal' 6 | -- require '../models/img_coord_to_world_coord' 7 | 8 | -- sub criterions 9 | require './relative_depth_cpu' 10 | require './normal_negative_cos_cpu' 11 | 12 | local relative_depth_negative_cos, parent = torch.class('nn.relative_depth_negative_cos_cpu', 'nn.Criterion') 13 | 14 | function relative_depth_negative_cos:__init(w_normal) 15 | print(">>>>>>>>>>>>>>>>>>>>>>Criterion: relative_depth_negative_cos()") 16 | parent.__init(self) 17 | self.depth_crit = nn.relative_depth_crit_cpu() 18 | self.normal_crit = nn.normal_negative_cos_cpu() 19 | self.depth_to_normal = nn.Sequential() 20 | self.depth_to_normal:add(nn.img_coord_to_world_coord()) 21 | self.depth_to_normal:add(world_coord_to_normal()) 22 | self.w_normal 
= w_normal 23 | end 24 | 25 | function relative_depth_negative_cos:updateOutput(input, target) 26 | -- the input is tensor taht represents the depth map 27 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 28 | local n_depth = target[1].n_sample 29 | local n_normal = target[2].n_sample 30 | 31 | assert( torch.type(target) == 'table' ); 32 | 33 | self.output = 0 34 | if n_depth > 0 then 35 | self.output = self.output + self.depth_crit:forward(input:sub(1, n_depth), target[1]) -- to test 36 | end 37 | if n_normal > 0 then -- to test 38 | -- first go through the depth->normal transormation: ---- to test 39 | local normal = self.depth_to_normal:forward(input:sub(n_depth+1, -1)) 40 | -- then go through the criterion 41 | self.output = self.output + self.w_normal * self.normal_crit:forward( normal, target[2]) 42 | end 43 | 44 | return self.output 45 | end 46 | 47 | function relative_depth_negative_cos:updateGradInput(input, target) 48 | -- the input is tensor taht represents the depth map 49 | -- the target is a table, where the first component is a table that contains relative depth info, and the second component is a table that contains normal info. 
50 | 51 | -- pre-allocate memory and reset gradient to 0 52 | if self.gradInput then 53 | local nElement = self.gradInput:nElement() 54 | if self.gradInput:type() ~= input:type() then 55 | self.gradInput = self.gradInput:typeAs(input); 56 | end 57 | self.gradInput:resizeAs(input) 58 | end 59 | 60 | local n_depth = target[1].n_sample 61 | local n_normal = target[2].n_sample 62 | 63 | assert( torch.type(target) == 'table' ); 64 | 65 | if n_depth > 0 then 66 | self.gradInput:sub(1, n_depth):copy(self.depth_crit:backward(input:sub(1, n_depth), target[1])) -- to test 67 | end 68 | if n_normal > 0 then -- to test 69 | -- then go through the criterion 70 | self.gradInput:sub(n_depth+1, -1):copy(self.depth_to_normal:backward( input:sub(n_depth+1, -1), self.normal_crit:backward( self.depth_to_normal.output, target[2])) ) 71 | self.gradInput:sub(n_depth+1, -1):mul(self.w_normal) 72 | end 73 | 74 | return self.gradInput 75 | end -------------------------------------------------------------------------------- /src/experiment_NYU/criterion/scale_inv_depth_loss.lua: -------------------------------------------------------------------------------- 1 | require 'cunn' 2 | 3 | local scale_inv_depth_loss, parent = torch.class('nn.scale_inv_depth_loss', 'nn.Criterion') 4 | 5 | 6 | function scale_inv_depth_loss:__init() 7 | parent.__init(self) 8 | self.buffer = torch.Tensor() 9 | end 10 | 11 | 12 | 13 | function scale_inv_depth_loss:updateOutput(input, target) 14 | -- The input[1] and target are both 4D tensors, [batchSize, 4, height, width], and represents the normal maps 15 | -- input[2] is a mask that denotes which locations are valid (1 is valid) [batchSize, 4, height, width] 16 | -- the loss is (d1 - d2)^2 / (d1 + d2)^2 17 | 18 | 19 | self.output = 0 20 | 21 | local denominator = torch.pow(input[1] - target, 2) 22 | local nominator = torch.pow(input[1] + target, 2) 23 | 24 | local zero_mask = nominator:eq(0) 25 | nominator[zero_mask] = 1e-7 26 | denominator:cdiv(nominator) 27 | 28 
| denominator:cmul(input[2]) 29 | self.output = torch.sum(denominator) / torch.sum(input[2]) -- store the mask-normalised loss so self.output equals the returned value and matches the 1/sum(mask) scaling used in updateGradInput 30 | 31 | 32 | return self.output 33 | end 34 | 35 | 36 | 37 | function scale_inv_depth_loss:updateGradInput(input, target) 38 | -- The input[1] and target are both 4D tensors, [batchSize, 4, height, width], and represents the normal maps 39 | -- input[2] is a mask that denotes which locations are valid (1 is valid) [batchSize, 4, height, width] 40 | -- the loss is (d1 - d2)^2 / (d1 + d2)^2 41 | 42 | -- pre-allocate memory and reset gradient to 0 43 | if self.gradInput then 44 | local nElement = self.gradInput:nElement() 45 | if self.gradInput:type() ~= input[1]:type() then 46 | self.gradInput = self.gradInput:typeAs(input[1]); 47 | end 48 | self.gradInput:resizeAs(input[1]) 49 | end 50 | 51 | self.gradInput:zero() 52 | 53 | 54 | self.gradInput:copy(input[1]) 55 | self.gradInput:csub(target) 56 | local temp_sum_3 = torch.pow(input[1] + target, 3) 57 | 58 | self.gradInput:cmul(target) 59 | self.gradInput:mul(4) 60 | 61 | 62 | 63 | local zero_mask = temp_sum_3:eq(0) 64 | temp_sum_3[zero_mask] = 1e-7 65 | self.gradInput:cdiv(temp_sum_3) 66 | 67 | self.gradInput:cmul(input[2]) 68 | 69 | -- print(self.gradInput[{1,2,1,1}]) 70 | -- print(target[{1,2,1,1}]) 71 | -- print(input[{1,2,1,1}]) 72 | -- io.read() 73 | 74 | return self.gradInput:div(torch.sum(input[2])) 75 | end -------------------------------------------------------------------------------- /src/experiment_NYU/header.lua: -------------------------------------------------------------------------------- 1 | -- nnlib = require('nn') 2 | nnlib = require('cudnn') 3 | -------------------------------------------------------------------------------- /src/experiment_NYU/hg.lua: -------------------------------------------------------------------------------- 1 | require('./Residual') 2 | 3 | local function lin2(numIn,numOut,inp) 4 | -- Apply a fully connected nn.Linear layer (not a 1x1 convolution) 5 | local l_ = nn.Linear(numIn,numOut)(inp) 6 | 
return nnlib.ReLU(true)(nn.BatchNormalization(numOut)(l_)) 7 | end 8 | 9 | function hourglass(n, numIn, numOut, inp) 10 | local d = 64 11 | 12 | -- Upper branch 13 | local up1 = Residual(numIn,d)(inp) 14 | local up2 = Residual(d,d)(up1) 15 | local up4 = Residual(d,numOut)(up2) 16 | 17 | -- Lower branch 18 | local pool = nnlib.SpatialMaxPooling(2,2,2,2)(inp) 19 | local low1 = Residual(numIn,d)(pool) 20 | local low2 = Residual(d,d)(low1) 21 | local low5 = Residual(d,d)(low2) 22 | local low6 23 | if n > 1 then 24 | low6 = hourglass(n-1,d,numOut,low5) 25 | else 26 | low6 = Residual(d,numOut)(low5) 27 | end 28 | local low7 = Residual(numOut,numOut)(low6) 29 | local up5 = nn.SpatialUpSamplingNearest(2)(low7) 30 | 31 | -- Bring two branches together 32 | return nn.CAddTable()({up4,up5}) 33 | end 34 | 35 | function hourglass_inter_supervise(n, numIn, numOut, inp) -- returns the merged hourglass node and outNode, the LogSoftMax classifier node built at the innermost level (n <= 1) 36 | local outNode 37 | 38 | local d = 64 39 | 40 | -- Upper branch 41 | local up1 = Residual(numIn,d)(inp) 42 | local up2 = Residual(d,d)(up1) 43 | local up4 = Residual(d,numOut)(up2) 44 | 45 | -- Lower branch 46 | local pool = nnlib.SpatialMaxPooling(2,2,2,2)(inp) 47 | local low1 = Residual(numIn,d)(pool) 48 | local low2 = Residual(d,d)(low1) 49 | local low5 = Residual(d,d)(low2) 50 | local low6 51 | if n > 1 then 52 | low6, outNode = hourglass_inter_supervise(n-1,d,numOut,low5) -- recurse into this function (not hourglass, which returns a single value) so outNode propagates up from the innermost level 53 | else 54 | low6 = Residual(d,numOut)(low5) 55 | 56 | local flat = nn.View(-1,numOut*4*4):setNumInputDims(4)(low6) 57 | local l1 = lin2(numOut*4*4,1024,flat) 58 | local l2 = lin2(1024,1024,l1) 59 | outNode = nn.LogSoftMax()(nn.Linear(1024,g_nClass)(l2)) 60 | end 61 | local low7 = Residual(numOut,numOut)(low6) 62 | local up5 = nn.SpatialUpSamplingNearest(2)(low7) 63 | 64 | -- Bring two branches together 65 | return nn.CAddTable()({up4,up5}), outNode 66 | end 67 | 68 | function hourglass_identity_skip(n, numInOut, inp) 69 | local d = 64 70 | 71 | -- Upper branch: Identity connections 72 | -- local up1 = Residual(numIn,d)(inp) 73 | -- local up2 = 
Residual(d,d)(up1) 74 | -- local up4 = Residual(d,numOut)(up2) 75 | 76 | -- Lower branch 77 | local pool = nnlib.SpatialMaxPooling(2,2,2,2)(inp) 78 | local low1 = Residual(numInOut,d)(pool) 79 | local low2 = Residual(d,d)(low1) 80 | local low5 = Residual(d,d)(low2) 81 | local low6 82 | if n > 1 then 83 | low6 = hourglass_identity_skip(n-1,d,low5) 84 | else 85 | low6 = Residual(d,d)(low5) 86 | end 87 | local low7 = Residual(d,numInOut)(low6) 88 | local up5 = nn.SpatialUpSamplingNearest(2)(low7) 89 | 90 | -- Bring two branches together 91 | return nn.CAddTable()({inp,up5}) 92 | 93 | end 94 | 95 | function hg_module(n, numIn, numOut) 96 | local inp = nnlib.Identity()() 97 | local hg = hourglass(n, numIn, numOut, inp) 98 | return nn.gModule({inp}, {hg}) 99 | end 100 | 101 | function lin(numIn,numOut,inp) 102 | -- Apply 1x1 convolution, no stride, no padding 103 | local l_ = nnlib.SpatialConvolution(numIn,numOut,1,1,1,1,0,0)(inp) 104 | return nnlib.ReLU(true)(nn.SpatialBatchNormalization(numOut)(l_)) 105 | end 106 | 107 | -------------------------------------------------------------------------------- /src/experiment_NYU/load_data.lua: -------------------------------------------------------------------------------- 1 | local train_depth_path = nil 2 | local train_normal_path = nil 3 | 4 | local valid_depth_path = nil 5 | local valid_normal_path = nil 6 | 7 | local base_data_path = '../../data/' 8 | if g_args.t_depth_file ~= '' then 9 | train_depth_path = base_data_path .. g_args.t_depth_file 10 | end 11 | 12 | if g_args.t_normal_file ~= '' then 13 | train_normal_path = base_data_path .. g_args.t_normal_file 14 | end 15 | 16 | if g_args.v_depth_file ~= '' then 17 | valid_depth_path = base_data_path .. g_args.v_depth_file 18 | end 19 | 20 | if g_args.v_normal_file ~= '' then 21 | valid_normal_path = base_data_path .. 
g_args.v_normal_file 22 | end 23 | 24 | 25 | 26 | if train_depth_path == nil then 27 | print("Error: Missing training file for depth!") 28 | os.exit() 29 | end 30 | 31 | if valid_depth_path == nil then 32 | print("Error: Missing validation file for depth!") 33 | os.exit() 34 | end 35 | 36 | if train_normal_path == nil and train_depth_path == nil then 37 | print("Error: No training files at all.") 38 | os.exit() 39 | end 40 | 41 | if (train_normal_path == nil and valid_normal_path ~= nil) or (train_normal_path ~= nil and valid_normal_path == nil) then 42 | print("Error: Either train_normal_path or valid_normal_path is not valid") 43 | os.exit() 44 | end 45 | 46 | ------------------------------------------------------------------------------------------------------------------ 47 | 48 | 49 | function TrainDataLoader() -- builds the training DataLoader; a path is dropped (set to nil) when its g_args.n_max_* limit is 0 50 | local _train_depth_path = train_depth_path 51 | local _train_normal_path = train_normal_path 52 | if g_args.n_max_depth == 0 then 53 | _train_depth_path = nil 54 | print("\t\t>>>>>>>>>>>>Warning: No depth training data specified!") 55 | end 56 | 57 | if g_args.n_max_normal == 0 then 58 | _train_normal_path = nil 59 | print("\t\t>>>>>>>>>>>>Warning: No normal training data specified!") 60 | end 61 | 62 | if _train_depth_path == nil and _train_normal_path == nil then -- test the effective local paths; the module-level train_depth_path is never nil at this point, so checking it could not fire 63 | assert(false, ">>>>>>>>> Error: Both normal data and depth data are nil!") 64 | end 65 | 66 | return DataLoader(_train_depth_path, _train_normal_path, g_args.n_max_depth, g_args.n_max_normal) 67 | end 68 | 69 | function ValidDataLoader() 70 | return DataLoader(valid_depth_path, valid_normal_path) 71 | end 72 | 73 | function Train_During_Valid_DataLoader() 74 | local _n_max_depth = g_args.n_max_depth 75 | local _n_max_normal = g_args.n_max_normal 76 | if g_args.n_max_depth == 0 then 77 | _n_max_depth = 800 78 | end 79 | if g_args.n_max_normal == 0 then 80 | _n_max_normal = 5000 81 | end 82 | 83 | return DataLoader(train_depth_path, train_normal_path, _n_max_depth, _n_max_normal) 84 | 
end -------------------------------------------------------------------------------- /src/experiment_NYU/measure.lua: -------------------------------------------------------------------------------- 1 | function rmse(a, b) 2 | return torch.sqrt((a - b):pow(2):sum() / a:numel()) 3 | end 4 | 5 | function depth_rmse_linear(y1, y2) 6 | return rmse(y1, y2) 7 | end 8 | 9 | function depth_rmse_log(y1, y2) 10 | return rmse(torch.log(y1), torch.log(y2)) 11 | end 12 | -- Scale-invariant RMSE in log space. alpha is the mean of (log y2 - log y1); adding it to d removes any constant log offset (global scale factor) between y1 and y2. 13 | function depth_scale_invariant_rmse_log(y1, y2) 14 | local ly1 = torch.log(y1) 15 | local ly2 = torch.log(y2) 16 | local d = ly1 - ly2 17 | local n = d:numel() 18 | local alpha = -(d:sum() / n) 19 | return torch.sqrt((ly1 - ly2 + alpha):pow(2):sum() / (2 * n)) 20 | end 21 | -- Fraction of elements whose ratio max(y1/y2, y2/y1) is <= thres. 22 | function threshold_delta(y1, y2, thres) 23 | local to_count = torch.cmax(torch.cdiv(y1, y2), torch.cdiv(y2, y1)) 24 | return torch.le(to_count, thres):sum() / to_count:numel() 25 | end 26 | 27 | function threshold_1(y1, y2) 28 | return threshold_delta(y1, y2, 1.25) 29 | end 30 | 31 | function threshold_2(y1 ,y2) 32 | return threshold_delta(y1, y2, 1.25^2) 33 | end 34 | 35 | function threshold_3(y1, y2) 36 | return threshold_delta(y1, y2, 1.25^3) 37 | end 38 | 39 | -- y2 is the groundtruth 40 | function abs_rel_diff(y1, y2) 41 | return torch.cdiv(torch.abs(y1 - y2), y2):sum() / y1:numel() 42 | end 43 | 44 | function sqr_rel_diff(y1, y2) 45 | return torch.cdiv((y1 - y2):pow(2), y2):sum() / y1:numel() 46 | end 47 | 48 | local depth_measures = { 49 | ['delta<1.25' ] = threshold_1, 50 | ['delta<1.25^2' ] = threshold_2, 51 | ['delta<1.25^3' ] = threshold_3, 52 | ['abs_rel_diff' ] = abs_rel_diff, 53 | ['sqr_rel_diff' ] = sqr_rel_diff, 54 | ['RMSE_linear' ] = depth_rmse_linear, 55 | ['RMSE_log' ] = depth_rmse_log, 56 | ['RMSE_log(sc.inv)'] = depth_scale_invariant_rmse_log 57 | } 58 | -- Evaluate every entry of depth_measures on the elements of y1 and y2 selected by mask; returns a table keyed by measure name. 59 | function measure_depth(y1, y2, mask) 60 | local measures = {} 61 | for name, func in pairs(depth_measures) do 62 | 
measures[name] = func(y1:maskedSelect(mask), y2:maskedSelect(mask)) 63 | end 64 | return measures 65 | end 66 | 67 | -- Angle in radians between n1 and n2, reducing over the first dimension (asserted to have size 3 for n1); the cosine is clamped to [-1, 1] before acos. 68 | function angle(n1, n2) 69 | assert(n1:size()[1] == 3) 70 | local n11 = torch.pow(n1, 2):sum(1) 71 | local n22 = torch.pow(n2, 2):sum(1) 72 | local n12 = torch.cmul(n1, n2):sum(1) 73 | 74 | return torch.acos(torch.cdiv(n12, torch.sqrt(torch.cmul(n11, n22))):clamp(-1, 1)) 75 | end 76 | 77 | -- Mean and median angular error, converted from radians to degrees. 78 | function mean_angle_error(n1, n2) 79 | return angle(n1, n2):mean() / math.pi * 180 80 | end 81 | 82 | function median_angle_error(n1, n2) 83 | return torch.median(angle(n1, n2):view(-1):double())[1] / math.pi * 180 84 | end 85 | -- Root mean square of the angle, left in radians (no degree conversion); not listed in normal_measures. 86 | function rmse_error(n1, n2) 87 | return torch.sqrt(angle(n1, n2):pow(2):sum() / (n1:numel() / 3)) 88 | end 89 | -- Fraction of vectors whose angle is <= thres, with thres in radians. 90 | function good_pixels(n1, n2, thres) 91 | return torch.le(angle(n1, n2), thres):sum() / (n1:numel() / 3) 92 | end 93 | -- Fixed thresholds below are given in degrees and converted to radians. 94 | function good_pixels_11_25(n1, n2) 95 | return good_pixels(n1, n2, 11.25 * math.pi / 180) 96 | end 97 | 98 | function good_pixels_22_5(n1, n2) 99 | return good_pixels(n1, n2, 22.5 * math.pi / 180) 100 | end 101 | 102 | function good_pixels_30(n1, n2) 103 | return good_pixels(n1, n2, 30 * math.pi / 180) 104 | end 105 | 106 | local normal_measures = { 107 | ['mean_angle_error' ] = mean_angle_error, 108 | ['median_angle_error'] = median_angle_error, 109 | ['within_11.25' ] = good_pixels_11_25, 110 | ['within_22.5' ] = good_pixels_22_5, 111 | ['within_30' ] = good_pixels_30 112 | } 113 | -- Requires n1 to have a leading dimension of size 1. Copies the mask-selected values of n1[1][i] and n2[1][i], i = 1..3, into 3 x mask:sum() tensors and evaluates every entry of normal_measures on them. 114 | function measure_normal(n1, n2, mask) 115 | assert(n1:size()[1] == 1) 116 | 117 | local masked_n1 = torch.zeros(3, mask:sum()) 118 | local masked_n2 = torch.zeros(3, mask:sum()) 119 | for i =1,3 do 120 | masked_n1[i] = n1[{1, i}]:maskedSelect(mask) 121 | masked_n2[i] = n2[{1, i}]:maskedSelect(mask) 122 | end 123 | 124 | local measures = {} 125 | for name, func in pairs(normal_measures) do 126 | measures[name] = func(masked_n1, masked_n2) 127 | end 128 | 129 | return measures 130 | end 131 | 
-------------------------------------------------------------------------------- /src/experiment_NYU/models/hourglass3.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | 
inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = 
nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | 141 | 142 | return model 143 | end 144 | 145 | 146 | require('../criterion/relative_depth_negative_cos') 147 | function get_criterion() 148 | print(g_args.w_n) 149 | return nn.relative_depth_negative_cos(g_args.w_n) 150 | end 151 | 152 | 153 | function f_depth_from_model_output() 154 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 155 | return ____get_depth_from_model_output 156 | end 157 | 158 | function ____get_depth_from_model_output(model_output) 159 | return model_output 160 | end -------------------------------------------------------------------------------- /src/experiment_NYU/models/hourglass3_softplus.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up 
to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | 
nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | 144 | return model 145 | end 146 | 147 | 148 | require('../criterion/relative_depth_negative_cos') 149 | function get_criterion() 150 | print(g_args.w_n) 151 | return nn.relative_depth_negative_cos(g_args.w_n) 152 | end 153 | 154 | 155 | function f_depth_from_model_output() 156 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 157 | return ____get_depth_from_model_output 158 | end 159 | 160 | function ____get_depth_from_model_output(model_output) 161 | return model_output 162 | end -------------------------------------------------------------------------------- /src/experiment_NYU/models/hourglass3_softplus_absolute_depth.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | 
model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 
256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output 
depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | 144 | return model 145 | end 146 | 147 | 148 | require('../criterion/absolute_depth_negative_cos') 149 | function get_criterion() 150 | return nn.absolute_depth_negative_cos(g_args.w_n) 151 | end 152 | 153 | 154 | function f_depth_from_model_output() 155 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 156 | return ____get_depth_from_model_output 157 | end 158 | 159 | function ____get_depth_from_model_output(model_output) 160 | return model_output 161 | end -------------------------------------------------------------------------------- /src/experiment_NYU/models/hourglass3_softplus_margin.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to 
_2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 
112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | 144 | return model 145 | end 146 | -- NOTE(review): the file required below is relative_depth_margin_negative_cos, but the class instantiated is nn.relative_depth_negative_cos; confirm that file registers the criterion under this name. 147 | -- The criterion is built from g_args.w_n and g_args.margin. 148 | require('../criterion/relative_depth_margin_negative_cos') 149 | function get_criterion() 150 | return nn.relative_depth_negative_cos(g_args.w_n, g_args.margin) 151 | end 152 | 153 | -- Prints a banner and returns the function that maps the model output to depth. 154 | function f_depth_from_model_output() 155 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 156 | return ____get_depth_from_model_output 157 | end 158 | -- Identity mapping: the model output is returned unchanged as the depth. 159 | function ____get_depth_from_model_output(model_output) 160 | return model_output 161 | end -------------------------------------------------------------------------------- /src/experiment_NYU/models/hourglass3_softplus_margin_log.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 
18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable()) 44 | 45 | 46 | -- input to _2channels is 256 47 | local _2channels = nn.ConcatTable() 48 | _2channels:add( 49 | nn.Sequential():add( 50 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 51 | ):add( 52 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 53 | ) 54 | ) 55 | _2channels:add( 56 | nn.Sequential():add( 57 | nn.SpatialAveragePooling(2,2,2,2) -- 8x 58 | ):add( 59 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 60 | ):add( 61 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 62 | ):add( 63 | _1channels -- down 16x then up to 8x 64 | ):add( 65 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 66 | ):add( 67 | inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}}) 68 | ):add( 69 | nn.SpatialUpSamplingNearest(2) -- up to 4x. 
256 channel 70 | ) 71 | ) 72 | _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable()) 73 | 74 | 75 | -- input to _3channels is 128 76 | local _3channels = nn.ConcatTable() 77 | _3channels:add( 78 | nn.Sequential():add( 79 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 4 x 80 | ):add( 81 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 82 | ):add( 83 | inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) --256 84 | ):add( 85 | _2channels 86 | ):add( 87 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 88 | ):add( 89 | inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 90 | ):add( 91 | nn.SpatialUpSamplingNearest(2)) -- up to 2x , output is 128 channel 92 | ) 93 | 94 | _3channels:add( 95 | nn.Sequential():add( 96 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) --128 97 | ):add( 98 | inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}}) 99 | ) 100 | ) 101 | 102 | _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable()) 103 | 104 | 105 | -- input to _4channels is 128 106 | local _4channels = nn.ConcatTable() 107 | _4channels:add( 108 | nn.Sequential():add( 109 | cudnn.SpatialMaxPooling(2, 2, 2, 2) -- 2 x 110 | ):add( 111 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) 112 | ):add( 113 | inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}}) -- 128 114 | ):add( 115 | _3channels 116 | ):add( 117 | inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}}) 118 | ):add( 119 | inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}}) 120 | ):add( 121 | nn.SpatialUpSamplingNearest(2) -- up to original, 64 channel 122 | ) 123 | 124 | ) 125 | 126 | _4channels:add( 127 | nn.Sequential():add( 128 | inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}}) 129 | --nn.Identity() 130 | ) 131 | ) 132 | 133 | _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable()) 134 | 135 | 136 | model:add(_4channels) 137 | 138 | --Final Output 139 | model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1)); 140 | --Enforce the output 
depth to be positive or 0 141 | model:add(nn.SoftPlus(true)) 142 | 143 | return model 144 | end 145 | 146 | 147 | require('../criterion/relative_depth_margin_log_negative_cos') 148 | function get_criterion() 149 | return nn.relative_depth_margin_log_negative_cos(g_args.w_n, g_args.margin) 150 | end 151 | 152 | 153 | function f_depth_from_model_output() 154 | print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output") 155 | return ____get_depth_from_model_output 156 | end 157 | 158 | function ____get_depth_from_model_output(model_output) 159 | return model_output 160 | end -------------------------------------------------------------------------------- /src/experiment_NYU/models/hourglass3_softplus_margin_log_depth_from_normal.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('layers/inception_new.lua') 3 | 4 | 5 | function get_model() 6 | require 'cudnn' 7 | require 'cunn' 8 | local model = nn.Sequential() 9 | 10 | model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3)) 11 | model:add(nn.SpatialBatchNormalization(128)) 12 | model:add(cudnn.ReLU(true)) 13 | --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128)) 14 | 15 | --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64)) 16 | 17 | 18 | 19 | 20 | 21 | 22 | -- input to _1channels is 256 23 | local _1channels = nn.ConcatTable() 24 | _1channels:add( 25 | nn.Sequential():add( 26 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 27 | ):add( 28 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})) 29 | ) 30 | _1channels:add( 31 | nn.Sequential():add( 32 | nn.SpatialAveragePooling(2,2,2,2) 33 | ):add( 34 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 35 | ):add( 36 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 37 | ):add( 38 | inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}}) 39 | ):add( 40 | nn.SpatialUpSamplingNearest(2) -- up to 8x, 256 channel 41 | ) 42 | ) 43 | _1channels = 
nn.Sequential():add(_1channels):add(nn.CAddTable())


    -- input to _2channels is 256
    local _2channels = nn.ConcatTable()
    _2channels:add(
        nn.Sequential():add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}})
        )
    )
    _2channels:add(
        nn.Sequential():add(
            nn.SpatialAveragePooling(2,2,2,2)   -- 8x
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            _1channels   -- down 16x then up to 8x
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to 4x. 256 channel
        )
    )
    _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable())


    -- input to _3channels is 128
    local _3channels = nn.ConcatTable()
    _3channels:add(
        nn.Sequential():add(
            cudnn.SpatialMaxPooling(2, 2, 2, 2)   -- 4 x
        ):add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})
        ):add(
            inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})   --256
        ):add(
            _2channels
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to 2x , output is 128 channel
        )
    )

    _3channels:add(
        nn.Sequential():add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})   --128
        ):add(
            inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}})
        )
    )

    _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable())


    -- input to _4channels is 128
    local _4channels = nn.ConcatTable()
    _4channels:add(
        nn.Sequential():add(
            cudnn.SpatialMaxPooling(2, 2, 2, 2)   -- 2 x
        ):add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})
        ):add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})   -- 128
        ):add(
            _3channels
        ):add(
            inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}})
        ):add(
            inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to original, 64 channel
        )
    )

    _4channels:add(
        nn.Sequential():add(
            inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}})
            --nn.Identity()
        )
    )

    _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable())


    model:add(_4channels)

    --Final Output
    model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1));
    --Enforce the output depth to be positive or 0
    model:add(nn.SoftPlus(true))

    return model
end


require('../criterion/relative_depth_margin_log_normal_depth')

--- Build the training criterion for this model.
-- Reads the weight `w_n` and `margin` from the global `g_args` table.
-- @return an nn.relative_depth_margin_log_normal_depth instance
function get_criterion()
    return nn.relative_depth_margin_log_normal_depth(g_args.w_n, g_args.margin)
end


--- Return the function that converts the raw model output into a depth map.
-- Prints a banner so the chosen conversion is visible in the log.
-- @return the global function ____get_depth_from_model_output
function f_depth_from_model_output()
    print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output")
    return ____get_depth_from_model_output
end

--- Identity conversion: the model output is used directly as the depth.
-- @param model_output whatever the network produced
-- @return model_output, unchanged
function ____get_depth_from_model_output(model_output)
    return model_output
end
--------------------------------------------------------------------------------
/src/experiment_NYU/models/hourglass3_softplus_margin_var.lua:
--------------------------------------------------------------------------------
require 'paths'
paths.dofile('layers/inception_new.lua')


--- Build the hourglass network.
-- Structure, as assembled below:
--   1. 7x7 convolution (3 -> 128 channels) + batch normalization + ReLU;
--   2. four nested stages (_1channels .. _4channels). Each stage is a
--      ConcatTable of two branches whose outputs are summed by CAddTable;
--      one branch of every stage pools, processes (wrapping the next inner
--      stage) and upsamples again by nearest neighbour;
--   3. a 3x3 convolution (64 -> 1 channel) followed by SoftPlus.
-- Loads 'cudnn' and 'cunn' on first call.
-- @return the assembled nn.Sequential model
function get_model()
    require 'cudnn'
    require 'cunn'
    local model = nn.Sequential()

    model:add(cudnn.SpatialConvolution(3,128,7,7,1,1,3,3))
    model:add(nn.SpatialBatchNormalization(128))
    model:add(cudnn.ReLU(true))
    --model:add(nn.SpatialFractionalMaxPooling(2,2,128,128))

    --model:add(nn.SpatialFractionalMaxPooling(2,2,64,64))


    -- input to _1channels is 256
    local _1channels = nn.ConcatTable()
    _1channels:add(
        nn.Sequential():add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        )
    )
    _1channels:add(
        nn.Sequential():add(
            nn.SpatialAveragePooling(2,2,2,2)
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to 8x, 256 channel
        )
    )
    _1channels = nn.Sequential():add(_1channels):add(nn.CAddTable())


    -- input to _2channels is 256
    local _2channels = nn.ConcatTable()
    _2channels:add(
        nn.Sequential():add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}})
        )
    )
    _2channels:add(
        nn.Sequential():add(
            nn.SpatialAveragePooling(2,2,2,2)   -- 8x
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            _1channels   -- down 16x then up to 8x
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{64}, {3,64,64}, {7,64,64}, {11,64,64}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to 4x. 256 channel
        )
    )
    _2channels = nn.Sequential():add(_2channels):add(nn.CAddTable())


    -- input to _3channels is 128
    local _3channels = nn.ConcatTable()
    _3channels:add(
        nn.Sequential():add(
            cudnn.SpatialMaxPooling(2, 2, 2, 2)   -- 4 x
        ):add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})
        ):add(
            inception(128, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})   --256
        ):add(
            _2channels
        ):add(
            inception(256, {{64}, {3,32,64}, {5,32,64}, {7,32,64}})
        ):add(
            inception(256, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to 2x , output is 128 channel
        )
    )

    _3channels:add(
        nn.Sequential():add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})   --128
        ):add(
            inception(128, {{32}, {3,64,32}, {7,64,32}, {11,64,32}})
        )
    )

    _3channels = nn.Sequential():add(_3channels):add(nn.CAddTable())


    -- input to _4channels is 128
    local _4channels = nn.ConcatTable()
    _4channels:add(
        nn.Sequential():add(
            cudnn.SpatialMaxPooling(2, 2, 2, 2)   -- 2 x
        ):add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})
        ):add(
            inception(128, {{32}, {3,32,32}, {5,32,32}, {7,32,32}})   -- 128
        ):add(
            _3channels
        ):add(
            inception(128, {{32}, {3,64,32}, {5,64,32}, {7,64,32}})
        ):add(
            inception(128, {{16}, {3,32,16}, {7,32,16}, {11,32,16}})
        ):add(
            nn.SpatialUpSamplingNearest(2)   -- up to original, 64 channel
        )
    )

    _4channels:add(
        nn.Sequential():add(
            inception(128, {{16}, {3,64,16}, {7,64,16}, {11,64,16}})
            --nn.Identity()
        )
    )

    _4channels = nn.Sequential():add(_4channels):add(nn.CAddTable())


    model:add(_4channels)

    --Final Output
    model:add(cudnn.SpatialConvolution(64,1,3,3,1,1,1,1));
    --Enforce the output depth to be positive or 0
    model:add(nn.SoftPlus(true))


    return model
end


require('../criterion/relative_depth_margin_negative_cos_var')

--- Build the training criterion for this model.
-- Reads `w_n`, `margin` and `var_thresh` from the global `g_args` table.
-- @return an nn.relative_depth_margin_negative_cos_var instance
function get_criterion()
    return nn.relative_depth_margin_negative_cos_var(g_args.w_n, g_args.margin, g_args.var_thresh)
end


--- Return the function that converts the raw model output into a depth map.
-- Prints a banner so the chosen conversion is visible in the log.
-- @return the global function ____get_depth_from_model_output
function f_depth_from_model_output()
    print(">>>>>>>>>>>>>>>>>>>>>>>>> depth = model_output")
    return ____get_depth_from_model_output
end

--- Identity conversion: the model output is used directly as the depth.
-- @param model_output whatever the network produced
-- @return model_output, unchanged
function ____get_depth_from_model_output(model_output)
    return model_output
end
--------------------------------------------------------------------------------
/src/experiment_NYU/models/img_coord_to_world_coord.lua:
--------------------------------------------------------------------------------
require 'nn'
require('../../common/NYU_params')

-- -- for debug only
-- local g_input_width = 640
-- local g_input_height = 480

-- local g_fx_rgb = 5.1885790117450188e+02;
-- local g_fy_rgb = -5.1946961112127485e+02;
-- local g_cx_rgb = 3.2558244941119034e+02;
-- local g_cy_rgb = 2.5373616633400465e+02;

local img_coord_to_world_coord, parent = torch.class('nn.img_coord_to_world_coord', 'nn.Module')


--- Precompute the per-pixel factors (x - cx) / fx and (y - cy) / fy.
-- Uses the globals g_input_width, g_input_height, g_cx_rgb, g_cy_rgb,
-- g_fx_rgb and g_fy_rgb (presumably defined by NYU_params -- confirm).
function img_coord_to_world_coord:__init()
    parent.__init(self)
    self.constant_x = torch.Tensor(g_input_height, g_input_width)   -- this should be cuda tensor, maybe
    self.constant_y = torch.Tensor(g_input_height, g_input_width)
    for y = 1 , g_input_height do   -- to test
        for x = 1 , g_input_width do
            self.constant_x[{y,x}] = (x - g_cx_rgb) / g_fx_rgb
            self.constant_y[{y,x}] = (y - g_cy_rgb) / g_fy_rgb
        end
    end
end

--- Turn a depth map into a 3-channel coordinate map.
-- Channel 1 = depth * constant_x, channel 2 = depth * constant_y,
-- channel 3 = depth.
-- @param input 4D depth tensor; dimensions 1, 3 and 4 size the output
--        (assumes dimension 2 has size 1 -- confirm against callers)
-- @return self.output, sized input:size(1) x 3 x input:size(3) x input:size(4)
function img_coord_to_world_coord:updateOutput(input)   -- the input is depth map, haven't checked the output though
    if self.output then
        if self.output:type() ~= input:type() then
            self.output = self.output:typeAs(input);
        end
        self.output:resize(input:size(1), 3, input:size(3), input:size(4))

        -- Keep the constant factors in the same tensor type as the input.
        if self.constant_x:type() ~= input:type() then
            self.constant_x = self.constant_x:typeAs(input);
            self.constant_y = self.constant_y:typeAs(input);
        end
    end

    self.output[{{}, 1, {}}]:copy(input)
    self.output[{{}, 2, {}}]:copy(input)
    self.output[{{}, 3, {}}]:copy(input)

    for batch_idx = 1 , input:size(1) do   -- this might not be the fastest way to do it
        self.output[{batch_idx, 1, {}}]:cmul(self.constant_x)
        self.output[{batch_idx, 2, {}}]:cmul(self.constant_y)
    end

    return self.output
end

--- Back-propagate through the coordinate conversion.
-- gradInput = gradOutput[1] * constant_x + gradOutput[2] * constant_y
--           + gradOutput[3], computed per batch element.
-- @param input the depth tensor given to updateOutput
-- @param gradOutput gradient w.r.t. the 3-channel output
-- @return self.gradInput, sized like input
function img_coord_to_world_coord:updateGradInput(input, gradOutput)
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
        self.gradInput:zero()
    end

    for batch_idx = 1 , input:size(1) do   -- this might not be the fastest way to do it
        self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 1, {}}], self.constant_x)
        self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 2, {}}], self.constant_y)
        self.gradInput[{batch_idx, {}}]:add(gradOutput[{batch_idx, 3, {}}])
    end

    return self.gradInput
end
--------------------------------------------------------------------------------
/src/experiment_NYU/models/img_coord_to_world_coord_focal.lua:
--------------------------------------------------------------------------------
require 'nn'
require('../../common/NYU_params')

-- -- for debug only
-- local g_input_width = 10
-- local g_input_height = 20


local img_coord_to_world_coord_focal, parent = torch.class('nn.img_coord_to_world_coord_focal', 'nn.Module')


--- Precompute the per-pixel offsets from the image centre.
-- The principal point is taken as (g_input_width / 2, g_input_height / 2);
-- the division by the focal length happens later, in updateOutput.
function img_coord_to_world_coord_focal:__init()
    parent.__init(self)
    -- Two gradients are produced: one for the depth map, one for the focal length.
    self.gradInput = {torch.Tensor(), torch.Tensor()}

    local _cx_rgb = g_input_width / 2
    local _cy_rgb = g_input_height / 2

    self.constant_x = torch.Tensor(g_input_height, g_input_width)   -- this should be cuda tensor, maybe
    self.constant_y = torch.Tensor(g_input_height, g_input_width)
    for y = 1 , g_input_height do
        for x = 1 , g_input_width do
            self.constant_x[{y,x}] = (x - _cx_rgb)
            self.constant_y[{y,x}] = -(y - _cy_rgb)   -- VERY IMPORTANT! The negative sign!!!!!!!!! to test
        end
    end
end

--- Turn a depth map plus a predicted focal length into a coordinate map.
-- @param input a table:
--        input[1] is the depth map, a 4D tensor;
--        input[2] is the predicted focal length, a 2D tensor whose first
--        dimension is the batch; only column 1 is read.   -- to do.
-- @return self.output, an N x 3 x H x W tensor:
--         channel 1 = depth * constant_x / f, channel 2 = depth * constant_y / f,
--         channel 3 = depth
function img_coord_to_world_coord_focal:updateOutput(input)
    if self.output then
        if self.output:type() ~= input[1]:type() then
            self.output = self.output:typeAs(input[1]);
        end
        self.output:resize(input[1]:size(1), 3, input[1]:size(3), input[1]:size(4))

        if self.constant_x:type() ~= input[1]:type() then
            self.constant_x = self.constant_x:typeAs(input[1]);
            self.constant_y = self.constant_y:typeAs(input[1]);
        end
    end
    assert(input[1]:size(1) == input[2]:size(1),
           'depth map and focal length must have the same batch size')

    self.output[{{}, 1, {}}]:copy(input[1])
    self.output[{{}, 2, {}}]:copy(input[1])
    self.output[{{}, 3, {}}]:copy(input[1])

    for batch_idx = 1 , input[1]:size(1) do   -- this might not be the fastest way to do it
        self.output[{batch_idx, 1, {}}]:cmul(self.constant_x)
        self.output[{batch_idx, 2, {}}]:cmul(self.constant_y)

        self.output[{batch_idx, {1,2}, {}}]:div(input[2][{batch_idx,1}])   -- divided by the predicted focal length
    end

    return self.output
end

--- Back-propagate to both the depth map and the focal length.
-- Relies on self.output still holding the result of the matching
-- updateOutput call.
-- @param input the {depth, focal} table given to updateOutput
-- @param gradOutput gradient w.r.t. the N x 3 x H x W output
-- @return self.gradInput, a table {grad wrt depth, grad wrt focal length}
function img_coord_to_world_coord_focal:updateGradInput(input, gradOutput)
    if self.gradInput then
        if self.gradInput[1]:type() ~= input[1]:type() then
            self.gradInput[1] = self.gradInput[1]:typeAs(input[1]);
        end

        if self.gradInput[2]:type() ~= input[2]:type() then
            self.gradInput[2] = self.gradInput[2]:typeAs(input[2]);
        end

        self.gradInput[1]:resizeAs(input[1])
        self.gradInput[2]:resizeAs(input[2])
        self.gradInput[1]:zero()
        self.gradInput[2]:zero()
    end


    -- buffer = output .* gradOutput; channels 1 and 2 scale as 1 / f, so
    -- their derivative w.r.t. f is -output / f.
    local buffer = self.output:clone()
    buffer:cmul(gradOutput)
    for batch_idx = 1 , input[1]:size(1) do   -- this might not be the fastest way to do it -- to do
        -- depth
        self.gradInput[1][{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 1, {}}], self.constant_x)
        self.gradInput[1][{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 2, {}}], self.constant_y)
        -- Only the x / y terms are divided by f; the z term is added afterwards.
        self.gradInput[1][{batch_idx, {}}]:div(input[2][{batch_idx,1}])

        self.gradInput[1][{batch_idx, {}}]:add(gradOutput[{batch_idx, 3, {}}])

        -- focal length
        self.gradInput[2][{batch_idx,1}] = torch.sum(buffer[{batch_idx,{1,2}}]:div(-input[2][{batch_idx,1}]))
    end

    return self.gradInput
end


--------------------------------------------------------------------------------
/src/experiment_NYU/models/img_coord_to_world_coord_multi_res.lua:
--------------------------------------------------------------------------------
require 'nn'
require('../../common/NYU_params')

-- -- for debug only
-- local g_input_width = 640
-- local g_input_height = 480

-- local g_fx_rgb = 5.1885790117450188e+02;
-- local g_fy_rgb = -5.1946961112127485e+02;
-- local g_cx_rgb = 3.2558244941119034e+02;
-- local g_cy_rgb = 2.5373616633400465e+02;

local img_coord_to_world_coord_multi_res, parent = torch.class('nn.img_coord_to_world_coord_multi_res', 'nn.Module')


--- Precompute the per-pixel factors for a down-scaled image.
-- The image size, principal point and focal lengths from the globals are
-- all divided by `scale`.
-- @param scale down-scaling factor applied to the full-resolution parameters
function img_coord_to_world_coord_multi_res:__init(scale)
    -- Tensor dimensions must be whole numbers; flooring leaves exact
    -- divisions untouched and matches the range of the loops below.
    local width = math.floor(g_input_width / scale)
    local height = math.floor(g_input_height / scale)
    local _cx_rgb = g_cx_rgb / scale
    local _cy_rgb = g_cy_rgb / scale
    local _fx_rgb = g_fx_rgb / scale
    local _fy_rgb = g_fy_rgb / scale

    parent.__init(self)
    self.constant_x = torch.Tensor(height, width)   -- this should be cuda tensor, maybe
    self.constant_y = torch.Tensor(height, width)
    for y = 1 , height do   -- to test
        for x = 1 , width do
            self.constant_x[{y,x}] = (x - _cx_rgb) / _fx_rgb
            self.constant_y[{y,x}] = (y - _cy_rgb) / _fy_rgb
        end
    end
end

--- Turn a depth map into a 3-channel coordinate map.
-- Channel 1 = depth * constant_x, channel 2 = depth * constant_y,
-- channel 3 = depth.
-- @param input 4D depth tensor at the scaled resolution
-- @return self.output, sized input:size(1) x 3 x input:size(3) x input:size(4)
function img_coord_to_world_coord_multi_res:updateOutput(input)   -- the input is depth map, haven't checked the output though
    if self.output then
        if self.output:type() ~= input:type() then
            self.output = self.output:typeAs(input);
        end
        self.output:resize(input:size(1), 3, input:size(3), input:size(4))

        if self.constant_x:type() ~= input:type() then
            self.constant_x = self.constant_x:typeAs(input);
            self.constant_y = self.constant_y:typeAs(input);
        end
    end

    self.output[{{}, 1, {}}]:copy(input)
    self.output[{{}, 2, {}}]:copy(input)
    self.output[{{}, 3, {}}]:copy(input)


    for batch_idx = 1 , input:size(1) do   -- this might not be the fastest way to do it
        self.output[{batch_idx, 1, {}}]:cmul(self.constant_x)
        self.output[{batch_idx, 2, {}}]:cmul(self.constant_y)
    end

    return self.output
end

--- Back-propagate through the coordinate conversion.
-- gradInput = gradOutput[1] * constant_x + gradOutput[2] * constant_y
--           + gradOutput[3], computed per batch element.
-- @param input the depth tensor given to updateOutput
-- @param gradOutput gradient w.r.t. the 3-channel output
-- @return self.gradInput, sized like input
function img_coord_to_world_coord_multi_res:updateGradInput(input, gradOutput)
    if self.gradInput then
        if self.gradInput:type() ~= input:type() then
            self.gradInput = self.gradInput:typeAs(input);
        end
        self.gradInput:resizeAs(input)
        self.gradInput:zero()
    end

    for batch_idx = 1 , input:size(1) do   -- this might not be the fastest way to do it
        self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 1, {}}], self.constant_x)
        self.gradInput[{batch_idx, {}}]:addcmul(gradOutput[{batch_idx, 2, {}}], self.constant_y)
        self.gradInput[{batch_idx, {}}]:add(gradOutput[{batch_idx, 3, {}}])
    end

    return self.gradInput
end
--------------------------------------------------------------------------------
/src/experiment_NYU/models/layers/Residual.lua:
--------------------------------------------------------------------------------
local conv = cudnn.SpatialConvolution
local batchnorm = nn.SpatialBatchNormalization
local relu = cudnn.ReLU

-- Main convolutional block:
-- BN-ReLU-1x1 conv (numIn -> numOut/2), BN-ReLU-3x3 conv (numOut/2 -> numOut/2),
-- BN-ReLU-1x1 conv (numOut/2 -> numOut).
local function convBlock(numIn,numOut)
    return nn.Sequential()
        :add(batchnorm(numIn))
        :add(relu(true))
        :add(conv(numIn,numOut/2,1,1))
        :add(batchnorm(numOut/2))
        :add(relu(true))
        :add(conv(numOut/2,numOut/2,3,3,1,1,1,1))
        :add(batchnorm(numOut/2))
        :add(relu(true))
        :add(conv(numOut/2,numOut,1,1))
end

-- Skip layer: identity when the channel counts match, otherwise a 1x1
-- convolution that changes numIn channels into numOut.
local function skipLayer(numIn,numOut)
    if numIn == numOut then
        return nn.Identity()
    else
        return nn.Sequential()
            :add(conv(numIn,numOut,1,1))
    end
end

--- Residual block: convBlock and skipLayer run on the same input and their
-- outputs are summed.
-- @param numIn number of input channels
-- @param numOut number of output channels
-- @return an nn.Sequential module
function Residual(numIn,numOut)
    return nn.Sequential()
        :add(nn.ConcatTable()
            :add(convBlock(numIn,numOut))
            :add(skipLayer(numIn,numOut)))
        :add(nn.CAddTable(true))
end

--------------------------------------------------------------------------------
/src/experiment_NYU/models/layers/inception_new.lua:
--------------------------------------------------------------------------------

--- Build an inception-style module whose branches are concatenated along
-- dimension 2.
-- @param input_size number of input channels
-- @param config list of branch descriptions:
--        config[1] = {out}                 a 1x1 convolution branch;
--        config[i] = {filt, out_a, out_b}  (i >= 2) a 1x1 reduction to out_a
--        channels followed by a filt x filt convolution to out_b channels,
--        padded by (filt - 1) / 2.
--        Every convolution is followed by batch normalization and ReLU.
-- @return an nn.Concat(2) module
function inception(input_size, config) -- activations: input_resolution * (config[1][1] + (#config - 1) * (out_a + out_b))

    local concat = nn.Concat(2)

    -- Base 1 x 1 conv layer
    local conv = nn.Sequential()
    conv:add(cudnn.SpatialConvolution(input_size,config[1][1],1,1))
    conv:add(nn.SpatialBatchNormalization(config[1][1], nil, nil, false))
    conv:add(cudnn.ReLU(true))   -- input_R * config[1][1] * N
    concat:add(conv)

    -- Additional layers
    -- `#config` replaces the deprecated table.getn(config).
    local num_conv = #config
    for i = 2,num_conv do
        conv = nn.Sequential()
        local filt = config[i][1]
        local pad = (filt - 1) / 2
        local out_a = config[i][2]
        local out_b = config[i][3]
        -- Reduction
        conv:add(cudnn.SpatialConvolution(input_size,out_a,1,1))
        conv:add(nn.SpatialBatchNormalization(out_a,nil,nil,false))
        conv:add(cudnn.ReLU(true))   -- input_R * out_a * N
        -- Spatial Convolution
        conv:add(cudnn.SpatialConvolution(out_a,out_b,filt,filt,1,1,pad,pad))
        conv:add(nn.SpatialBatchNormalization(out_b,nil,nil,false))
        conv:add(cudnn.ReLU(true))   -- input_R * out_b * N
        concat:add(conv)
    end

    return concat

end

--------------------------------------------------------------------------------
/src/experiment_NYU/models/normal_neg_loss_fast.lua:
--------------------------------------------------------------------------------
require 'cunn'

local normal_negative_cos_fast, parent = torch.class('nn.normal_negative_cos_fast', 'nn.Criterion')


function normal_negative_cos_fast:__init()
    parent.__init(self)
    self.buffer = torch.Tensor()
end


--- Mean negative cosine between predicted and target normals.
-- The input is a 4D tensor, [batchSize, 3, height, width], and represents the
-- normal maps. The 1st channel is the x component, 2nd is the y component,
-- 3rd is the z component.
-- The target is multiplied element-wise with the input, so it must be a
-- tensor with the same number of elements as the input.
-- Both are assumed to hold unit vectors, so the per-pixel dot product is the
-- cosine of the angle between them.
-- @return -sum(input .* target) / (batchSize * height * width); the same
--         value is stored in self.output
function normal_negative_cos_fast:updateOutput(input, target)
    local n_point_total = input:size(1) * input:size(3) * input:size(4)

    -- Dot product of normals, summed over every pixel. Store the normalised
    -- loss so that self.output agrees with the value returned to the caller.
    self.output = - torch.sum( torch.cmul(input, target) ) / n_point_total

    return self.output
end


--- Gradient of the mean negative cosine w.r.t. the input.
-- Since the loss is -sum(input .* target) / n, the gradient is -target / n,
-- with n = batchSize * height * width.
-- @param input 4D tensor [batchSize, 3, height, width] of predicted normals
-- @param target tensor with the same number of elements as the input
-- @return self.gradInput, sized and typed like input
function normal_negative_cos_fast:updateGradInput(input, target)
    -- Match the gradient buffer to the input's type and size.
    if self.gradInput:type() ~= input:type() then
        self.gradInput = self.gradInput:typeAs(input);
    end
    self.gradInput:resizeAs(input)

    local n_point_total = input:size(1) * input:size(3) * input:size(4)

    -- copy() overwrites every element, so no prior zero() is needed.
    self.gradInput:copy(target)

    return self.gradInput:div( -n_point_total )
end
--------------------------------------------------------------------------------
/src/experiment_NYU/validation_crit/validate_crit_NULL.lua:
--------------------------------------------------------------------------------
require 'image'


--- No-op validation criterion.
-- All arguments are ignored and seven zeros are returned.
-- NOTE(review): the original comment listed the relative depth loss per point
-- pair, the error ratio (WKDR), the average normal loss and the average angle
-- difference between predicted and ground-truth normals; the meaning of the
-- remaining return slots is not visible here -- confirm against the caller.
function evaluate( data_loader, model, criterion, max_n_sample )
    return 0, 0, 0, 0, 0, 0, 0
end
--------------------------------------------------------------------------------