├── .gitignore
├── README.md
├── examples
│   ├── example.py
│   ├── sculpture1.png
│   └── sculpture2.png
└── python
    ├── __init__.py
    ├── blocks.py
    ├── demon_networks.py
    ├── demon_operators.py
    ├── grad_test.py
    ├── operators.py
    ├── utils_diff.py
    └── utils_pytorch.py

/.gitignore:
--------------------------------------------------------------------------------
1 | #files
2 | 
3 | # data
4 | test
5 | 
6 | # ignore readme.txt
7 | readme.txt
8 | 
9 | # pycharm file
10 | .idea
11 | 
12 | # python file
13 | *.pyc
14 | 
15 | # temp file
16 | *~
17 | 
18 | # init model
19 | *.pt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeMoN with PyTorch
2 | 
3 | Data is available on [Google Drive](https://drive.google.com/drive/folders/0B8-9V4y1N7pxU25veV9aSlA4VGs?usp=sharing).
4 | 
--------------------------------------------------------------------------------
/examples/example.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | from matplotlib import pyplot as plt
4 | import os
5 | import sys
6 | 
7 | import torch
8 | from torch.autograd import Variable
9 | 
10 | examples_dir = os.path.dirname(__file__)
11 | weights_dir = os.path.join(examples_dir, '..', 'weights')
12 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python'))
13 | 
14 | from demon_networks import *
15 | 
16 | def prepare_input_data(img1, img2, data_format):
17 |     """Creates the arrays used as input from the two images."""
18 |     # scale images if necessary
19 |     if img1.size[0] != 256 or img1.size[1] != 192:
20 |         img1 = img1.resize((256,192))
21 |     if img2.size[0] != 256 or img2.size[1] != 192:
22 |         img2 = img2.resize((256,192))
23 |     img2_2 = img2.resize((64,48))
24 | 
25 |     # transform range from [0,255] to [-0.5,0.5]
26 |     img1_arr = np.array(img1).astype(np.float32)/255 - 0.5
27 |     img2_arr = np.array(img2).astype(np.float32)/255 - 0.5
28 |     img2_2_arr = np.array(img2_2).astype(np.float32)/255 - 0.5
29 | 
30 |     if data_format == 'channels_first':
31 |         img1_arr = img1_arr.transpose([2,0,1])
32 |         img2_arr = img2_arr.transpose([2,0,1])
33 |         img2_2_arr = img2_2_arr.transpose([2,0,1])
34 |         image_pair = np.concatenate((img1_arr,img2_arr), axis=0)
35 |     else:
36 |         image_pair = np.concatenate((img1_arr,img2_arr), axis=-1)
37 | 
38 |     result = {
39 |         'image_pair': image_pair[np.newaxis,:],
40 |         'image1': img1_arr[np.newaxis,:],      # first image
41 |         'image2_2': img2_2_arr[np.newaxis,:],  # second image with (w=64,h=48)
42 |     }
43 |     return result
44 | 
45 | #
46 | # DeMoN has been trained for specific internal camera parameters.
47 | #
48 | # If you use your own images try to adapt the intrinsics by cropping
49 | # to match the following normalized intrinsics:
50 | #
51 | #  K = (0.89115971 0          0.5)
52 | #      (0          1.18821287 0.5)
53 | #      (0          0          1  ),
54 | # where K(1,1), K(2,2) are the focal lengths for the x and y directions,
55 | # and (K(1,3), K(2,3)) is the principal point.
56 | # The parameters are normalized such that the image height and width is 1.
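# For your own camera, the normalized intrinsics are obtained by dividing the
# pixel-space intrinsics by the image size. As a sketch (fx, fy, cx, cy are
# hypothetical pixel-space values for your camera):
#
#   K_normalized = [[fx / width, 0,           cx / width ],
#                   [0,          fy / height, cy / height],
#                   [0,          0,           1          ]]
#
# This matches how DepthToFlow/FlowToDepth un-normalize K internally by
# multiplying the normalized K with diag(width, height, 1).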
57 | #
58 | 
59 | K = [[0.89115971, 0, 0.5],
60 |      [0, 1.18821287, 0.5],
61 |      [0, 0, 1]]
62 | intrinsics = Variable(torch.Tensor( K ), requires_grad=False)
63 | 
64 | # read data
65 | img1 = Image.open(os.path.join(examples_dir,'sculpture1.png'))
66 | img2 = Image.open(os.path.join(examples_dir,'sculpture2.png'))
67 | 
68 | input_data = prepare_input_data(img1,img2,'channels_first')
69 | 
70 | """ the whole network """
71 | bootstrap_net = BootstrapNet()
72 | iterative_net = IterativeNet()
73 | refinement_net = RefinementNet()
74 | 
75 | """load parameters"""
76 | bootstrap_net.load_state_dict(torch.load('./bootstrap_net.pt'))
77 | iterative_net.load_state_dict(torch.load('./iterative_net.pt'))
78 | refinement_net.load_state_dict(torch.load('./refinement_net.pt'))
79 | 
80 | # run the bootstrap net, then the iterative net 3 times
81 | img_pair = Variable( torch.FloatTensor(input_data['image_pair']), requires_grad=False )
82 | img2 = Variable( torch.FloatTensor(input_data['image2_2']), requires_grad=False )
83 | 
84 | result = bootstrap_net(img_pair, img2)
85 | 
86 | for i in range(3):
87 |     result = iterative_net(
88 |         img_pair,
89 |         img2,
90 |         intrinsics,
91 |         result
92 |     )
93 | 
94 | # run refinement net to refine and increase depth map resolution
95 | result = refinement_net(input_data['image1'], result['depth'])
--------------------------------------------------------------------------------
/examples/sculpture1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvfish/pytorch_demon/dd01350537e9469502dc5a889a8a430bd9b3356a/examples/sculpture1.png
--------------------------------------------------------------------------------
/examples/sculpture2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvfish/pytorch_demon/dd01350537e9469502dc5a889a8a430bd9b3356a/examples/sculpture2.png
--------------------------------------------------------------------------------
/python/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvfish/pytorch_demon/dd01350537e9469502dc5a889a8a430bd9b3356a/python/__init__.py
--------------------------------------------------------------------------------
/python/blocks.py:
--------------------------------------------------------------------------------
1 | """
2 | pytorch implementation of DeMoN - Depth and Motion Network
3 | 
4 | this file provides the implementation of the basic blocks used in the DeMoN architecture,
5 | 
6 | including FlowBlock, DepthMotionBlock and RefinementBlock
7 | 
8 | """
9 | 
10 | import torch
11 | import torch.nn as nn
12 | 
13 | from demon_operators import WarpImageLayer
14 | from demon_operators import DepthToFlowLayer
15 | from demon_operators import FlowToDepthLayer
16 | 
17 | def convrelu2_block( num_inputs, num_outputs , kernel_size, stride, leaky_coef ):
18 | 
19 |     """
20 |     :param num_inputs: number of input channels
21 |     :param num_outputs: number of output channels
22 |     :param kernel_size: kernel size
23 |     :param stride: stride
24 |     :param leaky_coef: leaky ReLU coefficient
25 |     :return: 2x(Conv + ReLU) block
26 |     """
27 | 
28 |     """ this block does two 1D convolutions, first on row, then on column """
29 | 
30 |     input = num_inputs; output = num_outputs
31 |     k = kernel_size; lc = leaky_coef
32 | 
33 |     if( not isinstance(stride, tuple)):
34 |         s = (stride, stride)
35 |     else:
36 |         s = stride
37 | 
38 |     conv1_1 = nn.Conv2d( input, output[0], (k[0], 1),
padding=(k[0] // 2, 0), stride=(s[0], 1) ) 39 | leaky_relu1_1 = nn.LeakyReLU( lc ) 40 | 41 | conv1_2 = nn.Conv2d( output[0], output[1], (1, k[1]), padding=(0, k[1] // 2), stride=(1, s[1]) ) 42 | leaky_relu1_2 = nn.LeakyReLU( lc ) 43 | 44 | return nn.Sequential( 45 | conv1_1, 46 | leaky_relu1_1, 47 | conv1_2, 48 | leaky_relu1_2 49 | ) 50 | 51 | def convrelu_block( num_inputs, num_outputs, kernel_size, stride, leaky_coef ): 52 | 53 | """ 54 | :param num_inputs: number of input channels 55 | :param num_outputs: number of output channels 56 | :param kernel_size: kernel size 57 | :param stride: stride 58 | :param leaky_coef: leaky ReLU coefficients 59 | :return: (Conv + ReLU) block 60 | """ 61 | 62 | """ this block does one 2D convolutions """ 63 | 64 | input = num_inputs; output = num_outputs 65 | k = kernel_size; lc = leaky_coef 66 | 67 | if( not isinstance(stride, tuple)): 68 | s = (stride, stride) 69 | else: 70 | s = stride 71 | 72 | conv1_1 = nn.Conv2d(input, output, k, padding=(k[0] // 2, k[1] // 2), stride=s ) 73 | leaky_relu1_1 = nn.LeakyReLU(lc) 74 | 75 | return nn.Sequential( 76 | conv1_1, 77 | leaky_relu1_1 78 | ) 79 | 80 | def predict_flow_block( num_inputs, num_outputs=4, intermediate_num_outputs=24): 81 | """ 82 | :param num_inputs: number of input channels 83 | :param predict_confidence: predict confidence or not 84 | :return: block for predicting flow 85 | """ 86 | 87 | """" 88 | this block is --> (Conv+ReLU) --> Conv --> , 89 | 90 | in the first prediction, input is 512 x 8 x 6, 91 | in the second prediction, input is 128 x 64 x 48 92 | 93 | """ 94 | 95 | conv1 = convrelu_block( num_inputs, intermediate_num_outputs, (3, 3), 1, 0.1) 96 | conv2 = nn.Conv2d( intermediate_num_outputs, num_outputs, (3, 3), padding=(1, 1), stride=1) 97 | 98 | return nn.Sequential( 99 | conv1, 100 | conv2 101 | ) 102 | 103 | def predict_motion_block( num_inputs , leaky_coef = 0.1): 104 | 105 | """ 106 | :param num_inputs: number of input channels 107 | :return: rotation, translation and scale 108 | """ 109 | 110 | """ 111 | this block is --> (Conv+ReLU) --> (FC+ReLU) --> (FC+ReLU) --> (FC+ReLU) -->, 112 | the output is rotation, translation and scale 113 | """ 114 | 115 | conv1 = convrelu_block( num_inputs, 128, (3, 3), 1, 0.1) 116 | 117 | fc1 = nn.Linear(128*8*6, 1024) 118 | fc2 = nn.Linear(1024, 128) 119 | fc3 = nn.Linear(128, 7) 120 | 121 | leaky_relu1 = nn.LeakyReLU(leaky_coef) 122 | leaky_relu2 = nn.LeakyReLU(leaky_coef) 123 | 124 | return conv1, \ 125 | nn.Sequential( 126 | fc1, 127 | leaky_relu1, 128 | fc2, 129 | leaky_relu2, 130 | fc3) 131 | 132 | class FlowBlock(nn.Module): 133 | 134 | def __init__(self, use_prev_predictions = False): 135 | 136 | super(FlowBlock, self).__init__() 137 | 138 | # self.conv1_1 = nn.Conv2d(6, 32, (9, 1), padding=(4, 0), stride=(2, 1) ) 139 | # self.leaky_relu1_1 = nn.LeakyReLU(0.1) 140 | # 141 | # self.conv1_2 = nn.Conv2d(32, 32, (1, 9), padding=(0, 4), stride=(1, 2) ) 142 | # self.leaky_relu1_2 = nn.LeakyReLU(0.1) 143 | # 144 | # self.conv1 = nn.Sequential( 145 | # self.conv1_1, 146 | # self.leaky_relu1_1, 147 | # self.conv1_2, 148 | # self.leaky_relu1_2) 149 | 150 | 151 | self.conv1 = convrelu2_block(6, (32, 32), (9, 9), 2, 0.1) 152 | 153 | if(not use_prev_predictions): 154 | self.conv2 = convrelu2_block(32, (64, 64), (7, 7), 2, 0.1) 155 | self.conv2_1 = convrelu2_block(64, (64, 64), (3, 3), 1, 0.1) 156 | else: 157 | """ in this case we also use the information from previous depth prediction """ 158 | self.warp_image = WarpImageLayer() 159 | self.depth_to_flow = 
DepthToFlowLayer(normalized_K=True) 160 | 161 | self.conv2 = convrelu2_block(32, (32, 32), (7, 7), 2, 0.1) 162 | self.conv2_extra_inputs = convrelu2_block(9, (32,32), (3, 3), 1, 0.1) 163 | self.conv2_1 = convrelu2_block(64, (64, 64), (3, 3), 1, 0.1) 164 | 165 | 166 | self.conv3 = convrelu2_block(64, (128,128), (5,5), 2, 0.1) 167 | self.conv3_1 = convrelu2_block(128, (128, 128), (3,3), 1, 0.1) 168 | 169 | self.conv4 = convrelu2_block(128, (256, 256), (5,5), 2, 0.1) 170 | self.conv4_1 = convrelu2_block(256, (256, 256), (3,3), 1, 0.1) 171 | 172 | 173 | """for conv5 layer, there is a mistake in the figure of demon paper, kernel size should be 5, not 3""" 174 | self.conv5 = convrelu2_block(256,(512, 512), (5,5), 2, 0.1) 175 | self.conv5_1 = convrelu2_block(512,(512, 512), (3,3), 1, 0.1) 176 | 177 | 178 | """five groups of convolution layers are finished""" 179 | 180 | self.flow1 = predict_flow_block(512, num_outputs=4) 181 | self.flow1_upconv = nn.ConvTranspose2d( 4, 2, (4,4), stride=(2,2), padding=1 ) 182 | 183 | self.upconv1 = nn.Sequential( 184 | nn.ConvTranspose2d( 512, 256, (4,4), stride=(2,2), padding=1), 185 | nn.LeakyReLU(0.1)) 186 | 187 | self.upconv2 = nn.Sequential( 188 | nn.ConvTranspose2d( 514, 128, (4,4), stride=(2,2), padding=1), 189 | nn.LeakyReLU(0.1)) 190 | 191 | self.upconv3 = nn.Sequential( 192 | nn.ConvTranspose2d( 256, 64, (4,4), stride=(2,2), padding=1), 193 | nn.LeakyReLU(0.1)) 194 | 195 | self.flow2 = predict_flow_block(128, num_outputs=4) 196 | 197 | 198 | def forward(self, image_pair, image2_2 = None, intrinsics = None, prev_predictions = None): 199 | """ 200 | image_pair: Tensor 201 | Image pair concatenated along the channel axis. 202 | 203 | image2_2: Tensor 204 | Second image at resolution level 2 (downsampled two times) 205 | 206 | intrinsics: Tensor 207 | The normalized intrinsic parameters 208 | 209 | prev_predictions: dict of Tensor 210 | Predictions from the previous depth block 211 | """ 212 | 213 | conv1 = self.conv1(image_pair) 214 | conv2 = self.conv2(conv1) 215 | 216 | if(prev_predictions == None): 217 | conv2_1 = self.conv2_1( conv2 ) 218 | else: 219 | depth = prev_predictions['depth'] 220 | normal = prev_predictions['normal'] 221 | rotation = prev_predictions['rotation'] 222 | translation = prev_predictions['translation'] 223 | 224 | flow = self.depth_to_flow(intrinsics, 225 | intrinsics, 226 | depth, 227 | rotation, 228 | translation) 229 | 230 | warped_im = self.warp_image(image2_2, flow) 231 | combined = torch.cat((warped_im, flow, depth, normal), 1) 232 | 233 | """use torch.cat to concatenate tensors""" 234 | extra = self.conv2_extra_inputs( combined ) 235 | conv2_1 = self.conv2_1( torch.cat((conv2, extra), 1) ) 236 | 237 | conv3 = self.conv3( conv2_1 ) 238 | conv3_1 = self.conv3_1( conv3 ) 239 | conv4 = self.conv4( conv3_1 ) 240 | conv4_1 = self.conv4_1( conv4 ) 241 | conv5 = self.conv5( conv4_1 ) 242 | conv5_1 = self.conv5_1( conv5 ) 243 | 244 | upconv1 = self.upconv1(conv5_1) 245 | flow1 = self.flow1(conv5_1) 246 | flow1_upconv = self.flow1_upconv(flow1) 247 | 248 | """ concatenation along the channel axis """ 249 | upconv2 = self.upconv2( torch.cat( (upconv1, conv4_1, flow1_upconv), 1 ) ) 250 | upconv3 = self.upconv3( torch.cat( (upconv2, conv3_1), 1 ) ) 251 | flow2 = self.flow2( torch.cat( (upconv3, conv2_1), 1) ) 252 | 253 | """flow2 combines flow and flow confidence""" 254 | 255 | return flow2 256 | 257 | """" 258 | DepthMotionBlock is very similar to FlowBlock, probably we should just write a general one 259 | """ 260 | # class 
DepthMotionBlock(nn.Module):
261 | class DepthMotionBlock(nn.Module):
262 | 
263 |     def __init__(self, use_prev_depthmotion = False):
264 | 
265 |         super(DepthMotionBlock, self).__init__()
266 | 
267 |         self.conv1 = convrelu2_block(6, (32,32), (9,9), 2, 0.1)
268 |         self.conv2 = convrelu2_block(32,(32,32), (7,7), 2, 0.1)
269 | 
270 |         self.warp_image = WarpImageLayer()
271 | 
272 |         if(use_prev_depthmotion):
273 |             self.conv2_extra_inputs = convrelu2_block(8, (32, 32), (3,3), 1, 0.1)
274 |             self.flow_to_depth = FlowToDepthLayer(normalized_K=True)
275 |         else:
276 |             self.conv2_extra_inputs = convrelu2_block(7, (32, 32), (3,3), 1, 0.1)
277 | 
278 |         self.conv2_1 = convrelu2_block(64, (64, 64), (3,3), 1, 0.1)
279 | 
280 |         self.conv3 = convrelu2_block(64, (128,128), (5,5), 2, 0.1)
281 |         self.conv3_1 = convrelu2_block(128, (128, 128), (3,3), 1, 0.1)
282 | 
283 |         self.conv4 = convrelu2_block(128, (256, 256), (5,5), 2, 0.1)
284 |         self.conv4_1 = convrelu2_block(256, (256, 256), (3,3), 1, 0.1)
285 | 
286 |         """note that the conv5 layer is different from FlowBlock, here the kernel size is 3"""
287 |         self.conv5 = convrelu2_block(256,(512, 512), (3,3), 2, 0.1)
288 |         self.conv5_1 = convrelu2_block(512,(512, 512), (3,3), 1, 0.1)
289 | 
290 |         self.motion_conv, self.motion_fc = predict_motion_block(512)
291 | 
292 |         """the depth_normal prediction uses the same architecture as predict_flow_block"""
293 |         self.depth_normal = predict_flow_block(128, num_outputs=4)
294 | 
295 |         self.upconv1 = nn.Sequential(
296 |             nn.ConvTranspose2d( 512, 256, (4,4), stride=(2,2), padding=1 ),
297 |             nn.LeakyReLU(0.1))
298 | 
299 |         self.upconv2 = nn.Sequential(
300 |             nn.ConvTranspose2d( 512, 128, (4,4), stride=(2,2), padding=1 ),
301 |             nn.LeakyReLU(0.1))
302 | 
303 |         self.upconv3 = nn.Sequential(
304 |             nn.ConvTranspose2d( 256, 64, (4,4), stride=(2,2), padding=1 ),
305 |             nn.LeakyReLU(0.1))
306 | 
307 |     def forward(self, image_pair, image2_2, prev_flow2, prev_flowconf2,
308 |                 prev_predictions = None, intrinsics = None):
309 | 
310 |         """
311 |         image_pair: Tensor
312 |             Image pair concatenated along the channel axis.
313 | 
314 |         image2_2: Tensor
315 |             Second image at resolution level 2 (downsampled two times)
316 | 
317 |         prev_flow2: Tensor
318 |             The output of the flow network. Contains only the flow (2 channels)
319 | 
320 |         prev_flowconf2: Tensor
321 |             The output of the flow network. Contains flow and flow confidence (4 channels)
322 | 
323 |         prev_predictions: dict of Tensor
324 |             Predictions ('depth', 'normal', 'rotation', 'translation') from the
325 |             previous DepthMotionBlock. Only used when the block was created with
326 |             use_prev_depthmotion=True; the previous rotation and translation are
327 |             then used to convert prev_flow2 into a depth estimate.
328 | 329 | intrinsics: Tensor 330 | The normalized intrinsic parameters 331 | 332 | """ 333 | 334 | conv1 = self.conv1(image_pair) 335 | conv2 = self.conv2(conv1) 336 | 337 | """warp 2nd image""" 338 | warped_im = self.warp_image(image2_2, prev_flow2) 339 | 340 | if(prev_predictions == None): 341 | combined = torch.cat( (warped_im, prev_flowconf2), 1 ) 342 | else: 343 | prev_rotation = prev_predictions['rotation'] 344 | prev_translation = prev_predictions['translation'] 345 | depth = self.flow_to_depth(intrinsics, intrinsics, prev_flow2, prev_rotation, prev_translation) 346 | 347 | combined = torch.cat((warped_im, prev_flowconf2, depth), 1) 348 | 349 | # """ testing """ 350 | # combined = torch.cat((warped_im, prev_flowconf2, warped_im[0:1,0:1,:,:]), 1) 351 | 352 | extra = self.conv2_extra_inputs( combined ) 353 | conv2_1 = self.conv2_1( torch.cat((conv2, extra),1) ) 354 | 355 | conv3 = self.conv3( conv2_1 ) 356 | conv3_1 = self.conv3_1( conv3 ) 357 | conv4 = self.conv4( conv3_1 ) 358 | conv4_1 = self.conv4_1( conv4 ) 359 | conv5 = self.conv5( conv4_1 ) 360 | conv5_1 = self.conv5_1( conv5 ) 361 | 362 | upconv1 = self.upconv1(conv5_1) 363 | upconv2 = self.upconv2( torch.cat((upconv1, conv4_1), 1) ) 364 | upconv3 = self.upconv3( torch.cat((upconv2, conv3_1), 1) ) 365 | 366 | depth_normal = self.depth_normal( torch.cat((upconv3, conv2_1), 1) ) 367 | motion_conv = self.motion_conv(conv5_1) 368 | 369 | motion = self.motion_fc( 370 | motion_conv.view( 371 | motion_conv.size(0), 372 | 128 * 6 * 8 373 | )) 374 | 375 | scale = motion[:,6] 376 | rotation = motion[:,0:3] 377 | translation = motion[:,3:6] 378 | 379 | # batch_size = scale.size(0) 380 | 381 | # depth = depth_normal[:, 0:1, :, :] * scale.expand_as( depth_normal[:, 0:1, :, :] ) 382 | depth = depth_normal[:, 0:1, :, :] * scale.view(-1, 1, 1, 1) 383 | 384 | normal = depth_normal[:, 1:4, :, :] 385 | 386 | predictions = { 387 | 'depth': depth, 388 | 'normal': normal, 389 | 'rotation': rotation, 390 | 'translation': translation, 391 | } 392 | 393 | return predictions 394 | 395 | """ 396 | Refinement Block 397 | """ 398 | class RefinementBlock(nn.Module): 399 | 400 | def __init__(self): 401 | 402 | super(RefinementBlock, self).__init__() 403 | 404 | self.conv0 = convrelu_block(4, 32, (3,3), (1,1), 0.1) 405 | self.conv1 = convrelu_block(32, 64, (3,3), (2,2), 0.1) 406 | self.conv1_1 = convrelu_block(64, 64, (3,3), (1,1), 0.1) 407 | 408 | self.conv2 = convrelu_block(64, 128, (3,3), (2,2), 0.1) 409 | self.conv2_1 = convrelu_block(128, 128, (3,3), (1,1), 0.1) 410 | 411 | self.upconv1 = nn.Sequential( 412 | nn.ConvTranspose2d(128, 64, (4,4), stride=(2,2), padding=1), 413 | nn.LeakyReLU(0.1) 414 | ) 415 | 416 | self.upconv2 = nn.Sequential( 417 | nn.ConvTranspose2d(128, 32, (4,4), stride=(2,2), padding=1), 418 | nn.LeakyReLU(0.1) 419 | ) 420 | 421 | self.depth_refine = predict_flow_block(64, num_outputs=1, intermediate_num_outputs=16) 422 | 423 | def forward(self, image1, depth): 424 | 425 | """ 426 | :param image1: 427 | :param depth: 428 | :return: 429 | """ 430 | 431 | """ 432 | fix me, update upsampling 433 | """ 434 | W = image1.shape[-1] 435 | H = image1.shape[-2] 436 | 437 | up_sample = nn.Upsample(size=(H, W), mode='nearest') 438 | depth_upsampled = up_sample(depth) 439 | 440 | input = torch.cat( 441 | ( 442 | torch.autograd.Variable( 443 | torch.from_numpy(image1), 444 | requires_grad = False), 445 | depth_upsampled), 446 | 1) 447 | 448 | conv0 = self.conv0(input) 449 | conv1 = self.conv1(conv0) 450 | conv1_1 = self.conv1_1(conv1) 451 | 452 
| conv2 = self.conv2(conv1_1) 453 | conv2_1 = self.conv2_1(conv2) 454 | 455 | upconv1 = self.upconv1(conv2_1) 456 | upconv2 = self.upconv2( torch.cat((upconv1, conv1_1), 1) ) 457 | 458 | depth_refine = self.depth_refine( torch.cat((upconv2, conv0), 1) ) 459 | 460 | return depth_refine 461 | 462 | 463 | -------------------------------------------------------------------------------- /python/demon_networks.py: -------------------------------------------------------------------------------- 1 | """" 2 | pytorch implementation of DeMoN - Depth and Motion Network 3 | 4 | this file provides the implementation of DeMoN architecture, 5 | 6 | including BootstrapNet, IterativeNet and RefinementNet 7 | 8 | """ 9 | 10 | 11 | from blocks import * 12 | import torch.nn as nn 13 | 14 | """ 15 | BootstrapNet, without any previous predictions 16 | """ 17 | class BootstrapNet(nn.Module): 18 | 19 | def __init__(self): 20 | 21 | super(BootstrapNet, self).__init__() 22 | 23 | self.flow_block = FlowBlock(use_prev_predictions=False) 24 | self.depth_motion_block = DepthMotionBlock(use_prev_depthmotion=False) 25 | 26 | 27 | def forward(self, image_pair, image2_2): 28 | 29 | flow = self.flow_block(image_pair) 30 | 31 | predictions = self.depth_motion_block(image_pair, image2_2, flow[:, 0:2, :, :], flow) 32 | 33 | return predictions 34 | 35 | """ 36 | IterativeNet, use previous depth and motion information 37 | """ 38 | 39 | class IterativeNet(nn.Module): 40 | 41 | def __init__(self): 42 | 43 | super(IterativeNet, self).__init__() 44 | 45 | self.flow_block = FlowBlock(use_prev_predictions=True) 46 | self.depth_motion_block = DepthMotionBlock(use_prev_depthmotion=True) 47 | 48 | def forward(self, image_pair, image2_2, intrinsics, prev_predictions): 49 | 50 | flow = self.flow_block(image_pair, image2_2, intrinsics, prev_predictions) 51 | 52 | predictions = self.depth_motion_block(image_pair, image2_2, 53 | flow[:, 0:2, :, :], flow, 54 | intrinsics=intrinsics, 55 | prev_predictions=prev_predictions) 56 | 57 | return predictions 58 | 59 | """ 60 | RefinementNet, refine depth output 61 | """ 62 | 63 | class RefinementNet(nn.Module): 64 | 65 | def __init__(self): 66 | 67 | super(RefinementNet, self).__init__() 68 | 69 | self.refinement_block = RefinementBlock() 70 | 71 | def forward(self, image1, depth): 72 | 73 | refinement = self.refinement_block(image1, depth) 74 | 75 | return refinement 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /python/demon_operators.py: -------------------------------------------------------------------------------- 1 | """ 2 | depth to flow and flow to depth layers 3 | 4 | """ 5 | 6 | import numpy as np 7 | 8 | import torch 9 | from torch.nn.modules.module import Module 10 | 11 | from torch.autograd import Variable 12 | 13 | from operators import matrix_inv 14 | from operators import axis_angle_to_rotation_matrix 15 | from operators import perspective_projection 16 | from operators import sample_from_images 17 | from operators import triangulate_flow 18 | 19 | class DepthToFlow(Module): 20 | 21 | def __init__(self, normalized_K = True, normalized_flow = True, inverse_depth = True): 22 | 23 | super(DepthToFlow, self).__init__() 24 | 25 | self.normalized_K = normalized_K 26 | self.normalized_flow = normalized_flow 27 | self.inverse_depth = inverse_depth 28 | 29 | def forward(self, K1, K2, depth, rot, trans): 30 | 31 | """ 32 | :param K1: intrinsics of 1st image, 3x3 33 | :param K2: intrinsics of 2nd image, 3x3 34 | :param 
depth: depth map of first image, 1 x height x width 35 | :param rot: rotation from first to second image, 3 36 | :param trans: translation from first to second, 3 37 | :return: normalized flow from 1st image to 2nd image, 2 x height x width 38 | """ 39 | 40 | height, width = depth.size() 41 | num = height * width 42 | 43 | if(self.normalized_K): 44 | tmp = Variable(torch.Tensor([[width, 0, 0], 45 | [0, height,0], 46 | [0, 0, 1]])) 47 | K1 = torch.mm(tmp, K1) 48 | K2 = torch.mm(tmp, K2) 49 | 50 | invK1 = matrix_inv(K1) 51 | rotation = axis_angle_to_rotation_matrix(rot) 52 | 53 | inv_width = 1.0 / width 54 | inv_height = 1.0 / height 55 | 56 | uu = torch.arange(0, width).expand(height, width) + 0.5 57 | vv = torch.arange(0,height).expand(width, height).transpose(1,0) + 0.5 58 | 59 | points_uvn = torch.cat([uu.contiguous().view(1, num), 60 | vv.contiguous().view(1, num), 61 | torch.ones(1, num)], 0) 62 | 63 | if(self.inverse_depth): 64 | points = (1.0 / depth).view(1, height * width) * \ 65 | torch.mm(invK1, Variable(points_uvn, requires_grad=False)) 66 | else: 67 | points = depth.resize(1, height * width) * \ 68 | torch.mm(invK1, Variable(points_uvn, requires_grad=False)) 69 | 70 | points2 = torch.mm(rotation, points) + trans.view(3, 1) 71 | 72 | points2_uv = perspective_projection(points2, K2) 73 | 74 | flow = points2_uv - Variable(points_uvn[0:2], requires_grad=False) 75 | 76 | if(self.normalized_flow): 77 | normalized_flow = torch.cat([flow[0].view(height, width)*inv_width, 78 | flow[1].view(height, width)*inv_height], 0) 79 | return normalized_flow.view(2, height, width) 80 | else: 81 | unnormalized_flow = torch.cat([flow[0].view(height, width), 82 | flow[1].view(height, width)], 0) 83 | return unnormalized_flow.view(2, height, width) 84 | 85 | class FlowToDepth(Module): 86 | 87 | def __init__(self, normalized_K = False, normalized_flow = True, inverse_depth = True): 88 | 89 | super(FlowToDepth, self).__init__() 90 | 91 | self.normalized_K = normalized_K 92 | self.normalized_flow = normalized_flow 93 | self.inverse_depth = inverse_depth 94 | 95 | def forward(self, K1, K2, flow, rot, trans): 96 | 97 | """ 98 | :param K1: intrinsics of 1st image, 3x3 99 | :param K2: intrinsics of 2nd image, 3x3 100 | :param flow: flow of first image, 2 x height x width 101 | :param rot: rotation from first to second image, 3 102 | :param trans: translation from first to second, 3 103 | :return: depth/inv_depth of first image, 1 x height x width 104 | """ 105 | 106 | rotation = axis_angle_to_rotation_matrix(rot) 107 | RT = torch.cat((rotation, trans.resize(3,1)), 1) 108 | 109 | height, width = flow.size(1), flow.size(2) 110 | 111 | if(self.normalized_K): 112 | tmp = Variable(torch.Tensor([[width, 0, 0], 113 | [0, height,0], 114 | [0, 0, 1]])) 115 | K1 = torch.mm(tmp, K1) 116 | K2 = torch.mm(tmp, K2) 117 | 118 | if(self.normalized_flow): 119 | unnormalized_flow = torch.cat([flow[0]*width, flow[1]*height], 0).view(2, height, width) 120 | else: 121 | unnormalized_flow = flow 122 | 123 | I0 = Variable( torch.cat((torch.eye(3), torch.zeros(3,1)), 1)) 124 | P1 = torch.mm( K1, I0) 125 | P2 = torch.mm( K2, RT) 126 | 127 | depth = triangulate_flow(P1, P2, unnormalized_flow) 128 | 129 | if(self.inverse_depth): 130 | return 1.0 / depth.contiguous().view(1, height, width) 131 | else: 132 | return depth.contiguous().view(1, height, width) 133 | 134 | class WarpImage(Module): 135 | 136 | def forward(self, image, flow, normalized_flow = True, border_mode = 'value', border_value = 0): 137 | """ 138 | :param image: input 
image, channels x height x width 139 | :param flow: flow image, 2 x height x width 140 | :param normalized_flow: whether flow is normalized or not, True or False 141 | :param border_mode: border mode, 'clamp' or 'value' 142 | :param border_value: border value, the value used outside the image borders. 143 | :return: warped image, channels x height x width 144 | """ 145 | 146 | C, H, W = image.size() 147 | 148 | grid_v, grid_u = np.mgrid[0:H, 0:W] 149 | grid_v = Variable(torch.Tensor(grid_v)) 150 | grid_u = Variable(torch.Tensor(grid_u)) 151 | 152 | if(not normalized_flow): 153 | new_u = grid_u + flow[0] 154 | new_v = grid_v + flow[1] 155 | else: 156 | new_u = grid_u + flow[0] * W 157 | new_v = grid_v + flow[1] * H 158 | 159 | proj = torch.cat([new_u.resize(1, H * W), 160 | new_v.resize(1, H * W)], 0) 161 | 162 | warped_image_vec = sample_from_images(proj, image, 163 | border_mode = border_mode, 164 | border_value = border_value) 165 | 166 | warped_image = warped_image_vec.resize(C, H, W) 167 | 168 | return warped_image 169 | 170 | """ 171 | batch depth to flow 172 | """ 173 | class DepthToFlowLayer(Module): 174 | 175 | def __init__(self, normalized_K = False, normalized_flow = True, inverse_depth = True): 176 | 177 | super(DepthToFlowLayer, self).__init__() 178 | 179 | self.depth_to_flow = DepthToFlow(normalized_K = normalized_K, 180 | normalized_flow = normalized_flow, 181 | inverse_depth = inverse_depth) 182 | 183 | def forward(self, K1, K2, depth, rot, trans, shared_K = True): 184 | 185 | """ 186 | :param K1: 3x3 if shared_K is True, otherwise K1 is nx3x3 187 | :param K2: 3x3 if shared_K is True, otherwise K2 is nx3x3 188 | :param depth: n x 1 x h x w 189 | :param rot: n x 3 190 | :param trans: n x3 191 | :param shared_K: if True, we share intrinsics for the depth images of the whole batch 192 | :return: n x 2 x h x w 193 | """ 194 | 195 | # depths = depth.chunk(depth.size(0), 0) 196 | # batch_size = len(depths) 197 | 198 | batch_size = depth.size(0) 199 | 200 | flows = () 201 | 202 | for i in range(batch_size): 203 | 204 | if(shared_K): 205 | flow = self.depth_to_flow( K1, K2, depth[i][0], rot[i], trans[i]) 206 | else: 207 | flow = self.depth_to_flow(K1[i], K2[i], depth[i][0], rot[i], trans[i]) 208 | 209 | flows += (flow,) 210 | 211 | flow = torch.stack(flows, 0) 212 | 213 | return flow 214 | 215 | """ 216 | batch flow to depth 217 | """ 218 | class FlowToDepthLayer(Module): 219 | 220 | def __init__(self, normalized_K=False, normalized_flow=True, inverse_depth=True): 221 | 222 | super(FlowToDepthLayer, self).__init__() 223 | 224 | self.flow_to_depth = FlowToDepth(normalized_K=normalized_K, 225 | normalized_flow=normalized_flow, 226 | inverse_depth=inverse_depth) 227 | 228 | def forward(self, K1, K2, flow, rot, trans, shared_K = True): 229 | """ 230 | :param K1: 3x3 if shared_K is True, otherwise K1 is nx3x3 231 | :param K2: 3x3 if shared_K is True, otherwise K2 is nx3x3 232 | :param flow: n x 2 x h x w 233 | :param rot: n x 3 234 | :param trans: n x 3 235 | :param shared_K: if True, we share intrinsics for the depth images of the whole batch 236 | :return: depth, n x 1 x h x w 237 | """ 238 | 239 | flows = flow.chunk(flow.size(0), 0) 240 | batch_size = len(flows) 241 | 242 | depths = () 243 | 244 | for i in range(batch_size): 245 | 246 | if(shared_K): 247 | depth = self.flow_to_depth(K1, K2, flows[i][0], rot[i], trans[i]) 248 | else: 249 | depth = self.flow_to_depth(K1[i], K2[i], flows[i][0], rot[i], trans[i]) 250 | 251 | depths += (depth,) 252 | 253 | depth = torch.stack(depths, 0) 
254 | 255 | return depth 256 | 257 | """ 258 | warpping batch images 259 | """ 260 | class WarpImageLayer(Module): 261 | 262 | def __init__(self): 263 | 264 | super(WarpImageLayer, self).__init__() 265 | 266 | self.warp_image = WarpImage() 267 | 268 | def forward(self, image, flow, normalized_flow = True, border_mode = 'value', border_value = 0): 269 | 270 | """ 271 | :param image: image batch, n x C x H x W 272 | :param flow: flow batch, n X 2 X H X W 273 | :param normalized_flow: True or False 274 | :param border_mode: 'clamp' or 'border value' 275 | :param border_value: the values for out of image filling 276 | :return: warped batch images, n x C x H x W 277 | """ 278 | 279 | images = image.chunk(image.size(0), 0) 280 | 281 | batch_size = len(images) 282 | warped_images = () 283 | 284 | for i in range(batch_size): 285 | 286 | warped_image = self.warp_image(images[i][0], 287 | flow[i], 288 | normalized_flow = normalized_flow, 289 | border_mode = border_mode, 290 | border_value = border_value) 291 | 292 | warped_images += (warped_image,) 293 | 294 | warped_image = torch.stack(warped_images, 0) 295 | 296 | return warped_image 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | -------------------------------------------------------------------------------- /python/grad_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | test file 3 | """ 4 | 5 | """ 6 | gradient test, compare autograd gradient with numeric gradient for each operation 7 | rotation, rigid transformation, projection, sampling 8 | """ 9 | 10 | import numpy as np 11 | 12 | import torch 13 | from torch.autograd import Variable 14 | 15 | from operators import axis_angle_to_rotation_matrix 16 | from operators import rigid_transformation 17 | from operators import perspective_projection 18 | from operators import sample_from_images 19 | 20 | from operators import matrix_inv 21 | from operators import my_svd 22 | from operators import my_svd_trig 23 | 24 | from utils_diff import numdiff_jacobian 25 | 26 | from utils_pytorch import wrapper 27 | from utils_pytorch import gradient_check 28 | 29 | """ 30 | rotation operation 31 | """ 32 | 33 | # rot = Variable( torch.Tensor( np.random.rand(3) ), requires_grad = True ) 34 | rot = Variable( torch.Tensor( [ 0.56004662, 0.97220967, 0.71514336] ), requires_grad = True ) 35 | gradient_check(axis_angle_to_rotation_matrix, rot) 36 | 37 | func_wrapper = wrapper(axis_angle_to_rotation_matrix, rot ) 38 | func, jac, x0 = func_wrapper['func'], func_wrapper['jac'], func_wrapper['x0'] 39 | 40 | print numdiff_jacobian(func, x0) 41 | print jac(x0) 42 | 43 | """ 44 | matrix inverse 45 | """ 46 | 47 | print "matrix inverse" 48 | mat = Variable( torch.randn(3,3), requires_grad = True ) 49 | gradient_check(matrix_inv, mat) 50 | 51 | """ 52 | svd_trig 53 | """ 54 | 55 | print "svd_trig" 56 | mat = Variable( torch.randn(4,4), requires_grad = True ) 57 | gradient_check(my_svd_trig, mat) 58 | 59 | """ 60 | svd, need to turn on testing mode 61 | """ 62 | print "svd" 63 | mat = Variable( torch.randn(4,4), requires_grad = True ) 64 | my_svd(mat) 65 | # gradient_check(my_svd, mat) 66 | 67 | """rigid transformation """ 68 | trans = Variable( torch.Tensor( np.random.rand(3) ), requires_grad = True ) 69 | vertices = Variable( torch.Tensor( np.random.rand(3, 1) ) , requires_grad = False ) 70 | 71 | """rotation""" 72 | print "rotation" 73 | gradient_check(rigid_transformation, rot, trans, vertices, id_list=[0]) 74 | """translation""" 75 | print 
"translation" 76 | gradient_check(rigid_transformation, rot, trans, vertices, id_list=[1]) 77 | """vertices""" 78 | # print "rigid transformation, vertices gradient" 79 | # gradient_check(rigid_transformation, rot, trans, vertices, id_list=[2]) 80 | 81 | func_wrapper = wrapper( rigid_transformation, rot, trans, vertices ) 82 | func, jac, x0 = func_wrapper['func'], func_wrapper['jac'], func_wrapper['x0'] 83 | 84 | print numdiff_jacobian(func, x0) 85 | print jac(x0) 86 | 87 | """perspective projection""" 88 | print "perspective projection" 89 | K = Variable( torch.Tensor( np.random.rand(3,3) + 1 ), requires_grad = True ) 90 | X = Variable( torch.Tensor( np.random.rand(3, 1) ), requires_grad = True ) 91 | gradient_check( perspective_projection, X, K, id_list=[0]) 92 | 93 | """ image sampling """ 94 | print "image sampling" 95 | img = Variable(torch.from_numpy(np.random.rand(10,10,1)).float(), requires_grad = True) 96 | proj = Variable( 97 | torch.Tensor( 98 | np.array([[np.random.uniform(1,2) for i in range(2)] for j in range(2)])), 99 | requires_grad = True ) 100 | 101 | gradient_check(sample_from_images, proj, img, id_list=[0]) 102 | gradient_check(sample_from_images, proj, img, id_list=[1]) -------------------------------------------------------------------------------- /python/operators.py: -------------------------------------------------------------------------------- 1 | """ 2 | useful geometric operators for demon net 3 | 4 | axis_angle_to_rotation_matrix 5 | 6 | perspective_projection 7 | 8 | sample_from_images 9 | 10 | """ 11 | 12 | import numpy as np 13 | 14 | import torch 15 | from torch.autograd import Variable 16 | from torch.autograd import Function 17 | from torch.nn.modules.module import Module 18 | 19 | """ 20 | convert from axis-angle representation to rotation matrix 21 | http://www.ethaneade.com/latex2html/lie_groups/node37.html 22 | 23 | w in so3 to rotation matrix: 24 | 25 | R = exp(w_x) = I + (sin(\theta) / \theta)w_x + ((1-cos(\theta))\theta^2) w_x^2 26 | 27 | derivative dR_dwi: 28 | 29 | --- w_i [w]x + [w x (I - R)e_i]x 30 | ----------------------------------- R 31 | --- ||w||^{2} 32 | 33 | w: 3 34 | R: 3x3 35 | 36 | """ 37 | 38 | class AxisAngleToRotationMatrix(Function): 39 | 40 | def forward(self, w): 41 | 42 | R = self.forward_core(w) 43 | 44 | self.save_for_backward(w, R) 45 | 46 | return R 47 | 48 | @staticmethod 49 | def forward_core(w): 50 | 51 | theta = torch.norm(w) 52 | 53 | if (theta > 0): 54 | 55 | wx = torch.Tensor([[0, -w[2], w[1]], [w[2], 0, -w[0]], [-w[1], w[0], 0]]) 56 | R = torch.eye(3) + np.sin(theta) / theta * wx + ((1 - np.cos(theta)) / theta ** 2) * wx.mm(wx) 57 | 58 | else: 59 | 60 | R = torch.Tensor([[1, -w[2], w[1]], [w[2], 1, -w[0]], [-w[1], w[0], 1]]) 61 | 62 | return R 63 | 64 | def backward(self, grad_output): 65 | 66 | w, R = self.saved_tensors 67 | 68 | grad_w = self.backward_core(w, R, grad_output) 69 | 70 | return grad_w 71 | 72 | @staticmethod 73 | def backward_core(w, R, grad_output): 74 | 75 | grad_w = torch.zeros(3) 76 | 77 | theta = torch.norm(w) 78 | 79 | if (theta > 0): 80 | 81 | wx = torch.Tensor([[0, -w[2], w[1]], [w[2], 0, -w[0]], [-w[1], w[0], 0]]) 82 | 83 | for i in range(3): 84 | ei = torch.zeros(3, 1); 85 | ei[i] = 1 86 | temp = wx.mm((torch.eye(3) - R)).mm(ei) 87 | 88 | dR_dwi = (w[i] * wx + torch.Tensor([[0, -temp[2][0], temp[1][0]], 89 | [temp[2][0], 0, -temp[0][0]], 90 | [-temp[1][0], temp[0][0], 0]])).mm(R) / theta ** 2 91 | 92 | grad_w[i] = (grad_output * dR_dwi).sum() 93 | 94 | else: 95 | 96 | grad_w[0] = 
grad_output[2][1] - grad_output[1][2] 97 | grad_w[1] = grad_output[0][2] - grad_output[2][0] 98 | grad_w[2] = grad_output[1][0] - grad_output[0][1] 99 | 100 | return grad_w 101 | 102 | def axis_angle_to_rotation_matrix(w): 103 | 104 | return AxisAngleToRotationMatrix()(w) 105 | 106 | """ matrix inversion """ 107 | class MatrixInv(Function): 108 | 109 | """even give variable as input, in forward function, A will be torch.Tensor """ 110 | def forward(self, A): 111 | 112 | C = torch.inverse(A) 113 | self.save_for_backward(C) 114 | 115 | return C 116 | 117 | def backward(self, grad_output): 118 | 119 | C = self.saved_tensors[0] 120 | 121 | return -C.transpose(1,0).mm( grad_output ).mm( C.transpose(1,0) ) 122 | 123 | def matrix_inv(A): 124 | 125 | return MatrixInv()(A) 126 | 127 | """ 128 | my svd implementation which supports backprop 129 | reference "https://arxiv.org/pdf/1509.07838.pdf", page 7, proposition 1 130 | """ 131 | class MySVD(Function): 132 | 133 | def forward(self, X): 134 | 135 | """ 136 | :param X: m x n, m >= n 137 | :return: U(m x m), S(n x n) V(n x n) 138 | """ 139 | 140 | if(X.size(0) >= X.size(1)): 141 | 142 | U, S, V = torch.svd(X, some=False) 143 | 144 | """check the singular values to make sure they are different""" 145 | n = S.size(0) 146 | diff = S.view(n, 1) - S.view(1,n) 147 | if(torch.sum( diff == 0 ) > n): 148 | print "there are same singular values" 149 | 150 | S = torch.diag(S) 151 | 152 | self.save_for_backward(U, S, V) 153 | return U, S, V 154 | 155 | # """for testing backward""" 156 | # USV = torch.cat((U, S, V), 1) 157 | # self.save_for_backward(USV) 158 | # return USV 159 | 160 | else: 161 | 162 | print "we need X(m x n) has size m >= n" 163 | 164 | # """for testing backward""" 165 | # def backward(self, grad_output): 166 | # 167 | # USV = self.saved_tensors[0] 168 | # m = USV.size(0) 169 | # n = m 170 | # 171 | # U = USV[:, 0:m] 172 | # S = USV[:, m:2*m] 173 | # V = USV[:, 2*m:3*m] 174 | # 175 | # pL_pU = grad_output[:, 0:m] 176 | # pL_pS = grad_output[:, m:2*m] 177 | # pL_pV = grad_output[:, 2*m:3*m] 178 | 179 | def backward(self, *grad_output): 180 | 181 | U, S, V = self.saved_tensors 182 | 183 | m = U.size(0) 184 | n = V.size(0) 185 | 186 | pL_pU = grad_output[0] 187 | pL_pS = grad_output[1] 188 | pL_pV = grad_output[2] 189 | 190 | S_inv = torch.inverse(S) 191 | 192 | if(m > n): 193 | D = torch.mm( pL_pU[:, 0:n], S_inv ) - \ 194 | torch.mm( U[:, n:], torch.mm( pL_pU[:, n:].t(), torch.mm( U[:, 0:n], S_inv ) ) ) 195 | else: 196 | D = torch.mm(pL_pU[:, 0:n], S_inv) 197 | 198 | S_full = torch.cat((S, torch.zeros(m-n, n)), 0) 199 | pL_pS_full = torch.cat((pL_pS, torch.zeros(m-n, n)), 0) 200 | 201 | pL_pX_1 = torch.mm( D, V.t() ) 202 | 203 | temp = torch.diag(torch.diag(pL_pS_full - torch.mm(U.t(), D))) 204 | pL_pX_2 = torch.mm( U, torch.mm( temp, V.t() )) 205 | 206 | S2 = S*S 207 | K = torch.diag( S2 ).view(n,1) - torch.diag( S2 ).view(1, n) 208 | K = torch.pow(K, -1) 209 | K[torch.max(K) == K] = 0 210 | temp = torch.mm(V.t(), (pL_pV - torch.mm(V, torch.mm(D.t(), torch.mm( U, S_full )) ) )) 211 | temp2 = K.t() * temp 212 | temp_sym = 0.5 * (temp2 + temp2.t()) 213 | pL_pX_3 = 2 * torch.mm(U, torch.mm(S_full, torch.mm(temp_sym, V.t())) ) 214 | 215 | return pL_pX_1 + pL_pX_2 + pL_pX_3 216 | 217 | def my_svd(A): 218 | 219 | return MySVD()(A) 220 | 221 | """ 222 | svd tailored for triangulation. 
223 | In triangulation, we only care about the last column of V 224 | """ 225 | class MySVDTrig(Function): 226 | 227 | def forward(self, X): 228 | 229 | """ 230 | :param X: m x n, m >= n 231 | :return: U(m x m), S(n x n) V(n x n) 232 | """ 233 | 234 | if (X.size(0) >= X.size(1)): 235 | 236 | U, S, V = torch.svd(X, some=False) 237 | 238 | """check the singular values to make sure they are different""" 239 | n = S.size(0) 240 | diff = S.view(n, 1) - S.view(1, n) 241 | if (torch.sum(diff == 0) > n): 242 | print "there are same singular values" 243 | 244 | S = torch.diag(S) 245 | 246 | self.save_for_backward(X) 247 | 248 | return V[:,-1] 249 | 250 | else: 251 | 252 | print "we need X(m x n) has size m >= n" 253 | 254 | def backward(self, grad_output): 255 | 256 | X = self.saved_tensors[0] 257 | U, S, V = torch.svd(X, some=False) 258 | S = torch.diag(S) 259 | 260 | # U, S, V = self.saved_tensors 261 | 262 | m = U.size(0) 263 | n = V.size(0) 264 | 265 | pL_pV = torch.cat((torch.zeros(n, n-1), grad_output), 1) 266 | 267 | if(m > n): 268 | S_full = torch.cat((S, torch.zeros(m - n, n)), 0) 269 | else: 270 | S_full = S 271 | 272 | S2 = S * S 273 | K = torch.diag(S2).view(n, 1) - torch.diag(S2).view(1, n) 274 | K = torch.pow(K, -1) 275 | K[torch.max(K) == K] = 0 276 | temp = torch.mm(V.t(), pL_pV) 277 | temp2 = K.t() * temp 278 | temp_sym = 0.5 * (temp2 + temp2.t()) 279 | pL_pX = 2 * torch.mm(U, torch.mm(S_full, torch.mm(temp_sym, V.t()))) 280 | 281 | return pL_pX 282 | 283 | def my_svd_trig(A): 284 | 285 | return MySVDTrig()(A) 286 | 287 | """ rigid transformation """ 288 | """ 289 | rot: 3 290 | trans: 3 291 | vertices: 3xn 292 | X: 3xn 293 | """ 294 | def rigid_transformation(rot, trans, vertices): 295 | 296 | rot_mat = axis_angle_to_rotation_matrix(rot) 297 | X = rot_mat.mm(vertices) + trans.view(3, 1) 298 | 299 | return X 300 | 301 | """ perspective projection """ 302 | """ 303 | X: 3xn 304 | K: 3x3 305 | """ 306 | def perspective_projection(X, K): 307 | 308 | KX = K.mm(X) 309 | num = X.size()[1] 310 | ProjX = torch.div(KX[0:2], KX[2].view(1, num)) 311 | 312 | return ProjX 313 | 314 | """ triangulation flow""" 315 | """ 316 | P1: P matrix of first image 317 | P2: P matrix of second image 318 | flow: optical flow from first image to second image 319 | return: return the depth value of points based on flow and P matrices 320 | """ 321 | def triangulate_flow(P1, P2, flow): 322 | 323 | H, W = flow.size(1), flow.size(2) 324 | 325 | grid_v, grid_u = np.mgrid[0:H, 0:W] 326 | 327 | grid_v = Variable(torch.Tensor(grid_v + 0.5)) 328 | grid_u = Variable(torch.Tensor(grid_u + 0.5)) 329 | 330 | uv1 = torch.cat((grid_u.view(1, H*W), 331 | grid_v.view(1, H*W)), 0) 332 | 333 | uv2 = uv1 + torch.cat((flow[0].view(1, H*W), 334 | flow[1].view(1, H*W)), 0) 335 | 336 | Ps = torch.stack((P1, P2), 0) 337 | W = torch.cat((uv1, uv2), 0) 338 | 339 | XYZ = triangulate_points(Ps, W) 340 | 341 | depth = XYZ[:,2] 342 | 343 | return depth 344 | 345 | """ triangulation points""" 346 | """ 347 | Ps: K x 3 x 4 348 | W: 2K x P 349 | return: triangulation results XYZ, P x 3 350 | """ 351 | def triangulate_points(Ps, W): 352 | 353 | cameras = Ps.size(0) 354 | points = W.size(1) 355 | 356 | A12 = torch.cat((Ps[:, 0, :], Ps[:, 1, :]), 0) 357 | A33 = torch.cat((Ps[:, 2, :], Ps[:, 2, :]), 0) 358 | 359 | UV = torch.cat( (W[0:cameras*2:2, :], 360 | W[1:cameras*2:2, :]), 0) 361 | 362 | XYZ = () 363 | 364 | for j in range(points): 365 | 366 | A = A12 - A33 * UV[:, j].contiguous().view(-1,1) 367 | 368 | """ 369 | fix me, write a svd layer 
which supports backprop 370 | A = U * S * V', 371 | here we just need my_svd_trig, return only the last column of V 372 | """ 373 | # U, S, V = my_svd(A) 374 | # X = V[:, -1] 375 | 376 | X = my_svd_trig(A) 377 | 378 | xyz = X[0:3] / X[3] 379 | 380 | XYZ += (xyz, ) 381 | 382 | return torch.cat(XYZ, 0).view(points, 3) 383 | 384 | """ 385 | sampling values from C * H * W images based on 2d projections 386 | img: C * H * W 387 | proj: 2 * P 388 | values: C * P 389 | img is provided as parameters, proj as input and sampled values as output 390 | """ 391 | 392 | class SamplingFromImages(Module): 393 | 394 | def forward(self, proj, img, border_mode = 'clamp', border_value = 0): 395 | 396 | C, H, W = img.size() 397 | 398 | if border_mode == 'clamp': 399 | 400 | proj_u = torch.clamp(proj[0], min=0, max=W - 1) 401 | proj_v = torch.clamp(proj[1], min=0, max=H - 1) 402 | 403 | ul = proj_u.int() 404 | vl = proj_v.int() 405 | 406 | delta_u = proj_u - ul.float() 407 | delta_v = proj_v - vl.float() 408 | 409 | # uu = torch.clamp(ul + 1, min=0, max=W - 1) 410 | # vu = torch.clamp(vl + 1, min=0, max=H - 1) 411 | 412 | """To make sure this is exactly the same as DeMoN""" 413 | uu = torch.clamp(proj[0] + 1, min=0, max=W - 1).int() 414 | vu = torch.clamp(proj[1] + 1, min=0, max=H - 1).int() 415 | 416 | # img_vec = img.view(C, H * W) 417 | img_vec = img.contiguous().view(C, H * W) 418 | 419 | img_ll = img_vec[:, (vl * W + ul).data.long()] 420 | img_ul = img_vec[:, (vu * W + ul).data.long()] 421 | img_lu = img_vec[:, (vl * W + uu).data.long()] 422 | img_uu = img_vec[:, (vu * W + uu).data.long()] 423 | 424 | values = (1 - delta_u) * (1 - delta_v) * img_ll + \ 425 | (1 - delta_u) * delta_v * img_ul + \ 426 | delta_u * (1 - delta_v) * img_lu + \ 427 | delta_u * delta_v * img_uu 428 | 429 | return values 430 | 431 | elif border_mode == 'value': 432 | 433 | ul = proj[0].int() 434 | vl = proj[1].int() 435 | 436 | delta_u = proj[0] - ul.float() 437 | delta_v = proj[1] - vl.float() 438 | 439 | uu = ul + 1 440 | vu = vl + 1 441 | 442 | """The up to date github version """ 443 | # mask = (ul >= 0) * (uu > 0) * (uu < W) * (vl >= 0) * (vu > 0) * (vu < H) 444 | 445 | """To make sure this is exactly the same as DeMoN code (the version on my desktop)""" 446 | mask = (ul >= 0) * (uu < W) * (vl >= 0) * (vu < H) 447 | 448 | ul = torch.clamp(ul, min=0, max=W - 1) 449 | vl = torch.clamp(vl, min=0, max=H - 1) 450 | uu = torch.clamp(uu, min=0, max=W - 1) 451 | vu = torch.clamp(vu, min=0, max=H - 1) 452 | 453 | # img_vec = img.view(C, H * W) 454 | img_vec = img.contiguous().view(C, H * W) 455 | 456 | img_ll = img_vec[:, (vl * W + ul).data.long()] 457 | img_ul = img_vec[:, (vu * W + ul).data.long()] 458 | img_lu = img_vec[:, (vl * W + uu).data.long()] 459 | img_uu = img_vec[:, (vu * W + uu).data.long()] 460 | 461 | # """this will also work in the pytorch0.2""" 462 | # img_ll = img.contiguous()[:, vl.data.long(), ul.data.long()] 463 | # img_ul = img.contiguous()[:, vu.data.long(), ul.data.long()] 464 | # img_lu = img.contiguous()[:, vl.data.long(), uu.data.long()] 465 | # img_uu = img.contiguous()[:, vu.data.long(), uu.data.long()] 466 | 467 | values = (1 - delta_u) * (1 - delta_v) * img_ll + \ 468 | (1 - delta_u) * delta_v * img_ul + \ 469 | delta_u * (1 - delta_v) * img_lu + \ 470 | delta_u * delta_v * img_uu 471 | 472 | boundary_mask = (mask == 0) 473 | 474 | values = values * mask.float() + \ 475 | border_value * boundary_mask.float() 476 | 477 | return values 478 | 479 | """ 480 | hand coded version, 481 | assume img is fixed 482 | 
code back propagation manually 483 | """ 484 | class MySamplingFromImages(Function): 485 | 486 | def __init__(self, img): 487 | 488 | self.img = img 489 | 490 | def forward(self, proj): 491 | 492 | img = self.img 493 | C, H, W = img.size() 494 | 495 | proj_u = torch.clamp( proj.data[0], min=0, max=W-1) 496 | proj_v = torch.clamp( proj.data[1], min=0, max=H-1) 497 | 498 | ul = proj_u.int() 499 | vl = proj_v.int() 500 | 501 | delta_u = proj_u - ul 502 | delta_v = proj_v - vl 503 | 504 | uu = torch.clamp(ul + 1, min=0, max=W-1) 505 | vu = torch.clamp(vl + 1, min=0, max=H-1) 506 | 507 | img_vec = img.view(C, H*W) 508 | 509 | img_ll = img_vec[:, vl*W + ul] 510 | img_ul = img_vec[:, vu*W + ul] 511 | img_lu = img_vec[:, vl*W + uu] 512 | img_uu = img_vec[:, vu*W + uu] 513 | 514 | values = (1 - delta_u) * (1 - delta_v) * img_ll + \ 515 | (1 - delta_u) * delta_v * img_ul + \ 516 | delta_u * (1 - delta_v) * img_lu + \ 517 | delta_u * delta_v * img_uu 518 | 519 | self.save_for_backward(proj.data, img_ll, img_ul, img_lu, img_uu) 520 | 521 | return values 522 | 523 | def backward(self, grad_output): 524 | 525 | """ 526 | :param grad_output: C x P 527 | :return: 2 x P 528 | """ 529 | 530 | img = self.img 531 | proj, img_ll, img_ul, img_lu, img_uu = self.saved_tensors 532 | 533 | C, H, W = img.size() 534 | P = proj.size(1) 535 | 536 | dv_dp = torch.zeros(C, 2, P) 537 | 538 | grad_proj = torch.zeros(2, P) 539 | 540 | mask = (proj[0] > 0) * (proj[0] < W - 1) * (proj[1] > 0) * (proj[1] < H - 1) 541 | ind = torch.nonzero(mask) 542 | 543 | ul = proj[0][mask].int() 544 | vl = proj[1][mask].int() 545 | 546 | delta_u = proj[0] - ul 547 | delta_v = proj[1] - vl 548 | 549 | dv_dp[:, 0, :][:, ind] = -(1 - delta_v) * img_ll - \ 550 | delta_v * img_ul + \ 551 | (1 - delta_v) * img_lu + \ 552 | delta_v * img_uu 553 | 554 | dv_dp[:, 1, :][:, ind] = -(1 - delta_u) * img_ll + \ 555 | (1 - delta_u) * img_ul - \ 556 | delta_u * img_lu + \ 557 | delta_u * img_uu 558 | 559 | grad_proj[0, :] = ( grad_output * dv_dp[:, 0, :]).sum(0) 560 | grad_proj[1, :] = ( grad_output * dv_dp[:, 1, :]).sum(0) 561 | 562 | return grad_proj 563 | 564 | def sample_from_images(proj, img, border_mode = 'clamp', border_value = 0): 565 | 566 | return SamplingFromImages()(proj, img, border_mode = border_mode, border_value = border_value) 567 | -------------------------------------------------------------------------------- /python/utils_diff.py: -------------------------------------------------------------------------------- 1 | import numdifftools as nd 2 | 3 | """ 4 | only useful when input is single dimension, while output is two dimension 5 | """ 6 | 7 | def numdiff_jacobian(func, input, order=2): 8 | 9 | result = func(input) 10 | 11 | Jfunc = nd.Jacobian(func, order=order) 12 | J = Jfunc(input) 13 | 14 | if( len(result.shape) == 2 ): 15 | J = J.transpose((1, 0, 2)).reshape(-1, J.shape[2]) 16 | 17 | return J 18 | 19 | -------------------------------------------------------------------------------- /python/utils_pytorch.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | import torch 6 | from torch.autograd import Variable 7 | 8 | import numdifftools as nd 9 | 10 | """ 11 | gradient checker 12 | """ 13 | def gradient_check(op, *args, **kwargs): 14 | 15 | """ 16 | examples 17 | gradient_check(rigid_transformation, rot, trans, vertices, id_list=[2]) 18 | """ 19 | 20 | if( not 'id_list' in kwargs.keys() ): 21 | kwargs.update({"id_list":[0]}) 22 | 23 | id_list = 
kwargs.get("id_list", [0]) 24 | 25 | for i in id_list: 26 | 27 | if(not isinstance(args[i], Variable)): 28 | raise Exception("input {:g} is not a variable".format(i)) 29 | 30 | if(isinstance(args[i], Variable) and not args[i].requires_grad): 31 | raise Exception("input {:g} doesn't require gradient".format(i)) 32 | 33 | nelems = args[i].numel() 34 | 35 | """ numerical gradient """ 36 | 37 | wrapper, p = numdiff_wrapper(op, args, kwargs, i) 38 | jacobian_numerical = numdiff_unified(wrapper, p) 39 | 40 | """ analytic gradient """ 41 | 42 | jacobian_analytic = [] 43 | 44 | if(len(kwargs.keys()) > 1): 45 | """function has dictionary inputs""" 46 | f = op(*args, **kwargs) 47 | else: 48 | f = op(*args) 49 | 50 | output_nelems = f.data.numel() 51 | 52 | for k in range(output_nelems): 53 | 54 | output_grad = torch.zeros(f.data.size()) 55 | output_grad.view(output_nelems, 1)[k] = 1 56 | 57 | f.backward(output_grad, retain_variables=True) 58 | 59 | jacobian_analytic.append( np.copy( args[i].grad.data.view( nelems ).numpy() ) ) 60 | 61 | for params_i in args: 62 | if(isinstance(params_i, torch.autograd.Variable) and params_i.requires_grad): 63 | params_i.grad.data.zero_() 64 | 65 | jacobian_analytic = np.asarray(jacobian_analytic) 66 | 67 | """ 68 | compare jacobian_analytic with jacobian_numerical 69 | """ 70 | 71 | if( np.allclose(jacobian_analytic, jacobian_numerical) ): 72 | 73 | print "gradient is correct" 74 | 75 | else: 76 | 77 | rel_error = np.linalg.norm( jacobian_analytic - jacobian_numerical ) / \ 78 | np.maximum( np.linalg.norm( jacobian_analytic ), np.linalg.norm( jacobian_numerical) ) 79 | 80 | print 'analytic jacobian :' 81 | print jacobian_analytic 82 | 83 | print 'numerical jacobian :' 84 | print jacobian_numerical 85 | 86 | print 'jacobian difference :' 87 | print jacobian_analytic - jacobian_numerical 88 | 89 | print 'relative error:' 90 | print rel_error 91 | 92 | def numdiff_wrapper(func, params, keywords, i): 93 | 94 | """ 95 | :param func: computational graph from pytorch 96 | :param params: variables of the computational graph 97 | :param i: the argument we want to test 98 | :return: the corresponding python function and evaluation point 99 | """ 100 | 101 | shape = params[i].data.numpy().shape 102 | p = params[i].data.numpy().reshape(-1) 103 | 104 | def wrapper_func(input): 105 | 106 | """ 107 | check if input is vector 108 | """ 109 | assert len(input.shape) == 1 110 | 111 | params[i].data = torch.Tensor(input.reshape((shape))) 112 | 113 | if(len(keywords.keys()) > 1): 114 | outputVar = func(*params, **keywords) 115 | else: 116 | outputVar = func(*params) 117 | 118 | output = outputVar.data.numpy() 119 | 120 | return output 121 | 122 | return wrapper_func, p 123 | 124 | def numdiff_unified(func, input): 125 | 126 | result = func(input) 127 | 128 | Jfunc = nd.Jacobian(func, order=10) 129 | J = Jfunc(input) 130 | 131 | if( len(input.shape) == 1 and len(result.shape) ==1 ): 132 | return J 133 | elif( len(input.shape) == 1 and len(result.shape) == 2 ): 134 | return J.transpose((1, 0, 2)).reshape(-1, J.shape[2]) 135 | else: 136 | print 'dimension not supported for numdiff, ' \ 137 | 'input has dim{:g} and output has dim{:g}'.format(len(input.shape()), len(result.shape())) 138 | 139 | def wrapper(func, *params, **keywords): 140 | 141 | """ 142 | func is pytorch computational graph 143 | check each param to see if it is pytorch Variable and need grad 144 | """ 145 | 146 | num = len(params) 147 | 148 | def from_vars_to_x(params): 149 | 150 | flags = [ (isinstance(params[i], 
Variable) and params[i].requires_grad) for i in range(num) ] 151 | 152 | x = np.array([]).reshape(-1,1) 153 | for i in range(num): 154 | if(flags[i]): 155 | x = np.concatenate((x, params[i].data.numpy().reshape((-1,1)) )) 156 | 157 | x = x.reshape(-1) 158 | 159 | return x, flags 160 | 161 | x0, flags = from_vars_to_x(params) 162 | 163 | def put_x_in_vars(x): 164 | 165 | pos = 0 166 | for i in range(num): 167 | 168 | if(flags[i]): 169 | 170 | xi = x[pos : params[i].data.numpy().size + pos] 171 | params[i].data = torch.Tensor( xi.reshape(( params[i].data.numpy().shape ) ) ) 172 | 173 | pos = pos + xi.size 174 | 175 | return params 176 | 177 | def wrapper_func(x): 178 | 179 | assert len(x.shape) == 1 180 | 181 | put_x_in_vars(x) 182 | 183 | if(len(keywords.keys()) >= 1): 184 | f = func(*params, **keywords) 185 | else: 186 | f = func(*params) 187 | 188 | output = f.data.numpy() 189 | 190 | return output 191 | 192 | def wrapper_func_jac(x): 193 | 194 | assert len(x.shape) == 1 195 | 196 | put_x_in_vars(x) 197 | 198 | """ analytic gradient """ 199 | jacobian_analytic = [] 200 | 201 | if(len(keywords.keys()) >= 1): 202 | """function has dictionary inputs""" 203 | f = func(*params, **keywords) 204 | else: 205 | f = func(*params) 206 | 207 | output_nelems = f.data.numel() 208 | 209 | for k in range(output_nelems): 210 | 211 | output_grad = torch.zeros(f.data.size()) 212 | output_grad.view(output_nelems, 1)[k] = 1 213 | 214 | f.backward(output_grad, retain_variables=True) 215 | 216 | jacobian_analytic_i = [] 217 | 218 | for i in range(num): 219 | if(flags[i]): 220 | nelems = params[i].numel() 221 | jacobian_analytic_i.append( np.copy( params[i].grad.data.view( nelems).numpy() ) ) 222 | 223 | jacobian_analytic.append( np.asarray(jacobian_analytic_i).reshape(-1) ) 224 | 225 | for params_i in params: 226 | if(isinstance(params_i, torch.autograd.Variable) and params_i.requires_grad): 227 | params_i.grad.data.zero_() 228 | 229 | jacobian_analytic = np.asarray(jacobian_analytic) 230 | 231 | return jacobian_analytic 232 | 233 | return dict(x0 = x0, 234 | x2vars = put_x_in_vars, 235 | func = wrapper_func, 236 | jac = wrapper_func_jac) 237 | --------------------------------------------------------------------------------