├── .gitignore ├── LICENSE ├── README.md ├── core ├── BaseLearner.py ├── DFLearner.py ├── UnFlow │ ├── __init__.py │ ├── ops │ │ ├── backward_warp_op.cc │ │ ├── backward_warp_op.cu.cc │ │ ├── correlation_op.cc │ │ ├── correlation_op.cu.cc │ │ ├── correlation_op.h │ │ ├── downsample_op.cc │ │ ├── downsample_op.cu.cc │ │ ├── forward_warp_op.cc │ │ └── forward_warp_op.cu.cc │ └── src │ │ ├── __init__.py │ │ └── e2eflow │ │ ├── __init__.py │ │ ├── core │ │ ├── __init__.py │ │ ├── augment.py │ │ ├── data.py │ │ ├── flow_util.py │ │ ├── flownet.py │ │ ├── image_warp.py │ │ ├── input.py │ │ ├── losses.py │ │ ├── spatial_transformer.py │ │ ├── supervised.py │ │ ├── train.py │ │ ├── unsupervised.py │ │ └── util.py │ │ ├── ops.py │ │ └── util.py ├── __init__.py ├── data_loader.py ├── flowlib.py ├── nets.py └── utils.py ├── data ├── __init__.py ├── kitti │ ├── __init__.py │ ├── excluded_frames.txt │ ├── kitti_odom_loader.py │ ├── kitti_raw_loader.py │ ├── static_frames.txt │ ├── test_files_eigen.txt │ ├── test_files_stereo.txt │ ├── test_scenes_eigen.txt │ ├── test_scenes_stereo.txt │ └── val_files_eigen.txt └── prepare_train_data.py ├── kitti_eval ├── __init__.py ├── depth_evaluation_utils.py ├── eval_depth.py ├── eval_pose.py └── pose_evaluation_utils.py ├── misc ├── prepare.sh └── zou2018dfnet.gif ├── test_flownet_2012.py ├── test_flownet_2015.py ├── test_kitti_depth.py └── train_df.py /.gitignore: -------------------------------------------------------------------------------- 1 | *DS_Store 2 | *ignore* 3 | *pyc 4 | *__pycache__ 5 | pretrained 6 | ckpt 7 | results 8 | *pose_data 9 | dataset 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Virginia Tech Vision and Learning Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency 2 | 3 | A TensorFlow re-implementation for [DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency](https://arxiv.org/abs/1809.01649). 
There are some minor differences from the model described in the paper:
4 |
5 | - The model in the paper uses 2 frames as input, while this code uses 5 frames (you may use any odd number of frames as input, though you would then need to tune the hyper-parameters)
6 | - FlowNet in the paper is pre-trained on SYNTHIA, while this one is pre-trained on Cityscapes
7 |
8 | Please see the [project page](http://yuliang.vision/DF-Net/) for more details.
9 |
10 |
11 |
12 |
13 | ## Prerequisites
14 | This codebase was developed and tested with the following settings:
15 | ```
16 | Python 3.6
17 | TensorFlow 1.2.0 (this is the only supported version)
18 | g++ 4.x (this is the only supported version)
19 | CUDA 8.0
20 | Ubuntu 14.04
21 | 4 Tesla K80 GPUs (w/ 12G memory each)
22 | ```
23 |
24 | Some Python packages you might not have:
25 | ```
26 | pypng
27 | opencv-python
28 | ```
29 |
30 | ## Installation
31 | 1. Clone this repository
32 | ```Shell
33 | git clone git@github.com:vt-vl-lab/DF-Net.git
34 | cd DF-Net
35 | ```
36 |
37 | 2. Prepare models and training data
38 | ```Shell
39 | chmod +x ./misc/prepare.sh
40 | ./misc/prepare.sh
41 | ```
42 | NOTE: Frames belonging to the KITTI2012/2015 train/test scenes have been excluded from the provided training set. Adding these frames back to the training set would improve the performance of DepthNet.
43 |
44 | ## Data preparation (for evaluation)
45 | After accepting their license conditions, download [KITTI raw](http://www.cvlibs.net/datasets/kitti/raw_data.php), [KITTI flow 2012](http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php?benchmark=flow), and [KITTI flow 2015](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow).
46 |
47 | Then you can create soft-links to them:
48 | ```Shell
49 | cd dataset
50 | mkdir KITTI
51 | cd KITTI
52 |
53 | ln -s /path/to/KITTI/raw raw
54 | ln -s /path/to/KITTI/2012 flow2012
55 | ln -s /path/to/KITTI/2015 flow2015
56 | ```
57 |
58 | **(Optional)** You can add those KITTI2012/2015 frames back to the training set by commenting out lines 81-85 in `data/kitti/kitti_raw_loader.py` and running
59 | ```
60 | python data/prepare_train_data.py --dataset_name='kitti_raw_eigen' --dump_root=/path/to/save/ --num_threads=4
61 | ```
62 |
63 | ## Training
64 | ```
65 | export CUDA_VISIBLE_DEVICES=0,1,2,3
66 | python train_df.py --dataset_dir=/path/to/your/data --checkpoint_dir=/path/to/save/your/model
67 | ```
68 |
69 | The first time you run training, the custom CUDA operations for FlowNet will be compiled. If you run into any compilation issues, please check `core/UnFlow/src/e2eflow/ops.py`:
70 | - Line 31: specify your CUDA path
71 | - Line 32: add `-I $CUDA_HOME/include`, where `$CUDA_HOME` is your CUDA directory
72 | - Line 38: specify your g++ version
73 |
74 | ## Testing
75 | Test DepthNet on KITTI raw (you can use the validation set to select the best model):
76 | ```
77 | python test_kitti_depth.py --dataset_dir=/path/to/your/data --output_dir=/path/to/save/your/prediction --ckpt_file=/path/to/your/ckpt --split="val or test"
78 | python kitti_eval/eval_depth.py --pred_file=/path/to/your/prediction --split="val or test"
79 | ```
80 |
81 | Test FlowNet on KITTI 2012 (please use the training set):
82 | ```
83 | python test_flownet_2012.py --dataset_dir=/path/to/your/data --ckpt_file=/path/to/your/ckpt
84 | ```
85 |
86 | Test FlowNet on KITTI 2015 (please use the training set):
87 | ```
88 | python test_flownet_2015.py --dataset_dir=/path/to/your/data --ckpt_file=/path/to/your/ckpt
89 | ```
90 |
91 | NOTE: For KITTI 2012/2015
92 | - If you want to generate visualization colormaps for the **training set**, specify `output_dir`
93 | - If you want to upload results to the KITTI server, specify `output_dir` and run on the **test set**
94 |
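If you prefer to call the model from Python rather than through `test_kitti_depth.py`, the inference helpers defined in `core/BaseLearner.py` (`setup_inference` / `inference`) can be driven directly. The snippet below is only a rough sketch and is not part of the repository: the input resolution, file paths, and the use of `BaseLearner` (rather than the `DFLearner` subclass) are assumptions, so treat `test_kitti_depth.py` as the authoritative example.

```python
# Illustrative sketch only -- test_kitti_depth.py is the reference implementation.
# Resolution, checkpoint path, and image path below are placeholders; the released
# checkpoint is assumed to match the depth test graph built in core/BaseLearner.py.
import scipy.misc
import tensorflow as tf

from core.BaseLearner import BaseLearner

learner = BaseLearner()
learner.setup_inference(img_height=160, img_width=576, mode='depth', batch_size=1)
saver = tf.train.Saver()  # test_kitti_depth.py may restrict this to model variables

with tf.Session() as sess:
    saver.restore(sess, '/path/to/your/ckpt')
    im = scipy.misc.imread('/path/to/a/kitti/image.png')   # H x W x 3, uint8
    im = scipy.misc.imresize(im, (160, 576))                # match the test graph size
    pred = learner.inference(im[None, :, :, :], sess, mode='depth')
    depth = pred['depth'][0, :, :, 0]                       # H x W depth map
    print(depth.min(), depth.max())
```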
95 | ## Pre-trained model performance
96 | You should get the following numbers if you use the pre-trained model `pretrained/dfnet`:
97 |
98 |
99 | DepthNet (KITTI raw test set)
100 |
101 | abs rel | sq rel | rms | log rms | a1 | a2 | a3 |
102 | ---------------|------------|------------|------------|------------|------------|------------|
103 | 0.1452 | 1.2904 | 5.6115 | 0.2194 | 0.8114 | 0.9394 | 0.9767 |
104 |
105 |
106 | FlowNet (KITTI 2012/2015 training set)
107 |
108 | KITTI 2012 EPE | KITTI 2015 EPE | KITTI 2015 F1 |
109 | ---------------|----------------|---------------|
110 | 3.1052 | 7.4482 | 0.2695 |
111 |
112 |
113 | ## Citation
114 | If you find this code useful for your research, please consider citing the following paper:
115 |
116 |     @inproceedings{zou2018dfnet,
117 |       author    = {Zou, Yuliang and Luo, Zelun and Huang, Jia-Bin},
118 |       title     = {DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency},
119 |       booktitle = {European Conference on Computer Vision},
120 |       year      = {2018}
121 |     }
122 |
123 |
124 | ## Acknowledgement
125 | Code is heavily borrowed from several great works, including [SfMLearner](https://github.com/tinghuiz/SfMLearner), [monodepth](https://github.com/mrharicot/monodepth), and [UnFlow](https://github.com/simonmeister/UnFlow). We thank [Shih-Yang Su](https://github.com/LemonATsu) for the code review.
126 | -------------------------------------------------------------------------------- /core/BaseLearner.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import time 4 | import math 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | 9 | from .data_loader import DataLoader 10 | from .nets import * 11 | from .utils import * 12 | from .flowlib import flow_to_image 13 | 14 | class BaseLearner(object): 15 | def __init__(self): 16 | pass 17 | 18 | def build_train_graph(self): 19 | raise NotImplementedError 20 | 21 | def collect_summaries(self): 22 | raise NotImplementedError 23 | 24 | def train(self, opt): 25 | raise NotImplementedError 26 | 27 | # Credit: https://github.com/mrharicot/monodepth/blob/master/average_gradients.py 28 | def average_gradients(self, tower_grads): 29 | average_grads = [] 30 | for grad_and_vars in zip(*tower_grads): 31 | grads = [] 32 | for g, _ in grad_and_vars: 33 | if g is not None: 34 | expanded_g = tf.expand_dims(g, 0) 35 | grads.append(expanded_g) 36 | if grads != []: 37 | grad = tf.concat(axis=0, values=grads) 38 | grad = tf.reduce_mean(grad, 0) 39 | v = grad_and_vars[0][1] 40 | grad_and_var = (grad, v) 41 | average_grads.append(grad_and_var) 42 | return average_grads 43 | 44 | def get_dp_flow(self, opt, s, src_pixel_coords): 45 | x_base = tf.range(int(opt.img_width/(2**s))) 46 | y_base = tf.range(int(opt.img_height/(2**s))) 47 | x_base = tf.stack([x_base]*int(opt.img_height/(2**s)), axis=0) 48 | y_base = tf.transpose(tf.stack([y_base]*int(opt.img_width/(2**s)), axis=0)) 49 | 50 | dp_flow_x = src_pixel_coords[:, :, :, 0] - tf.cast(x_base, tf.float32) 51 | dp_flow_y = src_pixel_coords[:, :, :, 1] - tf.cast(y_base, tf.float32) 52 | dp_flow = tf.stack([dp_flow_x, dp_flow_y], axis=-1) 53 | return dp_flow 54 | 55 | def get_in_range_mask(self, opt, s, flow): 56 | # 1 if the displacement is within the image 57 | x_min = 0.0 58 | x_max = int(opt.img_width/(2**s))-1 59 | y_min = 0.0 60 | y_max = int(opt.img_height/(2**s))-1 61 | 62 | x_base = tf.range(int(opt.img_width/(2**s))) 63 | y_base = tf.range(int(opt.img_height/(2**s))) 64 | x_base = tf.stack([x_base]*int(opt.img_height/(2**s)), axis=0) 65 | y_base = tf.transpose(tf.stack([y_base]*int(opt.img_width/(2**s)), axis=0)) 66 | 67 | pos_x = flow[:,:,:,0]+tf.cast(x_base, tf.float32) 68 | pos_y = flow[:,:,:,1]+tf.cast(y_base, tf.float32) 69 | inside_x = tf.logical_and(pos_x <= tf.cast(x_max, tf.float32), pos_x >= x_min) 70 | inside_y = tf.logical_and(pos_y <= tf.cast(y_max, tf.float32), pos_y >= y_min) 71 | inside = tf.expand_dims(tf.logical_and(inside_x, inside_y), axis=-1) 72 | return tf.stop_gradient(tf.cast(inside, tf.float32)) 73 | 74 | def get_fb_mask(self, flow, warped_flow, alpha1=0.01, alpha2=0.5): 75 | temp1 = tf.reduce_sum(tf.square(flow+warped_flow), axis=3, keep_dims=True) 76 | temp2 = tf.reduce_sum(tf.square(flow), axis=3, keep_dims=True)+tf.reduce_sum(tf.square(warped_flow), axis=3, keep_dims=True) 77 | occ_mask = tf.greater(temp1, alpha1*temp2+alpha2) 78 | return tf.stop_gradient(tf.cast(occ_mask, tf.float32)) 79 | 80 | # Crecit: https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/losses.py 81 | def ternary_loss(self, im1, im2_warped, valid_mask, max_distance=1): 82 | patch_size = 2*max_distance+1 83 | with tf.variable_scope('ternary_loss'): 84 | def _ternary_transform(image): 85 | intensities = tf.image.rgb_to_grayscale(image) * 255 86 | out_channels = 
patch_size * patch_size 87 | w = np.eye(out_channels).reshape((patch_size, patch_size, 1, out_channels)) 88 | weights = tf.constant(w, dtype=tf.float32) 89 | patches = tf.nn.conv2d(intensities, weights, strides=[1, 1, 1, 1], padding='SAME') 90 | 91 | transf = patches - intensities 92 | transf_norm = transf / tf.sqrt(0.81 + tf.square(transf)) 93 | return transf_norm 94 | 95 | def _hamming_distance(t1, t2): 96 | dist = tf.square(t1 - t2) 97 | dist_norm = dist / (0.1 + dist) 98 | dist_sum = tf.reduce_sum(dist_norm, 3, keep_dims=True) 99 | return dist_sum 100 | 101 | t1 = _ternary_transform(im1) 102 | t2 = _ternary_transform(im2_warped) 103 | dist = _hamming_distance(t1, t2) 104 | 105 | transform_mask = self.create_mask(valid_mask, [[max_distance, max_distance], [max_distance, max_distance]]) 106 | return self.charbonnier_loss(dist, valid_mask * transform_mask), dist 107 | 108 | def charbonnier_loss(self, x, mask=None, truncate=None, alpha=0.45, beta=1.0, epsilon=0.001): 109 | with tf.variable_scope('charbonnier_loss'): 110 | batch, height, width, channels = tf.unstack(tf.shape(x)) 111 | normalization = tf.cast(batch * height * width * channels, tf.float32) 112 | 113 | error = tf.pow(tf.square(x * beta) + tf.square(epsilon), alpha) 114 | 115 | if mask is not None: 116 | error = tf.multiply(mask, error) 117 | if truncate is not None: 118 | error = tf.minimum(error, truncate) 119 | 120 | return tf.reduce_sum(error) / normalization 121 | 122 | def create_mask(self, tensor, paddings): 123 | with tf.variable_scope('create_mask'): 124 | shape = tf.shape(tensor) 125 | inner_width = shape[1] - (paddings[0][0] + paddings[0][1]) 126 | inner_height = shape[2] - (paddings[1][0] + paddings[1][1]) 127 | inner = tf.ones([inner_width, inner_height]) 128 | 129 | mask2d = tf.pad(inner, paddings) 130 | mask3d = tf.tile(tf.expand_dims(mask2d, 0), [shape[0], 1, 1]) 131 | mask4d = tf.expand_dims(mask3d, 3) 132 | return tf.stop_gradient(mask4d) 133 | 134 | # Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py 135 | def SSIM(self, x, y): 136 | C1 = 0.01 ** 2 137 | C2 = 0.03 ** 2 138 | 139 | mu_x = slim.avg_pool2d(x, 3, 1, 'VALID') 140 | mu_y = slim.avg_pool2d(y, 3, 1, 'VALID') 141 | 142 | sigma_x = slim.avg_pool2d(x ** 2, 3, 1, 'VALID') - mu_x ** 2 143 | sigma_y = slim.avg_pool2d(y ** 2, 3, 1, 'VALID') - mu_y ** 2 144 | sigma_xy = slim.avg_pool2d(x * y , 3, 1, 'VALID') - mu_x * mu_y 145 | 146 | SSIM_n = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2) 147 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2) 148 | SSIM = SSIM_n / SSIM_d 149 | 150 | return tf.clip_by_value((1 - SSIM) / 2, 0, 1) 151 | 152 | def compute_edge_aware_smooth_loss(self, pred_disp, img): 153 | """ 154 | Edge-aware L1-norm on first-order gradient 155 | """ 156 | def gradient(pred): 157 | D_dx = -pred[:, :, 1:, :] + pred[:, :, :-1, :] 158 | D_dy = -pred[:, 1:, :, :] + pred[:, :-1, :, :] 159 | return D_dx, D_dy 160 | img_dx, img_dy = gradient(img) 161 | disp_dx, disp_dy = gradient(pred_disp) 162 | 163 | weight_x = tf.exp(-tf.reduce_mean(tf.abs(img_dx), 3, keep_dims=True)) 164 | weight_y = tf.exp(-tf.reduce_mean(tf.abs(img_dy), 3, keep_dims=True)) 165 | 166 | loss = tf.reduce_mean(weight_x*tf.abs(disp_dx)) + tf.reduce_mean(weight_y*tf.abs(disp_dy)) 167 | return loss 168 | 169 | def compute_smooth_loss(self, pred_disp): 170 | """ 171 | L1-norm on second-order gradient 172 | """ 173 | def gradient(pred): 174 | D_dy = pred[:, 1:, :, :] - pred[:, :-1, :, :] 175 | D_dx = pred[:, :, 1:, :] - pred[:, :, :-1, :] 176 | 
return D_dx, D_dy 177 | dx, dy = gradient(pred_disp) 178 | dx2, dxdy = gradient(dx) 179 | dydx, dy2 = gradient(dy) 180 | return tf.reduce_mean(tf.abs(dx2)) + \ 181 | tf.reduce_mean(tf.abs(dxdy)) + \ 182 | tf.reduce_mean(tf.abs(dydx)) + \ 183 | tf.reduce_mean(tf.abs(dy2)) 184 | 185 | def flow_to_image_tf(self, flow): 186 | im_stack = [] 187 | for i in range(self.opt.batch_size//self.opt.num_gpus): 188 | temp = tf.py_func(flow_to_image, [flow[i,:,:,:]], tf.uint8) 189 | im_stack.append(temp) 190 | return tf.stack(im_stack, axis=0) 191 | 192 | # Credit: https://github.com/yzcjtr/GeoNet/blob/master/geonet_model.py 193 | def spatial_normalize(self, disp): 194 | _, curr_h, curr_w, curr_c = disp.get_shape().as_list() 195 | disp_mean = tf.reduce_mean(disp, axis=[1,2,3], keep_dims=True) 196 | disp_mean = tf.tile(disp_mean, [1, curr_h, curr_w, curr_c]) 197 | return disp/disp_mean 198 | 199 | def build_depth_test_graph(self): 200 | input_uint8 = tf.placeholder(tf.uint8, [self.batch_size, 201 | self.img_height, self.img_width, 3], name='raw_input') 202 | input_mc = self.preprocess_image(input_uint8) 203 | with tf.name_scope("depth_prediction"): 204 | pred_disp, depth_net_endpoints = disp_net_res50( 205 | input_mc, is_training=False) 206 | pred_depth = [1./disp for disp in pred_disp] 207 | pred_depth = pred_depth[0] 208 | self.inputs = input_uint8 209 | self.pred_depth = pred_depth 210 | self.depth_epts = depth_net_endpoints 211 | 212 | # Forward-backward 213 | def build_pose_fb_test_graph(self): 214 | input_uint8 = tf.placeholder(tf.uint8, [self.batch_size, 215 | self.img_height, self.img_width * self.seq_length, 3], 216 | name='raw_input') 217 | input_mc = self.preprocess_image(input_uint8) 218 | loader = DataLoader() 219 | tgt_image, src_image_stack = \ 220 | loader.batch_unpack_image_sequence( 221 | input_mc, self.img_height, self.img_width, self.num_source) 222 | with tf.name_scope("pose_prediction"): 223 | pred_poses, _ = pose_net_fb( 224 | tgt_image, src_image_stack, is_training=False) 225 | self.inputs = input_uint8 226 | self.pred_poses = pred_poses[:, :, :6] # Only the first half is used 227 | 228 | def preprocess_image(self, image, is_dp=True): 229 | # Assuming input image is uint8 230 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 231 | if is_dp: 232 | return image * 2. -1. 233 | else: 234 | mean = [104.920005, 110.1753, 114.785955] 235 | out = [] 236 | for i in range(0, int(image.shape[-1]), 3): 237 | r = image[:,:,:,i] - mean[0]/255. 238 | g = image[:,:,:,i+1] - mean[1]/255. 239 | b = image[:,:,:,i+2] - mean[2]/255. 240 | out += [r, g, b] 241 | return tf.stack(out, axis=-1) 242 | 243 | def minus_imagenet_rgb(self, image): 244 | mean = [122.7717, 115.9465, 102.9801] 245 | image = tf.cast(image, tf.float32) 246 | out = [] 247 | for i in range(0, int(image.shape[-1]), 3): 248 | r = image[:,:,:,i] - mean[0] 249 | g = image[:,:,:,i+1] - mean[1] 250 | b = image[:,:,:,i+2] - mean[2] 251 | out += [r, g, b] 252 | return tf.stack(out, axis=-1) 253 | 254 | def deprocess_image(self, image, is_dp=True): 255 | # Assuming input image is float32 256 | if is_dp: 257 | image = (image + 1.)/2. 258 | else: 259 | mean = [104.920005, 110.1753, 114.785955] 260 | r = image[:,:,:,0] + mean[0]/255. 261 | g = image[:,:,:,1] + mean[1]/255. 262 | b = image[:,:,:,2] + mean[2]/255. 
263 | image = tf.stack([r, g, b], axis=-1) 264 | return tf.image.convert_image_dtype(image, dtype=tf.uint8) 265 | 266 | def setup_inference(self, 267 | img_height, 268 | img_width, 269 | mode, 270 | seq_length=3, 271 | batch_size=1): 272 | self.img_height = img_height 273 | self.img_width = img_width 274 | self.mode = mode 275 | self.batch_size = batch_size 276 | if self.mode == 'depth': 277 | self.build_depth_test_graph() 278 | if self.mode == 'pose': 279 | self.seq_length = seq_length 280 | self.num_source = seq_length - 1 281 | self.build_pose_fb_test_graph() 282 | 283 | def inference(self, inputs, sess, mode='depth'): 284 | fetches = {} 285 | if mode == 'depth': 286 | fetches['depth'] = self.pred_depth 287 | if mode == 'pose': 288 | fetches['pose'] = self.pred_poses 289 | results = sess.run(fetches, feed_dict={self.inputs:inputs}) 290 | return results 291 | 292 | def save(self, sess, checkpoint_dir, step): 293 | model_name = 'model' 294 | print(" [*] Saving checkpoint to %s..." % checkpoint_dir) 295 | if step == 'latest': 296 | self.saver.save(sess, 297 | os.path.join(checkpoint_dir, model_name + '.latest')) 298 | else: 299 | self.saver.save(sess, 300 | os.path.join(checkpoint_dir, model_name), 301 | global_step=step) 302 | 303 | if __name__ == '__main__': 304 | model = BaseLearner() 305 | -------------------------------------------------------------------------------- /core/UnFlow/__init__.py: -------------------------------------------------------------------------------- 1 | from .src import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/ops/backward_warp_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | // TODO assert input flow channel count = 2, assert matching numbers in all other dims 17 | 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | using namespace tensorflow; 21 | 22 | void BackwardWarp(const GPUDevice& d, 23 | typename TTypes::ConstTensor images, 24 | typename TTypes::ConstTensor flows, 25 | typename TTypes::Tensor output); 26 | 27 | void BackwardWarpGrad(const GPUDevice& d, 28 | typename TTypes::ConstTensor input_grad, 29 | typename TTypes::ConstTensor input_images, 30 | typename TTypes::ConstTensor flows, 31 | typename TTypes::Tensor output_grad); 32 | 33 | class BackwardWarpOp : public OpKernel { 34 | public: 35 | explicit BackwardWarpOp(OpKernelConstruction* context) : OpKernel(context) {} 36 | 37 | void Compute(OpKernelContext* context) override { 38 | const Tensor& input_images = context->input(0); 39 | const Tensor& input_flows = context->input(1); 40 | 41 | Tensor* output_images = NULL; 42 | OP_REQUIRES_OK(context, context->allocate_output(0, input_images.shape(), 43 | &output_images)); 44 | 45 | typename TTypes::ConstTensor image_data = input_images.tensor(); 46 | typename TTypes::ConstTensor flow_data = 
input_flows.tensor(); 47 | typename TTypes::Tensor output_data = output_images->tensor(); 48 | 49 | BackwardWarp(context->eigen_device(), 50 | image_data, flow_data, output_data); 51 | } 52 | }; 53 | 54 | class BackwardWarpOpGrad : public OpKernel { 55 | public: 56 | explicit BackwardWarpOpGrad(OpKernelConstruction* context) : OpKernel(context) {} 57 | 58 | void Compute(OpKernelContext* context) override { 59 | const Tensor& input = context->input(0); 60 | const Tensor& original_images = context->input(1); 61 | const Tensor& original_flows = context->input(2); 62 | 63 | Tensor* output = NULL; 64 | OP_REQUIRES_OK(context, context->allocate_output(0, original_flows.shape(), 65 | &output)); 66 | 67 | typename TTypes::ConstTensor input_data = input.tensor(); 68 | typename TTypes::ConstTensor flow_data = original_flows.tensor(); 69 | typename TTypes::ConstTensor image_data = original_images.tensor(); 70 | typename TTypes::Tensor output_data = output->tensor(); 71 | 72 | BackwardWarpGrad(context->eigen_device(), 73 | input_data, image_data, flow_data, output_data); 74 | } 75 | }; 76 | 77 | REGISTER_OP("BackwardWarp") 78 | .Input("images: float") 79 | .Input("flows: float") 80 | .Output("warped_images: float") 81 | .SetShapeFn(shape_inference::UnchangedShape); 82 | 83 | REGISTER_OP("BackwardWarpGrad") 84 | .Input("grads: float") 85 | .Input("original_images: float") 86 | .Input("original_flows: float") 87 | .Output("output: float") 88 | .SetShapeFn([](shape_inference::InferenceContext* c) { 89 | c->set_output(0, c->input(2)); 90 | return Status::OK(); 91 | }); 92 | 93 | #if GOOGLE_CUDA 94 | 95 | REGISTER_KERNEL_BUILDER(Name("BackwardWarp").Device(DEVICE_GPU), BackwardWarpOp); 96 | REGISTER_KERNEL_BUILDER(Name("BackwardWarpGrad").Device(DEVICE_GPU), BackwardWarpOpGrad); 97 | 98 | #endif // GOOGLE_CUDA 99 | -------------------------------------------------------------------------------- /core/UnFlow/ops/backward_warp_op.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include "tensorflow/core/framework/register_types.h" 6 | #include "tensorflow/core/framework/tensor_types.h" 7 | #include "tensorflow/core/platform/types.h" 8 | #include "tensorflow/core/util/cuda_kernel_helper.h" 9 | 10 | using namespace tensorflow; 11 | 12 | typedef Eigen::GpuDevice GPUDevice; 13 | 14 | __global__ void BackwardWarpKernel(const int32 nthreads, 15 | const float* images, const float* flows, 16 | int batch, int height, int width, int channels, 17 | float* output) { 18 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 19 | // out_idx = src_x + width * (src_y + height * b) 20 | int idx = out_idx; 21 | const int src_x = idx % width; 22 | idx /= width; 23 | const int src_y = idx % height; 24 | const int b = idx / height; 25 | 26 | const int flow_index = out_idx * 2; 27 | const float x = src_x + flows[flow_index]; 28 | const float y = src_y + flows[flow_index + 1]; 29 | 30 | const int x0 = floorf(x); 31 | const int x1 = x0 + 1; 32 | const int y0 = floorf(y); 33 | const int y1 = y0 + 1; 34 | 35 | const float w_right = x - x0; 36 | const float w_left = x1 - x; 37 | const float w_bottom = y - y0; 38 | const float w_top = y1 - y; 39 | 40 | for(int c = 0; c < channels; ++c) { 41 | float sum = 0.0; 42 | 43 | #define IMG(iy, ix) images[c + channels * (ix + width * (iy + height * b))] 44 | 45 | // top-left neighbor 46 | if(x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) { 47 | sum += w_left * w_top * IMG(y0, x0); 48 | } 49 | 50 | // top-right 
neigbor 51 | if(x1 >= 0 && x1 < width && y0 >= 0 && y0 < height) { 52 | sum += w_right * w_top * IMG(y0, x1); 53 | } 54 | 55 | // bottom-left neighbor 56 | if(x0 >= 0 && x0 < width && y1 >= 0 && y1 < height) { 57 | sum += w_left * w_bottom * IMG(y1, x0); 58 | } 59 | 60 | // bottom-right neighbor 61 | if(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height) { 62 | sum += w_right * w_bottom * IMG(y1, x1); 63 | } 64 | #undef IMG 65 | output[out_idx * channels + c] = sum; 66 | } 67 | } 68 | } 69 | 70 | __global__ void BackwardWarpGradKernel(const int32 nthreads, 71 | const float* input_grad, 72 | const float* input_images, const float* flows, 73 | int batch, int height, int width, int channels, 74 | float* output_grad) { 75 | CUDA_1D_KERNEL_LOOP(in_idx, nthreads) { 76 | // in_idx = x + width * (y + height * b) 77 | int idx = in_idx; 78 | const int src_x = idx % width; 79 | idx /= width; 80 | const int src_y = idx % height; 81 | const int b = idx / height; 82 | 83 | const int flow_index = in_idx * 2; 84 | const float x = src_x + flows[flow_index]; 85 | const float y = src_y + flows[flow_index + 1]; 86 | 87 | const int x0 = floorf(x); 88 | const int x1 = x0 + 1; 89 | const int y0 = floorf(y); 90 | const int y1 = y0 + 1; 91 | 92 | const float w_right = x - x0; 93 | const float w_left = x1 - x; 94 | const float w_bottom = y - y0; 95 | const float w_top = y1 - y; 96 | 97 | float du = 0.0; 98 | float dv = 0.0; 99 | 100 | for(int c = 0; c < channels; ++c) { 101 | float px; 102 | float din = input_grad[c + channels * in_idx]; 103 | 104 | #define IMG(iy, ix) input_images[c + channels * (ix + width * (iy + height * b))] 105 | 106 | // top-left neighbor 107 | if(x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) { 108 | px = IMG(y0, x0) * din; 109 | du -= w_top * px; 110 | dv -= w_left * px; 111 | } 112 | 113 | // top-right neigbor 114 | if(x1 >= 0 && x1 < width && y0 >= 0 && y0 < height) { 115 | px = IMG(y0, x1) * din; 116 | du += w_top * px; 117 | dv -= w_right * px; 118 | } 119 | 120 | // bottom-left neighbor 121 | if(x0 >= 0 && x0 < width && y1 >= 0 && y1 < height) { 122 | px = IMG(y1, x0) * din; 123 | du -= w_bottom * px; 124 | dv += w_left * px; 125 | } 126 | 127 | // bottom-right neighbor 128 | if(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height) { 129 | px = IMG(y1, x1) * din; 130 | du += w_bottom * px; 131 | dv += w_right * px; 132 | } 133 | #undef IMG 134 | } 135 | output_grad[in_idx * 2] = du; 136 | output_grad[in_idx * 2 + 1] = dv; 137 | } 138 | } 139 | 140 | void BackwardWarp(const GPUDevice& d, 141 | typename TTypes::ConstTensor images, 142 | typename TTypes::ConstTensor flows, 143 | typename TTypes::Tensor output) { 144 | const int batch = images.dimension(0); 145 | const int height = images.dimension(1); 146 | const int width = images.dimension(2); 147 | const int channels = images.dimension(3); 148 | 149 | const int total_count = batch * height * width; 150 | if (total_count == 0) return; 151 | 152 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 153 | BackwardWarpKernel 154 | <<>>( 155 | config.virtual_thread_count, images.data(), flows.data(), 156 | batch, height, width, channels, 157 | output.data()); 158 | } 159 | 160 | void BackwardWarpGrad(const GPUDevice& d, 161 | typename TTypes::ConstTensor input_grad, 162 | typename TTypes::ConstTensor input_images, 163 | typename TTypes::ConstTensor flows, 164 | typename TTypes::Tensor output_grad) { 165 | const int batch = input_grad.dimension(0); 166 | const int height = input_grad.dimension(1); 167 | const int width = 
input_grad.dimension(2); 168 | const int channels = input_grad.dimension(3); 169 | 170 | int total_count; 171 | CudaLaunchConfig config; 172 | 173 | // Initialize output_grad with all zeros. 174 | total_count = batch * height * width; 175 | if (total_count == 0) return; 176 | config = GetCudaLaunchConfig(total_count, d); 177 | SetZero<<>>( 178 | config.virtual_thread_count, output_grad.data()); 179 | 180 | // Accumulate. 181 | total_count = batch * height * width; 182 | config = GetCudaLaunchConfig(total_count, d); 183 | BackwardWarpGradKernel 184 | <<>>( 185 | config.virtual_thread_count, input_grad.data(), 186 | input_images.data(), flows.data(), 187 | batch, height, width, channels, 188 | output_grad.data()); 189 | } 190 | 191 | 192 | #endif // GOOGLE_CUDA 193 | -------------------------------------------------------------------------------- /core/UnFlow/ops/correlation_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | #include "correlation_op.h" 17 | 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | using namespace tensorflow; 21 | 22 | void Correlation(const GPUDevice& d, 23 | typename TTypes::ConstTensor input_0, 24 | typename TTypes::ConstTensor input_1, 25 | typename TTypes::Tensor output, 26 | typename TTypes::Tensor padded_0, 27 | typename TTypes::Tensor padded_1, 28 | CorrelationState params); 29 | 30 | void CorrelationGrad(const GPUDevice& d, 31 | typename TTypes::ConstTensor input_grad, 32 | typename TTypes::ConstTensor padded_0, 33 | typename TTypes::ConstTensor padded_1, 34 | typename TTypes::Tensor output_grad_0, 35 | typename TTypes::Tensor output_grad_1, 36 | CorrelationState params); 37 | 38 | class CorrelationOp : public OpKernel { 39 | public: 40 | explicit CorrelationOp(OpKernelConstruction* context) 41 | : OpKernel(context), attrs(context) {} 42 | 43 | void Compute(OpKernelContext* context) override { 44 | const Tensor& input_0 = context->input(0); 45 | const Tensor& input_1 = context->input(1); 46 | 47 | OP_REQUIRES(context, input_0.shape() == input_1.shape(), 48 | errors::InvalidArgument("Input shapes have to be the same")); 49 | 50 | typename TTypes::ConstTensor input_0_data = input_0.tensor(); 51 | typename TTypes::ConstTensor input_1_data = input_1.tensor(); 52 | 53 | const int batch = input_0_data.dimension(0); 54 | const int in_channels = input_0_data.dimension(1); 55 | const int in_height = input_0_data.dimension(2); 56 | const int in_width = input_0_data.dimension(3); 57 | 58 | CorrelationState st(attrs, in_height, in_width, in_channels); 59 | 60 | OP_REQUIRES(context, st.out_width * st.out_height > 0, 61 | errors::InvalidArgument("Invalid correlation settings")); 62 | 63 | Tensor* output = NULL; 64 | TensorShape output_shape({batch, st.out_channels, st.out_height, st.out_width}); 65 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); 66 | 67 | 
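// The zero-padded copies of both inputs are allocated as additional op outputs
// so that CorrelationOpGrad can reuse them when computing the input gradients.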
Tensor* padded_0 = NULL; 68 | Tensor* padded_1 = NULL; 69 | TensorShape padded_shape({batch, st.padded_height, st.padded_width, in_channels}); 70 | OP_REQUIRES_OK(context, context->allocate_output(1, padded_shape, &padded_0)); 71 | OP_REQUIRES_OK(context, context->allocate_output(2, padded_shape, &padded_1)); 72 | 73 | typename TTypes::Tensor output_data = output->tensor(); 74 | typename TTypes::Tensor padded_0_data = padded_0->tensor(); 75 | typename TTypes::Tensor padded_1_data = padded_1->tensor(); 76 | 77 | Correlation(context->eigen_device(), 78 | input_0_data, input_1_data, output_data, 79 | padded_0_data, padded_1_data, 80 | st); 81 | } 82 | 83 | private: 84 | CorrelationAttrs attrs; 85 | }; 86 | 87 | class CorrelationOpGrad : public OpKernel { 88 | public: 89 | explicit CorrelationOpGrad(OpKernelConstruction* context) 90 | : OpKernel(context), attrs(context) {} 91 | 92 | void Compute(OpKernelContext* context) override { 93 | const Tensor& input_grad = context->input(0); 94 | const Tensor& input_0 = context->input(1); 95 | const Tensor& input_1 = context->input(2); 96 | const Tensor& padded_0 = context->input(3); 97 | const Tensor& padded_1 = context->input(4); 98 | 99 | typename TTypes::ConstTensor input_grad_data = input_grad.tensor(); 100 | typename TTypes::ConstTensor input_0_data = input_0.tensor(); 101 | //typename TTypes::ConstTensor input_1_data = input_1.tensor(); 102 | typename TTypes::ConstTensor padded_0_data = padded_0.tensor(); 103 | typename TTypes::ConstTensor padded_1_data = padded_1.tensor(); 104 | 105 | const int in_channels = input_0_data.dimension(1); 106 | const int in_height = input_0_data.dimension(2); 107 | const int in_width = input_0_data.dimension(3); 108 | 109 | CorrelationState st(attrs, in_height, in_width, in_channels); 110 | 111 | Tensor* output_grad_0 = NULL; 112 | OP_REQUIRES_OK(context, context->allocate_output(0, input_0.shape(), 113 | &output_grad_0)); 114 | Tensor* output_grad_1 = NULL; 115 | OP_REQUIRES_OK(context, context->allocate_output(1, input_0.shape(), 116 | &output_grad_1)); 117 | 118 | typename TTypes::Tensor output_grad_0_data = output_grad_0->tensor(); 119 | typename TTypes::Tensor output_grad_1_data = output_grad_1->tensor(); 120 | 121 | CorrelationGrad(context->eigen_device(), 122 | input_grad_data, 123 | padded_0_data, padded_1_data, 124 | output_grad_0_data, output_grad_1_data, 125 | st); 126 | } 127 | private: 128 | CorrelationAttrs attrs; 129 | }; 130 | 131 | using shape_inference::DimensionHandle;; 132 | 133 | REGISTER_OP("Correlation") 134 | .Input("input_0: float") 135 | .Input("input_1: float") 136 | .Attr("kernel_size: int = 1") 137 | .Attr("max_displacement: int = 20") 138 | .Attr("pad: int = 20") 139 | .Attr("stride_1: int = 1") 140 | .Attr("stride_2: int = 2") 141 | .Output("correlation: float") 142 | .Output("padded_0: float") 143 | .Output("padded_1: float") 144 | .SetShapeFn([](shape_inference::InferenceContext* c) { 145 | CorrelationAttrs attrs; 146 | c->GetAttr("kernel_size", &attrs.kernel_size); 147 | c->GetAttr("max_displacement", &attrs.max_displacement); 148 | c->GetAttr("pad", &attrs.pad_size); 149 | c->GetAttr("stride_1", &attrs.stride_1); 150 | c->GetAttr("stride_2", &attrs.stride_2); 151 | 152 | DimensionHandle batch = c->Dim(c->input(0), 0); 153 | 154 | //padded_height = in_height + 2 * pad_size; 155 | //padded_width = in_width + 2 * pad_size; 156 | //kernel_radius = (kernel_size - 1) / 2; 157 | //border_size = max_displacement + kernel_radius; 158 | int neighborhood_grid_radius = attrs.max_displacement / 
attrs.stride_2; 159 | int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1; 160 | //out_width = ceil((float)(padded_width - border_size *2) / (float)stride_1); 161 | //out_height = ceil((float)(padded_height - border_size *2) / (float)stride_1); 162 | int out_channels = neighborhood_grid_width * neighborhood_grid_width; 163 | 164 | // TODO: support passing on output width and height 165 | 166 | c->set_output(0, c->MakeShape({batch, out_channels, c->UnknownDim(), c->UnknownDim()})); 167 | return Status::OK(); 168 | }); 169 | 170 | REGISTER_OP("CorrelationGrad") 171 | .Input("input_grad: float") 172 | .Input("original_input_0: float") 173 | .Input("original_input_1: float") 174 | .Input("padded_0: float") 175 | .Input("padded_1: float") 176 | .Attr("kernel_size: int = 1") 177 | .Attr("max_displacement: int = 20") 178 | .Attr("pad: int = 20") 179 | .Attr("stride_1: int = 1") 180 | .Attr("stride_2: int = 2") 181 | .Output("output_grad_0: float") 182 | .Output("output_grad_1: float") 183 | .SetShapeFn([](shape_inference::InferenceContext* c) { 184 | c->set_output(0, c->input(1)); 185 | c->set_output(1, c->input(2)); 186 | return Status::OK(); 187 | }); 188 | 189 | #if GOOGLE_CUDA 190 | 191 | REGISTER_KERNEL_BUILDER(Name("Correlation").Device(DEVICE_GPU), CorrelationOp); 192 | REGISTER_KERNEL_BUILDER(Name("CorrelationGrad").Device(DEVICE_GPU), CorrelationOpGrad); 193 | 194 | #endif // GOOGLE_CUDA 195 | -------------------------------------------------------------------------------- /core/UnFlow/ops/correlation_op.h: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include "tensorflow/core/framework/op_kernel.h" 4 | #include "tensorflow/core/framework/op.h" 5 | 6 | using namespace tensorflow; 7 | 8 | struct CorrelationAttrs { 9 | CorrelationAttrs(OpKernelConstruction* c) { 10 | OP_REQUIRES_OK(c, c->GetAttr("kernel_size", &kernel_size)); 11 | OP_REQUIRES_OK(c, c->GetAttr("max_displacement", &max_displacement)); 12 | OP_REQUIRES_OK(c, c->GetAttr("pad", &pad_size)); 13 | OP_REQUIRES_OK(c, c->GetAttr("stride_1", &stride_1)); 14 | OP_REQUIRES_OK(c, c->GetAttr("stride_2", &stride_2)); 15 | 16 | OP_REQUIRES(c, kernel_size % 2 != 0, 17 | errors::InvalidArgument("kernel_size must be odd")); 18 | } 19 | CorrelationAttrs() {} 20 | 21 | int pad_size; 22 | int stride_1; 23 | int stride_2; 24 | int max_displacement; 25 | int kernel_size; 26 | }; 27 | 28 | struct CorrelationState { 29 | CorrelationState(CorrelationAttrs attrs, int in_height, int in_width, int in_channels) { 30 | pad_size = attrs.pad_size; 31 | stride_1 = attrs.stride_1; 32 | stride_2 = attrs.stride_2; 33 | max_displacement = attrs.max_displacement; 34 | kernel_size = attrs.kernel_size; 35 | 36 | padded_height = in_height + 2 * pad_size; 37 | padded_width = in_width + 2 * pad_size; 38 | 39 | // Compute size of unreachable border region (on each side) 40 | kernel_radius = (kernel_size - 1) / 2; 41 | border_size = max_displacement + kernel_radius; 42 | 43 | // Given a center position in image 1, how many displaced positions in -x / +x 44 | // direction do we consider in image 2 (neighborhoodGridWidth): 45 | neighborhood_grid_radius = max_displacement / stride_2; 46 | neighborhood_grid_width = neighborhood_grid_radius * 2 + 1; 47 | 48 | out_width = ceil((float)(padded_width - border_size *2) / (float)stride_1); 49 | out_height = ceil((float)(padded_height - border_size *2) / (float)stride_1); 50 | // Top Channels amount to displacement combinations in X and Y 
direction: 51 | out_channels = neighborhood_grid_width * neighborhood_grid_width; 52 | } 53 | 54 | int pad_size; 55 | int stride_1; 56 | int stride_2; 57 | int kernel_radius; 58 | int max_displacement; 59 | int kernel_size; 60 | int neighborhood_grid_radius; 61 | int neighborhood_grid_width; 62 | int padded_height; 63 | int padded_width; 64 | int border_size; 65 | int out_height; 66 | int out_width; 67 | int out_channels; 68 | }; 69 | -------------------------------------------------------------------------------- /core/UnFlow/ops/downsample_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | using namespace tensorflow; 17 | 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | void Downsample(const GPUDevice& d, 21 | typename TTypes::ConstTensor images, 22 | typename TTypes::Tensor output); 23 | 24 | class DownsampleOp : public OpKernel { 25 | public: 26 | explicit DownsampleOp(OpKernelConstruction* c) : OpKernel(c) { 27 | OP_REQUIRES_OK(c, c->GetAttr("scale", &scale)); 28 | } 29 | 30 | void Compute(OpKernelContext* context) override { 31 | const Tensor& input = context->input(0); 32 | 33 | typename TTypes::ConstTensor input_data = input.tensor(); 34 | 35 | 36 | OP_REQUIRES(context, 37 | input_data.dimension(1) % scale == 0 && 38 | input_data.dimension(2) % scale == 0, 39 | errors::InvalidArgument("Input height and width must be divisible by scale")); 40 | 41 | const int batch = input_data.dimension(0); 42 | const int height = input_data.dimension(1) / scale; 43 | const int width = input_data.dimension(2) / scale; 44 | const int channels = input_data.dimension(3); 45 | 46 | auto output_shape = TensorShape({batch, height, width, channels}); 47 | 48 | Tensor* output = NULL; 49 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); 50 | 51 | typename TTypes::Tensor output_data = output->tensor(); 52 | 53 | Downsample(context->eigen_device(), input_data, output_data); 54 | } 55 | private: 56 | int scale; 57 | }; 58 | 59 | using shape_inference::DimensionHandle; 60 | using shape_inference::ShapeHandle; 61 | 62 | REGISTER_OP("Downsample") 63 | .Input("images: float") 64 | .Attr("scale: int = 2") 65 | .Output("out_images: float") 66 | .SetShapeFn([](shape_inference::InferenceContext* c) { 67 | ShapeHandle in = c->input(0); 68 | int scale; 69 | DimensionHandle batch = c->Dim(in, 0); 70 | DimensionHandle channels = c->Dim(in, 3); 71 | DimensionHandle height; 72 | DimensionHandle width; 73 | 74 | c->GetAttr("scale", &scale); 75 | c->Divide(c->Dim(in, 1), scale, true, &height); 76 | c->Divide(c->Dim(in, 2), scale, true, &width); 77 | 78 | c->set_output(0, c->MakeShape({batch, height, width, channels})); 79 | return Status::OK(); 80 | }); 81 | 82 | #if GOOGLE_CUDA 83 | 84 | REGISTER_KERNEL_BUILDER(Name("Downsample").Device(DEVICE_GPU), DownsampleOp); 85 | 86 | #endif // GOOGLE_CUDA 87 | 
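As the CUDA kernel in the following file shows, `Downsample` simply averages every non-overlapping `scale` x `scale` block of the NHWC input. Below is a plain-TensorFlow sketch of the same computation (the helper name is ours, not part of the repository), which can serve as a CPU reference when sanity-checking the custom op; for inputs whose height and width are divisible by `scale`, it matches the op's output up to floating-point rounding.

```python
import tensorflow as tf

def downsample_reference(images, scale=2):
    """Average-pool equivalent of the custom Downsample op (NHWC, float32).

    Height and width must be divisible by `scale`, mirroring the op's own check.
    """
    return tf.nn.avg_pool(images,
                          ksize=[1, scale, scale, 1],
                          strides=[1, scale, scale, 1],
                          padding='VALID')
```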
-------------------------------------------------------------------------------- /core/UnFlow/ops/downsample_op.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include "tensorflow/core/framework/register_types.h" 6 | #include "tensorflow/core/framework/tensor_types.h" 7 | #include "tensorflow/core/platform/types.h" 8 | #include "tensorflow/core/util/cuda_kernel_helper.h" 9 | 10 | using namespace tensorflow; 11 | 12 | typedef Eigen::GpuDevice GPUDevice; 13 | 14 | __global__ void DownsampleKernel(const int32 nthreads, 15 | const float* images, 16 | int batch, int in_height, int in_width, int channels, 17 | int out_height, int out_width, 18 | float* output) { 19 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 20 | // out_idx = x + out_width * (y + out_height * b) 21 | int idx = out_idx; 22 | const int c = idx % channels; 23 | idx /= channels; 24 | const int x = idx % out_width; 25 | idx /= out_width; 26 | const int y = idx % out_height; 27 | const int b = idx / out_height; 28 | 29 | const int scale_y = in_height / out_height; 30 | const int scale_x = in_width/ out_width; 31 | 32 | const int min_in_y = y * scale_y; 33 | const int min_in_x = x * scale_x; 34 | const int max_in_y = min_in_y + scale_y; 35 | const int max_in_x = min_in_x + scale_x; 36 | 37 | float sum = 0.0; 38 | 39 | for(int in_y = min_in_y; in_y < max_in_y; ++in_y) { 40 | for(int in_x = min_in_x; in_x < max_in_x; ++in_x) { 41 | sum += images[c + channels * (in_x + in_width * (in_y + in_height * b))]; 42 | } 43 | } 44 | 45 | sum /= scale_x * scale_y; 46 | output[c + channels * (x + out_width * (y + out_height * b))] = sum; 47 | } 48 | } 49 | 50 | void Downsample(const GPUDevice& d, 51 | typename TTypes::ConstTensor images, 52 | typename TTypes::Tensor output) { 53 | const int batch = images.dimension(0); 54 | const int in_height = images.dimension(1); 55 | const int in_width = images.dimension(2); 56 | const int channels = images.dimension(3); 57 | 58 | const int out_height = output.dimension(1); 59 | const int out_width = output.dimension(2); 60 | 61 | const int total_count = batch * out_height * out_width * channels; 62 | if (total_count == 0) return; 63 | 64 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 65 | DownsampleKernel 66 | <<>>( 67 | config.virtual_thread_count, images.data(), 68 | batch, in_height, in_width, channels, 69 | out_height, out_width, 70 | output.data()); 71 | } 72 | 73 | #endif // GOOGLE_CUDA 74 | -------------------------------------------------------------------------------- /core/UnFlow/ops/forward_warp_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | // TODO assert input flow channel count = 2, assert matching numbers in all other dims 17 | 18 | typedef Eigen::ThreadPoolDevice CPUDevice; 19 | typedef 
Eigen::GpuDevice GPUDevice; 20 | 21 | using namespace tensorflow; 22 | 23 | void ForwardWarp(const GPUDevice& d, 24 | typename TTypes::ConstTensor input, 25 | typename TTypes::Tensor output); 26 | 27 | void ForwardWarpGrad(const GPUDevice& d, 28 | typename TTypes::ConstTensor input_grad, 29 | typename TTypes::ConstTensor original_input, 30 | typename TTypes::Tensor output_grad); 31 | 32 | class ForwardWarpOp : public OpKernel { 33 | public: 34 | explicit ForwardWarpOp(OpKernelConstruction* context) : OpKernel(context) {} 35 | 36 | void Compute(OpKernelContext* context) override { 37 | const Tensor& input = context->input(0); 38 | 39 | typename TTypes::ConstTensor input_data = input.tensor(); 40 | 41 | const int batch = input_data.dimension(0); 42 | const int height = input_data.dimension(1); 43 | const int width = input_data.dimension(2); 44 | const int channels = input_data.dimension(3); 45 | 46 | auto output_shape = TensorShape({batch, height, width, 1}); 47 | Tensor* output = NULL; 48 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, 49 | &output)); 50 | typename TTypes::Tensor output_data = output->tensor(); 51 | 52 | ForwardWarp(context->eigen_device(), 53 | input_data, output_data); 54 | } 55 | }; 56 | 57 | class ForwardWarpOpGrad : public OpKernel { 58 | public: 59 | explicit ForwardWarpOpGrad(OpKernelConstruction* context) : OpKernel(context) {} 60 | 61 | void Compute(OpKernelContext* context) override { 62 | const Tensor& input = context->input(0); 63 | const Tensor& original_input = context->input(1); 64 | 65 | Tensor* output = NULL; 66 | OP_REQUIRES_OK(context, context->allocate_output(0, original_input.shape(), 67 | &output)); 68 | 69 | typename TTypes::ConstTensor input_data = input.tensor(); 70 | typename TTypes::ConstTensor original_data = original_input.tensor(); 71 | typename TTypes::Tensor output_data = output->tensor(); 72 | 73 | ForwardWarpGrad(context->eigen_device(), 74 | input_data, original_data, output_data); 75 | } 76 | }; 77 | 78 | using shape_inference::DimensionHandle; 79 | using shape_inference::ShapeHandle; 80 | 81 | REGISTER_OP("ForwardWarp") 82 | .Input("flows: float") 83 | .Output("output: float") 84 | .SetShapeFn([](shape_inference::InferenceContext* c) { 85 | ShapeHandle in = c->input(0); 86 | DimensionHandle batch = c->Dim(in, 0); 87 | DimensionHandle height = c->Dim(in, 1); 88 | DimensionHandle width = c->Dim(in, 2); 89 | c->set_output(0, c->MakeShape({batch, height, width, 1})); 90 | return Status::OK(); 91 | }); 92 | 93 | REGISTER_OP("ForwardWarpGrad") 94 | .Input("grads: float") 95 | .Input("original_flows: float") 96 | .Output("output: float") 97 | .SetShapeFn([](shape_inference::InferenceContext* c) { 98 | c->set_output(0, c->input(1)); 99 | return Status::OK(); 100 | }); 101 | 102 | #if GOOGLE_CUDA 103 | 104 | REGISTER_KERNEL_BUILDER(Name("ForwardWarp").Device(DEVICE_GPU), ForwardWarpOp); 105 | REGISTER_KERNEL_BUILDER(Name("ForwardWarpGrad").Device(DEVICE_GPU), ForwardWarpOpGrad); 106 | 107 | #endif // GOOGLE_CUDA 108 | -------------------------------------------------------------------------------- /core/UnFlow/ops/forward_warp_op.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include "tensorflow/core/framework/register_types.h" 6 | #include "tensorflow/core/framework/tensor_types.h" 7 | #include "tensorflow/core/platform/types.h" 8 | #include "tensorflow/core/util/cuda_kernel_helper.h" 9 | 10 | using namespace tensorflow; 11 | 
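// ForwardWarp: each source pixel is splatted to its flow target (src + flow);
// a Gaussian-weighted contribution (fixed std, small k-pixel support) is
// accumulated onto the neighbouring integer pixels with atomic adds, so the
// single-channel output is a soft count of how many source pixels map to each
// target location.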
12 | #define gauss(x, y, std) 13 | 14 | typedef Eigen::GpuDevice GPUDevice; 15 | 16 | __global__ void ForwardWarpKernel(const int32 nthreads, 17 | const float* flows, 18 | int batch, int height, int width, 19 | float* output) { 20 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 21 | // out_idx = x + width * (y + height * b) 22 | int idx = out_idx; 23 | const int src_x = idx % width; 24 | idx /= width; 25 | const int src_y = idx % height; 26 | const int b = idx / height; 27 | 28 | const int flow_index = out_idx * 2; 29 | const float target_x = src_x + flows[flow_index]; 30 | const float target_y = src_y + flows[flow_index + 1]; 31 | 32 | // Calculate distribution variance depending on similar neighbor flows 33 | // fixed variance for first tests!! 34 | 35 | // Compute valid neighbor range 36 | //int min_n_y = y + 2 > 0 ? floorf(pos_y) : 0; 37 | 38 | const float dist = 2.0; 39 | const float std = dist * 0.5; 40 | const int k = ceilf(dist + 2); 41 | // TODO variance different for x, y? 42 | 43 | // center pixel closest to mapping location 44 | //const int closest_x = roundf(target_x); 45 | //const int closest_y = roundf(target_y); 46 | if(floorf(target_x - k) < width && floorf(target_x + k) >= 0 47 | && floorf(target_y - k) < height && floorf(target_y + k) >= 0) { 48 | const int min_n_x = target_x - k > 0? floorf(target_x - k) : 0; 49 | const int min_n_y = target_y - k > 0? floorf(target_y - k) : 0; 50 | const int max_n_x = target_x + k < width? floorf(target_x + k) : width - 1; 51 | const int max_n_y = target_y + k < height? floorf(target_y + k) : height - 1; 52 | 53 | const float gauss_divisor = 2 * powf(std, 2); 54 | for(int n_x = min_n_x; n_x <= max_n_x; ++n_x) { 55 | for(int n_y = min_n_y; n_y <= max_n_y; ++n_y) { 56 | const float x = n_x - target_x; 57 | const float y = n_y - target_y; 58 | const float weight = expf(-(powf(x, 2) + powf(y, 2)) / gauss_divisor); 59 | CudaAtomicAdd(output + n_x + width * (n_y + height * b), weight); 60 | } 61 | } 62 | } 63 | 64 | } 65 | } 66 | 67 | __global__ void ForwardWarpGradKernel(const int32 nthreads, 68 | const float* input_grad, const float* flows, 69 | int batch, int height, int width, 70 | float* output_grad) { 71 | CUDA_1D_KERNEL_LOOP(in_idx, nthreads) { 72 | // in_idx = x + width * (y + height * b) 73 | int idx = in_idx; 74 | const int src_x = idx % width; 75 | idx /= width; 76 | const int src_y = idx % height; 77 | const int b = idx / height; 78 | 79 | const int flow_index = in_idx * 2; 80 | const float target_x = src_x + flows[flow_index]; 81 | const float target_y = src_y + flows[flow_index + 1]; 82 | 83 | // Calculate distribution variance depending on similar neighbor flows 84 | // fixed variance for first tests!! 85 | 86 | // Compute valid neighbor range 87 | //int min_n_y = y + 2 > 0 ? floorf(pos_y) : 0; 88 | 89 | const float dist = 2.0; 90 | const float std = dist * 0.5; 91 | const int k = ceilf(dist + 2); 92 | // TODO variance different for x, y? 93 | 94 | // center pixel closest to mapping location 95 | //const int closest_x = roundf(target_x); 96 | //const int closest_y = roundf(target_y); 97 | float du = 0.0; 98 | float dv = 0.0; 99 | 100 | if(floorf(target_x - k) < width && floorf(target_x + k) >= 0 101 | && floorf(target_y - k) < height && floorf(target_y + k) >= 0) { 102 | const int min_n_x = target_x - k > 0? floorf(target_x - k) : 0; 103 | const int min_n_y = target_y - k > 0? floorf(target_y - k) : 0; 104 | const int max_n_x = target_x + k < width? floorf(target_x + k) : width - 1; 105 | const int max_n_y = target_y + k < height? 
floorf(target_y + k) : height - 1; 106 | 107 | const float gauss_divisor = 2 * powf(std, 2); 108 | for(int n_x = min_n_x; n_x <= max_n_x; ++n_x) { 109 | for(int n_y = min_n_y; n_y <= max_n_y; ++n_y) { 110 | const float x = n_x - target_x; 111 | const float y = n_y - target_y; 112 | const float weight = expf(-(powf(x, 2) + powf(y, 2)) / gauss_divisor); 113 | 114 | const float din = input_grad[n_x + width * (n_y + height * b)]; 115 | const float factor = 2 * din * weight / gauss_divisor; 116 | du += factor * x; 117 | dv += factor * y; 118 | } 119 | } 120 | } 121 | 122 | output_grad[flow_index] = du; 123 | output_grad[flow_index + 1] = dv; 124 | } 125 | } 126 | 127 | void ForwardWarp(const GPUDevice& d, 128 | typename TTypes::ConstTensor flows, 129 | typename TTypes::Tensor output) { 130 | const int batch = flows.dimension(0); 131 | const int height = flows.dimension(1); 132 | const int width = flows.dimension(2); 133 | 134 | const int total_count = batch * height * width; 135 | if (total_count == 0) return; 136 | 137 | CudaLaunchConfig config; 138 | 139 | // Initialize output with all zeros. 140 | config = GetCudaLaunchConfig(total_count, d); 141 | SetZero<<>>( 142 | config.virtual_thread_count, output.data()); 143 | 144 | config = GetCudaLaunchConfig(total_count, d); 145 | ForwardWarpKernel 146 | <<>>( 147 | config.virtual_thread_count, flows.data(), 148 | batch, height, width, 149 | output.data()); 150 | } 151 | 152 | void ForwardWarpGrad(const GPUDevice& d, 153 | typename TTypes::ConstTensor input_grad, 154 | typename TTypes::ConstTensor flows, 155 | typename TTypes::Tensor output_grad) { 156 | const int batch = input_grad.dimension(0); 157 | const int height = input_grad.dimension(1); 158 | const int width = input_grad.dimension(2); 159 | 160 | int total_count = batch * height * width; 161 | if (total_count == 0) return; 162 | 163 | // Initialize output_grad with all zeros. 164 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 165 | SetZero<<>>( 166 | config.virtual_thread_count, output_grad.data()); 167 | 168 | // Accumulate. 
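// (Each thread writes the du/dv gradient of its own flow vector directly, so,
// unlike the forward splatting pass, no atomic adds are needed here.)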
169 | config = GetCudaLaunchConfig(total_count, d); 170 | ForwardWarpGradKernel 171 | <<>>( 172 | config.virtual_thread_count, input_grad.data(), flows.data(), 173 | batch, height, width, 174 | output_grad.data()); 175 | } 176 | 177 | #endif // GOOGLE_CUDA 178 | -------------------------------------------------------------------------------- /core/UnFlow/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .e2eflow import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .flownet import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/augment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from .spatial_transformer import transformer 5 | 6 | 7 | def random_affine(tensors, *, 8 | max_translation_x=0.0, max_translation_y=0.0, 9 | max_rotation=0.0, min_scale=1.0, max_scale=1.0, 10 | horizontal_flipping=False): 11 | """Applies geometric augmentations to a list of tensors. 12 | 13 | Each element in the list is augmented in the same way. 14 | For all elements, num_batch must be equal while height, width and channels 15 | may differ. 16 | """ 17 | def _deg2rad(deg): 18 | return (deg * np.pi) / 180.0 19 | 20 | with tf.variable_scope('random_affine'): 21 | num_batch = tf.shape(tensors[0])[0] 22 | 23 | zero = tf.zeros([num_batch]) 24 | one = tf.ones([num_batch]) 25 | 26 | tx = tf.random_uniform([num_batch], -max_translation_x, max_translation_x) 27 | ty = tf.random_uniform([num_batch], -max_translation_y, max_translation_y) 28 | rot = tf.random_uniform([num_batch], -max_rotation, max_rotation) 29 | rad = _deg2rad(rot) 30 | scale = tf.random_uniform([num_batch], min_scale, max_scale) 31 | 32 | t1 = [[tf.cos(rad), -tf.sin(rad), tx], 33 | [tf.sin(rad), tf.cos(rad), ty]] 34 | t1 = tf.transpose(t1, [2, 0, 1]) 35 | 36 | scale_x = scale 37 | if horizontal_flipping: 38 | flip = tf.random_uniform([num_batch], 0, 1) 39 | flip = tf.where(tf.greater(flip, 0.5), -one, one) 40 | scale_x = scale_x * flip 41 | 42 | t2 = [[scale_x, zero, zero], 43 | [zero, scale, zero], 44 | [zero, zero, one]] 45 | t2 = tf.transpose(t2, [2, 0, 1]) 46 | 47 | t = tf.matmul(t1, t2) 48 | 49 | out = [] 50 | for tensor in tensors: 51 | shape = tf.shape(tensor) 52 | tensor = transformer(tensor, t, (shape[1], shape[2])) 53 | out.append(tf.stop_gradient(tensor)) 54 | return out 55 | 56 | 57 | def random_photometric(ims, *, 58 | noise_stddev=0.0, min_contrast=0.0, max_contrast=0.0, 59 | brightness_stddev=0.0, min_colour=1.0, max_colour=1.0, 60 | min_gamma=1.0, max_gamma=1.0): 61 | """Applies photometric augmentations to a list of image batches. 62 | 63 | Each image in the list is augmented in the same way. 64 | For all elements, num_batch must be equal while height and width may differ. 65 | 66 | Args: 67 | ims: list of 3-channel image batches normalized to [0, 1]. 68 | channel_mean: tensor of shape [3] which was used to normalize the pixel 69 | values ranging from 0 ... 255. 
70 | 71 | Returns: 72 | Batch of normalized images with photometric augmentations. Has the same 73 | shape as the input batch. 74 | """ 75 | 76 | with tf.variable_scope('random_photometric'): 77 | num_batch = tf.shape(ims[0])[0] 78 | 79 | contrast = tf.random_uniform([num_batch, 1], min_contrast, max_contrast) 80 | gamma = tf.random_uniform([num_batch, 1], min_gamma, max_gamma) 81 | gamma_inv = 1.0 / gamma 82 | colour = tf.random_uniform([num_batch, 3], min_colour, max_colour) 83 | if noise_stddev > 0.0: 84 | noise = tf.random_normal([num_batch, 1], stddev=noise_stddev) 85 | else: 86 | noise = tf.zeros([num_batch, 1]) 87 | if brightness_stddev > 0.0: 88 | brightness = tf.random_normal([num_batch, 1], 89 | stddev=brightness_stddev) 90 | else: 91 | brightness = tf.zeros([num_batch, 1]) 92 | 93 | out = [] 94 | for im in ims: 95 | # Transpose to [height, width, num_batch, channels] 96 | im_re = tf.transpose(im, [1, 2, 0, 3]) 97 | im_re = im_re 98 | im_re = (im_re * (contrast + 1.0) + brightness) * colour 99 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re)) 100 | im_re = tf.pow(im_re, gamma_inv) 101 | 102 | im_re = im_re + noise 103 | 104 | # Subtract the mean again after clamping 105 | im_re = im_re 106 | 107 | im = tf.transpose(im_re, [2, 0, 1, 3]) 108 | im = tf.stop_gradient(im) 109 | out.append(im) 110 | return out 111 | 112 | 113 | def random_crop(tensors, size, seed=None, name=None): 114 | """Randomly crops multiple tensors (of the same shape) to a given size. 115 | 116 | Each tensor is cropped in the same way.""" 117 | with tf.name_scope(name, "random_crop", [size]) as name: 118 | size = tf.convert_to_tensor(size, dtype=tf.int32, name="size") 119 | if len(tensors) == 2: 120 | shape = tf.minimum(tf.shape(tensors[0]), tf.shape(tensors[1])) 121 | else: 122 | shape = tf.shape(tensors[0]) 123 | 124 | limit = shape - size + 1 125 | offset = tf.random_uniform( 126 | tf.shape(shape), 127 | dtype=size.dtype, 128 | maxval=size.dtype.max, 129 | seed=seed) % limit 130 | results = [] 131 | for tensor in tensors: 132 | result = tf.slice(tensor, offset, size) 133 | results.append(result) 134 | return results 135 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/data.py: -------------------------------------------------------------------------------- 1 | """Utility functions for providing data directories.""" 2 | import os 3 | import sys 4 | import zipfile 5 | import rarfile 6 | from urllib.request import FancyURLopener 7 | import shutil 8 | 9 | import numpy as np 10 | import matplotlib.image as mpimg 11 | 12 | 13 | class Data(): 14 | # Should be a list containing all subdirectories of the main data dir which 15 | # belong to this dataset 16 | dirs = None 17 | 18 | def __init__(self, data_dir, stat_log_dir, 19 | development=True, fast_dir=None): 20 | self.development = development 21 | self.data_dir = data_dir 22 | self.stat_log_dir = stat_log_dir 23 | if not os.path.isdir(data_dir): 24 | os.makedirs(data_dir) 25 | 26 | self._fetch_if_missing() 27 | 28 | self.fast_dir = fast_dir 29 | if fast_dir: 30 | print(">> Copying files to {}".format(fast_dir)) 31 | for d in self.dirs: 32 | src = os.path.join(data_dir, d) 33 | dst = os.path.join(fast_dir, d) 34 | if not os.path.isdir(dst): 35 | shutil.copytree(src, dst) 36 | print(">> Copied {}".format(d)) 37 | self.current_dir = fast_dir 38 | else: 39 | self.current_dir = data_dir 40 | 41 | if stat_log_dir: 42 | self.stat_log_file = os.path.join(stat_log_dir, 43 | self.__class__.__name__ + ".txt") 44 | 
self._ensure_statistics() 45 | 46 | def __del__(self): 47 | pass 48 | #if self.fast_dir: 49 | # print(">> Removing files from {}".format(self.fast_dir)) 50 | # for d in self.dirs: 51 | # shutil.rmtree(os.path.join(self.fast_dir, d)) 52 | 53 | def clear_statistics(self): 54 | """Delete saved statistics file if present.""" 55 | if self.stat_log_dir and os.path.isfile(self.stat_log_file): 56 | os.remove(self.stat_log_file) 57 | 58 | def _ensure_statistics(self): 59 | """Make sure we know the dataset statistics.""" 60 | if os.path.isfile(self.stat_log_file): 61 | vals = np.loadtxt(self.stat_log_file) 62 | self.mean = vals[0] 63 | self.stddev = vals[1] 64 | else: 65 | print(">> Computing statistics (mean, variance) for {}" 66 | .format(self.__class__.__name__)) 67 | mean, stddev = self.compute_statistics(self.get_raw_files()) 68 | self.mean = mean 69 | self.stddev = stddev 70 | os.makedirs(self.stat_log_dir, exist_ok=True) 71 | np.savetxt(self.stat_log_file, [mean, stddev]) 72 | print(">> Statistics complete") 73 | 74 | def get_raw_dirs(self): 75 | """Should return a list of all dirs containing training images. 76 | 77 | Note: self.current_dir should be used for loading input data. 78 | """ 79 | raise NotImplementedError() 80 | 81 | def get_raw_files(self): 82 | files = [] 83 | for d in self.get_raw_dirs(): 84 | for path in os.listdir(d): 85 | files.append(os.path.join(d, path)) 86 | return files 87 | 88 | def _fetch_if_missing(self): 89 | """A call to this must make subsequent calls to get_raw_files succeed. 90 | All subdirs of data_dir listed in self.dirs must exist after this call. 91 | """ 92 | raise NotImplementedError() 93 | 94 | def _download_and_extract(self, url, extract_to, ext='zip'): 95 | def _progress(count, block_size, total_size): 96 | if total_size > 0: 97 | print('\r>> Downloading %s %.1f%%' % (url, 98 | float(count * block_size) / float(total_size) * 100.0), end=' ') 99 | else: 100 | print('\r>> Downloading %s' % (url), end=' ') 101 | sys.stdout.flush() 102 | urlretrieve = FancyURLopener().retrieve 103 | local_zip_path = os.path.join(self.data_dir, 'tmp.' + ext) 104 | urlretrieve(url, local_zip_path, _progress) 105 | sys.stdout.write("\n>> Finished downloading. Unzipping...\n") 106 | if ext == 'zip': 107 | with zipfile.ZipFile(local_zip_path, "r") as zip_ref: 108 | zip_ref.extractall(extract_to) 109 | else: 110 | with rarfile.RarFile(local_zip_path, "r") as zip_ref: 111 | zip_ref.extractall(extract_to) 112 | 113 | sys.stdout.write(">> Finished unzipping.\n") 114 | os.remove(local_zip_path) 115 | 116 | self.clear_statistics() 117 | 118 | def compute_statistics(self, files): 119 | """Use welford's method to compute mean and variance of the given 120 | dataset. 
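For reference, a minimal NumPy sketch of the Welford update that `compute_statistics` applies sample by sample; the helper name and data are illustrative:

```python
import numpy as np

def welford_update(n, mean, M2, x):
    # One online update: fold sample x (a 3-vector of pixel values) into the
    # running count, mean, and sum of squared deviations M2.
    n += 1
    delta = x - mean
    mean = mean + delta / n
    M2 = M2 + delta * (x - mean)
    return n, mean, M2

samples = np.random.rand(1000, 3) * 255
n, mean, M2 = 0, np.zeros(3), np.zeros(3)
for x in samples:
    n, mean, M2 = welford_update(n, mean, M2, x)
stddev = np.sqrt(M2 / (n - 1))  # matches np.std(samples, axis=0, ddof=1)
```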
121 | 122 | See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm.""" 123 | 124 | assert len(files) > 1 125 | 126 | n = 0 127 | mean = np.zeros(3) 128 | M2 = np.zeros(3) 129 | for j, filename in enumerate(files): 130 | #TODO ensure the pixel values are 0..255 131 | im = np.reshape(mpimg.imread(filename) * 255, [-1, 3]) 132 | for i in range(np.shape(im)[1]): 133 | n = n + 1 134 | delta = im[i] - mean 135 | mean += delta / n 136 | M2 += delta * (im[i] - mean) 137 | sys.stdout.write('\r>> Processed %.1f%%' % ( 138 | float(j) / float(len(files)) * 100.0)) 139 | sys.stdout.flush() 140 | var = M2 / (n - 1) 141 | stddev = np.sqrt(var) 142 | return np.float32(mean), np.float32(stddev) 143 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/flow_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def atan2(y, x): 6 | angle = tf.where(tf.greater(x,0.0), tf.atan(y/x), tf.zeros_like(x)) 7 | angle = tf.where(tf.logical_and(tf.less(x,0.0), tf.greater_equal(y,0.0)), 8 | tf.atan(y/x) + np.pi, angle) 9 | angle = tf.where(tf.logical_and(tf.less(x,0.0), tf.less(y,0.0)), 10 | tf.atan(y/x) - np.pi, angle) 11 | angle = tf.where(tf.logical_and(tf.equal(x,0.0), tf.greater(y,0.0)), 12 | np.pi * tf.ones_like(x), angle) 13 | angle = tf.where(tf.logical_and(tf.equal(x,0.0), tf.less(y,0.0)), 14 | -np.pi * tf.ones_like(x), angle) 15 | angle = tf.where(tf.logical_and(tf.equal(x,0.0),tf.equal(y,0.0)), 16 | np.nan * tf.zeros_like(x), angle) 17 | return angle 18 | 19 | 20 | def flow_to_color(flow, mask=None, max_flow=None): 21 | """Converts flow to 3-channel color image. 22 | 23 | Args: 24 | flow: tensor of shape [num_batch, height, width, 2]. 25 | mask: flow validity mask of shape [num_batch, height, width, 1]. 26 | """ 27 | n = 8 28 | num_batch, height, width, _ = tf.unstack(tf.shape(flow)) 29 | mask = tf.ones([num_batch, height, width, 1]) if mask is None else mask 30 | flow_u, flow_v = tf.unstack(flow, axis=3) 31 | if max_flow is not None: 32 | max_flow = tf.maximum(max_flow, 1) 33 | else: 34 | max_flow = tf.reduce_max(tf.abs(flow * mask)) 35 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow), 3)) 36 | angle = atan2(flow_v, flow_u) 37 | 38 | im_h = tf.mod(angle / (2 * np.pi) + 1.0, 1.0) 39 | im_s = tf.clip_by_value(mag * n / max_flow, 0, 1) 40 | im_v = tf.clip_by_value(n - im_s, 0, 1) 41 | im_hsv = tf.stack([im_h, im_s, im_v], 3) 42 | im = tf.image.hsv_to_rgb(im_hsv) 43 | return im * mask 44 | 45 | 46 | def flow_error_image(flow_1, flow_2, mask_occ, mask_noc=None, log_colors=True): 47 | """Visualize the error between two flows as 3-channel color image. 48 | 49 | Adapted from the KITTI C++ devkit. 50 | 51 | Args: 52 | flow_1: first flow of shape [num_batch, height, width, 2]. 53 | flow_2: second flow (ground truth) 54 | mask_occ: flow validity mask of shape [num_batch, height, width, 1]. 55 | Equals 1 at (occluded and non-occluded) valid pixels. 56 | mask_noc: Is 1 only at valid pixels which are not occluded. 
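The HSV encoding used by `flow_to_color` above maps flow direction to hue and magnitude to saturation. A single-vector NumPy/`colorsys` sketch of the same mapping, with illustrative values:

```python
import numpy as np
import colorsys

def flow_vector_to_rgb(u, v, max_flow, n=8):
    # Direction -> hue, magnitude -> saturation, value = clip(n - s, 0, 1),
    # mirroring flow_to_color for a single flow vector.
    mag = np.sqrt(u * u + v * v)
    angle = np.arctan2(v, u)
    h = np.mod(angle / (2 * np.pi) + 1.0, 1.0)
    s = np.clip(mag * n / max_flow, 0.0, 1.0)
    val = np.clip(n - s, 0.0, 1.0)
    return colorsys.hsv_to_rgb(h, s, val)

print(flow_vector_to_rgb(3.0, -1.0, max_flow=10.0))
```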
57 | """ 58 | mask_noc = tf.ones(tf.shape(mask_occ)) if mask_noc is None else mask_noc 59 | diff_sq = (flow_1 - flow_2) ** 2 60 | diff = tf.sqrt(tf.reduce_sum(diff_sq, [3], keep_dims=True)) 61 | if log_colors: 62 | num_batch, height, width, _ = tf.unstack(tf.shape(flow_1)) 63 | colormap = [ 64 | [0,0.0625,49,54,149], 65 | [0.0625,0.125,69,117,180], 66 | [0.125,0.25,116,173,209], 67 | [0.25,0.5,171,217,233], 68 | [0.5,1,224,243,248], 69 | [1,2,254,224,144], 70 | [2,4,253,174,97], 71 | [4,8,244,109,67], 72 | [8,16,215,48,39], 73 | [16,1000000000.0,165,0,38]] 74 | colormap = np.asarray(colormap, dtype=np.float32) 75 | colormap[:, 2:5] = colormap[:, 2:5] / 255 76 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow_2), 3, keep_dims=True)) 77 | error = tf.minimum(diff / 3, 20 * diff / mag) 78 | im = tf.zeros([num_batch, height, width, 3]) 79 | for i in range(colormap.shape[0]): 80 | colors = colormap[i, :] 81 | cond = tf.logical_and(tf.greater_equal(error, colors[0]), 82 | tf.less(error, colors[1])) 83 | im = tf.where(tf.tile(cond, [1, 1, 1, 3]), 84 | tf.ones([num_batch, height, width, 1]) * colors[2:5], 85 | im) 86 | im = tf.where(tf.tile(tf.cast(mask_noc, tf.bool), [1, 1, 1, 3]), 87 | im, im * 0.5) 88 | im = im * mask_occ 89 | else: 90 | error = (tf.minimum(diff, 5) / 5) * mask_occ 91 | im_r = error # errors in occluded areas will be red 92 | im_g = error * mask_noc 93 | im_b = error * mask_noc 94 | im = tf.concat(axis=3, values=[im_r, im_g, im_b]) 95 | return im 96 | 97 | 98 | def flow_error_avg(flow_1, flow_2, mask): 99 | """Evaluates the average endpoint error between flow batches.""" 100 | with tf.variable_scope('flow_error_avg'): 101 | diff = euclidean(flow_1 - flow_2) * mask 102 | error = tf.reduce_sum(diff) / tf.reduce_sum(mask) 103 | return error 104 | 105 | 106 | def outlier_ratio(gt_flow, flow, mask, threshold=3.0, relative=0.05): 107 | diff = euclidean(gt_flow - flow) * mask 108 | if relative is not None: 109 | threshold = tf.maximum(threshold, euclidean(gt_flow) * relative) 110 | outliers = tf.cast(tf.greater_equal(diff, threshold), tf.float32) 111 | else: 112 | outliers = tf.cast(tf.greater_equal(diff, threshold), tf.float32) 113 | ratio = tf.reduce_sum(outliers) / tf.reduce_sum(mask) 114 | return ratio 115 | 116 | 117 | def outlier_pct(gt_flow, flow, mask, threshold=3.0, relative=0.05): 118 | frac = outlier_ratio(gt_flow, flow, mask, threshold, relative) * 100 119 | return frac 120 | 121 | 122 | def euclidean(t): 123 | return tf.sqrt(tf.reduce_sum(t ** 2, [3], keep_dims=True)) 124 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/flownet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import tensorflow.contrib.layers as layers 4 | 5 | from ..ops import correlation 6 | from .image_warp import image_warp 7 | 8 | from .flow_util import flow_to_color 9 | 10 | 11 | FLOW_SCALE = 5.0 12 | 13 | # (Yuliang) Add reuse_variables for forward flow 14 | def flownet(im1, im2, flownet_spec='S', full_resolution=False, train_all=False, 15 | backward_flow=False, reuse=False): 16 | num_batch, height, width, _ = tf.unstack(tf.shape(im1)) 17 | flownet_num = len(flownet_spec) 18 | assert flownet_num > 0 19 | flows_fw = [] 20 | flows_bw = [] 21 | for i, name in enumerate(flownet_spec): 22 | assert name in ('C', 'c', 'S', 's') 23 | channel_mult = 1 if name in ('C', 'S') else 3 / 8 24 | full_res = full_resolution and i == flownet_num - 1 25 
| 26 | def scoped_block(reuse=False): 27 | if name.lower() == 'c': 28 | assert i == 0, 'FlowNetS must be used for refinement networks' 29 | 30 | with tf.variable_scope('flownet_c_features') as scope: 31 | # (Yuliang) 32 | if reuse: 33 | scope.reuse_variables() 34 | _, conv2_a, conv3_a = flownet_c_features(im1, channel_mult=channel_mult) 35 | _, conv2_b, conv3_b = flownet_c_features(im2, channel_mult=channel_mult, reuse=True) 36 | 37 | with tf.variable_scope('flownet_c') as scope: 38 | # (Yuliang) 39 | if reuse: 40 | scope.reuse_variables() 41 | flow_fw = flownet_c(conv3_a, conv3_b, conv2_a, 42 | full_res=full_res, 43 | channel_mult=channel_mult) 44 | flows_fw.append(flow_fw) 45 | if backward_flow: 46 | scope.reuse_variables() 47 | flow_bw = flownet_c(conv3_b, conv3_a, conv2_b, 48 | full_res=full_res, 49 | channel_mult=channel_mult) 50 | flows_bw.append(flow_bw) 51 | elif name.lower() == 's': 52 | def _flownet_s(im1, im2, flow=None): 53 | if flow is not None: 54 | flow = tf.image.resize_bilinear(flow, [height, width]) * 4 * FLOW_SCALE 55 | warp = image_warp(im2, flow) 56 | diff = tf.abs(warp - im1) 57 | if not train_all: 58 | flow = tf.stop_gradient(flow) 59 | warp = tf.stop_gradient(warp) 60 | diff = tf.stop_gradient(diff) 61 | 62 | inputs = tf.concat([im1, im2, flow, warp, diff], axis=3) 63 | inputs = tf.reshape(inputs, [num_batch, height, width, 14]) 64 | else: 65 | inputs = tf.concat([im1, im2], 3) 66 | return flownet_s(inputs, 67 | full_res=full_res, 68 | channel_mult=channel_mult) 69 | stacked = len(flows_fw) > 0 70 | with tf.variable_scope('flownet_s') as scope: 71 | # (Yuliang) 72 | if reuse: 73 | scope.reuse_variables() 74 | flow_fw = _flownet_s(im1, im2, flows_fw[-1][0] if stacked else None) 75 | flows_fw.append(flow_fw) 76 | if backward_flow: 77 | scope.reuse_variables() 78 | flow_bw = _flownet_s(im2, im1, flows_bw[-1][0] if stacked else None) 79 | flows_bw.append(flow_bw) 80 | 81 | if i > 0: 82 | scope_name = "stack_{}_flownet".format(i) 83 | with tf.variable_scope(scope_name): 84 | scoped_block(reuse) 85 | else: 86 | scoped_block(reuse) 87 | 88 | if backward_flow: 89 | return flows_fw, flows_bw 90 | return flows_fw 91 | 92 | 93 | def _leaky_relu(x): 94 | with tf.variable_scope('leaky_relu'): 95 | return tf.maximum(0.1 * x, x) 96 | 97 | 98 | def _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2, conv1=None, inputs=None, 99 | channel_mult=1, full_res=False, channels=2): 100 | m = channel_mult 101 | 102 | flow6 = slim.conv2d(conv6_1, channels, 3, scope='flow6', 103 | activation_fn=None) 104 | deconv5 = slim.conv2d_transpose(conv6_1, int(512 * m), 4, stride=2, 105 | scope='deconv5') 106 | flow6_up5 = slim.conv2d_transpose(flow6, channels, 4, stride=2, 107 | scope='flow6_up5', 108 | activation_fn=None) 109 | concat5 = tf.concat([conv5_1, deconv5, flow6_up5], 1) 110 | flow5 = slim.conv2d(concat5, channels, 3, scope='flow5', 111 | activation_fn=None) 112 | 113 | deconv4 = slim.conv2d_transpose(concat5, int(256 * m), 4, stride=2, 114 | scope='deconv4') 115 | flow5_up4 = slim.conv2d_transpose(flow5, channels, 4, stride=2, 116 | scope='flow5_up4', 117 | activation_fn=None) 118 | concat4 = tf.concat([conv4_1, deconv4, flow5_up4], 1) 119 | flow4 = slim.conv2d(concat4, channels, 3, scope='flow4', 120 | activation_fn=None) 121 | 122 | deconv3 = slim.conv2d_transpose(concat4, int(128 * m), 4, stride=2, 123 | scope='deconv3') 124 | flow4_up3 = slim.conv2d_transpose(flow4, channels, 4, stride=2, 125 | scope='flow4_up3', 126 | activation_fn=None) 127 | concat3 = tf.concat([conv3_1, 
deconv3, flow4_up3], 1) 128 | flow3 = slim.conv2d(concat3, channels, 3, scope='flow3', 129 | activation_fn=None) 130 | 131 | deconv2 = slim.conv2d_transpose(concat3, int(64 * m), 4, stride=2, 132 | scope='deconv2') 133 | flow3_up2 = slim.conv2d_transpose(flow3, channels, 4, stride=2, 134 | scope='flow3_up2', 135 | activation_fn=None) 136 | concat2 = tf.concat([conv2, deconv2, flow3_up2], 1) 137 | flow2 = slim.conv2d(concat2, channels, 3, scope='flow2', 138 | activation_fn=None) 139 | 140 | flows = [flow2, flow3, flow4, flow5, flow6] 141 | 142 | if full_res: 143 | with tf.variable_scope('full_res'): 144 | deconv1 = slim.conv2d_transpose(concat2, int(32 * m), 4, stride=2, 145 | scope='deconv1') 146 | flow2_up1 = slim.conv2d_transpose(flow2, channels, 4, stride=2, 147 | scope='flow2_up1', 148 | activation_fn=None) 149 | concat1 = tf.concat([conv1, deconv1, flow2_up1], 1) 150 | flow1 = slim.conv2d(concat1, channels, 3, scope='flow1', 151 | activation_fn=None) 152 | 153 | deconv0 = slim.conv2d_transpose(concat1, int(16 * m), 4, stride=2, 154 | scope='deconv0') 155 | flow1_up0 = slim.conv2d_transpose(flow1, channels, 4, stride=2, 156 | scope='flow1_up0', 157 | activation_fn=None) 158 | concat0 = tf.concat([inputs, deconv0, flow1_up0], 1) 159 | flow0 = slim.conv2d(concat0, channels, 3, scope='flow0', 160 | activation_fn=None) 161 | 162 | flows = [flow0, flow1] + flows 163 | 164 | return flows 165 | 166 | 167 | def nhwc_to_nchw(tensors): 168 | return [tf.transpose(t, [0, 3, 1, 2]) for t in tensors] 169 | 170 | 171 | def nchw_to_nhwc(tensors): 172 | return [tf.transpose(t, [0, 2, 3, 1]) for t in tensors] 173 | 174 | 175 | def flownet_s(inputs, channel_mult=1, full_res=False): 176 | """Given stacked inputs, returns flow predictions in decreasing resolution. 177 | 178 | Uses FlowNetSimple. 
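A usage sketch for the `flownet` wrapper defined above, mirroring how the callers in `supervised.py` and `unsupervised.py` below consume its output. Shapes and the 'CS' spec are illustrative, and `flownet` and `FLOW_SCALE` from this module are assumed to be in scope:

```python
import tensorflow as tf

im1 = tf.placeholder(tf.float32, [4, 320, 1152, 3])  # normalized image batch
im2 = tf.placeholder(tf.float32, [4, 320, 1152, 3])
flows_fw = flownet(im1, im2, flownet_spec='CS')       # one entry per stacked net
flow2 = flows_fw[-1][0]                               # finest prediction of the last net
final_flow = tf.image.resize_bilinear(flow2, [320, 1152]) * FLOW_SCALE * 4
```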
179 | """ 180 | m = channel_mult 181 | inputs = nhwc_to_nchw([inputs])[0] 182 | 183 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 184 | data_format='NCHW', 185 | weights_regularizer=slim.l2_regularizer(0.0004), 186 | weights_initializer=layers.variance_scaling_initializer(), 187 | activation_fn=_leaky_relu): 188 | conv1 = slim.conv2d(inputs, int(64 * m), 7, stride=2, scope='conv1') 189 | conv2 = slim.conv2d(conv1, int(128 * m), 5, stride=2, scope='conv2') 190 | conv3 = slim.conv2d(conv2, int(256 * m), 5, stride=2, scope='conv3') 191 | conv3_1 = slim.conv2d(conv3, int(256 * m), 3, stride=1, scope='conv3_1') 192 | conv4 = slim.conv2d(conv3_1, int(512 * m), 3, stride=2, scope='conv4') 193 | conv4_1 = slim.conv2d(conv4, int(512 * m), 3, stride=1, scope='conv4_1') 194 | conv5 = slim.conv2d(conv4_1, int(512 * m), 3, stride=2, scope='conv5') 195 | conv5_1 = slim.conv2d(conv5, int(512 * m), 3, stride=1, scope='conv5_1') 196 | conv6 = slim.conv2d(conv5_1, int(1024 * m), 3, stride=2, scope='conv6') 197 | conv6_1 = slim.conv2d(conv6, int(1024 * m), 3, stride=1, scope='conv6_1') 198 | 199 | res = _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2, conv1, inputs, 200 | channel_mult=channel_mult, full_res=full_res) 201 | return nchw_to_nhwc(res) 202 | 203 | 204 | def flownet_c_features(im, channel_mult=1, reuse=None): 205 | m = channel_mult 206 | im = nhwc_to_nchw([im])[0] 207 | with slim.arg_scope([slim.conv2d], 208 | data_format='NCHW', 209 | weights_regularizer=slim.l2_regularizer(0.0004), 210 | weights_initializer=layers.variance_scaling_initializer(), 211 | activation_fn=_leaky_relu): 212 | conv1 = slim.conv2d(im, int(64 * m), 7, stride=2, scope='conv1', reuse=reuse) 213 | conv2 = slim.conv2d(conv1, int(128 * m), 5, stride=2, scope='conv2', reuse=reuse) 214 | conv3 = slim.conv2d(conv2, int(256 * m), 5, stride=2, scope='conv3', reuse=reuse) 215 | return conv1, conv2, conv3 216 | 217 | 218 | def flownet_c(conv3_a, conv3_b, conv2_a, channel_mult=1, full_res=False): 219 | """Given two images, returns flow predictions in decreasing resolution. 220 | 221 | Uses FlowNetCorr. 
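The `correlation` op called below builds a cost volume between the two conv3 feature maps. A rough single-position NumPy sketch of the idea for kernel_size=1; the helper and its normalization are approximations, not the op's exact semantics:

```python
import numpy as np

def correlation_at(f1, f2, y, x, max_displacement=20, stride_2=2):
    # Dot products between the feature at (y, x) in f1 and features of f2 at
    # strided displacements within +/- max_displacement.
    h, w, c = f2.shape
    costs = []
    for dy in range(-max_displacement, max_displacement + 1, stride_2):
        for dx in range(-max_displacement, max_displacement + 1, stride_2):
            yy, xx = y + dy, x + dx
            if 0 <= yy < h and 0 <= xx < w:
                costs.append(float(np.dot(f1[y, x], f2[yy, xx])) / c)
            else:
                costs.append(0.0)
    return np.array(costs)  # one matching cost per candidate displacement

f1 = np.random.rand(48, 160, 256)
f2 = np.random.rand(48, 160, 256)
print(correlation_at(f1, f2, 24, 80).shape)  # (441,) for 21 x 21 displacements
```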
222 | """ 223 | m = channel_mult 224 | 225 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 226 | data_format='NCHW', 227 | weights_regularizer=slim.l2_regularizer(0.0004), 228 | weights_initializer=layers.variance_scaling_initializer(), 229 | activation_fn=_leaky_relu): 230 | corr = correlation(conv3_a, conv3_b, 231 | pad=20, kernel_size=1, max_displacement=20, stride_1=1, stride_2=2) 232 | 233 | conv_redir = slim.conv2d(conv3_a, int(32 * m), 1, stride=1, scope='conv_redir') 234 | 235 | conv3_1 = slim.conv2d(tf.concat([conv_redir, corr], 1), int(256 * m), 3, 236 | stride=1, scope='conv3_1') 237 | conv4 = slim.conv2d(conv3_1, int(512 * m), 3, stride=2, scope='conv4') 238 | conv4_1 = slim.conv2d(conv4, int(512 * m), 3, stride=1, scope='conv4_1') 239 | conv5 = slim.conv2d(conv4_1, int(512 * m), 3, stride=2, scope='conv5') 240 | conv5_1 = slim.conv2d(conv5, int(512 * m), 3, stride=1, scope='conv5_1') 241 | conv6 = slim.conv2d(conv5_1, int(1024 * m), 3, stride=2, scope='conv6') 242 | conv6_1 = slim.conv2d(conv6, int(1024 * m), 3, stride=1, scope='conv6_1') 243 | 244 | res = _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2_a, 245 | channel_mult=channel_mult, full_res=full_res) 246 | return nchw_to_nhwc(res) 247 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/image_warp.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def image_warp(im, flow): 5 | """Performs a backward warp of an image using the predicted flow. 6 | 7 | Args: 8 | im: Batch of images. [num_batch, height, width, channels] 9 | flow: Batch of flow vectors. [num_batch, height, width, 2] 10 | Returns: 11 | warped: transformed image of the same shape as the input image. 12 | """ 13 | with tf.variable_scope('image_warp'): 14 | 15 | num_batch, height, width, channels = tf.unstack(tf.shape(im)) 16 | max_x = tf.cast(width - 1, 'int32') 17 | max_y = tf.cast(height - 1, 'int32') 18 | zero = tf.zeros([], dtype='int32') 19 | 20 | # We have to flatten our tensors to vectorize the interpolation 21 | im_flat = tf.reshape(im, [-1, channels]) 22 | flow_flat = tf.reshape(flow, [-1, 2]) 23 | 24 | # Floor the flow, as the final indices are integers 25 | # The fractional part is used to control the bilinear interpolation. 
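        # Worked example (illustrative numbers): a flow of (0.3, -1.6) at pixel
        # (x, y) samples from (x + 0.3, y - 1.6). flow_floor is then (0, -2),
        # the fractional weights (xw, yw) are (0.3, 0.4), and the four gathered
        # pixels around (x, y - 2) are blended with weights wa = 0.42,
        # wb = 0.28, wc = 0.18, wd = 0.12, which sum to 1.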
26 | flow_floor = tf.to_int32(tf.floor(flow_flat)) 27 | bilinear_weights = flow_flat - tf.floor(flow_flat) 28 | 29 | # Construct base indices which are displaced with the flow 30 | pos_x = tf.tile(tf.range(width), [height * num_batch]) 31 | grid_y = tf.tile(tf.expand_dims(tf.range(height), 1), [1, width]) 32 | pos_y = tf.tile(tf.reshape(grid_y, [-1]), [num_batch]) 33 | 34 | x = flow_floor[:, 0] 35 | y = flow_floor[:, 1] 36 | xw = bilinear_weights[:, 0] 37 | yw = bilinear_weights[:, 1] 38 | 39 | # Compute interpolation weights for 4 adjacent pixels 40 | # expand to num_batch * height * width x 1 for broadcasting in add_n below 41 | wa = tf.expand_dims((1 - xw) * (1 - yw), 1) # top left pixel 42 | wb = tf.expand_dims((1 - xw) * yw, 1) # bottom left pixel 43 | wc = tf.expand_dims(xw * (1 - yw), 1) # top right pixel 44 | wd = tf.expand_dims(xw * yw, 1) # bottom right pixel 45 | 46 | x0 = pos_x + x 47 | x1 = x0 + 1 48 | y0 = pos_y + y 49 | y1 = y0 + 1 50 | 51 | x0 = tf.clip_by_value(x0, zero, max_x) 52 | x1 = tf.clip_by_value(x1, zero, max_x) 53 | y0 = tf.clip_by_value(y0, zero, max_y) 54 | y1 = tf.clip_by_value(y1, zero, max_y) 55 | 56 | dim1 = width * height 57 | batch_offsets = tf.range(num_batch) * dim1 58 | base_grid = tf.tile(tf.expand_dims(batch_offsets, 1), [1, dim1]) 59 | base = tf.reshape(base_grid, [-1]) 60 | 61 | base_y0 = base + y0 * width 62 | base_y1 = base + y1 * width 63 | idx_a = base_y0 + x0 64 | idx_b = base_y1 + x0 65 | idx_c = base_y0 + x1 66 | idx_d = base_y1 + x1 67 | 68 | Ia = tf.gather(im_flat, idx_a) 69 | Ib = tf.gather(im_flat, idx_b) 70 | Ic = tf.gather(im_flat, idx_c) 71 | Id = tf.gather(im_flat, idx_d) 72 | 73 | warped_flat = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) 74 | warped = tf.reshape(warped_flat, [num_batch, height, width, channels]) 75 | 76 | return warped 77 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/input.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | from .augment import random_crop 8 | 9 | 10 | def resize_input(t, height, width, resized_h, resized_w): 11 | # Undo old resizing and apply bilinear 12 | t = tf.reshape(t, [resized_h, resized_w, 3]) 13 | t = tf.expand_dims(tf.image.resize_image_with_crop_or_pad(t, height, width), 0) 14 | return tf.image.resize_bilinear(t, [resized_h, resized_w]) 15 | 16 | 17 | def resize_output_crop(t, height, width, channels): 18 | _, oldh, oldw, c = tf.unstack(tf.shape(t)) 19 | t = tf.reshape(t, [oldh, oldw, c]) 20 | t = tf.image.resize_image_with_crop_or_pad(t, height, width) 21 | return tf.reshape(t, [1, height, width, channels]) 22 | 23 | 24 | def resize_output(t, height, width, channels): 25 | return tf.image.resize_bilinear(t, [height, width]) 26 | 27 | 28 | def resize_output_flow(t, height, width, channels): 29 | batch, old_height, old_width, _ = tf.unstack(tf.shape(t), num=4) 30 | t = tf.image.resize_bilinear(t, [height, width]) 31 | u, v = tf.unstack(t, axis=3) 32 | u *= tf.cast(width, tf.float32) / tf.cast(old_width, tf.float32) 33 | v *= tf.cast(height, tf.float32) / tf.cast(old_height, tf.float32) 34 | return tf.reshape(tf.stack([u, v], axis=3), [batch, height, width, 2]) 35 | 36 | 37 | def frame_name_to_num(name): 38 | stripped = name.split('.')[0].lstrip('0') 39 | if stripped == '': 40 | return 0 41 | return int(stripped) 42 | 43 | 44 | class Input(): 45 | mean = [104.920005, 110.1753, 114.785955] 46 | 
stddev = 1 / 0.0039216 47 | 48 | def __init__(self, data, batch_size, dims, *, 49 | num_threads=1, normalize=True, 50 | skipped_frames=False): 51 | assert len(dims) == 2 52 | self.data = data 53 | self.dims = dims 54 | self.batch_size = batch_size 55 | self.num_threads = num_threads 56 | self.normalize = normalize 57 | self.skipped_frames = skipped_frames 58 | 59 | def _resize_crop_or_pad(self, tensor): 60 | height, width = self.dims 61 | # return tf.image.resize_bilinear(tf.expand_dims(tensor, 0), [height, width]) 62 | return tf.image.resize_image_with_crop_or_pad(tensor, height, width) 63 | 64 | def _resize_image_fixed(self, image): 65 | height, width = self.dims 66 | return tf.reshape(self._resize_crop_or_pad(image), [height, width, 3]) 67 | 68 | def _normalize_image(self, image): 69 | return (image - self.mean) / self.stddev 70 | 71 | def _preprocess_image(self, image): 72 | image = self._resize_image_fixed(image) 73 | if self.normalize: 74 | image = self._normalize_image(image) 75 | return image 76 | 77 | def _input_images(self, image_dir, hold_out_inv=None): 78 | """Assumes that paired images are next to each other after ordering the 79 | files. 80 | """ 81 | image_dir = os.path.join(self.data.current_dir, image_dir) 82 | 83 | filenames_1 = [] 84 | filenames_2 = [] 85 | image_files = os.listdir(image_dir) 86 | image_files.sort() 87 | 88 | assert len(image_files) % 2 == 0, 'expected pairs of images' 89 | 90 | for i in range(len(image_files) // 2): 91 | filenames_1.append(os.path.join(image_dir, image_files[i * 2])) 92 | filenames_2.append(os.path.join(image_dir, image_files[i * 2 + 1])) 93 | 94 | if hold_out_inv is not None: 95 | filenames = list(zip(filenames_1, filenames_2)) 96 | random.seed(0) 97 | random.shuffle(filenames) 98 | filenames = filenames[:hold_out_inv] 99 | 100 | filenames_1, filenames_2 = zip(*filenames) 101 | filenames_1 = list(filenames_1) 102 | filenames_2 = list(filenames_2) 103 | 104 | input_1 = read_png_image(filenames_1, 1) 105 | input_2 = read_png_image(filenames_2, 1) 106 | image_1 = self._preprocess_image(input_1) 107 | image_2 = self._preprocess_image(input_2) 108 | return tf.shape(input_1), image_1, image_2 109 | 110 | def _input_test(self, image_dir, hold_out_inv=None): 111 | input_shape, im1, im2 = self._input_images(image_dir, hold_out_inv) 112 | return tf.train.batch( 113 | [im1, im2, input_shape], 114 | batch_size=self.batch_size, 115 | num_threads=self.num_threads, 116 | allow_smaller_final_batch=True) 117 | 118 | def get_normalization(self): 119 | return self.mean, self.stddev 120 | 121 | def input_raw(self, swap_images=True, sequence=True, 122 | needs_crop=True, shift=0, seed=0, 123 | center_crop=False, skip=0): 124 | """Constructs input of raw data. 125 | 126 | Args: 127 | sequence: Assumes that image file order in data_dirs corresponds to 128 | temporal order, if True. Otherwise, assumes uncorrelated pairs of 129 | images in lexicographical ordering. 130 | shift: number of examples to shift the input queue by. 131 | Useful to resume training. 132 | swap_images: for each pair (im1, im2), also include (im2, im1) 133 | seed: seed for filename shuffling. 
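For scale, a quick check of the normalization constants defined on `Input` above (`stddev = 1 / 0.0039216`, roughly 255), showing the value range fed to the network:

```python
import numpy as np

mean = np.array([104.920005, 110.1753, 114.785955])
stddev = 1 / 0.0039216                        # ~255.0
print((np.zeros(3) - mean) / stddev)          # ~[-0.41, -0.43, -0.45]
print((np.full(3, 255.0) - mean) / stddev)    # ~[ 0.59,  0.57,  0.55]
```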
134 | Returns: 135 | image_1: batch of first images 136 | image_2: batch of second images 137 | """ 138 | if not isinstance(skip, list): 139 | skip = [skip] 140 | 141 | data_dirs = self.data.get_raw_dirs() 142 | height, width = self.dims 143 | #assert batch_size % 2 == 0 144 | 145 | filenames = [] 146 | for dir_path in data_dirs: 147 | files = os.listdir(dir_path) 148 | files.sort() 149 | if sequence: 150 | steps = [1 + s for s in skip] 151 | stops = [len(files) - s for s in steps] 152 | else: 153 | steps = [2] 154 | stops = [len(files)] 155 | assert len(files) % 2 == 0 156 | for step, stop in zip(steps, stops): 157 | for i in range(0, stop, step): 158 | if self.skipped_frames and sequence: 159 | assert step == 1 160 | num_first = frame_name_to_num(files[i]) 161 | num_second = frame_name_to_num(files[i+1]) 162 | if num_first + 1 != num_second: 163 | continue 164 | fn1 = os.path.join(dir_path, files[i]) 165 | fn2 = os.path.join(dir_path, files[i + 1]) 166 | filenames.append((fn1, fn2)) 167 | 168 | random.seed(seed) 169 | random.shuffle(filenames) 170 | print("Training on {} frame pairs.".format(len(filenames))) 171 | 172 | filenames_extended = [] 173 | for fn1, fn2 in filenames: 174 | filenames_extended.append((fn1, fn2)) 175 | if swap_images: 176 | filenames_extended.append((fn2, fn1)) 177 | 178 | shift = shift % len(filenames_extended) 179 | filenames_extended = list(np.roll(filenames_extended, shift)) 180 | 181 | 182 | filenames_1, filenames_2 = zip(*filenames_extended) 183 | filenames_1 = list(filenames_1) 184 | filenames_2 = list(filenames_2) 185 | 186 | with tf.variable_scope('train_inputs'): 187 | image_1 = read_png_image(filenames_1) 188 | image_2 = read_png_image(filenames_2) 189 | 190 | if needs_crop: 191 | #if center_crop: 192 | # image_1 = tf.image.resize_image_with_crop_or_pad(image_1, height, width) 193 | # image_2 = tf.image.resize_image_with_crop_or_pad(image_1, height, width) 194 | #else: 195 | image_1, image_2 = random_crop([image_1, image_2], [height, width, 3]) 196 | else: 197 | image_1 = tf.reshape(image_1, [height, width, 3]) 198 | image_2 = tf.reshape(image_2, [height, width, 3]) 199 | 200 | if self.normalize: 201 | image_1 = self._normalize_image(image_1) 202 | image_2 = self._normalize_image(image_2) 203 | 204 | return tf.train.batch( 205 | [image_1, image_2], 206 | batch_size=self.batch_size, 207 | num_threads=self.num_threads) 208 | 209 | 210 | def read_png_image(filenames, num_epochs=None): 211 | """Given a list of filenames, constructs a reader op for images.""" 212 | filename_queue = tf.train.string_input_producer(filenames, 213 | shuffle=False, capacity=len(filenames)) 214 | reader = tf.WholeFileReader() 215 | _, value = reader.read(filename_queue) 216 | image_uint8 = tf.image.decode_png(value, channels=3) 217 | image = tf.cast(image_uint8, tf.float32) 218 | return image 219 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/spatial_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import tensorflow as tf 16 | 17 | 18 | def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): 19 | """Spatial Transformer Layer 20 | Implements a spatial transformer layer as described in [1]_. 21 | Based on [2]_ and edited by David Dao for Tensorflow. 22 | Parameters 23 | ---------- 24 | U : float 25 | The output of a convolutional net should have the 26 | shape [num_batch, height, width, num_channels]. 27 | theta: float 28 | The output of the 29 | localisation network should be [num_batch, 6]. 30 | out_size: tuple of two ints 31 | The size of the output of the network (height, width) 32 | References 33 | ---------- 34 | .. [1] Spatial Transformer Networks 35 | Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu 36 | Submitted on 5 Jun 2015 37 | .. [2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py 38 | Notes 39 | ----- 40 | To initialize the network to the identity transform init 41 | ``theta`` to : 42 | identity = np.array([[1., 0., 0.], 43 | [0., 1., 0.]]) 44 | identity = identity.flatten() 45 | theta = tf.Variable(initial_value=identity) 46 | """ 47 | 48 | def _repeat(x, n_repeats): 49 | with tf.variable_scope('_repeat'): 50 | rep = tf.transpose( 51 | tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0]) 52 | rep = tf.cast(rep, 'int32') 53 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 54 | return tf.reshape(x, [-1]) 55 | 56 | def _interpolate(im, x, y, out_size): 57 | with tf.variable_scope('_interpolate'): 58 | # constants 59 | num_batch = tf.shape(im)[0] 60 | height = tf.shape(im)[1] 61 | width = tf.shape(im)[2] 62 | channels = tf.shape(im)[3] 63 | 64 | x = tf.cast(x, 'float32') 65 | y = tf.cast(y, 'float32') 66 | height_f = tf.cast(height, 'float32') 67 | width_f = tf.cast(width, 'float32') 68 | out_height = out_size[0] 69 | out_width = out_size[1] 70 | zero = tf.zeros([], dtype='int32') 71 | max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') 72 | max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') 73 | 74 | # scale indices from [-1, 1] to [0, width/height] 75 | x = (x + 1.0)*(width_f) / 2.0 76 | y = (y + 1.0)*(height_f) / 2.0 77 | 78 | # do sampling 79 | x0 = tf.cast(tf.floor(x), 'int32') 80 | x1 = x0 + 1 81 | y0 = tf.cast(tf.floor(y), 'int32') 82 | y1 = y0 + 1 83 | 84 | x0 = tf.clip_by_value(x0, zero, max_x) 85 | x1 = tf.clip_by_value(x1, zero, max_x) 86 | y0 = tf.clip_by_value(y0, zero, max_y) 87 | y1 = tf.clip_by_value(y1, zero, max_y) 88 | dim2 = width 89 | dim1 = width*height 90 | base = _repeat(tf.range(num_batch)*dim1, out_height*out_width) 91 | base_y0 = base + y0*dim2 92 | base_y1 = base + y1*dim2 93 | idx_a = base_y0 + x0 94 | idx_b = base_y1 + x0 95 | idx_c = base_y0 + x1 96 | idx_d = base_y1 + x1 97 | 98 | # use indices to lookup pixels in the flat image and restore 99 | # channels dim 100 | im_flat = tf.reshape(im, tf.stack([-1, channels])) 101 | im_flat = tf.cast(im_flat, 'float32') 102 | Ia = tf.gather(im_flat, idx_a) 103 | Ib = tf.gather(im_flat, idx_b) 104 | Ic = 
tf.gather(im_flat, idx_c) 105 | Id = tf.gather(im_flat, idx_d) 106 | 107 | # and finally calculate interpolated values 108 | x0_f = tf.cast(x0, 'float32') 109 | x1_f = tf.cast(x1, 'float32') 110 | y0_f = tf.cast(y0, 'float32') 111 | y1_f = tf.cast(y1, 'float32') 112 | wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1) 113 | wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1) 114 | wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1) 115 | wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1) 116 | output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id]) 117 | return output 118 | 119 | def _meshgrid(height, width): 120 | with tf.variable_scope('_meshgrid'): 121 | # This should be equivalent to: 122 | # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), 123 | # np.linspace(-1, 1, height)) 124 | # ones = np.ones(np.prod(x_t.shape)) 125 | # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) 126 | x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), 127 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) 128 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 129 | tf.ones(shape=tf.stack([1, width]))) 130 | 131 | x_t_flat = tf.reshape(x_t, (1, -1)) 132 | y_t_flat = tf.reshape(y_t, (1, -1)) 133 | 134 | ones = tf.ones_like(x_t_flat) 135 | grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) 136 | return grid 137 | 138 | def _transform(theta, input_dim, out_size): 139 | with tf.variable_scope('_transform'): 140 | num_batch = tf.shape(input_dim)[0] 141 | height = tf.shape(input_dim)[1] 142 | width = tf.shape(input_dim)[2] 143 | num_channels = tf.shape(input_dim)[3] 144 | theta = tf.reshape(theta, (-1, 2, 3)) 145 | theta = tf.cast(theta, 'float32') 146 | 147 | # grid of (x_t, y_t, 1), eq (1) in ref [1] 148 | height_f = tf.cast(height, 'float32') 149 | width_f = tf.cast(width, 'float32') 150 | out_height = out_size[0] 151 | out_width = out_size[1] 152 | grid = _meshgrid(out_height, out_width) 153 | grid = tf.expand_dims(grid, 0) 154 | grid = tf.reshape(grid, [-1]) 155 | grid = tf.tile(grid, tf.stack([num_batch])) 156 | grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) 157 | 158 | # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) 159 | T_g = tf.matmul(theta, grid) 160 | x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) 161 | y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) 162 | x_s_flat = tf.reshape(x_s, [-1]) 163 | y_s_flat = tf.reshape(y_s, [-1]) 164 | 165 | input_transformed = _interpolate( 166 | input_dim, x_s_flat, y_s_flat, 167 | out_size) 168 | 169 | output = tf.reshape( 170 | input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) 171 | return output 172 | 173 | with tf.variable_scope(name): 174 | output = _transform(theta, U, out_size) 175 | return output 176 | 177 | 178 | def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'): 179 | """Batch Spatial Transformer Layer 180 | Parameters 181 | ---------- 182 | U : float 183 | tensor of inputs [num_batch,height,width,num_channels] 184 | thetas : float 185 | a set of transformations for each input [num_batch,num_transforms,6] 186 | out_size : int 187 | the size of the output [out_height,out_width] 188 | Returns: float 189 | Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] 190 | """ 191 | with tf.variable_scope(name): 192 | num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) 193 | indices = [[i]*num_transforms for i in xrange(num_batch)] 194 | input_repeated = tf.gather(U, tf.reshape(indices, [-1])) 195 | return transformer(input_repeated, thetas, 
out_size) 196 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/supervised.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | 5 | from .augment import random_photometric 6 | from .flow_util import flow_to_color 7 | from .losses import charbonnier_loss 8 | from .flownet import flownet 9 | from .unsupervised import _track_image, _track_loss, FLOW_SCALE 10 | 11 | 12 | def supervised_loss(batch, params, normalization=None): 13 | channel_mean = tf.constant(normalization[0]) / 255.0 14 | im1, im2, flow_gt, mask_gt = batch 15 | im1 = im1 / 255.0 16 | im2 = im2 / 255.0 17 | im_shape = tf.shape(im1)[1:3] 18 | 19 | # ------------------------------------------------------------------------- 20 | 21 | im1_photo, im2_photo = random_photometric( 22 | [im1, im2], 23 | noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3, 24 | brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, 25 | min_gamma=0.7, max_gamma=1.5) 26 | 27 | _track_image(im1_photo, 'im1_photo') 28 | _track_image(im2_photo, 'im2_photo') 29 | _track_image(flow_to_color(flow_gt), 'flow_gt') 30 | _track_image(mask_gt, 'mask_gt') 31 | 32 | # Images for neural network input with mean-zero values in [-1, 1] 33 | im1_photo = im1_photo - channel_mean 34 | im2_photo = im2_photo - channel_mean 35 | 36 | flownet_spec = params.get('flownet', 'S') 37 | full_resolution = params.get('full_res') 38 | train_all = params.get('train_all') 39 | # ------------------------------------------------------------------------- 40 | # FlowNet 41 | flows_fw = flownet(im1_photo, im2_photo, 42 | flownet_spec=flownet_spec, 43 | full_resolution=full_resolution, 44 | train_all=train_all) 45 | 46 | if not train_all: 47 | flows_fw = [flows_fw[-1]] 48 | final_loss = 0.0 49 | for i, net_flows in enumerate(reversed(flows_fw)): 50 | flow_fw = net_flows[0] 51 | if params.get('full_res'): 52 | final_flow_fw = flow_fw * FLOW_SCALE * 4 53 | else: 54 | final_flow_fw = tf.image.resize_bilinear(flow_fw, im_shape) * FLOW_SCALE * 4 55 | _track_image(flow_to_color(final_flow_fw), 'flow_pred_' + str(i)) 56 | 57 | net_loss = charbonnier_loss(final_flow_fw - flow_gt, mask_gt) 58 | final_loss += net_loss / (2 ** i) 59 | 60 | regularization_loss = tf.add_n(slim.losses.get_regularization_losses()) 61 | final_loss += regularization_loss 62 | _track_loss(regularization_loss, 'loss/regularization') 63 | _track_loss(final_loss, 'loss/combined') 64 | 65 | return final_loss 66 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/unsupervised.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | 5 | from .augment import random_affine, random_photometric 6 | from .flow_util import flow_to_color 7 | from .util import resize_area, resize_bilinear 8 | from .losses import compute_losses, create_border_mask 9 | from ..ops import downsample 10 | from .image_warp import image_warp 11 | from .flownet import flownet, FLOW_SCALE 12 | 13 | 14 | # REGISTER ALL POSSIBLE LOSS TERMS 15 | LOSSES = ['occ', 'sym', 'fb', 'grad', 'ternary', 'photo', 'smooth_1st', 'smooth_2nd'] 16 | 17 | 18 | def _track_loss(op, name): 19 | tf.add_to_collection('losses', tf.identity(op, name=name)) 20 | 21 | 22 | def _track_image(op, name): 23 | name = 'train/' 
+ name 24 | tf.add_to_collection('train_images', tf.identity(op, name=name)) 25 | 26 | 27 | def unsupervised_loss(batch, params, normalization=None, augment=True, 28 | return_flow=False): 29 | channel_mean = tf.constant(normalization[0]) / 255.0 30 | im1, im2 = batch 31 | im1 = im1 / 255.0 32 | im2 = im2 / 255.0 33 | im_shape = tf.shape(im1)[1:3] 34 | 35 | # ------------------------------------------------------------------------- 36 | # Data & mask augmentation 37 | border_mask = create_border_mask(im1, 0.1) 38 | 39 | if augment: 40 | im1_geo, im2_geo, border_mask_global = random_affine( 41 | [im1, im2, border_mask], 42 | horizontal_flipping=True, 43 | min_scale=0.9, max_scale=1.1 44 | ) 45 | 46 | # augment locally 47 | im2_geo, border_mask_local = random_affine( 48 | [im2_geo, border_mask], 49 | min_scale=0.9, max_scale=1.1 50 | ) 51 | border_mask = border_mask_local * border_mask_global 52 | 53 | im1_photo, im2_photo = random_photometric( 54 | [im1_geo, im2_geo], 55 | noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3, 56 | brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, 57 | min_gamma=0.7, max_gamma=1.5) 58 | 59 | _track_image(im1_photo, 'augmented1') 60 | _track_image(im2_photo, 'augmented2') 61 | else: 62 | im1_geo, im2_geo = im1, im2 63 | im1_photo, im2_photo = im1, im2 64 | 65 | # Images for loss comparisons with values in [0, 1] (scale to original using * 255) 66 | im1_norm = im1_geo 67 | im2_norm = im2_geo 68 | # Images for neural network input with mean-zero values in [-1, 1] 69 | im1_photo = im1_photo - channel_mean 70 | im2_photo = im2_photo - channel_mean 71 | 72 | flownet_spec = params.get('flownet', 'S') 73 | full_resolution = params.get('full_res') 74 | train_all = params.get('train_all') 75 | 76 | flows_fw, flows_bw = flownet(im1_photo, im2_photo, 77 | flownet_spec=flownet_spec, 78 | full_resolution=full_resolution, 79 | backward_flow=True, 80 | train_all=train_all) 81 | 82 | flows_fw = flows_fw[-1] 83 | flows_bw = flows_bw[-1] 84 | 85 | # ------------------------------------------------------------------------- 86 | # Losses 87 | layer_weights = [12.7, 4.35, 3.9, 3.4, 1.1] 88 | layer_patch_distances = [3, 2, 2, 1, 1] 89 | if full_resolution: 90 | layer_weights = [12.7, 5.5, 5.0, 4.35, 3.9, 3.4, 1.1] 91 | layer_patch_distances = [3, 3] + layer_patch_distances 92 | im1_s = im1_norm 93 | im2_s = im2_norm 94 | mask_s = border_mask 95 | final_flow_scale = FLOW_SCALE * 4 96 | final_flow_fw = flows_fw[0] * final_flow_scale 97 | final_flow_bw = flows_bw[0] * final_flow_scale 98 | else: 99 | im1_s = downsample(im1_norm, 4) 100 | im2_s = downsample(im2_norm, 4) 101 | mask_s = downsample(border_mask, 4) 102 | final_flow_scale = FLOW_SCALE 103 | final_flow_fw = tf.image.resize_bilinear(flows_fw[0], im_shape) * final_flow_scale * 4 104 | final_flow_bw = tf.image.resize_bilinear(flows_bw[0], im_shape) * final_flow_scale * 4 105 | 106 | combined_losses = dict() 107 | combined_loss = 0.0 108 | for loss in LOSSES: 109 | combined_losses[loss] = 0.0 110 | 111 | if params.get('pyramid_loss'): 112 | flow_enum = enumerate(zip(flows_fw, flows_bw)) 113 | else: 114 | flow_enum = [(0, (flows_fw[0], flows_bw[0]))] 115 | 116 | for i, flow_pair in flow_enum: 117 | layer_name = "loss" + str(i + 2) 118 | 119 | flow_scale = final_flow_scale / (2 ** i) 120 | 121 | with tf.variable_scope(layer_name): 122 | layer_weight = layer_weights[i] 123 | flow_fw_s, flow_bw_s = flow_pair 124 | 125 | mask_occlusion = params.get('mask_occlusion', '') 126 | assert mask_occlusion in ['fb', 'disocc', ''] 127 
| 128 | losses = compute_losses(im1_s, im2_s, 129 | flow_fw_s * flow_scale, flow_bw_s * flow_scale, 130 | border_mask=mask_s if params.get('border_mask') else None, 131 | mask_occlusion=mask_occlusion, 132 | data_max_distance=layer_patch_distances[i]) 133 | 134 | layer_loss = 0.0 135 | 136 | for loss in LOSSES: 137 | weight_name = loss + '_weight' 138 | if params.get(weight_name): 139 | _track_loss(losses[loss], loss) 140 | layer_loss += params[weight_name] * losses[loss] 141 | combined_losses[loss] += layer_weight * losses[loss] 142 | 143 | combined_loss += layer_weight * layer_loss 144 | 145 | im1_s = downsample(im1_s, 2) 146 | im2_s = downsample(im2_s, 2) 147 | mask_s = downsample(mask_s, 2) 148 | 149 | regularization_loss = tf.losses.get_regularization_loss() 150 | final_loss = combined_loss + regularization_loss 151 | 152 | _track_loss(final_loss, 'loss/combined') 153 | 154 | for loss in LOSSES: 155 | _track_loss(combined_losses[loss], 'loss/' + loss) 156 | weight_name = loss + '_weight' 157 | if params.get(weight_name): 158 | weight = tf.identity(params[weight_name], name='weight/' + loss) 159 | tf.add_to_collection('params', weight) 160 | 161 | if not return_flow: 162 | return final_loss 163 | 164 | return final_loss, final_flow_fw, final_flow_bw 165 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/util.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def summarized_placeholder(name, prefix=None, key=tf.GraphKeys.SUMMARIES): 5 | prefix = '' if not prefix else prefix + '/' 6 | p = tf.placeholder(tf.float32, name=name) 7 | tf.summary.scalar(prefix + name, p, collections=[key]) 8 | return p 9 | 10 | 11 | def resize_area(tensor, like): 12 | _, h, w, _ = tf.unstack(tf.shape(like)) 13 | return tf.stop_gradient(tf.image.resize_area(tensor, [h, w])) 14 | 15 | 16 | def resize_bilinear(tensor, like): 17 | _, h, w, _ = tf.unstack(tf.shape(like)) 18 | return tf.stop_gradient(tf.image.resize_bilinear(tensor, [h, w])) 19 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/ops.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tensorflow as tf 4 | import subprocess 5 | from tensorflow.python.framework import ops 6 | 7 | 8 | # Register ops for compilation here 9 | OP_NAMES = ['backward_warp', 'downsample', 'correlation', 'forward_warp'] 10 | 11 | 12 | cwd = os.getcwd() 13 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 14 | os.chdir("../../ops") 15 | 16 | def compile(op=None): 17 | if op is not None: 18 | to_compile = [op] 19 | else: 20 | to_compile = OP_NAMES 21 | 22 | tf_inc = tf.sysconfig.get_include() 23 | for n in to_compile: 24 | base = n + "_op" 25 | fn_cu_cc = base + ".cu.cc" 26 | fn_cu_o = base + ".cu.o" 27 | fn_cc = base + ".cc" 28 | fn_o = base + ".o" 29 | fn_so = base + ".so" 30 | 31 | cuda_lib64_path_arg = "-L /usr/local/cuda-8.0/lib64" 32 | nvcc_cmd = "nvcc -std=c++11 -c -gencode=arch=compute_30,code=sm_30 -o {} -I {} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC" 33 | nvcc_cmd = nvcc_cmd.format(" ".join([fn_cu_o, fn_cu_cc]), 34 | tf_inc) 35 | subprocess.check_output(nvcc_cmd, shell=True) 36 | 37 | gcc_cmd = "{} -std=c++11 -shared -o {} -I {} -fPIC -lcudart -D GOOGLE_CUDA=1 {}" 38 | gcc_cmd = gcc_cmd.format('g++', 39 | " ".join([fn_so, fn_cu_o, fn_cc]), 40 | tf_inc, 41 | cuda_lib64_path_arg) 42 | 
subprocess.check_output(gcc_cmd, shell=True) 43 | 44 | 45 | if __name__ == "__main__": 46 | compile() 47 | 48 | 49 | module = sys.modules[__name__] 50 | for n in OP_NAMES: 51 | lib_path = './{}_op.so'.format(n) 52 | try: 53 | op_lib = tf.load_op_library(lib_path) 54 | except: 55 | compile(n) 56 | op_lib = tf.load_op_library(lib_path) 57 | setattr(module, '_' + n + '_module', op_lib) 58 | 59 | 60 | os.chdir(cwd) 61 | 62 | 63 | def correlation(first, second, **kwargs): 64 | return _correlation_module.correlation(first, second, **kwargs)[0] 65 | 66 | 67 | backward_warp = _backward_warp_module.backward_warp 68 | downsample = _downsample_module.downsample 69 | forward_warp = _forward_warp_module.forward_warp 70 | 71 | 72 | # Register op gradients 73 | 74 | @ops.RegisterGradient("BackwardWarp") 75 | def _BackwardWarpGrad(op, grad): 76 | grad0 = _backward_warp_module.backward_warp_grad( 77 | grad, op.inputs[0], op.inputs[1]) 78 | return [None, grad0] 79 | 80 | 81 | @ops.RegisterGradient("ForwardWarp") 82 | def _ForwardWarpGrad(op, grad): 83 | grad0 = _forward_warp_module.forward_warp_grad( 84 | grad, op.inputs[0]) 85 | return [grad0] 86 | 87 | 88 | @ops.RegisterGradient("Correlation") 89 | def _CorrelationGrad(op, in_grad, in_grad1, in_grad2): 90 | grad0, grad1 = _correlation_module.correlation_grad( 91 | in_grad, op.inputs[0], op.inputs[1], 92 | op.outputs[1], op.outputs[2], 93 | kernel_size=op.get_attr('kernel_size'), 94 | max_displacement=op.get_attr('max_displacement'), 95 | pad=op.get_attr('pad'), 96 | stride_1=op.get_attr('stride_1'), 97 | stride_2=op.get_attr('stride_2')) 98 | return [grad0, grad1] 99 | 100 | 101 | ops.NotDifferentiable("Downsample") 102 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import configparser 4 | from shutil import rmtree 5 | import tensorflow as tf 6 | 7 | 8 | CONFIG_PATH = '../config.ini' 9 | #TMP_DIR = '/tmp/e2eflow' 10 | 11 | 12 | def upload_gdrive(upload_dir, gdrive_filename): 13 | # search for file in gdrive and capture id if it already exists 14 | lst_lines = subprocess.Popen(['../scripts/gdrive', 'list'], 15 | stdout=subprocess.PIPE) 16 | existing_id = None 17 | for line in lst_lines.stdout: 18 | splits = line.split() 19 | if str(splits[1], 'utf-8') == gdrive_filename: 20 | existing_id = str(splits[0], 'utf-8') 21 | tmp_path = os.path.join('/tmp', gdrive_filename) 22 | if os.path.isfile(tmp_path): 23 | os.remove(tmp_path) 24 | p = subprocess.Popen(['/usr/bin/zip', '-r', tmp_path, upload_dir]) 25 | p.wait() 26 | if existing_id: 27 | p = subprocess.Popen(['../scripts/gdrive', 'update', 28 | existing_id, tmp_path]) 29 | else: 30 | p = subprocess.Popen(['../scripts/gdrive', 'upload', 31 | '--name', gdrive_filename, 32 | tmp_path]) 33 | p.wait() 34 | os.remove(tmp_path) 35 | 36 | 37 | def config_dict(config_path=CONFIG_PATH): 38 | """Returns the config as dictionary, 39 | where the elements have intuitively correct types. 
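A hypothetical `config.ini` section (not from this repo) run through the same coercion order the loop below applies: int first, then float, then boolean, otherwise the raw string:

```python
import configparser

config = configparser.ConfigParser()
config.read_string(u"[run]\nnum_iters = 500000\nlearning_rate = 1e-4\n"
                   u"full_res = False\nflownet = C\n")
section = config['run']
print(int(section['num_iters']),        # 500000  (int succeeds)
      float(section['learning_rate']),  # 0.0001  (int fails, float succeeds)
      section.getboolean('full_res'),   # False   (int and float fail)
      section['flownet'])               # 'C'     (falls through to string)
```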
40 | """ 41 | 42 | config = configparser.ConfigParser() 43 | config.read(config_path) 44 | 45 | d = dict() 46 | for section_key in config.sections(): 47 | sd = dict() 48 | section = config[section_key] 49 | for key in section: 50 | val = section[key] 51 | try: 52 | sd[key] = int(val) 53 | except ValueError: 54 | try: 55 | sd[key] = float(val) 56 | except ValueError: 57 | try: 58 | sd[key] = section.getboolean(key) 59 | except ValueError: 60 | sd[key] = val 61 | d[section_key] = sd 62 | return d 63 | 64 | 65 | def convert_input_strings(config_dct, dirs): 66 | if 'manual_decay_iters' in config_dct and 'manual_decay_lrs' in config_dct: 67 | iters_lst = config_dct['manual_decay_iters'].split(',') 68 | lrs_lst = config_dct['manual_decay_lrs'].split(',') 69 | iters_lst = [int(i) for i in iters_lst] 70 | lrs_lst = [float(l) for l in lrs_lst] 71 | config_dct['manual_decay_iters'] = iters_lst 72 | config_dct['manual_decay_lrs'] = lrs_lst 73 | config_dct['num_iters'] = sum(iters_lst) 74 | 75 | if 'finetune' in config_dct: 76 | finetune = [] 77 | for name in config_dct['finetune'].split(","): 78 | ckpt_dir = os.path.join(dirs['checkpoints'], name) 79 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 80 | if ckpt is None: 81 | ckpt_dir = os.path.join(dirs['log'], 'ex', name) 82 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 83 | assert ckpt, "Could not load experiment " + name 84 | finetune.append(ckpt) 85 | config_dct['finetune'] = finetune 86 | 87 | 88 | def tryremove(name, file=False): 89 | try: 90 | if file: 91 | os.remove(name) 92 | else: 93 | rmtree(name) 94 | except OSError: 95 | pass 96 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | from .DFLearner import DFLearner 3 | from .flowlib import * 4 | from .UnFlow import * 5 | -------------------------------------------------------------------------------- /core/data_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import random 4 | import tensorflow as tf 5 | 6 | class DataLoader(object): 7 | def __init__(self, 8 | dataset_dir=None, 9 | batch_size=None, 10 | img_height=None, 11 | img_width=None, 12 | num_source=None, 13 | num_scales=None): 14 | self.dataset_dir = dataset_dir 15 | self.batch_size = batch_size 16 | self.img_height = img_height 17 | self.img_width = img_width 18 | self.num_source = num_source 19 | self.num_scales = num_scales 20 | 21 | def load_train_batch(self, is_training=True): 22 | """Load a batch of training instances. 
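A construction sketch for this loader; the argument values here are illustrative (the training script configures its own), and the output shapes are annotated as assumptions:

```python
loader = DataLoader(dataset_dir='/path/to/prepared/data',
                    batch_size=4, img_height=128, img_width=416,
                    num_source=4, num_scales=4)
tgt, src_stack, intrinsics, tgt_aug, src_aug = loader.load_train_batch()
# tgt:        [4, 128, 416, 3]      target (center) frame
# src_stack:  [4, 128, 416, 4 * 3]  source frames stacked along channels
# intrinsics: per-scale 3x3 camera matrices from get_multi_scale_intrinsics
```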
23 | """ 24 | seed = random.randint(0, 2**31 - 1) 25 | # Load the list of training files into queues 26 | file_list = self.format_file_list(self.dataset_dir, 'train') 27 | image_paths_queue = tf.train.string_input_producer( 28 | file_list['image_file_list'], 29 | seed=seed, 30 | shuffle=True) 31 | cam_paths_queue = tf.train.string_input_producer( 32 | file_list['cam_file_list'], 33 | seed=seed, 34 | shuffle=True) 35 | self.steps_per_epoch = int( 36 | len(file_list['image_file_list'])//self.batch_size) 37 | 38 | # Load images 39 | img_reader = tf.WholeFileReader() 40 | _, image_contents = img_reader.read(image_paths_queue) 41 | image_seq = tf.image.decode_jpeg(image_contents) 42 | tgt_image, src_image_stack = \ 43 | self.unpack_image_sequence( 44 | image_seq, self.img_height, self.img_width, self.num_source) 45 | 46 | # Load camera intrinsics 47 | cam_reader = tf.TextLineReader() 48 | _, raw_cam_contents = cam_reader.read(cam_paths_queue) 49 | rec_def = [] 50 | for i in range(9): 51 | rec_def.append([1.]) 52 | raw_cam_vec = tf.decode_csv(raw_cam_contents, 53 | record_defaults=rec_def) 54 | raw_cam_vec = tf.stack(raw_cam_vec) 55 | intrinsics = tf.reshape(raw_cam_vec, [3, 3]) 56 | 57 | # Form training batches 58 | src_image_stack, tgt_image, intrinsics = \ 59 | tf.train.batch([src_image_stack, tgt_image, intrinsics], 60 | batch_size=self.batch_size) 61 | 62 | # Data augmentation 63 | image_all = tf.concat([tgt_image, src_image_stack], axis=3) 64 | image_all, intrinsics, image_augall = self.data_augmentation( 65 | image_all, intrinsics, self.img_height, self.img_width) 66 | tgt_image = image_all[:, :, :, :3] 67 | src_image_stack = image_all[:, :, :, 3:] 68 | tgt_image_aug = image_augall[:, :, :, :3] 69 | src_image_stack_aug = image_augall[:, :, :, 3:] 70 | intrinsics = self.get_multi_scale_intrinsics( 71 | intrinsics, self.num_scales) 72 | 73 | if is_training: 74 | return tgt_image, src_image_stack, intrinsics, tgt_image_aug, src_image_stack_aug 75 | else: 76 | return tgt_image, src_image_stack, intrinsics 77 | 78 | def make_intrinsics_matrix(self, fx, fy, cx, cy): 79 | # Assumes batch input 80 | batch_size = fx.get_shape().as_list()[0] 81 | zeros = tf.zeros_like(fx) 82 | r1 = tf.stack([fx, zeros, cx], axis=1) 83 | r2 = tf.stack([zeros, fy, cy], axis=1) 84 | r3 = tf.constant([0.,0.,1.], shape=[1, 3]) 85 | r3 = tf.tile(r3, [batch_size, 1]) 86 | intrinsics = tf.stack([r1, r2, r3], axis=1) 87 | return intrinsics 88 | 89 | def data_augmentation(self, im, intrinsics, out_h, out_w): 90 | # Random scaling 91 | def random_scaling(im, intrinsics): 92 | batch_size, in_h, in_w, _ = im.get_shape().as_list() 93 | scaling = tf.random_uniform([2], 1, 1.15) 94 | x_scaling = scaling[0] 95 | y_scaling = scaling[1] 96 | out_h = tf.cast(in_h * y_scaling, dtype=tf.int32) 97 | out_w = tf.cast(in_w * x_scaling, dtype=tf.int32) 98 | im = tf.image.resize_area(im, [out_h, out_w]) 99 | fx = intrinsics[:,0,0] * x_scaling 100 | fy = intrinsics[:,1,1] * y_scaling 101 | cx = intrinsics[:,0,2] * x_scaling 102 | cy = intrinsics[:,1,2] * y_scaling 103 | intrinsics = self.make_intrinsics_matrix(fx, fy, cx, cy) 104 | return im, intrinsics 105 | 106 | # Random cropping 107 | def random_cropping(im, intrinsics, out_h, out_w): 108 | # batch_size, in_h, in_w, _ = im.get_shape().as_list() 109 | batch_size, in_h, in_w, _ = tf.unstack(tf.shape(im)) 110 | offset_y = tf.random_uniform([1], 0, in_h - out_h + 1, dtype=tf.int32)[0] 111 | offset_x = tf.random_uniform([1], 0, in_w - out_w + 1, dtype=tf.int32)[0] 112 | im = 
tf.image.crop_to_bounding_box( 113 | im, offset_y, offset_x, out_h, out_w) 114 | fx = intrinsics[:,0,0] 115 | fy = intrinsics[:,1,1] 116 | cx = intrinsics[:,0,2] - tf.cast(offset_x, dtype=tf.float32) 117 | cy = intrinsics[:,1,2] - tf.cast(offset_y, dtype=tf.float32) 118 | intrinsics = self.make_intrinsics_matrix(fx, fy, cx, cy) 119 | return im, intrinsics 120 | 121 | # Random photometric augmentation 122 | # Credit: https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/augment.py 123 | def random_photometric(im, noise_stddev=0.04, min_contrast=-0.2, max_contrast=0.2, brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, min_gamma=0.8, max_gamma=1.2): 124 | """ 125 | Applies photometric augmentations to a list of image batches. 126 | Args: 127 | im: list of 3-channel image batches normalized to [0, 1]. 128 | Returns: 129 | Batch of normalized images with photometric augmentations. Has the same shape as the input batch. 130 | """ 131 | batch_size, in_h, in_w, _ = im[0].get_shape().as_list() 132 | 133 | contrast = tf.random_uniform([batch_size, 1], min_contrast, max_contrast) 134 | gamma = tf.random_uniform([batch_size, 1], min_gamma, max_gamma) 135 | gamma_inv = 1.0/gamma 136 | colour = tf.random_uniform([batch_size, 3], min_colour, max_colour) 137 | if noise_stddev > 0.0: 138 | noise = tf.random_normal([batch_size, 1], stddev=noise_stddev) 139 | else: 140 | noise = tf.zeros([batch_size, 1]) 141 | if brightness_stddev > 0.0: 142 | brightness = tf.random_normal([batch_size, 1], stddev=brightness_stddev) 143 | else: 144 | brightness = tf.zeros([batch_size, 1]) 145 | 146 | out = [] 147 | for temp in im: 148 | # Transpose to [height, width, num_batch, channels] 149 | im_re = tf.transpose(temp, [1, 2, 0, 3]) 150 | im_re = (im_re * (contrast + 1.0) + brightness) * colour 151 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re)) 152 | im_re = tf.pow(im_re, gamma_inv) 153 | im_re = im_re + noise 154 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re)) 155 | 156 | temp = tf.transpose(im_re, [2, 0, 1, 3]) 157 | temp = tf.stop_gradient(temp) 158 | out.append(temp) 159 | return tf.concat(out, axis=-1) 160 | 161 | im, intrinsics = random_scaling(im, intrinsics) 162 | im, intrinsics = random_cropping(im, intrinsics, out_h, out_w) 163 | # [0, 255] -> [0, 1] 164 | im_photo = im/255. 165 | im_photo = [im_photo[:,:,:,3*i:3*(i+1)] for i in range(self.num_source+1)] 166 | im_photo = random_photometric(im_photo) 167 | # [0, 1] -> [0, 255] 168 | im_photo = im_photo*255. 
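As a side note, the photometric chain applied just above (contrast, brightness, per-channel colour, gamma, additive noise, with clipping back to [0, 1] after the gamma and noise steps) can be sketched for a single image in plain NumPy. This is only an illustration with made-up image values; the ranges mirror the defaults of `random_photometric` above:

```python
import numpy as np

rng = np.random.RandomState(0)
img = rng.rand(128, 416, 3)              # toy image already normalized to [0, 1]

contrast = rng.uniform(-0.2, 0.2)        # min_contrast / max_contrast
brightness = rng.normal(scale=0.02)      # brightness_stddev
colour = rng.uniform(0.9, 1.1, size=3)   # per-channel colour factor
gamma = rng.uniform(0.8, 1.2)
noise = rng.normal(scale=0.04)           # noise_stddev

out = (img * (contrast + 1.0) + brightness) * colour
out = np.clip(out, 0.0, 1.0) ** (1.0 / gamma)   # clip, then gamma, as above
out = np.clip(out + noise, 0.0, 1.0)
print(out.shape, out.min(), out.max())
```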
169 | im = tf.cast(im, dtype=tf.uint8) 170 | im_photo = tf.cast(im_photo, dtype=tf.uint8) 171 | return im, intrinsics, im_photo 172 | 173 | def format_file_list(self, data_root, split): 174 | with open(data_root + '/%s.txt' % split, 'r') as f: 175 | frames = f.readlines() 176 | subfolders = [x.split(' ')[0] for x in frames] 177 | frame_ids = [x.split(' ')[1][:-1] for x in frames] 178 | image_file_list = [os.path.join(data_root, subfolders[i], 179 | frame_ids[i] + '.jpg') for i in range(len(frames))] 180 | cam_file_list = [os.path.join(data_root, subfolders[i], 181 | frame_ids[i] + '_cam.txt') for i in range(len(frames))] 182 | all_list = {} 183 | all_list['image_file_list'] = image_file_list 184 | all_list['cam_file_list'] = cam_file_list 185 | return all_list 186 | 187 | def unpack_image_sequence(self, image_seq, img_height, img_width, num_source): 188 | # Assuming the center image is the target frame 189 | tgt_start_idx = int(img_width * (num_source//2)) 190 | tgt_image = tf.slice(image_seq, 191 | [0, tgt_start_idx, 0], 192 | [-1, img_width, -1]) 193 | # Source frames before the target frame 194 | src_image_1 = tf.slice(image_seq, 195 | [0, 0, 0], 196 | [-1, int(img_width * (num_source//2)), -1]) 197 | # Source frames after the target frame 198 | src_image_2 = tf.slice(image_seq, 199 | [0, int(tgt_start_idx + img_width), 0], 200 | [-1, int(img_width * (num_source//2)), -1]) 201 | src_image_seq = tf.concat([src_image_1, src_image_2], axis=1) 202 | # Stack source frames along the color channels (i.e. [H, W, N*3]) 203 | src_image_stack = tf.concat([tf.slice(src_image_seq, 204 | [0, i*img_width, 0], 205 | [-1, img_width, -1]) 206 | for i in range(num_source)], axis=2) 207 | src_image_stack.set_shape([img_height, 208 | img_width, 209 | num_source * 3]) 210 | tgt_image.set_shape([img_height, img_width, 3]) 211 | return tgt_image, src_image_stack 212 | 213 | def batch_unpack_image_sequence(self, image_seq, img_height, img_width, num_source): 214 | # Assuming the center image is the target frame 215 | tgt_start_idx = int(img_width * (num_source//2)) 216 | tgt_image = tf.slice(image_seq, 217 | [0, 0, tgt_start_idx, 0], 218 | [-1, -1, img_width, -1]) 219 | # Source frames before the target frame 220 | src_image_1 = tf.slice(image_seq, 221 | [0, 0, 0, 0], 222 | [-1, -1, int(img_width * (num_source//2)), -1]) 223 | # Source frames after the target frame 224 | src_image_2 = tf.slice(image_seq, 225 | [0, 0, int(tgt_start_idx + img_width), 0], 226 | [-1, -1, int(img_width * (num_source//2)), -1]) 227 | src_image_seq = tf.concat([src_image_1, src_image_2], axis=2) 228 | # Stack source frames along the color channels (i.e. 
[B, H, W, N*3]) 229 | src_image_stack = tf.concat([tf.slice(src_image_seq, 230 | [0, 0, i*img_width, 0], 231 | [-1, -1, img_width, -1]) 232 | for i in range(num_source)], axis=3) 233 | return tgt_image, src_image_stack 234 | 235 | def get_multi_scale_intrinsics(self, intrinsics, num_scales): 236 | intrinsics_mscale = [] 237 | # Scale the intrinsics accordingly for each scale 238 | for s in range(num_scales): 239 | fx = intrinsics[:,0,0]/(2 ** s) 240 | fy = intrinsics[:,1,1]/(2 ** s) 241 | cx = intrinsics[:,0,2]/(2 ** s) 242 | cy = intrinsics[:,1,2]/(2 ** s) 243 | intrinsics_mscale.append( 244 | self.make_intrinsics_matrix(fx, fy, cx, cy)) 245 | intrinsics_mscale = tf.stack(intrinsics_mscale, axis=1) 246 | return intrinsics_mscale 247 | -------------------------------------------------------------------------------- /core/nets.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import tensorflow.contrib.slim as slim 4 | from tensorflow.contrib.layers.python.layers import utils 5 | import numpy as np 6 | 7 | from .utils import flow_inverse_warp 8 | 9 | # Range of disparity/inverse depth values 10 | DISP_SCALING = 10 11 | MIN_DISP = 0.01 12 | 13 | def resize_like(inputs, ref, type='nearest'): 14 | iH, iW = inputs.get_shape()[1], inputs.get_shape()[2] 15 | rH, rW = ref.get_shape()[1], ref.get_shape()[2] 16 | if iH == rH and iW == rW: 17 | return inputs 18 | if type == 'nearest': 19 | return tf.image.resize_nearest_neighbor(inputs, [rH.value, rW.value]) 20 | elif type == 'bilinear': 21 | return tf.image.resize_bilinear(inputs, [rH.value, rW.value]) 22 | 23 | # Reference: https://github.com/sampepose/flownet2-tf/blob/master/src/utils.py 24 | def pad(tensor, num=1): 25 | """ 26 | Pads the given tensor along the height and width dimensions with `num` 0s on each side 27 | """ 28 | return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT") 29 | 30 | def pad_4(tensor, u, b, l, r): 31 | return tf.pad(tensor, [[0, 0], [u, b], [l, r], [0, 0]], 'CONSTANT') 32 | 33 | def antipad(tensor, num=1): 34 | """ 35 | Performs a crop. "padding" for a deconvolutional layer (conv2d tranpose) removes 36 | padding from the output rather than adding it to the input. 
37 | """ 38 | batch, h, w, c = tensor.shape.as_list() 39 | return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c]) 40 | 41 | def antipad_4(tensor, u, b, l, r): 42 | batch, h, w, c = tensor.shape.as_list() 43 | return tf.slice(tensor, begin=[0, u, l, 0], size=[batch, h - u - b, w - l - r, c]) 44 | 45 | # Reference: https://github.com/scaelles/OSVOS-TensorFlow/blob/master/osvos.py 46 | def crop_features(feature, out_size): 47 | """Crop the center of a feature map 48 | Args: 49 | feature: Feature map to crop 50 | out_size: Size of the output feature map 51 | Returns: 52 | Tensor that performs the cropping 53 | """ 54 | up_size = tf.shape(feature) 55 | ini_w = tf.div(tf.subtract(up_size[1], out_size[1]), 2) 56 | ini_h = tf.div(tf.subtract(up_size[2], out_size[2]), 2) 57 | slice_input = tf.slice(feature, (0, ini_w, ini_h, 0), (-1, out_size[1], out_size[2], -1)) 58 | return tf.reshape(slice_input, [int(feature.get_shape()[0]), out_size[1], out_size[2], int(feature.get_shape()[3])]) 59 | 60 | # Reference: https://github.com/tensorflow/tensorflow/issues/4079 61 | def LeakyReLU(x, leak=0.1, name='lrelu'): 62 | with tf.variable_scope(name): 63 | f1 = 0.5 * (1.0 + leak) 64 | f2 = 0.5 * (1.0 - leak) 65 | return f1 * x + f2 * abs(x) 66 | 67 | # Both target->source and source->target 68 | def pose_net_fb(tgt_image, src_image_stack, is_training=True, reuse=False): 69 | inputs = tf.concat([tgt_image, src_image_stack], axis=3) 70 | H = inputs.get_shape()[1].value 71 | W = inputs.get_shape()[2].value 72 | num_source = int(src_image_stack.get_shape()[3].value//3) 73 | with tf.variable_scope('pose_net') as sc: 74 | if reuse: 75 | sc.reuse_variables() 76 | end_points_collection = sc.original_name_scope + '_end_points' 77 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 78 | normalizer_fn=None, 79 | weights_regularizer=slim.l2_regularizer(0.05), 80 | activation_fn=tf.nn.relu, 81 | outputs_collections=end_points_collection): 82 | # cnv1 to cnv5b are shared between pose and explainability prediction 83 | cnv1 = slim.conv2d(inputs,16, [7, 7], stride=2, scope='cnv1') 84 | cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2') 85 | cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3') 86 | cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4') 87 | cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5') 88 | cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6') 89 | cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7') 90 | # Double the number of channels 91 | pose_pred = slim.conv2d(cnv7, 6*num_source*2, [1, 1], scope='pred', 92 | stride=1, normalizer_fn=None, activation_fn=None) 93 | pose_avg = tf.reduce_mean(pose_pred, [1, 2]) 94 | # Empirically we found that scaling by a small constant 95 | # facilitates training. 
96 | # 1st half: target->source, 2nd half: source->target 97 | pose_final = 0.01 * tf.reshape(pose_avg, [-1, num_source, 6*2]) 98 | end_points = utils.convert_collection_to_dict(end_points_collection) 99 | return pose_final, end_points 100 | 101 | # helper functions 102 | # Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py 103 | def resconv(x, num_layers, stride): 104 | do_proj = tf.shape(x)[3] != num_layers or stride == 2 105 | conv1 = slim.conv2d(x, num_layers, [1, 1], stride=1, activation_fn=tf.nn.elu) 106 | conv2 = slim.conv2d(conv1, num_layers, [3, 3], stride=stride, activation_fn=tf.nn.elu) 107 | conv3 = slim.conv2d(conv2, 4 * num_layers, [1, 1], stride=1, activation_fn=None) 108 | if do_proj: 109 | shortcut = slim.conv2d(x, 4* num_layers, [1, 1], stride=stride, activation_fn=None) 110 | else: 111 | shortcut = x 112 | return tf.nn.elu(conv3 + shortcut) 113 | 114 | def resblock(x, num_layers, num_blocks): 115 | out = x 116 | for i in range(num_blocks - 1): 117 | out = resconv(out, num_layers, 1) 118 | out = resconv(out, num_layers, 2) 119 | return out 120 | 121 | def upsample_nn(x, ratio): 122 | s = tf.shape(x) 123 | h = s[1] 124 | w = s[2] 125 | return tf.image.resize_nearest_neighbor(x, [h*ratio, w*ratio]) 126 | 127 | def upconv(x, num_layers, kernal, scale): 128 | upsample = upsample_nn(x, scale) 129 | conv = slim.conv2d(upsample, num_layers, [kernal, kernal], stride=1, activation_fn=tf.nn.elu) 130 | return conv 131 | 132 | def disp_net_res50(tgt_image, is_training=True, reuse=False, get_feature=False): 133 | H = tgt_image.get_shape()[1].value 134 | W = tgt_image.get_shape()[2].value 135 | with tf.variable_scope('depth_net_res50') as sc: 136 | if reuse: 137 | sc.reuse_variables() 138 | end_points_collection = sc.original_name_scope + '_end_points' 139 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 140 | activation_fn=tf.nn.elu, 141 | outputs_collections=end_points_collection): 142 | # Encoder 143 | conv1 = slim.conv2d(tgt_image, 64, [7, 7], stride=2) # 1/2 144 | pool1 = slim.max_pool2d(conv1, [3, 3], padding='SAME') # 1/4 145 | conv2 = resblock(pool1, 64, 3) # 1/8 146 | conv3 = resblock(conv2, 128, 4) # 1/16 147 | conv4 = resblock(conv3, 256, 6) # 1/32 148 | conv5 = resblock(conv4, 512, 3) # 1/64 149 | 150 | # Decoder 151 | upconv6 = upconv(conv5, 512, 3, 2) # 1/32 152 | #upconv6 = slim.conv2d_transpose(conv5, 512, [3, 3], stride=2) 153 | upconv6 = resize_like(upconv6, conv4) 154 | concat6 = tf.concat([upconv6, conv4], 3) 155 | iconv6 = slim.conv2d(concat6, 512, [3, 3], stride=1) 156 | 157 | upconv5 = upconv(iconv6, 256, 3, 2) # 1/16 158 | #upconv5 = slim.conv2d_transpose(iconv6, 256, [3, 3], stride=2) 159 | upconv5 = resize_like(upconv5, conv3) 160 | concat5 = tf.concat([upconv5, conv3], 3) 161 | iconv5 = slim.conv2d(concat5, 256, [3, 3], stride=1) 162 | 163 | upconv4 = upconv(iconv5, 128, 3, 2) # 1/8 164 | #upconv4 = slim.conv2d_transpose(iconv5, 128, [3, 3], stride=2) 165 | upconv4 = resize_like(upconv4, conv2) 166 | concat4 = tf.concat([upconv4, conv2], 3) 167 | iconv4 = slim.conv2d(concat4, 128, [3, 3], stride=1) 168 | disp4 = DISP_SCALING * slim.conv2d(iconv4, 1, [3, 3], stride=1, 169 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp4') + MIN_DISP 170 | disp4_up = tf.image.resize_bilinear(disp4, [np.int(H/4), np.int(W/4)]) 171 | 172 | upconv3 = upconv(iconv4, 64, 3, 2) # 1/4 173 | #upconv3 = slim.conv2d_transpose(iconv4, 64, [3, 3], stride=2) 174 | upconv3 = resize_like(upconv3, pool1) 175 | disp4_up = resize_like(disp4_up, 
pool1) 176 | concat3 = tf.concat([upconv3, disp4_up, pool1], 3) 177 | iconv3 = slim.conv2d(concat3, 64, [3, 3], stride=1) 178 | disp3 = DISP_SCALING * slim.conv2d(iconv3, 1, [3, 3], stride=1, 179 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp3') + MIN_DISP 180 | disp3_up = tf.image.resize_bilinear(disp3, [np.int(H/2), np.int(W/2)]) 181 | 182 | upconv2 = upconv(iconv3, 32, 3, 2) # 1/2 183 | #upconv2 = slim.conv2d_transpose(iconv3, 32, [3, 3], stride=2) 184 | upconv2 = resize_like(upconv2, conv1) 185 | disp3_up = resize_like(disp3_up, conv1) 186 | concat2 = tf.concat([upconv2, disp3_up, conv1], 3) 187 | iconv2 = slim.conv2d(concat2, 32, [3, 3], stride=1) 188 | disp2 = DISP_SCALING * slim.conv2d(iconv2, 1, [3, 3], stride=1, 189 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp2') + MIN_DISP 190 | disp2_up = tf.image.resize_bilinear(disp2, [H, W]) 191 | 192 | upconv1 = upconv(iconv2, 16, 3, 2) 193 | #upconv1 = slim.conv2d_transpose(iconv2, 16, [3, 3], stride=2) 194 | upconv1 = resize_like(upconv1, disp2_up) 195 | concat1 = tf.concat([upconv1, disp2_up], 3) 196 | iconv1 = slim.conv2d(concat1, 16, [3, 3], stride=1) 197 | disp1 = DISP_SCALING * slim.conv2d(iconv1, 1, [3, 3], stride=1, 198 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp1') + MIN_DISP 199 | 200 | end_points = utils.convert_collection_to_dict(end_points_collection) 201 | 202 | if not get_feature: 203 | return [disp1, disp2, disp3, disp4], end_points 204 | else: 205 | return [disp1, disp2, disp3, disp4], conv5, end_points 206 | 207 | 208 | -------------------------------------------------------------------------------- /core/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def gray2rgb(im, cmap='gray'): 7 | cmap = plt.get_cmap(cmap) 8 | rgba_img = cmap(im.astype(np.float32)) 9 | rgb_img = np.delete(rgba_img, 3, 2) 10 | return rgb_img 11 | 12 | def normalize_depth_for_display(depth, pc=95, crop_percent=0, normalizer=None, cmap='gray'): 13 | # convert to disparity 14 | depth = 1./(depth + 1e-6) 15 | if normalizer is not None: 16 | depth = depth/normalizer 17 | else: 18 | depth = depth/(np.percentile(depth, pc) + 1e-6) 19 | depth = np.clip(depth, 0, 1) 20 | depth = gray2rgb(depth, cmap=cmap) 21 | keep_H = int(depth.shape[0] * (1-crop_percent)) 22 | depth = depth[:keep_H] 23 | depth = depth 24 | return depth 25 | 26 | # Add inverse_pose flag 27 | def euler2mat(z, y, x, inverse_pose=False): 28 | """Converts euler angles to rotation matrix 29 | TODO: remove the dimension for 'N' (deprecated for converting all source 30 | poses altogether) 31 | Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174 32 | Args: 33 | z: rotation angle along z axis (in radians) -- size = [B, N] 34 | y: rotation angle along y axis (in radians) -- size = [B, N] 35 | x: rotation angle along x axis (in radians) -- size = [B, N] 36 | Returns: 37 | Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3] 38 | """ 39 | B = tf.shape(z)[0] 40 | N = 1 41 | z = tf.clip_by_value(z, -np.pi, np.pi) 42 | y = tf.clip_by_value(y, -np.pi, np.pi) 43 | x = tf.clip_by_value(x, -np.pi, np.pi) 44 | if inverse_pose: 45 | z = -z 46 | y = -y 47 | x = -x 48 | 49 | # Expand to B x N x 1 x 1 50 | z = tf.expand_dims(tf.expand_dims(z, -1), -1) 51 | y = tf.expand_dims(tf.expand_dims(y, -1), -1) 52 | x = tf.expand_dims(tf.expand_dims(x, -1), -1) 
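For reference, the convention implemented by this function (and by `pose_vec2mat` further below) is that the per-axis rotations are composed as Rx · Ry · Rz and the 6-DoF pose vector is ordered (tx, ty, tz, rx, ry, rz). A minimal, unbatched NumPy sketch of the same math, with an arbitrary example pose and not part of the repository code, would look like:

```python
import numpy as np

def euler_to_mat(rz, ry, rx):
    # Same composition as euler2mat: R = Rx @ Ry @ Rz
    cz, sz = np.cos(rz), np.sin(rz)
    cy, sy = np.cos(ry), np.sin(ry)
    cx, sx = np.cos(rx), np.sin(rx)
    Rz = np.array([[cz, -sz, 0.], [sz, cz, 0.], [0., 0., 1.]])
    Ry = np.array([[cy, 0., sy], [0., 1., 0.], [-sy, 0., cy]])
    Rx = np.array([[1., 0., 0.], [0., cx, -sx], [0., sx, cx]])
    return Rx @ Ry @ Rz

def pose_vec_to_mat(vec):
    # vec = (tx, ty, tz, rx, ry, rz), as documented in pose_vec2mat
    tx, ty, tz, rx, ry, rz = vec
    T = np.eye(4)
    T[:3, :3] = euler_to_mat(rz, ry, rx)
    T[:3, 3] = [tx, ty, tz]
    return T

print(pose_vec_to_mat([0.1, 0.0, 0.5, 0.0, 0.02, 0.0]))
```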
53 | 54 | zeros = tf.zeros([B, N, 1, 1]) 55 | ones = tf.ones([B, N, 1, 1]) 56 | 57 | cosz = tf.cos(z) 58 | sinz = tf.sin(z) 59 | rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3) 60 | rotz_2 = tf.concat([sinz, cosz, zeros], axis=3) 61 | rotz_3 = tf.concat([zeros, zeros, ones], axis=3) 62 | zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2) 63 | 64 | cosy = tf.cos(y) 65 | siny = tf.sin(y) 66 | roty_1 = tf.concat([cosy, zeros, siny], axis=3) 67 | roty_2 = tf.concat([zeros, ones, zeros], axis=3) 68 | roty_3 = tf.concat([-siny,zeros, cosy], axis=3) 69 | ymat = tf.concat([roty_1, roty_2, roty_3], axis=2) 70 | 71 | cosx = tf.cos(x) 72 | sinx = tf.sin(x) 73 | rotx_1 = tf.concat([ones, zeros, zeros], axis=3) 74 | rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3) 75 | rotx_3 = tf.concat([zeros, sinx, cosx], axis=3) 76 | xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2) 77 | 78 | rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat) 79 | return rotMat 80 | 81 | # Add inverse_pose flag 82 | def pose_vec2mat(vec, inverse_pose=False): 83 | """Converts 6DoF parameters to transformation matrix 84 | Args: 85 | vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6] 86 | Returns: 87 | A transformation matrix -- [B, 4, 4] 88 | """ 89 | batch_size, _ = vec.get_shape().as_list() 90 | translation = tf.slice(vec, [0, 0], [-1, 3]) 91 | translation = tf.expand_dims(translation, -1) 92 | if inverse_pose: 93 | translation = -translation 94 | rx = tf.slice(vec, [0, 3], [-1, 1]) 95 | ry = tf.slice(vec, [0, 4], [-1, 1]) 96 | rz = tf.slice(vec, [0, 5], [-1, 1]) 97 | rot_mat = euler2mat(rz, ry, rx, inverse_pose) 98 | rot_mat = tf.squeeze(rot_mat, axis=[1]) 99 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 100 | filler = tf.tile(filler, [batch_size, 1, 1]) 101 | transform_mat = tf.concat([rot_mat, translation], axis=2) 102 | transform_mat = tf.concat([transform_mat, filler], axis=1) 103 | return transform_mat 104 | 105 | def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True): 106 | """Transforms coordinates in the pixel frame to the camera frame. 107 | 108 | Args: 109 | depth: [batch, height, width] 110 | pixel_coords: homogeneous pixel coordinates [batch, 3, height, width] 111 | intrinsics: camera intrinsics [batch, 3, 3] 112 | is_homogeneous: return in homogeneous coordinates 113 | Returns: 114 | Coords in the camera frame [batch, 3 (4 if homogeneous), height, width] 115 | """ 116 | batch, height, width = depth.get_shape().as_list() 117 | depth = tf.reshape(depth, [batch, 1, -1]) 118 | pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1]) 119 | cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth 120 | if is_homogeneous: 121 | ones = tf.ones([batch, 1, height*width]) 122 | cam_coords = tf.concat([cam_coords, ones], axis=1) 123 | cam_coords = tf.reshape(cam_coords, [batch, -1, height, width]) 124 | return cam_coords 125 | 126 | def cam2pixel(cam_coords, proj): 127 | """Transforms coordinates in a camera frame to the pixel frame. 
128 | 129 | Args: 130 | cam_coords: [batch, 4, height, width] 131 | proj: [batch, 4, 4] 132 | Returns: 133 | Pixel coordinates projected from the camera frame [batch, height, width, 2] 134 | """ 135 | batch, _, height, width = cam_coords.get_shape().as_list() 136 | cam_coords = tf.reshape(cam_coords, [batch, 4, -1]) 137 | unnormalized_pixel_coords = tf.matmul(proj, cam_coords) 138 | x_u = tf.slice(unnormalized_pixel_coords, [0, 0, 0], [-1, 1, -1]) 139 | y_u = tf.slice(unnormalized_pixel_coords, [0, 1, 0], [-1, 1, -1]) 140 | z_u = tf.slice(unnormalized_pixel_coords, [0, 2, 0], [-1, 1, -1]) 141 | x_n = x_u / (z_u + 1e-10) 142 | y_n = y_u / (z_u + 1e-10) 143 | pixel_coords = tf.concat([x_n, y_n], axis=1) 144 | pixel_coords = tf.reshape(pixel_coords, [batch, 2, height, width]) 145 | return tf.transpose(pixel_coords, perm=[0, 2, 3, 1]) 146 | 147 | def meshgrid(batch, height, width, is_homogeneous=True): 148 | """Construct a 2D meshgrid. 149 | 150 | Args: 151 | batch: batch size 152 | height: height of the grid 153 | width: width of the grid 154 | is_homogeneous: whether to return in homogeneous coordinates 155 | Returns: 156 | x,y grid coordinates [batch, 2 (3 if homogeneous), height, width] 157 | """ 158 | x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), 159 | tf.transpose(tf.expand_dims( 160 | tf.linspace(-1.0, 1.0, width), 1), [1, 0])) 161 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 162 | tf.ones(shape=tf.stack([1, width]))) 163 | x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32) 164 | y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32) 165 | if is_homogeneous: 166 | ones = tf.ones_like(x_t) 167 | coords = tf.stack([x_t, y_t, ones], axis=0) 168 | else: 169 | coords = tf.stack([x_t, y_t], axis=0) 170 | coords = tf.tile(tf.expand_dims(coords, 0), [batch, 1, 1, 1]) 171 | return coords 172 | 173 | # Add inverse_pose flag 174 | def projective_inverse_warp(img, depth, pose, intrinsics, inverse_pose=False): 175 | """Inverse warp a source image to the target image plane based on projection. 176 | 177 | Args: 178 | img: the source image [batch, height_s, width_s, 3] 179 | depth: depth map of the target image [batch, height_t, width_t] 180 | pose: target to source camera transformation matrix [batch, 6], in the 181 | order of tx, ty, tz, rx, ry, rz 182 | intrinsics: camera intrinsics [batch, 3, 3] 183 | Returns: 184 | Source image inverse warped to the target image plane [batch, height_t, 185 | width_t, 3] 186 | """ 187 | batch, height, width, _ = img.get_shape().as_list() 188 | # Convert pose vector to matrix 189 | pose = pose_vec2mat(pose, inverse_pose) 190 | # Construct pixel grid coordinates 191 | pixel_coords = meshgrid(batch, height, width) 192 | # Convert pixel coordinates to the camera frame 193 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics) 194 | # Construct a 4x4 intrinsic matrix (TODO: can it be 3x4?) 195 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 196 | filler = tf.tile(filler, [batch, 1, 1]) 197 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2) 198 | intrinsics = tf.concat([intrinsics, filler], axis=1) 199 | # Get a 4x4 transformation matrix from 'target' camera frame to 'source' 200 | # pixel frame. 
201 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose) 202 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel) 203 | output_img = bilinear_sampler(img, src_pixel_coords) 204 | return output_img, src_pixel_coords 205 | 206 | # (Yuliang) Inverse warp with flow 207 | def flow_inverse_warp(img, flow): 208 | batch, height, width, _ = img.get_shape().as_list() 209 | x_base = tf.range(width) 210 | y_base = tf.range(height) 211 | x_base = tf.stack([x_base]*height, axis=0) 212 | y_base = tf.transpose(tf.stack([y_base]*width, axis=0)) 213 | flow0 = flow[:, :, :, 0] 214 | flow1 = flow[:, :, :, 1] 215 | flow0 = flow0 + tf.cast(x_base, tf.float32) 216 | flow1 = flow1 + tf.cast(y_base, tf.float32) 217 | coords = tf.stack([flow0, flow1], axis=-1) 218 | output_img = bilinear_sampler(img, coords) 219 | return output_img 220 | 221 | def depth_pose_inverse_warp(img, depth, pose, intrinsics): 222 | batch, height, width, _ = img.get_shape().as_list() 223 | pixel_coords = meshgrid(batch, height, width) 224 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics) 225 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 226 | filler = tf.tile(filler, [batch, 1, 1]) 227 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2) 228 | intrinsics = tf.concat([intrinsics, filler], axis=1) 229 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose) 230 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel) 231 | output_img = bilinear_sampler(img, src_pixel_coords) 232 | return output_img, src_pixel_coords 233 | 234 | def bilinear_sampler(imgs, coords): 235 | """Construct a new image by bilinear sampling from the input image. 236 | 237 | Points falling outside the source image boundary have value 0. 238 | 239 | Args: 240 | imgs: source image to be sampled from [batch, height_s, width_s, channels] 241 | coords: coordinates of source pixels to sample from [batch, height_t, 242 | width_t, 2]. height_t/width_t correspond to the dimensions of the output 243 | image (don't need to be the same as height_s/width_s). The two channels 244 | correspond to x and y coordinates respectively. 
245 | Returns: 246 | A new sampled image [batch, height_t, width_t, channels] 247 | """ 248 | def _repeat(x, n_repeats): 249 | rep = tf.transpose( 250 | tf.expand_dims(tf.ones(shape=tf.stack([ 251 | n_repeats, 252 | ])), 1), [1, 0]) 253 | rep = tf.cast(rep, 'float32') 254 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 255 | return tf.reshape(x, [-1]) 256 | 257 | with tf.name_scope('image_sampling'): 258 | coords_x, coords_y = tf.split(coords, [1, 1], axis=3) 259 | inp_size = imgs.get_shape() 260 | coord_size = coords.get_shape() 261 | out_size = coords.get_shape().as_list() 262 | out_size[3] = imgs.get_shape().as_list()[3] 263 | 264 | coords_x = tf.cast(coords_x, 'float32') 265 | coords_y = tf.cast(coords_y, 'float32') 266 | 267 | x0 = tf.floor(coords_x) 268 | x1 = x0 + 1 269 | y0 = tf.floor(coords_y) 270 | y1 = y0 + 1 271 | 272 | y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32') 273 | x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32') 274 | zero = tf.zeros([1], dtype='float32') 275 | 276 | x0_safe = tf.clip_by_value(x0, zero, x_max) 277 | y0_safe = tf.clip_by_value(y0, zero, y_max) 278 | x1_safe = tf.clip_by_value(x1, zero, x_max) 279 | y1_safe = tf.clip_by_value(y1, zero, y_max) 280 | 281 | wt_x0 = x1_safe - coords_x 282 | wt_x1 = coords_x - x0_safe 283 | wt_y0 = y1_safe - coords_y 284 | wt_y1 = coords_y - y0_safe 285 | 286 | ## indices in the flat image to sample from 287 | dim2 = tf.cast(inp_size[2], 'float32') 288 | dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32') 289 | base = tf.reshape( 290 | _repeat( 291 | tf.cast(tf.range(coord_size[0]), 'float32') * dim1, 292 | coord_size[1] * coord_size[2]), 293 | [out_size[0], out_size[1], out_size[2], 1]) 294 | 295 | base_y0 = base + y0_safe * dim2 296 | base_y1 = base + y1_safe * dim2 297 | idx00 = tf.reshape(x0_safe + base_y0, [-1]) 298 | idx01 = x0_safe + base_y1 299 | idx10 = x1_safe + base_y0 300 | idx11 = x1_safe + base_y1 301 | 302 | ## sample from imgs 303 | imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]])) 304 | imgs_flat = tf.cast(imgs_flat, 'float32') 305 | im00 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size) 306 | im01 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size) 307 | im10 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size) 308 | im11 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size) 309 | 310 | w00 = wt_x0 * wt_y0 311 | w01 = wt_x0 * wt_y1 312 | w10 = wt_x1 * wt_y0 313 | w11 = wt_x1 * wt_y1 314 | 315 | output = tf.add_n([ 316 | w00 * im00, w01 * im01, 317 | w10 * im10, w11 * im11 318 | ]) 319 | return output 320 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/data/__init__.py -------------------------------------------------------------------------------- /data/kitti/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/data/kitti/__init__.py -------------------------------------------------------------------------------- /data/kitti/kitti_odom_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from glob import glob 4 | import os 5 | import scipy.misc 6 | # import sys 7 | # 
sys.path.append('../../') 8 | # from utils.misc import * 9 | 10 | class kitti_odom_loader(object): 11 | def __init__(self, 12 | dataset_dir, 13 | img_height=128, 14 | img_width=416, 15 | seq_length=5): 16 | self.dataset_dir = dataset_dir 17 | self.img_height = img_height 18 | self.img_width = img_width 19 | self.seq_length = seq_length 20 | self.train_seqs = [0, 1, 2, 3, 4, 5, 6, 7, 8] 21 | self.test_seqs = [9, 10] 22 | 23 | self.collect_test_frames() 24 | self.collect_train_frames() 25 | 26 | def collect_test_frames(self): 27 | self.test_frames = [] 28 | for seq in self.test_seqs: 29 | seq_dir = os.path.join(self.dataset_dir, 'sequences', '%.2d' % seq) 30 | img_dir = os.path.join(seq_dir, 'image_2') 31 | N = len(glob(img_dir + '/*.png')) 32 | for n in range(N): 33 | self.test_frames.append('%.2d %.6d' % (seq, n)) 34 | self.num_test = len(self.test_frames) 35 | 36 | def collect_train_frames(self): 37 | self.train_frames = [] 38 | for seq in self.train_seqs: 39 | seq_dir = os.path.join(self.dataset_dir, 'sequences', '%.2d' % seq) 40 | img_dir = os.path.join(seq_dir, 'image_2') 41 | N = len(glob(img_dir + '/*.png')) 42 | for n in range(N): 43 | self.train_frames.append('%.2d %.6d' % (seq, n)) 44 | self.num_train = len(self.train_frames) 45 | 46 | def is_valid_sample(self, frames, tgt_idx): 47 | N = len(frames) 48 | tgt_drive, _ = frames[tgt_idx].split(' ') 49 | if self.seq_length % 2 != 0: 50 | half_offset = int((self.seq_length - 1)/2) 51 | min_src_idx = tgt_idx - half_offset 52 | max_src_idx = tgt_idx + half_offset 53 | if min_src_idx < 0 or max_src_idx >= N: 54 | return False 55 | min_src_drive, _ = frames[min_src_idx].split(' ') 56 | max_src_drive, _ = frames[max_src_idx].split(' ') 57 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive: 58 | return True 59 | return False 60 | else: 61 | left_offset = int(self.seq_length / 2 - 1) 62 | right_offset = int(self.seq_length / 2) 63 | min_src_idx = tgt_idx - left_offset 64 | max_src_idx = tgt_idx + right_offset 65 | if min_src_idx < 0 or max_src_idx >= N: 66 | return False 67 | min_src_drive, _ = frames[min_src_idx].split(' ') 68 | max_src_drive, _ = frames[max_src_idx].split(' ') 69 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive: 70 | return True 71 | return False 72 | 73 | def load_image_sequence(self, frames, tgt_idx, seq_length): 74 | if seq_length % 2 != 0: 75 | half_offset = int((seq_length - 1)/2) 76 | image_seq = [] 77 | for o in range(-half_offset, half_offset+1): 78 | curr_idx = tgt_idx + o 79 | curr_drive, curr_frame_id = frames[curr_idx].split(' ') 80 | curr_img = self.load_image(curr_drive, curr_frame_id) 81 | if o == 0: 82 | zoom_y = self.img_height/curr_img.shape[0] 83 | zoom_x = self.img_width/curr_img.shape[1] 84 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 85 | image_seq.append(curr_img) 86 | return image_seq, zoom_x, zoom_y 87 | else: 88 | left_offset = int(seq_length / 2 - 1) 89 | right_offset = int(seq_length / 2) 90 | image_seq = [] 91 | for o in range(-left_offset, right_offset + 1): 92 | curr_idx = tgt_idx + o 93 | curr_drive, curr_frame_id = frames[curr_idx].split(' ') 94 | curr_img = self.load_image(curr_drive, curr_frame_id) 95 | if o == 0: 96 | zoom_y = self.img_height/curr_img.shape[0] 97 | zoom_x = self.img_width/curr_img.shape[1] 98 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 99 | image_seq.append(curr_img) 100 | return image_seq, zoom_x, zoom_y 101 | 102 | def load_example(self, frames, tgt_idx, load_pose=False): 103 | 
image_seq, zoom_x, zoom_y = self.load_image_sequence(frames, tgt_idx, self.seq_length) 104 | tgt_drive, tgt_frame_id = frames[tgt_idx].split(' ') 105 | intrinsics = self.load_intrinsics(tgt_drive, tgt_frame_id) 106 | intrinsics = self.scale_intrinsics(intrinsics, zoom_x, zoom_y) 107 | example = {} 108 | example['intrinsics'] = intrinsics 109 | example['image_seq'] = image_seq 110 | example['folder_name'] = tgt_drive 111 | example['file_name'] = tgt_frame_id 112 | if load_pose: 113 | pass 114 | return example 115 | 116 | def get_train_example_with_idx(self, tgt_idx): 117 | if not self.is_valid_sample(self.train_frames, tgt_idx): 118 | return False 119 | example = self.load_example(self.train_frames, tgt_idx) 120 | return example 121 | 122 | # def load_frame(self, drive, frame_id): 123 | # img = self.load_image(drive, frame_id) 124 | # try: 125 | # scale_x = np.float(self.img_width)/img.shape[1] 126 | # except: 127 | # print("KITTI loading error!") 128 | # print("Drive = ", drive) 129 | # print("frame_id = ", frame_id) 130 | # raise 131 | # scale_y = np.float(self.img_height)/img.shape[0] 132 | # intrinsics = self.load_intrinsics(drive, frame_id) 133 | # intrinsics = self.scale_intrinsics(intrinsics, scale_x, scale_y) 134 | # img = self.crop_resize(img) 135 | # return img, intrinsics 136 | 137 | def load_image(self, drive, frame_id): 138 | img_file = os.path.join(self.dataset_dir, 'sequences', '%s/image_2/%s.png' % (drive, frame_id)) 139 | img = scipy.misc.imread(img_file) 140 | return img 141 | 142 | def load_intrinsics(self, drive, frame_id): 143 | calib_file = os.path.join(self.dataset_dir, 'sequences', '%s/calib.txt' % drive) 144 | proj_c2p, _ = self.read_calib_file(calib_file) 145 | intrinsics = proj_c2p[:3, :3] 146 | return intrinsics 147 | 148 | # def load_gt_odom(self, drive, tgt_idx, src_idx): 149 | # pose_file = os.path.join(self.dataset_dir, 'poses', '%s.txt' % drive) 150 | # with open(pose_file, 'r') as f: 151 | # poses = f.readlines() 152 | # filler = np.array([0, 0, 0, 1]).reshape((1,4)) 153 | # tgt_pose = np.array(poses[int(tgt_idx)][:-1].split(' ')).astype(np.float32).reshape(3,4) 154 | # tgt_pose = np.concatenate((tgt_pose, filler), axis=0) 155 | # src_pose = np.array(poses[int(src_idx)][:-1].split(' ')).astype(np.float32).reshape(3,4) 156 | # src_pose = np.concatenate((src_pose, filler), axis=0) 157 | # rel_pose = np.dot(np.linalg.inv(src_pose), tgt_pose) 158 | # rel_6DOF = pose_mat_to_6dof(rel_pose) 159 | # return rel_6DOF 160 | 161 | def read_calib_file(self, filepath, cid=2): 162 | """Read in a calibration file and parse into a dictionary.""" 163 | with open(filepath, 'r') as f: 164 | C = f.readlines() 165 | def parseLine(L, shape): 166 | data = L.split() 167 | data = np.array(data[1:]).reshape(shape).astype(np.float32) 168 | return data 169 | proj_c2p = parseLine(C[cid], shape=(3,4)) 170 | proj_v2c = parseLine(C[-1], shape=(3,4)) 171 | filler = np.array([0, 0, 0, 1]).reshape((1,4)) 172 | proj_v2c = np.concatenate((proj_v2c, filler), axis=0) 173 | return proj_c2p, proj_v2c 174 | 175 | def scale_intrinsics(self,mat, sx, sy): 176 | out = np.copy(mat) 177 | out[0,0] *= sx 178 | out[0,2] *= sx 179 | out[1,1] *= sy 180 | out[1,2] *= sy 181 | return out 182 | 183 | 184 | -------------------------------------------------------------------------------- /data/kitti/kitti_raw_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from glob import glob 4 | import os 5 | import 
scipy.misc 6 | 7 | class kitti_raw_loader(object): 8 | def __init__(self, 9 | dataset_dir, 10 | split, 11 | img_height=256, 12 | img_width=256, 13 | seq_length=5): 14 | dir_path = os.path.dirname(os.path.realpath(__file__)) 15 | static_frames_file = dir_path + '/static_frames.txt' 16 | excluded_frames_file = dir_path + '/excluded_frames.txt' 17 | test_scene_file = dir_path + '/test_scenes_' + split + '.txt' 18 | with open(test_scene_file, 'r') as f: 19 | test_scenes = f.readlines() 20 | self.test_scenes = [t[:-1] for t in test_scenes] 21 | self.dataset_dir = dataset_dir 22 | self.img_height = img_height 23 | self.img_width = img_width 24 | self.seq_length = seq_length 25 | self.cam_ids = ['02', '03'] 26 | self.date_list = ['2011_09_26', '2011_09_28', '2011_09_29', 27 | '2011_09_30', '2011_10_03'] 28 | self.collect_static_frames(static_frames_file) 29 | self.collect_excluded_frames(excluded_frames_file) 30 | self.collect_train_frames() 31 | 32 | def collect_static_frames(self, static_frames_file): 33 | with open(static_frames_file, 'r') as f: 34 | frames = f.readlines() 35 | self.static_frames = [] 36 | for fr in frames: 37 | if fr == '\n': 38 | continue 39 | date, drive, frame_id = fr.split(' ') 40 | curr_fid = '%.10d' % (np.int(frame_id[:-1])) 41 | for cid in self.cam_ids: 42 | self.static_frames.append(drive + ' ' + cid + ' ' + curr_fid) 43 | 44 | # Exclude KITTI flow frames 45 | def collect_excluded_frames(self, excluded_frames_file): 46 | with open(excluded_frames_file, 'r') as f: 47 | frames = f.readlines() 48 | self.excluded_frames = [] 49 | for fr in frames: 50 | if fr == '\n': 51 | continue 52 | date, drive, frame_id = fr.split(' ') 53 | curr_fid = '%.10d' % (np.int(frame_id[:-1])) 54 | for cid in self.cam_ids: 55 | self.excluded_frames.append(drive + ' ' + cid + ' ' + curr_fid) 56 | 57 | def collect_train_frames(self): 58 | all_frames = [] 59 | for date in self.date_list: 60 | drive_set = os.listdir(self.dataset_dir + date + '/') 61 | for dr in drive_set: 62 | drive_dir = os.path.join(self.dataset_dir, date, dr) 63 | if os.path.isdir(drive_dir): 64 | if dr[:-5] in self.test_scenes: 65 | continue 66 | for cam in self.cam_ids: 67 | img_dir = os.path.join(drive_dir, 'image_' + cam, 'data') 68 | N = len(glob(img_dir + '/*.png')) 69 | for n in range(N): 70 | frame_id = '%.10d' % n 71 | all_frames.append(dr + ' ' + cam + ' ' + frame_id) 72 | 73 | 74 | for s in self.static_frames: 75 | try: 76 | all_frames.remove(s) 77 | # print('removed static frame from training: %s' % s) 78 | except: 79 | pass 80 | 81 | for s in self.excluded_frames: 82 | try: 83 | all_frames.remove(s) 84 | except: 85 | pass 86 | 87 | self.train_frames = all_frames 88 | self.num_train = len(self.train_frames) 89 | 90 | def is_valid_sample(self, frames, tgt_idx): 91 | N = len(frames) 92 | tgt_drive, cid, _ = frames[tgt_idx].split(' ') 93 | if self.seq_length % 2 != 0: 94 | half_offset = int((self.seq_length - 1)/2) 95 | min_src_idx = tgt_idx - half_offset 96 | max_src_idx = tgt_idx + half_offset 97 | if min_src_idx < 0 or max_src_idx >= N: 98 | return False 99 | min_src_drive, min_src_cid, _ = frames[min_src_idx].split(' ') 100 | max_src_drive, max_src_cid, _ = frames[max_src_idx].split(' ') 101 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive and cid == min_src_cid and cid == max_src_cid: 102 | return True 103 | return False 104 | else: 105 | left_offset = int(self.seq_length / 2 - 1) 106 | right_offset = int(self.seq_length / 2) 107 | min_src_idx = tgt_idx - left_offset 108 | max_src_idx = tgt_idx + 
right_offset 109 | if min_src_idx < 0 or max_src_idx >= N: 110 | return False 111 | min_src_drive, min_src_cid, _ = frames[min_src_idx].split(' ') 112 | max_src_drive, max_src_cid, _ = frames[max_src_idx].split(' ') 113 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive and cid == min_src_cid and cid == max_src_cid: 114 | return True 115 | return False 116 | 117 | def get_train_example_with_idx(self, tgt_idx): 118 | if not self.is_valid_sample(self.train_frames, tgt_idx): 119 | return False 120 | example = self.load_example(self.train_frames, tgt_idx) 121 | return example 122 | 123 | def load_image_sequence(self, frames, tgt_idx, seq_length): 124 | if seq_length % 2 != 0: 125 | half_offset = int((seq_length - 1)/2) 126 | image_seq = [] 127 | for o in range(-half_offset, half_offset + 1): 128 | curr_idx = tgt_idx + o 129 | curr_drive, curr_cid, curr_frame_id = frames[curr_idx].split(' ') 130 | curr_img = self.load_image_raw(curr_drive, curr_cid, curr_frame_id) 131 | if o == 0: 132 | zoom_y = self.img_height/curr_img.shape[0] 133 | zoom_x = self.img_width/curr_img.shape[1] 134 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 135 | image_seq.append(curr_img) 136 | return image_seq, zoom_x, zoom_y 137 | else: 138 | left_offset = int(seq_length / 2 - 1) 139 | right_offset = int(seq_length / 2) 140 | image_seq = [] 141 | for o in range(-left_offset, right_offset + 1): 142 | curr_idx = tgt_idx + o 143 | curr_drive, curr_cid, curr_frame_id = frames[curr_idx].split(' ') 144 | curr_img = self.load_image_raw(curr_drive, curr_cid, curr_frame_id) 145 | if o == 0: 146 | zoom_y = self.img_height/curr_img.shape[0] 147 | zoom_x = self.img_width/curr_img.shape[1] 148 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 149 | image_seq.append(curr_img) 150 | return image_seq, zoom_x, zoom_y 151 | 152 | def load_example(self, frames, tgt_idx): 153 | image_seq, zoom_x, zoom_y = self.load_image_sequence(frames, tgt_idx, self.seq_length) 154 | tgt_drive, tgt_cid, tgt_frame_id = frames[tgt_idx].split(' ') 155 | intrinsics = self.load_intrinsics_raw(tgt_drive, tgt_cid, tgt_frame_id) 156 | intrinsics = self.scale_intrinsics(intrinsics, zoom_x, zoom_y) 157 | example = {} 158 | example['intrinsics'] = intrinsics 159 | example['image_seq'] = image_seq 160 | example['folder_name'] = tgt_drive + '_' + tgt_cid + '/' 161 | example['file_name'] = tgt_frame_id 162 | return example 163 | 164 | def load_image_raw(self, drive, cid, frame_id): 165 | date = drive[:10] 166 | img_file = os.path.join(self.dataset_dir, date, drive, 'image_' + cid, 'data', frame_id + '.png') 167 | img = scipy.misc.imread(img_file) 168 | return img 169 | 170 | def load_intrinsics_raw(self, drive, cid, frame_id): 171 | date = drive[:10] 172 | calib_file = os.path.join(self.dataset_dir, date, 'calib_cam_to_cam.txt') 173 | 174 | filedata = self.read_raw_calib_file(calib_file) 175 | P_rect = np.reshape(filedata['P_rect_' + cid], (3, 4)) 176 | intrinsics = P_rect[:3, :3] 177 | return intrinsics 178 | 179 | def read_raw_calib_file(self,filepath): 180 | # From https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py 181 | """Read in a calibration file and parse into a dictionary.""" 182 | data = {} 183 | 184 | with open(filepath, 'r') as f: 185 | for line in f.readlines(): 186 | key, value = line.split(':', 1) 187 | # The only non-float values in these files are dates, which 188 | # we don't care about anyway 189 | try: 190 | data[key] = np.array([float(x) for x in value.split()]) 
191 | except ValueError: 192 | pass 193 | return data 194 | 195 | def scale_intrinsics(self, mat, sx, sy): 196 | out = np.copy(mat) 197 | out[0,0] *= sx 198 | out[0,2] *= sx 199 | out[1,1] *= sy 200 | out[1,2] *= sy 201 | return out 202 | 203 | 204 | -------------------------------------------------------------------------------- /data/kitti/test_files_stereo.txt: -------------------------------------------------------------------------------- 1 | training/image_2/000000_10.png 2 | training/image_2/000001_10.png 3 | training/image_2/000002_10.png 4 | training/image_2/000003_10.png 5 | training/image_2/000004_10.png 6 | training/image_2/000005_10.png 7 | training/image_2/000006_10.png 8 | training/image_2/000007_10.png 9 | training/image_2/000008_10.png 10 | training/image_2/000009_10.png 11 | training/image_2/000010_10.png 12 | training/image_2/000011_10.png 13 | training/image_2/000012_10.png 14 | training/image_2/000013_10.png 15 | training/image_2/000014_10.png 16 | training/image_2/000015_10.png 17 | training/image_2/000016_10.png 18 | training/image_2/000017_10.png 19 | training/image_2/000018_10.png 20 | training/image_2/000019_10.png 21 | training/image_2/000020_10.png 22 | training/image_2/000021_10.png 23 | training/image_2/000022_10.png 24 | training/image_2/000023_10.png 25 | training/image_2/000024_10.png 26 | training/image_2/000025_10.png 27 | training/image_2/000026_10.png 28 | training/image_2/000027_10.png 29 | training/image_2/000028_10.png 30 | training/image_2/000029_10.png 31 | training/image_2/000030_10.png 32 | training/image_2/000031_10.png 33 | training/image_2/000032_10.png 34 | training/image_2/000033_10.png 35 | training/image_2/000034_10.png 36 | training/image_2/000035_10.png 37 | training/image_2/000036_10.png 38 | training/image_2/000037_10.png 39 | training/image_2/000038_10.png 40 | training/image_2/000039_10.png 41 | training/image_2/000040_10.png 42 | training/image_2/000041_10.png 43 | training/image_2/000042_10.png 44 | training/image_2/000043_10.png 45 | training/image_2/000044_10.png 46 | training/image_2/000045_10.png 47 | training/image_2/000046_10.png 48 | training/image_2/000047_10.png 49 | training/image_2/000048_10.png 50 | training/image_2/000049_10.png 51 | training/image_2/000050_10.png 52 | training/image_2/000051_10.png 53 | training/image_2/000052_10.png 54 | training/image_2/000053_10.png 55 | training/image_2/000054_10.png 56 | training/image_2/000055_10.png 57 | training/image_2/000056_10.png 58 | training/image_2/000057_10.png 59 | training/image_2/000058_10.png 60 | training/image_2/000059_10.png 61 | training/image_2/000060_10.png 62 | training/image_2/000061_10.png 63 | training/image_2/000062_10.png 64 | training/image_2/000063_10.png 65 | training/image_2/000064_10.png 66 | training/image_2/000065_10.png 67 | training/image_2/000066_10.png 68 | training/image_2/000067_10.png 69 | training/image_2/000068_10.png 70 | training/image_2/000069_10.png 71 | training/image_2/000070_10.png 72 | training/image_2/000071_10.png 73 | training/image_2/000072_10.png 74 | training/image_2/000073_10.png 75 | training/image_2/000074_10.png 76 | training/image_2/000075_10.png 77 | training/image_2/000076_10.png 78 | training/image_2/000077_10.png 79 | training/image_2/000078_10.png 80 | training/image_2/000079_10.png 81 | training/image_2/000080_10.png 82 | training/image_2/000081_10.png 83 | training/image_2/000082_10.png 84 | training/image_2/000083_10.png 85 | training/image_2/000084_10.png 86 | training/image_2/000085_10.png 87 | 
training/image_2/000086_10.png 88 | training/image_2/000087_10.png 89 | training/image_2/000088_10.png 90 | training/image_2/000089_10.png 91 | training/image_2/000090_10.png 92 | training/image_2/000091_10.png 93 | training/image_2/000092_10.png 94 | training/image_2/000093_10.png 95 | training/image_2/000094_10.png 96 | training/image_2/000095_10.png 97 | training/image_2/000096_10.png 98 | training/image_2/000097_10.png 99 | training/image_2/000098_10.png 100 | training/image_2/000099_10.png 101 | training/image_2/000100_10.png 102 | training/image_2/000101_10.png 103 | training/image_2/000102_10.png 104 | training/image_2/000103_10.png 105 | training/image_2/000104_10.png 106 | training/image_2/000105_10.png 107 | training/image_2/000106_10.png 108 | training/image_2/000107_10.png 109 | training/image_2/000108_10.png 110 | training/image_2/000109_10.png 111 | training/image_2/000110_10.png 112 | training/image_2/000111_10.png 113 | training/image_2/000112_10.png 114 | training/image_2/000113_10.png 115 | training/image_2/000114_10.png 116 | training/image_2/000115_10.png 117 | training/image_2/000116_10.png 118 | training/image_2/000117_10.png 119 | training/image_2/000118_10.png 120 | training/image_2/000119_10.png 121 | training/image_2/000120_10.png 122 | training/image_2/000121_10.png 123 | training/image_2/000122_10.png 124 | training/image_2/000123_10.png 125 | training/image_2/000124_10.png 126 | training/image_2/000125_10.png 127 | training/image_2/000126_10.png 128 | training/image_2/000127_10.png 129 | training/image_2/000128_10.png 130 | training/image_2/000129_10.png 131 | training/image_2/000130_10.png 132 | training/image_2/000131_10.png 133 | training/image_2/000132_10.png 134 | training/image_2/000133_10.png 135 | training/image_2/000134_10.png 136 | training/image_2/000135_10.png 137 | training/image_2/000136_10.png 138 | training/image_2/000137_10.png 139 | training/image_2/000138_10.png 140 | training/image_2/000139_10.png 141 | training/image_2/000140_10.png 142 | training/image_2/000141_10.png 143 | training/image_2/000142_10.png 144 | training/image_2/000143_10.png 145 | training/image_2/000144_10.png 146 | training/image_2/000145_10.png 147 | training/image_2/000146_10.png 148 | training/image_2/000147_10.png 149 | training/image_2/000148_10.png 150 | training/image_2/000149_10.png 151 | training/image_2/000150_10.png 152 | training/image_2/000151_10.png 153 | training/image_2/000152_10.png 154 | training/image_2/000153_10.png 155 | training/image_2/000154_10.png 156 | training/image_2/000155_10.png 157 | training/image_2/000156_10.png 158 | training/image_2/000157_10.png 159 | training/image_2/000158_10.png 160 | training/image_2/000159_10.png 161 | training/image_2/000160_10.png 162 | training/image_2/000161_10.png 163 | training/image_2/000162_10.png 164 | training/image_2/000163_10.png 165 | training/image_2/000164_10.png 166 | training/image_2/000165_10.png 167 | training/image_2/000166_10.png 168 | training/image_2/000167_10.png 169 | training/image_2/000168_10.png 170 | training/image_2/000169_10.png 171 | training/image_2/000170_10.png 172 | training/image_2/000171_10.png 173 | training/image_2/000172_10.png 174 | training/image_2/000173_10.png 175 | training/image_2/000174_10.png 176 | training/image_2/000175_10.png 177 | training/image_2/000176_10.png 178 | training/image_2/000177_10.png 179 | training/image_2/000178_10.png 180 | training/image_2/000179_10.png 181 | training/image_2/000180_10.png 182 | training/image_2/000181_10.png 183 | 
training/image_2/000182_10.png 184 | training/image_2/000183_10.png 185 | training/image_2/000184_10.png 186 | training/image_2/000185_10.png 187 | training/image_2/000186_10.png 188 | training/image_2/000187_10.png 189 | training/image_2/000188_10.png 190 | training/image_2/000189_10.png 191 | training/image_2/000190_10.png 192 | training/image_2/000191_10.png 193 | training/image_2/000192_10.png 194 | training/image_2/000193_10.png 195 | training/image_2/000194_10.png 196 | training/image_2/000195_10.png 197 | training/image_2/000196_10.png 198 | training/image_2/000197_10.png 199 | training/image_2/000198_10.png 200 | training/image_2/000199_10.png 201 | -------------------------------------------------------------------------------- /data/kitti/test_scenes_eigen.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26_drive_0117 2 | 2011_09_28_drive_0002 3 | 2011_09_26_drive_0052 4 | 2011_09_30_drive_0016 5 | 2011_09_26_drive_0059 6 | 2011_09_26_drive_0027 7 | 2011_09_26_drive_0020 8 | 2011_09_26_drive_0009 9 | 2011_09_26_drive_0013 10 | 2011_09_26_drive_0101 11 | 2011_09_26_drive_0046 12 | 2011_09_26_drive_0029 13 | 2011_09_26_drive_0064 14 | 2011_09_26_drive_0048 15 | 2011_10_03_drive_0027 16 | 2011_09_26_drive_0002 17 | 2011_09_26_drive_0036 18 | 2011_09_29_drive_0071 19 | 2011_10_03_drive_0047 20 | 2011_09_30_drive_0027 21 | 2011_09_26_drive_0086 22 | 2011_09_26_drive_0084 23 | 2011_09_26_drive_0096 24 | 2011_09_30_drive_0018 25 | 2011_09_26_drive_0106 26 | 2011_09_26_drive_0056 27 | 2011_09_26_drive_0023 28 | 2011_09_26_drive_0093 29 | -------------------------------------------------------------------------------- /data/kitti/test_scenes_stereo.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26_drive_0005 2 | 2011_09_26_drive_0009 3 | 2011_09_26_drive_0011 4 | 2011_09_26_drive_0013 5 | 2011_09_26_drive_0014 6 | 2011_09_26_drive_0015 7 | 2011_09_26_drive_0017 8 | 2011_09_26_drive_0018 9 | 2011_09_26_drive_0019 10 | 2011_09_26_drive_0022 11 | 2011_09_26_drive_0027 12 | 2011_09_26_drive_0028 13 | 2011_09_26_drive_0029 14 | 2011_09_26_drive_0032 15 | 2011_09_26_drive_0036 16 | 2011_09_26_drive_0046 17 | 2011_09_26_drive_0051 18 | 2011_09_26_drive_0056 19 | 2011_09_26_drive_0057 20 | 2011_09_26_drive_0059 21 | 2011_09_26_drive_0070 22 | 2011_09_26_drive_0084 23 | 2011_09_26_drive_0096 24 | 2011_09_26_drive_0101 25 | 2011_09_26_drive_0104 26 | 2011_09_28_drive_0002 27 | 2011_09_29_drive_0004 28 | 2011_09_29_drive_0071 29 | 2011_10_03_drive_0047 30 | -------------------------------------------------------------------------------- /data/prepare_train_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import argparse 3 | import scipy.misc 4 | import numpy as np 5 | from glob import glob 6 | from joblib import Parallel, delayed 7 | import os 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--dataset_dir", type=str, default='./dataset/KITTI/raw/data/', help="where the dataset is stored") 11 | parser.add_argument("--dataset_name", type=str, default="kitti_raw_eigen", choices=["kitti_raw_eigen", "kitti_odom"]) 12 | parser.add_argument("--dump_root", type=str, default='./dataset/dfnet_train/', help="Where to dump the data") 13 | parser.add_argument("--seq_length", type=int, default=5, help="Length of each training sequence") 14 | parser.add_argument("--img_height", type=int, default=320, help="image 
height") 15 | parser.add_argument("--img_width", type=int, default=1152, help="image width") 16 | parser.add_argument("--num_threads", type=int, default=4, help="number of threads to use") 17 | args = parser.parse_args() 18 | 19 | def concat_image_seq(seq): 20 | for i, im in enumerate(seq): 21 | if i == 0: 22 | res = im 23 | else: 24 | res = np.hstack((res, im)) 25 | return res 26 | 27 | def dump_example(n): 28 | if n % 2000 == 0: 29 | print('Progress %d/%d....' % (n, data_loader.num_train)) 30 | example = data_loader.get_train_example_with_idx(n) 31 | if example == False: 32 | return 33 | image_seq = concat_image_seq(example['image_seq']) 34 | intrinsics = example['intrinsics'] 35 | fx = intrinsics[0, 0] 36 | fy = intrinsics[1, 1] 37 | cx = intrinsics[0, 2] 38 | cy = intrinsics[1, 2] 39 | dump_dir = os.path.join(args.dump_root, example['folder_name']) 40 | # if not os.path.isdir(dump_dir): 41 | # os.makedirs(dump_dir, exist_ok=True) 42 | try: 43 | os.makedirs(dump_dir) 44 | except OSError: 45 | if not os.path.isdir(dump_dir): 46 | raise 47 | dump_img_file = os.path.join(dump_dir, '%s.jpg' % example['file_name']) 48 | scipy.misc.imsave(dump_img_file, image_seq.astype(np.uint8)) 49 | dump_cam_file = os.path.join(dump_dir, '%s_cam.txt' % example['file_name']) 50 | with open(dump_cam_file, 'w') as f: 51 | f.write('%f,0.,%f,0.,%f,%f,0.,0.,1.' % (fx, cx, fy, cy)) 52 | # Ground truth pose 53 | if 'pose_seq' in example.keys(): 54 | pose_seq = example['pose_seq'] 55 | dump_pose_file = os.path.join(dump_dir, '%s_pose.txt' % example['file_name']) 56 | tgt2src_6d, src2tgt_6d = pose_seq 57 | with open(dump_pose_file, 'w') as f: 58 | tx, ty, tz, rx, ry, rz = tgt2src_6d 59 | f.write('%f,%f,%f,%f,%f,%f,' % (tx, ty, tz, rx, ry, rz)) 60 | tx, ty, tz, rx, ry, rz = src2tgt_6d 61 | f.write('%f,%f,%f,%f,%f,%f' % (tx, ty, tz, rx, ry, rz)) 62 | else: 63 | pass 64 | 65 | def main(): 66 | if not os.path.exists(args.dump_root): 67 | os.makedirs(args.dump_root) 68 | 69 | global data_loader 70 | if args.dataset_name == 'kitti_odom': 71 | from kitti.kitti_odom_loader import kitti_odom_loader 72 | data_loader = kitti_odom_loader(args.dataset_dir, 73 | img_height=args.img_height, 74 | img_width=args.img_width, 75 | seq_length=args.seq_length) 76 | 77 | if args.dataset_name == 'kitti_raw_eigen': 78 | from kitti.kitti_raw_loader import kitti_raw_loader 79 | data_loader = kitti_raw_loader(args.dataset_dir, 80 | split='eigen', 81 | img_height=args.img_height, 82 | img_width=args.img_width, 83 | seq_length=args.seq_length) 84 | 85 | #Parallel(n_jobs=args.num_threads)(delayed(dump_example)(n) for n in range(data_loader.num_train)) 86 | # Debug only 87 | for n in range(data_loader.num_train): 88 | dump_example(n) 89 | 90 | # Split into train/val 91 | np.random.seed(8964) 92 | subfolders = os.listdir(args.dump_root) 93 | with open(os.path.join(args.dump_root, 'train.txt'), 'w') as tf: 94 | with open(os.path.join(args.dump_root, 'val.txt'), 'w') as vf: 95 | for s in subfolders: 96 | if not os.path.isdir(os.path.join(args.dump_root, '%s' % s)): 97 | continue 98 | imfiles = glob(os.path.join(args.dump_root, s, '*.jpg')) 99 | frame_ids = [os.path.basename(fi).split('.')[0] for fi in imfiles] 100 | for frame in frame_ids: 101 | 102 | if np.random.random() < 0.1: 103 | vf.write('%s %s\n' % (s, frame)) 104 | else: 105 | tf.write('%s %s\n' % (s, frame)) 106 | 107 | 108 | main() 109 | 110 | -------------------------------------------------------------------------------- /kitti_eval/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/kitti_eval/__init__.py -------------------------------------------------------------------------------- /kitti_eval/depth_evaluation_utils.py: -------------------------------------------------------------------------------- 1 | # Mostly based on the code written by Clement Godard: 2 | # https://github.com/mrharicot/monodepth/blob/master/utils/evaluation_utils.py 3 | import numpy as np 4 | # import pandas as pd 5 | import os 6 | import cv2 7 | from collections import Counter 8 | import pickle 9 | from scipy.interpolate import LinearNDInterpolator # used by lin_interp below 10 | def compute_errors(gt, pred): 11 | thresh = np.maximum((gt / pred), (pred / gt)) 12 | a1 = (thresh < 1.25 ).mean() 13 | a2 = (thresh < 1.25 ** 2).mean() 14 | a3 = (thresh < 1.25 ** 3).mean() 15 | 16 | rmse = (gt - pred) ** 2 17 | rmse = np.sqrt(rmse.mean()) 18 | 19 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 20 | rmse_log = np.sqrt(rmse_log.mean()) 21 | 22 | abs_rel = np.mean(np.abs(gt - pred) / gt) 23 | 24 | sq_rel = np.mean(((gt - pred)**2) / gt) 25 | 26 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 27 | 28 | ############################################################################### 29 | ####################### KITTI 30 | 31 | width_to_focal = dict() 32 | width_to_focal[1242] = 721.5377 33 | width_to_focal[1241] = 718.856 34 | width_to_focal[1224] = 707.0493 35 | width_to_focal[1238] = 718.3351 36 | 37 | def load_gt_disp_kitti(path): 38 | gt_disparities = [] 39 | for i in range(200): 40 | disp = cv2.imread(path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1) 41 | disp = disp.astype(np.float32) / 256 42 | gt_disparities.append(disp) 43 | return gt_disparities 44 | 45 | def convert_disps_to_depths_kitti(gt_disparities, pred_disparities): 46 | gt_depths = [] 47 | pred_depths = [] 48 | pred_disparities_resized = [] 49 | 50 | for i in range(len(gt_disparities)): 51 | gt_disp = gt_disparities[i] 52 | height, width = gt_disp.shape 53 | 54 | pred_disp = pred_disparities[i] 55 | pred_disp = width * cv2.resize(pred_disp, (width, height), interpolation=cv2.INTER_LINEAR) 56 | 57 | pred_disparities_resized.append(pred_disp) 58 | 59 | mask = gt_disp > 0 60 | 61 | gt_depth = width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask)) 62 | pred_depth = width_to_focal[width] * 0.54 / pred_disp 63 | 64 | gt_depths.append(gt_depth) 65 | pred_depths.append(pred_depth) 66 | return gt_depths, pred_depths, pred_disparities_resized 67 | 68 | 69 | ############################################################################### 70 | ####################### EIGEN 71 | 72 | def read_text_lines(file_path): 73 | f = open(file_path, 'r') 74 | lines = f.readlines() 75 | f.close() 76 | lines = [l.rstrip() for l in lines] 77 | return lines 78 | 79 | def read_file_data(files, data_root): 80 | gt_files = [] 81 | gt_calib = [] 82 | im_sizes = [] 83 | im_files = [] 84 | cams = [] 85 | num_probs = 0 86 | for filename in files: 87 | filename = filename.split()[0] 88 | splits = filename.split('/') 89 | # camera_id = filename[-1] # 2 is left, 3 is right 90 | date = splits[0] 91 | im_id = splits[4][:10] 92 | file_root = '{}/{}' 93 | 94 | im = filename 95 | vel = '{}/{}/velodyne_points/data/{}.bin'.format(splits[0], splits[1], im_id) 96 | 97 | if os.path.isfile(data_root + im): 98 | gt_files.append(data_root + vel) 99 | gt_calib.append(data_root + date + '/') 100 | im_sizes.append(cv2.imread(data_root + im).shape[:2]) 101 | 
im_files.append(data_root + im) 102 | cams.append(2) 103 | else: 104 | num_probs += 1 105 | print('{} missing'.format(data_root + im)) 106 | # print(num_probs, 'files missing') 107 | 108 | return gt_files, gt_calib, im_sizes, im_files, cams 109 | 110 | def load_velodyne_points(file_name): 111 | # adapted from https://github.com/hunse/kitti 112 | points = np.fromfile(file_name, dtype=np.float32).reshape(-1, 4) 113 | points[:, 3] = 1.0 # homogeneous 114 | return points 115 | 116 | 117 | def lin_interp(shape, xyd): 118 | # taken from https://github.com/hunse/kitti 119 | m, n = shape 120 | ij, d = xyd[:, 1::-1], xyd[:, 2] 121 | f = LinearNDInterpolator(ij, d, fill_value=0) 122 | J, I = np.meshgrid(np.arange(n), np.arange(m)) 123 | IJ = np.vstack([I.flatten(), J.flatten()]).T 124 | disparity = f(IJ).reshape(shape) 125 | return disparity 126 | 127 | 128 | def read_calib_file(path): 129 | # taken from https://github.com/hunse/kitti 130 | float_chars = set("0123456789.e+- ") 131 | data = {} 132 | with open(path, 'r') as f: 133 | for line in f.readlines(): 134 | key, value = line.split(':', 1) 135 | value = value.strip() 136 | data[key] = value 137 | if float_chars.issuperset(value): 138 | # try to cast to float array 139 | try: 140 | # (Yuliang) Add list() outside of map() for Python3.x 141 | data[key] = np.array(list(map(float, value.split(' ')))) 142 | except ValueError: 143 | # casting error: data[key] already eq. value, so pass 144 | pass 145 | 146 | return data 147 | 148 | 149 | def get_focal_length_baseline(calib_dir, cam=2): 150 | cam2cam = read_calib_file(calib_dir + 'calib_cam_to_cam.txt') 151 | P2_rect = cam2cam['P_rect_02'].reshape(3,4) 152 | P3_rect = cam2cam['P_rect_03'].reshape(3,4) 153 | 154 | # cam 2 is left of camera 0 -6cm 155 | # cam 3 is to the right +54cm 156 | b2 = P2_rect[0,3] / -P2_rect[0,0] 157 | b3 = P3_rect[0,3] / -P3_rect[0,0] 158 | baseline = b3-b2 159 | 160 | if cam==2: 161 | focal_length = P2_rect[0,0] 162 | elif cam==3: 163 | focal_length = P3_rect[0,0] 164 | 165 | return focal_length, baseline 166 | 167 | 168 | def sub2ind(matrixSize, rowSub, colSub): 169 | m, n = matrixSize 170 | return rowSub * (n-1) + colSub - 1 171 | 172 | def generate_depth_map(calib_dir, velo_file_name, im_shape, cam=2, interp=False, vel_depth=False): 173 | # load calibration files 174 | cam2cam = read_calib_file(calib_dir + 'calib_cam_to_cam.txt') 175 | velo2cam = read_calib_file(calib_dir + 'calib_velo_to_cam.txt') 176 | velo2cam = np.hstack((velo2cam['R'].reshape(3,3), velo2cam['T'][..., np.newaxis])) 177 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 178 | 179 | # compute projection matrix velodyne->image plane 180 | R_cam2rect = np.eye(4) 181 | R_cam2rect[:3,:3] = cam2cam['R_rect_00'].reshape(3,3) 182 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3,4) 183 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 184 | 185 | # load velodyne points and remove all behind image plane (approximation) 186 | # each row of the velodyne data is forward, left, up, reflectance 187 | velo = load_velodyne_points(velo_file_name) 188 | velo = velo[velo[:, 0] >= 0, :] 189 | 190 | # project the points to the camera 191 | velo_pts_im = np.dot(P_velo2im, velo.T).T 192 | velo_pts_im[:, :2] = velo_pts_im[:,:2] / velo_pts_im[:,2][..., np.newaxis] 193 | 194 | if vel_depth: 195 | velo_pts_im[:, 2] = velo[:, 0] 196 | 197 | # check if in bounds 198 | # use minus 1 to get the exact same value as KITTI matlab code 199 | velo_pts_im[:, 0] = np.round(velo_pts_im[:,0]) - 1 200 | velo_pts_im[:, 1] = 
np.round(velo_pts_im[:,1]) - 1 201 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 202 | val_inds = val_inds & (velo_pts_im[:,0] < im_shape[1]) & (velo_pts_im[:,1] < im_shape[0]) 203 | velo_pts_im = velo_pts_im[val_inds, :] 204 | 205 | # project to image 206 | depth = np.zeros((im_shape)) 207 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2] 208 | 209 | # find the duplicate points and choose the closest depth 210 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 211 | # (Yuliang) iteritems() -> items() 212 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 213 | for dd in dupe_inds: 214 | pts = np.where(inds==dd)[0] 215 | x_loc = int(velo_pts_im[pts[0], 0]) 216 | y_loc = int(velo_pts_im[pts[0], 1]) 217 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 218 | depth[depth<0] = 0 219 | 220 | if interp: 221 | # interpolate the depth map to fill in holes 222 | depth_interp = lin_interp(im_shape, velo_pts_im) 223 | return depth, depth_interp 224 | else: 225 | return depth 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /kitti_eval/eval_depth.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import sys 3 | import cv2 4 | import os 5 | import numpy as np 6 | import argparse 7 | from depth_evaluation_utils import * 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--kitti_dir", type=str, default='./dataset/KITTI/raw/data/', help='Path to the KITTI dataset directory') 11 | parser.add_argument("--pred_file", type=str, help="Path to the prediction file") 12 | parser.add_argument("--split", type=str, default='val') 13 | parser.add_argument('--min_depth', type=float, default=1e-3, help="Threshold for minimum depth") 14 | parser.add_argument('--max_depth', type=float, default=80, help="Threshold for maximum depth") 15 | args = parser.parse_args() 16 | 17 | def main(): 18 | if args.split == 'val': 19 | test_file_list = './data/kitti/val_files_eigen.txt' 20 | elif args.split == 'test': 21 | test_file_list = './data/kitti/test_files_eigen.txt' 22 | else: 23 | assert False 24 | 25 | pred_depths = np.load(args.pred_file) 26 | test_files = read_text_lines(test_file_list) 27 | gt_files, gt_calib, im_sizes, im_files, cams = \ 28 | read_file_data(test_files, args.kitti_dir) 29 | num_test = len(im_files) 30 | gt_depths = [] 31 | pred_depths_resized = [] 32 | invalid_ids = [] 33 | for t_id in range(num_test): 34 | camera_id = cams[t_id] # 2 is left, 3 is right 35 | # Some frames in val set do not have ground truth labels 36 | try: 37 | depth = generate_depth_map(gt_calib[t_id], 38 | gt_files[t_id], 39 | im_sizes[t_id], 40 | camera_id, 41 | False, 42 | True) 43 | gt_depths.append(depth.astype(np.float32)) 44 | 45 | pred_depths_resized.append( 46 | cv2.resize(pred_depths[t_id], 47 | (im_sizes[t_id][1], im_sizes[t_id][0]), 48 | interpolation=cv2.INTER_LINEAR)) 49 | except: 50 | invalid_ids.append(t_id) 51 | print(t_id) 52 | pred_depths = pred_depths_resized 53 | num_test -= len(invalid_ids) 54 | 55 | rms = np.zeros(num_test, np.float32) 56 | log_rms = np.zeros(num_test, np.float32) 57 | abs_rel = np.zeros(num_test, np.float32) 58 | sq_rel = np.zeros(num_test, np.float32) 59 | d1_all = np.zeros(num_test, np.float32) 60 | a1 = np.zeros(num_test, np.float32) 61 | a2 = np.zeros(num_test, np.float32) 62 | a3 = np.zeros(num_test, np.float32) 63 | for i in range(num_test): 
64 | gt_depth = gt_depths[i] 65 | pred_depth = np.copy(pred_depths[i]) 66 | 67 | mask = np.logical_and(gt_depth > args.min_depth, 68 | gt_depth < args.max_depth) 69 | # crop used by Garg ECCV16 to reproduce Eigen NIPS14 results 70 | # if used on gt_size 370x1224 produces a crop of [-218, -3, 44, 1180] 71 | gt_height, gt_width = gt_depth.shape 72 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 73 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 74 | 75 | crop_mask = np.zeros(mask.shape) 76 | crop_mask[crop[0]:crop[1],crop[2]:crop[3]] = 1 77 | mask = np.logical_and(mask, crop_mask) 78 | 79 | # Scale matching 80 | scalor = np.median(gt_depth[mask])/np.median(pred_depth[mask]) 81 | pred_depth[mask] *= scalor 82 | 83 | pred_depth[pred_depth < args.min_depth] = args.min_depth 84 | pred_depth[pred_depth > args.max_depth] = args.max_depth 85 | abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = \ 86 | compute_errors(gt_depth[mask], pred_depth[mask]) 87 | 88 | print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('abs_rel', 'sq_rel', 'rms', 'log_rms', 'd1_all', 'a1', 'a2', 'a3')) 89 | print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(abs_rel.mean(), sq_rel.mean(), rms.mean(), log_rms.mean(), d1_all.mean(), a1.mean(), a2.mean(), a3.mean())) 90 | 91 | main() 92 | -------------------------------------------------------------------------------- /kitti_eval/eval_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import numpy as np 4 | import argparse 5 | from glob import glob 6 | from pose_evaluation_utils import * 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--gtruth_dir", type=str, 10 | help='Path to the directory with ground-truth trajectories') 11 | parser.add_argument("--pred_dir", type=str, 12 | help="Path to the directory with predicted trajectories") 13 | args = parser.parse_args() 14 | 15 | def main(): 16 | pred_files = glob(os.path.join(args.pred_dir, '*.txt')) 17 | ate_all = [] 18 | for i in range(len(pred_files)): 19 | gtruth_file = os.path.join(args.gtruth_dir, os.path.basename(pred_files[i])) 20 | if not os.path.exists(gtruth_file): 21 | continue 22 | ate = compute_ate(gtruth_file, pred_files[i]) 23 | if ate == False: 24 | continue 25 | ate_all.append(ate) 26 | ate_all = np.array(ate_all) 27 | print("Predictions dir: %s" % args.pred_dir) 28 | print("ATE mean: %.4f, std: %.4f" % (np.mean(ate_all), np.std(ate_all))) 29 | main() 30 | -------------------------------------------------------------------------------- /misc/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download pre-trained models 4 | echo "Downloading pre-trained models" 5 | wget https://filebox.ece.vt.edu/~ylzou/eccv2018dfnet/pretrained.tar 6 | tar -xvf pretrained.tar 7 | 8 | # Download training data 9 | echo "Downloading training data" 10 | mkdir dataset 11 | cd dataset 12 | wget https://filebox.ece.vt.edu/~ylzou/eccv2018dfnet/kitti_5frame_1152_320.tar 13 | tar -xvf kitti_5frame_1152_320.tar 14 | cd .. 
15 | 16 | -------------------------------------------------------------------------------- /misc/zou2018dfnet.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/misc/zou2018dfnet.gif -------------------------------------------------------------------------------- /test_flownet_2012.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | import PIL.Image as pil 7 | import png 8 | import scipy 9 | 10 | from core import flow_to_image 11 | from core import flownet 12 | 13 | flags = tf.app.flags 14 | flags.DEFINE_integer("batch_size", 1, "The size of of a sample batch") 15 | flags.DEFINE_integer("img_height", 384, "Image height") 16 | flags.DEFINE_integer("img_width", 1280, "Image width") 17 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/flow2012/training/', "Dataset directory") 18 | flags.DEFINE_string("output_dir", None, "Output directory") 19 | flags.DEFINE_string("ckpt_file", 'pretrained/unflowc_pre', "checkpoint file") 20 | FLAGS = flags.FLAGS 21 | 22 | FLOW_SCALE = 5.0 23 | 24 | # kitti 2012 has 194 training pairs, 195 test pairs 25 | if 'train' in FLAGS.dataset_dir: 26 | NUM = 194 27 | elif 'test' in FLAGS.dataset_dir: 28 | NUM = 195 29 | 30 | def get_flow(path): 31 | bgr = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 32 | invalid = bgr[:, :, 0] == 0 33 | out_flow = (bgr[:, :, 2:0:-1].astype('f4') - 2**15) / 64. 34 | out_flow[invalid] = 0 35 | return out_flow, bgr[:, :, 0] 36 | 37 | def compute_flow_error(gt_flow, pred_flow, mask): 38 | H, W, _ = gt_flow.shape 39 | old_H, old_W, _ = pred_flow.shape 40 | # Reshape predicted flow to have same size as ground truth 41 | pred0 = cv2.resize(pred_flow[:,:,0], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*W/old_W) 42 | pred1 = cv2.resize(pred_flow[:,:,1], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*H/old_H) 43 | pred = np.stack((pred0, pred1), axis=-1) * FLOW_SCALE 44 | 45 | err = np.sqrt(np.sum(np.square(gt_flow - pred), axis=-1)) 46 | err_valid = np.sum(err * mask) / np.sum(mask) 47 | return err_valid, pred 48 | 49 | def write_flow_png(name, flow): 50 | H, W, _ = flow.shape 51 | out = np.ones((H, W, 3), dtype=np.uint64) 52 | out[:,:,1] = np.minimum(np.maximum(flow[:,:,1]*64.+2**15, 0), 2**16).astype(np.uint64) 53 | out[:,:,0] = np.minimum(np.maximum(flow[:,:,0]*64.+2**15, 0), 2**16).astype(np.uint64) 54 | with open(name, 'wb') as f: 55 | writer = png.Writer(width=W, height=H, bitdepth=16) 56 | im2list = out.reshape(-1, out.shape[1]*out.shape[2]).tolist() 57 | writer.write(f, im2list) 58 | 59 | 60 | def pick_frame(path): 61 | new_files = [] 62 | # flow2012 dataset only has 194 pairs 63 | for i in range(NUM): 64 | frame1 = os.path.join(path, 'colored_0', '{:06d}'.format(i) + '_10.png') 65 | frame2 = os.path.join(path, 'colored_0', '{:06d}'.format(i) + '_11.png') 66 | new_files.append([frame1, frame2]) 67 | return new_files 68 | 69 | def main(_): 70 | new_files = pick_frame(FLAGS.dataset_dir) 71 | basename = os.path.basename(FLAGS.ckpt_file) 72 | 73 | im1_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 74 | im2_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 75 | pred_flows = flownet(im1_pl, im2_pl, flownet_spec='C') 76 | 77 | saver = tf.train.Saver([var for var in 
tf.all_variables() if 'flow' in var.name]) 78 | config = tf.ConfigProto() 79 | config.gpu_options.allow_growth = True 80 | errs = np.zeros(NUM) 81 | 82 | if not FLAGS.output_dir is None and not os.path.exists(FLAGS.output_dir): 83 | os.makedirs(FLAGS.output_dir) 84 | 85 | with tf.Session(config=config) as sess: 86 | saver.restore(sess, FLAGS.ckpt_file) 87 | # For val set 88 | for t in range(0, len(new_files)): 89 | if t % 100 == 0: 90 | print('processing %s: %d/%d' % (basename, t, len(new_files))) 91 | raw_im0 = pil.open(new_files[t][0]) 92 | raw_im1 = pil.open(new_files[t][1]) 93 | scaled_im0 = raw_im0.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 94 | scaled_im1 = raw_im1.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 95 | # Minus ImageNet channel mean 96 | channel_mean = np.array([104.920005, 110.1753, 114.785955]) 97 | scaled_im0 = (np.expand_dims(np.array(scaled_im0), axis=0).astype(np.float32)-channel_mean)/255. 98 | scaled_im1 = (np.expand_dims(np.array(scaled_im1), axis=0).astype(np.float32)-channel_mean)/255. 99 | feed_dict = {im1_pl: scaled_im0, im2_pl: scaled_im1} 100 | pred_flows_val = sess.run(pred_flows, feed_dict=feed_dict) 101 | pred_flow_val = pred_flows_val[-1][0] 102 | 103 | # Only for training set 104 | if 'train' in FLAGS.dataset_dir: 105 | # no occlusion 106 | #gt_flow, mask = get_flow(new_files[t][0].replace('colored_0', 'flow_noc')) 107 | # all 108 | gt_flow, mask = get_flow(new_files[t][0].replace('colored_0', 'flow_occ')) 109 | errs[t], scaled_pred = compute_flow_error(gt_flow, pred_flow_val[0,:,:,:], mask) 110 | 111 | # Save for eval 112 | if 'test' in FLAGS.dataset_dir: 113 | _, scaled_pred = compute_flow_error(np.array(raw_im0)[:,:,:2], pred_flow_val[0,:,:,:], np.array(raw_im0)[:,:,0]) 114 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]) 115 | write_flow_png(png_name, scaled_pred) 116 | 117 | # Save for visual colormap 118 | if not 'test' in FLAGS.dataset_dir and not FLAGS.output_dir is None: 119 | flow_im = flow_to_image(scaled_pred) 120 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]).replace('png', 'jpg') 121 | cv2.imwrite(png_name, flow_im[:,:,::-1]) 122 | 123 | print('{:>10}'.format('(valid) endpoint error')) 124 | print('{:10.4f}'.format(errs.mean())) 125 | 126 | if __name__ == '__main__': 127 | tf.app.run() 128 | -------------------------------------------------------------------------------- /test_flownet_2015.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | import PIL.Image as pil 7 | import png 8 | import scipy 9 | 10 | from core import flow_to_image 11 | from core import flownet 12 | 13 | flags = tf.app.flags 14 | flags.DEFINE_integer("batch_size", 1, "The size of a sample batch") 15 | flags.DEFINE_integer("img_height", 384, "Image height") 16 | flags.DEFINE_integer("img_width", 1280, "Image width") 17 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/flow2015/training/', "Dataset directory") 18 | flags.DEFINE_string("output_dir", None, "Output directory") 19 | flags.DEFINE_string("ckpt_file", 'pretrained/unflowc_pre', "checkpoint file") 20 | FLAGS = flags.FLAGS 21 | 22 | FLOW_SCALE = 5.0 23 | 24 | # kitti 2015 has 200 training pairs, 200 test pairs 25 | NUM = 200 26 | 27 | def get_flow(path): 28 | bgr = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 29 | invalid = bgr[:, :, 0] == 0 30 | out_flow = 
(bgr[:, :, 2:0:-1].astype('f4') - 2**15) / 64. 31 | out_flow[invalid] = 0 32 | return out_flow, bgr[:, :, 0] 33 | 34 | def compute_flow_error(gt_flow, pred_flow, mask): 35 | H, W, _ = gt_flow.shape 36 | old_H, old_W, _ = pred_flow.shape 37 | # Reshape predicted flow to have same size as ground truth 38 | pred0 = cv2.resize(pred_flow[:,:,0], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*W/old_W) 39 | pred1 = cv2.resize(pred_flow[:,:,1], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*H/old_H) 40 | pred = np.stack((pred0, pred1), axis=-1) * FLOW_SCALE 41 | 42 | err = np.sqrt(np.sum(np.square(gt_flow - pred), axis=-1)) 43 | err_valid = np.sum(err * mask) / np.sum(mask) 44 | 45 | gt_mag = np.sqrt(np.sum(np.square(gt_flow), axis=-1)) 46 | mask1 = err > 3. 47 | mask2 = err / (gt_mag+1e-12) > 0.05 48 | final_mask = np.logical_and(np.logical_and(mask1, mask2), mask) 49 | f1 = final_mask.sum()/mask.sum() 50 | 51 | return err_valid, pred, f1 52 | 53 | def write_flow_png(name, flow): 54 | H, W, _ = flow.shape 55 | out = np.ones((H, W, 3), dtype=np.uint64) 56 | out[:,:,1] = np.minimum(np.maximum(flow[:,:,1]*64.+2**15, 0), 2**16).astype(np.uint64) 57 | out[:,:,0] = np.minimum(np.maximum(flow[:,:,0]*64.+2**15, 0), 2**16).astype(np.uint64) 58 | with open(name, 'wb') as f: 59 | writer = png.Writer(width=W, height=H, bitdepth=16) 60 | im2list = out.reshape(-1, out.shape[1]*out.shape[2]).tolist() 61 | writer.write(f, im2list) 62 | 63 | 64 | def pick_frame(path): 65 | new_files = [] 66 | for i in range(NUM): 67 | frame1 = os.path.join(path, 'image_2', '{:06d}'.format(i) + '_10.png') 68 | frame2 = os.path.join(path, 'image_2', '{:06d}'.format(i) + '_11.png') 69 | new_files.append([frame1, frame2]) 70 | return new_files 71 | 72 | def main(_): 73 | new_files = pick_frame(FLAGS.dataset_dir) 74 | basename = os.path.basename(FLAGS.ckpt_file) 75 | 76 | im1_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 77 | im2_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 78 | pred_flows = flownet(im1_pl, im2_pl, flownet_spec='C') 79 | 80 | saver = tf.train.Saver([var for var in tf.all_variables()]) 81 | config = tf.ConfigProto() 82 | config.gpu_options.allow_growth = True 83 | errs = np.zeros(NUM) 84 | f1 = np.zeros(NUM) 85 | 86 | if not FLAGS.output_dir is None and not os.path.exists(FLAGS.output_dir): 87 | os.makedirs(FLAGS.output_dir) 88 | 89 | with tf.Session(config=config) as sess: 90 | saver.restore(sess, FLAGS.ckpt_file) 91 | # For val set 92 | for t in range(0, len(new_files)): 93 | if t % 100 == 0: 94 | print('processing %s: %d/%d' % (basename, t, len(new_files))) 95 | raw_im0 = pil.open(new_files[t][0]) 96 | raw_im1 = pil.open(new_files[t][1]) 97 | scaled_im0 = raw_im0.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 98 | scaled_im1 = raw_im1.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 99 | # Minus ImageNet channel mean 100 | channel_mean = np.array([104.920005, 110.1753, 114.785955]) 101 | scaled_im0 = (np.expand_dims(np.array(scaled_im0), axis=0).astype(np.float32)-channel_mean)/255. 102 | scaled_im1 = (np.expand_dims(np.array(scaled_im1), axis=0).astype(np.float32)-channel_mean)/255. 
103 | feed_dict = {im1_pl: scaled_im0, im2_pl: scaled_im1} 104 | pred_flows_val = sess.run(pred_flows, feed_dict=feed_dict) 105 | pred_flow_val = pred_flows_val[-1][0] 106 | 107 | # Only for training set 108 | if 'train' in FLAGS.dataset_dir: 109 | # no occlusion 110 | #gt_flow, mask = get_flow(new_files[t][0].replace('image_2', 'flow_noc')) 111 | # all 112 | gt_flow, mask = get_flow(new_files[t][0].replace('image_2', 'flow_occ')) 113 | errs[t], scaled_pred, f1[t] = compute_flow_error(gt_flow, pred_flow_val[0,:,:,:], mask) 114 | 115 | # Save for eval 116 | if 'test' in FLAGS.dataset_dir: 117 | _, scaled_pred, _ = compute_flow_error(np.array(raw_im0)[:,:,:2], pred_flow_val[0,:,:,:], np.array(raw_im0)[:,:,0]) 118 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]) 119 | write_flow_png(png_name, scaled_pred) 120 | 121 | # Save for visual colormap 122 | if not 'test' in FLAGS.dataset_dir and not FLAGS.output_dir is None: 123 | flow_im = flow_to_image(scaled_pred) 124 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]).replace('png', 'jpg') 125 | cv2.imwrite(png_name, flow_im[:,:,::-1]) 126 | 127 | print('{:>10}, {:>10}'.format('(valid) endpoint error', 'f1 score')) 128 | print('{:10.4f}, {:10.4f}'.format(errs.mean(), f1.mean())) 129 | 130 | if __name__ == '__main__': 131 | tf.app.run() 132 | -------------------------------------------------------------------------------- /test_kitti_depth.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | import PIL.Image as pil 6 | from core import DFLearner 7 | 8 | flags = tf.app.flags 9 | flags.DEFINE_integer("batch_size", 4, "The size of of a sample batch") 10 | flags.DEFINE_integer("img_height", 160, "Image height") 11 | flags.DEFINE_integer("img_width", 576, "Image width") 12 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/raw/data/', "Dataset directory") 13 | flags.DEFINE_string("split", 'val', 'val or test') 14 | flags.DEFINE_string("output_dir", None, "Output directory") 15 | flags.DEFINE_string("ckpt_file", None, "checkpoint file") 16 | FLAGS = flags.FLAGS 17 | 18 | def main(_): 19 | if FLAGS.split == 'val': 20 | txt_file = 'data/kitti/val_files_eigen.txt' 21 | elif FLAGS.split == 'test': 22 | txt_file = 'data/kitti/test_files_eigen.txt' 23 | else: 24 | assert False 25 | 26 | with open(txt_file, 'r') as f: 27 | test_files = f.readlines() 28 | test_files = [FLAGS.dataset_dir + t[:-1] for t in test_files] 29 | if not os.path.exists(FLAGS.output_dir): 30 | os.makedirs(FLAGS.output_dir) 31 | basename = os.path.basename(FLAGS.ckpt_file) 32 | output_file = os.path.join(FLAGS.output_dir, basename) 33 | model = DFLearner() 34 | model.setup_inference(img_height=FLAGS.img_height, 35 | img_width=FLAGS.img_width, 36 | batch_size=FLAGS.batch_size, 37 | mode='depth') 38 | saver = tf.train.Saver([var for var in tf.model_variables()]) 39 | config = tf.ConfigProto() 40 | config.gpu_options.allow_growth = True 41 | with tf.Session(config=config) as sess: 42 | saver.restore(sess, FLAGS.ckpt_file) 43 | pred_all = [] 44 | for t in range(0, len(test_files), FLAGS.batch_size): 45 | if t % 100 == 0: 46 | print('processing %s: %d/%d' % (basename, t, len(test_files))) 47 | inputs = np.zeros( 48 | (FLAGS.batch_size, FLAGS.img_height, FLAGS.img_width, 3), 49 | dtype=np.uint8) 50 | for b in range(FLAGS.batch_size): 51 | idx = t + b 52 | if idx >= len(test_files): 53 | break 54 | raw_im = 
pil.open(test_files[idx]) 55 | scaled_im = raw_im.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 56 | inputs[b] = np.array(scaled_im) 57 | pred = model.inference(inputs, sess, mode='depth') 58 | for b in range(FLAGS.batch_size): 59 | idx = t + b 60 | if idx >= len(test_files): 61 | break 62 | pred_all.append(pred['depth'][b,:,:,0]) 63 | np.save(output_file, pred_all) 64 | 65 | if __name__ == '__main__': 66 | tf.app.run() 67 | -------------------------------------------------------------------------------- /train_df.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import pprint 4 | import random 5 | import numpy as np 6 | from core import DFLearner 7 | import os 8 | 9 | flags = tf.app.flags 10 | flags.DEFINE_string("dataset_dir", "./dataset/kitti_5frame_1152_320/", "Dataset directory") 11 | flags.DEFINE_string("checkpoint_dir", "./ckpt/dfnet", "Directory name to save the checkpoints") 12 | flags.DEFINE_string("ckpt_flow", "pretrained/unflowc_pre", "checkpoint for Flow Net") 13 | flags.DEFINE_string("ckpt_dp", "pretrained/cs_5frame_pre", "checkpoint for Depth Net and Pose Net") 14 | flags.DEFINE_string("ckpt_pose", None, "checkpoint for Pose Net, if not shared with Depth Net") 15 | flags.DEFINE_float("learning_rate", 0.0001, "Learning rate for Adam") 16 | flags.DEFINE_float("beta1", 0.9, "Momentum term of Adam") 17 | flags.DEFINE_float("smooth_weight", 3.0, "Weight for smoothness") 18 | flags.DEFINE_float("alpha_image_loss", 0.85, "Weight between SSIM and L1 in the image loss") 19 | flags.DEFINE_float("depth_consistency", 0.2, "Weight for forward-backward depth consistency loss.") 20 | flags.DEFINE_float("flow_smooth_weight", 3.0, "Weight for flow smoothness") 21 | flags.DEFINE_float("flow_consistency", 0.2, "Weight for forward-backward flow consistency loss.") 22 | flags.DEFINE_float("cross_consistency", 0.5, "Weight for cross-network consistency loss") 23 | flags.DEFINE_integer("batch_size", 4, "The size of a sample batch; must be divisible by the number of GPUs!") 24 | flags.DEFINE_integer("num_gpus", 4, "Number of GPUs for training, starting from 0.") 25 | flags.DEFINE_integer("img_height", 320, "Image height") 26 | flags.DEFINE_integer("img_width", 1152, "Image width") 27 | flags.DEFINE_integer("seq_length", 5, "Sequence length for each example") # Fixed. Don't change 28 | flags.DEFINE_integer("max_steps", 100000, "Maximum number of training iterations") 29 | flags.DEFINE_integer("summary_freq", 100, "Logging every summary_freq iterations") 30 | flags.DEFINE_integer("save_latest_freq", 5000, \ 31 | "Save the latest model every save_latest_freq iterations (overwrites the previous latest model)") 32 | flags.DEFINE_boolean("continue_train", True, "Continue training from previous checkpoint") 33 | flags.DEFINE_boolean("scale_normalize", False, "Scale normalization for disparity.") # Setting this to True will break the training. 
34 | flags.DEFINE_boolean("fix_pose", False, "Fix pose network") 35 | FLAGS = flags.FLAGS 36 | 37 | def main(_): 38 | seed = 8964 39 | tf.set_random_seed(seed) 40 | np.random.seed(seed) 41 | random.seed(seed) 42 | 43 | pp = pprint.PrettyPrinter() 44 | pp.pprint(flags.FLAGS.__flags) 45 | 46 | if not os.path.exists(FLAGS.checkpoint_dir): 47 | os.makedirs(FLAGS.checkpoint_dir) 48 | 49 | learner = DFLearner() 50 | learner.train(FLAGS) 51 | 52 | if __name__ == '__main__': 53 | tf.app.run() 54 | --------------------------------------------------------------------------------
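
For reference, the depth numbers printed by `kitti_eval/eval_depth.py` come from per-image median scale matching followed by the standard Eigen error metrics (the script above additionally restricts evaluation to the Garg crop). The snippet below is a minimal, self-contained sketch of that protocol on synthetic arrays; it is illustrative only and not a file in this repository, and `demo_compute_errors` is a trimmed-down stand-in for `compute_errors` in `kitti_eval/depth_evaluation_utils.py`.
```python
# Illustrative sketch only (not part of the repository): per-image median
# scale matching + a subset of the Eigen depth metrics, on synthetic data.
import numpy as np

def demo_compute_errors(gt, pred):
    # Trimmed-down version of compute_errors() in depth_evaluation_utils.py
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    rmse = np.sqrt(np.mean((gt - pred) ** 2))
    return abs_rel, rmse, a1

min_depth, max_depth = 1e-3, 80.0

# Synthetic ground truth and a prediction that is only correct up to scale
gt = np.random.uniform(1.0, 80.0, size=(370, 1224)).astype(np.float32)
pred = gt / 7.3 * (1.0 + np.random.normal(0, 0.01, size=gt.shape).astype(np.float32))

# Keep only pixels with valid depth, as in eval_depth.py
mask = np.logical_and(gt > min_depth, gt < max_depth)

# Median scale matching, then clamp to the evaluation depth range
scale = np.median(gt[mask]) / np.median(pred[mask])
pred_scaled = np.clip(pred * scale, min_depth, max_depth)

print(demo_compute_errors(gt[mask], pred_scaled[mask]))
```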