├── .gitignore ├── LICENSE ├── README.md ├── core ├── BaseLearner.py ├── DFLearner.py ├── UnFlow │ ├── __init__.py │ ├── ops │ │ ├── backward_warp_op.cc │ │ ├── backward_warp_op.cu.cc │ │ ├── correlation_op.cc │ │ ├── correlation_op.cu.cc │ │ ├── correlation_op.h │ │ ├── downsample_op.cc │ │ ├── downsample_op.cu.cc │ │ ├── forward_warp_op.cc │ │ └── forward_warp_op.cu.cc │ └── src │ │ ├── __init__.py │ │ └── e2eflow │ │ ├── __init__.py │ │ ├── core │ │ ├── __init__.py │ │ ├── augment.py │ │ ├── data.py │ │ ├── flow_util.py │ │ ├── flownet.py │ │ ├── image_warp.py │ │ ├── input.py │ │ ├── losses.py │ │ ├── spatial_transformer.py │ │ ├── supervised.py │ │ ├── train.py │ │ ├── unsupervised.py │ │ └── util.py │ │ ├── ops.py │ │ └── util.py ├── __init__.py ├── data_loader.py ├── flowlib.py ├── nets.py └── utils.py ├── data ├── __init__.py ├── kitti │ ├── __init__.py │ ├── excluded_frames.txt │ ├── kitti_odom_loader.py │ ├── kitti_raw_loader.py │ ├── static_frames.txt │ ├── test_files_eigen.txt │ ├── test_files_stereo.txt │ ├── test_scenes_eigen.txt │ ├── test_scenes_stereo.txt │ └── val_files_eigen.txt └── prepare_train_data.py ├── kitti_eval ├── __init__.py ├── depth_evaluation_utils.py ├── eval_depth.py ├── eval_pose.py └── pose_evaluation_utils.py ├── misc ├── prepare.sh └── zou2018dfnet.gif ├── test_flownet_2012.py ├── test_flownet_2015.py ├── test_kitti_depth.py └── train_df.py /.gitignore: -------------------------------------------------------------------------------- 1 | *DS_Store 2 | *ignore* 3 | *pyc 4 | *__pycache__ 5 | pretrained 6 | ckpt 7 | results 8 | *pose_data 9 | dataset 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Virginia Tech Vision and Learning Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency 2 | 3 | A TensorFlow re-implementation for [DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency](https://arxiv.org/abs/1809.01649). 
There are some minor differences from the model described in the paper:
4 |
5 | - The model in the paper uses 2 frames as input, while this code uses 5 frames (you may use any odd number of frames as input, though you would then need to tune the hyper-parameters)
6 | - FlowNet in the paper is pre-trained on SYNTHIA, while this one is pre-trained on Cityscapes
7 |
8 | Please see the [project page](http://yuliang.vision/DF-Net/) for more details.
9 |
10 |
11 |
12 |
13 | ## Prerequisites
14 | This codebase was developed and tested with the following settings:
15 | ```
16 | Python 3.6
17 | TensorFlow 1.2.0 (this is the only supported version)
18 | g++ 4.x (this is the only supported version)
19 | CUDA 8.0
20 | Ubuntu 14.04
21 | 4 Tesla K80 GPUs (w/ 12G memory each)
22 | ```
23 |
24 | Some Python packages you might not have:
25 | ```
26 | pypng
27 | opencv-python
28 | ```
29 |
30 | ## Installation
31 | 1. Clone this repository
32 | ```Shell
33 | git clone git@github.com:vt-vl-lab/DF-Net.git
34 | cd DF-Net
35 | ```
36 |
37 | 2. Prepare models and training data
38 | ```Shell
39 | chmod +x ./misc/prepare.sh
40 | ./misc/prepare.sh
41 | ```
42 | NOTE: Frames belonging to the KITTI2012/2015 train/test scenes have been excluded from the provided training set. Adding these frames back to the training set would improve the performance of DepthNet.
43 |
44 | ## Data preparation (for evaluation)
45 | After accepting their license conditions, download [KITTI raw](http://www.cvlibs.net/datasets/kitti/raw_data.php), [KITTI flow 2012](http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php?benchmark=flow), and [KITTI flow 2015](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow).
46 |
47 | Then you can create soft-links to them:
48 | ```Shell
49 | cd dataset
50 | mkdir KITTI
51 | cd KITTI
52 |
53 | ln -s /path/to/KITTI/raw raw
54 | ln -s /path/to/KITTI/2012 flow2012
55 | ln -s /path/to/KITTI/2015 flow2015
56 | ```
57 |
58 | **(Optional)** You can add those KITTI2012/2015 frames back to the training set by commenting out lines 81-85 in `data/kitti/kitti_raw_loader.py` and running
59 | ```
60 | python data/prepare_train_data.py --dataset_name='kitti_raw_eigen' --dump_root=/path/to/save/ --num_threads=4
61 | ```
62 |
63 | ## Training
64 | ```
65 | export CUDA_VISIBLE_DEVICES=0,1,2,3
66 | python train_df.py --dataset_dir=/path/to/your/data --checkpoint_dir=/path/to/save/your/model
67 | ```
68 |
69 | The first time you run training, the custom CUDA operations for FlowNet will be compiled. If you run into any compilation issues, please check `core/UnFlow/src/e2eflow/ops.py`:
70 | - Line 31: specify your CUDA path
71 | - Line 32: add `-I $CUDA_HOME/include`, where `$CUDA_HOME` is your CUDA directory
72 | - Line 38: specify your g++ version
73 |
74 | ## Testing
75 | Test DepthNet on KITTI raw (you can use the validation set to select the best model):
76 | ```
77 | python test_kitti_depth.py --dataset_dir=/path/to/your/data --output_dir=/path/to/save/your/prediction --ckpt_file=/path/to/your/ckpt --split="val or test"
78 | python kitti_eval/eval_depth.py --pred_file=/path/to/your/prediction --split="val or test"
79 | ```
80 |
81 | Test FlowNet on KITTI 2012 (please use the training set):
82 | ```
83 | python test_flownet_2012.py --dataset_dir=/path/to/your/data --ckpt_file=/path/to/your/ckpt
84 | ```
85 |
86 | Test FlowNet on KITTI 2015 (please use the training set):
87 | ```
88 | python test_flownet_2015.py --dataset_dir=/path/to/your/data --ckpt_file=/path/to/your/ckpt
89 | ```
90 |
91 | NOTE: For KITTI 2012/2015
92 | - If you want to generate visualization colormaps for the **training set**, specify `output_dir`
93 | - If you want to upload results to the KITTI server, specify `output_dir` and run on the **test set**
94 |
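If you prefer to call the model from Python rather than through `test_kitti_depth.py`, the inference helpers defined in `core/BaseLearner.py` (`setup_inference` / `inference`) can be driven directly. The snippet below is only a rough sketch and is not part of the repository: the input resolution, file paths, and the use of `BaseLearner` (rather than the `DFLearner` subclass) are assumptions, so treat `test_kitti_depth.py` as the authoritative example.

```python
# Illustrative sketch only -- test_kitti_depth.py is the reference implementation.
# Resolution, checkpoint path, and image path below are placeholders; the released
# checkpoint is assumed to match the depth test graph built in core/BaseLearner.py.
import scipy.misc
import tensorflow as tf

from core.BaseLearner import BaseLearner

learner = BaseLearner()
learner.setup_inference(img_height=160, img_width=576, mode='depth', batch_size=1)
saver = tf.train.Saver()  # test_kitti_depth.py may restrict this to model variables

with tf.Session() as sess:
    saver.restore(sess, '/path/to/your/ckpt')
    im = scipy.misc.imread('/path/to/a/kitti/image.png')   # H x W x 3, uint8
    im = scipy.misc.imresize(im, (160, 576))                # match the test graph size
    pred = learner.inference(im[None, :, :, :], sess, mode='depth')
    depth = pred['depth'][0, :, :, 0]                       # H x W depth map
    print(depth.min(), depth.max())
```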
95 | ## Pre-trained model performance
96 | You should get the following numbers if you use the pre-trained model `pretrained/dfnet`:
97 |
98 |
99 | DepthNet (KITTI raw test set)
100 |
101 | abs rel | sq rel | rms | log rms | a1 | a2 | a3 |
102 | ---------------|------------|------------|------------|------------|------------|------------|
103 | 0.1452 | 1.2904 | 5.6115 | 0.2194 | 0.8114 | 0.9394 | 0.9767 |
104 |
105 |
106 | FlowNet (KITTI 2012/2015 training set)
107 |
108 | KITTI 2012 EPE | KITTI 2015 EPE | KITTI 2015 F1 |
109 | ---------------|----------------|---------------|
110 | 3.1052 | 7.4482 | 0.2695 |
111 |
112 |
113 | ## Citation
114 | If you find this code useful for your research, please consider citing the following paper:
115 |
116 |     @inproceedings{zou2018dfnet,
117 |       author    = {Zou, Yuliang and Luo, Zelun and Huang, Jia-Bin},
118 |       title     = {DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency},
119 |       booktitle = {European Conference on Computer Vision},
120 |       year      = {2018}
121 |     }
122 |
123 |
124 | ## Acknowledgement
125 | Code is heavily borrowed from several great works, including [SfMLearner](https://github.com/tinghuiz/SfMLearner), [monodepth](https://github.com/mrharicot/monodepth), and [UnFlow](https://github.com/simonmeister/UnFlow). We thank [Shih-Yang Su](https://github.com/LemonATsu) for the code review.
126 | -------------------------------------------------------------------------------- /core/BaseLearner.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import time 4 | import math 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.contrib.slim as slim 8 | 9 | from .data_loader import DataLoader 10 | from .nets import * 11 | from .utils import * 12 | from .flowlib import flow_to_image 13 | 14 | class BaseLearner(object): 15 | def __init__(self): 16 | pass 17 | 18 | def build_train_graph(self): 19 | raise NotImplementedError 20 | 21 | def collect_summaries(self): 22 | raise NotImplementedError 23 | 24 | def train(self, opt): 25 | raise NotImplementedError 26 | 27 | # Credit: https://github.com/mrharicot/monodepth/blob/master/average_gradients.py 28 | def average_gradients(self, tower_grads): 29 | average_grads = [] 30 | for grad_and_vars in zip(*tower_grads): 31 | grads = [] 32 | for g, _ in grad_and_vars: 33 | if g is not None: 34 | expanded_g = tf.expand_dims(g, 0) 35 | grads.append(expanded_g) 36 | if grads != []: 37 | grad = tf.concat(axis=0, values=grads) 38 | grad = tf.reduce_mean(grad, 0) 39 | v = grad_and_vars[0][1] 40 | grad_and_var = (grad, v) 41 | average_grads.append(grad_and_var) 42 | return average_grads 43 | 44 | def get_dp_flow(self, opt, s, src_pixel_coords): 45 | x_base = tf.range(int(opt.img_width/(2**s))) 46 | y_base = tf.range(int(opt.img_height/(2**s))) 47 | x_base = tf.stack([x_base]*int(opt.img_height/(2**s)), axis=0) 48 | y_base = tf.transpose(tf.stack([y_base]*int(opt.img_width/(2**s)), axis=0)) 49 | 50 | dp_flow_x = src_pixel_coords[:, :, :, 0] - tf.cast(x_base, tf.float32) 51 | dp_flow_y = src_pixel_coords[:, :, :, 1] - tf.cast(y_base, tf.float32) 52 | dp_flow = tf.stack([dp_flow_x, dp_flow_y], axis=-1) 53 | return dp_flow 54 | 55 | def get_in_range_mask(self, opt, s, flow): 56 | # 1 if the displacement is within the image 57 | x_min = 0.0 58 | x_max = int(opt.img_width/(2**s))-1 59 | y_min = 0.0 60 | y_max = int(opt.img_height/(2**s))-1 61 | 62 | x_base = tf.range(int(opt.img_width/(2**s))) 63 | y_base = tf.range(int(opt.img_height/(2**s))) 64 | x_base = tf.stack([x_base]*int(opt.img_height/(2**s)), axis=0) 65 | y_base = tf.transpose(tf.stack([y_base]*int(opt.img_width/(2**s)), axis=0)) 66 | 67 | pos_x = flow[:,:,:,0]+tf.cast(x_base, tf.float32) 68 | pos_y = flow[:,:,:,1]+tf.cast(y_base, tf.float32) 69 | inside_x = tf.logical_and(pos_x <= tf.cast(x_max, tf.float32), pos_x >= x_min) 70 | inside_y = tf.logical_and(pos_y <= tf.cast(y_max, tf.float32), pos_y >= y_min) 71 | inside = tf.expand_dims(tf.logical_and(inside_x, inside_y), axis=-1) 72 | return tf.stop_gradient(tf.cast(inside, tf.float32)) 73 | 74 | def get_fb_mask(self, flow, warped_flow, alpha1=0.01, alpha2=0.5): 75 | temp1 = tf.reduce_sum(tf.square(flow+warped_flow), axis=3, keep_dims=True) 76 | temp2 = tf.reduce_sum(tf.square(flow), axis=3, keep_dims=True)+tf.reduce_sum(tf.square(warped_flow), axis=3, keep_dims=True) 77 | occ_mask = tf.greater(temp1, alpha1*temp2+alpha2) 78 | return tf.stop_gradient(tf.cast(occ_mask, tf.float32)) 79 | 80 | # Crecit: https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/losses.py 81 | def ternary_loss(self, im1, im2_warped, valid_mask, max_distance=1): 82 | patch_size = 2*max_distance+1 83 | with tf.variable_scope('ternary_loss'): 84 | def _ternary_transform(image): 85 | intensities = tf.image.rgb_to_grayscale(image) * 255 86 | out_channels = 
patch_size * patch_size 87 | w = np.eye(out_channels).reshape((patch_size, patch_size, 1, out_channels)) 88 | weights = tf.constant(w, dtype=tf.float32) 89 | patches = tf.nn.conv2d(intensities, weights, strides=[1, 1, 1, 1], padding='SAME') 90 | 91 | transf = patches - intensities 92 | transf_norm = transf / tf.sqrt(0.81 + tf.square(transf)) 93 | return transf_norm 94 | 95 | def _hamming_distance(t1, t2): 96 | dist = tf.square(t1 - t2) 97 | dist_norm = dist / (0.1 + dist) 98 | dist_sum = tf.reduce_sum(dist_norm, 3, keep_dims=True) 99 | return dist_sum 100 | 101 | t1 = _ternary_transform(im1) 102 | t2 = _ternary_transform(im2_warped) 103 | dist = _hamming_distance(t1, t2) 104 | 105 | transform_mask = self.create_mask(valid_mask, [[max_distance, max_distance], [max_distance, max_distance]]) 106 | return self.charbonnier_loss(dist, valid_mask * transform_mask), dist 107 | 108 | def charbonnier_loss(self, x, mask=None, truncate=None, alpha=0.45, beta=1.0, epsilon=0.001): 109 | with tf.variable_scope('charbonnier_loss'): 110 | batch, height, width, channels = tf.unstack(tf.shape(x)) 111 | normalization = tf.cast(batch * height * width * channels, tf.float32) 112 | 113 | error = tf.pow(tf.square(x * beta) + tf.square(epsilon), alpha) 114 | 115 | if mask is not None: 116 | error = tf.multiply(mask, error) 117 | if truncate is not None: 118 | error = tf.minimum(error, truncate) 119 | 120 | return tf.reduce_sum(error) / normalization 121 | 122 | def create_mask(self, tensor, paddings): 123 | with tf.variable_scope('create_mask'): 124 | shape = tf.shape(tensor) 125 | inner_width = shape[1] - (paddings[0][0] + paddings[0][1]) 126 | inner_height = shape[2] - (paddings[1][0] + paddings[1][1]) 127 | inner = tf.ones([inner_width, inner_height]) 128 | 129 | mask2d = tf.pad(inner, paddings) 130 | mask3d = tf.tile(tf.expand_dims(mask2d, 0), [shape[0], 1, 1]) 131 | mask4d = tf.expand_dims(mask3d, 3) 132 | return tf.stop_gradient(mask4d) 133 | 134 | # Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py 135 | def SSIM(self, x, y): 136 | C1 = 0.01 ** 2 137 | C2 = 0.03 ** 2 138 | 139 | mu_x = slim.avg_pool2d(x, 3, 1, 'VALID') 140 | mu_y = slim.avg_pool2d(y, 3, 1, 'VALID') 141 | 142 | sigma_x = slim.avg_pool2d(x ** 2, 3, 1, 'VALID') - mu_x ** 2 143 | sigma_y = slim.avg_pool2d(y ** 2, 3, 1, 'VALID') - mu_y ** 2 144 | sigma_xy = slim.avg_pool2d(x * y , 3, 1, 'VALID') - mu_x * mu_y 145 | 146 | SSIM_n = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2) 147 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2) 148 | SSIM = SSIM_n / SSIM_d 149 | 150 | return tf.clip_by_value((1 - SSIM) / 2, 0, 1) 151 | 152 | def compute_edge_aware_smooth_loss(self, pred_disp, img): 153 | """ 154 | Edge-aware L1-norm on first-order gradient 155 | """ 156 | def gradient(pred): 157 | D_dx = -pred[:, :, 1:, :] + pred[:, :, :-1, :] 158 | D_dy = -pred[:, 1:, :, :] + pred[:, :-1, :, :] 159 | return D_dx, D_dy 160 | img_dx, img_dy = gradient(img) 161 | disp_dx, disp_dy = gradient(pred_disp) 162 | 163 | weight_x = tf.exp(-tf.reduce_mean(tf.abs(img_dx), 3, keep_dims=True)) 164 | weight_y = tf.exp(-tf.reduce_mean(tf.abs(img_dy), 3, keep_dims=True)) 165 | 166 | loss = tf.reduce_mean(weight_x*tf.abs(disp_dx)) + tf.reduce_mean(weight_y*tf.abs(disp_dy)) 167 | return loss 168 | 169 | def compute_smooth_loss(self, pred_disp): 170 | """ 171 | L1-norm on second-order gradient 172 | """ 173 | def gradient(pred): 174 | D_dy = pred[:, 1:, :, :] - pred[:, :-1, :, :] 175 | D_dx = pred[:, :, 1:, :] - pred[:, :, :-1, :] 176 | 
return D_dx, D_dy 177 | dx, dy = gradient(pred_disp) 178 | dx2, dxdy = gradient(dx) 179 | dydx, dy2 = gradient(dy) 180 | return tf.reduce_mean(tf.abs(dx2)) + \ 181 | tf.reduce_mean(tf.abs(dxdy)) + \ 182 | tf.reduce_mean(tf.abs(dydx)) + \ 183 | tf.reduce_mean(tf.abs(dy2)) 184 | 185 | def flow_to_image_tf(self, flow): 186 | im_stack = [] 187 | for i in range(self.opt.batch_size//self.opt.num_gpus): 188 | temp = tf.py_func(flow_to_image, [flow[i,:,:,:]], tf.uint8) 189 | im_stack.append(temp) 190 | return tf.stack(im_stack, axis=0) 191 | 192 | # Credit: https://github.com/yzcjtr/GeoNet/blob/master/geonet_model.py 193 | def spatial_normalize(self, disp): 194 | _, curr_h, curr_w, curr_c = disp.get_shape().as_list() 195 | disp_mean = tf.reduce_mean(disp, axis=[1,2,3], keep_dims=True) 196 | disp_mean = tf.tile(disp_mean, [1, curr_h, curr_w, curr_c]) 197 | return disp/disp_mean 198 | 199 | def build_depth_test_graph(self): 200 | input_uint8 = tf.placeholder(tf.uint8, [self.batch_size, 201 | self.img_height, self.img_width, 3], name='raw_input') 202 | input_mc = self.preprocess_image(input_uint8) 203 | with tf.name_scope("depth_prediction"): 204 | pred_disp, depth_net_endpoints = disp_net_res50( 205 | input_mc, is_training=False) 206 | pred_depth = [1./disp for disp in pred_disp] 207 | pred_depth = pred_depth[0] 208 | self.inputs = input_uint8 209 | self.pred_depth = pred_depth 210 | self.depth_epts = depth_net_endpoints 211 | 212 | # Forward-backward 213 | def build_pose_fb_test_graph(self): 214 | input_uint8 = tf.placeholder(tf.uint8, [self.batch_size, 215 | self.img_height, self.img_width * self.seq_length, 3], 216 | name='raw_input') 217 | input_mc = self.preprocess_image(input_uint8) 218 | loader = DataLoader() 219 | tgt_image, src_image_stack = \ 220 | loader.batch_unpack_image_sequence( 221 | input_mc, self.img_height, self.img_width, self.num_source) 222 | with tf.name_scope("pose_prediction"): 223 | pred_poses, _ = pose_net_fb( 224 | tgt_image, src_image_stack, is_training=False) 225 | self.inputs = input_uint8 226 | self.pred_poses = pred_poses[:, :, :6] # Only the first half is used 227 | 228 | def preprocess_image(self, image, is_dp=True): 229 | # Assuming input image is uint8 230 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 231 | if is_dp: 232 | return image * 2. -1. 233 | else: 234 | mean = [104.920005, 110.1753, 114.785955] 235 | out = [] 236 | for i in range(0, int(image.shape[-1]), 3): 237 | r = image[:,:,:,i] - mean[0]/255. 238 | g = image[:,:,:,i+1] - mean[1]/255. 239 | b = image[:,:,:,i+2] - mean[2]/255. 240 | out += [r, g, b] 241 | return tf.stack(out, axis=-1) 242 | 243 | def minus_imagenet_rgb(self, image): 244 | mean = [122.7717, 115.9465, 102.9801] 245 | image = tf.cast(image, tf.float32) 246 | out = [] 247 | for i in range(0, int(image.shape[-1]), 3): 248 | r = image[:,:,:,i] - mean[0] 249 | g = image[:,:,:,i+1] - mean[1] 250 | b = image[:,:,:,i+2] - mean[2] 251 | out += [r, g, b] 252 | return tf.stack(out, axis=-1) 253 | 254 | def deprocess_image(self, image, is_dp=True): 255 | # Assuming input image is float32 256 | if is_dp: 257 | image = (image + 1.)/2. 258 | else: 259 | mean = [104.920005, 110.1753, 114.785955] 260 | r = image[:,:,:,0] + mean[0]/255. 261 | g = image[:,:,:,1] + mean[1]/255. 262 | b = image[:,:,:,2] + mean[2]/255. 
263 | image = tf.stack([r, g, b], axis=-1) 264 | return tf.image.convert_image_dtype(image, dtype=tf.uint8) 265 | 266 | def setup_inference(self, 267 | img_height, 268 | img_width, 269 | mode, 270 | seq_length=3, 271 | batch_size=1): 272 | self.img_height = img_height 273 | self.img_width = img_width 274 | self.mode = mode 275 | self.batch_size = batch_size 276 | if self.mode == 'depth': 277 | self.build_depth_test_graph() 278 | if self.mode == 'pose': 279 | self.seq_length = seq_length 280 | self.num_source = seq_length - 1 281 | self.build_pose_fb_test_graph() 282 | 283 | def inference(self, inputs, sess, mode='depth'): 284 | fetches = {} 285 | if mode == 'depth': 286 | fetches['depth'] = self.pred_depth 287 | if mode == 'pose': 288 | fetches['pose'] = self.pred_poses 289 | results = sess.run(fetches, feed_dict={self.inputs:inputs}) 290 | return results 291 | 292 | def save(self, sess, checkpoint_dir, step): 293 | model_name = 'model' 294 | print(" [*] Saving checkpoint to %s..." % checkpoint_dir) 295 | if step == 'latest': 296 | self.saver.save(sess, 297 | os.path.join(checkpoint_dir, model_name + '.latest')) 298 | else: 299 | self.saver.save(sess, 300 | os.path.join(checkpoint_dir, model_name), 301 | global_step=step) 302 | 303 | if __name__ == '__main__': 304 | model = BaseLearner() 305 | -------------------------------------------------------------------------------- /core/UnFlow/__init__.py: -------------------------------------------------------------------------------- 1 | from .src import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/ops/backward_warp_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | // TODO assert input flow channel count = 2, assert matching numbers in all other dims 17 | 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | using namespace tensorflow; 21 | 22 | void BackwardWarp(const GPUDevice& d, 23 | typename TTypes::ConstTensor images, 24 | typename TTypes::ConstTensor flows, 25 | typename TTypes::Tensor output); 26 | 27 | void BackwardWarpGrad(const GPUDevice& d, 28 | typename TTypes::ConstTensor input_grad, 29 | typename TTypes::ConstTensor input_images, 30 | typename TTypes::ConstTensor flows, 31 | typename TTypes::Tensor output_grad); 32 | 33 | class BackwardWarpOp : public OpKernel { 34 | public: 35 | explicit BackwardWarpOp(OpKernelConstruction* context) : OpKernel(context) {} 36 | 37 | void Compute(OpKernelContext* context) override { 38 | const Tensor& input_images = context->input(0); 39 | const Tensor& input_flows = context->input(1); 40 | 41 | Tensor* output_images = NULL; 42 | OP_REQUIRES_OK(context, context->allocate_output(0, input_images.shape(), 43 | &output_images)); 44 | 45 | typename TTypes::ConstTensor image_data = input_images.tensor(); 46 | typename TTypes::ConstTensor flow_data = 
input_flows.tensor(); 47 | typename TTypes::Tensor output_data = output_images->tensor(); 48 | 49 | BackwardWarp(context->eigen_device(), 50 | image_data, flow_data, output_data); 51 | } 52 | }; 53 | 54 | class BackwardWarpOpGrad : public OpKernel { 55 | public: 56 | explicit BackwardWarpOpGrad(OpKernelConstruction* context) : OpKernel(context) {} 57 | 58 | void Compute(OpKernelContext* context) override { 59 | const Tensor& input = context->input(0); 60 | const Tensor& original_images = context->input(1); 61 | const Tensor& original_flows = context->input(2); 62 | 63 | Tensor* output = NULL; 64 | OP_REQUIRES_OK(context, context->allocate_output(0, original_flows.shape(), 65 | &output)); 66 | 67 | typename TTypes::ConstTensor input_data = input.tensor(); 68 | typename TTypes::ConstTensor flow_data = original_flows.tensor(); 69 | typename TTypes::ConstTensor image_data = original_images.tensor(); 70 | typename TTypes::Tensor output_data = output->tensor(); 71 | 72 | BackwardWarpGrad(context->eigen_device(), 73 | input_data, image_data, flow_data, output_data); 74 | } 75 | }; 76 | 77 | REGISTER_OP("BackwardWarp") 78 | .Input("images: float") 79 | .Input("flows: float") 80 | .Output("warped_images: float") 81 | .SetShapeFn(shape_inference::UnchangedShape); 82 | 83 | REGISTER_OP("BackwardWarpGrad") 84 | .Input("grads: float") 85 | .Input("original_images: float") 86 | .Input("original_flows: float") 87 | .Output("output: float") 88 | .SetShapeFn([](shape_inference::InferenceContext* c) { 89 | c->set_output(0, c->input(2)); 90 | return Status::OK(); 91 | }); 92 | 93 | #if GOOGLE_CUDA 94 | 95 | REGISTER_KERNEL_BUILDER(Name("BackwardWarp").Device(DEVICE_GPU), BackwardWarpOp); 96 | REGISTER_KERNEL_BUILDER(Name("BackwardWarpGrad").Device(DEVICE_GPU), BackwardWarpOpGrad); 97 | 98 | #endif // GOOGLE_CUDA 99 | -------------------------------------------------------------------------------- /core/UnFlow/ops/backward_warp_op.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include "tensorflow/core/framework/register_types.h" 6 | #include "tensorflow/core/framework/tensor_types.h" 7 | #include "tensorflow/core/platform/types.h" 8 | #include "tensorflow/core/util/cuda_kernel_helper.h" 9 | 10 | using namespace tensorflow; 11 | 12 | typedef Eigen::GpuDevice GPUDevice; 13 | 14 | __global__ void BackwardWarpKernel(const int32 nthreads, 15 | const float* images, const float* flows, 16 | int batch, int height, int width, int channels, 17 | float* output) { 18 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 19 | // out_idx = src_x + width * (src_y + height * b) 20 | int idx = out_idx; 21 | const int src_x = idx % width; 22 | idx /= width; 23 | const int src_y = idx % height; 24 | const int b = idx / height; 25 | 26 | const int flow_index = out_idx * 2; 27 | const float x = src_x + flows[flow_index]; 28 | const float y = src_y + flows[flow_index + 1]; 29 | 30 | const int x0 = floorf(x); 31 | const int x1 = x0 + 1; 32 | const int y0 = floorf(y); 33 | const int y1 = y0 + 1; 34 | 35 | const float w_right = x - x0; 36 | const float w_left = x1 - x; 37 | const float w_bottom = y - y0; 38 | const float w_top = y1 - y; 39 | 40 | for(int c = 0; c < channels; ++c) { 41 | float sum = 0.0; 42 | 43 | #define IMG(iy, ix) images[c + channels * (ix + width * (iy + height * b))] 44 | 45 | // top-left neighbor 46 | if(x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) { 47 | sum += w_left * w_top * IMG(y0, x0); 48 | } 49 | 50 | // top-right 
neigbor 51 | if(x1 >= 0 && x1 < width && y0 >= 0 && y0 < height) { 52 | sum += w_right * w_top * IMG(y0, x1); 53 | } 54 | 55 | // bottom-left neighbor 56 | if(x0 >= 0 && x0 < width && y1 >= 0 && y1 < height) { 57 | sum += w_left * w_bottom * IMG(y1, x0); 58 | } 59 | 60 | // bottom-right neighbor 61 | if(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height) { 62 | sum += w_right * w_bottom * IMG(y1, x1); 63 | } 64 | #undef IMG 65 | output[out_idx * channels + c] = sum; 66 | } 67 | } 68 | } 69 | 70 | __global__ void BackwardWarpGradKernel(const int32 nthreads, 71 | const float* input_grad, 72 | const float* input_images, const float* flows, 73 | int batch, int height, int width, int channels, 74 | float* output_grad) { 75 | CUDA_1D_KERNEL_LOOP(in_idx, nthreads) { 76 | // in_idx = x + width * (y + height * b) 77 | int idx = in_idx; 78 | const int src_x = idx % width; 79 | idx /= width; 80 | const int src_y = idx % height; 81 | const int b = idx / height; 82 | 83 | const int flow_index = in_idx * 2; 84 | const float x = src_x + flows[flow_index]; 85 | const float y = src_y + flows[flow_index + 1]; 86 | 87 | const int x0 = floorf(x); 88 | const int x1 = x0 + 1; 89 | const int y0 = floorf(y); 90 | const int y1 = y0 + 1; 91 | 92 | const float w_right = x - x0; 93 | const float w_left = x1 - x; 94 | const float w_bottom = y - y0; 95 | const float w_top = y1 - y; 96 | 97 | float du = 0.0; 98 | float dv = 0.0; 99 | 100 | for(int c = 0; c < channels; ++c) { 101 | float px; 102 | float din = input_grad[c + channels * in_idx]; 103 | 104 | #define IMG(iy, ix) input_images[c + channels * (ix + width * (iy + height * b))] 105 | 106 | // top-left neighbor 107 | if(x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) { 108 | px = IMG(y0, x0) * din; 109 | du -= w_top * px; 110 | dv -= w_left * px; 111 | } 112 | 113 | // top-right neigbor 114 | if(x1 >= 0 && x1 < width && y0 >= 0 && y0 < height) { 115 | px = IMG(y0, x1) * din; 116 | du += w_top * px; 117 | dv -= w_right * px; 118 | } 119 | 120 | // bottom-left neighbor 121 | if(x0 >= 0 && x0 < width && y1 >= 0 && y1 < height) { 122 | px = IMG(y1, x0) * din; 123 | du -= w_bottom * px; 124 | dv += w_left * px; 125 | } 126 | 127 | // bottom-right neighbor 128 | if(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height) { 129 | px = IMG(y1, x1) * din; 130 | du += w_bottom * px; 131 | dv += w_right * px; 132 | } 133 | #undef IMG 134 | } 135 | output_grad[in_idx * 2] = du; 136 | output_grad[in_idx * 2 + 1] = dv; 137 | } 138 | } 139 | 140 | void BackwardWarp(const GPUDevice& d, 141 | typename TTypes::ConstTensor images, 142 | typename TTypes::ConstTensor flows, 143 | typename TTypes::Tensor output) { 144 | const int batch = images.dimension(0); 145 | const int height = images.dimension(1); 146 | const int width = images.dimension(2); 147 | const int channels = images.dimension(3); 148 | 149 | const int total_count = batch * height * width; 150 | if (total_count == 0) return; 151 | 152 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 153 | BackwardWarpKernel 154 | <<>>( 155 | config.virtual_thread_count, images.data(), flows.data(), 156 | batch, height, width, channels, 157 | output.data()); 158 | } 159 | 160 | void BackwardWarpGrad(const GPUDevice& d, 161 | typename TTypes::ConstTensor input_grad, 162 | typename TTypes::ConstTensor input_images, 163 | typename TTypes::ConstTensor flows, 164 | typename TTypes::Tensor output_grad) { 165 | const int batch = input_grad.dimension(0); 166 | const int height = input_grad.dimension(1); 167 | const int width = 
input_grad.dimension(2); 168 | const int channels = input_grad.dimension(3); 169 | 170 | int total_count; 171 | CudaLaunchConfig config; 172 | 173 | // Initialize output_grad with all zeros. 174 | total_count = batch * height * width; 175 | if (total_count == 0) return; 176 | config = GetCudaLaunchConfig(total_count, d); 177 | SetZero<<>>( 178 | config.virtual_thread_count, output_grad.data()); 179 | 180 | // Accumulate. 181 | total_count = batch * height * width; 182 | config = GetCudaLaunchConfig(total_count, d); 183 | BackwardWarpGradKernel 184 | <<>>( 185 | config.virtual_thread_count, input_grad.data(), 186 | input_images.data(), flows.data(), 187 | batch, height, width, channels, 188 | output_grad.data()); 189 | } 190 | 191 | 192 | #endif // GOOGLE_CUDA 193 | -------------------------------------------------------------------------------- /core/UnFlow/ops/correlation_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | #include "correlation_op.h" 17 | 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | using namespace tensorflow; 21 | 22 | void Correlation(const GPUDevice& d, 23 | typename TTypes::ConstTensor input_0, 24 | typename TTypes::ConstTensor input_1, 25 | typename TTypes::Tensor output, 26 | typename TTypes::Tensor padded_0, 27 | typename TTypes::Tensor padded_1, 28 | CorrelationState params); 29 | 30 | void CorrelationGrad(const GPUDevice& d, 31 | typename TTypes::ConstTensor input_grad, 32 | typename TTypes::ConstTensor padded_0, 33 | typename TTypes::ConstTensor padded_1, 34 | typename TTypes::Tensor output_grad_0, 35 | typename TTypes::Tensor output_grad_1, 36 | CorrelationState params); 37 | 38 | class CorrelationOp : public OpKernel { 39 | public: 40 | explicit CorrelationOp(OpKernelConstruction* context) 41 | : OpKernel(context), attrs(context) {} 42 | 43 | void Compute(OpKernelContext* context) override { 44 | const Tensor& input_0 = context->input(0); 45 | const Tensor& input_1 = context->input(1); 46 | 47 | OP_REQUIRES(context, input_0.shape() == input_1.shape(), 48 | errors::InvalidArgument("Input shapes have to be the same")); 49 | 50 | typename TTypes::ConstTensor input_0_data = input_0.tensor(); 51 | typename TTypes::ConstTensor input_1_data = input_1.tensor(); 52 | 53 | const int batch = input_0_data.dimension(0); 54 | const int in_channels = input_0_data.dimension(1); 55 | const int in_height = input_0_data.dimension(2); 56 | const int in_width = input_0_data.dimension(3); 57 | 58 | CorrelationState st(attrs, in_height, in_width, in_channels); 59 | 60 | OP_REQUIRES(context, st.out_width * st.out_height > 0, 61 | errors::InvalidArgument("Invalid correlation settings")); 62 | 63 | Tensor* output = NULL; 64 | TensorShape output_shape({batch, st.out_channels, st.out_height, st.out_width}); 65 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); 66 | 67 | 
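// The zero-padded copies of both inputs are allocated as additional op outputs
// so that CorrelationOpGrad can reuse them when computing the input gradients.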
Tensor* padded_0 = NULL; 68 | Tensor* padded_1 = NULL; 69 | TensorShape padded_shape({batch, st.padded_height, st.padded_width, in_channels}); 70 | OP_REQUIRES_OK(context, context->allocate_output(1, padded_shape, &padded_0)); 71 | OP_REQUIRES_OK(context, context->allocate_output(2, padded_shape, &padded_1)); 72 | 73 | typename TTypes::Tensor output_data = output->tensor(); 74 | typename TTypes::Tensor padded_0_data = padded_0->tensor(); 75 | typename TTypes::Tensor padded_1_data = padded_1->tensor(); 76 | 77 | Correlation(context->eigen_device(), 78 | input_0_data, input_1_data, output_data, 79 | padded_0_data, padded_1_data, 80 | st); 81 | } 82 | 83 | private: 84 | CorrelationAttrs attrs; 85 | }; 86 | 87 | class CorrelationOpGrad : public OpKernel { 88 | public: 89 | explicit CorrelationOpGrad(OpKernelConstruction* context) 90 | : OpKernel(context), attrs(context) {} 91 | 92 | void Compute(OpKernelContext* context) override { 93 | const Tensor& input_grad = context->input(0); 94 | const Tensor& input_0 = context->input(1); 95 | const Tensor& input_1 = context->input(2); 96 | const Tensor& padded_0 = context->input(3); 97 | const Tensor& padded_1 = context->input(4); 98 | 99 | typename TTypes::ConstTensor input_grad_data = input_grad.tensor(); 100 | typename TTypes::ConstTensor input_0_data = input_0.tensor(); 101 | //typename TTypes::ConstTensor input_1_data = input_1.tensor(); 102 | typename TTypes::ConstTensor padded_0_data = padded_0.tensor(); 103 | typename TTypes::ConstTensor padded_1_data = padded_1.tensor(); 104 | 105 | const int in_channels = input_0_data.dimension(1); 106 | const int in_height = input_0_data.dimension(2); 107 | const int in_width = input_0_data.dimension(3); 108 | 109 | CorrelationState st(attrs, in_height, in_width, in_channels); 110 | 111 | Tensor* output_grad_0 = NULL; 112 | OP_REQUIRES_OK(context, context->allocate_output(0, input_0.shape(), 113 | &output_grad_0)); 114 | Tensor* output_grad_1 = NULL; 115 | OP_REQUIRES_OK(context, context->allocate_output(1, input_0.shape(), 116 | &output_grad_1)); 117 | 118 | typename TTypes::Tensor output_grad_0_data = output_grad_0->tensor(); 119 | typename TTypes::Tensor output_grad_1_data = output_grad_1->tensor(); 120 | 121 | CorrelationGrad(context->eigen_device(), 122 | input_grad_data, 123 | padded_0_data, padded_1_data, 124 | output_grad_0_data, output_grad_1_data, 125 | st); 126 | } 127 | private: 128 | CorrelationAttrs attrs; 129 | }; 130 | 131 | using shape_inference::DimensionHandle;; 132 | 133 | REGISTER_OP("Correlation") 134 | .Input("input_0: float") 135 | .Input("input_1: float") 136 | .Attr("kernel_size: int = 1") 137 | .Attr("max_displacement: int = 20") 138 | .Attr("pad: int = 20") 139 | .Attr("stride_1: int = 1") 140 | .Attr("stride_2: int = 2") 141 | .Output("correlation: float") 142 | .Output("padded_0: float") 143 | .Output("padded_1: float") 144 | .SetShapeFn([](shape_inference::InferenceContext* c) { 145 | CorrelationAttrs attrs; 146 | c->GetAttr("kernel_size", &attrs.kernel_size); 147 | c->GetAttr("max_displacement", &attrs.max_displacement); 148 | c->GetAttr("pad", &attrs.pad_size); 149 | c->GetAttr("stride_1", &attrs.stride_1); 150 | c->GetAttr("stride_2", &attrs.stride_2); 151 | 152 | DimensionHandle batch = c->Dim(c->input(0), 0); 153 | 154 | //padded_height = in_height + 2 * pad_size; 155 | //padded_width = in_width + 2 * pad_size; 156 | //kernel_radius = (kernel_size - 1) / 2; 157 | //border_size = max_displacement + kernel_radius; 158 | int neighborhood_grid_radius = attrs.max_displacement / 
attrs.stride_2; 159 | int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1; 160 | //out_width = ceil((float)(padded_width - border_size *2) / (float)stride_1); 161 | //out_height = ceil((float)(padded_height - border_size *2) / (float)stride_1); 162 | int out_channels = neighborhood_grid_width * neighborhood_grid_width; 163 | 164 | // TODO: support passing on output width and height 165 | 166 | c->set_output(0, c->MakeShape({batch, out_channels, c->UnknownDim(), c->UnknownDim()})); 167 | return Status::OK(); 168 | }); 169 | 170 | REGISTER_OP("CorrelationGrad") 171 | .Input("input_grad: float") 172 | .Input("original_input_0: float") 173 | .Input("original_input_1: float") 174 | .Input("padded_0: float") 175 | .Input("padded_1: float") 176 | .Attr("kernel_size: int = 1") 177 | .Attr("max_displacement: int = 20") 178 | .Attr("pad: int = 20") 179 | .Attr("stride_1: int = 1") 180 | .Attr("stride_2: int = 2") 181 | .Output("output_grad_0: float") 182 | .Output("output_grad_1: float") 183 | .SetShapeFn([](shape_inference::InferenceContext* c) { 184 | c->set_output(0, c->input(1)); 185 | c->set_output(1, c->input(2)); 186 | return Status::OK(); 187 | }); 188 | 189 | #if GOOGLE_CUDA 190 | 191 | REGISTER_KERNEL_BUILDER(Name("Correlation").Device(DEVICE_GPU), CorrelationOp); 192 | REGISTER_KERNEL_BUILDER(Name("CorrelationGrad").Device(DEVICE_GPU), CorrelationOpGrad); 193 | 194 | #endif // GOOGLE_CUDA 195 | -------------------------------------------------------------------------------- /core/UnFlow/ops/correlation_op.h: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include "tensorflow/core/framework/op_kernel.h" 4 | #include "tensorflow/core/framework/op.h" 5 | 6 | using namespace tensorflow; 7 | 8 | struct CorrelationAttrs { 9 | CorrelationAttrs(OpKernelConstruction* c) { 10 | OP_REQUIRES_OK(c, c->GetAttr("kernel_size", &kernel_size)); 11 | OP_REQUIRES_OK(c, c->GetAttr("max_displacement", &max_displacement)); 12 | OP_REQUIRES_OK(c, c->GetAttr("pad", &pad_size)); 13 | OP_REQUIRES_OK(c, c->GetAttr("stride_1", &stride_1)); 14 | OP_REQUIRES_OK(c, c->GetAttr("stride_2", &stride_2)); 15 | 16 | OP_REQUIRES(c, kernel_size % 2 != 0, 17 | errors::InvalidArgument("kernel_size must be odd")); 18 | } 19 | CorrelationAttrs() {} 20 | 21 | int pad_size; 22 | int stride_1; 23 | int stride_2; 24 | int max_displacement; 25 | int kernel_size; 26 | }; 27 | 28 | struct CorrelationState { 29 | CorrelationState(CorrelationAttrs attrs, int in_height, int in_width, int in_channels) { 30 | pad_size = attrs.pad_size; 31 | stride_1 = attrs.stride_1; 32 | stride_2 = attrs.stride_2; 33 | max_displacement = attrs.max_displacement; 34 | kernel_size = attrs.kernel_size; 35 | 36 | padded_height = in_height + 2 * pad_size; 37 | padded_width = in_width + 2 * pad_size; 38 | 39 | // Compute size of unreachable border region (on each side) 40 | kernel_radius = (kernel_size - 1) / 2; 41 | border_size = max_displacement + kernel_radius; 42 | 43 | // Given a center position in image 1, how many displaced positions in -x / +x 44 | // direction do we consider in image 2 (neighborhoodGridWidth): 45 | neighborhood_grid_radius = max_displacement / stride_2; 46 | neighborhood_grid_width = neighborhood_grid_radius * 2 + 1; 47 | 48 | out_width = ceil((float)(padded_width - border_size *2) / (float)stride_1); 49 | out_height = ceil((float)(padded_height - border_size *2) / (float)stride_1); 50 | // Top Channels amount to displacement combinations in X and Y 
direction: 51 | out_channels = neighborhood_grid_width * neighborhood_grid_width; 52 | } 53 | 54 | int pad_size; 55 | int stride_1; 56 | int stride_2; 57 | int kernel_radius; 58 | int max_displacement; 59 | int kernel_size; 60 | int neighborhood_grid_radius; 61 | int neighborhood_grid_width; 62 | int padded_height; 63 | int padded_width; 64 | int border_size; 65 | int out_height; 66 | int out_width; 67 | int out_channels; 68 | }; 69 | -------------------------------------------------------------------------------- /core/UnFlow/ops/downsample_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | using namespace tensorflow; 17 | 18 | typedef Eigen::GpuDevice GPUDevice; 19 | 20 | void Downsample(const GPUDevice& d, 21 | typename TTypes::ConstTensor images, 22 | typename TTypes::Tensor output); 23 | 24 | class DownsampleOp : public OpKernel { 25 | public: 26 | explicit DownsampleOp(OpKernelConstruction* c) : OpKernel(c) { 27 | OP_REQUIRES_OK(c, c->GetAttr("scale", &scale)); 28 | } 29 | 30 | void Compute(OpKernelContext* context) override { 31 | const Tensor& input = context->input(0); 32 | 33 | typename TTypes::ConstTensor input_data = input.tensor(); 34 | 35 | 36 | OP_REQUIRES(context, 37 | input_data.dimension(1) % scale == 0 && 38 | input_data.dimension(2) % scale == 0, 39 | errors::InvalidArgument("Input height and width must be divisible by scale")); 40 | 41 | const int batch = input_data.dimension(0); 42 | const int height = input_data.dimension(1) / scale; 43 | const int width = input_data.dimension(2) / scale; 44 | const int channels = input_data.dimension(3); 45 | 46 | auto output_shape = TensorShape({batch, height, width, channels}); 47 | 48 | Tensor* output = NULL; 49 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); 50 | 51 | typename TTypes::Tensor output_data = output->tensor(); 52 | 53 | Downsample(context->eigen_device(), input_data, output_data); 54 | } 55 | private: 56 | int scale; 57 | }; 58 | 59 | using shape_inference::DimensionHandle; 60 | using shape_inference::ShapeHandle; 61 | 62 | REGISTER_OP("Downsample") 63 | .Input("images: float") 64 | .Attr("scale: int = 2") 65 | .Output("out_images: float") 66 | .SetShapeFn([](shape_inference::InferenceContext* c) { 67 | ShapeHandle in = c->input(0); 68 | int scale; 69 | DimensionHandle batch = c->Dim(in, 0); 70 | DimensionHandle channels = c->Dim(in, 3); 71 | DimensionHandle height; 72 | DimensionHandle width; 73 | 74 | c->GetAttr("scale", &scale); 75 | c->Divide(c->Dim(in, 1), scale, true, &height); 76 | c->Divide(c->Dim(in, 2), scale, true, &width); 77 | 78 | c->set_output(0, c->MakeShape({batch, height, width, channels})); 79 | return Status::OK(); 80 | }); 81 | 82 | #if GOOGLE_CUDA 83 | 84 | REGISTER_KERNEL_BUILDER(Name("Downsample").Device(DEVICE_GPU), DownsampleOp); 85 | 86 | #endif // GOOGLE_CUDA 87 | 
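As the CUDA kernel in the following file shows, `Downsample` simply averages every non-overlapping `scale` x `scale` block of the NHWC input. Below is a plain-TensorFlow sketch of the same computation (the helper name is ours, not part of the repository), which can serve as a CPU reference when sanity-checking the custom op; for inputs whose height and width are divisible by `scale`, it matches the op's output up to floating-point rounding.

```python
import tensorflow as tf

def downsample_reference(images, scale=2):
    """Average-pool equivalent of the custom Downsample op (NHWC, float32).

    Height and width must be divisible by `scale`, mirroring the op's own check.
    """
    return tf.nn.avg_pool(images,
                          ksize=[1, scale, scale, 1],
                          strides=[1, scale, scale, 1],
                          padding='VALID')
```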
-------------------------------------------------------------------------------- /core/UnFlow/ops/downsample_op.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include "tensorflow/core/framework/register_types.h" 6 | #include "tensorflow/core/framework/tensor_types.h" 7 | #include "tensorflow/core/platform/types.h" 8 | #include "tensorflow/core/util/cuda_kernel_helper.h" 9 | 10 | using namespace tensorflow; 11 | 12 | typedef Eigen::GpuDevice GPUDevice; 13 | 14 | __global__ void DownsampleKernel(const int32 nthreads, 15 | const float* images, 16 | int batch, int in_height, int in_width, int channels, 17 | int out_height, int out_width, 18 | float* output) { 19 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 20 | // out_idx = x + out_width * (y + out_height * b) 21 | int idx = out_idx; 22 | const int c = idx % channels; 23 | idx /= channels; 24 | const int x = idx % out_width; 25 | idx /= out_width; 26 | const int y = idx % out_height; 27 | const int b = idx / out_height; 28 | 29 | const int scale_y = in_height / out_height; 30 | const int scale_x = in_width/ out_width; 31 | 32 | const int min_in_y = y * scale_y; 33 | const int min_in_x = x * scale_x; 34 | const int max_in_y = min_in_y + scale_y; 35 | const int max_in_x = min_in_x + scale_x; 36 | 37 | float sum = 0.0; 38 | 39 | for(int in_y = min_in_y; in_y < max_in_y; ++in_y) { 40 | for(int in_x = min_in_x; in_x < max_in_x; ++in_x) { 41 | sum += images[c + channels * (in_x + in_width * (in_y + in_height * b))]; 42 | } 43 | } 44 | 45 | sum /= scale_x * scale_y; 46 | output[c + channels * (x + out_width * (y + out_height * b))] = sum; 47 | } 48 | } 49 | 50 | void Downsample(const GPUDevice& d, 51 | typename TTypes::ConstTensor images, 52 | typename TTypes::Tensor output) { 53 | const int batch = images.dimension(0); 54 | const int in_height = images.dimension(1); 55 | const int in_width = images.dimension(2); 56 | const int channels = images.dimension(3); 57 | 58 | const int out_height = output.dimension(1); 59 | const int out_width = output.dimension(2); 60 | 61 | const int total_count = batch * out_height * out_width * channels; 62 | if (total_count == 0) return; 63 | 64 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 65 | DownsampleKernel 66 | <<>>( 67 | config.virtual_thread_count, images.data(), 68 | batch, in_height, in_width, channels, 69 | out_height, out_width, 70 | output.data()); 71 | } 72 | 73 | #endif // GOOGLE_CUDA 74 | -------------------------------------------------------------------------------- /core/UnFlow/ops/forward_warp_op.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 5 | #include "tensorflow/core/framework/op_kernel.h" 6 | #include "tensorflow/core/framework/register_types.h" 7 | #include "tensorflow/core/framework/tensor.h" 8 | #include "tensorflow/core/framework/tensor_shape.h" 9 | #include "tensorflow/core/framework/types.h" 10 | #include "tensorflow/core/lib/core/status.h" 11 | #include "tensorflow/core/platform/logging.h" 12 | #include "tensorflow/core/framework/op.h" 13 | #include "tensorflow/core/framework/shape_inference.h" 14 | #include "tensorflow/core/framework/common_shape_fns.h" 15 | 16 | // TODO assert input flow channel count = 2, assert matching numbers in all other dims 17 | 18 | typedef Eigen::ThreadPoolDevice CPUDevice; 19 | typedef 
Eigen::GpuDevice GPUDevice; 20 | 21 | using namespace tensorflow; 22 | 23 | void ForwardWarp(const GPUDevice& d, 24 | typename TTypes::ConstTensor input, 25 | typename TTypes::Tensor output); 26 | 27 | void ForwardWarpGrad(const GPUDevice& d, 28 | typename TTypes::ConstTensor input_grad, 29 | typename TTypes::ConstTensor original_input, 30 | typename TTypes::Tensor output_grad); 31 | 32 | class ForwardWarpOp : public OpKernel { 33 | public: 34 | explicit ForwardWarpOp(OpKernelConstruction* context) : OpKernel(context) {} 35 | 36 | void Compute(OpKernelContext* context) override { 37 | const Tensor& input = context->input(0); 38 | 39 | typename TTypes::ConstTensor input_data = input.tensor(); 40 | 41 | const int batch = input_data.dimension(0); 42 | const int height = input_data.dimension(1); 43 | const int width = input_data.dimension(2); 44 | const int channels = input_data.dimension(3); 45 | 46 | auto output_shape = TensorShape({batch, height, width, 1}); 47 | Tensor* output = NULL; 48 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, 49 | &output)); 50 | typename TTypes::Tensor output_data = output->tensor(); 51 | 52 | ForwardWarp(context->eigen_device(), 53 | input_data, output_data); 54 | } 55 | }; 56 | 57 | class ForwardWarpOpGrad : public OpKernel { 58 | public: 59 | explicit ForwardWarpOpGrad(OpKernelConstruction* context) : OpKernel(context) {} 60 | 61 | void Compute(OpKernelContext* context) override { 62 | const Tensor& input = context->input(0); 63 | const Tensor& original_input = context->input(1); 64 | 65 | Tensor* output = NULL; 66 | OP_REQUIRES_OK(context, context->allocate_output(0, original_input.shape(), 67 | &output)); 68 | 69 | typename TTypes::ConstTensor input_data = input.tensor(); 70 | typename TTypes::ConstTensor original_data = original_input.tensor(); 71 | typename TTypes::Tensor output_data = output->tensor(); 72 | 73 | ForwardWarpGrad(context->eigen_device(), 74 | input_data, original_data, output_data); 75 | } 76 | }; 77 | 78 | using shape_inference::DimensionHandle; 79 | using shape_inference::ShapeHandle; 80 | 81 | REGISTER_OP("ForwardWarp") 82 | .Input("flows: float") 83 | .Output("output: float") 84 | .SetShapeFn([](shape_inference::InferenceContext* c) { 85 | ShapeHandle in = c->input(0); 86 | DimensionHandle batch = c->Dim(in, 0); 87 | DimensionHandle height = c->Dim(in, 1); 88 | DimensionHandle width = c->Dim(in, 2); 89 | c->set_output(0, c->MakeShape({batch, height, width, 1})); 90 | return Status::OK(); 91 | }); 92 | 93 | REGISTER_OP("ForwardWarpGrad") 94 | .Input("grads: float") 95 | .Input("original_flows: float") 96 | .Output("output: float") 97 | .SetShapeFn([](shape_inference::InferenceContext* c) { 98 | c->set_output(0, c->input(1)); 99 | return Status::OK(); 100 | }); 101 | 102 | #if GOOGLE_CUDA 103 | 104 | REGISTER_KERNEL_BUILDER(Name("ForwardWarp").Device(DEVICE_GPU), ForwardWarpOp); 105 | REGISTER_KERNEL_BUILDER(Name("ForwardWarpGrad").Device(DEVICE_GPU), ForwardWarpOpGrad); 106 | 107 | #endif // GOOGLE_CUDA 108 | -------------------------------------------------------------------------------- /core/UnFlow/ops/forward_warp_op.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | 3 | #define EIGEN_USE_GPU 4 | 5 | #include "tensorflow/core/framework/register_types.h" 6 | #include "tensorflow/core/framework/tensor_types.h" 7 | #include "tensorflow/core/platform/types.h" 8 | #include "tensorflow/core/util/cuda_kernel_helper.h" 9 | 10 | using namespace tensorflow; 11 | 
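// ForwardWarp: each source pixel is splatted to its flow target (src + flow);
// a Gaussian-weighted contribution (fixed std, small k-pixel support) is
// accumulated onto the neighbouring integer pixels with atomic adds, so the
// single-channel output is a soft count of how many source pixels map to each
// target location.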
12 | #define gauss(x, y, std) 13 | 14 | typedef Eigen::GpuDevice GPUDevice; 15 | 16 | __global__ void ForwardWarpKernel(const int32 nthreads, 17 | const float* flows, 18 | int batch, int height, int width, 19 | float* output) { 20 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 21 | // out_idx = x + width * (y + height * b) 22 | int idx = out_idx; 23 | const int src_x = idx % width; 24 | idx /= width; 25 | const int src_y = idx % height; 26 | const int b = idx / height; 27 | 28 | const int flow_index = out_idx * 2; 29 | const float target_x = src_x + flows[flow_index]; 30 | const float target_y = src_y + flows[flow_index + 1]; 31 | 32 | // Calculate distribution variance depending on similar neighbor flows 33 | // fixed variance for first tests!! 34 | 35 | // Compute valid neighbor range 36 | //int min_n_y = y + 2 > 0 ? floorf(pos_y) : 0; 37 | 38 | const float dist = 2.0; 39 | const float std = dist * 0.5; 40 | const int k = ceilf(dist + 2); 41 | // TODO variance different for x, y? 42 | 43 | // center pixel closest to mapping location 44 | //const int closest_x = roundf(target_x); 45 | //const int closest_y = roundf(target_y); 46 | if(floorf(target_x - k) < width && floorf(target_x + k) >= 0 47 | && floorf(target_y - k) < height && floorf(target_y + k) >= 0) { 48 | const int min_n_x = target_x - k > 0? floorf(target_x - k) : 0; 49 | const int min_n_y = target_y - k > 0? floorf(target_y - k) : 0; 50 | const int max_n_x = target_x + k < width? floorf(target_x + k) : width - 1; 51 | const int max_n_y = target_y + k < height? floorf(target_y + k) : height - 1; 52 | 53 | const float gauss_divisor = 2 * powf(std, 2); 54 | for(int n_x = min_n_x; n_x <= max_n_x; ++n_x) { 55 | for(int n_y = min_n_y; n_y <= max_n_y; ++n_y) { 56 | const float x = n_x - target_x; 57 | const float y = n_y - target_y; 58 | const float weight = expf(-(powf(x, 2) + powf(y, 2)) / gauss_divisor); 59 | CudaAtomicAdd(output + n_x + width * (n_y + height * b), weight); 60 | } 61 | } 62 | } 63 | 64 | } 65 | } 66 | 67 | __global__ void ForwardWarpGradKernel(const int32 nthreads, 68 | const float* input_grad, const float* flows, 69 | int batch, int height, int width, 70 | float* output_grad) { 71 | CUDA_1D_KERNEL_LOOP(in_idx, nthreads) { 72 | // in_idx = x + width * (y + height * b) 73 | int idx = in_idx; 74 | const int src_x = idx % width; 75 | idx /= width; 76 | const int src_y = idx % height; 77 | const int b = idx / height; 78 | 79 | const int flow_index = in_idx * 2; 80 | const float target_x = src_x + flows[flow_index]; 81 | const float target_y = src_y + flows[flow_index + 1]; 82 | 83 | // Calculate distribution variance depending on similar neighbor flows 84 | // fixed variance for first tests!! 85 | 86 | // Compute valid neighbor range 87 | //int min_n_y = y + 2 > 0 ? floorf(pos_y) : 0; 88 | 89 | const float dist = 2.0; 90 | const float std = dist * 0.5; 91 | const int k = ceilf(dist + 2); 92 | // TODO variance different for x, y? 93 | 94 | // center pixel closest to mapping location 95 | //const int closest_x = roundf(target_x); 96 | //const int closest_y = roundf(target_y); 97 | float du = 0.0; 98 | float dv = 0.0; 99 | 100 | if(floorf(target_x - k) < width && floorf(target_x + k) >= 0 101 | && floorf(target_y - k) < height && floorf(target_y + k) >= 0) { 102 | const int min_n_x = target_x - k > 0? floorf(target_x - k) : 0; 103 | const int min_n_y = target_y - k > 0? floorf(target_y - k) : 0; 104 | const int max_n_x = target_x + k < width? floorf(target_x + k) : width - 1; 105 | const int max_n_y = target_y + k < height? 
floorf(target_y + k) : height - 1; 106 | 107 | const float gauss_divisor = 2 * powf(std, 2); 108 | for(int n_x = min_n_x; n_x <= max_n_x; ++n_x) { 109 | for(int n_y = min_n_y; n_y <= max_n_y; ++n_y) { 110 | const float x = n_x - target_x; 111 | const float y = n_y - target_y; 112 | const float weight = expf(-(powf(x, 2) + powf(y, 2)) / gauss_divisor); 113 | 114 | const float din = input_grad[n_x + width * (n_y + height * b)]; 115 | const float factor = 2 * din * weight / gauss_divisor; 116 | du += factor * x; 117 | dv += factor * y; 118 | } 119 | } 120 | } 121 | 122 | output_grad[flow_index] = du; 123 | output_grad[flow_index + 1] = dv; 124 | } 125 | } 126 | 127 | void ForwardWarp(const GPUDevice& d, 128 | typename TTypes::ConstTensor flows, 129 | typename TTypes::Tensor output) { 130 | const int batch = flows.dimension(0); 131 | const int height = flows.dimension(1); 132 | const int width = flows.dimension(2); 133 | 134 | const int total_count = batch * height * width; 135 | if (total_count == 0) return; 136 | 137 | CudaLaunchConfig config; 138 | 139 | // Initialize output with all zeros. 140 | config = GetCudaLaunchConfig(total_count, d); 141 | SetZero<<>>( 142 | config.virtual_thread_count, output.data()); 143 | 144 | config = GetCudaLaunchConfig(total_count, d); 145 | ForwardWarpKernel 146 | <<>>( 147 | config.virtual_thread_count, flows.data(), 148 | batch, height, width, 149 | output.data()); 150 | } 151 | 152 | void ForwardWarpGrad(const GPUDevice& d, 153 | typename TTypes::ConstTensor input_grad, 154 | typename TTypes::ConstTensor flows, 155 | typename TTypes::Tensor output_grad) { 156 | const int batch = input_grad.dimension(0); 157 | const int height = input_grad.dimension(1); 158 | const int width = input_grad.dimension(2); 159 | 160 | int total_count = batch * height * width; 161 | if (total_count == 0) return; 162 | 163 | // Initialize output_grad with all zeros. 164 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); 165 | SetZero<<>>( 166 | config.virtual_thread_count, output_grad.data()); 167 | 168 | // Accumulate. 
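// (Each thread writes the du/dv gradient of its own flow vector directly, so,
// unlike the forward splatting pass, no atomic adds are needed here.)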
169 | config = GetCudaLaunchConfig(total_count, d); 170 | ForwardWarpGradKernel 171 | <<>>( 172 | config.virtual_thread_count, input_grad.data(), flows.data(), 173 | batch, height, width, 174 | output_grad.data()); 175 | } 176 | 177 | #endif // GOOGLE_CUDA 178 | -------------------------------------------------------------------------------- /core/UnFlow/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .e2eflow import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .flownet import flownet 2 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/augment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from .spatial_transformer import transformer 5 | 6 | 7 | def random_affine(tensors, *, 8 | max_translation_x=0.0, max_translation_y=0.0, 9 | max_rotation=0.0, min_scale=1.0, max_scale=1.0, 10 | horizontal_flipping=False): 11 | """Applies geometric augmentations to a list of tensors. 12 | 13 | Each element in the list is augmented in the same way. 14 | For all elements, num_batch must be equal while height, width and channels 15 | may differ. 16 | """ 17 | def _deg2rad(deg): 18 | return (deg * np.pi) / 180.0 19 | 20 | with tf.variable_scope('random_affine'): 21 | num_batch = tf.shape(tensors[0])[0] 22 | 23 | zero = tf.zeros([num_batch]) 24 | one = tf.ones([num_batch]) 25 | 26 | tx = tf.random_uniform([num_batch], -max_translation_x, max_translation_x) 27 | ty = tf.random_uniform([num_batch], -max_translation_y, max_translation_y) 28 | rot = tf.random_uniform([num_batch], -max_rotation, max_rotation) 29 | rad = _deg2rad(rot) 30 | scale = tf.random_uniform([num_batch], min_scale, max_scale) 31 | 32 | t1 = [[tf.cos(rad), -tf.sin(rad), tx], 33 | [tf.sin(rad), tf.cos(rad), ty]] 34 | t1 = tf.transpose(t1, [2, 0, 1]) 35 | 36 | scale_x = scale 37 | if horizontal_flipping: 38 | flip = tf.random_uniform([num_batch], 0, 1) 39 | flip = tf.where(tf.greater(flip, 0.5), -one, one) 40 | scale_x = scale_x * flip 41 | 42 | t2 = [[scale_x, zero, zero], 43 | [zero, scale, zero], 44 | [zero, zero, one]] 45 | t2 = tf.transpose(t2, [2, 0, 1]) 46 | 47 | t = tf.matmul(t1, t2) 48 | 49 | out = [] 50 | for tensor in tensors: 51 | shape = tf.shape(tensor) 52 | tensor = transformer(tensor, t, (shape[1], shape[2])) 53 | out.append(tf.stop_gradient(tensor)) 54 | return out 55 | 56 | 57 | def random_photometric(ims, *, 58 | noise_stddev=0.0, min_contrast=0.0, max_contrast=0.0, 59 | brightness_stddev=0.0, min_colour=1.0, max_colour=1.0, 60 | min_gamma=1.0, max_gamma=1.0): 61 | """Applies photometric augmentations to a list of image batches. 62 | 63 | Each image in the list is augmented in the same way. 64 | For all elements, num_batch must be equal while height and width may differ. 65 | 66 | Args: 67 | ims: list of 3-channel image batches normalized to [0, 1]. 68 | channel_mean: tensor of shape [3] which was used to normalize the pixel 69 | values ranging from 0 ... 255. 
70 | 71 | Returns: 72 | Batch of normalized images with photometric augmentations. Has the same 73 | shape as the input batch. 74 | """ 75 | 76 | with tf.variable_scope('random_photometric'): 77 | num_batch = tf.shape(ims[0])[0] 78 | 79 | contrast = tf.random_uniform([num_batch, 1], min_contrast, max_contrast) 80 | gamma = tf.random_uniform([num_batch, 1], min_gamma, max_gamma) 81 | gamma_inv = 1.0 / gamma 82 | colour = tf.random_uniform([num_batch, 3], min_colour, max_colour) 83 | if noise_stddev > 0.0: 84 | noise = tf.random_normal([num_batch, 1], stddev=noise_stddev) 85 | else: 86 | noise = tf.zeros([num_batch, 1]) 87 | if brightness_stddev > 0.0: 88 | brightness = tf.random_normal([num_batch, 1], 89 | stddev=brightness_stddev) 90 | else: 91 | brightness = tf.zeros([num_batch, 1]) 92 | 93 | out = [] 94 | for im in ims: 95 | # Transpose to [height, width, num_batch, channels] 96 | im_re = tf.transpose(im, [1, 2, 0, 3]) 97 | im_re = im_re 98 | im_re = (im_re * (contrast + 1.0) + brightness) * colour 99 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re)) 100 | im_re = tf.pow(im_re, gamma_inv) 101 | 102 | im_re = im_re + noise 103 | 104 | # Subtract the mean again after clamping 105 | im_re = im_re 106 | 107 | im = tf.transpose(im_re, [2, 0, 1, 3]) 108 | im = tf.stop_gradient(im) 109 | out.append(im) 110 | return out 111 | 112 | 113 | def random_crop(tensors, size, seed=None, name=None): 114 | """Randomly crops multiple tensors (of the same shape) to a given size. 115 | 116 | Each tensor is cropped in the same way.""" 117 | with tf.name_scope(name, "random_crop", [size]) as name: 118 | size = tf.convert_to_tensor(size, dtype=tf.int32, name="size") 119 | if len(tensors) == 2: 120 | shape = tf.minimum(tf.shape(tensors[0]), tf.shape(tensors[1])) 121 | else: 122 | shape = tf.shape(tensors[0]) 123 | 124 | limit = shape - size + 1 125 | offset = tf.random_uniform( 126 | tf.shape(shape), 127 | dtype=size.dtype, 128 | maxval=size.dtype.max, 129 | seed=seed) % limit 130 | results = [] 131 | for tensor in tensors: 132 | result = tf.slice(tensor, offset, size) 133 | results.append(result) 134 | return results 135 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/data.py: -------------------------------------------------------------------------------- 1 | """Utility functions for providing data directories.""" 2 | import os 3 | import sys 4 | import zipfile 5 | import rarfile 6 | from urllib.request import FancyURLopener 7 | import shutil 8 | 9 | import numpy as np 10 | import matplotlib.image as mpimg 11 | 12 | 13 | class Data(): 14 | # Should be a list containing all subdirectories of the main data dir which 15 | # belong to this dataset 16 | dirs = None 17 | 18 | def __init__(self, data_dir, stat_log_dir, 19 | development=True, fast_dir=None): 20 | self.development = development 21 | self.data_dir = data_dir 22 | self.stat_log_dir = stat_log_dir 23 | if not os.path.isdir(data_dir): 24 | os.makedirs(data_dir) 25 | 26 | self._fetch_if_missing() 27 | 28 | self.fast_dir = fast_dir 29 | if fast_dir: 30 | print(">> Copying files to {}".format(fast_dir)) 31 | for d in self.dirs: 32 | src = os.path.join(data_dir, d) 33 | dst = os.path.join(fast_dir, d) 34 | if not os.path.isdir(dst): 35 | shutil.copytree(src, dst) 36 | print(">> Copied {}".format(d)) 37 | self.current_dir = fast_dir 38 | else: 39 | self.current_dir = data_dir 40 | 41 | if stat_log_dir: 42 | self.stat_log_file = os.path.join(stat_log_dir, 43 | self.__class__.__name__ + ".txt") 44 | 
self._ensure_statistics() 45 | 46 | def __del__(self): 47 | pass 48 | #if self.fast_dir: 49 | # print(">> Removing files from {}".format(self.fast_dir)) 50 | # for d in self.dirs: 51 | # shutil.rmtree(os.path.join(self.fast_dir, d)) 52 | 53 | def clear_statistics(self): 54 | """Delete saved statistics file if present.""" 55 | if self.stat_log_dir and os.path.isfile(self.stat_log_file): 56 | os.remove(self.stat_log_file) 57 | 58 | def _ensure_statistics(self): 59 | """Make sure we know the dataset statistics.""" 60 | if os.path.isfile(self.stat_log_file): 61 | vals = np.loadtxt(self.stat_log_file) 62 | self.mean = vals[0] 63 | self.stddev = vals[1] 64 | else: 65 | print(">> Computing statistics (mean, variance) for {}" 66 | .format(self.__class__.__name__)) 67 | mean, stddev = self.compute_statistics(self.get_raw_files()) 68 | self.mean = mean 69 | self.stddev = stddev 70 | os.makedirs(self.stat_log_dir, exist_ok=True) 71 | np.savetxt(self.stat_log_file, [mean, stddev]) 72 | print(">> Statistics complete") 73 | 74 | def get_raw_dirs(self): 75 | """Should return a list of all dirs containing training images. 76 | 77 | Note: self.current_dir should be used for loading input data. 78 | """ 79 | raise NotImplementedError() 80 | 81 | def get_raw_files(self): 82 | files = [] 83 | for d in self.get_raw_dirs(): 84 | for path in os.listdir(d): 85 | files.append(os.path.join(d, path)) 86 | return files 87 | 88 | def _fetch_if_missing(self): 89 | """A call to this must make subsequent calls to get_raw_files succeed. 90 | All subdirs of data_dir listed in self.dirs must exist after this call. 91 | """ 92 | raise NotImplementedError() 93 | 94 | def _download_and_extract(self, url, extract_to, ext='zip'): 95 | def _progress(count, block_size, total_size): 96 | if total_size > 0: 97 | print('\r>> Downloading %s %.1f%%' % (url, 98 | float(count * block_size) / float(total_size) * 100.0), end=' ') 99 | else: 100 | print('\r>> Downloading %s' % (url), end=' ') 101 | sys.stdout.flush() 102 | urlretrieve = FancyURLopener().retrieve 103 | local_zip_path = os.path.join(self.data_dir, 'tmp.' + ext) 104 | urlretrieve(url, local_zip_path, _progress) 105 | sys.stdout.write("\n>> Finished downloading. Unzipping...\n") 106 | if ext == 'zip': 107 | with zipfile.ZipFile(local_zip_path, "r") as zip_ref: 108 | zip_ref.extractall(extract_to) 109 | else: 110 | with rarfile.RarFile(local_zip_path, "r") as zip_ref: 111 | zip_ref.extractall(extract_to) 112 | 113 | sys.stdout.write(">> Finished unzipping.\n") 114 | os.remove(local_zip_path) 115 | 116 | self.clear_statistics() 117 | 118 | def compute_statistics(self, files): 119 | """Use welford's method to compute mean and variance of the given 120 | dataset. 
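For reference, a minimal NumPy sketch of the Welford update that `compute_statistics` applies sample by sample; the helper name and data are illustrative:

```python
import numpy as np

def welford_update(n, mean, M2, x):
    # One online update: fold sample x (a 3-vector of pixel values) into the
    # running count, mean, and sum of squared deviations M2.
    n += 1
    delta = x - mean
    mean = mean + delta / n
    M2 = M2 + delta * (x - mean)
    return n, mean, M2

samples = np.random.rand(1000, 3) * 255
n, mean, M2 = 0, np.zeros(3), np.zeros(3)
for x in samples:
    n, mean, M2 = welford_update(n, mean, M2, x)
stddev = np.sqrt(M2 / (n - 1))  # matches np.std(samples, axis=0, ddof=1)
```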
121 | 122 | See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm.""" 123 | 124 | assert len(files) > 1 125 | 126 | n = 0 127 | mean = np.zeros(3) 128 | M2 = np.zeros(3) 129 | for j, filename in enumerate(files): 130 | #TODO ensure the pixel values are 0..255 131 | im = np.reshape(mpimg.imread(filename) * 255, [-1, 3]) 132 | for i in range(np.shape(im)[1]): 133 | n = n + 1 134 | delta = im[i] - mean 135 | mean += delta / n 136 | M2 += delta * (im[i] - mean) 137 | sys.stdout.write('\r>> Processed %.1f%%' % ( 138 | float(j) / float(len(files)) * 100.0)) 139 | sys.stdout.flush() 140 | var = M2 / (n - 1) 141 | stddev = np.sqrt(var) 142 | return np.float32(mean), np.float32(stddev) 143 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/flow_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def atan2(y, x): 6 | angle = tf.where(tf.greater(x,0.0), tf.atan(y/x), tf.zeros_like(x)) 7 | angle = tf.where(tf.logical_and(tf.less(x,0.0), tf.greater_equal(y,0.0)), 8 | tf.atan(y/x) + np.pi, angle) 9 | angle = tf.where(tf.logical_and(tf.less(x,0.0), tf.less(y,0.0)), 10 | tf.atan(y/x) - np.pi, angle) 11 | angle = tf.where(tf.logical_and(tf.equal(x,0.0), tf.greater(y,0.0)), 12 | np.pi * tf.ones_like(x), angle) 13 | angle = tf.where(tf.logical_and(tf.equal(x,0.0), tf.less(y,0.0)), 14 | -np.pi * tf.ones_like(x), angle) 15 | angle = tf.where(tf.logical_and(tf.equal(x,0.0),tf.equal(y,0.0)), 16 | np.nan * tf.zeros_like(x), angle) 17 | return angle 18 | 19 | 20 | def flow_to_color(flow, mask=None, max_flow=None): 21 | """Converts flow to 3-channel color image. 22 | 23 | Args: 24 | flow: tensor of shape [num_batch, height, width, 2]. 25 | mask: flow validity mask of shape [num_batch, height, width, 1]. 26 | """ 27 | n = 8 28 | num_batch, height, width, _ = tf.unstack(tf.shape(flow)) 29 | mask = tf.ones([num_batch, height, width, 1]) if mask is None else mask 30 | flow_u, flow_v = tf.unstack(flow, axis=3) 31 | if max_flow is not None: 32 | max_flow = tf.maximum(max_flow, 1) 33 | else: 34 | max_flow = tf.reduce_max(tf.abs(flow * mask)) 35 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow), 3)) 36 | angle = atan2(flow_v, flow_u) 37 | 38 | im_h = tf.mod(angle / (2 * np.pi) + 1.0, 1.0) 39 | im_s = tf.clip_by_value(mag * n / max_flow, 0, 1) 40 | im_v = tf.clip_by_value(n - im_s, 0, 1) 41 | im_hsv = tf.stack([im_h, im_s, im_v], 3) 42 | im = tf.image.hsv_to_rgb(im_hsv) 43 | return im * mask 44 | 45 | 46 | def flow_error_image(flow_1, flow_2, mask_occ, mask_noc=None, log_colors=True): 47 | """Visualize the error between two flows as 3-channel color image. 48 | 49 | Adapted from the KITTI C++ devkit. 50 | 51 | Args: 52 | flow_1: first flow of shape [num_batch, height, width, 2]. 53 | flow_2: second flow (ground truth) 54 | mask_occ: flow validity mask of shape [num_batch, height, width, 1]. 55 | Equals 1 at (occluded and non-occluded) valid pixels. 56 | mask_noc: Is 1 only at valid pixels which are not occluded. 
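The HSV encoding used by `flow_to_color` above maps flow direction to hue and magnitude to saturation. A single-vector NumPy/`colorsys` sketch of the same mapping, with illustrative values:

```python
import numpy as np
import colorsys

def flow_vector_to_rgb(u, v, max_flow, n=8):
    # Direction -> hue, magnitude -> saturation, value = clip(n - s, 0, 1),
    # mirroring flow_to_color for a single flow vector.
    mag = np.sqrt(u * u + v * v)
    angle = np.arctan2(v, u)
    h = np.mod(angle / (2 * np.pi) + 1.0, 1.0)
    s = np.clip(mag * n / max_flow, 0.0, 1.0)
    val = np.clip(n - s, 0.0, 1.0)
    return colorsys.hsv_to_rgb(h, s, val)

print(flow_vector_to_rgb(3.0, -1.0, max_flow=10.0))
```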
57 | """ 58 | mask_noc = tf.ones(tf.shape(mask_occ)) if mask_noc is None else mask_noc 59 | diff_sq = (flow_1 - flow_2) ** 2 60 | diff = tf.sqrt(tf.reduce_sum(diff_sq, [3], keep_dims=True)) 61 | if log_colors: 62 | num_batch, height, width, _ = tf.unstack(tf.shape(flow_1)) 63 | colormap = [ 64 | [0,0.0625,49,54,149], 65 | [0.0625,0.125,69,117,180], 66 | [0.125,0.25,116,173,209], 67 | [0.25,0.5,171,217,233], 68 | [0.5,1,224,243,248], 69 | [1,2,254,224,144], 70 | [2,4,253,174,97], 71 | [4,8,244,109,67], 72 | [8,16,215,48,39], 73 | [16,1000000000.0,165,0,38]] 74 | colormap = np.asarray(colormap, dtype=np.float32) 75 | colormap[:, 2:5] = colormap[:, 2:5] / 255 76 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow_2), 3, keep_dims=True)) 77 | error = tf.minimum(diff / 3, 20 * diff / mag) 78 | im = tf.zeros([num_batch, height, width, 3]) 79 | for i in range(colormap.shape[0]): 80 | colors = colormap[i, :] 81 | cond = tf.logical_and(tf.greater_equal(error, colors[0]), 82 | tf.less(error, colors[1])) 83 | im = tf.where(tf.tile(cond, [1, 1, 1, 3]), 84 | tf.ones([num_batch, height, width, 1]) * colors[2:5], 85 | im) 86 | im = tf.where(tf.tile(tf.cast(mask_noc, tf.bool), [1, 1, 1, 3]), 87 | im, im * 0.5) 88 | im = im * mask_occ 89 | else: 90 | error = (tf.minimum(diff, 5) / 5) * mask_occ 91 | im_r = error # errors in occluded areas will be red 92 | im_g = error * mask_noc 93 | im_b = error * mask_noc 94 | im = tf.concat(axis=3, values=[im_r, im_g, im_b]) 95 | return im 96 | 97 | 98 | def flow_error_avg(flow_1, flow_2, mask): 99 | """Evaluates the average endpoint error between flow batches.""" 100 | with tf.variable_scope('flow_error_avg'): 101 | diff = euclidean(flow_1 - flow_2) * mask 102 | error = tf.reduce_sum(diff) / tf.reduce_sum(mask) 103 | return error 104 | 105 | 106 | def outlier_ratio(gt_flow, flow, mask, threshold=3.0, relative=0.05): 107 | diff = euclidean(gt_flow - flow) * mask 108 | if relative is not None: 109 | threshold = tf.maximum(threshold, euclidean(gt_flow) * relative) 110 | outliers = tf.cast(tf.greater_equal(diff, threshold), tf.float32) 111 | else: 112 | outliers = tf.cast(tf.greater_equal(diff, threshold), tf.float32) 113 | ratio = tf.reduce_sum(outliers) / tf.reduce_sum(mask) 114 | return ratio 115 | 116 | 117 | def outlier_pct(gt_flow, flow, mask, threshold=3.0, relative=0.05): 118 | frac = outlier_ratio(gt_flow, flow, mask, threshold, relative) * 100 119 | return frac 120 | 121 | 122 | def euclidean(t): 123 | return tf.sqrt(tf.reduce_sum(t ** 2, [3], keep_dims=True)) 124 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/flownet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import tensorflow.contrib.layers as layers 4 | 5 | from ..ops import correlation 6 | from .image_warp import image_warp 7 | 8 | from .flow_util import flow_to_color 9 | 10 | 11 | FLOW_SCALE = 5.0 12 | 13 | # (Yuliang) Add reuse_variables for forward flow 14 | def flownet(im1, im2, flownet_spec='S', full_resolution=False, train_all=False, 15 | backward_flow=False, reuse=False): 16 | num_batch, height, width, _ = tf.unstack(tf.shape(im1)) 17 | flownet_num = len(flownet_spec) 18 | assert flownet_num > 0 19 | flows_fw = [] 20 | flows_bw = [] 21 | for i, name in enumerate(flownet_spec): 22 | assert name in ('C', 'c', 'S', 's') 23 | channel_mult = 1 if name in ('C', 'S') else 3 / 8 24 | full_res = full_resolution and i == flownet_num - 1 25 
| 26 | def scoped_block(reuse=False): 27 | if name.lower() == 'c': 28 | assert i == 0, 'FlowNetS must be used for refinement networks' 29 | 30 | with tf.variable_scope('flownet_c_features') as scope: 31 | # (Yuliang) 32 | if reuse: 33 | scope.reuse_variables() 34 | _, conv2_a, conv3_a = flownet_c_features(im1, channel_mult=channel_mult) 35 | _, conv2_b, conv3_b = flownet_c_features(im2, channel_mult=channel_mult, reuse=True) 36 | 37 | with tf.variable_scope('flownet_c') as scope: 38 | # (Yuliang) 39 | if reuse: 40 | scope.reuse_variables() 41 | flow_fw = flownet_c(conv3_a, conv3_b, conv2_a, 42 | full_res=full_res, 43 | channel_mult=channel_mult) 44 | flows_fw.append(flow_fw) 45 | if backward_flow: 46 | scope.reuse_variables() 47 | flow_bw = flownet_c(conv3_b, conv3_a, conv2_b, 48 | full_res=full_res, 49 | channel_mult=channel_mult) 50 | flows_bw.append(flow_bw) 51 | elif name.lower() == 's': 52 | def _flownet_s(im1, im2, flow=None): 53 | if flow is not None: 54 | flow = tf.image.resize_bilinear(flow, [height, width]) * 4 * FLOW_SCALE 55 | warp = image_warp(im2, flow) 56 | diff = tf.abs(warp - im1) 57 | if not train_all: 58 | flow = tf.stop_gradient(flow) 59 | warp = tf.stop_gradient(warp) 60 | diff = tf.stop_gradient(diff) 61 | 62 | inputs = tf.concat([im1, im2, flow, warp, diff], axis=3) 63 | inputs = tf.reshape(inputs, [num_batch, height, width, 14]) 64 | else: 65 | inputs = tf.concat([im1, im2], 3) 66 | return flownet_s(inputs, 67 | full_res=full_res, 68 | channel_mult=channel_mult) 69 | stacked = len(flows_fw) > 0 70 | with tf.variable_scope('flownet_s') as scope: 71 | # (Yuliang) 72 | if reuse: 73 | scope.reuse_variables() 74 | flow_fw = _flownet_s(im1, im2, flows_fw[-1][0] if stacked else None) 75 | flows_fw.append(flow_fw) 76 | if backward_flow: 77 | scope.reuse_variables() 78 | flow_bw = _flownet_s(im2, im1, flows_bw[-1][0] if stacked else None) 79 | flows_bw.append(flow_bw) 80 | 81 | if i > 0: 82 | scope_name = "stack_{}_flownet".format(i) 83 | with tf.variable_scope(scope_name): 84 | scoped_block(reuse) 85 | else: 86 | scoped_block(reuse) 87 | 88 | if backward_flow: 89 | return flows_fw, flows_bw 90 | return flows_fw 91 | 92 | 93 | def _leaky_relu(x): 94 | with tf.variable_scope('leaky_relu'): 95 | return tf.maximum(0.1 * x, x) 96 | 97 | 98 | def _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2, conv1=None, inputs=None, 99 | channel_mult=1, full_res=False, channels=2): 100 | m = channel_mult 101 | 102 | flow6 = slim.conv2d(conv6_1, channels, 3, scope='flow6', 103 | activation_fn=None) 104 | deconv5 = slim.conv2d_transpose(conv6_1, int(512 * m), 4, stride=2, 105 | scope='deconv5') 106 | flow6_up5 = slim.conv2d_transpose(flow6, channels, 4, stride=2, 107 | scope='flow6_up5', 108 | activation_fn=None) 109 | concat5 = tf.concat([conv5_1, deconv5, flow6_up5], 1) 110 | flow5 = slim.conv2d(concat5, channels, 3, scope='flow5', 111 | activation_fn=None) 112 | 113 | deconv4 = slim.conv2d_transpose(concat5, int(256 * m), 4, stride=2, 114 | scope='deconv4') 115 | flow5_up4 = slim.conv2d_transpose(flow5, channels, 4, stride=2, 116 | scope='flow5_up4', 117 | activation_fn=None) 118 | concat4 = tf.concat([conv4_1, deconv4, flow5_up4], 1) 119 | flow4 = slim.conv2d(concat4, channels, 3, scope='flow4', 120 | activation_fn=None) 121 | 122 | deconv3 = slim.conv2d_transpose(concat4, int(128 * m), 4, stride=2, 123 | scope='deconv3') 124 | flow4_up3 = slim.conv2d_transpose(flow4, channels, 4, stride=2, 125 | scope='flow4_up3', 126 | activation_fn=None) 127 | concat3 = tf.concat([conv3_1, 
deconv3, flow4_up3], 1) 128 | flow3 = slim.conv2d(concat3, channels, 3, scope='flow3', 129 | activation_fn=None) 130 | 131 | deconv2 = slim.conv2d_transpose(concat3, int(64 * m), 4, stride=2, 132 | scope='deconv2') 133 | flow3_up2 = slim.conv2d_transpose(flow3, channels, 4, stride=2, 134 | scope='flow3_up2', 135 | activation_fn=None) 136 | concat2 = tf.concat([conv2, deconv2, flow3_up2], 1) 137 | flow2 = slim.conv2d(concat2, channels, 3, scope='flow2', 138 | activation_fn=None) 139 | 140 | flows = [flow2, flow3, flow4, flow5, flow6] 141 | 142 | if full_res: 143 | with tf.variable_scope('full_res'): 144 | deconv1 = slim.conv2d_transpose(concat2, int(32 * m), 4, stride=2, 145 | scope='deconv1') 146 | flow2_up1 = slim.conv2d_transpose(flow2, channels, 4, stride=2, 147 | scope='flow2_up1', 148 | activation_fn=None) 149 | concat1 = tf.concat([conv1, deconv1, flow2_up1], 1) 150 | flow1 = slim.conv2d(concat1, channels, 3, scope='flow1', 151 | activation_fn=None) 152 | 153 | deconv0 = slim.conv2d_transpose(concat1, int(16 * m), 4, stride=2, 154 | scope='deconv0') 155 | flow1_up0 = slim.conv2d_transpose(flow1, channels, 4, stride=2, 156 | scope='flow1_up0', 157 | activation_fn=None) 158 | concat0 = tf.concat([inputs, deconv0, flow1_up0], 1) 159 | flow0 = slim.conv2d(concat0, channels, 3, scope='flow0', 160 | activation_fn=None) 161 | 162 | flows = [flow0, flow1] + flows 163 | 164 | return flows 165 | 166 | 167 | def nhwc_to_nchw(tensors): 168 | return [tf.transpose(t, [0, 3, 1, 2]) for t in tensors] 169 | 170 | 171 | def nchw_to_nhwc(tensors): 172 | return [tf.transpose(t, [0, 2, 3, 1]) for t in tensors] 173 | 174 | 175 | def flownet_s(inputs, channel_mult=1, full_res=False): 176 | """Given stacked inputs, returns flow predictions in decreasing resolution. 177 | 178 | Uses FlowNetSimple. 
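A usage sketch for the `flownet` wrapper defined above, mirroring how the callers in `supervised.py` and `unsupervised.py` below consume its output. Shapes and the 'CS' spec are illustrative, and `flownet` and `FLOW_SCALE` from this module are assumed to be in scope:

```python
import tensorflow as tf

im1 = tf.placeholder(tf.float32, [4, 320, 1152, 3])  # normalized image batch
im2 = tf.placeholder(tf.float32, [4, 320, 1152, 3])
flows_fw = flownet(im1, im2, flownet_spec='CS')       # one entry per stacked net
flow2 = flows_fw[-1][0]                               # finest prediction of the last net
final_flow = tf.image.resize_bilinear(flow2, [320, 1152]) * FLOW_SCALE * 4
```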
179 | """ 180 | m = channel_mult 181 | inputs = nhwc_to_nchw([inputs])[0] 182 | 183 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 184 | data_format='NCHW', 185 | weights_regularizer=slim.l2_regularizer(0.0004), 186 | weights_initializer=layers.variance_scaling_initializer(), 187 | activation_fn=_leaky_relu): 188 | conv1 = slim.conv2d(inputs, int(64 * m), 7, stride=2, scope='conv1') 189 | conv2 = slim.conv2d(conv1, int(128 * m), 5, stride=2, scope='conv2') 190 | conv3 = slim.conv2d(conv2, int(256 * m), 5, stride=2, scope='conv3') 191 | conv3_1 = slim.conv2d(conv3, int(256 * m), 3, stride=1, scope='conv3_1') 192 | conv4 = slim.conv2d(conv3_1, int(512 * m), 3, stride=2, scope='conv4') 193 | conv4_1 = slim.conv2d(conv4, int(512 * m), 3, stride=1, scope='conv4_1') 194 | conv5 = slim.conv2d(conv4_1, int(512 * m), 3, stride=2, scope='conv5') 195 | conv5_1 = slim.conv2d(conv5, int(512 * m), 3, stride=1, scope='conv5_1') 196 | conv6 = slim.conv2d(conv5_1, int(1024 * m), 3, stride=2, scope='conv6') 197 | conv6_1 = slim.conv2d(conv6, int(1024 * m), 3, stride=1, scope='conv6_1') 198 | 199 | res = _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2, conv1, inputs, 200 | channel_mult=channel_mult, full_res=full_res) 201 | return nchw_to_nhwc(res) 202 | 203 | 204 | def flownet_c_features(im, channel_mult=1, reuse=None): 205 | m = channel_mult 206 | im = nhwc_to_nchw([im])[0] 207 | with slim.arg_scope([slim.conv2d], 208 | data_format='NCHW', 209 | weights_regularizer=slim.l2_regularizer(0.0004), 210 | weights_initializer=layers.variance_scaling_initializer(), 211 | activation_fn=_leaky_relu): 212 | conv1 = slim.conv2d(im, int(64 * m), 7, stride=2, scope='conv1', reuse=reuse) 213 | conv2 = slim.conv2d(conv1, int(128 * m), 5, stride=2, scope='conv2', reuse=reuse) 214 | conv3 = slim.conv2d(conv2, int(256 * m), 5, stride=2, scope='conv3', reuse=reuse) 215 | return conv1, conv2, conv3 216 | 217 | 218 | def flownet_c(conv3_a, conv3_b, conv2_a, channel_mult=1, full_res=False): 219 | """Given two images, returns flow predictions in decreasing resolution. 220 | 221 | Uses FlowNetCorr. 
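The `correlation` op called below builds a cost volume between the two conv3 feature maps. A rough single-position NumPy sketch of the idea for kernel_size=1; the helper and its normalization are approximations, not the op's exact semantics:

```python
import numpy as np

def correlation_at(f1, f2, y, x, max_displacement=20, stride_2=2):
    # Dot products between the feature at (y, x) in f1 and features of f2 at
    # strided displacements within +/- max_displacement.
    h, w, c = f2.shape
    costs = []
    for dy in range(-max_displacement, max_displacement + 1, stride_2):
        for dx in range(-max_displacement, max_displacement + 1, stride_2):
            yy, xx = y + dy, x + dx
            if 0 <= yy < h and 0 <= xx < w:
                costs.append(float(np.dot(f1[y, x], f2[yy, xx])) / c)
            else:
                costs.append(0.0)
    return np.array(costs)  # one matching cost per candidate displacement

f1 = np.random.rand(48, 160, 256)
f2 = np.random.rand(48, 160, 256)
print(correlation_at(f1, f2, 24, 80).shape)  # (441,) for 21 x 21 displacements
```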
222 | """ 223 | m = channel_mult 224 | 225 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 226 | data_format='NCHW', 227 | weights_regularizer=slim.l2_regularizer(0.0004), 228 | weights_initializer=layers.variance_scaling_initializer(), 229 | activation_fn=_leaky_relu): 230 | corr = correlation(conv3_a, conv3_b, 231 | pad=20, kernel_size=1, max_displacement=20, stride_1=1, stride_2=2) 232 | 233 | conv_redir = slim.conv2d(conv3_a, int(32 * m), 1, stride=1, scope='conv_redir') 234 | 235 | conv3_1 = slim.conv2d(tf.concat([conv_redir, corr], 1), int(256 * m), 3, 236 | stride=1, scope='conv3_1') 237 | conv4 = slim.conv2d(conv3_1, int(512 * m), 3, stride=2, scope='conv4') 238 | conv4_1 = slim.conv2d(conv4, int(512 * m), 3, stride=1, scope='conv4_1') 239 | conv5 = slim.conv2d(conv4_1, int(512 * m), 3, stride=2, scope='conv5') 240 | conv5_1 = slim.conv2d(conv5, int(512 * m), 3, stride=1, scope='conv5_1') 241 | conv6 = slim.conv2d(conv5_1, int(1024 * m), 3, stride=2, scope='conv6') 242 | conv6_1 = slim.conv2d(conv6, int(1024 * m), 3, stride=1, scope='conv6_1') 243 | 244 | res = _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2_a, 245 | channel_mult=channel_mult, full_res=full_res) 246 | return nchw_to_nhwc(res) 247 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/image_warp.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def image_warp(im, flow): 5 | """Performs a backward warp of an image using the predicted flow. 6 | 7 | Args: 8 | im: Batch of images. [num_batch, height, width, channels] 9 | flow: Batch of flow vectors. [num_batch, height, width, 2] 10 | Returns: 11 | warped: transformed image of the same shape as the input image. 12 | """ 13 | with tf.variable_scope('image_warp'): 14 | 15 | num_batch, height, width, channels = tf.unstack(tf.shape(im)) 16 | max_x = tf.cast(width - 1, 'int32') 17 | max_y = tf.cast(height - 1, 'int32') 18 | zero = tf.zeros([], dtype='int32') 19 | 20 | # We have to flatten our tensors to vectorize the interpolation 21 | im_flat = tf.reshape(im, [-1, channels]) 22 | flow_flat = tf.reshape(flow, [-1, 2]) 23 | 24 | # Floor the flow, as the final indices are integers 25 | # The fractional part is used to control the bilinear interpolation. 
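        # Worked example (illustrative numbers): a flow of (0.3, -1.6) at pixel
        # (x, y) samples from (x + 0.3, y - 1.6). flow_floor is then (0, -2),
        # the fractional weights (xw, yw) are (0.3, 0.4), and the four gathered
        # pixels around (x, y - 2) are blended with weights wa = 0.42,
        # wb = 0.28, wc = 0.18, wd = 0.12, which sum to 1.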
26 | flow_floor = tf.to_int32(tf.floor(flow_flat)) 27 | bilinear_weights = flow_flat - tf.floor(flow_flat) 28 | 29 | # Construct base indices which are displaced with the flow 30 | pos_x = tf.tile(tf.range(width), [height * num_batch]) 31 | grid_y = tf.tile(tf.expand_dims(tf.range(height), 1), [1, width]) 32 | pos_y = tf.tile(tf.reshape(grid_y, [-1]), [num_batch]) 33 | 34 | x = flow_floor[:, 0] 35 | y = flow_floor[:, 1] 36 | xw = bilinear_weights[:, 0] 37 | yw = bilinear_weights[:, 1] 38 | 39 | # Compute interpolation weights for 4 adjacent pixels 40 | # expand to num_batch * height * width x 1 for broadcasting in add_n below 41 | wa = tf.expand_dims((1 - xw) * (1 - yw), 1) # top left pixel 42 | wb = tf.expand_dims((1 - xw) * yw, 1) # bottom left pixel 43 | wc = tf.expand_dims(xw * (1 - yw), 1) # top right pixel 44 | wd = tf.expand_dims(xw * yw, 1) # bottom right pixel 45 | 46 | x0 = pos_x + x 47 | x1 = x0 + 1 48 | y0 = pos_y + y 49 | y1 = y0 + 1 50 | 51 | x0 = tf.clip_by_value(x0, zero, max_x) 52 | x1 = tf.clip_by_value(x1, zero, max_x) 53 | y0 = tf.clip_by_value(y0, zero, max_y) 54 | y1 = tf.clip_by_value(y1, zero, max_y) 55 | 56 | dim1 = width * height 57 | batch_offsets = tf.range(num_batch) * dim1 58 | base_grid = tf.tile(tf.expand_dims(batch_offsets, 1), [1, dim1]) 59 | base = tf.reshape(base_grid, [-1]) 60 | 61 | base_y0 = base + y0 * width 62 | base_y1 = base + y1 * width 63 | idx_a = base_y0 + x0 64 | idx_b = base_y1 + x0 65 | idx_c = base_y0 + x1 66 | idx_d = base_y1 + x1 67 | 68 | Ia = tf.gather(im_flat, idx_a) 69 | Ib = tf.gather(im_flat, idx_b) 70 | Ic = tf.gather(im_flat, idx_c) 71 | Id = tf.gather(im_flat, idx_d) 72 | 73 | warped_flat = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) 74 | warped = tf.reshape(warped_flat, [num_batch, height, width, channels]) 75 | 76 | return warped 77 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/input.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | from .augment import random_crop 8 | 9 | 10 | def resize_input(t, height, width, resized_h, resized_w): 11 | # Undo old resizing and apply bilinear 12 | t = tf.reshape(t, [resized_h, resized_w, 3]) 13 | t = tf.expand_dims(tf.image.resize_image_with_crop_or_pad(t, height, width), 0) 14 | return tf.image.resize_bilinear(t, [resized_h, resized_w]) 15 | 16 | 17 | def resize_output_crop(t, height, width, channels): 18 | _, oldh, oldw, c = tf.unstack(tf.shape(t)) 19 | t = tf.reshape(t, [oldh, oldw, c]) 20 | t = tf.image.resize_image_with_crop_or_pad(t, height, width) 21 | return tf.reshape(t, [1, height, width, channels]) 22 | 23 | 24 | def resize_output(t, height, width, channels): 25 | return tf.image.resize_bilinear(t, [height, width]) 26 | 27 | 28 | def resize_output_flow(t, height, width, channels): 29 | batch, old_height, old_width, _ = tf.unstack(tf.shape(t), num=4) 30 | t = tf.image.resize_bilinear(t, [height, width]) 31 | u, v = tf.unstack(t, axis=3) 32 | u *= tf.cast(width, tf.float32) / tf.cast(old_width, tf.float32) 33 | v *= tf.cast(height, tf.float32) / tf.cast(old_height, tf.float32) 34 | return tf.reshape(tf.stack([u, v], axis=3), [batch, height, width, 2]) 35 | 36 | 37 | def frame_name_to_num(name): 38 | stripped = name.split('.')[0].lstrip('0') 39 | if stripped == '': 40 | return 0 41 | return int(stripped) 42 | 43 | 44 | class Input(): 45 | mean = [104.920005, 110.1753, 114.785955] 46 | 
stddev = 1 / 0.0039216 47 | 48 | def __init__(self, data, batch_size, dims, *, 49 | num_threads=1, normalize=True, 50 | skipped_frames=False): 51 | assert len(dims) == 2 52 | self.data = data 53 | self.dims = dims 54 | self.batch_size = batch_size 55 | self.num_threads = num_threads 56 | self.normalize = normalize 57 | self.skipped_frames = skipped_frames 58 | 59 | def _resize_crop_or_pad(self, tensor): 60 | height, width = self.dims 61 | # return tf.image.resize_bilinear(tf.expand_dims(tensor, 0), [height, width]) 62 | return tf.image.resize_image_with_crop_or_pad(tensor, height, width) 63 | 64 | def _resize_image_fixed(self, image): 65 | height, width = self.dims 66 | return tf.reshape(self._resize_crop_or_pad(image), [height, width, 3]) 67 | 68 | def _normalize_image(self, image): 69 | return (image - self.mean) / self.stddev 70 | 71 | def _preprocess_image(self, image): 72 | image = self._resize_image_fixed(image) 73 | if self.normalize: 74 | image = self._normalize_image(image) 75 | return image 76 | 77 | def _input_images(self, image_dir, hold_out_inv=None): 78 | """Assumes that paired images are next to each other after ordering the 79 | files. 80 | """ 81 | image_dir = os.path.join(self.data.current_dir, image_dir) 82 | 83 | filenames_1 = [] 84 | filenames_2 = [] 85 | image_files = os.listdir(image_dir) 86 | image_files.sort() 87 | 88 | assert len(image_files) % 2 == 0, 'expected pairs of images' 89 | 90 | for i in range(len(image_files) // 2): 91 | filenames_1.append(os.path.join(image_dir, image_files[i * 2])) 92 | filenames_2.append(os.path.join(image_dir, image_files[i * 2 + 1])) 93 | 94 | if hold_out_inv is not None: 95 | filenames = list(zip(filenames_1, filenames_2)) 96 | random.seed(0) 97 | random.shuffle(filenames) 98 | filenames = filenames[:hold_out_inv] 99 | 100 | filenames_1, filenames_2 = zip(*filenames) 101 | filenames_1 = list(filenames_1) 102 | filenames_2 = list(filenames_2) 103 | 104 | input_1 = read_png_image(filenames_1, 1) 105 | input_2 = read_png_image(filenames_2, 1) 106 | image_1 = self._preprocess_image(input_1) 107 | image_2 = self._preprocess_image(input_2) 108 | return tf.shape(input_1), image_1, image_2 109 | 110 | def _input_test(self, image_dir, hold_out_inv=None): 111 | input_shape, im1, im2 = self._input_images(image_dir, hold_out_inv) 112 | return tf.train.batch( 113 | [im1, im2, input_shape], 114 | batch_size=self.batch_size, 115 | num_threads=self.num_threads, 116 | allow_smaller_final_batch=True) 117 | 118 | def get_normalization(self): 119 | return self.mean, self.stddev 120 | 121 | def input_raw(self, swap_images=True, sequence=True, 122 | needs_crop=True, shift=0, seed=0, 123 | center_crop=False, skip=0): 124 | """Constructs input of raw data. 125 | 126 | Args: 127 | sequence: Assumes that image file order in data_dirs corresponds to 128 | temporal order, if True. Otherwise, assumes uncorrelated pairs of 129 | images in lexicographical ordering. 130 | shift: number of examples to shift the input queue by. 131 | Useful to resume training. 132 | swap_images: for each pair (im1, im2), also include (im2, im1) 133 | seed: seed for filename shuffling. 
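For scale, a quick check of the normalization constants defined on `Input` above (`stddev = 1 / 0.0039216`, roughly 255), showing the value range fed to the network:

```python
import numpy as np

mean = np.array([104.920005, 110.1753, 114.785955])
stddev = 1 / 0.0039216                        # ~255.0
print((np.zeros(3) - mean) / stddev)          # ~[-0.41, -0.43, -0.45]
print((np.full(3, 255.0) - mean) / stddev)    # ~[ 0.59,  0.57,  0.55]
```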
134 | Returns: 135 | image_1: batch of first images 136 | image_2: batch of second images 137 | """ 138 | if not isinstance(skip, list): 139 | skip = [skip] 140 | 141 | data_dirs = self.data.get_raw_dirs() 142 | height, width = self.dims 143 | #assert batch_size % 2 == 0 144 | 145 | filenames = [] 146 | for dir_path in data_dirs: 147 | files = os.listdir(dir_path) 148 | files.sort() 149 | if sequence: 150 | steps = [1 + s for s in skip] 151 | stops = [len(files) - s for s in steps] 152 | else: 153 | steps = [2] 154 | stops = [len(files)] 155 | assert len(files) % 2 == 0 156 | for step, stop in zip(steps, stops): 157 | for i in range(0, stop, step): 158 | if self.skipped_frames and sequence: 159 | assert step == 1 160 | num_first = frame_name_to_num(files[i]) 161 | num_second = frame_name_to_num(files[i+1]) 162 | if num_first + 1 != num_second: 163 | continue 164 | fn1 = os.path.join(dir_path, files[i]) 165 | fn2 = os.path.join(dir_path, files[i + 1]) 166 | filenames.append((fn1, fn2)) 167 | 168 | random.seed(seed) 169 | random.shuffle(filenames) 170 | print("Training on {} frame pairs.".format(len(filenames))) 171 | 172 | filenames_extended = [] 173 | for fn1, fn2 in filenames: 174 | filenames_extended.append((fn1, fn2)) 175 | if swap_images: 176 | filenames_extended.append((fn2, fn1)) 177 | 178 | shift = shift % len(filenames_extended) 179 | filenames_extended = list(np.roll(filenames_extended, shift)) 180 | 181 | 182 | filenames_1, filenames_2 = zip(*filenames_extended) 183 | filenames_1 = list(filenames_1) 184 | filenames_2 = list(filenames_2) 185 | 186 | with tf.variable_scope('train_inputs'): 187 | image_1 = read_png_image(filenames_1) 188 | image_2 = read_png_image(filenames_2) 189 | 190 | if needs_crop: 191 | #if center_crop: 192 | # image_1 = tf.image.resize_image_with_crop_or_pad(image_1, height, width) 193 | # image_2 = tf.image.resize_image_with_crop_or_pad(image_1, height, width) 194 | #else: 195 | image_1, image_2 = random_crop([image_1, image_2], [height, width, 3]) 196 | else: 197 | image_1 = tf.reshape(image_1, [height, width, 3]) 198 | image_2 = tf.reshape(image_2, [height, width, 3]) 199 | 200 | if self.normalize: 201 | image_1 = self._normalize_image(image_1) 202 | image_2 = self._normalize_image(image_2) 203 | 204 | return tf.train.batch( 205 | [image_1, image_2], 206 | batch_size=self.batch_size, 207 | num_threads=self.num_threads) 208 | 209 | 210 | def read_png_image(filenames, num_epochs=None): 211 | """Given a list of filenames, constructs a reader op for images.""" 212 | filename_queue = tf.train.string_input_producer(filenames, 213 | shuffle=False, capacity=len(filenames)) 214 | reader = tf.WholeFileReader() 215 | _, value = reader.read(filename_queue) 216 | image_uint8 = tf.image.decode_png(value, channels=3) 217 | image = tf.cast(image_uint8, tf.float32) 218 | return image 219 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/spatial_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import tensorflow as tf 16 | 17 | 18 | def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): 19 | """Spatial Transformer Layer 20 | Implements a spatial transformer layer as described in [1]_. 21 | Based on [2]_ and edited by David Dao for Tensorflow. 22 | Parameters 23 | ---------- 24 | U : float 25 | The output of a convolutional net should have the 26 | shape [num_batch, height, width, num_channels]. 27 | theta: float 28 | The output of the 29 | localisation network should be [num_batch, 6]. 30 | out_size: tuple of two ints 31 | The size of the output of the network (height, width) 32 | References 33 | ---------- 34 | .. [1] Spatial Transformer Networks 35 | Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu 36 | Submitted on 5 Jun 2015 37 | .. [2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py 38 | Notes 39 | ----- 40 | To initialize the network to the identity transform init 41 | ``theta`` to : 42 | identity = np.array([[1., 0., 0.], 43 | [0., 1., 0.]]) 44 | identity = identity.flatten() 45 | theta = tf.Variable(initial_value=identity) 46 | """ 47 | 48 | def _repeat(x, n_repeats): 49 | with tf.variable_scope('_repeat'): 50 | rep = tf.transpose( 51 | tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0]) 52 | rep = tf.cast(rep, 'int32') 53 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 54 | return tf.reshape(x, [-1]) 55 | 56 | def _interpolate(im, x, y, out_size): 57 | with tf.variable_scope('_interpolate'): 58 | # constants 59 | num_batch = tf.shape(im)[0] 60 | height = tf.shape(im)[1] 61 | width = tf.shape(im)[2] 62 | channels = tf.shape(im)[3] 63 | 64 | x = tf.cast(x, 'float32') 65 | y = tf.cast(y, 'float32') 66 | height_f = tf.cast(height, 'float32') 67 | width_f = tf.cast(width, 'float32') 68 | out_height = out_size[0] 69 | out_width = out_size[1] 70 | zero = tf.zeros([], dtype='int32') 71 | max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') 72 | max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') 73 | 74 | # scale indices from [-1, 1] to [0, width/height] 75 | x = (x + 1.0)*(width_f) / 2.0 76 | y = (y + 1.0)*(height_f) / 2.0 77 | 78 | # do sampling 79 | x0 = tf.cast(tf.floor(x), 'int32') 80 | x1 = x0 + 1 81 | y0 = tf.cast(tf.floor(y), 'int32') 82 | y1 = y0 + 1 83 | 84 | x0 = tf.clip_by_value(x0, zero, max_x) 85 | x1 = tf.clip_by_value(x1, zero, max_x) 86 | y0 = tf.clip_by_value(y0, zero, max_y) 87 | y1 = tf.clip_by_value(y1, zero, max_y) 88 | dim2 = width 89 | dim1 = width*height 90 | base = _repeat(tf.range(num_batch)*dim1, out_height*out_width) 91 | base_y0 = base + y0*dim2 92 | base_y1 = base + y1*dim2 93 | idx_a = base_y0 + x0 94 | idx_b = base_y1 + x0 95 | idx_c = base_y0 + x1 96 | idx_d = base_y1 + x1 97 | 98 | # use indices to lookup pixels in the flat image and restore 99 | # channels dim 100 | im_flat = tf.reshape(im, tf.stack([-1, channels])) 101 | im_flat = tf.cast(im_flat, 'float32') 102 | Ia = tf.gather(im_flat, idx_a) 103 | Ib = tf.gather(im_flat, idx_b) 104 | Ic = 
tf.gather(im_flat, idx_c) 105 | Id = tf.gather(im_flat, idx_d) 106 | 107 | # and finally calculate interpolated values 108 | x0_f = tf.cast(x0, 'float32') 109 | x1_f = tf.cast(x1, 'float32') 110 | y0_f = tf.cast(y0, 'float32') 111 | y1_f = tf.cast(y1, 'float32') 112 | wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1) 113 | wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1) 114 | wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1) 115 | wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1) 116 | output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id]) 117 | return output 118 | 119 | def _meshgrid(height, width): 120 | with tf.variable_scope('_meshgrid'): 121 | # This should be equivalent to: 122 | # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), 123 | # np.linspace(-1, 1, height)) 124 | # ones = np.ones(np.prod(x_t.shape)) 125 | # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) 126 | x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), 127 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) 128 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 129 | tf.ones(shape=tf.stack([1, width]))) 130 | 131 | x_t_flat = tf.reshape(x_t, (1, -1)) 132 | y_t_flat = tf.reshape(y_t, (1, -1)) 133 | 134 | ones = tf.ones_like(x_t_flat) 135 | grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) 136 | return grid 137 | 138 | def _transform(theta, input_dim, out_size): 139 | with tf.variable_scope('_transform'): 140 | num_batch = tf.shape(input_dim)[0] 141 | height = tf.shape(input_dim)[1] 142 | width = tf.shape(input_dim)[2] 143 | num_channels = tf.shape(input_dim)[3] 144 | theta = tf.reshape(theta, (-1, 2, 3)) 145 | theta = tf.cast(theta, 'float32') 146 | 147 | # grid of (x_t, y_t, 1), eq (1) in ref [1] 148 | height_f = tf.cast(height, 'float32') 149 | width_f = tf.cast(width, 'float32') 150 | out_height = out_size[0] 151 | out_width = out_size[1] 152 | grid = _meshgrid(out_height, out_width) 153 | grid = tf.expand_dims(grid, 0) 154 | grid = tf.reshape(grid, [-1]) 155 | grid = tf.tile(grid, tf.stack([num_batch])) 156 | grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) 157 | 158 | # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) 159 | T_g = tf.matmul(theta, grid) 160 | x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) 161 | y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) 162 | x_s_flat = tf.reshape(x_s, [-1]) 163 | y_s_flat = tf.reshape(y_s, [-1]) 164 | 165 | input_transformed = _interpolate( 166 | input_dim, x_s_flat, y_s_flat, 167 | out_size) 168 | 169 | output = tf.reshape( 170 | input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) 171 | return output 172 | 173 | with tf.variable_scope(name): 174 | output = _transform(theta, U, out_size) 175 | return output 176 | 177 | 178 | def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'): 179 | """Batch Spatial Transformer Layer 180 | Parameters 181 | ---------- 182 | U : float 183 | tensor of inputs [num_batch,height,width,num_channels] 184 | thetas : float 185 | a set of transformations for each input [num_batch,num_transforms,6] 186 | out_size : int 187 | the size of the output [out_height,out_width] 188 | Returns: float 189 | Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] 190 | """ 191 | with tf.variable_scope(name): 192 | num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) 193 | indices = [[i]*num_transforms for i in xrange(num_batch)] 194 | input_repeated = tf.gather(U, tf.reshape(indices, [-1])) 195 | return transformer(input_repeated, thetas, 
out_size) 196 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/supervised.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | 5 | from .augment import random_photometric 6 | from .flow_util import flow_to_color 7 | from .losses import charbonnier_loss 8 | from .flownet import flownet 9 | from .unsupervised import _track_image, _track_loss, FLOW_SCALE 10 | 11 | 12 | def supervised_loss(batch, params, normalization=None): 13 | channel_mean = tf.constant(normalization[0]) / 255.0 14 | im1, im2, flow_gt, mask_gt = batch 15 | im1 = im1 / 255.0 16 | im2 = im2 / 255.0 17 | im_shape = tf.shape(im1)[1:3] 18 | 19 | # ------------------------------------------------------------------------- 20 | 21 | im1_photo, im2_photo = random_photometric( 22 | [im1, im2], 23 | noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3, 24 | brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, 25 | min_gamma=0.7, max_gamma=1.5) 26 | 27 | _track_image(im1_photo, 'im1_photo') 28 | _track_image(im2_photo, 'im2_photo') 29 | _track_image(flow_to_color(flow_gt), 'flow_gt') 30 | _track_image(mask_gt, 'mask_gt') 31 | 32 | # Images for neural network input with mean-zero values in [-1, 1] 33 | im1_photo = im1_photo - channel_mean 34 | im2_photo = im2_photo - channel_mean 35 | 36 | flownet_spec = params.get('flownet', 'S') 37 | full_resolution = params.get('full_res') 38 | train_all = params.get('train_all') 39 | # ------------------------------------------------------------------------- 40 | # FlowNet 41 | flows_fw = flownet(im1_photo, im2_photo, 42 | flownet_spec=flownet_spec, 43 | full_resolution=full_resolution, 44 | train_all=train_all) 45 | 46 | if not train_all: 47 | flows_fw = [flows_fw[-1]] 48 | final_loss = 0.0 49 | for i, net_flows in enumerate(reversed(flows_fw)): 50 | flow_fw = net_flows[0] 51 | if params.get('full_res'): 52 | final_flow_fw = flow_fw * FLOW_SCALE * 4 53 | else: 54 | final_flow_fw = tf.image.resize_bilinear(flow_fw, im_shape) * FLOW_SCALE * 4 55 | _track_image(flow_to_color(final_flow_fw), 'flow_pred_' + str(i)) 56 | 57 | net_loss = charbonnier_loss(final_flow_fw - flow_gt, mask_gt) 58 | final_loss += net_loss / (2 ** i) 59 | 60 | regularization_loss = tf.add_n(slim.losses.get_regularization_losses()) 61 | final_loss += regularization_loss 62 | _track_loss(regularization_loss, 'loss/regularization') 63 | _track_loss(final_loss, 'loss/combined') 64 | 65 | return final_loss 66 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/unsupervised.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | 5 | from .augment import random_affine, random_photometric 6 | from .flow_util import flow_to_color 7 | from .util import resize_area, resize_bilinear 8 | from .losses import compute_losses, create_border_mask 9 | from ..ops import downsample 10 | from .image_warp import image_warp 11 | from .flownet import flownet, FLOW_SCALE 12 | 13 | 14 | # REGISTER ALL POSSIBLE LOSS TERMS 15 | LOSSES = ['occ', 'sym', 'fb', 'grad', 'ternary', 'photo', 'smooth_1st', 'smooth_2nd'] 16 | 17 | 18 | def _track_loss(op, name): 19 | tf.add_to_collection('losses', tf.identity(op, name=name)) 20 | 21 | 22 | def _track_image(op, name): 23 | name = 'train/' 
+ name 24 | tf.add_to_collection('train_images', tf.identity(op, name=name)) 25 | 26 | 27 | def unsupervised_loss(batch, params, normalization=None, augment=True, 28 | return_flow=False): 29 | channel_mean = tf.constant(normalization[0]) / 255.0 30 | im1, im2 = batch 31 | im1 = im1 / 255.0 32 | im2 = im2 / 255.0 33 | im_shape = tf.shape(im1)[1:3] 34 | 35 | # ------------------------------------------------------------------------- 36 | # Data & mask augmentation 37 | border_mask = create_border_mask(im1, 0.1) 38 | 39 | if augment: 40 | im1_geo, im2_geo, border_mask_global = random_affine( 41 | [im1, im2, border_mask], 42 | horizontal_flipping=True, 43 | min_scale=0.9, max_scale=1.1 44 | ) 45 | 46 | # augment locally 47 | im2_geo, border_mask_local = random_affine( 48 | [im2_geo, border_mask], 49 | min_scale=0.9, max_scale=1.1 50 | ) 51 | border_mask = border_mask_local * border_mask_global 52 | 53 | im1_photo, im2_photo = random_photometric( 54 | [im1_geo, im2_geo], 55 | noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3, 56 | brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, 57 | min_gamma=0.7, max_gamma=1.5) 58 | 59 | _track_image(im1_photo, 'augmented1') 60 | _track_image(im2_photo, 'augmented2') 61 | else: 62 | im1_geo, im2_geo = im1, im2 63 | im1_photo, im2_photo = im1, im2 64 | 65 | # Images for loss comparisons with values in [0, 1] (scale to original using * 255) 66 | im1_norm = im1_geo 67 | im2_norm = im2_geo 68 | # Images for neural network input with mean-zero values in [-1, 1] 69 | im1_photo = im1_photo - channel_mean 70 | im2_photo = im2_photo - channel_mean 71 | 72 | flownet_spec = params.get('flownet', 'S') 73 | full_resolution = params.get('full_res') 74 | train_all = params.get('train_all') 75 | 76 | flows_fw, flows_bw = flownet(im1_photo, im2_photo, 77 | flownet_spec=flownet_spec, 78 | full_resolution=full_resolution, 79 | backward_flow=True, 80 | train_all=train_all) 81 | 82 | flows_fw = flows_fw[-1] 83 | flows_bw = flows_bw[-1] 84 | 85 | # ------------------------------------------------------------------------- 86 | # Losses 87 | layer_weights = [12.7, 4.35, 3.9, 3.4, 1.1] 88 | layer_patch_distances = [3, 2, 2, 1, 1] 89 | if full_resolution: 90 | layer_weights = [12.7, 5.5, 5.0, 4.35, 3.9, 3.4, 1.1] 91 | layer_patch_distances = [3, 3] + layer_patch_distances 92 | im1_s = im1_norm 93 | im2_s = im2_norm 94 | mask_s = border_mask 95 | final_flow_scale = FLOW_SCALE * 4 96 | final_flow_fw = flows_fw[0] * final_flow_scale 97 | final_flow_bw = flows_bw[0] * final_flow_scale 98 | else: 99 | im1_s = downsample(im1_norm, 4) 100 | im2_s = downsample(im2_norm, 4) 101 | mask_s = downsample(border_mask, 4) 102 | final_flow_scale = FLOW_SCALE 103 | final_flow_fw = tf.image.resize_bilinear(flows_fw[0], im_shape) * final_flow_scale * 4 104 | final_flow_bw = tf.image.resize_bilinear(flows_bw[0], im_shape) * final_flow_scale * 4 105 | 106 | combined_losses = dict() 107 | combined_loss = 0.0 108 | for loss in LOSSES: 109 | combined_losses[loss] = 0.0 110 | 111 | if params.get('pyramid_loss'): 112 | flow_enum = enumerate(zip(flows_fw, flows_bw)) 113 | else: 114 | flow_enum = [(0, (flows_fw[0], flows_bw[0]))] 115 | 116 | for i, flow_pair in flow_enum: 117 | layer_name = "loss" + str(i + 2) 118 | 119 | flow_scale = final_flow_scale / (2 ** i) 120 | 121 | with tf.variable_scope(layer_name): 122 | layer_weight = layer_weights[i] 123 | flow_fw_s, flow_bw_s = flow_pair 124 | 125 | mask_occlusion = params.get('mask_occlusion', '') 126 | assert mask_occlusion in ['fb', 'disocc', ''] 127 
| 128 | losses = compute_losses(im1_s, im2_s, 129 | flow_fw_s * flow_scale, flow_bw_s * flow_scale, 130 | border_mask=mask_s if params.get('border_mask') else None, 131 | mask_occlusion=mask_occlusion, 132 | data_max_distance=layer_patch_distances[i]) 133 | 134 | layer_loss = 0.0 135 | 136 | for loss in LOSSES: 137 | weight_name = loss + '_weight' 138 | if params.get(weight_name): 139 | _track_loss(losses[loss], loss) 140 | layer_loss += params[weight_name] * losses[loss] 141 | combined_losses[loss] += layer_weight * losses[loss] 142 | 143 | combined_loss += layer_weight * layer_loss 144 | 145 | im1_s = downsample(im1_s, 2) 146 | im2_s = downsample(im2_s, 2) 147 | mask_s = downsample(mask_s, 2) 148 | 149 | regularization_loss = tf.losses.get_regularization_loss() 150 | final_loss = combined_loss + regularization_loss 151 | 152 | _track_loss(final_loss, 'loss/combined') 153 | 154 | for loss in LOSSES: 155 | _track_loss(combined_losses[loss], 'loss/' + loss) 156 | weight_name = loss + '_weight' 157 | if params.get(weight_name): 158 | weight = tf.identity(params[weight_name], name='weight/' + loss) 159 | tf.add_to_collection('params', weight) 160 | 161 | if not return_flow: 162 | return final_loss 163 | 164 | return final_loss, final_flow_fw, final_flow_bw 165 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/core/util.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def summarized_placeholder(name, prefix=None, key=tf.GraphKeys.SUMMARIES): 5 | prefix = '' if not prefix else prefix + '/' 6 | p = tf.placeholder(tf.float32, name=name) 7 | tf.summary.scalar(prefix + name, p, collections=[key]) 8 | return p 9 | 10 | 11 | def resize_area(tensor, like): 12 | _, h, w, _ = tf.unstack(tf.shape(like)) 13 | return tf.stop_gradient(tf.image.resize_area(tensor, [h, w])) 14 | 15 | 16 | def resize_bilinear(tensor, like): 17 | _, h, w, _ = tf.unstack(tf.shape(like)) 18 | return tf.stop_gradient(tf.image.resize_bilinear(tensor, [h, w])) 19 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/ops.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tensorflow as tf 4 | import subprocess 5 | from tensorflow.python.framework import ops 6 | 7 | 8 | # Register ops for compilation here 9 | OP_NAMES = ['backward_warp', 'downsample', 'correlation', 'forward_warp'] 10 | 11 | 12 | cwd = os.getcwd() 13 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 14 | os.chdir("../../ops") 15 | 16 | def compile(op=None): 17 | if op is not None: 18 | to_compile = [op] 19 | else: 20 | to_compile = OP_NAMES 21 | 22 | tf_inc = tf.sysconfig.get_include() 23 | for n in to_compile: 24 | base = n + "_op" 25 | fn_cu_cc = base + ".cu.cc" 26 | fn_cu_o = base + ".cu.o" 27 | fn_cc = base + ".cc" 28 | fn_o = base + ".o" 29 | fn_so = base + ".so" 30 | 31 | cuda_lib64_path_arg = "-L /usr/local/cuda-8.0/lib64" 32 | nvcc_cmd = "nvcc -std=c++11 -c -gencode=arch=compute_30,code=sm_30 -o {} -I {} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC" 33 | nvcc_cmd = nvcc_cmd.format(" ".join([fn_cu_o, fn_cu_cc]), 34 | tf_inc) 35 | subprocess.check_output(nvcc_cmd, shell=True) 36 | 37 | gcc_cmd = "{} -std=c++11 -shared -o {} -I {} -fPIC -lcudart -D GOOGLE_CUDA=1 {}" 38 | gcc_cmd = gcc_cmd.format('g++', 39 | " ".join([fn_so, fn_cu_o, fn_cc]), 40 | tf_inc, 41 | cuda_lib64_path_arg) 42 | 
subprocess.check_output(gcc_cmd, shell=True) 43 | 44 | 45 | if __name__ == "__main__": 46 | compile() 47 | 48 | 49 | module = sys.modules[__name__] 50 | for n in OP_NAMES: 51 | lib_path = './{}_op.so'.format(n) 52 | try: 53 | op_lib = tf.load_op_library(lib_path) 54 | except: 55 | compile(n) 56 | op_lib = tf.load_op_library(lib_path) 57 | setattr(module, '_' + n + '_module', op_lib) 58 | 59 | 60 | os.chdir(cwd) 61 | 62 | 63 | def correlation(first, second, **kwargs): 64 | return _correlation_module.correlation(first, second, **kwargs)[0] 65 | 66 | 67 | backward_warp = _backward_warp_module.backward_warp 68 | downsample = _downsample_module.downsample 69 | forward_warp = _forward_warp_module.forward_warp 70 | 71 | 72 | # Register op gradients 73 | 74 | @ops.RegisterGradient("BackwardWarp") 75 | def _BackwardWarpGrad(op, grad): 76 | grad0 = _backward_warp_module.backward_warp_grad( 77 | grad, op.inputs[0], op.inputs[1]) 78 | return [None, grad0] 79 | 80 | 81 | @ops.RegisterGradient("ForwardWarp") 82 | def _ForwardWarpGrad(op, grad): 83 | grad0 = _forward_warp_module.forward_warp_grad( 84 | grad, op.inputs[0]) 85 | return [grad0] 86 | 87 | 88 | @ops.RegisterGradient("Correlation") 89 | def _CorrelationGrad(op, in_grad, in_grad1, in_grad2): 90 | grad0, grad1 = _correlation_module.correlation_grad( 91 | in_grad, op.inputs[0], op.inputs[1], 92 | op.outputs[1], op.outputs[2], 93 | kernel_size=op.get_attr('kernel_size'), 94 | max_displacement=op.get_attr('max_displacement'), 95 | pad=op.get_attr('pad'), 96 | stride_1=op.get_attr('stride_1'), 97 | stride_2=op.get_attr('stride_2')) 98 | return [grad0, grad1] 99 | 100 | 101 | ops.NotDifferentiable("Downsample") 102 | -------------------------------------------------------------------------------- /core/UnFlow/src/e2eflow/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import configparser 4 | from shutil import rmtree 5 | import tensorflow as tf 6 | 7 | 8 | CONFIG_PATH = '../config.ini' 9 | #TMP_DIR = '/tmp/e2eflow' 10 | 11 | 12 | def upload_gdrive(upload_dir, gdrive_filename): 13 | # search for file in gdrive and capture id if it already exists 14 | lst_lines = subprocess.Popen(['../scripts/gdrive', 'list'], 15 | stdout=subprocess.PIPE) 16 | existing_id = None 17 | for line in lst_lines.stdout: 18 | splits = line.split() 19 | if str(splits[1], 'utf-8') == gdrive_filename: 20 | existing_id = str(splits[0], 'utf-8') 21 | tmp_path = os.path.join('/tmp', gdrive_filename) 22 | if os.path.isfile(tmp_path): 23 | os.remove(tmp_path) 24 | p = subprocess.Popen(['/usr/bin/zip', '-r', tmp_path, upload_dir]) 25 | p.wait() 26 | if existing_id: 27 | p = subprocess.Popen(['../scripts/gdrive', 'update', 28 | existing_id, tmp_path]) 29 | else: 30 | p = subprocess.Popen(['../scripts/gdrive', 'upload', 31 | '--name', gdrive_filename, 32 | tmp_path]) 33 | p.wait() 34 | os.remove(tmp_path) 35 | 36 | 37 | def config_dict(config_path=CONFIG_PATH): 38 | """Returns the config as dictionary, 39 | where the elements have intuitively correct types. 
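A hypothetical `config.ini` section (not from this repo) run through the same coercion order the loop below applies: int first, then float, then boolean, otherwise the raw string:

```python
import configparser

config = configparser.ConfigParser()
config.read_string(u"[run]\nnum_iters = 500000\nlearning_rate = 1e-4\n"
                   u"full_res = False\nflownet = C\n")
section = config['run']
print(int(section['num_iters']),        # 500000  (int succeeds)
      float(section['learning_rate']),  # 0.0001  (int fails, float succeeds)
      section.getboolean('full_res'),   # False   (int and float fail)
      section['flownet'])               # 'C'     (falls through to string)
```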
40 | """ 41 | 42 | config = configparser.ConfigParser() 43 | config.read(config_path) 44 | 45 | d = dict() 46 | for section_key in config.sections(): 47 | sd = dict() 48 | section = config[section_key] 49 | for key in section: 50 | val = section[key] 51 | try: 52 | sd[key] = int(val) 53 | except ValueError: 54 | try: 55 | sd[key] = float(val) 56 | except ValueError: 57 | try: 58 | sd[key] = section.getboolean(key) 59 | except ValueError: 60 | sd[key] = val 61 | d[section_key] = sd 62 | return d 63 | 64 | 65 | def convert_input_strings(config_dct, dirs): 66 | if 'manual_decay_iters' in config_dct and 'manual_decay_lrs' in config_dct: 67 | iters_lst = config_dct['manual_decay_iters'].split(',') 68 | lrs_lst = config_dct['manual_decay_lrs'].split(',') 69 | iters_lst = [int(i) for i in iters_lst] 70 | lrs_lst = [float(l) for l in lrs_lst] 71 | config_dct['manual_decay_iters'] = iters_lst 72 | config_dct['manual_decay_lrs'] = lrs_lst 73 | config_dct['num_iters'] = sum(iters_lst) 74 | 75 | if 'finetune' in config_dct: 76 | finetune = [] 77 | for name in config_dct['finetune'].split(","): 78 | ckpt_dir = os.path.join(dirs['checkpoints'], name) 79 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 80 | if ckpt is None: 81 | ckpt_dir = os.path.join(dirs['log'], 'ex', name) 82 | ckpt = tf.train.get_checkpoint_state(ckpt_dir) 83 | assert ckpt, "Could not load experiment " + name 84 | finetune.append(ckpt) 85 | config_dct['finetune'] = finetune 86 | 87 | 88 | def tryremove(name, file=False): 89 | try: 90 | if file: 91 | os.remove(name) 92 | else: 93 | rmtree(name) 94 | except OSError: 95 | pass 96 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | from .DFLearner import DFLearner 3 | from .flowlib import * 4 | from .UnFlow import * 5 | -------------------------------------------------------------------------------- /core/data_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import random 4 | import tensorflow as tf 5 | 6 | class DataLoader(object): 7 | def __init__(self, 8 | dataset_dir=None, 9 | batch_size=None, 10 | img_height=None, 11 | img_width=None, 12 | num_source=None, 13 | num_scales=None): 14 | self.dataset_dir = dataset_dir 15 | self.batch_size = batch_size 16 | self.img_height = img_height 17 | self.img_width = img_width 18 | self.num_source = num_source 19 | self.num_scales = num_scales 20 | 21 | def load_train_batch(self, is_training=True): 22 | """Load a batch of training instances. 
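A construction sketch for this loader; the argument values here are illustrative (the training script configures its own), and the output shapes are annotated as assumptions:

```python
loader = DataLoader(dataset_dir='/path/to/prepared/data',
                    batch_size=4, img_height=128, img_width=416,
                    num_source=4, num_scales=4)
tgt, src_stack, intrinsics, tgt_aug, src_aug = loader.load_train_batch()
# tgt:        [4, 128, 416, 3]      target (center) frame
# src_stack:  [4, 128, 416, 4 * 3]  source frames stacked along channels
# intrinsics: per-scale 3x3 camera matrices from get_multi_scale_intrinsics
```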
23 | """ 24 | seed = random.randint(0, 2**31 - 1) 25 | # Load the list of training files into queues 26 | file_list = self.format_file_list(self.dataset_dir, 'train') 27 | image_paths_queue = tf.train.string_input_producer( 28 | file_list['image_file_list'], 29 | seed=seed, 30 | shuffle=True) 31 | cam_paths_queue = tf.train.string_input_producer( 32 | file_list['cam_file_list'], 33 | seed=seed, 34 | shuffle=True) 35 | self.steps_per_epoch = int( 36 | len(file_list['image_file_list'])//self.batch_size) 37 | 38 | # Load images 39 | img_reader = tf.WholeFileReader() 40 | _, image_contents = img_reader.read(image_paths_queue) 41 | image_seq = tf.image.decode_jpeg(image_contents) 42 | tgt_image, src_image_stack = \ 43 | self.unpack_image_sequence( 44 | image_seq, self.img_height, self.img_width, self.num_source) 45 | 46 | # Load camera intrinsics 47 | cam_reader = tf.TextLineReader() 48 | _, raw_cam_contents = cam_reader.read(cam_paths_queue) 49 | rec_def = [] 50 | for i in range(9): 51 | rec_def.append([1.]) 52 | raw_cam_vec = tf.decode_csv(raw_cam_contents, 53 | record_defaults=rec_def) 54 | raw_cam_vec = tf.stack(raw_cam_vec) 55 | intrinsics = tf.reshape(raw_cam_vec, [3, 3]) 56 | 57 | # Form training batches 58 | src_image_stack, tgt_image, intrinsics = \ 59 | tf.train.batch([src_image_stack, tgt_image, intrinsics], 60 | batch_size=self.batch_size) 61 | 62 | # Data augmentation 63 | image_all = tf.concat([tgt_image, src_image_stack], axis=3) 64 | image_all, intrinsics, image_augall = self.data_augmentation( 65 | image_all, intrinsics, self.img_height, self.img_width) 66 | tgt_image = image_all[:, :, :, :3] 67 | src_image_stack = image_all[:, :, :, 3:] 68 | tgt_image_aug = image_augall[:, :, :, :3] 69 | src_image_stack_aug = image_augall[:, :, :, 3:] 70 | intrinsics = self.get_multi_scale_intrinsics( 71 | intrinsics, self.num_scales) 72 | 73 | if is_training: 74 | return tgt_image, src_image_stack, intrinsics, tgt_image_aug, src_image_stack_aug 75 | else: 76 | return tgt_image, src_image_stack, intrinsics 77 | 78 | def make_intrinsics_matrix(self, fx, fy, cx, cy): 79 | # Assumes batch input 80 | batch_size = fx.get_shape().as_list()[0] 81 | zeros = tf.zeros_like(fx) 82 | r1 = tf.stack([fx, zeros, cx], axis=1) 83 | r2 = tf.stack([zeros, fy, cy], axis=1) 84 | r3 = tf.constant([0.,0.,1.], shape=[1, 3]) 85 | r3 = tf.tile(r3, [batch_size, 1]) 86 | intrinsics = tf.stack([r1, r2, r3], axis=1) 87 | return intrinsics 88 | 89 | def data_augmentation(self, im, intrinsics, out_h, out_w): 90 | # Random scaling 91 | def random_scaling(im, intrinsics): 92 | batch_size, in_h, in_w, _ = im.get_shape().as_list() 93 | scaling = tf.random_uniform([2], 1, 1.15) 94 | x_scaling = scaling[0] 95 | y_scaling = scaling[1] 96 | out_h = tf.cast(in_h * y_scaling, dtype=tf.int32) 97 | out_w = tf.cast(in_w * x_scaling, dtype=tf.int32) 98 | im = tf.image.resize_area(im, [out_h, out_w]) 99 | fx = intrinsics[:,0,0] * x_scaling 100 | fy = intrinsics[:,1,1] * y_scaling 101 | cx = intrinsics[:,0,2] * x_scaling 102 | cy = intrinsics[:,1,2] * y_scaling 103 | intrinsics = self.make_intrinsics_matrix(fx, fy, cx, cy) 104 | return im, intrinsics 105 | 106 | # Random cropping 107 | def random_cropping(im, intrinsics, out_h, out_w): 108 | # batch_size, in_h, in_w, _ = im.get_shape().as_list() 109 | batch_size, in_h, in_w, _ = tf.unstack(tf.shape(im)) 110 | offset_y = tf.random_uniform([1], 0, in_h - out_h + 1, dtype=tf.int32)[0] 111 | offset_x = tf.random_uniform([1], 0, in_w - out_w + 1, dtype=tf.int32)[0] 112 | im = 
tf.image.crop_to_bounding_box( 113 | im, offset_y, offset_x, out_h, out_w) 114 | fx = intrinsics[:,0,0] 115 | fy = intrinsics[:,1,1] 116 | cx = intrinsics[:,0,2] - tf.cast(offset_x, dtype=tf.float32) 117 | cy = intrinsics[:,1,2] - tf.cast(offset_y, dtype=tf.float32) 118 | intrinsics = self.make_intrinsics_matrix(fx, fy, cx, cy) 119 | return im, intrinsics 120 | 121 | # Random photometric augmentation 122 | # Credit: https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/augment.py 123 | def random_photometric(im, noise_stddev=0.04, min_contrast=-0.2, max_contrast=0.2, brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, min_gamma=0.8, max_gamma=1.2): 124 | """ 125 | Applies photometric augmentations to a list of image batches. 126 | Args: 127 | im: list of 3-channel image batches normalized to [0, 1]. 128 | Returns: 129 | Batch of normalized images with photometric augmentations. Has the same shape as the input batch. 130 | """ 131 | batch_size, in_h, in_w, _ = im[0].get_shape().as_list() 132 | 133 | contrast = tf.random_uniform([batch_size, 1], min_contrast, max_contrast) 134 | gamma = tf.random_uniform([batch_size, 1], min_gamma, max_gamma) 135 | gamma_inv = 1.0/gamma 136 | colour = tf.random_uniform([batch_size, 3], min_colour, max_colour) 137 | if noise_stddev > 0.0: 138 | noise = tf.random_normal([batch_size, 1], stddev=noise_stddev) 139 | else: 140 | noise = tf.zeros([batch_size, 1]) 141 | if brightness_stddev > 0.0: 142 | brightness = tf.random_normal([batch_size, 1], stddev=brightness_stddev) 143 | else: 144 | brightness = tf.zeros([batch_size, 1]) 145 | 146 | out = [] 147 | for temp in im: 148 | # Transpose to [height, width, num_batch, channels] 149 | im_re = tf.transpose(temp, [1, 2, 0, 3]) 150 | im_re = (im_re * (contrast + 1.0) + brightness) * colour 151 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re)) 152 | im_re = tf.pow(im_re, gamma_inv) 153 | im_re = im_re + noise 154 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re)) 155 | 156 | temp = tf.transpose(im_re, [2, 0, 1, 3]) 157 | temp = tf.stop_gradient(temp) 158 | out.append(temp) 159 | return tf.concat(out, axis=-1) 160 | 161 | im, intrinsics = random_scaling(im, intrinsics) 162 | im, intrinsics = random_cropping(im, intrinsics, out_h, out_w) 163 | # [0, 255] -> [0, 1] 164 | im_photo = im/255. 165 | im_photo = [im_photo[:,:,:,3*i:3*(i+1)] for i in range(self.num_source+1)] 166 | im_photo = random_photometric(im_photo) 167 | # [0, 1] -> [0, 255] 168 | im_photo = im_photo*255. 
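As a side note, the photometric chain applied just above (contrast, brightness, per-channel colour, gamma, additive noise, with clipping back to [0, 1] after the gamma and noise steps) can be sketched for a single image in plain NumPy. This is only an illustration with made-up image values; the ranges mirror the defaults of `random_photometric` above:

```python
import numpy as np

rng = np.random.RandomState(0)
img = rng.rand(128, 416, 3)              # toy image already normalized to [0, 1]

contrast = rng.uniform(-0.2, 0.2)        # min_contrast / max_contrast
brightness = rng.normal(scale=0.02)      # brightness_stddev
colour = rng.uniform(0.9, 1.1, size=3)   # per-channel colour factor
gamma = rng.uniform(0.8, 1.2)
noise = rng.normal(scale=0.04)           # noise_stddev

out = (img * (contrast + 1.0) + brightness) * colour
out = np.clip(out, 0.0, 1.0) ** (1.0 / gamma)   # clip, then gamma, as above
out = np.clip(out + noise, 0.0, 1.0)
print(out.shape, out.min(), out.max())
```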
169 | im = tf.cast(im, dtype=tf.uint8) 170 | im_photo = tf.cast(im_photo, dtype=tf.uint8) 171 | return im, intrinsics, im_photo 172 | 173 | def format_file_list(self, data_root, split): 174 | with open(data_root + '/%s.txt' % split, 'r') as f: 175 | frames = f.readlines() 176 | subfolders = [x.split(' ')[0] for x in frames] 177 | frame_ids = [x.split(' ')[1][:-1] for x in frames] 178 | image_file_list = [os.path.join(data_root, subfolders[i], 179 | frame_ids[i] + '.jpg') for i in range(len(frames))] 180 | cam_file_list = [os.path.join(data_root, subfolders[i], 181 | frame_ids[i] + '_cam.txt') for i in range(len(frames))] 182 | all_list = {} 183 | all_list['image_file_list'] = image_file_list 184 | all_list['cam_file_list'] = cam_file_list 185 | return all_list 186 | 187 | def unpack_image_sequence(self, image_seq, img_height, img_width, num_source): 188 | # Assuming the center image is the target frame 189 | tgt_start_idx = int(img_width * (num_source//2)) 190 | tgt_image = tf.slice(image_seq, 191 | [0, tgt_start_idx, 0], 192 | [-1, img_width, -1]) 193 | # Source frames before the target frame 194 | src_image_1 = tf.slice(image_seq, 195 | [0, 0, 0], 196 | [-1, int(img_width * (num_source//2)), -1]) 197 | # Source frames after the target frame 198 | src_image_2 = tf.slice(image_seq, 199 | [0, int(tgt_start_idx + img_width), 0], 200 | [-1, int(img_width * (num_source//2)), -1]) 201 | src_image_seq = tf.concat([src_image_1, src_image_2], axis=1) 202 | # Stack source frames along the color channels (i.e. [H, W, N*3]) 203 | src_image_stack = tf.concat([tf.slice(src_image_seq, 204 | [0, i*img_width, 0], 205 | [-1, img_width, -1]) 206 | for i in range(num_source)], axis=2) 207 | src_image_stack.set_shape([img_height, 208 | img_width, 209 | num_source * 3]) 210 | tgt_image.set_shape([img_height, img_width, 3]) 211 | return tgt_image, src_image_stack 212 | 213 | def batch_unpack_image_sequence(self, image_seq, img_height, img_width, num_source): 214 | # Assuming the center image is the target frame 215 | tgt_start_idx = int(img_width * (num_source//2)) 216 | tgt_image = tf.slice(image_seq, 217 | [0, 0, tgt_start_idx, 0], 218 | [-1, -1, img_width, -1]) 219 | # Source frames before the target frame 220 | src_image_1 = tf.slice(image_seq, 221 | [0, 0, 0, 0], 222 | [-1, -1, int(img_width * (num_source//2)), -1]) 223 | # Source frames after the target frame 224 | src_image_2 = tf.slice(image_seq, 225 | [0, 0, int(tgt_start_idx + img_width), 0], 226 | [-1, -1, int(img_width * (num_source//2)), -1]) 227 | src_image_seq = tf.concat([src_image_1, src_image_2], axis=2) 228 | # Stack source frames along the color channels (i.e. 
[B, H, W, N*3]) 229 | src_image_stack = tf.concat([tf.slice(src_image_seq, 230 | [0, 0, i*img_width, 0], 231 | [-1, -1, img_width, -1]) 232 | for i in range(num_source)], axis=3) 233 | return tgt_image, src_image_stack 234 | 235 | def get_multi_scale_intrinsics(self, intrinsics, num_scales): 236 | intrinsics_mscale = [] 237 | # Scale the intrinsics accordingly for each scale 238 | for s in range(num_scales): 239 | fx = intrinsics[:,0,0]/(2 ** s) 240 | fy = intrinsics[:,1,1]/(2 ** s) 241 | cx = intrinsics[:,0,2]/(2 ** s) 242 | cy = intrinsics[:,1,2]/(2 ** s) 243 | intrinsics_mscale.append( 244 | self.make_intrinsics_matrix(fx, fy, cx, cy)) 245 | intrinsics_mscale = tf.stack(intrinsics_mscale, axis=1) 246 | return intrinsics_mscale 247 | -------------------------------------------------------------------------------- /core/nets.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import tensorflow.contrib.slim as slim 4 | from tensorflow.contrib.layers.python.layers import utils 5 | import numpy as np 6 | 7 | from .utils import flow_inverse_warp 8 | 9 | # Range of disparity/inverse depth values 10 | DISP_SCALING = 10 11 | MIN_DISP = 0.01 12 | 13 | def resize_like(inputs, ref, type='nearest'): 14 | iH, iW = inputs.get_shape()[1], inputs.get_shape()[2] 15 | rH, rW = ref.get_shape()[1], ref.get_shape()[2] 16 | if iH == rH and iW == rW: 17 | return inputs 18 | if type == 'nearest': 19 | return tf.image.resize_nearest_neighbor(inputs, [rH.value, rW.value]) 20 | elif type == 'bilinear': 21 | return tf.image.resize_bilinear(inputs, [rH.value, rW.value]) 22 | 23 | # Reference: https://github.com/sampepose/flownet2-tf/blob/master/src/utils.py 24 | def pad(tensor, num=1): 25 | """ 26 | Pads the given tensor along the height and width dimensions with `num` 0s on each side 27 | """ 28 | return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT") 29 | 30 | def pad_4(tensor, u, b, l, r): 31 | return tf.pad(tensor, [[0, 0], [u, b], [l, r], [0, 0]], 'CONSTANT') 32 | 33 | def antipad(tensor, num=1): 34 | """ 35 | Performs a crop. "padding" for a deconvolutional layer (conv2d tranpose) removes 36 | padding from the output rather than adding it to the input. 
37 | """ 38 | batch, h, w, c = tensor.shape.as_list() 39 | return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c]) 40 | 41 | def antipad_4(tensor, u, b, l, r): 42 | batch, h, w, c = tensor.shape.as_list() 43 | return tf.slice(tensor, begin=[0, u, l, 0], size=[batch, h - u - b, w - l - r, c]) 44 | 45 | # Reference: https://github.com/scaelles/OSVOS-TensorFlow/blob/master/osvos.py 46 | def crop_features(feature, out_size): 47 | """Crop the center of a feature map 48 | Args: 49 | feature: Feature map to crop 50 | out_size: Size of the output feature map 51 | Returns: 52 | Tensor that performs the cropping 53 | """ 54 | up_size = tf.shape(feature) 55 | ini_w = tf.div(tf.subtract(up_size[1], out_size[1]), 2) 56 | ini_h = tf.div(tf.subtract(up_size[2], out_size[2]), 2) 57 | slice_input = tf.slice(feature, (0, ini_w, ini_h, 0), (-1, out_size[1], out_size[2], -1)) 58 | return tf.reshape(slice_input, [int(feature.get_shape()[0]), out_size[1], out_size[2], int(feature.get_shape()[3])]) 59 | 60 | # Reference: https://github.com/tensorflow/tensorflow/issues/4079 61 | def LeakyReLU(x, leak=0.1, name='lrelu'): 62 | with tf.variable_scope(name): 63 | f1 = 0.5 * (1.0 + leak) 64 | f2 = 0.5 * (1.0 - leak) 65 | return f1 * x + f2 * abs(x) 66 | 67 | # Both target->source and source->target 68 | def pose_net_fb(tgt_image, src_image_stack, is_training=True, reuse=False): 69 | inputs = tf.concat([tgt_image, src_image_stack], axis=3) 70 | H = inputs.get_shape()[1].value 71 | W = inputs.get_shape()[2].value 72 | num_source = int(src_image_stack.get_shape()[3].value//3) 73 | with tf.variable_scope('pose_net') as sc: 74 | if reuse: 75 | sc.reuse_variables() 76 | end_points_collection = sc.original_name_scope + '_end_points' 77 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 78 | normalizer_fn=None, 79 | weights_regularizer=slim.l2_regularizer(0.05), 80 | activation_fn=tf.nn.relu, 81 | outputs_collections=end_points_collection): 82 | # cnv1 to cnv5b are shared between pose and explainability prediction 83 | cnv1 = slim.conv2d(inputs,16, [7, 7], stride=2, scope='cnv1') 84 | cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2') 85 | cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3') 86 | cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4') 87 | cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5') 88 | cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6') 89 | cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7') 90 | # Double the number of channels 91 | pose_pred = slim.conv2d(cnv7, 6*num_source*2, [1, 1], scope='pred', 92 | stride=1, normalizer_fn=None, activation_fn=None) 93 | pose_avg = tf.reduce_mean(pose_pred, [1, 2]) 94 | # Empirically we found that scaling by a small constant 95 | # facilitates training. 
96 | # 1st half: target->source, 2nd half: source->target 97 | pose_final = 0.01 * tf.reshape(pose_avg, [-1, num_source, 6*2]) 98 | end_points = utils.convert_collection_to_dict(end_points_collection) 99 | return pose_final, end_points 100 | 101 | # helper functions 102 | # Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py 103 | def resconv(x, num_layers, stride): 104 | do_proj = tf.shape(x)[3] != num_layers or stride == 2 105 | conv1 = slim.conv2d(x, num_layers, [1, 1], stride=1, activation_fn=tf.nn.elu) 106 | conv2 = slim.conv2d(conv1, num_layers, [3, 3], stride=stride, activation_fn=tf.nn.elu) 107 | conv3 = slim.conv2d(conv2, 4 * num_layers, [1, 1], stride=1, activation_fn=None) 108 | if do_proj: 109 | shortcut = slim.conv2d(x, 4* num_layers, [1, 1], stride=stride, activation_fn=None) 110 | else: 111 | shortcut = x 112 | return tf.nn.elu(conv3 + shortcut) 113 | 114 | def resblock(x, num_layers, num_blocks): 115 | out = x 116 | for i in range(num_blocks - 1): 117 | out = resconv(out, num_layers, 1) 118 | out = resconv(out, num_layers, 2) 119 | return out 120 | 121 | def upsample_nn(x, ratio): 122 | s = tf.shape(x) 123 | h = s[1] 124 | w = s[2] 125 | return tf.image.resize_nearest_neighbor(x, [h*ratio, w*ratio]) 126 | 127 | def upconv(x, num_layers, kernal, scale): 128 | upsample = upsample_nn(x, scale) 129 | conv = slim.conv2d(upsample, num_layers, [kernal, kernal], stride=1, activation_fn=tf.nn.elu) 130 | return conv 131 | 132 | def disp_net_res50(tgt_image, is_training=True, reuse=False, get_feature=False): 133 | H = tgt_image.get_shape()[1].value 134 | W = tgt_image.get_shape()[2].value 135 | with tf.variable_scope('depth_net_res50') as sc: 136 | if reuse: 137 | sc.reuse_variables() 138 | end_points_collection = sc.original_name_scope + '_end_points' 139 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 140 | activation_fn=tf.nn.elu, 141 | outputs_collections=end_points_collection): 142 | # Encoder 143 | conv1 = slim.conv2d(tgt_image, 64, [7, 7], stride=2) # 1/2 144 | pool1 = slim.max_pool2d(conv1, [3, 3], padding='SAME') # 1/4 145 | conv2 = resblock(pool1, 64, 3) # 1/8 146 | conv3 = resblock(conv2, 128, 4) # 1/16 147 | conv4 = resblock(conv3, 256, 6) # 1/32 148 | conv5 = resblock(conv4, 512, 3) # 1/64 149 | 150 | # Decoder 151 | upconv6 = upconv(conv5, 512, 3, 2) # 1/32 152 | #upconv6 = slim.conv2d_transpose(conv5, 512, [3, 3], stride=2) 153 | upconv6 = resize_like(upconv6, conv4) 154 | concat6 = tf.concat([upconv6, conv4], 3) 155 | iconv6 = slim.conv2d(concat6, 512, [3, 3], stride=1) 156 | 157 | upconv5 = upconv(iconv6, 256, 3, 2) # 1/16 158 | #upconv5 = slim.conv2d_transpose(iconv6, 256, [3, 3], stride=2) 159 | upconv5 = resize_like(upconv5, conv3) 160 | concat5 = tf.concat([upconv5, conv3], 3) 161 | iconv5 = slim.conv2d(concat5, 256, [3, 3], stride=1) 162 | 163 | upconv4 = upconv(iconv5, 128, 3, 2) # 1/8 164 | #upconv4 = slim.conv2d_transpose(iconv5, 128, [3, 3], stride=2) 165 | upconv4 = resize_like(upconv4, conv2) 166 | concat4 = tf.concat([upconv4, conv2], 3) 167 | iconv4 = slim.conv2d(concat4, 128, [3, 3], stride=1) 168 | disp4 = DISP_SCALING * slim.conv2d(iconv4, 1, [3, 3], stride=1, 169 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp4') + MIN_DISP 170 | disp4_up = tf.image.resize_bilinear(disp4, [np.int(H/4), np.int(W/4)]) 171 | 172 | upconv3 = upconv(iconv4, 64, 3, 2) # 1/4 173 | #upconv3 = slim.conv2d_transpose(iconv4, 64, [3, 3], stride=2) 174 | upconv3 = resize_like(upconv3, pool1) 175 | disp4_up = resize_like(disp4_up, 
pool1) 176 | concat3 = tf.concat([upconv3, disp4_up, pool1], 3) 177 | iconv3 = slim.conv2d(concat3, 64, [3, 3], stride=1) 178 | disp3 = DISP_SCALING * slim.conv2d(iconv3, 1, [3, 3], stride=1, 179 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp3') + MIN_DISP 180 | disp3_up = tf.image.resize_bilinear(disp3, [np.int(H/2), np.int(W/2)]) 181 | 182 | upconv2 = upconv(iconv3, 32, 3, 2) # 1/2 183 | #upconv2 = slim.conv2d_transpose(iconv3, 32, [3, 3], stride=2) 184 | upconv2 = resize_like(upconv2, conv1) 185 | disp3_up = resize_like(disp3_up, conv1) 186 | concat2 = tf.concat([upconv2, disp3_up, conv1], 3) 187 | iconv2 = slim.conv2d(concat2, 32, [3, 3], stride=1) 188 | disp2 = DISP_SCALING * slim.conv2d(iconv2, 1, [3, 3], stride=1, 189 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp2') + MIN_DISP 190 | disp2_up = tf.image.resize_bilinear(disp2, [H, W]) 191 | 192 | upconv1 = upconv(iconv2, 16, 3, 2) 193 | #upconv1 = slim.conv2d_transpose(iconv2, 16, [3, 3], stride=2) 194 | upconv1 = resize_like(upconv1, disp2_up) 195 | concat1 = tf.concat([upconv1, disp2_up], 3) 196 | iconv1 = slim.conv2d(concat1, 16, [3, 3], stride=1) 197 | disp1 = DISP_SCALING * slim.conv2d(iconv1, 1, [3, 3], stride=1, 198 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp1') + MIN_DISP 199 | 200 | end_points = utils.convert_collection_to_dict(end_points_collection) 201 | 202 | if not get_feature: 203 | return [disp1, disp2, disp3, disp4], end_points 204 | else: 205 | return [disp1, disp2, disp3, disp4], conv5, end_points 206 | 207 | 208 | -------------------------------------------------------------------------------- /core/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def gray2rgb(im, cmap='gray'): 7 | cmap = plt.get_cmap(cmap) 8 | rgba_img = cmap(im.astype(np.float32)) 9 | rgb_img = np.delete(rgba_img, 3, 2) 10 | return rgb_img 11 | 12 | def normalize_depth_for_display(depth, pc=95, crop_percent=0, normalizer=None, cmap='gray'): 13 | # convert to disparity 14 | depth = 1./(depth + 1e-6) 15 | if normalizer is not None: 16 | depth = depth/normalizer 17 | else: 18 | depth = depth/(np.percentile(depth, pc) + 1e-6) 19 | depth = np.clip(depth, 0, 1) 20 | depth = gray2rgb(depth, cmap=cmap) 21 | keep_H = int(depth.shape[0] * (1-crop_percent)) 22 | depth = depth[:keep_H] 23 | depth = depth 24 | return depth 25 | 26 | # Add inverse_pose flag 27 | def euler2mat(z, y, x, inverse_pose=False): 28 | """Converts euler angles to rotation matrix 29 | TODO: remove the dimension for 'N' (deprecated for converting all source 30 | poses altogether) 31 | Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174 32 | Args: 33 | z: rotation angle along z axis (in radians) -- size = [B, N] 34 | y: rotation angle along y axis (in radians) -- size = [B, N] 35 | x: rotation angle along x axis (in radians) -- size = [B, N] 36 | Returns: 37 | Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3] 38 | """ 39 | B = tf.shape(z)[0] 40 | N = 1 41 | z = tf.clip_by_value(z, -np.pi, np.pi) 42 | y = tf.clip_by_value(y, -np.pi, np.pi) 43 | x = tf.clip_by_value(x, -np.pi, np.pi) 44 | if inverse_pose: 45 | z = -z 46 | y = -y 47 | x = -x 48 | 49 | # Expand to B x N x 1 x 1 50 | z = tf.expand_dims(tf.expand_dims(z, -1), -1) 51 | y = tf.expand_dims(tf.expand_dims(y, -1), -1) 52 | x = tf.expand_dims(tf.expand_dims(x, -1), -1) 
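For reference, the convention implemented by this function (and by `pose_vec2mat` further below) is that the per-axis rotations are composed as Rx · Ry · Rz and the 6-DoF pose vector is ordered (tx, ty, tz, rx, ry, rz). A minimal, unbatched NumPy sketch of the same math, with an arbitrary example pose and not part of the repository code, would look like:

```python
import numpy as np

def euler_to_mat(rz, ry, rx):
    # Same composition as euler2mat: R = Rx @ Ry @ Rz
    cz, sz = np.cos(rz), np.sin(rz)
    cy, sy = np.cos(ry), np.sin(ry)
    cx, sx = np.cos(rx), np.sin(rx)
    Rz = np.array([[cz, -sz, 0.], [sz, cz, 0.], [0., 0., 1.]])
    Ry = np.array([[cy, 0., sy], [0., 1., 0.], [-sy, 0., cy]])
    Rx = np.array([[1., 0., 0.], [0., cx, -sx], [0., sx, cx]])
    return Rx @ Ry @ Rz

def pose_vec_to_mat(vec):
    # vec = (tx, ty, tz, rx, ry, rz), as documented in pose_vec2mat
    tx, ty, tz, rx, ry, rz = vec
    T = np.eye(4)
    T[:3, :3] = euler_to_mat(rz, ry, rx)
    T[:3, 3] = [tx, ty, tz]
    return T

print(pose_vec_to_mat([0.1, 0.0, 0.5, 0.0, 0.02, 0.0]))
```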
53 | 54 | zeros = tf.zeros([B, N, 1, 1]) 55 | ones = tf.ones([B, N, 1, 1]) 56 | 57 | cosz = tf.cos(z) 58 | sinz = tf.sin(z) 59 | rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3) 60 | rotz_2 = tf.concat([sinz, cosz, zeros], axis=3) 61 | rotz_3 = tf.concat([zeros, zeros, ones], axis=3) 62 | zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2) 63 | 64 | cosy = tf.cos(y) 65 | siny = tf.sin(y) 66 | roty_1 = tf.concat([cosy, zeros, siny], axis=3) 67 | roty_2 = tf.concat([zeros, ones, zeros], axis=3) 68 | roty_3 = tf.concat([-siny,zeros, cosy], axis=3) 69 | ymat = tf.concat([roty_1, roty_2, roty_3], axis=2) 70 | 71 | cosx = tf.cos(x) 72 | sinx = tf.sin(x) 73 | rotx_1 = tf.concat([ones, zeros, zeros], axis=3) 74 | rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3) 75 | rotx_3 = tf.concat([zeros, sinx, cosx], axis=3) 76 | xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2) 77 | 78 | rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat) 79 | return rotMat 80 | 81 | # Add inverse_pose flag 82 | def pose_vec2mat(vec, inverse_pose=False): 83 | """Converts 6DoF parameters to transformation matrix 84 | Args: 85 | vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6] 86 | Returns: 87 | A transformation matrix -- [B, 4, 4] 88 | """ 89 | batch_size, _ = vec.get_shape().as_list() 90 | translation = tf.slice(vec, [0, 0], [-1, 3]) 91 | translation = tf.expand_dims(translation, -1) 92 | if inverse_pose: 93 | translation = -translation 94 | rx = tf.slice(vec, [0, 3], [-1, 1]) 95 | ry = tf.slice(vec, [0, 4], [-1, 1]) 96 | rz = tf.slice(vec, [0, 5], [-1, 1]) 97 | rot_mat = euler2mat(rz, ry, rx, inverse_pose) 98 | rot_mat = tf.squeeze(rot_mat, axis=[1]) 99 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 100 | filler = tf.tile(filler, [batch_size, 1, 1]) 101 | transform_mat = tf.concat([rot_mat, translation], axis=2) 102 | transform_mat = tf.concat([transform_mat, filler], axis=1) 103 | return transform_mat 104 | 105 | def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True): 106 | """Transforms coordinates in the pixel frame to the camera frame. 107 | 108 | Args: 109 | depth: [batch, height, width] 110 | pixel_coords: homogeneous pixel coordinates [batch, 3, height, width] 111 | intrinsics: camera intrinsics [batch, 3, 3] 112 | is_homogeneous: return in homogeneous coordinates 113 | Returns: 114 | Coords in the camera frame [batch, 3 (4 if homogeneous), height, width] 115 | """ 116 | batch, height, width = depth.get_shape().as_list() 117 | depth = tf.reshape(depth, [batch, 1, -1]) 118 | pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1]) 119 | cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth 120 | if is_homogeneous: 121 | ones = tf.ones([batch, 1, height*width]) 122 | cam_coords = tf.concat([cam_coords, ones], axis=1) 123 | cam_coords = tf.reshape(cam_coords, [batch, -1, height, width]) 124 | return cam_coords 125 | 126 | def cam2pixel(cam_coords, proj): 127 | """Transforms coordinates in a camera frame to the pixel frame. 
128 | 129 | Args: 130 | cam_coords: [batch, 4, height, width] 131 | proj: [batch, 4, 4] 132 | Returns: 133 | Pixel coordinates projected from the camera frame [batch, height, width, 2] 134 | """ 135 | batch, _, height, width = cam_coords.get_shape().as_list() 136 | cam_coords = tf.reshape(cam_coords, [batch, 4, -1]) 137 | unnormalized_pixel_coords = tf.matmul(proj, cam_coords) 138 | x_u = tf.slice(unnormalized_pixel_coords, [0, 0, 0], [-1, 1, -1]) 139 | y_u = tf.slice(unnormalized_pixel_coords, [0, 1, 0], [-1, 1, -1]) 140 | z_u = tf.slice(unnormalized_pixel_coords, [0, 2, 0], [-1, 1, -1]) 141 | x_n = x_u / (z_u + 1e-10) 142 | y_n = y_u / (z_u + 1e-10) 143 | pixel_coords = tf.concat([x_n, y_n], axis=1) 144 | pixel_coords = tf.reshape(pixel_coords, [batch, 2, height, width]) 145 | return tf.transpose(pixel_coords, perm=[0, 2, 3, 1]) 146 | 147 | def meshgrid(batch, height, width, is_homogeneous=True): 148 | """Construct a 2D meshgrid. 149 | 150 | Args: 151 | batch: batch size 152 | height: height of the grid 153 | width: width of the grid 154 | is_homogeneous: whether to return in homogeneous coordinates 155 | Returns: 156 | x,y grid coordinates [batch, 2 (3 if homogeneous), height, width] 157 | """ 158 | x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), 159 | tf.transpose(tf.expand_dims( 160 | tf.linspace(-1.0, 1.0, width), 1), [1, 0])) 161 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 162 | tf.ones(shape=tf.stack([1, width]))) 163 | x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32) 164 | y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32) 165 | if is_homogeneous: 166 | ones = tf.ones_like(x_t) 167 | coords = tf.stack([x_t, y_t, ones], axis=0) 168 | else: 169 | coords = tf.stack([x_t, y_t], axis=0) 170 | coords = tf.tile(tf.expand_dims(coords, 0), [batch, 1, 1, 1]) 171 | return coords 172 | 173 | # Add inverse_pose flag 174 | def projective_inverse_warp(img, depth, pose, intrinsics, inverse_pose=False): 175 | """Inverse warp a source image to the target image plane based on projection. 176 | 177 | Args: 178 | img: the source image [batch, height_s, width_s, 3] 179 | depth: depth map of the target image [batch, height_t, width_t] 180 | pose: target to source camera transformation matrix [batch, 6], in the 181 | order of tx, ty, tz, rx, ry, rz 182 | intrinsics: camera intrinsics [batch, 3, 3] 183 | Returns: 184 | Source image inverse warped to the target image plane [batch, height_t, 185 | width_t, 3] 186 | """ 187 | batch, height, width, _ = img.get_shape().as_list() 188 | # Convert pose vector to matrix 189 | pose = pose_vec2mat(pose, inverse_pose) 190 | # Construct pixel grid coordinates 191 | pixel_coords = meshgrid(batch, height, width) 192 | # Convert pixel coordinates to the camera frame 193 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics) 194 | # Construct a 4x4 intrinsic matrix (TODO: can it be 3x4?) 195 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 196 | filler = tf.tile(filler, [batch, 1, 1]) 197 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2) 198 | intrinsics = tf.concat([intrinsics, filler], axis=1) 199 | # Get a 4x4 transformation matrix from 'target' camera frame to 'source' 200 | # pixel frame. 
201 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose) 202 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel) 203 | output_img = bilinear_sampler(img, src_pixel_coords) 204 | return output_img, src_pixel_coords 205 | 206 | # (Yuliang) Inverse warp with flow 207 | def flow_inverse_warp(img, flow): 208 | batch, height, width, _ = img.get_shape().as_list() 209 | x_base = tf.range(width) 210 | y_base = tf.range(height) 211 | x_base = tf.stack([x_base]*height, axis=0) 212 | y_base = tf.transpose(tf.stack([y_base]*width, axis=0)) 213 | flow0 = flow[:, :, :, 0] 214 | flow1 = flow[:, :, :, 1] 215 | flow0 = flow0 + tf.cast(x_base, tf.float32) 216 | flow1 = flow1 + tf.cast(y_base, tf.float32) 217 | coords = tf.stack([flow0, flow1], axis=-1) 218 | output_img = bilinear_sampler(img, coords) 219 | return output_img 220 | 221 | def depth_pose_inverse_warp(img, depth, pose, intrinsics): 222 | batch, height, width, _ = img.get_shape().as_list() 223 | pixel_coords = meshgrid(batch, height, width) 224 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics) 225 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 226 | filler = tf.tile(filler, [batch, 1, 1]) 227 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2) 228 | intrinsics = tf.concat([intrinsics, filler], axis=1) 229 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose) 230 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel) 231 | output_img = bilinear_sampler(img, src_pixel_coords) 232 | return output_img, src_pixel_coords 233 | 234 | def bilinear_sampler(imgs, coords): 235 | """Construct a new image by bilinear sampling from the input image. 236 | 237 | Points falling outside the source image boundary have value 0. 238 | 239 | Args: 240 | imgs: source image to be sampled from [batch, height_s, width_s, channels] 241 | coords: coordinates of source pixels to sample from [batch, height_t, 242 | width_t, 2]. height_t/width_t correspond to the dimensions of the output 243 | image (don't need to be the same as height_s/width_s). The two channels 244 | correspond to x and y coordinates respectively. 
245 | Returns: 246 | A new sampled image [batch, height_t, width_t, channels] 247 | """ 248 | def _repeat(x, n_repeats): 249 | rep = tf.transpose( 250 | tf.expand_dims(tf.ones(shape=tf.stack([ 251 | n_repeats, 252 | ])), 1), [1, 0]) 253 | rep = tf.cast(rep, 'float32') 254 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 255 | return tf.reshape(x, [-1]) 256 | 257 | with tf.name_scope('image_sampling'): 258 | coords_x, coords_y = tf.split(coords, [1, 1], axis=3) 259 | inp_size = imgs.get_shape() 260 | coord_size = coords.get_shape() 261 | out_size = coords.get_shape().as_list() 262 | out_size[3] = imgs.get_shape().as_list()[3] 263 | 264 | coords_x = tf.cast(coords_x, 'float32') 265 | coords_y = tf.cast(coords_y, 'float32') 266 | 267 | x0 = tf.floor(coords_x) 268 | x1 = x0 + 1 269 | y0 = tf.floor(coords_y) 270 | y1 = y0 + 1 271 | 272 | y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32') 273 | x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32') 274 | zero = tf.zeros([1], dtype='float32') 275 | 276 | x0_safe = tf.clip_by_value(x0, zero, x_max) 277 | y0_safe = tf.clip_by_value(y0, zero, y_max) 278 | x1_safe = tf.clip_by_value(x1, zero, x_max) 279 | y1_safe = tf.clip_by_value(y1, zero, y_max) 280 | 281 | wt_x0 = x1_safe - coords_x 282 | wt_x1 = coords_x - x0_safe 283 | wt_y0 = y1_safe - coords_y 284 | wt_y1 = coords_y - y0_safe 285 | 286 | ## indices in the flat image to sample from 287 | dim2 = tf.cast(inp_size[2], 'float32') 288 | dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32') 289 | base = tf.reshape( 290 | _repeat( 291 | tf.cast(tf.range(coord_size[0]), 'float32') * dim1, 292 | coord_size[1] * coord_size[2]), 293 | [out_size[0], out_size[1], out_size[2], 1]) 294 | 295 | base_y0 = base + y0_safe * dim2 296 | base_y1 = base + y1_safe * dim2 297 | idx00 = tf.reshape(x0_safe + base_y0, [-1]) 298 | idx01 = x0_safe + base_y1 299 | idx10 = x1_safe + base_y0 300 | idx11 = x1_safe + base_y1 301 | 302 | ## sample from imgs 303 | imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]])) 304 | imgs_flat = tf.cast(imgs_flat, 'float32') 305 | im00 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size) 306 | im01 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size) 307 | im10 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size) 308 | im11 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size) 309 | 310 | w00 = wt_x0 * wt_y0 311 | w01 = wt_x0 * wt_y1 312 | w10 = wt_x1 * wt_y0 313 | w11 = wt_x1 * wt_y1 314 | 315 | output = tf.add_n([ 316 | w00 * im00, w01 * im01, 317 | w10 * im10, w11 * im11 318 | ]) 319 | return output 320 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/data/__init__.py -------------------------------------------------------------------------------- /data/kitti/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/data/kitti/__init__.py -------------------------------------------------------------------------------- /data/kitti/kitti_odom_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from glob import glob 4 | import os 5 | import scipy.misc 6 | # import sys 7 | # 
sys.path.append('../../') 8 | # from utils.misc import * 9 | 10 | class kitti_odom_loader(object): 11 | def __init__(self, 12 | dataset_dir, 13 | img_height=128, 14 | img_width=416, 15 | seq_length=5): 16 | self.dataset_dir = dataset_dir 17 | self.img_height = img_height 18 | self.img_width = img_width 19 | self.seq_length = seq_length 20 | self.train_seqs = [0, 1, 2, 3, 4, 5, 6, 7, 8] 21 | self.test_seqs = [9, 10] 22 | 23 | self.collect_test_frames() 24 | self.collect_train_frames() 25 | 26 | def collect_test_frames(self): 27 | self.test_frames = [] 28 | for seq in self.test_seqs: 29 | seq_dir = os.path.join(self.dataset_dir, 'sequences', '%.2d' % seq) 30 | img_dir = os.path.join(seq_dir, 'image_2') 31 | N = len(glob(img_dir + '/*.png')) 32 | for n in range(N): 33 | self.test_frames.append('%.2d %.6d' % (seq, n)) 34 | self.num_test = len(self.test_frames) 35 | 36 | def collect_train_frames(self): 37 | self.train_frames = [] 38 | for seq in self.train_seqs: 39 | seq_dir = os.path.join(self.dataset_dir, 'sequences', '%.2d' % seq) 40 | img_dir = os.path.join(seq_dir, 'image_2') 41 | N = len(glob(img_dir + '/*.png')) 42 | for n in range(N): 43 | self.train_frames.append('%.2d %.6d' % (seq, n)) 44 | self.num_train = len(self.train_frames) 45 | 46 | def is_valid_sample(self, frames, tgt_idx): 47 | N = len(frames) 48 | tgt_drive, _ = frames[tgt_idx].split(' ') 49 | if self.seq_length % 2 != 0: 50 | half_offset = int((self.seq_length - 1)/2) 51 | min_src_idx = tgt_idx - half_offset 52 | max_src_idx = tgt_idx + half_offset 53 | if min_src_idx < 0 or max_src_idx >= N: 54 | return False 55 | min_src_drive, _ = frames[min_src_idx].split(' ') 56 | max_src_drive, _ = frames[max_src_idx].split(' ') 57 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive: 58 | return True 59 | return False 60 | else: 61 | left_offset = int(self.seq_length / 2 - 1) 62 | right_offset = int(self.seq_length / 2) 63 | min_src_idx = tgt_idx - left_offset 64 | max_src_idx = tgt_idx + right_offset 65 | if min_src_idx < 0 or max_src_idx >= N: 66 | return False 67 | min_src_drive, _ = frames[min_src_idx].split(' ') 68 | max_src_drive, _ = frames[max_src_idx].split(' ') 69 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive: 70 | return True 71 | return False 72 | 73 | def load_image_sequence(self, frames, tgt_idx, seq_length): 74 | if seq_length % 2 != 0: 75 | half_offset = int((seq_length - 1)/2) 76 | image_seq = [] 77 | for o in range(-half_offset, half_offset+1): 78 | curr_idx = tgt_idx + o 79 | curr_drive, curr_frame_id = frames[curr_idx].split(' ') 80 | curr_img = self.load_image(curr_drive, curr_frame_id) 81 | if o == 0: 82 | zoom_y = self.img_height/curr_img.shape[0] 83 | zoom_x = self.img_width/curr_img.shape[1] 84 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 85 | image_seq.append(curr_img) 86 | return image_seq, zoom_x, zoom_y 87 | else: 88 | left_offset = int(seq_length / 2 - 1) 89 | right_offset = int(seq_length / 2) 90 | image_seq = [] 91 | for o in range(-left_offset, right_offset + 1): 92 | curr_idx = tgt_idx + o 93 | curr_drive, curr_frame_id = frames[curr_idx].split(' ') 94 | curr_img = self.load_image(curr_drive, curr_frame_id) 95 | if o == 0: 96 | zoom_y = self.img_height/curr_img.shape[0] 97 | zoom_x = self.img_width/curr_img.shape[1] 98 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 99 | image_seq.append(curr_img) 100 | return image_seq, zoom_x, zoom_y 101 | 102 | def load_example(self, frames, tgt_idx, load_pose=False): 103 | 
image_seq, zoom_x, zoom_y = self.load_image_sequence(frames, tgt_idx, self.seq_length) 104 | tgt_drive, tgt_frame_id = frames[tgt_idx].split(' ') 105 | intrinsics = self.load_intrinsics(tgt_drive, tgt_frame_id) 106 | intrinsics = self.scale_intrinsics(intrinsics, zoom_x, zoom_y) 107 | example = {} 108 | example['intrinsics'] = intrinsics 109 | example['image_seq'] = image_seq 110 | example['folder_name'] = tgt_drive 111 | example['file_name'] = tgt_frame_id 112 | if load_pose: 113 | pass 114 | return example 115 | 116 | def get_train_example_with_idx(self, tgt_idx): 117 | if not self.is_valid_sample(self.train_frames, tgt_idx): 118 | return False 119 | example = self.load_example(self.train_frames, tgt_idx) 120 | return example 121 | 122 | # def load_frame(self, drive, frame_id): 123 | # img = self.load_image(drive, frame_id) 124 | # try: 125 | # scale_x = np.float(self.img_width)/img.shape[1] 126 | # except: 127 | # print("KITTI loading error!") 128 | # print("Drive = ", drive) 129 | # print("frame_id = ", frame_id) 130 | # raise 131 | # scale_y = np.float(self.img_height)/img.shape[0] 132 | # intrinsics = self.load_intrinsics(drive, frame_id) 133 | # intrinsics = self.scale_intrinsics(intrinsics, scale_x, scale_y) 134 | # img = self.crop_resize(img) 135 | # return img, intrinsics 136 | 137 | def load_image(self, drive, frame_id): 138 | img_file = os.path.join(self.dataset_dir, 'sequences', '%s/image_2/%s.png' % (drive, frame_id)) 139 | img = scipy.misc.imread(img_file) 140 | return img 141 | 142 | def load_intrinsics(self, drive, frame_id): 143 | calib_file = os.path.join(self.dataset_dir, 'sequences', '%s/calib.txt' % drive) 144 | proj_c2p, _ = self.read_calib_file(calib_file) 145 | intrinsics = proj_c2p[:3, :3] 146 | return intrinsics 147 | 148 | # def load_gt_odom(self, drive, tgt_idx, src_idx): 149 | # pose_file = os.path.join(self.dataset_dir, 'poses', '%s.txt' % drive) 150 | # with open(pose_file, 'r') as f: 151 | # poses = f.readlines() 152 | # filler = np.array([0, 0, 0, 1]).reshape((1,4)) 153 | # tgt_pose = np.array(poses[int(tgt_idx)][:-1].split(' ')).astype(np.float32).reshape(3,4) 154 | # tgt_pose = np.concatenate((tgt_pose, filler), axis=0) 155 | # src_pose = np.array(poses[int(src_idx)][:-1].split(' ')).astype(np.float32).reshape(3,4) 156 | # src_pose = np.concatenate((src_pose, filler), axis=0) 157 | # rel_pose = np.dot(np.linalg.inv(src_pose), tgt_pose) 158 | # rel_6DOF = pose_mat_to_6dof(rel_pose) 159 | # return rel_6DOF 160 | 161 | def read_calib_file(self, filepath, cid=2): 162 | """Read in a calibration file and parse into a dictionary.""" 163 | with open(filepath, 'r') as f: 164 | C = f.readlines() 165 | def parseLine(L, shape): 166 | data = L.split() 167 | data = np.array(data[1:]).reshape(shape).astype(np.float32) 168 | return data 169 | proj_c2p = parseLine(C[cid], shape=(3,4)) 170 | proj_v2c = parseLine(C[-1], shape=(3,4)) 171 | filler = np.array([0, 0, 0, 1]).reshape((1,4)) 172 | proj_v2c = np.concatenate((proj_v2c, filler), axis=0) 173 | return proj_c2p, proj_v2c 174 | 175 | def scale_intrinsics(self,mat, sx, sy): 176 | out = np.copy(mat) 177 | out[0,0] *= sx 178 | out[0,2] *= sx 179 | out[1,1] *= sy 180 | out[1,2] *= sy 181 | return out 182 | 183 | 184 | -------------------------------------------------------------------------------- /data/kitti/kitti_raw_loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from glob import glob 4 | import os 5 | import 
scipy.misc 6 | 7 | class kitti_raw_loader(object): 8 | def __init__(self, 9 | dataset_dir, 10 | split, 11 | img_height=256, 12 | img_width=256, 13 | seq_length=5): 14 | dir_path = os.path.dirname(os.path.realpath(__file__)) 15 | static_frames_file = dir_path + '/static_frames.txt' 16 | excluded_frames_file = dir_path + '/excluded_frames.txt' 17 | test_scene_file = dir_path + '/test_scenes_' + split + '.txt' 18 | with open(test_scene_file, 'r') as f: 19 | test_scenes = f.readlines() 20 | self.test_scenes = [t[:-1] for t in test_scenes] 21 | self.dataset_dir = dataset_dir 22 | self.img_height = img_height 23 | self.img_width = img_width 24 | self.seq_length = seq_length 25 | self.cam_ids = ['02', '03'] 26 | self.date_list = ['2011_09_26', '2011_09_28', '2011_09_29', 27 | '2011_09_30', '2011_10_03'] 28 | self.collect_static_frames(static_frames_file) 29 | self.collect_excluded_frames(excluded_frames_file) 30 | self.collect_train_frames() 31 | 32 | def collect_static_frames(self, static_frames_file): 33 | with open(static_frames_file, 'r') as f: 34 | frames = f.readlines() 35 | self.static_frames = [] 36 | for fr in frames: 37 | if fr == '\n': 38 | continue 39 | date, drive, frame_id = fr.split(' ') 40 | curr_fid = '%.10d' % (np.int(frame_id[:-1])) 41 | for cid in self.cam_ids: 42 | self.static_frames.append(drive + ' ' + cid + ' ' + curr_fid) 43 | 44 | # Exclude KITTI flow frames 45 | def collect_excluded_frames(self, excluded_frames_file): 46 | with open(excluded_frames_file, 'r') as f: 47 | frames = f.readlines() 48 | self.excluded_frames = [] 49 | for fr in frames: 50 | if fr == '\n': 51 | continue 52 | date, drive, frame_id = fr.split(' ') 53 | curr_fid = '%.10d' % (np.int(frame_id[:-1])) 54 | for cid in self.cam_ids: 55 | self.excluded_frames.append(drive + ' ' + cid + ' ' + curr_fid) 56 | 57 | def collect_train_frames(self): 58 | all_frames = [] 59 | for date in self.date_list: 60 | drive_set = os.listdir(self.dataset_dir + date + '/') 61 | for dr in drive_set: 62 | drive_dir = os.path.join(self.dataset_dir, date, dr) 63 | if os.path.isdir(drive_dir): 64 | if dr[:-5] in self.test_scenes: 65 | continue 66 | for cam in self.cam_ids: 67 | img_dir = os.path.join(drive_dir, 'image_' + cam, 'data') 68 | N = len(glob(img_dir + '/*.png')) 69 | for n in range(N): 70 | frame_id = '%.10d' % n 71 | all_frames.append(dr + ' ' + cam + ' ' + frame_id) 72 | 73 | 74 | for s in self.static_frames: 75 | try: 76 | all_frames.remove(s) 77 | # print('removed static frame from training: %s' % s) 78 | except: 79 | pass 80 | 81 | for s in self.excluded_frames: 82 | try: 83 | all_frames.remove(s) 84 | except: 85 | pass 86 | 87 | self.train_frames = all_frames 88 | self.num_train = len(self.train_frames) 89 | 90 | def is_valid_sample(self, frames, tgt_idx): 91 | N = len(frames) 92 | tgt_drive, cid, _ = frames[tgt_idx].split(' ') 93 | if self.seq_length % 2 != 0: 94 | half_offset = int((self.seq_length - 1)/2) 95 | min_src_idx = tgt_idx - half_offset 96 | max_src_idx = tgt_idx + half_offset 97 | if min_src_idx < 0 or max_src_idx >= N: 98 | return False 99 | min_src_drive, min_src_cid, _ = frames[min_src_idx].split(' ') 100 | max_src_drive, max_src_cid, _ = frames[max_src_idx].split(' ') 101 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive and cid == min_src_cid and cid == max_src_cid: 102 | return True 103 | return False 104 | else: 105 | left_offset = int(self.seq_length / 2 - 1) 106 | right_offset = int(self.seq_length / 2) 107 | min_src_idx = tgt_idx - left_offset 108 | max_src_idx = tgt_idx + 
right_offset 109 | if min_src_idx < 0 or max_src_idx >= N: 110 | return False 111 | min_src_drive, min_src_cid, _ = frames[min_src_idx].split(' ') 112 | max_src_drive, max_src_cid, _ = frames[max_src_idx].split(' ') 113 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive and cid == min_src_cid and cid == max_src_cid: 114 | return True 115 | return False 116 | 117 | def get_train_example_with_idx(self, tgt_idx): 118 | if not self.is_valid_sample(self.train_frames, tgt_idx): 119 | return False 120 | example = self.load_example(self.train_frames, tgt_idx) 121 | return example 122 | 123 | def load_image_sequence(self, frames, tgt_idx, seq_length): 124 | if seq_length % 2 != 0: 125 | half_offset = int((seq_length - 1)/2) 126 | image_seq = [] 127 | for o in range(-half_offset, half_offset + 1): 128 | curr_idx = tgt_idx + o 129 | curr_drive, curr_cid, curr_frame_id = frames[curr_idx].split(' ') 130 | curr_img = self.load_image_raw(curr_drive, curr_cid, curr_frame_id) 131 | if o == 0: 132 | zoom_y = self.img_height/curr_img.shape[0] 133 | zoom_x = self.img_width/curr_img.shape[1] 134 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 135 | image_seq.append(curr_img) 136 | return image_seq, zoom_x, zoom_y 137 | else: 138 | left_offset = int(seq_length / 2 - 1) 139 | right_offset = int(seq_length / 2) 140 | image_seq = [] 141 | for o in range(-left_offset, right_offset + 1): 142 | curr_idx = tgt_idx + o 143 | curr_drive, curr_cid, curr_frame_id = frames[curr_idx].split(' ') 144 | curr_img = self.load_image_raw(curr_drive, curr_cid, curr_frame_id) 145 | if o == 0: 146 | zoom_y = self.img_height/curr_img.shape[0] 147 | zoom_x = self.img_width/curr_img.shape[1] 148 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width)) 149 | image_seq.append(curr_img) 150 | return image_seq, zoom_x, zoom_y 151 | 152 | def load_example(self, frames, tgt_idx): 153 | image_seq, zoom_x, zoom_y = self.load_image_sequence(frames, tgt_idx, self.seq_length) 154 | tgt_drive, tgt_cid, tgt_frame_id = frames[tgt_idx].split(' ') 155 | intrinsics = self.load_intrinsics_raw(tgt_drive, tgt_cid, tgt_frame_id) 156 | intrinsics = self.scale_intrinsics(intrinsics, zoom_x, zoom_y) 157 | example = {} 158 | example['intrinsics'] = intrinsics 159 | example['image_seq'] = image_seq 160 | example['folder_name'] = tgt_drive + '_' + tgt_cid + '/' 161 | example['file_name'] = tgt_frame_id 162 | return example 163 | 164 | def load_image_raw(self, drive, cid, frame_id): 165 | date = drive[:10] 166 | img_file = os.path.join(self.dataset_dir, date, drive, 'image_' + cid, 'data', frame_id + '.png') 167 | img = scipy.misc.imread(img_file) 168 | return img 169 | 170 | def load_intrinsics_raw(self, drive, cid, frame_id): 171 | date = drive[:10] 172 | calib_file = os.path.join(self.dataset_dir, date, 'calib_cam_to_cam.txt') 173 | 174 | filedata = self.read_raw_calib_file(calib_file) 175 | P_rect = np.reshape(filedata['P_rect_' + cid], (3, 4)) 176 | intrinsics = P_rect[:3, :3] 177 | return intrinsics 178 | 179 | def read_raw_calib_file(self,filepath): 180 | # From https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py 181 | """Read in a calibration file and parse into a dictionary.""" 182 | data = {} 183 | 184 | with open(filepath, 'r') as f: 185 | for line in f.readlines(): 186 | key, value = line.split(':', 1) 187 | # The only non-float values in these files are dates, which 188 | # we don't care about anyway 189 | try: 190 | data[key] = np.array([float(x) for x in value.split()]) 
191 | except ValueError: 192 | pass 193 | return data 194 | 195 | def scale_intrinsics(self, mat, sx, sy): 196 | out = np.copy(mat) 197 | out[0,0] *= sx 198 | out[0,2] *= sx 199 | out[1,1] *= sy 200 | out[1,2] *= sy 201 | return out 202 | 203 | 204 | -------------------------------------------------------------------------------- /data/kitti/test_files_stereo.txt: -------------------------------------------------------------------------------- 1 | training/image_2/000000_10.png 2 | training/image_2/000001_10.png 3 | training/image_2/000002_10.png 4 | training/image_2/000003_10.png 5 | training/image_2/000004_10.png 6 | training/image_2/000005_10.png 7 | training/image_2/000006_10.png 8 | training/image_2/000007_10.png 9 | training/image_2/000008_10.png 10 | training/image_2/000009_10.png 11 | training/image_2/000010_10.png 12 | training/image_2/000011_10.png 13 | training/image_2/000012_10.png 14 | training/image_2/000013_10.png 15 | training/image_2/000014_10.png 16 | training/image_2/000015_10.png 17 | training/image_2/000016_10.png 18 | training/image_2/000017_10.png 19 | training/image_2/000018_10.png 20 | training/image_2/000019_10.png 21 | training/image_2/000020_10.png 22 | training/image_2/000021_10.png 23 | training/image_2/000022_10.png 24 | training/image_2/000023_10.png 25 | training/image_2/000024_10.png 26 | training/image_2/000025_10.png 27 | training/image_2/000026_10.png 28 | training/image_2/000027_10.png 29 | training/image_2/000028_10.png 30 | training/image_2/000029_10.png 31 | training/image_2/000030_10.png 32 | training/image_2/000031_10.png 33 | training/image_2/000032_10.png 34 | training/image_2/000033_10.png 35 | training/image_2/000034_10.png 36 | training/image_2/000035_10.png 37 | training/image_2/000036_10.png 38 | training/image_2/000037_10.png 39 | training/image_2/000038_10.png 40 | training/image_2/000039_10.png 41 | training/image_2/000040_10.png 42 | training/image_2/000041_10.png 43 | training/image_2/000042_10.png 44 | training/image_2/000043_10.png 45 | training/image_2/000044_10.png 46 | training/image_2/000045_10.png 47 | training/image_2/000046_10.png 48 | training/image_2/000047_10.png 49 | training/image_2/000048_10.png 50 | training/image_2/000049_10.png 51 | training/image_2/000050_10.png 52 | training/image_2/000051_10.png 53 | training/image_2/000052_10.png 54 | training/image_2/000053_10.png 55 | training/image_2/000054_10.png 56 | training/image_2/000055_10.png 57 | training/image_2/000056_10.png 58 | training/image_2/000057_10.png 59 | training/image_2/000058_10.png 60 | training/image_2/000059_10.png 61 | training/image_2/000060_10.png 62 | training/image_2/000061_10.png 63 | training/image_2/000062_10.png 64 | training/image_2/000063_10.png 65 | training/image_2/000064_10.png 66 | training/image_2/000065_10.png 67 | training/image_2/000066_10.png 68 | training/image_2/000067_10.png 69 | training/image_2/000068_10.png 70 | training/image_2/000069_10.png 71 | training/image_2/000070_10.png 72 | training/image_2/000071_10.png 73 | training/image_2/000072_10.png 74 | training/image_2/000073_10.png 75 | training/image_2/000074_10.png 76 | training/image_2/000075_10.png 77 | training/image_2/000076_10.png 78 | training/image_2/000077_10.png 79 | training/image_2/000078_10.png 80 | training/image_2/000079_10.png 81 | training/image_2/000080_10.png 82 | training/image_2/000081_10.png 83 | training/image_2/000082_10.png 84 | training/image_2/000083_10.png 85 | training/image_2/000084_10.png 86 | training/image_2/000085_10.png 87 | 
training/image_2/000086_10.png 88 | training/image_2/000087_10.png 89 | training/image_2/000088_10.png 90 | training/image_2/000089_10.png 91 | training/image_2/000090_10.png 92 | training/image_2/000091_10.png 93 | training/image_2/000092_10.png 94 | training/image_2/000093_10.png 95 | training/image_2/000094_10.png 96 | training/image_2/000095_10.png 97 | training/image_2/000096_10.png 98 | training/image_2/000097_10.png 99 | training/image_2/000098_10.png 100 | training/image_2/000099_10.png 101 | training/image_2/000100_10.png 102 | training/image_2/000101_10.png 103 | training/image_2/000102_10.png 104 | training/image_2/000103_10.png 105 | training/image_2/000104_10.png 106 | training/image_2/000105_10.png 107 | training/image_2/000106_10.png 108 | training/image_2/000107_10.png 109 | training/image_2/000108_10.png 110 | training/image_2/000109_10.png 111 | training/image_2/000110_10.png 112 | training/image_2/000111_10.png 113 | training/image_2/000112_10.png 114 | training/image_2/000113_10.png 115 | training/image_2/000114_10.png 116 | training/image_2/000115_10.png 117 | training/image_2/000116_10.png 118 | training/image_2/000117_10.png 119 | training/image_2/000118_10.png 120 | training/image_2/000119_10.png 121 | training/image_2/000120_10.png 122 | training/image_2/000121_10.png 123 | training/image_2/000122_10.png 124 | training/image_2/000123_10.png 125 | training/image_2/000124_10.png 126 | training/image_2/000125_10.png 127 | training/image_2/000126_10.png 128 | training/image_2/000127_10.png 129 | training/image_2/000128_10.png 130 | training/image_2/000129_10.png 131 | training/image_2/000130_10.png 132 | training/image_2/000131_10.png 133 | training/image_2/000132_10.png 134 | training/image_2/000133_10.png 135 | training/image_2/000134_10.png 136 | training/image_2/000135_10.png 137 | training/image_2/000136_10.png 138 | training/image_2/000137_10.png 139 | training/image_2/000138_10.png 140 | training/image_2/000139_10.png 141 | training/image_2/000140_10.png 142 | training/image_2/000141_10.png 143 | training/image_2/000142_10.png 144 | training/image_2/000143_10.png 145 | training/image_2/000144_10.png 146 | training/image_2/000145_10.png 147 | training/image_2/000146_10.png 148 | training/image_2/000147_10.png 149 | training/image_2/000148_10.png 150 | training/image_2/000149_10.png 151 | training/image_2/000150_10.png 152 | training/image_2/000151_10.png 153 | training/image_2/000152_10.png 154 | training/image_2/000153_10.png 155 | training/image_2/000154_10.png 156 | training/image_2/000155_10.png 157 | training/image_2/000156_10.png 158 | training/image_2/000157_10.png 159 | training/image_2/000158_10.png 160 | training/image_2/000159_10.png 161 | training/image_2/000160_10.png 162 | training/image_2/000161_10.png 163 | training/image_2/000162_10.png 164 | training/image_2/000163_10.png 165 | training/image_2/000164_10.png 166 | training/image_2/000165_10.png 167 | training/image_2/000166_10.png 168 | training/image_2/000167_10.png 169 | training/image_2/000168_10.png 170 | training/image_2/000169_10.png 171 | training/image_2/000170_10.png 172 | training/image_2/000171_10.png 173 | training/image_2/000172_10.png 174 | training/image_2/000173_10.png 175 | training/image_2/000174_10.png 176 | training/image_2/000175_10.png 177 | training/image_2/000176_10.png 178 | training/image_2/000177_10.png 179 | training/image_2/000178_10.png 180 | training/image_2/000179_10.png 181 | training/image_2/000180_10.png 182 | training/image_2/000181_10.png 183 | 
training/image_2/000182_10.png 184 | training/image_2/000183_10.png 185 | training/image_2/000184_10.png 186 | training/image_2/000185_10.png 187 | training/image_2/000186_10.png 188 | training/image_2/000187_10.png 189 | training/image_2/000188_10.png 190 | training/image_2/000189_10.png 191 | training/image_2/000190_10.png 192 | training/image_2/000191_10.png 193 | training/image_2/000192_10.png 194 | training/image_2/000193_10.png 195 | training/image_2/000194_10.png 196 | training/image_2/000195_10.png 197 | training/image_2/000196_10.png 198 | training/image_2/000197_10.png 199 | training/image_2/000198_10.png 200 | training/image_2/000199_10.png 201 | -------------------------------------------------------------------------------- /data/kitti/test_scenes_eigen.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26_drive_0117 2 | 2011_09_28_drive_0002 3 | 2011_09_26_drive_0052 4 | 2011_09_30_drive_0016 5 | 2011_09_26_drive_0059 6 | 2011_09_26_drive_0027 7 | 2011_09_26_drive_0020 8 | 2011_09_26_drive_0009 9 | 2011_09_26_drive_0013 10 | 2011_09_26_drive_0101 11 | 2011_09_26_drive_0046 12 | 2011_09_26_drive_0029 13 | 2011_09_26_drive_0064 14 | 2011_09_26_drive_0048 15 | 2011_10_03_drive_0027 16 | 2011_09_26_drive_0002 17 | 2011_09_26_drive_0036 18 | 2011_09_29_drive_0071 19 | 2011_10_03_drive_0047 20 | 2011_09_30_drive_0027 21 | 2011_09_26_drive_0086 22 | 2011_09_26_drive_0084 23 | 2011_09_26_drive_0096 24 | 2011_09_30_drive_0018 25 | 2011_09_26_drive_0106 26 | 2011_09_26_drive_0056 27 | 2011_09_26_drive_0023 28 | 2011_09_26_drive_0093 29 | -------------------------------------------------------------------------------- /data/kitti/test_scenes_stereo.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26_drive_0005 2 | 2011_09_26_drive_0009 3 | 2011_09_26_drive_0011 4 | 2011_09_26_drive_0013 5 | 2011_09_26_drive_0014 6 | 2011_09_26_drive_0015 7 | 2011_09_26_drive_0017 8 | 2011_09_26_drive_0018 9 | 2011_09_26_drive_0019 10 | 2011_09_26_drive_0022 11 | 2011_09_26_drive_0027 12 | 2011_09_26_drive_0028 13 | 2011_09_26_drive_0029 14 | 2011_09_26_drive_0032 15 | 2011_09_26_drive_0036 16 | 2011_09_26_drive_0046 17 | 2011_09_26_drive_0051 18 | 2011_09_26_drive_0056 19 | 2011_09_26_drive_0057 20 | 2011_09_26_drive_0059 21 | 2011_09_26_drive_0070 22 | 2011_09_26_drive_0084 23 | 2011_09_26_drive_0096 24 | 2011_09_26_drive_0101 25 | 2011_09_26_drive_0104 26 | 2011_09_28_drive_0002 27 | 2011_09_29_drive_0004 28 | 2011_09_29_drive_0071 29 | 2011_10_03_drive_0047 30 | -------------------------------------------------------------------------------- /data/prepare_train_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import argparse 3 | import scipy.misc 4 | import numpy as np 5 | from glob import glob 6 | from joblib import Parallel, delayed 7 | import os 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--dataset_dir", type=str, default='./dataset/KITTI/raw/data/', help="where the dataset is stored") 11 | parser.add_argument("--dataset_name", type=str, default="kitti_raw_eigen", choices=["kitti_raw_eigen", "kitti_odom"]) 12 | parser.add_argument("--dump_root", type=str, default='./dataset/dfnet_train/', help="Where to dump the data") 13 | parser.add_argument("--seq_length", type=int, default=5, help="Length of each training sequence") 14 | parser.add_argument("--img_height", type=int, default=320, help="image 
height") 15 | parser.add_argument("--img_width", type=int, default=1152, help="image width") 16 | parser.add_argument("--num_threads", type=int, default=4, help="number of threads to use") 17 | args = parser.parse_args() 18 | 19 | def concat_image_seq(seq): 20 | for i, im in enumerate(seq): 21 | if i == 0: 22 | res = im 23 | else: 24 | res = np.hstack((res, im)) 25 | return res 26 | 27 | def dump_example(n): 28 | if n % 2000 == 0: 29 | print('Progress %d/%d....' % (n, data_loader.num_train)) 30 | example = data_loader.get_train_example_with_idx(n) 31 | if example == False: 32 | return 33 | image_seq = concat_image_seq(example['image_seq']) 34 | intrinsics = example['intrinsics'] 35 | fx = intrinsics[0, 0] 36 | fy = intrinsics[1, 1] 37 | cx = intrinsics[0, 2] 38 | cy = intrinsics[1, 2] 39 | dump_dir = os.path.join(args.dump_root, example['folder_name']) 40 | # if not os.path.isdir(dump_dir): 41 | # os.makedirs(dump_dir, exist_ok=True) 42 | try: 43 | os.makedirs(dump_dir) 44 | except OSError: 45 | if not os.path.isdir(dump_dir): 46 | raise 47 | dump_img_file = os.path.join(dump_dir, '%s.jpg' % example['file_name']) 48 | scipy.misc.imsave(dump_img_file, image_seq.astype(np.uint8)) 49 | dump_cam_file = os.path.join(dump_dir, '%s_cam.txt' % example['file_name']) 50 | with open(dump_cam_file, 'w') as f: 51 | f.write('%f,0.,%f,0.,%f,%f,0.,0.,1.' % (fx, cx, fy, cy)) 52 | # Ground truth pose 53 | if 'pose_seq' in example.keys(): 54 | pose_seq = example['pose_seq'] 55 | dump_pose_file = os.path.join(dump_dir, '%s_pose.txt' % example['file_name']) 56 | tgt2src_6d, src2tgt_6d = pose_seq 57 | with open(dump_pose_file, 'w') as f: 58 | tx, ty, tz, rx, ry, rz = tgt2src_6d 59 | f.write('%f,%f,%f,%f,%f,%f,' % (tx, ty, tz, rx, ry, rz)) 60 | tx, ty, tz, rx, ry, rz = src2tgt_6d 61 | f.write('%f,%f,%f,%f,%f,%f' % (tx, ty, tz, rx, ry, rz)) 62 | else: 63 | pass 64 | 65 | def main(): 66 | if not os.path.exists(args.dump_root): 67 | os.makedirs(args.dump_root) 68 | 69 | global data_loader 70 | if args.dataset_name == 'kitti_odom': 71 | from kitti.kitti_odom_loader import kitti_odom_loader 72 | data_loader = kitti_odom_loader(args.dataset_dir, 73 | img_height=args.img_height, 74 | img_width=args.img_width, 75 | seq_length=args.seq_length) 76 | 77 | if args.dataset_name == 'kitti_raw_eigen': 78 | from kitti.kitti_raw_loader import kitti_raw_loader 79 | data_loader = kitti_raw_loader(args.dataset_dir, 80 | split='eigen', 81 | img_height=args.img_height, 82 | img_width=args.img_width, 83 | seq_length=args.seq_length) 84 | 85 | #Parallel(n_jobs=args.num_threads)(delayed(dump_example)(n) for n in range(data_loader.num_train)) 86 | # Debug only 87 | for n in range(data_loader.num_train): 88 | dump_example(n) 89 | 90 | # Split into train/val 91 | np.random.seed(8964) 92 | subfolders = os.listdir(args.dump_root) 93 | with open(os.path.join(args.dump_root, 'train.txt'), 'w') as tf: 94 | with open(os.path.join(args.dump_root, 'val.txt'), 'w') as vf: 95 | for s in subfolders: 96 | if not os.path.isdir(os.path.join(args.dump_root, '%s' % s)): 97 | continue 98 | imfiles = glob(os.path.join(args.dump_root, s, '*.jpg')) 99 | frame_ids = [os.path.basename(fi).split('.')[0] for fi in imfiles] 100 | for frame in frame_ids: 101 | 102 | if np.random.random() < 0.1: 103 | vf.write('%s %s\n' % (s, frame)) 104 | else: 105 | tf.write('%s %s\n' % (s, frame)) 106 | 107 | 108 | main() 109 | 110 | -------------------------------------------------------------------------------- /kitti_eval/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/kitti_eval/__init__.py -------------------------------------------------------------------------------- /kitti_eval/depth_evaluation_utils.py: -------------------------------------------------------------------------------- 1 | # Mostly based on the code written by Clement Godard: 2 | # https://github.com/mrharicot/monodepth/blob/master/utils/evaluation_utils.py 3 | import numpy as np 4 | # import pandas as pd 5 | import os 6 | import cv2 7 | from collections import Counter 8 | import pickle 9 | from scipy.interpolate import LinearNDInterpolator # used by lin_interp below 10 | def compute_errors(gt, pred): 11 | thresh = np.maximum((gt / pred), (pred / gt)) 12 | a1 = (thresh < 1.25 ).mean() 13 | a2 = (thresh < 1.25 ** 2).mean() 14 | a3 = (thresh < 1.25 ** 3).mean() 15 | 16 | rmse = (gt - pred) ** 2 17 | rmse = np.sqrt(rmse.mean()) 18 | 19 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 20 | rmse_log = np.sqrt(rmse_log.mean()) 21 | 22 | abs_rel = np.mean(np.abs(gt - pred) / gt) 23 | 24 | sq_rel = np.mean(((gt - pred)**2) / gt) 25 | 26 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 27 | 28 | ############################################################################### 29 | ####################### KITTI 30 | 31 | width_to_focal = dict() 32 | width_to_focal[1242] = 721.5377 33 | width_to_focal[1241] = 718.856 34 | width_to_focal[1224] = 707.0493 35 | width_to_focal[1238] = 718.3351 36 | 37 | def load_gt_disp_kitti(path): 38 | gt_disparities = [] 39 | for i in range(200): 40 | disp = cv2.imread(path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1) 41 | disp = disp.astype(np.float32) / 256 42 | gt_disparities.append(disp) 43 | return gt_disparities 44 | 45 | def convert_disps_to_depths_kitti(gt_disparities, pred_disparities): 46 | gt_depths = [] 47 | pred_depths = [] 48 | pred_disparities_resized = [] 49 | 50 | for i in range(len(gt_disparities)): 51 | gt_disp = gt_disparities[i] 52 | height, width = gt_disp.shape 53 | 54 | pred_disp = pred_disparities[i] 55 | pred_disp = width * cv2.resize(pred_disp, (width, height), interpolation=cv2.INTER_LINEAR) 56 | 57 | pred_disparities_resized.append(pred_disp) 58 | 59 | mask = gt_disp > 0 60 | 61 | gt_depth = width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask)) 62 | pred_depth = width_to_focal[width] * 0.54 / pred_disp 63 | 64 | gt_depths.append(gt_depth) 65 | pred_depths.append(pred_depth) 66 | return gt_depths, pred_depths, pred_disparities_resized 67 | 68 | 69 | ############################################################################### 70 | ####################### EIGEN 71 | 72 | def read_text_lines(file_path): 73 | f = open(file_path, 'r') 74 | lines = f.readlines() 75 | f.close() 76 | lines = [l.rstrip() for l in lines] 77 | return lines 78 | 79 | def read_file_data(files, data_root): 80 | gt_files = [] 81 | gt_calib = [] 82 | im_sizes = [] 83 | im_files = [] 84 | cams = [] 85 | num_probs = 0 86 | for filename in files: 87 | filename = filename.split()[0] 88 | splits = filename.split('/') 89 | # camera_id = filename[-1] # 2 is left, 3 is right 90 | date = splits[0] 91 | im_id = splits[4][:10] 92 | file_root = '{}/{}' 93 | 94 | im = filename 95 | vel = '{}/{}/velodyne_points/data/{}.bin'.format(splits[0], splits[1], im_id) 96 | 97 | if os.path.isfile(data_root + im): 98 | gt_files.append(data_root + vel) 99 | gt_calib.append(data_root + date + '/') 100 | im_sizes.append(cv2.imread(data_root + im).shape[:2]) 101 | 
im_files.append(data_root + im) 102 | cams.append(2) 103 | else: 104 | num_probs += 1 105 | print('{} missing'.format(data_root + im)) 106 | # print(num_probs, 'files missing') 107 | 108 | return gt_files, gt_calib, im_sizes, im_files, cams 109 | 110 | def load_velodyne_points(file_name): 111 | # adapted from https://github.com/hunse/kitti 112 | points = np.fromfile(file_name, dtype=np.float32).reshape(-1, 4) 113 | points[:, 3] = 1.0 # homogeneous 114 | return points 115 | 116 | 117 | def lin_interp(shape, xyd): 118 | # taken from https://github.com/hunse/kitti 119 | m, n = shape 120 | ij, d = xyd[:, 1::-1], xyd[:, 2] 121 | f = LinearNDInterpolator(ij, d, fill_value=0) 122 | J, I = np.meshgrid(np.arange(n), np.arange(m)) 123 | IJ = np.vstack([I.flatten(), J.flatten()]).T 124 | disparity = f(IJ).reshape(shape) 125 | return disparity 126 | 127 | 128 | def read_calib_file(path): 129 | # taken from https://github.com/hunse/kitti 130 | float_chars = set("0123456789.e+- ") 131 | data = {} 132 | with open(path, 'r') as f: 133 | for line in f.readlines(): 134 | key, value = line.split(':', 1) 135 | value = value.strip() 136 | data[key] = value 137 | if float_chars.issuperset(value): 138 | # try to cast to float array 139 | try: 140 | # (Yuliang) Add list() outside of map() for Python3.x 141 | data[key] = np.array(list(map(float, value.split(' ')))) 142 | except ValueError: 143 | # casting error: data[key] already eq. value, so pass 144 | pass 145 | 146 | return data 147 | 148 | 149 | def get_focal_length_baseline(calib_dir, cam=2): 150 | cam2cam = read_calib_file(calib_dir + 'calib_cam_to_cam.txt') 151 | P2_rect = cam2cam['P_rect_02'].reshape(3,4) 152 | P3_rect = cam2cam['P_rect_03'].reshape(3,4) 153 | 154 | # cam 2 is left of camera 0 -6cm 155 | # cam 3 is to the right +54cm 156 | b2 = P2_rect[0,3] / -P2_rect[0,0] 157 | b3 = P3_rect[0,3] / -P3_rect[0,0] 158 | baseline = b3-b2 159 | 160 | if cam==2: 161 | focal_length = P2_rect[0,0] 162 | elif cam==3: 163 | focal_length = P3_rect[0,0] 164 | 165 | return focal_length, baseline 166 | 167 | 168 | def sub2ind(matrixSize, rowSub, colSub): 169 | m, n = matrixSize 170 | return rowSub * (n-1) + colSub - 1 171 | 172 | def generate_depth_map(calib_dir, velo_file_name, im_shape, cam=2, interp=False, vel_depth=False): 173 | # load calibration files 174 | cam2cam = read_calib_file(calib_dir + 'calib_cam_to_cam.txt') 175 | velo2cam = read_calib_file(calib_dir + 'calib_velo_to_cam.txt') 176 | velo2cam = np.hstack((velo2cam['R'].reshape(3,3), velo2cam['T'][..., np.newaxis])) 177 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 178 | 179 | # compute projection matrix velodyne->image plane 180 | R_cam2rect = np.eye(4) 181 | R_cam2rect[:3,:3] = cam2cam['R_rect_00'].reshape(3,3) 182 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3,4) 183 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 184 | 185 | # load velodyne points and remove all behind image plane (approximation) 186 | # each row of the velodyne data is forward, left, up, reflectance 187 | velo = load_velodyne_points(velo_file_name) 188 | velo = velo[velo[:, 0] >= 0, :] 189 | 190 | # project the points to the camera 191 | velo_pts_im = np.dot(P_velo2im, velo.T).T 192 | velo_pts_im[:, :2] = velo_pts_im[:,:2] / velo_pts_im[:,2][..., np.newaxis] 193 | 194 | if vel_depth: 195 | velo_pts_im[:, 2] = velo[:, 0] 196 | 197 | # check if in bounds 198 | # use minus 1 to get the exact same value as KITTI matlab code 199 | velo_pts_im[:, 0] = np.round(velo_pts_im[:,0]) - 1 200 | velo_pts_im[:, 1] = 
np.round(velo_pts_im[:,1]) - 1 201 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 202 | val_inds = val_inds & (velo_pts_im[:,0] < im_shape[1]) & (velo_pts_im[:,1] < im_shape[0]) 203 | velo_pts_im = velo_pts_im[val_inds, :] 204 | 205 | # project to image 206 | depth = np.zeros((im_shape)) 207 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2] 208 | 209 | # find the duplicate points and choose the closest depth 210 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 211 | # (Yuliang) iteritems() -> items() 212 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 213 | for dd in dupe_inds: 214 | pts = np.where(inds==dd)[0] 215 | x_loc = int(velo_pts_im[pts[0], 0]) 216 | y_loc = int(velo_pts_im[pts[0], 1]) 217 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 218 | depth[depth<0] = 0 219 | 220 | if interp: 221 | # interpolate the depth map to fill in holes 222 | depth_interp = lin_interp(im_shape, velo_pts_im) 223 | return depth, depth_interp 224 | else: 225 | return depth 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /kitti_eval/eval_depth.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import sys 3 | import cv2 4 | import os 5 | import numpy as np 6 | import argparse 7 | from depth_evaluation_utils import * 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--kitti_dir", type=str, default='./dataset/KITTI/raw/data/', help='Path to the KITTI dataset directory') 11 | parser.add_argument("--pred_file", type=str, help="Path to the prediction file") 12 | parser.add_argument("--split", type=str, default='val') 13 | parser.add_argument('--min_depth', type=float, default=1e-3, help="Threshold for minimum depth") 14 | parser.add_argument('--max_depth', type=float, default=80, help="Threshold for maximum depth") 15 | args = parser.parse_args() 16 | 17 | def main(): 18 | if args.split == 'val': 19 | test_file_list = './data/kitti/val_files_eigen.txt' 20 | elif args.split == 'test': 21 | test_file_list = './data/kitti/test_files_eigen.txt' 22 | else: 23 | assert False 24 | 25 | pred_depths = np.load(args.pred_file) 26 | test_files = read_text_lines(test_file_list) 27 | gt_files, gt_calib, im_sizes, im_files, cams = \ 28 | read_file_data(test_files, args.kitti_dir) 29 | num_test = len(im_files) 30 | gt_depths = [] 31 | pred_depths_resized = [] 32 | invalid_ids = [] 33 | for t_id in range(num_test): 34 | camera_id = cams[t_id] # 2 is left, 3 is right 35 | # Some frames in val set do not have ground truth labels 36 | try: 37 | depth = generate_depth_map(gt_calib[t_id], 38 | gt_files[t_id], 39 | im_sizes[t_id], 40 | camera_id, 41 | False, 42 | True) 43 | gt_depths.append(depth.astype(np.float32)) 44 | 45 | pred_depths_resized.append( 46 | cv2.resize(pred_depths[t_id], 47 | (im_sizes[t_id][1], im_sizes[t_id][0]), 48 | interpolation=cv2.INTER_LINEAR)) 49 | except: 50 | invalid_ids.append(t_id) 51 | print(t_id) 52 | pred_depths = pred_depths_resized 53 | num_test -= len(invalid_ids) 54 | 55 | rms = np.zeros(num_test, np.float32) 56 | log_rms = np.zeros(num_test, np.float32) 57 | abs_rel = np.zeros(num_test, np.float32) 58 | sq_rel = np.zeros(num_test, np.float32) 59 | d1_all = np.zeros(num_test, np.float32) 60 | a1 = np.zeros(num_test, np.float32) 61 | a2 = np.zeros(num_test, np.float32) 62 | a3 = np.zeros(num_test, np.float32) 63 | for i in range(num_test): 
64 | gt_depth = gt_depths[i] 65 | pred_depth = np.copy(pred_depths[i]) 66 | 67 | mask = np.logical_and(gt_depth > args.min_depth, 68 | gt_depth < args.max_depth) 69 | # crop used by Garg ECCV16 to reproduce Eigen NIPS14 results 70 | # if used on gt_size 370x1224 produces a crop of [-218, -3, 44, 1180] 71 | gt_height, gt_width = gt_depth.shape 72 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 73 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 74 | 75 | crop_mask = np.zeros(mask.shape) 76 | crop_mask[crop[0]:crop[1],crop[2]:crop[3]] = 1 77 | mask = np.logical_and(mask, crop_mask) 78 | 79 | # Scale matching 80 | scalor = np.median(gt_depth[mask])/np.median(pred_depth[mask]) 81 | pred_depth[mask] *= scalor 82 | 83 | pred_depth[pred_depth < args.min_depth] = args.min_depth 84 | pred_depth[pred_depth > args.max_depth] = args.max_depth 85 | abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = \ 86 | compute_errors(gt_depth[mask], pred_depth[mask]) 87 | 88 | print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('abs_rel', 'sq_rel', 'rms', 'log_rms', 'd1_all', 'a1', 'a2', 'a3')) 89 | print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(abs_rel.mean(), sq_rel.mean(), rms.mean(), log_rms.mean(), d1_all.mean(), a1.mean(), a2.mean(), a3.mean())) 90 | 91 | main() 92 | -------------------------------------------------------------------------------- /kitti_eval/eval_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import numpy as np 4 | import argparse 5 | from glob import glob 6 | from pose_evaluation_utils import * 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--gtruth_dir", type=str, 10 | help='Path to the directory with ground-truth trajectories') 11 | parser.add_argument("--pred_dir", type=str, 12 | help="Path to the directory with predicted trajectories") 13 | args = parser.parse_args() 14 | 15 | def main(): 16 | pred_files = glob(os.path.join(args.pred_dir, '*.txt')) 17 | ate_all = [] 18 | for i in range(len(pred_files)): 19 | gtruth_file = os.path.join(args.gtruth_dir, os.path.basename(pred_files[i])) 20 | if not os.path.exists(gtruth_file): 21 | continue 22 | ate = compute_ate(gtruth_file, pred_files[i]) 23 | if ate == False: 24 | continue 25 | ate_all.append(ate) 26 | ate_all = np.array(ate_all) 27 | print("Predictions dir: %s" % args.pred_dir) 28 | print("ATE mean: %.4f, std: %.4f" % (np.mean(ate_all), np.std(ate_all))) 29 | main() 30 | -------------------------------------------------------------------------------- /misc/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download pre-trained models 4 | echo "Downloading pre-trained models" 5 | wget https://filebox.ece.vt.edu/~ylzou/eccv2018dfnet/pretrained.tar 6 | tar -xvf pretrained.tar 7 | 8 | # Download training data 9 | echo "Downloading training data" 10 | mkdir dataset 11 | cd dataset 12 | wget https://filebox.ece.vt.edu/~ylzou/eccv2018dfnet/kitti_5frame_1152_320.tar 13 | tar -xvf kitti_5frame_1152_320.tar 14 | cd .. 
15 | 16 | -------------------------------------------------------------------------------- /misc/zou2018dfnet.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/misc/zou2018dfnet.gif -------------------------------------------------------------------------------- /test_flownet_2012.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | import PIL.Image as pil 7 | import png 8 | import scipy 9 | 10 | from core import flow_to_image 11 | from core import flownet 12 | 13 | flags = tf.app.flags 14 | flags.DEFINE_integer("batch_size", 1, "The size of of a sample batch") 15 | flags.DEFINE_integer("img_height", 384, "Image height") 16 | flags.DEFINE_integer("img_width", 1280, "Image width") 17 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/flow2012/training/', "Dataset directory") 18 | flags.DEFINE_string("output_dir", None, "Output directory") 19 | flags.DEFINE_string("ckpt_file", 'pretrained/unflowc_pre', "checkpoint file") 20 | FLAGS = flags.FLAGS 21 | 22 | FLOW_SCALE = 5.0 23 | 24 | # kitti 2012 has 194 training pairs, 195 test pairs 25 | if 'train' in FLAGS.dataset_dir: 26 | NUM = 194 27 | elif 'test' in FLAGS.dataset_dir: 28 | NUM = 195 29 | 30 | def get_flow(path): 31 | bgr = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 32 | invalid = bgr[:, :, 0] == 0 33 | out_flow = (bgr[:, :, 2:0:-1].astype('f4') - 2**15) / 64. 34 | out_flow[invalid] = 0 35 | return out_flow, bgr[:, :, 0] 36 | 37 | def compute_flow_error(gt_flow, pred_flow, mask): 38 | H, W, _ = gt_flow.shape 39 | old_H, old_W, _ = pred_flow.shape 40 | # Reshape predicted flow to have same size as ground truth 41 | pred0 = cv2.resize(pred_flow[:,:,0], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*W/old_W) 42 | pred1 = cv2.resize(pred_flow[:,:,1], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*H/old_H) 43 | pred = np.stack((pred0, pred1), axis=-1) * FLOW_SCALE 44 | 45 | err = np.sqrt(np.sum(np.square(gt_flow - pred), axis=-1)) 46 | err_valid = np.sum(err * mask) / np.sum(mask) 47 | return err_valid, pred 48 | 49 | def write_flow_png(name, flow): 50 | H, W, _ = flow.shape 51 | out = np.ones((H, W, 3), dtype=np.uint64) 52 | out[:,:,1] = np.minimum(np.maximum(flow[:,:,1]*64.+2**15, 0), 2**16).astype(np.uint64) 53 | out[:,:,0] = np.minimum(np.maximum(flow[:,:,0]*64.+2**15, 0), 2**16).astype(np.uint64) 54 | with open(name, 'wb') as f: 55 | writer = png.Writer(width=W, height=H, bitdepth=16) 56 | im2list = out.reshape(-1, out.shape[1]*out.shape[2]).tolist() 57 | writer.write(f, im2list) 58 | 59 | 60 | def pick_frame(path): 61 | new_files = [] 62 | # flow2012 dataset only has 194 pairs 63 | for i in range(NUM): 64 | frame1 = os.path.join(path, 'colored_0', '{:06d}'.format(i) + '_10.png') 65 | frame2 = os.path.join(path, 'colored_0', '{:06d}'.format(i) + '_11.png') 66 | new_files.append([frame1, frame2]) 67 | return new_files 68 | 69 | def main(_): 70 | new_files = pick_frame(FLAGS.dataset_dir) 71 | basename = os.path.basename(FLAGS.ckpt_file) 72 | 73 | im1_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 74 | im2_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 75 | pred_flows = flownet(im1_pl, im2_pl, flownet_spec='C') 76 | 77 | saver = tf.train.Saver([var for var in 
tf.all_variables() if 'flow' in var.name]) 78 | config = tf.ConfigProto() 79 | config.gpu_options.allow_growth = True 80 | errs = np.zeros(NUM) 81 | 82 | if not FLAGS.output_dir is None and not os.path.exists(FLAGS.output_dir): 83 | os.makedirs(FLAGS.output_dir) 84 | 85 | with tf.Session(config=config) as sess: 86 | saver.restore(sess, FLAGS.ckpt_file) 87 | # For val set 88 | for t in range(0, len(new_files)): 89 | if t % 100 == 0: 90 | print('processing %s: %d/%d' % (basename, t, len(new_files))) 91 | raw_im0 = pil.open(new_files[t][0]) 92 | raw_im1 = pil.open(new_files[t][1]) 93 | scaled_im0 = raw_im0.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 94 | scaled_im1 = raw_im1.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 95 | # Minus ImageNet channel mean 96 | channel_mean = np.array([104.920005, 110.1753, 114.785955]) 97 | scaled_im0 = (np.expand_dims(np.array(scaled_im0), axis=0).astype(np.float32)-channel_mean)/255. 98 | scaled_im1 = (np.expand_dims(np.array(scaled_im1), axis=0).astype(np.float32)-channel_mean)/255. 99 | feed_dict = {im1_pl: scaled_im0, im2_pl: scaled_im1} 100 | pred_flows_val = sess.run(pred_flows, feed_dict=feed_dict) 101 | pred_flow_val = pred_flows_val[-1][0] 102 | 103 | # Only for training set 104 | if 'train' in FLAGS.dataset_dir: 105 | # no occlusion 106 | #gt_flow, mask = get_flow(new_files[t][0].replace('colored_0', 'flow_noc')) 107 | # all 108 | gt_flow, mask = get_flow(new_files[t][0].replace('colored_0', 'flow_occ')) 109 | errs[t], scaled_pred = compute_flow_error(gt_flow, pred_flow_val[0,:,:,:], mask) 110 | 111 | # Save for eval 112 | if 'test' in FLAGS.dataset_dir: 113 | _, scaled_pred = compute_flow_error(np.array(raw_im0)[:,:,:2], pred_flow_val[0,:,:,:], np.array(raw_im0)[:,:,0]) 114 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]) 115 | write_flow_png(png_name, scaled_pred) 116 | 117 | # Save for visual colormap 118 | if not 'test' in FLAGS.dataset_dir and not FLAGS.output_dir is None: 119 | flow_im = flow_to_image(scaled_pred) 120 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]).replace('png', 'jpg') 121 | cv2.imwrite(png_name, flow_im[:,:,::-1]) 122 | 123 | print('{:>10}'.format('(valid) endpoint error')) 124 | print('{:10.4f}'.format(errs.mean())) 125 | 126 | if __name__ == '__main__': 127 | tf.app.run() 128 | -------------------------------------------------------------------------------- /test_flownet_2015.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | import PIL.Image as pil 7 | import png 8 | import scipy 9 | 10 | from core import flow_to_image 11 | from core import flownet 12 | 13 | flags = tf.app.flags 14 | flags.DEFINE_integer("batch_size", 1, "The size of a sample batch") 15 | flags.DEFINE_integer("img_height", 384, "Image height") 16 | flags.DEFINE_integer("img_width", 1280, "Image width") 17 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/flow2015/training/', "Dataset directory") 18 | flags.DEFINE_string("output_dir", None, "Output directory") 19 | flags.DEFINE_string("ckpt_file", 'pretrained/unflowc_pre', "checkpoint file") 20 | FLAGS = flags.FLAGS 21 | 22 | FLOW_SCALE = 5.0 23 | 24 | # kitti 2015 has 200 training pairs, 200 test pairs 25 | NUM = 200 26 | 27 | def get_flow(path): 28 | bgr = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) 29 | invalid = bgr[:, :, 0] == 0 30 | out_flow = 
(bgr[:, :, 2:0:-1].astype('f4') - 2**15) / 64. 31 | out_flow[invalid] = 0 32 | return out_flow, bgr[:, :, 0] 33 | 34 | def compute_flow_error(gt_flow, pred_flow, mask): 35 | H, W, _ = gt_flow.shape 36 | old_H, old_W, _ = pred_flow.shape 37 | # Reshape predicted flow to have same size as ground truth 38 | pred0 = cv2.resize(pred_flow[:,:,0], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*W/old_W) 39 | pred1 = cv2.resize(pred_flow[:,:,1], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*H/old_H) 40 | pred = np.stack((pred0, pred1), axis=-1) * FLOW_SCALE 41 | 42 | err = np.sqrt(np.sum(np.square(gt_flow - pred), axis=-1)) 43 | err_valid = np.sum(err * mask) / np.sum(mask) 44 | 45 | gt_mag = np.sqrt(np.sum(np.square(gt_flow), axis=-1)) 46 | mask1 = err > 3. 47 | mask2 = err / (gt_mag+1e-12) > 0.05 48 | final_mask = np.logical_and(np.logical_and(mask1, mask2), mask) 49 | f1 = final_mask.sum()/mask.sum() 50 | 51 | return err_valid, pred, f1 52 | 53 | def write_flow_png(name, flow): 54 | H, W, _ = flow.shape 55 | out = np.ones((H, W, 3), dtype=np.uint64) 56 | out[:,:,1] = np.minimum(np.maximum(flow[:,:,1]*64.+2**15, 0), 2**16).astype(np.uint64) 57 | out[:,:,0] = np.minimum(np.maximum(flow[:,:,0]*64.+2**15, 0), 2**16).astype(np.uint64) 58 | with open(name, 'wb') as f: 59 | writer = png.Writer(width=W, height=H, bitdepth=16) 60 | im2list = out.reshape(-1, out.shape[1]*out.shape[2]).tolist() 61 | writer.write(f, im2list) 62 | 63 | 64 | def pick_frame(path): 65 | new_files = [] 66 | for i in range(NUM): 67 | frame1 = os.path.join(path, 'image_2', '{:06d}'.format(i) + '_10.png') 68 | frame2 = os.path.join(path, 'image_2', '{:06d}'.format(i) + '_11.png') 69 | new_files.append([frame1, frame2]) 70 | return new_files 71 | 72 | def main(_): 73 | new_files = pick_frame(FLAGS.dataset_dir) 74 | basename = os.path.basename(FLAGS.ckpt_file) 75 | 76 | im1_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 77 | im2_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3)) 78 | pred_flows = flownet(im1_pl, im2_pl, flownet_spec='C') 79 | 80 | saver = tf.train.Saver([var for var in tf.all_variables()]) 81 | config = tf.ConfigProto() 82 | config.gpu_options.allow_growth = True 83 | errs = np.zeros(NUM) 84 | f1 = np.zeros(NUM) 85 | 86 | if not FLAGS.output_dir is None and not os.path.exists(FLAGS.output_dir): 87 | os.makedirs(FLAGS.output_dir) 88 | 89 | with tf.Session(config=config) as sess: 90 | saver.restore(sess, FLAGS.ckpt_file) 91 | # For val set 92 | for t in range(0, len(new_files)): 93 | if t % 100 == 0: 94 | print('processing %s: %d/%d' % (basename, t, len(new_files))) 95 | raw_im0 = pil.open(new_files[t][0]) 96 | raw_im1 = pil.open(new_files[t][1]) 97 | scaled_im0 = raw_im0.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 98 | scaled_im1 = raw_im1.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 99 | # Minus ImageNet channel mean 100 | channel_mean = np.array([104.920005, 110.1753, 114.785955]) 101 | scaled_im0 = (np.expand_dims(np.array(scaled_im0), axis=0).astype(np.float32)-channel_mean)/255. 102 | scaled_im1 = (np.expand_dims(np.array(scaled_im1), axis=0).astype(np.float32)-channel_mean)/255. 
103 | feed_dict = {im1_pl: scaled_im0, im2_pl: scaled_im1} 104 | pred_flows_val = sess.run(pred_flows, feed_dict=feed_dict) 105 | pred_flow_val = pred_flows_val[-1][0] 106 | 107 | # Only for training set 108 | if 'train' in FLAGS.dataset_dir: 109 | # no occlusion 110 | #gt_flow, mask = get_flow(new_files[t][0].replace('image_2', 'flow_noc')) 111 | # all 112 | gt_flow, mask = get_flow(new_files[t][0].replace('image_2', 'flow_occ')) 113 | errs[t], scaled_pred, f1[t] = compute_flow_error(gt_flow, pred_flow_val[0,:,:,:], mask) 114 | 115 | # Save for eval 116 | if 'test' in FLAGS.dataset_dir: 117 | _, scaled_pred, _ = compute_flow_error(np.array(raw_im0)[:,:,:2], pred_flow_val[0,:,:,:], np.array(raw_im0)[:,:,0]) 118 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]) 119 | write_flow_png(png_name, scaled_pred) 120 | 121 | # Save for visual colormap 122 | if not 'test' in FLAGS.dataset_dir and not FLAGS.output_dir is None: 123 | flow_im = flow_to_image(scaled_pred) 124 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]).replace('png', 'jpg') 125 | cv2.imwrite(png_name, flow_im[:,:,::-1]) 126 | 127 | print('{:>10}, {:>10}'.format('(valid) endpoint error', 'f1 score')) 128 | print('{:10.4f}, {:10.4f}'.format(errs.mean(), f1.mean())) 129 | 130 | if __name__ == '__main__': 131 | tf.app.run() 132 | -------------------------------------------------------------------------------- /test_kitti_depth.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | import PIL.Image as pil 6 | from core import DFLearner 7 | 8 | flags = tf.app.flags 9 | flags.DEFINE_integer("batch_size", 4, "The size of of a sample batch") 10 | flags.DEFINE_integer("img_height", 160, "Image height") 11 | flags.DEFINE_integer("img_width", 576, "Image width") 12 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/raw/data/', "Dataset directory") 13 | flags.DEFINE_string("split", 'val', 'val or test') 14 | flags.DEFINE_string("output_dir", None, "Output directory") 15 | flags.DEFINE_string("ckpt_file", None, "checkpoint file") 16 | FLAGS = flags.FLAGS 17 | 18 | def main(_): 19 | if FLAGS.split == 'val': 20 | txt_file = 'data/kitti/val_files_eigen.txt' 21 | elif FLAGS.split == 'test': 22 | txt_file = 'data/kitti/test_files_eigen.txt' 23 | else: 24 | assert False 25 | 26 | with open(txt_file, 'r') as f: 27 | test_files = f.readlines() 28 | test_files = [FLAGS.dataset_dir + t[:-1] for t in test_files] 29 | if not os.path.exists(FLAGS.output_dir): 30 | os.makedirs(FLAGS.output_dir) 31 | basename = os.path.basename(FLAGS.ckpt_file) 32 | output_file = os.path.join(FLAGS.output_dir, basename) 33 | model = DFLearner() 34 | model.setup_inference(img_height=FLAGS.img_height, 35 | img_width=FLAGS.img_width, 36 | batch_size=FLAGS.batch_size, 37 | mode='depth') 38 | saver = tf.train.Saver([var for var in tf.model_variables()]) 39 | config = tf.ConfigProto() 40 | config.gpu_options.allow_growth = True 41 | with tf.Session(config=config) as sess: 42 | saver.restore(sess, FLAGS.ckpt_file) 43 | pred_all = [] 44 | for t in range(0, len(test_files), FLAGS.batch_size): 45 | if t % 100 == 0: 46 | print('processing %s: %d/%d' % (basename, t, len(test_files))) 47 | inputs = np.zeros( 48 | (FLAGS.batch_size, FLAGS.img_height, FLAGS.img_width, 3), 49 | dtype=np.uint8) 50 | for b in range(FLAGS.batch_size): 51 | idx = t + b 52 | if idx >= len(test_files): 53 | break 54 | raw_im = 
pil.open(test_files[idx]) 55 | scaled_im = raw_im.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS) 56 | inputs[b] = np.array(scaled_im) 57 | pred = model.inference(inputs, sess, mode='depth') 58 | for b in range(FLAGS.batch_size): 59 | idx = t + b 60 | if idx >= len(test_files): 61 | break 62 | pred_all.append(pred['depth'][b,:,:,0]) 63 | np.save(output_file, pred_all) 64 | 65 | if __name__ == '__main__': 66 | tf.app.run() 67 | -------------------------------------------------------------------------------- /train_df.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import pprint 4 | import random 5 | import numpy as np 6 | from core import DFLearner 7 | import os 8 | 9 | flags = tf.app.flags 10 | flags.DEFINE_string("dataset_dir", "./dataset/kitti_5frame_1152_320/", "Dataset directory") 11 | flags.DEFINE_string("checkpoint_dir", "./ckpt/dfnet", "Directory name to save the checkpoints") 12 | flags.DEFINE_string("ckpt_flow", "pretrained/unflowc_pre", "checkpoint for Flow Net") 13 | flags.DEFINE_string("ckpt_dp", "pretrained/cs_5frame_pre", "checkpoint for Depth Net and Pose Net") 14 | flags.DEFINE_string("ckpt_pose", None, "checkpoint for Pose Net, if not shared with Depth Net") 15 | flags.DEFINE_float("learning_rate", 0.0001, "Learning rate for Adam") 16 | flags.DEFINE_float("beta1", 0.9, "Momentum term of Adam") 17 | flags.DEFINE_float("smooth_weight", 3.0, "Weight for smoothness") 18 | flags.DEFINE_float("alpha_image_loss", 0.85, "Weight between SSIM and L1 in the image loss") 19 | flags.DEFINE_float("depth_consistency", 0.2, "Weight for forward-backward depth consistency loss.") 20 | flags.DEFINE_float("flow_smooth_weight", 3.0, "Weight for flow smoothness") 21 | flags.DEFINE_float("flow_consistency", 0.2, "Weight for forward-backward flow consistency loss.") 22 | flags.DEFINE_float("cross_consistency", 0.5, "Weight for cross-network consistency loss") 23 | flags.DEFINE_integer("batch_size", 4, "The size of a sample batch; must be divisible by the number of GPUs!") 24 | flags.DEFINE_integer("num_gpus", 4, "Number of GPUs for training, starting from 0.") 25 | flags.DEFINE_integer("img_height", 320, "Image height") 26 | flags.DEFINE_integer("img_width", 1152, "Image width") 27 | flags.DEFINE_integer("seq_length", 5, "Sequence length for each example") # Fixed. Don't change 28 | flags.DEFINE_integer("max_steps", 100000, "Maximum number of training iterations") 29 | flags.DEFINE_integer("summary_freq", 100, "Logging every summary_freq iterations") 30 | flags.DEFINE_integer("save_latest_freq", 5000, \ 31 | "Save the latest model every save_latest_freq iterations (overwrites the previous latest model)") 32 | flags.DEFINE_boolean("continue_train", True, "Continue training from previous checkpoint") 33 | flags.DEFINE_boolean("scale_normalize", False, "Scale normalization for disparity.") # Setting this to True will break the training. 
34 | flags.DEFINE_boolean("fix_pose", False, "Fix pose network") 35 | FLAGS = flags.FLAGS 36 | 37 | def main(_): 38 | seed = 8964 39 | tf.set_random_seed(seed) 40 | np.random.seed(seed) 41 | random.seed(seed) 42 | 43 | pp = pprint.PrettyPrinter() 44 | pp.pprint(flags.FLAGS.__flags) 45 | 46 | if not os.path.exists(FLAGS.checkpoint_dir): 47 | os.makedirs(FLAGS.checkpoint_dir) 48 | 49 | learner = DFLearner() 50 | learner.train(FLAGS) 51 | 52 | if __name__ == '__main__': 53 | tf.app.run() 54 | --------------------------------------------------------------------------------
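
For reference, the depth numbers printed by `kitti_eval/eval_depth.py` come from per-image median scale matching followed by the standard Eigen error metrics (the script above additionally restricts evaluation to the Garg crop). The snippet below is a minimal, self-contained sketch of that protocol on synthetic arrays; it is illustrative only and not a file in this repository, and `demo_compute_errors` is a trimmed-down stand-in for `compute_errors` in `kitti_eval/depth_evaluation_utils.py`.
```python
# Illustrative sketch only (not part of the repository): per-image median
# scale matching + a subset of the Eigen depth metrics, on synthetic data.
import numpy as np

def demo_compute_errors(gt, pred):
    # Trimmed-down version of compute_errors() in depth_evaluation_utils.py
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    rmse = np.sqrt(np.mean((gt - pred) ** 2))
    return abs_rel, rmse, a1

min_depth, max_depth = 1e-3, 80.0

# Synthetic ground truth and a prediction that is only correct up to scale
gt = np.random.uniform(1.0, 80.0, size=(370, 1224)).astype(np.float32)
pred = gt / 7.3 * (1.0 + np.random.normal(0, 0.01, size=gt.shape).astype(np.float32))

# Keep only pixels with valid depth, as in eval_depth.py
mask = np.logical_and(gt > min_depth, gt < max_depth)

# Median scale matching, then clamp to the evaluation depth range
scale = np.median(gt[mask]) / np.median(pred[mask])
pred_scaled = np.clip(pred * scale, min_depth, max_depth)

print(demo_compute_errors(gt[mask], pred_scaled[mask]))
```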