├── .gitignore
├── LICENSE
├── README.md
├── core
│   ├── BaseLearner.py
│   ├── DFLearner.py
│   ├── UnFlow
│   │   ├── __init__.py
│   │   ├── ops
│   │   │   ├── backward_warp_op.cc
│   │   │   ├── backward_warp_op.cu.cc
│   │   │   ├── correlation_op.cc
│   │   │   ├── correlation_op.cu.cc
│   │   │   ├── correlation_op.h
│   │   │   ├── downsample_op.cc
│   │   │   ├── downsample_op.cu.cc
│   │   │   ├── forward_warp_op.cc
│   │   │   └── forward_warp_op.cu.cc
│   │   └── src
│   │       ├── __init__.py
│   │       └── e2eflow
│   │           ├── __init__.py
│   │           ├── core
│   │           │   ├── __init__.py
│   │           │   ├── augment.py
│   │           │   ├── data.py
│   │           │   ├── flow_util.py
│   │           │   ├── flownet.py
│   │           │   ├── image_warp.py
│   │           │   ├── input.py
│   │           │   ├── losses.py
│   │           │   ├── spatial_transformer.py
│   │           │   ├── supervised.py
│   │           │   ├── train.py
│   │           │   ├── unsupervised.py
│   │           │   └── util.py
│   │           ├── ops.py
│   │           └── util.py
│   ├── __init__.py
│   ├── data_loader.py
│   ├── flowlib.py
│   ├── nets.py
│   └── utils.py
├── data
│   ├── __init__.py
│   ├── kitti
│   │   ├── __init__.py
│   │   ├── excluded_frames.txt
│   │   ├── kitti_odom_loader.py
│   │   ├── kitti_raw_loader.py
│   │   ├── static_frames.txt
│   │   ├── test_files_eigen.txt
│   │   ├── test_files_stereo.txt
│   │   ├── test_scenes_eigen.txt
│   │   ├── test_scenes_stereo.txt
│   │   └── val_files_eigen.txt
│   └── prepare_train_data.py
├── kitti_eval
│   ├── __init__.py
│   ├── depth_evaluation_utils.py
│   ├── eval_depth.py
│   ├── eval_pose.py
│   └── pose_evaluation_utils.py
├── misc
│   ├── prepare.sh
│   └── zou2018dfnet.gif
├── test_flownet_2012.py
├── test_flownet_2015.py
├── test_kitti_depth.py
└── train_df.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *DS_Store
2 | *ignore*
3 | *pyc
4 | *__pycache__
5 | pretrained
6 | ckpt
7 | results
8 | *pose_data
9 | dataset
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Virginia Tech Vision and Learning Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency
2 |
3 | A TensorFlow re-implementation for [DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency](https://arxiv.org/abs/1809.01649). There are some minor differences from the model described in the paper:
4 |
5 | - The model in the paper uses 2 frames as input, while this code uses 5 frames as input (you may use any odd number of frames, though you would need to tune the hyper-parameters)
6 | - FlowNet in the paper is pre-trained on SYNTHIA, while this one is pre-trained on Cityscapes
7 |
8 | Please see the [project page](http://yuliang.vision/DF-Net/) for more details.
9 |
10 |
11 |
12 |
13 | ## Prerequisites
14 | This codebase was developed and tested with the following settings:
15 | ```
16 | Python 3.6
17 | TensorFlow 1.2.0 (this is the only supported version)
18 | g++ 4.x (this is the only supported version)
19 | CUDA 8.0
20 | Ubuntu 14.04
21 | 4 Tesla K80 GPUs (w/ 12G memory each)
22 | ```
23 |
24 | Some Python packages you might not have
25 | ```
26 | pypng
27 | opencv-python
28 | ```
29 |
30 | ## Installation
31 | 1. Clone this repository
32 | ```Shell
33 | git clone git@github.com:vt-vl-lab/DF-Net.git
34 | cd DF-Net
35 | ```
36 |
37 | 2. Prepare models and training data
38 | ```Shell
39 | chmod +x ./misc/prepare.sh
40 | ./misc/prepare.sh
41 | ```
42 | NOTE: Frames belonging to KITTI 2012/2015 train/test scenes have been excluded from the provided training set. Adding these frames back to the training set would improve the performance of DepthNet.
43 |
44 | ## Data preparation (for evaluation)
45 | After accepting their license conditions, download [KITTI raw](http://www.cvlibs.net/datasets/kitti/raw_data.php), [KITTI flow 2012](http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php?benchmark=flow), [KITTI flow 2015](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow).
46 |
47 | Then you can make soft-links to them
48 | ```Shell
49 | cd dataset
50 | mkdir KITTI
51 | cd KITTI
52 |
53 | ln -s /path/to/KITTI/raw raw
54 | ln -s /path/to/KITTI/2012 flow2012
55 | ln -s /path/to/KITTI/2015 flow2015
56 | ```
57 |
58 | **(Optional)** You can add those KITTI 2012/2015 frames back to the training set by commenting out lines 81~85 in `data/kitti/kitti_raw_loader.py`, and then running
59 | ```
60 | python data/prepare_train_data.py --dataset_name='kitti_raw_eigen' --dump_root=/path/to/save/ --num_threads=4
61 | ```
62 |
63 | ## Training
64 | ```
65 | export CUDA_VISIBLE_DEVICES=0,1,2,3
66 | python train_df.py --dataset_dir=/path/to/your/data --checkpoint_dir=/path/to/save/your/model
67 | ```
68 |
69 | The first time you run this, the custom CUDA operations for FlowNet will be compiled. If you have any compilation issues, please check `core/UnFlow/src/e2eflow/ops.py` (a sketch of a typical build step follows the list below):
70 | - Line31: specify your CUDA path
71 | - Line32: Add `-I $CUDA_HOME/include`, where `$CUDA_HOME` is your CUDA directory
72 | - Line38: specify your g++ version
73 |
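For reference, compiling a TensorFlow custom op of this kind typically means running `nvcc` on the `.cu.cc` kernel and then linking it together with the `.cc` registration file via `g++` against TensorFlow's headers. The sketch below illustrates that build step only; the `compile_op` helper name, the paths, and the `g++-4.8` binary are placeholders, and the actual commands in `ops.py` may differ:

```python
import os
import subprocess
import tensorflow as tf

# Placeholders: point CUDA_HOME at your CUDA 8.0 install (cf. Line31/Line32 above).
CUDA_HOME = '/usr/local/cuda-8.0'
TF_INC = tf.sysconfig.get_include()   # TensorFlow header directory

def compile_op(name, src_dir='core/UnFlow/ops', out_dir='core/UnFlow/ops/build'):
    """Compile <name>.cu.cc with nvcc, then link it with <name>.cc into a loadable .so."""
    os.makedirs(out_dir, exist_ok=True)
    cu_obj = os.path.join(out_dir, name + '.cu.o')
    lib = os.path.join(out_dir, name + '.so')
    # GPU part: this is where the CUDA include directory must be visible.
    subprocess.check_call([
        os.path.join(CUDA_HOME, 'bin', 'nvcc'), '-std=c++11', '-DGOOGLE_CUDA=1',
        '-x', 'cu', '-Xcompiler', '-fPIC',
        '-I', TF_INC, '-I', os.path.join(CUDA_HOME, 'include'),
        '-c', os.path.join(src_dir, name + '.cu.cc'), '-o', cu_obj])
    # CPU part + linking: the g++ binary used here is what Line38 refers to.
    subprocess.check_call([
        'g++-4.8', '-std=c++11', '-shared', '-fPIC', '-DGOOGLE_CUDA=1',
        '-I', TF_INC, os.path.join(src_dir, name + '.cc'), cu_obj,
        '-o', lib, '-L', os.path.join(CUDA_HOME, 'lib64'), '-lcudart'])
    return tf.load_op_library(lib)

# e.g. backward_warp_module = compile_op('backward_warp_op')
```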
74 | ## Testing
75 | Test DepthNet on KITTI raw (you can use the validation set to select the best model):
76 | ```
77 | python test_kitti_depth.py --dataset_dir=/path/to/your/data --output_dir=/path/to/save/your/prediction --ckpt_file=/path/to/your/ckpt --split="val or test"
78 | python kitti_eval/eval_depth.py --pred_file=/path/to/your/prediction --split="val or test"
79 | ```
80 |
81 | Test FlowNet on KITTI 2012 (please use the training set)
82 | ```
83 | python test_flownet_2012.py --dataset_dir=/path/to/your/data --ckpt_file=/path/to/your/ckpt
84 | ```
85 |
86 | Test FlowNet on KITTI 2015 (please use the training set)
87 | ```
88 | python test_flownet_2015.py --dataset_dir=/path/to/your/data --ckpt_file=/path/to/your/ckpt
89 | ```
90 |
91 | NOTE: For KITTI 2012/2015
92 | - If you want to generate visualization colormaps for the **training set**, you can specify `output_dir`
93 | - If you want to test on the **test set** and upload the results to the KITTI server, specify `output_dir` and run on the test set
94 |
95 | ## Pre-trained model performance
96 | You should get the following numbers if you use the pre-trained model `pretrained/dfnet`
97 |
98 |
99 | DepthNet (KITTI raw test set)
100 |
101 | abs rel | sq rel | rms | log rms | a1 | a2 | a3 |
102 | ---------------|------------|------------|------------|------------|------------|------------|
103 | 0.1452 | 1.2904 | 5.6115 | 0.2194 | 0.8114 | 0.9394 | 0.9767 |
104 |
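The depth columns above are the standard single-view depth metrics (Eigen split). As a reference, they are conventionally computed as in the minimal NumPy sketch below, assuming `gt` and `pred` are 1-D arrays of ground-truth and predicted depths that have already been masked to valid pixels and median-scaled (the function name is illustrative, not the exact code in `kitti_eval/`):

```python
import numpy as np

def depth_metrics(gt, pred):
    """abs rel, sq rel, rms, log rms, and the delta < 1.25**k accuracies (a1, a2, a3)."""
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    rms = np.sqrt(np.mean((gt - pred) ** 2))
    log_rms = np.sqrt(np.mean((np.log(gt) - np.log(pred)) ** 2))
    return abs_rel, sq_rel, rms, log_rms, a1, a2, a3
```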
105 |
106 | FlowNet (KITTI 2012/2015 training set)
107 |
108 | KITTI 2012 EPE | KITTI 2015 EPE | KITTI 2015 F1 |
109 | ---------------|----------------|---------------|
110 | 3.1052 | 7.4482 | 0.2695 |
111 |
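Here EPE is the average end-point error over pixels with valid ground truth, and the KITTI 2015 F1 value is the outlier ratio (pixels whose error exceeds 3 px and 5% of the ground-truth flow magnitude). A minimal NumPy sketch, assuming `flow` and `gt` are HxWx2 arrays and `valid` is a boolean mask (again illustrative, not the exact evaluation code):

```python
import numpy as np

def flow_metrics(flow, gt, valid):
    """Average end-point error (EPE) and KITTI outlier ratio (Fl) over valid pixels."""
    err = np.sqrt(np.sum((flow - gt) ** 2, axis=-1))   # per-pixel end-point error
    mag = np.sqrt(np.sum(gt ** 2, axis=-1))            # ground-truth flow magnitude
    epe = err[valid].mean()
    outlier = (err > 3.0) & (err > 0.05 * mag)         # KITTI outlier definition
    f1 = outlier[valid].mean()
    return epe, f1
```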
112 |
113 | ## Citation
114 | If you find this code useful for your research, please consider citing the following paper:
115 |
116 | @inproceedings{zou2018dfnet,
117 | author = {Zou, Yuliang and Luo, Zelun and Huang, Jia-Bin},
118 | title = {DF-Net: Unsupervised Joint Learning of Depth and Flow using Cross-Task Consistency},
119 | booktitle = {European Conference on Computer Vision},
120 | year = {2018}
121 | }
122 |
123 |
124 | ## Acknowledgement
125 | Code is heavily borrowed from several great works, including [SfMLearner](https://github.com/tinghuiz/SfMLearner), [monodepth](https://github.com/mrharicot/monodepth), and [UnFlow](https://github.com/simonmeister/UnFlow). We thank [Shih-Yang Su](https://github.com/LemonATsu) for the code review.
126 |
--------------------------------------------------------------------------------
/core/BaseLearner.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import time
4 | import math
5 | import numpy as np
6 | import tensorflow as tf
7 | import tensorflow.contrib.slim as slim
8 |
9 | from .data_loader import DataLoader
10 | from .nets import *
11 | from .utils import *
12 | from .flowlib import flow_to_image
13 |
14 | class BaseLearner(object):
15 | def __init__(self):
16 | pass
17 |
18 | def build_train_graph(self):
19 | raise NotImplementedError
20 |
21 | def collect_summaries(self):
22 | raise NotImplementedError
23 |
24 | def train(self, opt):
25 | raise NotImplementedError
26 |
27 | # Credit: https://github.com/mrharicot/monodepth/blob/master/average_gradients.py
28 | def average_gradients(self, tower_grads):
29 | average_grads = []
30 | for grad_and_vars in zip(*tower_grads):
31 | grads = []
32 | for g, _ in grad_and_vars:
33 | if g is not None:
34 | expanded_g = tf.expand_dims(g, 0)
35 | grads.append(expanded_g)
36 | if grads != []:
37 | grad = tf.concat(axis=0, values=grads)
38 | grad = tf.reduce_mean(grad, 0)
39 | v = grad_and_vars[0][1]
40 | grad_and_var = (grad, v)
41 | average_grads.append(grad_and_var)
42 | return average_grads
43 |
44 | def get_dp_flow(self, opt, s, src_pixel_coords):
45 | x_base = tf.range(int(opt.img_width/(2**s)))
46 | y_base = tf.range(int(opt.img_height/(2**s)))
47 | x_base = tf.stack([x_base]*int(opt.img_height/(2**s)), axis=0)
48 | y_base = tf.transpose(tf.stack([y_base]*int(opt.img_width/(2**s)), axis=0))
49 |
50 | dp_flow_x = src_pixel_coords[:, :, :, 0] - tf.cast(x_base, tf.float32)
51 | dp_flow_y = src_pixel_coords[:, :, :, 1] - tf.cast(y_base, tf.float32)
52 | dp_flow = tf.stack([dp_flow_x, dp_flow_y], axis=-1)
53 | return dp_flow
54 |
55 | def get_in_range_mask(self, opt, s, flow):
56 | # 1 if the displacement is within the image
57 | x_min = 0.0
58 | x_max = int(opt.img_width/(2**s))-1
59 | y_min = 0.0
60 | y_max = int(opt.img_height/(2**s))-1
61 |
62 | x_base = tf.range(int(opt.img_width/(2**s)))
63 | y_base = tf.range(int(opt.img_height/(2**s)))
64 | x_base = tf.stack([x_base]*int(opt.img_height/(2**s)), axis=0)
65 | y_base = tf.transpose(tf.stack([y_base]*int(opt.img_width/(2**s)), axis=0))
66 |
67 | pos_x = flow[:,:,:,0]+tf.cast(x_base, tf.float32)
68 | pos_y = flow[:,:,:,1]+tf.cast(y_base, tf.float32)
69 | inside_x = tf.logical_and(pos_x <= tf.cast(x_max, tf.float32), pos_x >= x_min)
70 | inside_y = tf.logical_and(pos_y <= tf.cast(y_max, tf.float32), pos_y >= y_min)
71 | inside = tf.expand_dims(tf.logical_and(inside_x, inside_y), axis=-1)
72 | return tf.stop_gradient(tf.cast(inside, tf.float32))
73 |
74 | def get_fb_mask(self, flow, warped_flow, alpha1=0.01, alpha2=0.5):
75 | temp1 = tf.reduce_sum(tf.square(flow+warped_flow), axis=3, keep_dims=True)
76 | temp2 = tf.reduce_sum(tf.square(flow), axis=3, keep_dims=True)+tf.reduce_sum(tf.square(warped_flow), axis=3, keep_dims=True)
77 | occ_mask = tf.greater(temp1, alpha1*temp2+alpha2)
78 | return tf.stop_gradient(tf.cast(occ_mask, tf.float32))
79 |
80 |     # Credit: https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/losses.py
81 | def ternary_loss(self, im1, im2_warped, valid_mask, max_distance=1):
82 | patch_size = 2*max_distance+1
83 | with tf.variable_scope('ternary_loss'):
84 | def _ternary_transform(image):
85 | intensities = tf.image.rgb_to_grayscale(image) * 255
86 | out_channels = patch_size * patch_size
87 | w = np.eye(out_channels).reshape((patch_size, patch_size, 1, out_channels))
88 | weights = tf.constant(w, dtype=tf.float32)
89 | patches = tf.nn.conv2d(intensities, weights, strides=[1, 1, 1, 1], padding='SAME')
90 |
91 | transf = patches - intensities
92 | transf_norm = transf / tf.sqrt(0.81 + tf.square(transf))
93 | return transf_norm
94 |
95 | def _hamming_distance(t1, t2):
96 | dist = tf.square(t1 - t2)
97 | dist_norm = dist / (0.1 + dist)
98 | dist_sum = tf.reduce_sum(dist_norm, 3, keep_dims=True)
99 | return dist_sum
100 |
101 | t1 = _ternary_transform(im1)
102 | t2 = _ternary_transform(im2_warped)
103 | dist = _hamming_distance(t1, t2)
104 |
105 | transform_mask = self.create_mask(valid_mask, [[max_distance, max_distance], [max_distance, max_distance]])
106 | return self.charbonnier_loss(dist, valid_mask * transform_mask), dist
107 |
108 | def charbonnier_loss(self, x, mask=None, truncate=None, alpha=0.45, beta=1.0, epsilon=0.001):
109 | with tf.variable_scope('charbonnier_loss'):
110 | batch, height, width, channels = tf.unstack(tf.shape(x))
111 | normalization = tf.cast(batch * height * width * channels, tf.float32)
112 |
113 | error = tf.pow(tf.square(x * beta) + tf.square(epsilon), alpha)
114 |
115 | if mask is not None:
116 | error = tf.multiply(mask, error)
117 | if truncate is not None:
118 | error = tf.minimum(error, truncate)
119 |
120 | return tf.reduce_sum(error) / normalization
121 |
122 | def create_mask(self, tensor, paddings):
123 | with tf.variable_scope('create_mask'):
124 | shape = tf.shape(tensor)
125 | inner_width = shape[1] - (paddings[0][0] + paddings[0][1])
126 | inner_height = shape[2] - (paddings[1][0] + paddings[1][1])
127 | inner = tf.ones([inner_width, inner_height])
128 |
129 | mask2d = tf.pad(inner, paddings)
130 | mask3d = tf.tile(tf.expand_dims(mask2d, 0), [shape[0], 1, 1])
131 | mask4d = tf.expand_dims(mask3d, 3)
132 | return tf.stop_gradient(mask4d)
133 |
134 | # Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py
135 | def SSIM(self, x, y):
136 | C1 = 0.01 ** 2
137 | C2 = 0.03 ** 2
138 |
139 | mu_x = slim.avg_pool2d(x, 3, 1, 'VALID')
140 | mu_y = slim.avg_pool2d(y, 3, 1, 'VALID')
141 |
142 | sigma_x = slim.avg_pool2d(x ** 2, 3, 1, 'VALID') - mu_x ** 2
143 | sigma_y = slim.avg_pool2d(y ** 2, 3, 1, 'VALID') - mu_y ** 2
144 | sigma_xy = slim.avg_pool2d(x * y , 3, 1, 'VALID') - mu_x * mu_y
145 |
146 | SSIM_n = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)
147 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2)
148 | SSIM = SSIM_n / SSIM_d
149 |
150 | return tf.clip_by_value((1 - SSIM) / 2, 0, 1)
151 |
152 | def compute_edge_aware_smooth_loss(self, pred_disp, img):
153 | """
154 | Edge-aware L1-norm on first-order gradient
155 | """
156 | def gradient(pred):
157 | D_dx = -pred[:, :, 1:, :] + pred[:, :, :-1, :]
158 | D_dy = -pred[:, 1:, :, :] + pred[:, :-1, :, :]
159 | return D_dx, D_dy
160 | img_dx, img_dy = gradient(img)
161 | disp_dx, disp_dy = gradient(pred_disp)
162 |
163 | weight_x = tf.exp(-tf.reduce_mean(tf.abs(img_dx), 3, keep_dims=True))
164 | weight_y = tf.exp(-tf.reduce_mean(tf.abs(img_dy), 3, keep_dims=True))
165 |
166 | loss = tf.reduce_mean(weight_x*tf.abs(disp_dx)) + tf.reduce_mean(weight_y*tf.abs(disp_dy))
167 | return loss
168 |
169 | def compute_smooth_loss(self, pred_disp):
170 | """
171 | L1-norm on second-order gradient
172 | """
173 | def gradient(pred):
174 | D_dy = pred[:, 1:, :, :] - pred[:, :-1, :, :]
175 | D_dx = pred[:, :, 1:, :] - pred[:, :, :-1, :]
176 | return D_dx, D_dy
177 | dx, dy = gradient(pred_disp)
178 | dx2, dxdy = gradient(dx)
179 | dydx, dy2 = gradient(dy)
180 | return tf.reduce_mean(tf.abs(dx2)) + \
181 | tf.reduce_mean(tf.abs(dxdy)) + \
182 | tf.reduce_mean(tf.abs(dydx)) + \
183 | tf.reduce_mean(tf.abs(dy2))
184 |
185 | def flow_to_image_tf(self, flow):
186 | im_stack = []
187 | for i in range(self.opt.batch_size//self.opt.num_gpus):
188 | temp = tf.py_func(flow_to_image, [flow[i,:,:,:]], tf.uint8)
189 | im_stack.append(temp)
190 | return tf.stack(im_stack, axis=0)
191 |
192 | # Credit: https://github.com/yzcjtr/GeoNet/blob/master/geonet_model.py
193 | def spatial_normalize(self, disp):
194 | _, curr_h, curr_w, curr_c = disp.get_shape().as_list()
195 | disp_mean = tf.reduce_mean(disp, axis=[1,2,3], keep_dims=True)
196 | disp_mean = tf.tile(disp_mean, [1, curr_h, curr_w, curr_c])
197 | return disp/disp_mean
198 |
199 | def build_depth_test_graph(self):
200 | input_uint8 = tf.placeholder(tf.uint8, [self.batch_size,
201 | self.img_height, self.img_width, 3], name='raw_input')
202 | input_mc = self.preprocess_image(input_uint8)
203 | with tf.name_scope("depth_prediction"):
204 | pred_disp, depth_net_endpoints = disp_net_res50(
205 | input_mc, is_training=False)
206 | pred_depth = [1./disp for disp in pred_disp]
207 | pred_depth = pred_depth[0]
208 | self.inputs = input_uint8
209 | self.pred_depth = pred_depth
210 | self.depth_epts = depth_net_endpoints
211 |
212 | # Forward-backward
213 | def build_pose_fb_test_graph(self):
214 | input_uint8 = tf.placeholder(tf.uint8, [self.batch_size,
215 | self.img_height, self.img_width * self.seq_length, 3],
216 | name='raw_input')
217 | input_mc = self.preprocess_image(input_uint8)
218 | loader = DataLoader()
219 | tgt_image, src_image_stack = \
220 | loader.batch_unpack_image_sequence(
221 | input_mc, self.img_height, self.img_width, self.num_source)
222 | with tf.name_scope("pose_prediction"):
223 | pred_poses, _ = pose_net_fb(
224 | tgt_image, src_image_stack, is_training=False)
225 | self.inputs = input_uint8
226 | self.pred_poses = pred_poses[:, :, :6] # Only the first half is used
227 |
228 | def preprocess_image(self, image, is_dp=True):
229 | # Assuming input image is uint8
230 | image = tf.image.convert_image_dtype(image, dtype=tf.float32)
231 | if is_dp:
232 | return image * 2. -1.
233 | else:
234 | mean = [104.920005, 110.1753, 114.785955]
235 | out = []
236 | for i in range(0, int(image.shape[-1]), 3):
237 | r = image[:,:,:,i] - mean[0]/255.
238 | g = image[:,:,:,i+1] - mean[1]/255.
239 | b = image[:,:,:,i+2] - mean[2]/255.
240 | out += [r, g, b]
241 | return tf.stack(out, axis=-1)
242 |
243 | def minus_imagenet_rgb(self, image):
244 | mean = [122.7717, 115.9465, 102.9801]
245 | image = tf.cast(image, tf.float32)
246 | out = []
247 | for i in range(0, int(image.shape[-1]), 3):
248 | r = image[:,:,:,i] - mean[0]
249 | g = image[:,:,:,i+1] - mean[1]
250 | b = image[:,:,:,i+2] - mean[2]
251 | out += [r, g, b]
252 | return tf.stack(out, axis=-1)
253 |
254 | def deprocess_image(self, image, is_dp=True):
255 | # Assuming input image is float32
256 | if is_dp:
257 | image = (image + 1.)/2.
258 | else:
259 | mean = [104.920005, 110.1753, 114.785955]
260 | r = image[:,:,:,0] + mean[0]/255.
261 | g = image[:,:,:,1] + mean[1]/255.
262 | b = image[:,:,:,2] + mean[2]/255.
263 | image = tf.stack([r, g, b], axis=-1)
264 | return tf.image.convert_image_dtype(image, dtype=tf.uint8)
265 |
266 | def setup_inference(self,
267 | img_height,
268 | img_width,
269 | mode,
270 | seq_length=3,
271 | batch_size=1):
272 | self.img_height = img_height
273 | self.img_width = img_width
274 | self.mode = mode
275 | self.batch_size = batch_size
276 | if self.mode == 'depth':
277 | self.build_depth_test_graph()
278 | if self.mode == 'pose':
279 | self.seq_length = seq_length
280 | self.num_source = seq_length - 1
281 | self.build_pose_fb_test_graph()
282 |
283 | def inference(self, inputs, sess, mode='depth'):
284 | fetches = {}
285 | if mode == 'depth':
286 | fetches['depth'] = self.pred_depth
287 | if mode == 'pose':
288 | fetches['pose'] = self.pred_poses
289 | results = sess.run(fetches, feed_dict={self.inputs:inputs})
290 | return results
291 |
292 | def save(self, sess, checkpoint_dir, step):
293 | model_name = 'model'
294 | print(" [*] Saving checkpoint to %s..." % checkpoint_dir)
295 | if step == 'latest':
296 | self.saver.save(sess,
297 | os.path.join(checkpoint_dir, model_name + '.latest'))
298 | else:
299 | self.saver.save(sess,
300 | os.path.join(checkpoint_dir, model_name),
301 | global_step=step)
302 |
303 | if __name__ == '__main__':
304 | model = BaseLearner()
305 |
--------------------------------------------------------------------------------
/core/UnFlow/__init__.py:
--------------------------------------------------------------------------------
1 | from .src import flownet
2 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/backward_warp_op.cc:
--------------------------------------------------------------------------------
1 | #define EIGEN_USE_THREADS
2 |
3 | #include
4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
5 | #include "tensorflow/core/framework/op_kernel.h"
6 | #include "tensorflow/core/framework/register_types.h"
7 | #include "tensorflow/core/framework/tensor.h"
8 | #include "tensorflow/core/framework/tensor_shape.h"
9 | #include "tensorflow/core/framework/types.h"
10 | #include "tensorflow/core/lib/core/status.h"
11 | #include "tensorflow/core/platform/logging.h"
12 | #include "tensorflow/core/framework/op.h"
13 | #include "tensorflow/core/framework/shape_inference.h"
14 | #include "tensorflow/core/framework/common_shape_fns.h"
15 |
16 | // TODO assert input flow channel count = 2, assert matching numbers in all other dims
17 |
18 | typedef Eigen::GpuDevice GPUDevice;
19 |
20 | using namespace tensorflow;
21 |
22 | void BackwardWarp(const GPUDevice& d,
23 |                   typename TTypes<float, 4>::ConstTensor images,
24 |                   typename TTypes<float, 4>::ConstTensor flows,
25 |                   typename TTypes<float, 4>::Tensor output);
26 |
27 | void BackwardWarpGrad(const GPUDevice& d,
28 |                       typename TTypes<float, 4>::ConstTensor input_grad,
29 |                       typename TTypes<float, 4>::ConstTensor input_images,
30 |                       typename TTypes<float, 4>::ConstTensor flows,
31 |                       typename TTypes<float, 4>::Tensor output_grad);
32 |
33 | class BackwardWarpOp : public OpKernel {
34 | public:
35 | explicit BackwardWarpOp(OpKernelConstruction* context) : OpKernel(context) {}
36 |
37 | void Compute(OpKernelContext* context) override {
38 | const Tensor& input_images = context->input(0);
39 | const Tensor& input_flows = context->input(1);
40 |
41 | Tensor* output_images = NULL;
42 | OP_REQUIRES_OK(context, context->allocate_output(0, input_images.shape(),
43 | &output_images));
44 |
45 |     typename TTypes<float, 4>::ConstTensor image_data = input_images.tensor<float, 4>();
46 |     typename TTypes<float, 4>::ConstTensor flow_data = input_flows.tensor<float, 4>();
47 |     typename TTypes<float, 4>::Tensor output_data = output_images->tensor<float, 4>();
48 |
49 |     BackwardWarp(context->eigen_device<GPUDevice>(),
50 | image_data, flow_data, output_data);
51 | }
52 | };
53 |
54 | class BackwardWarpOpGrad : public OpKernel {
55 | public:
56 | explicit BackwardWarpOpGrad(OpKernelConstruction* context) : OpKernel(context) {}
57 |
58 | void Compute(OpKernelContext* context) override {
59 | const Tensor& input = context->input(0);
60 | const Tensor& original_images = context->input(1);
61 | const Tensor& original_flows = context->input(2);
62 |
63 | Tensor* output = NULL;
64 | OP_REQUIRES_OK(context, context->allocate_output(0, original_flows.shape(),
65 | &output));
66 |
67 |     typename TTypes<float, 4>::ConstTensor input_data = input.tensor<float, 4>();
68 |     typename TTypes<float, 4>::ConstTensor flow_data = original_flows.tensor<float, 4>();
69 |     typename TTypes<float, 4>::ConstTensor image_data = original_images.tensor<float, 4>();
70 |     typename TTypes<float, 4>::Tensor output_data = output->tensor<float, 4>();
71 |
72 |     BackwardWarpGrad(context->eigen_device<GPUDevice>(),
73 | input_data, image_data, flow_data, output_data);
74 | }
75 | };
76 |
77 | REGISTER_OP("BackwardWarp")
78 | .Input("images: float")
79 | .Input("flows: float")
80 | .Output("warped_images: float")
81 | .SetShapeFn(shape_inference::UnchangedShape);
82 |
83 | REGISTER_OP("BackwardWarpGrad")
84 | .Input("grads: float")
85 | .Input("original_images: float")
86 | .Input("original_flows: float")
87 | .Output("output: float")
88 | .SetShapeFn([](shape_inference::InferenceContext* c) {
89 | c->set_output(0, c->input(2));
90 | return Status::OK();
91 | });
92 |
93 | #if GOOGLE_CUDA
94 |
95 | REGISTER_KERNEL_BUILDER(Name("BackwardWarp").Device(DEVICE_GPU), BackwardWarpOp);
96 | REGISTER_KERNEL_BUILDER(Name("BackwardWarpGrad").Device(DEVICE_GPU), BackwardWarpOpGrad);
97 |
98 | #endif // GOOGLE_CUDA
99 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/backward_warp_op.cu.cc:
--------------------------------------------------------------------------------
1 | #if GOOGLE_CUDA
2 |
3 | #define EIGEN_USE_GPU
4 |
5 | #include "tensorflow/core/framework/register_types.h"
6 | #include "tensorflow/core/framework/tensor_types.h"
7 | #include "tensorflow/core/platform/types.h"
8 | #include "tensorflow/core/util/cuda_kernel_helper.h"
9 |
10 | using namespace tensorflow;
11 |
12 | typedef Eigen::GpuDevice GPUDevice;
13 |
14 | __global__ void BackwardWarpKernel(const int32 nthreads,
15 | const float* images, const float* flows,
16 | int batch, int height, int width, int channels,
17 | float* output) {
18 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) {
19 | // out_idx = src_x + width * (src_y + height * b)
20 | int idx = out_idx;
21 | const int src_x = idx % width;
22 | idx /= width;
23 | const int src_y = idx % height;
24 | const int b = idx / height;
25 |
26 | const int flow_index = out_idx * 2;
27 | const float x = src_x + flows[flow_index];
28 | const float y = src_y + flows[flow_index + 1];
29 |
30 | const int x0 = floorf(x);
31 | const int x1 = x0 + 1;
32 | const int y0 = floorf(y);
33 | const int y1 = y0 + 1;
34 |
35 | const float w_right = x - x0;
36 | const float w_left = x1 - x;
37 | const float w_bottom = y - y0;
38 | const float w_top = y1 - y;
39 |
40 | for(int c = 0; c < channels; ++c) {
41 | float sum = 0.0;
42 |
43 | #define IMG(iy, ix) images[c + channels * (ix + width * (iy + height * b))]
44 |
45 | // top-left neighbor
46 | if(x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) {
47 | sum += w_left * w_top * IMG(y0, x0);
48 | }
49 |
50 | // top-right neigbor
51 | if(x1 >= 0 && x1 < width && y0 >= 0 && y0 < height) {
52 | sum += w_right * w_top * IMG(y0, x1);
53 | }
54 |
55 | // bottom-left neighbor
56 | if(x0 >= 0 && x0 < width && y1 >= 0 && y1 < height) {
57 | sum += w_left * w_bottom * IMG(y1, x0);
58 | }
59 |
60 | // bottom-right neighbor
61 | if(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height) {
62 | sum += w_right * w_bottom * IMG(y1, x1);
63 | }
64 | #undef IMG
65 | output[out_idx * channels + c] = sum;
66 | }
67 | }
68 | }
69 |
70 | __global__ void BackwardWarpGradKernel(const int32 nthreads,
71 | const float* input_grad,
72 | const float* input_images, const float* flows,
73 | int batch, int height, int width, int channels,
74 | float* output_grad) {
75 | CUDA_1D_KERNEL_LOOP(in_idx, nthreads) {
76 | // in_idx = x + width * (y + height * b)
77 | int idx = in_idx;
78 | const int src_x = idx % width;
79 | idx /= width;
80 | const int src_y = idx % height;
81 | const int b = idx / height;
82 |
83 | const int flow_index = in_idx * 2;
84 | const float x = src_x + flows[flow_index];
85 | const float y = src_y + flows[flow_index + 1];
86 |
87 | const int x0 = floorf(x);
88 | const int x1 = x0 + 1;
89 | const int y0 = floorf(y);
90 | const int y1 = y0 + 1;
91 |
92 | const float w_right = x - x0;
93 | const float w_left = x1 - x;
94 | const float w_bottom = y - y0;
95 | const float w_top = y1 - y;
96 |
97 | float du = 0.0;
98 | float dv = 0.0;
99 |
100 | for(int c = 0; c < channels; ++c) {
101 | float px;
102 | float din = input_grad[c + channels * in_idx];
103 |
104 | #define IMG(iy, ix) input_images[c + channels * (ix + width * (iy + height * b))]
105 |
106 | // top-left neighbor
107 | if(x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) {
108 | px = IMG(y0, x0) * din;
109 | du -= w_top * px;
110 | dv -= w_left * px;
111 | }
112 |
113 | // top-right neigbor
114 | if(x1 >= 0 && x1 < width && y0 >= 0 && y0 < height) {
115 | px = IMG(y0, x1) * din;
116 | du += w_top * px;
117 | dv -= w_right * px;
118 | }
119 |
120 | // bottom-left neighbor
121 | if(x0 >= 0 && x0 < width && y1 >= 0 && y1 < height) {
122 | px = IMG(y1, x0) * din;
123 | du -= w_bottom * px;
124 | dv += w_left * px;
125 | }
126 |
127 | // bottom-right neighbor
128 | if(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height) {
129 | px = IMG(y1, x1) * din;
130 | du += w_bottom * px;
131 | dv += w_right * px;
132 | }
133 | #undef IMG
134 | }
135 | output_grad[in_idx * 2] = du;
136 | output_grad[in_idx * 2 + 1] = dv;
137 | }
138 | }
139 |
140 | void BackwardWarp(const GPUDevice& d,
141 |                   typename TTypes<float, 4>::ConstTensor images,
142 |                   typename TTypes<float, 4>::ConstTensor flows,
143 |                   typename TTypes<float, 4>::Tensor output) {
144 | const int batch = images.dimension(0);
145 | const int height = images.dimension(1);
146 | const int width = images.dimension(2);
147 | const int channels = images.dimension(3);
148 |
149 | const int total_count = batch * height * width;
150 | if (total_count == 0) return;
151 |
152 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
153 | BackwardWarpKernel
154 |       <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
155 | config.virtual_thread_count, images.data(), flows.data(),
156 | batch, height, width, channels,
157 | output.data());
158 | }
159 |
160 | void BackwardWarpGrad(const GPUDevice& d,
161 |                       typename TTypes<float, 4>::ConstTensor input_grad,
162 |                       typename TTypes<float, 4>::ConstTensor input_images,
163 |                       typename TTypes<float, 4>::ConstTensor flows,
164 |                       typename TTypes<float, 4>::Tensor output_grad) {
165 | const int batch = input_grad.dimension(0);
166 | const int height = input_grad.dimension(1);
167 | const int width = input_grad.dimension(2);
168 | const int channels = input_grad.dimension(3);
169 |
170 | int total_count;
171 | CudaLaunchConfig config;
172 |
173 | // Initialize output_grad with all zeros.
174 | total_count = batch * height * width;
175 | if (total_count == 0) return;
176 | config = GetCudaLaunchConfig(total_count, d);
177 |   SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
178 | config.virtual_thread_count, output_grad.data());
179 |
180 | // Accumulate.
181 | total_count = batch * height * width;
182 | config = GetCudaLaunchConfig(total_count, d);
183 | BackwardWarpGradKernel
184 |       <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
185 | config.virtual_thread_count, input_grad.data(),
186 | input_images.data(), flows.data(),
187 | batch, height, width, channels,
188 | output_grad.data());
189 | }
190 |
191 |
192 | #endif // GOOGLE_CUDA
193 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/correlation_op.cc:
--------------------------------------------------------------------------------
1 | #define EIGEN_USE_THREADS
2 |
3 | #include
4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
5 | #include "tensorflow/core/framework/op_kernel.h"
6 | #include "tensorflow/core/framework/register_types.h"
7 | #include "tensorflow/core/framework/tensor.h"
8 | #include "tensorflow/core/framework/tensor_shape.h"
9 | #include "tensorflow/core/framework/types.h"
10 | #include "tensorflow/core/lib/core/status.h"
11 | #include "tensorflow/core/platform/logging.h"
12 | #include "tensorflow/core/framework/op.h"
13 | #include "tensorflow/core/framework/shape_inference.h"
14 | #include "tensorflow/core/framework/common_shape_fns.h"
15 |
16 | #include "correlation_op.h"
17 |
18 | typedef Eigen::GpuDevice GPUDevice;
19 |
20 | using namespace tensorflow;
21 |
22 | void Correlation(const GPUDevice& d,
23 |                  typename TTypes<float, 4>::ConstTensor input_0,
24 |                  typename TTypes<float, 4>::ConstTensor input_1,
25 |                  typename TTypes<float, 4>::Tensor output,
26 |                  typename TTypes<float, 4>::Tensor padded_0,
27 |                  typename TTypes<float, 4>::Tensor padded_1,
28 | CorrelationState params);
29 |
30 | void CorrelationGrad(const GPUDevice& d,
31 |                      typename TTypes<float, 4>::ConstTensor input_grad,
32 |                      typename TTypes<float, 4>::ConstTensor padded_0,
33 |                      typename TTypes<float, 4>::ConstTensor padded_1,
34 |                      typename TTypes<float, 4>::Tensor output_grad_0,
35 |                      typename TTypes<float, 4>::Tensor output_grad_1,
36 | CorrelationState params);
37 |
38 | class CorrelationOp : public OpKernel {
39 | public:
40 | explicit CorrelationOp(OpKernelConstruction* context)
41 | : OpKernel(context), attrs(context) {}
42 |
43 | void Compute(OpKernelContext* context) override {
44 | const Tensor& input_0 = context->input(0);
45 | const Tensor& input_1 = context->input(1);
46 |
47 | OP_REQUIRES(context, input_0.shape() == input_1.shape(),
48 | errors::InvalidArgument("Input shapes have to be the same"));
49 |
50 |     typename TTypes<float, 4>::ConstTensor input_0_data = input_0.tensor<float, 4>();
51 |     typename TTypes<float, 4>::ConstTensor input_1_data = input_1.tensor<float, 4>();
52 |
53 | const int batch = input_0_data.dimension(0);
54 | const int in_channels = input_0_data.dimension(1);
55 | const int in_height = input_0_data.dimension(2);
56 | const int in_width = input_0_data.dimension(3);
57 |
58 | CorrelationState st(attrs, in_height, in_width, in_channels);
59 |
60 | OP_REQUIRES(context, st.out_width * st.out_height > 0,
61 | errors::InvalidArgument("Invalid correlation settings"));
62 |
63 | Tensor* output = NULL;
64 | TensorShape output_shape({batch, st.out_channels, st.out_height, st.out_width});
65 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
66 |
67 | Tensor* padded_0 = NULL;
68 | Tensor* padded_1 = NULL;
69 | TensorShape padded_shape({batch, st.padded_height, st.padded_width, in_channels});
70 | OP_REQUIRES_OK(context, context->allocate_output(1, padded_shape, &padded_0));
71 | OP_REQUIRES_OK(context, context->allocate_output(2, padded_shape, &padded_1));
72 |
73 |     typename TTypes<float, 4>::Tensor output_data = output->tensor<float, 4>();
74 |     typename TTypes<float, 4>::Tensor padded_0_data = padded_0->tensor<float, 4>();
75 |     typename TTypes<float, 4>::Tensor padded_1_data = padded_1->tensor<float, 4>();
76 |
77 |     Correlation(context->eigen_device<GPUDevice>(),
78 | input_0_data, input_1_data, output_data,
79 | padded_0_data, padded_1_data,
80 | st);
81 | }
82 |
83 | private:
84 | CorrelationAttrs attrs;
85 | };
86 |
87 | class CorrelationOpGrad : public OpKernel {
88 | public:
89 | explicit CorrelationOpGrad(OpKernelConstruction* context)
90 | : OpKernel(context), attrs(context) {}
91 |
92 | void Compute(OpKernelContext* context) override {
93 | const Tensor& input_grad = context->input(0);
94 | const Tensor& input_0 = context->input(1);
95 | const Tensor& input_1 = context->input(2);
96 | const Tensor& padded_0 = context->input(3);
97 | const Tensor& padded_1 = context->input(4);
98 |
99 |     typename TTypes<float, 4>::ConstTensor input_grad_data = input_grad.tensor<float, 4>();
100 |     typename TTypes<float, 4>::ConstTensor input_0_data = input_0.tensor<float, 4>();
101 |     //typename TTypes<float, 4>::ConstTensor input_1_data = input_1.tensor<float, 4>();
102 |     typename TTypes<float, 4>::ConstTensor padded_0_data = padded_0.tensor<float, 4>();
103 |     typename TTypes<float, 4>::ConstTensor padded_1_data = padded_1.tensor<float, 4>();
104 |
105 | const int in_channels = input_0_data.dimension(1);
106 | const int in_height = input_0_data.dimension(2);
107 | const int in_width = input_0_data.dimension(3);
108 |
109 | CorrelationState st(attrs, in_height, in_width, in_channels);
110 |
111 | Tensor* output_grad_0 = NULL;
112 | OP_REQUIRES_OK(context, context->allocate_output(0, input_0.shape(),
113 | &output_grad_0));
114 | Tensor* output_grad_1 = NULL;
115 | OP_REQUIRES_OK(context, context->allocate_output(1, input_0.shape(),
116 | &output_grad_1));
117 |
118 |     typename TTypes<float, 4>::Tensor output_grad_0_data = output_grad_0->tensor<float, 4>();
119 |     typename TTypes<float, 4>::Tensor output_grad_1_data = output_grad_1->tensor<float, 4>();
120 |
121 |     CorrelationGrad(context->eigen_device<GPUDevice>(),
122 | input_grad_data,
123 | padded_0_data, padded_1_data,
124 | output_grad_0_data, output_grad_1_data,
125 | st);
126 | }
127 | private:
128 | CorrelationAttrs attrs;
129 | };
130 |
131 | using shape_inference::DimensionHandle;
132 |
133 | REGISTER_OP("Correlation")
134 | .Input("input_0: float")
135 | .Input("input_1: float")
136 | .Attr("kernel_size: int = 1")
137 | .Attr("max_displacement: int = 20")
138 | .Attr("pad: int = 20")
139 | .Attr("stride_1: int = 1")
140 | .Attr("stride_2: int = 2")
141 | .Output("correlation: float")
142 | .Output("padded_0: float")
143 | .Output("padded_1: float")
144 | .SetShapeFn([](shape_inference::InferenceContext* c) {
145 | CorrelationAttrs attrs;
146 | c->GetAttr("kernel_size", &attrs.kernel_size);
147 | c->GetAttr("max_displacement", &attrs.max_displacement);
148 | c->GetAttr("pad", &attrs.pad_size);
149 | c->GetAttr("stride_1", &attrs.stride_1);
150 | c->GetAttr("stride_2", &attrs.stride_2);
151 |
152 | DimensionHandle batch = c->Dim(c->input(0), 0);
153 |
154 | //padded_height = in_height + 2 * pad_size;
155 | //padded_width = in_width + 2 * pad_size;
156 | //kernel_radius = (kernel_size - 1) / 2;
157 | //border_size = max_displacement + kernel_radius;
158 | int neighborhood_grid_radius = attrs.max_displacement / attrs.stride_2;
159 | int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
160 | //out_width = ceil((float)(padded_width - border_size *2) / (float)stride_1);
161 | //out_height = ceil((float)(padded_height - border_size *2) / (float)stride_1);
162 | int out_channels = neighborhood_grid_width * neighborhood_grid_width;
163 |
164 | // TODO: support passing on output width and height
165 |
166 | c->set_output(0, c->MakeShape({batch, out_channels, c->UnknownDim(), c->UnknownDim()}));
167 | return Status::OK();
168 | });
169 |
170 | REGISTER_OP("CorrelationGrad")
171 | .Input("input_grad: float")
172 | .Input("original_input_0: float")
173 | .Input("original_input_1: float")
174 | .Input("padded_0: float")
175 | .Input("padded_1: float")
176 | .Attr("kernel_size: int = 1")
177 | .Attr("max_displacement: int = 20")
178 | .Attr("pad: int = 20")
179 | .Attr("stride_1: int = 1")
180 | .Attr("stride_2: int = 2")
181 | .Output("output_grad_0: float")
182 | .Output("output_grad_1: float")
183 | .SetShapeFn([](shape_inference::InferenceContext* c) {
184 | c->set_output(0, c->input(1));
185 | c->set_output(1, c->input(2));
186 | return Status::OK();
187 | });
188 |
189 | #if GOOGLE_CUDA
190 |
191 | REGISTER_KERNEL_BUILDER(Name("Correlation").Device(DEVICE_GPU), CorrelationOp);
192 | REGISTER_KERNEL_BUILDER(Name("CorrelationGrad").Device(DEVICE_GPU), CorrelationOpGrad);
193 |
194 | #endif // GOOGLE_CUDA
195 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/correlation_op.h:
--------------------------------------------------------------------------------
1 | #define EIGEN_USE_THREADS
2 |
3 | #include "tensorflow/core/framework/op_kernel.h"
4 | #include "tensorflow/core/framework/op.h"
5 |
6 | using namespace tensorflow;
7 |
8 | struct CorrelationAttrs {
9 | CorrelationAttrs(OpKernelConstruction* c) {
10 | OP_REQUIRES_OK(c, c->GetAttr("kernel_size", &kernel_size));
11 | OP_REQUIRES_OK(c, c->GetAttr("max_displacement", &max_displacement));
12 | OP_REQUIRES_OK(c, c->GetAttr("pad", &pad_size));
13 | OP_REQUIRES_OK(c, c->GetAttr("stride_1", &stride_1));
14 | OP_REQUIRES_OK(c, c->GetAttr("stride_2", &stride_2));
15 |
16 | OP_REQUIRES(c, kernel_size % 2 != 0,
17 | errors::InvalidArgument("kernel_size must be odd"));
18 | }
19 | CorrelationAttrs() {}
20 |
21 | int pad_size;
22 | int stride_1;
23 | int stride_2;
24 | int max_displacement;
25 | int kernel_size;
26 | };
27 |
28 | struct CorrelationState {
29 | CorrelationState(CorrelationAttrs attrs, int in_height, int in_width, int in_channels) {
30 | pad_size = attrs.pad_size;
31 | stride_1 = attrs.stride_1;
32 | stride_2 = attrs.stride_2;
33 | max_displacement = attrs.max_displacement;
34 | kernel_size = attrs.kernel_size;
35 |
36 | padded_height = in_height + 2 * pad_size;
37 | padded_width = in_width + 2 * pad_size;
38 |
39 | // Compute size of unreachable border region (on each side)
40 | kernel_radius = (kernel_size - 1) / 2;
41 | border_size = max_displacement + kernel_radius;
42 |
43 | // Given a center position in image 1, how many displaced positions in -x / +x
44 | // direction do we consider in image 2 (neighborhoodGridWidth):
45 | neighborhood_grid_radius = max_displacement / stride_2;
46 | neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
47 |
48 | out_width = ceil((float)(padded_width - border_size *2) / (float)stride_1);
49 | out_height = ceil((float)(padded_height - border_size *2) / (float)stride_1);
50 | // Top Channels amount to displacement combinations in X and Y direction:
51 | out_channels = neighborhood_grid_width * neighborhood_grid_width;
52 | }
53 |
54 | int pad_size;
55 | int stride_1;
56 | int stride_2;
57 | int kernel_radius;
58 | int max_displacement;
59 | int kernel_size;
60 | int neighborhood_grid_radius;
61 | int neighborhood_grid_width;
62 | int padded_height;
63 | int padded_width;
64 | int border_size;
65 | int out_height;
66 | int out_width;
67 | int out_channels;
68 | };
69 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/downsample_op.cc:
--------------------------------------------------------------------------------
1 | #define EIGEN_USE_THREADS
2 |
3 | #include
4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
5 | #include "tensorflow/core/framework/op_kernel.h"
6 | #include "tensorflow/core/framework/register_types.h"
7 | #include "tensorflow/core/framework/tensor.h"
8 | #include "tensorflow/core/framework/tensor_shape.h"
9 | #include "tensorflow/core/framework/types.h"
10 | #include "tensorflow/core/lib/core/status.h"
11 | #include "tensorflow/core/platform/logging.h"
12 | #include "tensorflow/core/framework/op.h"
13 | #include "tensorflow/core/framework/shape_inference.h"
14 | #include "tensorflow/core/framework/common_shape_fns.h"
15 |
16 | using namespace tensorflow;
17 |
18 | typedef Eigen::GpuDevice GPUDevice;
19 |
20 | void Downsample(const GPUDevice& d,
21 |                 typename TTypes<float, 4>::ConstTensor images,
22 |                 typename TTypes<float, 4>::Tensor output);
23 |
24 | class DownsampleOp : public OpKernel {
25 | public:
26 | explicit DownsampleOp(OpKernelConstruction* c) : OpKernel(c) {
27 | OP_REQUIRES_OK(c, c->GetAttr("scale", &scale));
28 | }
29 |
30 | void Compute(OpKernelContext* context) override {
31 | const Tensor& input = context->input(0);
32 |
33 |     typename TTypes<float, 4>::ConstTensor input_data = input.tensor<float, 4>();
34 |
35 |
36 | OP_REQUIRES(context,
37 | input_data.dimension(1) % scale == 0 &&
38 | input_data.dimension(2) % scale == 0,
39 | errors::InvalidArgument("Input height and width must be divisible by scale"));
40 |
41 | const int batch = input_data.dimension(0);
42 | const int height = input_data.dimension(1) / scale;
43 | const int width = input_data.dimension(2) / scale;
44 | const int channels = input_data.dimension(3);
45 |
46 | auto output_shape = TensorShape({batch, height, width, channels});
47 |
48 | Tensor* output = NULL;
49 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
50 |
51 |     typename TTypes<float, 4>::Tensor output_data = output->tensor<float, 4>();
52 |
53 |     Downsample(context->eigen_device<GPUDevice>(), input_data, output_data);
54 | }
55 | private:
56 | int scale;
57 | };
58 |
59 | using shape_inference::DimensionHandle;
60 | using shape_inference::ShapeHandle;
61 |
62 | REGISTER_OP("Downsample")
63 | .Input("images: float")
64 | .Attr("scale: int = 2")
65 | .Output("out_images: float")
66 | .SetShapeFn([](shape_inference::InferenceContext* c) {
67 | ShapeHandle in = c->input(0);
68 | int scale;
69 | DimensionHandle batch = c->Dim(in, 0);
70 | DimensionHandle channels = c->Dim(in, 3);
71 | DimensionHandle height;
72 | DimensionHandle width;
73 |
74 | c->GetAttr("scale", &scale);
75 | c->Divide(c->Dim(in, 1), scale, true, &height);
76 | c->Divide(c->Dim(in, 2), scale, true, &width);
77 |
78 | c->set_output(0, c->MakeShape({batch, height, width, channels}));
79 | return Status::OK();
80 | });
81 |
82 | #if GOOGLE_CUDA
83 |
84 | REGISTER_KERNEL_BUILDER(Name("Downsample").Device(DEVICE_GPU), DownsampleOp);
85 |
86 | #endif // GOOGLE_CUDA
87 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/downsample_op.cu.cc:
--------------------------------------------------------------------------------
1 | #if GOOGLE_CUDA
2 |
3 | #define EIGEN_USE_GPU
4 |
5 | #include "tensorflow/core/framework/register_types.h"
6 | #include "tensorflow/core/framework/tensor_types.h"
7 | #include "tensorflow/core/platform/types.h"
8 | #include "tensorflow/core/util/cuda_kernel_helper.h"
9 |
10 | using namespace tensorflow;
11 |
12 | typedef Eigen::GpuDevice GPUDevice;
13 |
14 | __global__ void DownsampleKernel(const int32 nthreads,
15 | const float* images,
16 | int batch, int in_height, int in_width, int channels,
17 | int out_height, int out_width,
18 | float* output) {
19 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) {
20 | // out_idx = x + out_width * (y + out_height * b)
21 | int idx = out_idx;
22 | const int c = idx % channels;
23 | idx /= channels;
24 | const int x = idx % out_width;
25 | idx /= out_width;
26 | const int y = idx % out_height;
27 | const int b = idx / out_height;
28 |
29 | const int scale_y = in_height / out_height;
30 | const int scale_x = in_width/ out_width;
31 |
32 | const int min_in_y = y * scale_y;
33 | const int min_in_x = x * scale_x;
34 | const int max_in_y = min_in_y + scale_y;
35 | const int max_in_x = min_in_x + scale_x;
36 |
37 | float sum = 0.0;
38 |
39 | for(int in_y = min_in_y; in_y < max_in_y; ++in_y) {
40 | for(int in_x = min_in_x; in_x < max_in_x; ++in_x) {
41 | sum += images[c + channels * (in_x + in_width * (in_y + in_height * b))];
42 | }
43 | }
44 |
45 | sum /= scale_x * scale_y;
46 | output[c + channels * (x + out_width * (y + out_height * b))] = sum;
47 | }
48 | }
49 |
50 | void Downsample(const GPUDevice& d,
51 |                 typename TTypes<float, 4>::ConstTensor images,
52 |                 typename TTypes<float, 4>::Tensor output) {
53 | const int batch = images.dimension(0);
54 | const int in_height = images.dimension(1);
55 | const int in_width = images.dimension(2);
56 | const int channels = images.dimension(3);
57 |
58 | const int out_height = output.dimension(1);
59 | const int out_width = output.dimension(2);
60 |
61 | const int total_count = batch * out_height * out_width * channels;
62 | if (total_count == 0) return;
63 |
64 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
65 | DownsampleKernel
66 |       <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
67 | config.virtual_thread_count, images.data(),
68 | batch, in_height, in_width, channels,
69 | out_height, out_width,
70 | output.data());
71 | }
72 |
73 | #endif // GOOGLE_CUDA
74 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/forward_warp_op.cc:
--------------------------------------------------------------------------------
1 | #define EIGEN_USE_THREADS
2 |
3 | #include
4 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
5 | #include "tensorflow/core/framework/op_kernel.h"
6 | #include "tensorflow/core/framework/register_types.h"
7 | #include "tensorflow/core/framework/tensor.h"
8 | #include "tensorflow/core/framework/tensor_shape.h"
9 | #include "tensorflow/core/framework/types.h"
10 | #include "tensorflow/core/lib/core/status.h"
11 | #include "tensorflow/core/platform/logging.h"
12 | #include "tensorflow/core/framework/op.h"
13 | #include "tensorflow/core/framework/shape_inference.h"
14 | #include "tensorflow/core/framework/common_shape_fns.h"
15 |
16 | // TODO assert input flow channel count = 2, assert matching numbers in all other dims
17 |
18 | typedef Eigen::ThreadPoolDevice CPUDevice;
19 | typedef Eigen::GpuDevice GPUDevice;
20 |
21 | using namespace tensorflow;
22 |
23 | void ForwardWarp(const GPUDevice& d,
24 |                  typename TTypes<float, 4>::ConstTensor input,
25 |                  typename TTypes<float, 4>::Tensor output);
26 |
27 | void ForwardWarpGrad(const GPUDevice& d,
28 |                      typename TTypes<float, 4>::ConstTensor input_grad,
29 |                      typename TTypes<float, 4>::ConstTensor original_input,
30 |                      typename TTypes<float, 4>::Tensor output_grad);
31 |
32 | class ForwardWarpOp : public OpKernel {
33 | public:
34 | explicit ForwardWarpOp(OpKernelConstruction* context) : OpKernel(context) {}
35 |
36 | void Compute(OpKernelContext* context) override {
37 | const Tensor& input = context->input(0);
38 |
39 |     typename TTypes<float, 4>::ConstTensor input_data = input.tensor<float, 4>();
40 |
41 | const int batch = input_data.dimension(0);
42 | const int height = input_data.dimension(1);
43 | const int width = input_data.dimension(2);
44 | const int channels = input_data.dimension(3);
45 |
46 | auto output_shape = TensorShape({batch, height, width, 1});
47 | Tensor* output = NULL;
48 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape,
49 | &output));
50 |     typename TTypes<float, 4>::Tensor output_data = output->tensor<float, 4>();
51 |
52 |     ForwardWarp(context->eigen_device<GPUDevice>(),
53 | input_data, output_data);
54 | }
55 | };
56 |
57 | class ForwardWarpOpGrad : public OpKernel {
58 | public:
59 | explicit ForwardWarpOpGrad(OpKernelConstruction* context) : OpKernel(context) {}
60 |
61 | void Compute(OpKernelContext* context) override {
62 | const Tensor& input = context->input(0);
63 | const Tensor& original_input = context->input(1);
64 |
65 | Tensor* output = NULL;
66 | OP_REQUIRES_OK(context, context->allocate_output(0, original_input.shape(),
67 | &output));
68 |
69 |     typename TTypes<float, 4>::ConstTensor input_data = input.tensor<float, 4>();
70 |     typename TTypes<float, 4>::ConstTensor original_data = original_input.tensor<float, 4>();
71 |     typename TTypes<float, 4>::Tensor output_data = output->tensor<float, 4>();
72 |
73 |     ForwardWarpGrad(context->eigen_device<GPUDevice>(),
74 | input_data, original_data, output_data);
75 | }
76 | };
77 |
78 | using shape_inference::DimensionHandle;
79 | using shape_inference::ShapeHandle;
80 |
81 | REGISTER_OP("ForwardWarp")
82 | .Input("flows: float")
83 | .Output("output: float")
84 | .SetShapeFn([](shape_inference::InferenceContext* c) {
85 | ShapeHandle in = c->input(0);
86 | DimensionHandle batch = c->Dim(in, 0);
87 | DimensionHandle height = c->Dim(in, 1);
88 | DimensionHandle width = c->Dim(in, 2);
89 | c->set_output(0, c->MakeShape({batch, height, width, 1}));
90 | return Status::OK();
91 | });
92 |
93 | REGISTER_OP("ForwardWarpGrad")
94 | .Input("grads: float")
95 | .Input("original_flows: float")
96 | .Output("output: float")
97 | .SetShapeFn([](shape_inference::InferenceContext* c) {
98 | c->set_output(0, c->input(1));
99 | return Status::OK();
100 | });
101 |
102 | #if GOOGLE_CUDA
103 |
104 | REGISTER_KERNEL_BUILDER(Name("ForwardWarp").Device(DEVICE_GPU), ForwardWarpOp);
105 | REGISTER_KERNEL_BUILDER(Name("ForwardWarpGrad").Device(DEVICE_GPU), ForwardWarpOpGrad);
106 |
107 | #endif // GOOGLE_CUDA
108 |
--------------------------------------------------------------------------------
/core/UnFlow/ops/forward_warp_op.cu.cc:
--------------------------------------------------------------------------------
1 | #if GOOGLE_CUDA
2 |
3 | #define EIGEN_USE_GPU
4 |
5 | #include "tensorflow/core/framework/register_types.h"
6 | #include "tensorflow/core/framework/tensor_types.h"
7 | #include "tensorflow/core/platform/types.h"
8 | #include "tensorflow/core/util/cuda_kernel_helper.h"
9 |
10 | using namespace tensorflow;
11 |
12 | #define gauss(x, y, std)
13 |
14 | typedef Eigen::GpuDevice GPUDevice;
15 |
16 | __global__ void ForwardWarpKernel(const int32 nthreads,
17 | const float* flows,
18 | int batch, int height, int width,
19 | float* output) {
20 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) {
21 | // out_idx = x + width * (y + height * b)
22 | int idx = out_idx;
23 | const int src_x = idx % width;
24 | idx /= width;
25 | const int src_y = idx % height;
26 | const int b = idx / height;
27 |
28 | const int flow_index = out_idx * 2;
29 | const float target_x = src_x + flows[flow_index];
30 | const float target_y = src_y + flows[flow_index + 1];
31 |
32 | // Calculate distribution variance depending on similar neighbor flows
33 | // fixed variance for first tests!!
34 |
35 | // Compute valid neighbor range
36 | //int min_n_y = y + 2 > 0 ? floorf(pos_y) : 0;
37 |
38 | const float dist = 2.0;
39 | const float std = dist * 0.5;
40 | const int k = ceilf(dist + 2);
41 | // TODO variance different for x, y?
42 |
43 | // center pixel closest to mapping location
44 | //const int closest_x = roundf(target_x);
45 | //const int closest_y = roundf(target_y);
46 | if(floorf(target_x - k) < width && floorf(target_x + k) >= 0
47 | && floorf(target_y - k) < height && floorf(target_y + k) >= 0) {
48 | const int min_n_x = target_x - k > 0? floorf(target_x - k) : 0;
49 | const int min_n_y = target_y - k > 0? floorf(target_y - k) : 0;
50 | const int max_n_x = target_x + k < width? floorf(target_x + k) : width - 1;
51 | const int max_n_y = target_y + k < height? floorf(target_y + k) : height - 1;
52 |
53 | const float gauss_divisor = 2 * powf(std, 2);
54 | for(int n_x = min_n_x; n_x <= max_n_x; ++n_x) {
55 | for(int n_y = min_n_y; n_y <= max_n_y; ++n_y) {
56 | const float x = n_x - target_x;
57 | const float y = n_y - target_y;
58 | const float weight = expf(-(powf(x, 2) + powf(y, 2)) / gauss_divisor);
59 | CudaAtomicAdd(output + n_x + width * (n_y + height * b), weight);
60 | }
61 | }
62 | }
63 |
64 | }
65 | }
66 |
67 | __global__ void ForwardWarpGradKernel(const int32 nthreads,
68 | const float* input_grad, const float* flows,
69 | int batch, int height, int width,
70 | float* output_grad) {
71 | CUDA_1D_KERNEL_LOOP(in_idx, nthreads) {
72 | // in_idx = x + width * (y + height * b)
73 | int idx = in_idx;
74 | const int src_x = idx % width;
75 | idx /= width;
76 | const int src_y = idx % height;
77 | const int b = idx / height;
78 |
79 | const int flow_index = in_idx * 2;
80 | const float target_x = src_x + flows[flow_index];
81 | const float target_y = src_y + flows[flow_index + 1];
82 |
83 | // Calculate distribution variance depending on similar neighbor flows
84 | // fixed variance for first tests!!
85 |
86 | // Compute valid neighbor range
87 | //int min_n_y = y + 2 > 0 ? floorf(pos_y) : 0;
88 |
89 | const float dist = 2.0;
90 | const float std = dist * 0.5;
91 | const int k = ceilf(dist + 2);
92 | // TODO variance different for x, y?
93 |
94 | // center pixel closest to mapping location
95 | //const int closest_x = roundf(target_x);
96 | //const int closest_y = roundf(target_y);
97 | float du = 0.0;
98 | float dv = 0.0;
99 |
100 | if(floorf(target_x - k) < width && floorf(target_x + k) >= 0
101 | && floorf(target_y - k) < height && floorf(target_y + k) >= 0) {
102 | const int min_n_x = target_x - k > 0? floorf(target_x - k) : 0;
103 | const int min_n_y = target_y - k > 0? floorf(target_y - k) : 0;
104 | const int max_n_x = target_x + k < width? floorf(target_x + k) : width - 1;
105 | const int max_n_y = target_y + k < height? floorf(target_y + k) : height - 1;
106 |
107 | const float gauss_divisor = 2 * powf(std, 2);
108 | for(int n_x = min_n_x; n_x <= max_n_x; ++n_x) {
109 | for(int n_y = min_n_y; n_y <= max_n_y; ++n_y) {
110 | const float x = n_x - target_x;
111 | const float y = n_y - target_y;
112 | const float weight = expf(-(powf(x, 2) + powf(y, 2)) / gauss_divisor);
113 |
114 | const float din = input_grad[n_x + width * (n_y + height * b)];
115 | const float factor = 2 * din * weight / gauss_divisor;
116 | du += factor * x;
117 | dv += factor * y;
118 | }
119 | }
120 | }
121 |
122 | output_grad[flow_index] = du;
123 | output_grad[flow_index + 1] = dv;
124 | }
125 | }
126 |
127 | void ForwardWarp(const GPUDevice& d,
128 |                  typename TTypes<float, 4>::ConstTensor flows,
129 |                  typename TTypes<float, 4>::Tensor output) {
130 | const int batch = flows.dimension(0);
131 | const int height = flows.dimension(1);
132 | const int width = flows.dimension(2);
133 |
134 | const int total_count = batch * height * width;
135 | if (total_count == 0) return;
136 |
137 | CudaLaunchConfig config;
138 |
139 | // Initialize output with all zeros.
140 | config = GetCudaLaunchConfig(total_count, d);
141 |   SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
142 | config.virtual_thread_count, output.data());
143 |
144 | config = GetCudaLaunchConfig(total_count, d);
145 | ForwardWarpKernel
146 |       <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
147 | config.virtual_thread_count, flows.data(),
148 | batch, height, width,
149 | output.data());
150 | }
151 |
152 | void ForwardWarpGrad(const GPUDevice& d,
153 |                      typename TTypes<float, 4>::ConstTensor input_grad,
154 |                      typename TTypes<float, 4>::ConstTensor flows,
155 |                      typename TTypes<float, 4>::Tensor output_grad) {
156 | const int batch = input_grad.dimension(0);
157 | const int height = input_grad.dimension(1);
158 | const int width = input_grad.dimension(2);
159 |
160 | int total_count = batch * height * width;
161 | if (total_count == 0) return;
162 |
163 | // Initialize output_grad with all zeros.
164 | CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
165 |   SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
166 | config.virtual_thread_count, output_grad.data());
167 |
168 | // Accumulate.
169 | config = GetCudaLaunchConfig(total_count, d);
170 | ForwardWarpGradKernel
171 | <<>>(
172 | config.virtual_thread_count, input_grad.data(), flows.data(),
173 | batch, height, width,
174 | output_grad.data());
175 | }
176 |
177 | #endif // GOOGLE_CUDA
178 |
--------------------------------------------------------------------------------
/core/UnFlow/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .e2eflow import flownet
2 |
--------------------------------------------------------------------------------
/core/UnFlow/src/e2eflow/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import flownet
2 |
--------------------------------------------------------------------------------
/core/UnFlow/src/e2eflow/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .flownet import flownet
2 |
--------------------------------------------------------------------------------
/core/UnFlow/src/e2eflow/core/augment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from .spatial_transformer import transformer
5 |
6 |
7 | def random_affine(tensors, *,
8 | max_translation_x=0.0, max_translation_y=0.0,
9 | max_rotation=0.0, min_scale=1.0, max_scale=1.0,
10 | horizontal_flipping=False):
11 | """Applies geometric augmentations to a list of tensors.
12 |
13 | Each element in the list is augmented in the same way.
14 | For all elements, num_batch must be equal while height, width and channels
15 | may differ.
16 | """
17 | def _deg2rad(deg):
18 | return (deg * np.pi) / 180.0
19 |
20 | with tf.variable_scope('random_affine'):
21 | num_batch = tf.shape(tensors[0])[0]
22 |
23 | zero = tf.zeros([num_batch])
24 | one = tf.ones([num_batch])
25 |
26 | tx = tf.random_uniform([num_batch], -max_translation_x, max_translation_x)
27 | ty = tf.random_uniform([num_batch], -max_translation_y, max_translation_y)
28 | rot = tf.random_uniform([num_batch], -max_rotation, max_rotation)
29 | rad = _deg2rad(rot)
30 | scale = tf.random_uniform([num_batch], min_scale, max_scale)
31 |
32 | t1 = [[tf.cos(rad), -tf.sin(rad), tx],
33 | [tf.sin(rad), tf.cos(rad), ty]]
34 | t1 = tf.transpose(t1, [2, 0, 1])
35 |
36 | scale_x = scale
37 | if horizontal_flipping:
38 | flip = tf.random_uniform([num_batch], 0, 1)
39 | flip = tf.where(tf.greater(flip, 0.5), -one, one)
40 | scale_x = scale_x * flip
41 |
42 | t2 = [[scale_x, zero, zero],
43 | [zero, scale, zero],
44 | [zero, zero, one]]
45 | t2 = tf.transpose(t2, [2, 0, 1])
46 |
47 | t = tf.matmul(t1, t2)
48 |
49 | out = []
50 | for tensor in tensors:
51 | shape = tf.shape(tensor)
52 | tensor = transformer(tensor, t, (shape[1], shape[2]))
53 | out.append(tf.stop_gradient(tensor))
54 | return out
55 |
56 |
57 | def random_photometric(ims, *,
58 | noise_stddev=0.0, min_contrast=0.0, max_contrast=0.0,
59 | brightness_stddev=0.0, min_colour=1.0, max_colour=1.0,
60 | min_gamma=1.0, max_gamma=1.0):
61 | """Applies photometric augmentations to a list of image batches.
62 |
63 | Each image in the list is augmented in the same way.
64 | For all elements, num_batch must be equal while height and width may differ.
65 |
66 | Args:
67 | ims: list of 3-channel image batches normalized to [0, 1].
68 | (Channel-mean subtraction for the network inputs is done by the caller
69 | after augmentation; this function only expects values in [0, 1].)
70 |
71 | Returns:
72 | List of image batches with photometric augmentations applied. Each batch
73 | has the same shape as its input.
74 | """
75 |
76 | with tf.variable_scope('random_photometric'):
77 | num_batch = tf.shape(ims[0])[0]
78 |
79 | contrast = tf.random_uniform([num_batch, 1], min_contrast, max_contrast)
80 | gamma = tf.random_uniform([num_batch, 1], min_gamma, max_gamma)
81 | gamma_inv = 1.0 / gamma
82 | colour = tf.random_uniform([num_batch, 3], min_colour, max_colour)
83 | if noise_stddev > 0.0:
84 | noise = tf.random_normal([num_batch, 1], stddev=noise_stddev)
85 | else:
86 | noise = tf.zeros([num_batch, 1])
87 | if brightness_stddev > 0.0:
88 | brightness = tf.random_normal([num_batch, 1],
89 | stddev=brightness_stddev)
90 | else:
91 | brightness = tf.zeros([num_batch, 1])
92 |
93 | out = []
94 | for im in ims:
95 | # Transpose to [height, width, num_batch, channels]
96 | im_re = tf.transpose(im, [1, 2, 0, 3])
97 | im_re = im_re
98 | im_re = (im_re * (contrast + 1.0) + brightness) * colour
99 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re))
100 | im_re = tf.pow(im_re, gamma_inv)
101 |
102 | im_re = im_re + noise
103 |
104 | # (channel-mean subtraction is left to the caller)
105 | im_re = im_re
106 |
107 | im = tf.transpose(im_re, [2, 0, 1, 3])
108 | im = tf.stop_gradient(im)
109 | out.append(im)
110 | return out
111 |
112 |
113 | def random_crop(tensors, size, seed=None, name=None):
114 | """Randomly crops multiple tensors (of the same shape) to a given size.
115 |
116 | Each tensor is cropped in the same way."""
117 | with tf.name_scope(name, "random_crop", [size]) as name:
118 | size = tf.convert_to_tensor(size, dtype=tf.int32, name="size")
119 | if len(tensors) == 2:
120 | shape = tf.minimum(tf.shape(tensors[0]), tf.shape(tensors[1]))
121 | else:
122 | shape = tf.shape(tensors[0])
123 |
124 | limit = shape - size + 1
125 | offset = tf.random_uniform(
126 | tf.shape(shape),
127 | dtype=size.dtype,
128 | maxval=size.dtype.max,
129 | seed=seed) % limit
130 | results = []
131 | for tensor in tensors:
132 | result = tf.slice(tensor, offset, size)
133 | results.append(result)
134 | return results
135 |
--------------------------------------------------------------------------------
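Note: these two augmentation functions are consumed later in unsupervised.py roughly as follows (TensorFlow 1.x graph code; im1/im2 are image batches in [0, 1] and border_mask a matching mask):

    im1_geo, im2_geo, border_mask = random_affine(
        [im1, im2, border_mask],
        horizontal_flipping=True, min_scale=0.9, max_scale=1.1)

    im1_photo, im2_photo = random_photometric(
        [im1_geo, im2_geo],
        noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3,
        brightness_stddev=0.02, min_colour=0.9, max_colour=1.1,
        min_gamma=0.7, max_gamma=1.5)
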
/core/UnFlow/src/e2eflow/core/data.py:
--------------------------------------------------------------------------------
1 | """Utility functions for providing data directories."""
2 | import os
3 | import sys
4 | import zipfile
5 | import rarfile
6 | from urllib.request import FancyURLopener
7 | import shutil
8 |
9 | import numpy as np
10 | import matplotlib.image as mpimg
11 |
12 |
13 | class Data():
14 | # Should be a list containing all subdirectories of the main data dir which
15 | # belong to this dataset
16 | dirs = None
17 |
18 | def __init__(self, data_dir, stat_log_dir,
19 | development=True, fast_dir=None):
20 | self.development = development
21 | self.data_dir = data_dir
22 | self.stat_log_dir = stat_log_dir
23 | if not os.path.isdir(data_dir):
24 | os.makedirs(data_dir)
25 |
26 | self._fetch_if_missing()
27 |
28 | self.fast_dir = fast_dir
29 | if fast_dir:
30 | print(">> Copying files to {}".format(fast_dir))
31 | for d in self.dirs:
32 | src = os.path.join(data_dir, d)
33 | dst = os.path.join(fast_dir, d)
34 | if not os.path.isdir(dst):
35 | shutil.copytree(src, dst)
36 | print(">> Copied {}".format(d))
37 | self.current_dir = fast_dir
38 | else:
39 | self.current_dir = data_dir
40 |
41 | if stat_log_dir:
42 | self.stat_log_file = os.path.join(stat_log_dir,
43 | self.__class__.__name__ + ".txt")
44 | self._ensure_statistics()
45 |
46 | def __del__(self):
47 | pass
48 | #if self.fast_dir:
49 | # print(">> Removing files from {}".format(self.fast_dir))
50 | # for d in self.dirs:
51 | # shutil.rmtree(os.path.join(self.fast_dir, d))
52 |
53 | def clear_statistics(self):
54 | """Delete saved statistics file if present."""
55 | if self.stat_log_dir and os.path.isfile(self.stat_log_file):
56 | os.remove(self.stat_log_file)
57 |
58 | def _ensure_statistics(self):
59 | """Make sure we know the dataset statistics."""
60 | if os.path.isfile(self.stat_log_file):
61 | vals = np.loadtxt(self.stat_log_file)
62 | self.mean = vals[0]
63 | self.stddev = vals[1]
64 | else:
65 | print(">> Computing statistics (mean, variance) for {}"
66 | .format(self.__class__.__name__))
67 | mean, stddev = self.compute_statistics(self.get_raw_files())
68 | self.mean = mean
69 | self.stddev = stddev
70 | os.makedirs(self.stat_log_dir, exist_ok=True)
71 | np.savetxt(self.stat_log_file, [mean, stddev])
72 | print(">> Statistics complete")
73 |
74 | def get_raw_dirs(self):
75 | """Should return a list of all dirs containing training images.
76 |
77 | Note: self.current_dir should be used for loading input data.
78 | """
79 | raise NotImplementedError()
80 |
81 | def get_raw_files(self):
82 | files = []
83 | for d in self.get_raw_dirs():
84 | for path in os.listdir(d):
85 | files.append(os.path.join(d, path))
86 | return files
87 |
88 | def _fetch_if_missing(self):
89 | """A call to this must make subsequent calls to get_raw_files succeed.
90 | All subdirs of data_dir listed in self.dirs must exist after this call.
91 | """
92 | raise NotImplementedError()
93 |
94 | def _download_and_extract(self, url, extract_to, ext='zip'):
95 | def _progress(count, block_size, total_size):
96 | if total_size > 0:
97 | print('\r>> Downloading %s %.1f%%' % (url,
98 | float(count * block_size) / float(total_size) * 100.0), end=' ')
99 | else:
100 | print('\r>> Downloading %s' % (url), end=' ')
101 | sys.stdout.flush()
102 | urlretrieve = FancyURLopener().retrieve
103 | local_zip_path = os.path.join(self.data_dir, 'tmp.' + ext)
104 | urlretrieve(url, local_zip_path, _progress)
105 | sys.stdout.write("\n>> Finished downloading. Unzipping...\n")
106 | if ext == 'zip':
107 | with zipfile.ZipFile(local_zip_path, "r") as zip_ref:
108 | zip_ref.extractall(extract_to)
109 | else:
110 | with rarfile.RarFile(local_zip_path, "r") as zip_ref:
111 | zip_ref.extractall(extract_to)
112 |
113 | sys.stdout.write(">> Finished unzipping.\n")
114 | os.remove(local_zip_path)
115 |
116 | self.clear_statistics()
117 |
118 | def compute_statistics(self, files):
119 | """Use welford's method to compute mean and variance of the given
120 | dataset.
121 |
122 | See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm."""
123 |
124 | assert len(files) > 1
125 |
126 | n = 0
127 | mean = np.zeros(3)
128 | M2 = np.zeros(3)
129 | for j, filename in enumerate(files):
130 | #TODO ensure the pixel values are 0..255
131 | im = np.reshape(mpimg.imread(filename) * 255, [-1, 3])
132 | for i in range(np.shape(im)[0]):  # iterate over all pixels, not over the 3 channels
133 | n = n + 1
134 | delta = im[i] - mean
135 | mean += delta / n
136 | M2 += delta * (im[i] - mean)
137 | sys.stdout.write('\r>> Processed %.1f%%' % (
138 | float(j) / float(len(files)) * 100.0))
139 | sys.stdout.flush()
140 | var = M2 / (n - 1)
141 | stddev = np.sqrt(var)
142 | return np.float32(mean), np.float32(stddev)
143 |
--------------------------------------------------------------------------------
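Note: compute_statistics above uses Welford's online update for the per-channel mean and variance. A minimal self-contained check of the same update rule against NumPy:

    import numpy as np

    pixels = np.random.rand(1000, 3) * 255
    n, mean, M2 = 0, np.zeros(3), np.zeros(3)
    for p in pixels:
        n += 1
        delta = p - mean
        mean += delta / n
        M2 += delta * (p - mean)
    stddev = np.sqrt(M2 / (n - 1))
    assert np.allclose(mean, pixels.mean(axis=0))
    assert np.allclose(stddev, pixels.std(axis=0, ddof=1))
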
/core/UnFlow/src/e2eflow/core/flow_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 |
5 | def atan2(y, x):
6 | angle = tf.where(tf.greater(x,0.0), tf.atan(y/x), tf.zeros_like(x))
7 | angle = tf.where(tf.logical_and(tf.less(x,0.0), tf.greater_equal(y,0.0)),
8 | tf.atan(y/x) + np.pi, angle)
9 | angle = tf.where(tf.logical_and(tf.less(x,0.0), tf.less(y,0.0)),
10 | tf.atan(y/x) - np.pi, angle)
11 | angle = tf.where(tf.logical_and(tf.equal(x,0.0), tf.greater(y,0.0)),
12 | 0.5 * np.pi * tf.ones_like(x), angle)
13 | angle = tf.where(tf.logical_and(tf.equal(x,0.0), tf.less(y,0.0)),
14 | -0.5 * np.pi * tf.ones_like(x), angle)
15 | angle = tf.where(tf.logical_and(tf.equal(x,0.0),tf.equal(y,0.0)),
16 | np.nan * tf.zeros_like(x), angle)
17 | return angle
18 |
19 |
20 | def flow_to_color(flow, mask=None, max_flow=None):
21 | """Converts flow to 3-channel color image.
22 |
23 | Args:
24 | flow: tensor of shape [num_batch, height, width, 2].
25 | mask: flow validity mask of shape [num_batch, height, width, 1].
26 | """
27 | n = 8
28 | num_batch, height, width, _ = tf.unstack(tf.shape(flow))
29 | mask = tf.ones([num_batch, height, width, 1]) if mask is None else mask
30 | flow_u, flow_v = tf.unstack(flow, axis=3)
31 | if max_flow is not None:
32 | max_flow = tf.maximum(max_flow, 1)
33 | else:
34 | max_flow = tf.reduce_max(tf.abs(flow * mask))
35 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow), 3))
36 | angle = atan2(flow_v, flow_u)
37 |
38 | im_h = tf.mod(angle / (2 * np.pi) + 1.0, 1.0)
39 | im_s = tf.clip_by_value(mag * n / max_flow, 0, 1)
40 | im_v = tf.clip_by_value(n - im_s, 0, 1)
41 | im_hsv = tf.stack([im_h, im_s, im_v], 3)
42 | im = tf.image.hsv_to_rgb(im_hsv)
43 | return im * mask
44 |
45 |
46 | def flow_error_image(flow_1, flow_2, mask_occ, mask_noc=None, log_colors=True):
47 | """Visualize the error between two flows as 3-channel color image.
48 |
49 | Adapted from the KITTI C++ devkit.
50 |
51 | Args:
52 | flow_1: first flow of shape [num_batch, height, width, 2].
53 | flow_2: second flow (ground truth)
54 | mask_occ: flow validity mask of shape [num_batch, height, width, 1].
55 | Equals 1 at (occluded and non-occluded) valid pixels.
56 | mask_noc: Is 1 only at valid pixels which are not occluded.
57 | """
58 | mask_noc = tf.ones(tf.shape(mask_occ)) if mask_noc is None else mask_noc
59 | diff_sq = (flow_1 - flow_2) ** 2
60 | diff = tf.sqrt(tf.reduce_sum(diff_sq, [3], keep_dims=True))
61 | if log_colors:
62 | num_batch, height, width, _ = tf.unstack(tf.shape(flow_1))
63 | colormap = [
64 | [0,0.0625,49,54,149],
65 | [0.0625,0.125,69,117,180],
66 | [0.125,0.25,116,173,209],
67 | [0.25,0.5,171,217,233],
68 | [0.5,1,224,243,248],
69 | [1,2,254,224,144],
70 | [2,4,253,174,97],
71 | [4,8,244,109,67],
72 | [8,16,215,48,39],
73 | [16,1000000000.0,165,0,38]]
74 | colormap = np.asarray(colormap, dtype=np.float32)
75 | colormap[:, 2:5] = colormap[:, 2:5] / 255
76 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow_2), 3, keep_dims=True))
77 | error = tf.minimum(diff / 3, 20 * diff / mag)
78 | im = tf.zeros([num_batch, height, width, 3])
79 | for i in range(colormap.shape[0]):
80 | colors = colormap[i, :]
81 | cond = tf.logical_and(tf.greater_equal(error, colors[0]),
82 | tf.less(error, colors[1]))
83 | im = tf.where(tf.tile(cond, [1, 1, 1, 3]),
84 | tf.ones([num_batch, height, width, 1]) * colors[2:5],
85 | im)
86 | im = tf.where(tf.tile(tf.cast(mask_noc, tf.bool), [1, 1, 1, 3]),
87 | im, im * 0.5)
88 | im = im * mask_occ
89 | else:
90 | error = (tf.minimum(diff, 5) / 5) * mask_occ
91 | im_r = error # errors in occluded areas will be red
92 | im_g = error * mask_noc
93 | im_b = error * mask_noc
94 | im = tf.concat(axis=3, values=[im_r, im_g, im_b])
95 | return im
96 |
97 |
98 | def flow_error_avg(flow_1, flow_2, mask):
99 | """Evaluates the average endpoint error between flow batches."""
100 | with tf.variable_scope('flow_error_avg'):
101 | diff = euclidean(flow_1 - flow_2) * mask
102 | error = tf.reduce_sum(diff) / tf.reduce_sum(mask)
103 | return error
104 |
105 |
106 | def outlier_ratio(gt_flow, flow, mask, threshold=3.0, relative=0.05):
107 | diff = euclidean(gt_flow - flow) * mask
108 | if relative is not None:
109 | threshold = tf.maximum(threshold, euclidean(gt_flow) * relative)
110 | outliers = tf.cast(tf.greater_equal(diff, threshold), tf.float32)
111 | else:
112 | outliers = tf.cast(tf.greater_equal(diff, threshold), tf.float32)
113 | ratio = tf.reduce_sum(outliers) / tf.reduce_sum(mask)
114 | return ratio
115 |
116 |
117 | def outlier_pct(gt_flow, flow, mask, threshold=3.0, relative=0.05):
118 | frac = outlier_ratio(gt_flow, flow, mask, threshold, relative) * 100
119 | return frac
120 |
121 |
122 | def euclidean(t):
123 | return tf.sqrt(tf.reduce_sum(t ** 2, [3], keep_dims=True))
124 |
--------------------------------------------------------------------------------
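Note: flow_error_avg above is the standard average endpoint error (EPE) over valid pixels. A tiny NumPy sketch of the same quantity:

    import numpy as np

    flow_pred = np.array([[[1.0, 0.0], [0.0, 2.0]]])   # [height=1, width=2, 2]
    flow_gt = np.zeros_like(flow_pred)
    mask = np.ones((1, 2, 1))

    epe_map = np.sqrt(((flow_pred - flow_gt) ** 2).sum(axis=-1, keepdims=True))
    avg_epe = (epe_map * mask).sum() / mask.sum()       # (1.0 + 2.0) / 2 = 1.5
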
/core/UnFlow/src/e2eflow/core/flownet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | import tensorflow.contrib.layers as layers
4 |
5 | from ..ops import correlation
6 | from .image_warp import image_warp
7 |
8 | from .flow_util import flow_to_color
9 |
10 |
11 | FLOW_SCALE = 5.0
12 |
13 | # (Yuliang) Add reuse_variables for forward flow
14 | def flownet(im1, im2, flownet_spec='S', full_resolution=False, train_all=False,
15 | backward_flow=False, reuse=False):
16 | num_batch, height, width, _ = tf.unstack(tf.shape(im1))
17 | flownet_num = len(flownet_spec)
18 | assert flownet_num > 0
19 | flows_fw = []
20 | flows_bw = []
21 | for i, name in enumerate(flownet_spec):
22 | assert name in ('C', 'c', 'S', 's')
23 | channel_mult = 1 if name in ('C', 'S') else 3 / 8
24 | full_res = full_resolution and i == flownet_num - 1
25 |
26 | def scoped_block(reuse=False):
27 | if name.lower() == 'c':
28 | assert i == 0, 'FlowNetS must be used for refinement networks'
29 |
30 | with tf.variable_scope('flownet_c_features') as scope:
31 | # (Yuliang)
32 | if reuse:
33 | scope.reuse_variables()
34 | _, conv2_a, conv3_a = flownet_c_features(im1, channel_mult=channel_mult)
35 | _, conv2_b, conv3_b = flownet_c_features(im2, channel_mult=channel_mult, reuse=True)
36 |
37 | with tf.variable_scope('flownet_c') as scope:
38 | # (Yuliang)
39 | if reuse:
40 | scope.reuse_variables()
41 | flow_fw = flownet_c(conv3_a, conv3_b, conv2_a,
42 | full_res=full_res,
43 | channel_mult=channel_mult)
44 | flows_fw.append(flow_fw)
45 | if backward_flow:
46 | scope.reuse_variables()
47 | flow_bw = flownet_c(conv3_b, conv3_a, conv2_b,
48 | full_res=full_res,
49 | channel_mult=channel_mult)
50 | flows_bw.append(flow_bw)
51 | elif name.lower() == 's':
52 | def _flownet_s(im1, im2, flow=None):
53 | if flow is not None:
54 | flow = tf.image.resize_bilinear(flow, [height, width]) * 4 * FLOW_SCALE
55 | warp = image_warp(im2, flow)
56 | diff = tf.abs(warp - im1)
57 | if not train_all:
58 | flow = tf.stop_gradient(flow)
59 | warp = tf.stop_gradient(warp)
60 | diff = tf.stop_gradient(diff)
61 |
62 | inputs = tf.concat([im1, im2, flow, warp, diff], axis=3)
63 | inputs = tf.reshape(inputs, [num_batch, height, width, 14])
64 | else:
65 | inputs = tf.concat([im1, im2], 3)
66 | return flownet_s(inputs,
67 | full_res=full_res,
68 | channel_mult=channel_mult)
69 | stacked = len(flows_fw) > 0
70 | with tf.variable_scope('flownet_s') as scope:
71 | # (Yuliang)
72 | if reuse:
73 | scope.reuse_variables()
74 | flow_fw = _flownet_s(im1, im2, flows_fw[-1][0] if stacked else None)
75 | flows_fw.append(flow_fw)
76 | if backward_flow:
77 | scope.reuse_variables()
78 | flow_bw = _flownet_s(im2, im1, flows_bw[-1][0] if stacked else None)
79 | flows_bw.append(flow_bw)
80 |
81 | if i > 0:
82 | scope_name = "stack_{}_flownet".format(i)
83 | with tf.variable_scope(scope_name):
84 | scoped_block(reuse)
85 | else:
86 | scoped_block(reuse)
87 |
88 | if backward_flow:
89 | return flows_fw, flows_bw
90 | return flows_fw
91 |
92 |
93 | def _leaky_relu(x):
94 | with tf.variable_scope('leaky_relu'):
95 | return tf.maximum(0.1 * x, x)
96 |
97 |
98 | def _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2, conv1=None, inputs=None,
99 | channel_mult=1, full_res=False, channels=2):
100 | m = channel_mult
101 |
102 | flow6 = slim.conv2d(conv6_1, channels, 3, scope='flow6',
103 | activation_fn=None)
104 | deconv5 = slim.conv2d_transpose(conv6_1, int(512 * m), 4, stride=2,
105 | scope='deconv5')
106 | flow6_up5 = slim.conv2d_transpose(flow6, channels, 4, stride=2,
107 | scope='flow6_up5',
108 | activation_fn=None)
109 | concat5 = tf.concat([conv5_1, deconv5, flow6_up5], 1)
110 | flow5 = slim.conv2d(concat5, channels, 3, scope='flow5',
111 | activation_fn=None)
112 |
113 | deconv4 = slim.conv2d_transpose(concat5, int(256 * m), 4, stride=2,
114 | scope='deconv4')
115 | flow5_up4 = slim.conv2d_transpose(flow5, channels, 4, stride=2,
116 | scope='flow5_up4',
117 | activation_fn=None)
118 | concat4 = tf.concat([conv4_1, deconv4, flow5_up4], 1)
119 | flow4 = slim.conv2d(concat4, channels, 3, scope='flow4',
120 | activation_fn=None)
121 |
122 | deconv3 = slim.conv2d_transpose(concat4, int(128 * m), 4, stride=2,
123 | scope='deconv3')
124 | flow4_up3 = slim.conv2d_transpose(flow4, channels, 4, stride=2,
125 | scope='flow4_up3',
126 | activation_fn=None)
127 | concat3 = tf.concat([conv3_1, deconv3, flow4_up3], 1)
128 | flow3 = slim.conv2d(concat3, channels, 3, scope='flow3',
129 | activation_fn=None)
130 |
131 | deconv2 = slim.conv2d_transpose(concat3, int(64 * m), 4, stride=2,
132 | scope='deconv2')
133 | flow3_up2 = slim.conv2d_transpose(flow3, channels, 4, stride=2,
134 | scope='flow3_up2',
135 | activation_fn=None)
136 | concat2 = tf.concat([conv2, deconv2, flow3_up2], 1)
137 | flow2 = slim.conv2d(concat2, channels, 3, scope='flow2',
138 | activation_fn=None)
139 |
140 | flows = [flow2, flow3, flow4, flow5, flow6]
141 |
142 | if full_res:
143 | with tf.variable_scope('full_res'):
144 | deconv1 = slim.conv2d_transpose(concat2, int(32 * m), 4, stride=2,
145 | scope='deconv1')
146 | flow2_up1 = slim.conv2d_transpose(flow2, channels, 4, stride=2,
147 | scope='flow2_up1',
148 | activation_fn=None)
149 | concat1 = tf.concat([conv1, deconv1, flow2_up1], 1)
150 | flow1 = slim.conv2d(concat1, channels, 3, scope='flow1',
151 | activation_fn=None)
152 |
153 | deconv0 = slim.conv2d_transpose(concat1, int(16 * m), 4, stride=2,
154 | scope='deconv0')
155 | flow1_up0 = slim.conv2d_transpose(flow1, channels, 4, stride=2,
156 | scope='flow1_up0',
157 | activation_fn=None)
158 | concat0 = tf.concat([inputs, deconv0, flow1_up0], 1)
159 | flow0 = slim.conv2d(concat0, channels, 3, scope='flow0',
160 | activation_fn=None)
161 |
162 | flows = [flow0, flow1] + flows
163 |
164 | return flows
165 |
166 |
167 | def nhwc_to_nchw(tensors):
168 | return [tf.transpose(t, [0, 3, 1, 2]) for t in tensors]
169 |
170 |
171 | def nchw_to_nhwc(tensors):
172 | return [tf.transpose(t, [0, 2, 3, 1]) for t in tensors]
173 |
174 |
175 | def flownet_s(inputs, channel_mult=1, full_res=False):
176 | """Given stacked inputs, returns flow predictions in decreasing resolution.
177 |
178 | Uses FlowNetSimple.
179 | """
180 | m = channel_mult
181 | inputs = nhwc_to_nchw([inputs])[0]
182 |
183 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
184 | data_format='NCHW',
185 | weights_regularizer=slim.l2_regularizer(0.0004),
186 | weights_initializer=layers.variance_scaling_initializer(),
187 | activation_fn=_leaky_relu):
188 | conv1 = slim.conv2d(inputs, int(64 * m), 7, stride=2, scope='conv1')
189 | conv2 = slim.conv2d(conv1, int(128 * m), 5, stride=2, scope='conv2')
190 | conv3 = slim.conv2d(conv2, int(256 * m), 5, stride=2, scope='conv3')
191 | conv3_1 = slim.conv2d(conv3, int(256 * m), 3, stride=1, scope='conv3_1')
192 | conv4 = slim.conv2d(conv3_1, int(512 * m), 3, stride=2, scope='conv4')
193 | conv4_1 = slim.conv2d(conv4, int(512 * m), 3, stride=1, scope='conv4_1')
194 | conv5 = slim.conv2d(conv4_1, int(512 * m), 3, stride=2, scope='conv5')
195 | conv5_1 = slim.conv2d(conv5, int(512 * m), 3, stride=1, scope='conv5_1')
196 | conv6 = slim.conv2d(conv5_1, int(1024 * m), 3, stride=2, scope='conv6')
197 | conv6_1 = slim.conv2d(conv6, int(1024 * m), 3, stride=1, scope='conv6_1')
198 |
199 | res = _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2, conv1, inputs,
200 | channel_mult=channel_mult, full_res=full_res)
201 | return nchw_to_nhwc(res)
202 |
203 |
204 | def flownet_c_features(im, channel_mult=1, reuse=None):
205 | m = channel_mult
206 | im = nhwc_to_nchw([im])[0]
207 | with slim.arg_scope([slim.conv2d],
208 | data_format='NCHW',
209 | weights_regularizer=slim.l2_regularizer(0.0004),
210 | weights_initializer=layers.variance_scaling_initializer(),
211 | activation_fn=_leaky_relu):
212 | conv1 = slim.conv2d(im, int(64 * m), 7, stride=2, scope='conv1', reuse=reuse)
213 | conv2 = slim.conv2d(conv1, int(128 * m), 5, stride=2, scope='conv2', reuse=reuse)
214 | conv3 = slim.conv2d(conv2, int(256 * m), 5, stride=2, scope='conv3', reuse=reuse)
215 | return conv1, conv2, conv3
216 |
217 |
218 | def flownet_c(conv3_a, conv3_b, conv2_a, channel_mult=1, full_res=False):
219 | """Given two images, returns flow predictions in decreasing resolution.
220 |
221 | Uses FlowNetCorr.
222 | """
223 | m = channel_mult
224 |
225 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
226 | data_format='NCHW',
227 | weights_regularizer=slim.l2_regularizer(0.0004),
228 | weights_initializer=layers.variance_scaling_initializer(),
229 | activation_fn=_leaky_relu):
230 | corr = correlation(conv3_a, conv3_b,
231 | pad=20, kernel_size=1, max_displacement=20, stride_1=1, stride_2=2)
232 |
233 | conv_redir = slim.conv2d(conv3_a, int(32 * m), 1, stride=1, scope='conv_redir')
234 |
235 | conv3_1 = slim.conv2d(tf.concat([conv_redir, corr], 1), int(256 * m), 3,
236 | stride=1, scope='conv3_1')
237 | conv4 = slim.conv2d(conv3_1, int(512 * m), 3, stride=2, scope='conv4')
238 | conv4_1 = slim.conv2d(conv4, int(512 * m), 3, stride=1, scope='conv4_1')
239 | conv5 = slim.conv2d(conv4_1, int(512 * m), 3, stride=2, scope='conv5')
240 | conv5_1 = slim.conv2d(conv5, int(512 * m), 3, stride=1, scope='conv5_1')
241 | conv6 = slim.conv2d(conv5_1, int(1024 * m), 3, stride=2, scope='conv6')
242 | conv6_1 = slim.conv2d(conv6, int(1024 * m), 3, stride=1, scope='conv6_1')
243 |
244 | res = _flownet_upconv(conv6_1, conv5_1, conv4_1, conv3_1, conv2_a,
245 | channel_mult=channel_mult, full_res=full_res)
246 | return nchw_to_nhwc(res)
247 |
--------------------------------------------------------------------------------
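Note: each entry of the flows_fw / flows_bw lists returned by flownet() corresponds to one network in the stack, and within an entry the predictions are ordered finest-first (flow2, flow3, ..., flow6). This is how the function is consumed later in unsupervised.py; the finest prediction of the last network is upsampled and rescaled by FLOW_SCALE:

    flows_fw, flows_bw = flownet(im1, im2, flownet_spec='S', backward_flow=True)
    flows_fw = flows_fw[-1]                 # predictions of the last network in the stack
    im_shape = tf.shape(im1)[1:3]
    final_flow_fw = tf.image.resize_bilinear(flows_fw[0], im_shape) * FLOW_SCALE * 4
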
/core/UnFlow/src/e2eflow/core/image_warp.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def image_warp(im, flow):
5 | """Performs a backward warp of an image using the predicted flow.
6 |
7 | Args:
8 | im: Batch of images. [num_batch, height, width, channels]
9 | flow: Batch of flow vectors. [num_batch, height, width, 2]
10 | Returns:
11 | warped: transformed image of the same shape as the input image.
12 | """
13 | with tf.variable_scope('image_warp'):
14 |
15 | num_batch, height, width, channels = tf.unstack(tf.shape(im))
16 | max_x = tf.cast(width - 1, 'int32')
17 | max_y = tf.cast(height - 1, 'int32')
18 | zero = tf.zeros([], dtype='int32')
19 |
20 | # We have to flatten our tensors to vectorize the interpolation
21 | im_flat = tf.reshape(im, [-1, channels])
22 | flow_flat = tf.reshape(flow, [-1, 2])
23 |
24 | # Floor the flow, as the final indices are integers
25 | # The fractional part is used to control the bilinear interpolation.
26 | flow_floor = tf.to_int32(tf.floor(flow_flat))
27 | bilinear_weights = flow_flat - tf.floor(flow_flat)
28 |
29 | # Construct base indices which are displaced with the flow
30 | pos_x = tf.tile(tf.range(width), [height * num_batch])
31 | grid_y = tf.tile(tf.expand_dims(tf.range(height), 1), [1, width])
32 | pos_y = tf.tile(tf.reshape(grid_y, [-1]), [num_batch])
33 |
34 | x = flow_floor[:, 0]
35 | y = flow_floor[:, 1]
36 | xw = bilinear_weights[:, 0]
37 | yw = bilinear_weights[:, 1]
38 |
39 | # Compute interpolation weights for 4 adjacent pixels
40 | # expand to num_batch * height * width x 1 for broadcasting in add_n below
41 | wa = tf.expand_dims((1 - xw) * (1 - yw), 1) # top left pixel
42 | wb = tf.expand_dims((1 - xw) * yw, 1) # bottom left pixel
43 | wc = tf.expand_dims(xw * (1 - yw), 1) # top right pixel
44 | wd = tf.expand_dims(xw * yw, 1) # bottom right pixel
45 |
46 | x0 = pos_x + x
47 | x1 = x0 + 1
48 | y0 = pos_y + y
49 | y1 = y0 + 1
50 |
51 | x0 = tf.clip_by_value(x0, zero, max_x)
52 | x1 = tf.clip_by_value(x1, zero, max_x)
53 | y0 = tf.clip_by_value(y0, zero, max_y)
54 | y1 = tf.clip_by_value(y1, zero, max_y)
55 |
56 | dim1 = width * height
57 | batch_offsets = tf.range(num_batch) * dim1
58 | base_grid = tf.tile(tf.expand_dims(batch_offsets, 1), [1, dim1])
59 | base = tf.reshape(base_grid, [-1])
60 |
61 | base_y0 = base + y0 * width
62 | base_y1 = base + y1 * width
63 | idx_a = base_y0 + x0
64 | idx_b = base_y1 + x0
65 | idx_c = base_y0 + x1
66 | idx_d = base_y1 + x1
67 |
68 | Ia = tf.gather(im_flat, idx_a)
69 | Ib = tf.gather(im_flat, idx_b)
70 | Ic = tf.gather(im_flat, idx_c)
71 | Id = tf.gather(im_flat, idx_d)
72 |
73 | warped_flat = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])
74 | warped = tf.reshape(warped_flat, [num_batch, height, width, channels])
75 |
76 | return warped
77 |
--------------------------------------------------------------------------------
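Note: the gather/weight logic above is plain bilinear sampling of im2 at (x + u, y + v). A minimal NumPy analogue for a single query point (hypothetical helper, for illustration; it mirrors the idx_a..idx_d / wa..wd terms):

    import numpy as np

    def bilinear_sample(im, x, y):
        """Sample im ([height, width] or [height, width, channels]) at float (x, y)."""
        h, w = im.shape[:2]
        x0, y0 = int(np.floor(x)), int(np.floor(y))
        xw, yw = x - x0, y - y0
        x0c, x1c = np.clip([x0, x0 + 1], 0, w - 1)
        y0c, y1c = np.clip([y0, y0 + 1], 0, h - 1)
        return ((1 - xw) * (1 - yw) * im[y0c, x0c] + (1 - xw) * yw * im[y1c, x0c]
                + xw * (1 - yw) * im[y0c, x1c] + xw * yw * im[y1c, x1c])
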
/core/UnFlow/src/e2eflow/core/input.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 |
4 | import numpy as np
5 | import tensorflow as tf
6 |
7 | from .augment import random_crop
8 |
9 |
10 | def resize_input(t, height, width, resized_h, resized_w):
11 | # Undo old resizing and apply bilinear
12 | t = tf.reshape(t, [resized_h, resized_w, 3])
13 | t = tf.expand_dims(tf.image.resize_image_with_crop_or_pad(t, height, width), 0)
14 | return tf.image.resize_bilinear(t, [resized_h, resized_w])
15 |
16 |
17 | def resize_output_crop(t, height, width, channels):
18 | _, oldh, oldw, c = tf.unstack(tf.shape(t))
19 | t = tf.reshape(t, [oldh, oldw, c])
20 | t = tf.image.resize_image_with_crop_or_pad(t, height, width)
21 | return tf.reshape(t, [1, height, width, channels])
22 |
23 |
24 | def resize_output(t, height, width, channels):
25 | return tf.image.resize_bilinear(t, [height, width])
26 |
27 |
28 | def resize_output_flow(t, height, width, channels):
29 | batch, old_height, old_width, _ = tf.unstack(tf.shape(t), num=4)
30 | t = tf.image.resize_bilinear(t, [height, width])
31 | u, v = tf.unstack(t, axis=3)
32 | u *= tf.cast(width, tf.float32) / tf.cast(old_width, tf.float32)
33 | v *= tf.cast(height, tf.float32) / tf.cast(old_height, tf.float32)
34 | return tf.reshape(tf.stack([u, v], axis=3), [batch, height, width, 2])
35 |
36 |
37 | def frame_name_to_num(name):
38 | stripped = name.split('.')[0].lstrip('0')
39 | if stripped == '':
40 | return 0
41 | return int(stripped)
42 |
43 |
44 | class Input():
45 | mean = [104.920005, 110.1753, 114.785955]
46 | stddev = 1 / 0.0039216
47 |
48 | def __init__(self, data, batch_size, dims, *,
49 | num_threads=1, normalize=True,
50 | skipped_frames=False):
51 | assert len(dims) == 2
52 | self.data = data
53 | self.dims = dims
54 | self.batch_size = batch_size
55 | self.num_threads = num_threads
56 | self.normalize = normalize
57 | self.skipped_frames = skipped_frames
58 |
59 | def _resize_crop_or_pad(self, tensor):
60 | height, width = self.dims
61 | # return tf.image.resize_bilinear(tf.expand_dims(tensor, 0), [height, width])
62 | return tf.image.resize_image_with_crop_or_pad(tensor, height, width)
63 |
64 | def _resize_image_fixed(self, image):
65 | height, width = self.dims
66 | return tf.reshape(self._resize_crop_or_pad(image), [height, width, 3])
67 |
68 | def _normalize_image(self, image):
69 | return (image - self.mean) / self.stddev
70 |
71 | def _preprocess_image(self, image):
72 | image = self._resize_image_fixed(image)
73 | if self.normalize:
74 | image = self._normalize_image(image)
75 | return image
76 |
77 | def _input_images(self, image_dir, hold_out_inv=None):
78 | """Assumes that paired images are next to each other after ordering the
79 | files.
80 | """
81 | image_dir = os.path.join(self.data.current_dir, image_dir)
82 |
83 | filenames_1 = []
84 | filenames_2 = []
85 | image_files = os.listdir(image_dir)
86 | image_files.sort()
87 |
88 | assert len(image_files) % 2 == 0, 'expected pairs of images'
89 |
90 | for i in range(len(image_files) // 2):
91 | filenames_1.append(os.path.join(image_dir, image_files[i * 2]))
92 | filenames_2.append(os.path.join(image_dir, image_files[i * 2 + 1]))
93 |
94 | if hold_out_inv is not None:
95 | filenames = list(zip(filenames_1, filenames_2))
96 | random.seed(0)
97 | random.shuffle(filenames)
98 | filenames = filenames[:hold_out_inv]
99 |
100 | filenames_1, filenames_2 = zip(*filenames)
101 | filenames_1 = list(filenames_1)
102 | filenames_2 = list(filenames_2)
103 |
104 | input_1 = read_png_image(filenames_1, 1)
105 | input_2 = read_png_image(filenames_2, 1)
106 | image_1 = self._preprocess_image(input_1)
107 | image_2 = self._preprocess_image(input_2)
108 | return tf.shape(input_1), image_1, image_2
109 |
110 | def _input_test(self, image_dir, hold_out_inv=None):
111 | input_shape, im1, im2 = self._input_images(image_dir, hold_out_inv)
112 | return tf.train.batch(
113 | [im1, im2, input_shape],
114 | batch_size=self.batch_size,
115 | num_threads=self.num_threads,
116 | allow_smaller_final_batch=True)
117 |
118 | def get_normalization(self):
119 | return self.mean, self.stddev
120 |
121 | def input_raw(self, swap_images=True, sequence=True,
122 | needs_crop=True, shift=0, seed=0,
123 | center_crop=False, skip=0):
124 | """Constructs input of raw data.
125 |
126 | Args:
127 | sequence: Assumes that image file order in data_dirs corresponds to
128 | temporal order, if True. Otherwise, assumes uncorrelated pairs of
129 | images in lexicographical ordering.
130 | shift: number of examples to shift the input queue by.
131 | Useful to resume training.
132 | swap_images: for each pair (im1, im2), also include (im2, im1)
133 | seed: seed for filename shuffling.
134 | Returns:
135 | image_1: batch of first images
136 | image_2: batch of second images
137 | """
138 | if not isinstance(skip, list):
139 | skip = [skip]
140 |
141 | data_dirs = self.data.get_raw_dirs()
142 | height, width = self.dims
143 | #assert batch_size % 2 == 0
144 |
145 | filenames = []
146 | for dir_path in data_dirs:
147 | files = os.listdir(dir_path)
148 | files.sort()
149 | if sequence:
150 | steps = [1 + s for s in skip]
151 | stops = [len(files) - s for s in steps]
152 | else:
153 | steps = [2]
154 | stops = [len(files)]
155 | assert len(files) % 2 == 0
156 | for step, stop in zip(steps, stops):
157 | for i in range(0, stop, step):
158 | if self.skipped_frames and sequence:
159 | assert step == 1
160 | num_first = frame_name_to_num(files[i])
161 | num_second = frame_name_to_num(files[i+1])
162 | if num_first + 1 != num_second:
163 | continue
164 | fn1 = os.path.join(dir_path, files[i])
165 | fn2 = os.path.join(dir_path, files[i + 1])
166 | filenames.append((fn1, fn2))
167 |
168 | random.seed(seed)
169 | random.shuffle(filenames)
170 | print("Training on {} frame pairs.".format(len(filenames)))
171 |
172 | filenames_extended = []
173 | for fn1, fn2 in filenames:
174 | filenames_extended.append((fn1, fn2))
175 | if swap_images:
176 | filenames_extended.append((fn2, fn1))
177 |
178 | shift = shift % len(filenames_extended)
179 | filenames_extended = list(np.roll(filenames_extended, shift))
180 |
181 |
182 | filenames_1, filenames_2 = zip(*filenames_extended)
183 | filenames_1 = list(filenames_1)
184 | filenames_2 = list(filenames_2)
185 |
186 | with tf.variable_scope('train_inputs'):
187 | image_1 = read_png_image(filenames_1)
188 | image_2 = read_png_image(filenames_2)
189 |
190 | if needs_crop:
191 | #if center_crop:
192 | # image_1 = tf.image.resize_image_with_crop_or_pad(image_1, height, width)
193 | # image_2 = tf.image.resize_image_with_crop_or_pad(image_1, height, width)
194 | #else:
195 | image_1, image_2 = random_crop([image_1, image_2], [height, width, 3])
196 | else:
197 | image_1 = tf.reshape(image_1, [height, width, 3])
198 | image_2 = tf.reshape(image_2, [height, width, 3])
199 |
200 | if self.normalize:
201 | image_1 = self._normalize_image(image_1)
202 | image_2 = self._normalize_image(image_2)
203 |
204 | return tf.train.batch(
205 | [image_1, image_2],
206 | batch_size=self.batch_size,
207 | num_threads=self.num_threads)
208 |
209 |
210 | def read_png_image(filenames, num_epochs=None):
211 | """Given a list of filenames, constructs a reader op for images."""
212 | filename_queue = tf.train.string_input_producer(filenames,
213 | shuffle=False, capacity=len(filenames))
214 | reader = tf.WholeFileReader()
215 | _, value = reader.read(filename_queue)
216 | image_uint8 = tf.image.decode_png(value, channels=3)
217 | image = tf.cast(image_uint8, tf.float32)
218 | return image
219 |
--------------------------------------------------------------------------------
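Note on frame_name_to_num above: leading zeros are stripped before conversion, and an all-zero frame name maps to 0:

    assert frame_name_to_num('0000000123.png') == 123
    assert frame_name_to_num('0000000000.png') == 0
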
/core/UnFlow/src/e2eflow/core/spatial_transformer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | import tensorflow as tf
16 |
17 |
18 | def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs):
19 | """Spatial Transformer Layer
20 | Implements a spatial transformer layer as described in [1]_.
21 | Based on [2]_ and edited by David Dao for Tensorflow.
22 | Parameters
23 | ----------
24 | U : float
25 | The output of a convolutional net should have the
26 | shape [num_batch, height, width, num_channels].
27 | theta: float
28 | The output of the
29 | localisation network should be [num_batch, 6].
30 | out_size: tuple of two ints
31 | The size of the output of the network (height, width)
32 | References
33 | ----------
34 | .. [1] Spatial Transformer Networks
35 | Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu
36 | Submitted on 5 Jun 2015
37 | .. [2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py
38 | Notes
39 | -----
40 | To initialize the network to the identity transform init
41 | ``theta`` to :
42 | identity = np.array([[1., 0., 0.],
43 | [0., 1., 0.]])
44 | identity = identity.flatten()
45 | theta = tf.Variable(initial_value=identity)
46 | """
47 |
48 | def _repeat(x, n_repeats):
49 | with tf.variable_scope('_repeat'):
50 | rep = tf.transpose(
51 | tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0])
52 | rep = tf.cast(rep, 'int32')
53 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
54 | return tf.reshape(x, [-1])
55 |
56 | def _interpolate(im, x, y, out_size):
57 | with tf.variable_scope('_interpolate'):
58 | # constants
59 | num_batch = tf.shape(im)[0]
60 | height = tf.shape(im)[1]
61 | width = tf.shape(im)[2]
62 | channels = tf.shape(im)[3]
63 |
64 | x = tf.cast(x, 'float32')
65 | y = tf.cast(y, 'float32')
66 | height_f = tf.cast(height, 'float32')
67 | width_f = tf.cast(width, 'float32')
68 | out_height = out_size[0]
69 | out_width = out_size[1]
70 | zero = tf.zeros([], dtype='int32')
71 | max_y = tf.cast(tf.shape(im)[1] - 1, 'int32')
72 | max_x = tf.cast(tf.shape(im)[2] - 1, 'int32')
73 |
74 | # scale indices from [-1, 1] to [0, width/height]
75 | x = (x + 1.0)*(width_f) / 2.0
76 | y = (y + 1.0)*(height_f) / 2.0
77 |
78 | # do sampling
79 | x0 = tf.cast(tf.floor(x), 'int32')
80 | x1 = x0 + 1
81 | y0 = tf.cast(tf.floor(y), 'int32')
82 | y1 = y0 + 1
83 |
84 | x0 = tf.clip_by_value(x0, zero, max_x)
85 | x1 = tf.clip_by_value(x1, zero, max_x)
86 | y0 = tf.clip_by_value(y0, zero, max_y)
87 | y1 = tf.clip_by_value(y1, zero, max_y)
88 | dim2 = width
89 | dim1 = width*height
90 | base = _repeat(tf.range(num_batch)*dim1, out_height*out_width)
91 | base_y0 = base + y0*dim2
92 | base_y1 = base + y1*dim2
93 | idx_a = base_y0 + x0
94 | idx_b = base_y1 + x0
95 | idx_c = base_y0 + x1
96 | idx_d = base_y1 + x1
97 |
98 | # use indices to lookup pixels in the flat image and restore
99 | # channels dim
100 | im_flat = tf.reshape(im, tf.stack([-1, channels]))
101 | im_flat = tf.cast(im_flat, 'float32')
102 | Ia = tf.gather(im_flat, idx_a)
103 | Ib = tf.gather(im_flat, idx_b)
104 | Ic = tf.gather(im_flat, idx_c)
105 | Id = tf.gather(im_flat, idx_d)
106 |
107 | # and finally calculate interpolated values
108 | x0_f = tf.cast(x0, 'float32')
109 | x1_f = tf.cast(x1, 'float32')
110 | y0_f = tf.cast(y0, 'float32')
111 | y1_f = tf.cast(y1, 'float32')
112 | wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1)
113 | wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1)
114 | wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1)
115 | wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1)
116 | output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])
117 | return output
118 |
119 | def _meshgrid(height, width):
120 | with tf.variable_scope('_meshgrid'):
121 | # This should be equivalent to:
122 | # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
123 | # np.linspace(-1, 1, height))
124 | # ones = np.ones(np.prod(x_t.shape))
125 | # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
126 | x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])),
127 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
128 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
129 | tf.ones(shape=tf.stack([1, width])))
130 |
131 | x_t_flat = tf.reshape(x_t, (1, -1))
132 | y_t_flat = tf.reshape(y_t, (1, -1))
133 |
134 | ones = tf.ones_like(x_t_flat)
135 | grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones])
136 | return grid
137 |
138 | def _transform(theta, input_dim, out_size):
139 | with tf.variable_scope('_transform'):
140 | num_batch = tf.shape(input_dim)[0]
141 | height = tf.shape(input_dim)[1]
142 | width = tf.shape(input_dim)[2]
143 | num_channels = tf.shape(input_dim)[3]
144 | theta = tf.reshape(theta, (-1, 2, 3))
145 | theta = tf.cast(theta, 'float32')
146 |
147 | # grid of (x_t, y_t, 1), eq (1) in ref [1]
148 | height_f = tf.cast(height, 'float32')
149 | width_f = tf.cast(width, 'float32')
150 | out_height = out_size[0]
151 | out_width = out_size[1]
152 | grid = _meshgrid(out_height, out_width)
153 | grid = tf.expand_dims(grid, 0)
154 | grid = tf.reshape(grid, [-1])
155 | grid = tf.tile(grid, tf.stack([num_batch]))
156 | grid = tf.reshape(grid, tf.stack([num_batch, 3, -1]))
157 |
158 | # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
159 | T_g = tf.matmul(theta, grid)
160 | x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1])
161 | y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1])
162 | x_s_flat = tf.reshape(x_s, [-1])
163 | y_s_flat = tf.reshape(y_s, [-1])
164 |
165 | input_transformed = _interpolate(
166 | input_dim, x_s_flat, y_s_flat,
167 | out_size)
168 |
169 | output = tf.reshape(
170 | input_transformed, tf.stack([num_batch, out_height, out_width, num_channels]))
171 | return output
172 |
173 | with tf.variable_scope(name):
174 | output = _transform(theta, U, out_size)
175 | return output
176 |
177 |
178 | def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'):
179 | """Batch Spatial Transformer Layer
180 | Parameters
181 | ----------
182 | U : float
183 | tensor of inputs [num_batch,height,width,num_channels]
184 | thetas : float
185 | a set of transformations for each input [num_batch,num_transforms,6]
186 | out_size : int
187 | the size of the output [out_height,out_width]
188 | Returns: float
189 | Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels]
190 | """
191 | with tf.variable_scope(name):
192 | num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2])
193 | indices = [[i]*num_transforms for i in range(num_batch)]
194 | input_repeated = tf.gather(U, tf.reshape(indices, [-1]))
195 | return transformer(input_repeated, thetas, out_size)
196 |
--------------------------------------------------------------------------------
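Note: following the identity-transform remark in the docstring, a minimal usage sketch (TensorFlow 1.x); the warped output reproduces the input up to the boundary/interpolation behaviour of the sampling grid. The fixed batch size of 4 and the 64x64 output size are arbitrary example values:

    import numpy as np
    import tensorflow as tf

    U = tf.placeholder(tf.float32, [4, 64, 64, 3])
    identity = np.array([[1., 0., 0.],
                         [0., 1., 0.]], dtype=np.float32).flatten()
    theta = tf.constant(np.tile(identity, [4, 1]))   # [num_batch, 6]
    warped = transformer(U, theta, (64, 64))
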
/core/UnFlow/src/e2eflow/core/supervised.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | import numpy as np
4 |
5 | from .augment import random_photometric
6 | from .flow_util import flow_to_color
7 | from .losses import charbonnier_loss
8 | from .flownet import flownet
9 | from .unsupervised import _track_image, _track_loss, FLOW_SCALE
10 |
11 |
12 | def supervised_loss(batch, params, normalization=None):
13 | channel_mean = tf.constant(normalization[0]) / 255.0
14 | im1, im2, flow_gt, mask_gt = batch
15 | im1 = im1 / 255.0
16 | im2 = im2 / 255.0
17 | im_shape = tf.shape(im1)[1:3]
18 |
19 | # -------------------------------------------------------------------------
20 |
21 | im1_photo, im2_photo = random_photometric(
22 | [im1, im2],
23 | noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3,
24 | brightness_stddev=0.02, min_colour=0.9, max_colour=1.1,
25 | min_gamma=0.7, max_gamma=1.5)
26 |
27 | _track_image(im1_photo, 'im1_photo')
28 | _track_image(im2_photo, 'im2_photo')
29 | _track_image(flow_to_color(flow_gt), 'flow_gt')
30 | _track_image(mask_gt, 'mask_gt')
31 |
32 | # Images for neural network input with mean-zero values in [-1, 1]
33 | im1_photo = im1_photo - channel_mean
34 | im2_photo = im2_photo - channel_mean
35 |
36 | flownet_spec = params.get('flownet', 'S')
37 | full_resolution = params.get('full_res')
38 | train_all = params.get('train_all')
39 | # -------------------------------------------------------------------------
40 | # FlowNet
41 | flows_fw = flownet(im1_photo, im2_photo,
42 | flownet_spec=flownet_spec,
43 | full_resolution=full_resolution,
44 | train_all=train_all)
45 |
46 | if not train_all:
47 | flows_fw = [flows_fw[-1]]
48 | final_loss = 0.0
49 | for i, net_flows in enumerate(reversed(flows_fw)):
50 | flow_fw = net_flows[0]
51 | if params.get('full_res'):
52 | final_flow_fw = flow_fw * FLOW_SCALE * 4
53 | else:
54 | final_flow_fw = tf.image.resize_bilinear(flow_fw, im_shape) * FLOW_SCALE * 4
55 | _track_image(flow_to_color(final_flow_fw), 'flow_pred_' + str(i))
56 |
57 | net_loss = charbonnier_loss(final_flow_fw - flow_gt, mask_gt)
58 | final_loss += net_loss / (2 ** i)
59 |
60 | regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
61 | final_loss += regularization_loss
62 | _track_loss(regularization_loss, 'loss/regularization')
63 | _track_loss(final_loss, 'loss/combined')
64 |
65 | return final_loss
66 |
--------------------------------------------------------------------------------
/core/UnFlow/src/e2eflow/core/unsupervised.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | import numpy as np
4 |
5 | from .augment import random_affine, random_photometric
6 | from .flow_util import flow_to_color
7 | from .util import resize_area, resize_bilinear
8 | from .losses import compute_losses, create_border_mask
9 | from ..ops import downsample
10 | from .image_warp import image_warp
11 | from .flownet import flownet, FLOW_SCALE
12 |
13 |
14 | # REGISTER ALL POSSIBLE LOSS TERMS
15 | LOSSES = ['occ', 'sym', 'fb', 'grad', 'ternary', 'photo', 'smooth_1st', 'smooth_2nd']
16 |
17 |
18 | def _track_loss(op, name):
19 | tf.add_to_collection('losses', tf.identity(op, name=name))
20 |
21 |
22 | def _track_image(op, name):
23 | name = 'train/' + name
24 | tf.add_to_collection('train_images', tf.identity(op, name=name))
25 |
26 |
27 | def unsupervised_loss(batch, params, normalization=None, augment=True,
28 | return_flow=False):
29 | channel_mean = tf.constant(normalization[0]) / 255.0
30 | im1, im2 = batch
31 | im1 = im1 / 255.0
32 | im2 = im2 / 255.0
33 | im_shape = tf.shape(im1)[1:3]
34 |
35 | # -------------------------------------------------------------------------
36 | # Data & mask augmentation
37 | border_mask = create_border_mask(im1, 0.1)
38 |
39 | if augment:
40 | im1_geo, im2_geo, border_mask_global = random_affine(
41 | [im1, im2, border_mask],
42 | horizontal_flipping=True,
43 | min_scale=0.9, max_scale=1.1
44 | )
45 |
46 | # augment locally
47 | im2_geo, border_mask_local = random_affine(
48 | [im2_geo, border_mask],
49 | min_scale=0.9, max_scale=1.1
50 | )
51 | border_mask = border_mask_local * border_mask_global
52 |
53 | im1_photo, im2_photo = random_photometric(
54 | [im1_geo, im2_geo],
55 | noise_stddev=0.04, min_contrast=-0.3, max_contrast=0.3,
56 | brightness_stddev=0.02, min_colour=0.9, max_colour=1.1,
57 | min_gamma=0.7, max_gamma=1.5)
58 |
59 | _track_image(im1_photo, 'augmented1')
60 | _track_image(im2_photo, 'augmented2')
61 | else:
62 | im1_geo, im2_geo = im1, im2
63 | im1_photo, im2_photo = im1, im2
64 |
65 | # Images for loss comparisons with values in [0, 1] (scale to original using * 255)
66 | im1_norm = im1_geo
67 | im2_norm = im2_geo
68 | # Images for neural network input with mean-zero values in [-1, 1]
69 | im1_photo = im1_photo - channel_mean
70 | im2_photo = im2_photo - channel_mean
71 |
72 | flownet_spec = params.get('flownet', 'S')
73 | full_resolution = params.get('full_res')
74 | train_all = params.get('train_all')
75 |
76 | flows_fw, flows_bw = flownet(im1_photo, im2_photo,
77 | flownet_spec=flownet_spec,
78 | full_resolution=full_resolution,
79 | backward_flow=True,
80 | train_all=train_all)
81 |
82 | flows_fw = flows_fw[-1]
83 | flows_bw = flows_bw[-1]
84 |
85 | # -------------------------------------------------------------------------
86 | # Losses
87 | layer_weights = [12.7, 4.35, 3.9, 3.4, 1.1]
88 | layer_patch_distances = [3, 2, 2, 1, 1]
89 | if full_resolution:
90 | layer_weights = [12.7, 5.5, 5.0, 4.35, 3.9, 3.4, 1.1]
91 | layer_patch_distances = [3, 3] + layer_patch_distances
92 | im1_s = im1_norm
93 | im2_s = im2_norm
94 | mask_s = border_mask
95 | final_flow_scale = FLOW_SCALE * 4
96 | final_flow_fw = flows_fw[0] * final_flow_scale
97 | final_flow_bw = flows_bw[0] * final_flow_scale
98 | else:
99 | im1_s = downsample(im1_norm, 4)
100 | im2_s = downsample(im2_norm, 4)
101 | mask_s = downsample(border_mask, 4)
102 | final_flow_scale = FLOW_SCALE
103 | final_flow_fw = tf.image.resize_bilinear(flows_fw[0], im_shape) * final_flow_scale * 4
104 | final_flow_bw = tf.image.resize_bilinear(flows_bw[0], im_shape) * final_flow_scale * 4
105 |
106 | combined_losses = dict()
107 | combined_loss = 0.0
108 | for loss in LOSSES:
109 | combined_losses[loss] = 0.0
110 |
111 | if params.get('pyramid_loss'):
112 | flow_enum = enumerate(zip(flows_fw, flows_bw))
113 | else:
114 | flow_enum = [(0, (flows_fw[0], flows_bw[0]))]
115 |
116 | for i, flow_pair in flow_enum:
117 | layer_name = "loss" + str(i + 2)
118 |
119 | flow_scale = final_flow_scale / (2 ** i)
120 |
121 | with tf.variable_scope(layer_name):
122 | layer_weight = layer_weights[i]
123 | flow_fw_s, flow_bw_s = flow_pair
124 |
125 | mask_occlusion = params.get('mask_occlusion', '')
126 | assert mask_occlusion in ['fb', 'disocc', '']
127 |
128 | losses = compute_losses(im1_s, im2_s,
129 | flow_fw_s * flow_scale, flow_bw_s * flow_scale,
130 | border_mask=mask_s if params.get('border_mask') else None,
131 | mask_occlusion=mask_occlusion,
132 | data_max_distance=layer_patch_distances[i])
133 |
134 | layer_loss = 0.0
135 |
136 | for loss in LOSSES:
137 | weight_name = loss + '_weight'
138 | if params.get(weight_name):
139 | _track_loss(losses[loss], loss)
140 | layer_loss += params[weight_name] * losses[loss]
141 | combined_losses[loss] += layer_weight * losses[loss]
142 |
143 | combined_loss += layer_weight * layer_loss
144 |
145 | im1_s = downsample(im1_s, 2)
146 | im2_s = downsample(im2_s, 2)
147 | mask_s = downsample(mask_s, 2)
148 |
149 | regularization_loss = tf.losses.get_regularization_loss()
150 | final_loss = combined_loss + regularization_loss
151 |
152 | _track_loss(final_loss, 'loss/combined')
153 |
154 | for loss in LOSSES:
155 | _track_loss(combined_losses[loss], 'loss/' + loss)
156 | weight_name = loss + '_weight'
157 | if params.get(weight_name):
158 | weight = tf.identity(params[weight_name], name='weight/' + loss)
159 | tf.add_to_collection('params', weight)
160 |
161 | if not return_flow:
162 | return final_loss
163 |
164 | return final_loss, final_flow_fw, final_flow_bw
165 |
--------------------------------------------------------------------------------
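Note: unsupervised_loss reads its configuration from a plain dict. A hypothetical params dict covering the keys referenced above (the weight values are placeholders, not the published configuration); mean/stddev are as returned by Input.get_normalization():

    params = {
        'flownet': 'S',          # network spec string passed to flownet()
        'full_res': False,
        'train_all': False,
        'pyramid_loss': True,    # apply the losses at every pyramid level
        'border_mask': True,
        'mask_occlusion': 'fb',  # one of 'fb', 'disocc', ''
        # per-term weights; a term is only active if its '<name>_weight' key is set
        'photo_weight': 1.0,
        'ternary_weight': 1.0,
        'smooth_2nd_weight': 1.0,
        'fb_weight': 0.2,
    }
    loss = unsupervised_loss((im1, im2), params, normalization=(mean, stddev))
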
/core/UnFlow/src/e2eflow/core/util.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def summarized_placeholder(name, prefix=None, key=tf.GraphKeys.SUMMARIES):
5 | prefix = '' if not prefix else prefix + '/'
6 | p = tf.placeholder(tf.float32, name=name)
7 | tf.summary.scalar(prefix + name, p, collections=[key])
8 | return p
9 |
10 |
11 | def resize_area(tensor, like):
12 | _, h, w, _ = tf.unstack(tf.shape(like))
13 | return tf.stop_gradient(tf.image.resize_area(tensor, [h, w]))
14 |
15 |
16 | def resize_bilinear(tensor, like):
17 | _, h, w, _ = tf.unstack(tf.shape(like))
18 | return tf.stop_gradient(tf.image.resize_bilinear(tensor, [h, w]))
19 |
--------------------------------------------------------------------------------
/core/UnFlow/src/e2eflow/ops.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import tensorflow as tf
4 | import subprocess
5 | from tensorflow.python.framework import ops
6 |
7 |
8 | # Register ops for compilation here
9 | OP_NAMES = ['backward_warp', 'downsample', 'correlation', 'forward_warp']
10 |
11 |
12 | cwd = os.getcwd()
13 | os.chdir(os.path.dirname(os.path.realpath(__file__)))
14 | os.chdir("../../ops")
15 |
16 | def compile(op=None):
17 | if op is not None:
18 | to_compile = [op]
19 | else:
20 | to_compile = OP_NAMES
21 |
22 | tf_inc = tf.sysconfig.get_include()
23 | for n in to_compile:
24 | base = n + "_op"
25 | fn_cu_cc = base + ".cu.cc"
26 | fn_cu_o = base + ".cu.o"
27 | fn_cc = base + ".cc"
28 | fn_o = base + ".o"
29 | fn_so = base + ".so"
30 |
31 | cuda_lib64_path_arg = "-L /usr/local/cuda-8.0/lib64"
32 | nvcc_cmd = "nvcc -std=c++11 -c -gencode=arch=compute_30,code=sm_30 -o {} -I {} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC"
33 | nvcc_cmd = nvcc_cmd.format(" ".join([fn_cu_o, fn_cu_cc]),
34 | tf_inc)
35 | subprocess.check_output(nvcc_cmd, shell=True)
36 |
37 | gcc_cmd = "{} -std=c++11 -shared -o {} -I {} -fPIC -lcudart -D GOOGLE_CUDA=1 {}"
38 | gcc_cmd = gcc_cmd.format('g++',
39 | " ".join([fn_so, fn_cu_o, fn_cc]),
40 | tf_inc,
41 | cuda_lib64_path_arg)
42 | subprocess.check_output(gcc_cmd, shell=True)
43 |
44 |
45 | if __name__ == "__main__":
46 | compile()
47 |
48 |
49 | module = sys.modules[__name__]
50 | for n in OP_NAMES:
51 | lib_path = './{}_op.so'.format(n)
52 | try:
53 | op_lib = tf.load_op_library(lib_path)
54 | except:
55 | compile(n)
56 | op_lib = tf.load_op_library(lib_path)
57 | setattr(module, '_' + n + '_module', op_lib)
58 |
59 |
60 | os.chdir(cwd)
61 |
62 |
63 | def correlation(first, second, **kwargs):
64 | return _correlation_module.correlation(first, second, **kwargs)[0]
65 |
66 |
67 | backward_warp = _backward_warp_module.backward_warp
68 | downsample = _downsample_module.downsample
69 | forward_warp = _forward_warp_module.forward_warp
70 |
71 |
72 | # Register op gradients
73 |
74 | @ops.RegisterGradient("BackwardWarp")
75 | def _BackwardWarpGrad(op, grad):
76 | grad0 = _backward_warp_module.backward_warp_grad(
77 | grad, op.inputs[0], op.inputs[1])
78 | return [None, grad0]
79 |
80 |
81 | @ops.RegisterGradient("ForwardWarp")
82 | def _ForwardWarpGrad(op, grad):
83 | grad0 = _forward_warp_module.forward_warp_grad(
84 | grad, op.inputs[0])
85 | return [grad0]
86 |
87 |
88 | @ops.RegisterGradient("Correlation")
89 | def _CorrelationGrad(op, in_grad, in_grad1, in_grad2):
90 | grad0, grad1 = _correlation_module.correlation_grad(
91 | in_grad, op.inputs[0], op.inputs[1],
92 | op.outputs[1], op.outputs[2],
93 | kernel_size=op.get_attr('kernel_size'),
94 | max_displacement=op.get_attr('max_displacement'),
95 | pad=op.get_attr('pad'),
96 | stride_1=op.get_attr('stride_1'),
97 | stride_2=op.get_attr('stride_2'))
98 | return [grad0, grad1]
99 |
100 |
101 | ops.NotDifferentiable("Downsample")
102 |
--------------------------------------------------------------------------------
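Note: for reference, the command strings that compile() assembles for a single op (here forward_warp) expand to roughly the following, with <TF_INC> standing for tf.sysconfig.get_include():

    nvcc -std=c++11 -c -gencode=arch=compute_30,code=sm_30
        -o forward_warp_op.cu.o forward_warp_op.cu.cc
        -I <TF_INC> -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
    g++ -std=c++11 -shared
        -o forward_warp_op.so forward_warp_op.cu.o forward_warp_op.cc
        -I <TF_INC> -fPIC -lcudart -D GOOGLE_CUDA=1 -L /usr/local/cuda-8.0/lib64
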
/core/UnFlow/src/e2eflow/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import configparser
4 | from shutil import rmtree
5 | import tensorflow as tf
6 |
7 |
8 | CONFIG_PATH = '../config.ini'
9 | #TMP_DIR = '/tmp/e2eflow'
10 |
11 |
12 | def upload_gdrive(upload_dir, gdrive_filename):
13 | # search for file in gdrive and capture id if it already exists
14 | lst_lines = subprocess.Popen(['../scripts/gdrive', 'list'],
15 | stdout=subprocess.PIPE)
16 | existing_id = None
17 | for line in lst_lines.stdout:
18 | splits = line.split()
19 | if str(splits[1], 'utf-8') == gdrive_filename:
20 | existing_id = str(splits[0], 'utf-8')
21 | tmp_path = os.path.join('/tmp', gdrive_filename)
22 | if os.path.isfile(tmp_path):
23 | os.remove(tmp_path)
24 | p = subprocess.Popen(['/usr/bin/zip', '-r', tmp_path, upload_dir])
25 | p.wait()
26 | if existing_id:
27 | p = subprocess.Popen(['../scripts/gdrive', 'update',
28 | existing_id, tmp_path])
29 | else:
30 | p = subprocess.Popen(['../scripts/gdrive', 'upload',
31 | '--name', gdrive_filename,
32 | tmp_path])
33 | p.wait()
34 | os.remove(tmp_path)
35 |
36 |
37 | def config_dict(config_path=CONFIG_PATH):
38 | """Returns the config as dictionary,
39 | where the elements have intuitively correct types.
40 | """
41 |
42 | config = configparser.ConfigParser()
43 | config.read(config_path)
44 |
45 | d = dict()
46 | for section_key in config.sections():
47 | sd = dict()
48 | section = config[section_key]
49 | for key in section:
50 | val = section[key]
51 | try:
52 | sd[key] = int(val)
53 | except ValueError:
54 | try:
55 | sd[key] = float(val)
56 | except ValueError:
57 | try:
58 | sd[key] = section.getboolean(key)
59 | except ValueError:
60 | sd[key] = val
61 | d[section_key] = sd
62 | return d
63 |
64 |
65 | def convert_input_strings(config_dct, dirs):
66 | if 'manual_decay_iters' in config_dct and 'manual_decay_lrs' in config_dct:
67 | iters_lst = config_dct['manual_decay_iters'].split(',')
68 | lrs_lst = config_dct['manual_decay_lrs'].split(',')
69 | iters_lst = [int(i) for i in iters_lst]
70 | lrs_lst = [float(l) for l in lrs_lst]
71 | config_dct['manual_decay_iters'] = iters_lst
72 | config_dct['manual_decay_lrs'] = lrs_lst
73 | config_dct['num_iters'] = sum(iters_lst)
74 |
75 | if 'finetune' in config_dct:
76 | finetune = []
77 | for name in config_dct['finetune'].split(","):
78 | ckpt_dir = os.path.join(dirs['checkpoints'], name)
79 | ckpt = tf.train.get_checkpoint_state(ckpt_dir)
80 | if ckpt is None:
81 | ckpt_dir = os.path.join(dirs['log'], 'ex', name)
82 | ckpt = tf.train.get_checkpoint_state(ckpt_dir)
83 | assert ckpt, "Could not load experiment " + name
84 | finetune.append(ckpt)
85 | config_dct['finetune'] = finetune
86 |
87 |
88 | def tryremove(name, file=False):
89 | try:
90 | if file:
91 | os.remove(name)
92 | else:
93 | rmtree(name)
94 | except OSError:
95 | pass
96 |
--------------------------------------------------------------------------------
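A short sketch of how config_dict and convert_input_strings above fit together; the section name, keys, and example values are hypothetical, not taken from the repository's config.ini.

cfg = config_dict('../config.ini')
train_cfg = cfg['train']    # hypothetical section name
# Suppose the .ini contains manual_decay_iters = 100000,100000 and manual_decay_lrs = 1e-4,5e-5;
# both survive config_dict as strings and are split into lists here.
convert_input_strings(train_cfg, dirs={'checkpoints': './ckpt', 'log': './log'})
train_cfg['manual_decay_iters']   # -> [100000, 100000]
train_cfg['num_iters']            # -> 200000 (sum of the decay intervals)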
/core/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | from .DFLearner import DFLearner
3 | from .flowlib import *
4 | from .UnFlow import *
5 |
--------------------------------------------------------------------------------
/core/data_loader.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import random
4 | import tensorflow as tf
5 |
6 | class DataLoader(object):
7 | def __init__(self,
8 | dataset_dir=None,
9 | batch_size=None,
10 | img_height=None,
11 | img_width=None,
12 | num_source=None,
13 | num_scales=None):
14 | self.dataset_dir = dataset_dir
15 | self.batch_size = batch_size
16 | self.img_height = img_height
17 | self.img_width = img_width
18 | self.num_source = num_source
19 | self.num_scales = num_scales
20 |
21 | def load_train_batch(self, is_training=True):
22 | """Load a batch of training instances.
23 | """
24 | seed = random.randint(0, 2**31 - 1)
25 | # Load the list of training files into queues
26 | file_list = self.format_file_list(self.dataset_dir, 'train')
27 | image_paths_queue = tf.train.string_input_producer(
28 | file_list['image_file_list'],
29 | seed=seed,
30 | shuffle=True)
31 | cam_paths_queue = tf.train.string_input_producer(
32 | file_list['cam_file_list'],
33 | seed=seed,
34 | shuffle=True)
35 | self.steps_per_epoch = int(
36 | len(file_list['image_file_list'])//self.batch_size)
37 |
38 | # Load images
39 | img_reader = tf.WholeFileReader()
40 | _, image_contents = img_reader.read(image_paths_queue)
41 | image_seq = tf.image.decode_jpeg(image_contents)
42 | tgt_image, src_image_stack = \
43 | self.unpack_image_sequence(
44 | image_seq, self.img_height, self.img_width, self.num_source)
45 |
46 | # Load camera intrinsics
47 | cam_reader = tf.TextLineReader()
48 | _, raw_cam_contents = cam_reader.read(cam_paths_queue)
49 | rec_def = []
50 | for i in range(9):
51 | rec_def.append([1.])
52 | raw_cam_vec = tf.decode_csv(raw_cam_contents,
53 | record_defaults=rec_def)
54 | raw_cam_vec = tf.stack(raw_cam_vec)
55 | intrinsics = tf.reshape(raw_cam_vec, [3, 3])
56 |
57 | # Form training batches
58 | src_image_stack, tgt_image, intrinsics = \
59 | tf.train.batch([src_image_stack, tgt_image, intrinsics],
60 | batch_size=self.batch_size)
61 |
62 | # Data augmentation
63 | image_all = tf.concat([tgt_image, src_image_stack], axis=3)
64 | image_all, intrinsics, image_augall = self.data_augmentation(
65 | image_all, intrinsics, self.img_height, self.img_width)
66 | tgt_image = image_all[:, :, :, :3]
67 | src_image_stack = image_all[:, :, :, 3:]
68 | tgt_image_aug = image_augall[:, :, :, :3]
69 | src_image_stack_aug = image_augall[:, :, :, 3:]
70 | intrinsics = self.get_multi_scale_intrinsics(
71 | intrinsics, self.num_scales)
72 |
73 | if is_training:
74 | return tgt_image, src_image_stack, intrinsics, tgt_image_aug, src_image_stack_aug
75 | else:
76 | return tgt_image, src_image_stack, intrinsics
77 |
78 | def make_intrinsics_matrix(self, fx, fy, cx, cy):
79 | # Assumes batch input
80 | batch_size = fx.get_shape().as_list()[0]
81 | zeros = tf.zeros_like(fx)
82 | r1 = tf.stack([fx, zeros, cx], axis=1)
83 | r2 = tf.stack([zeros, fy, cy], axis=1)
84 | r3 = tf.constant([0.,0.,1.], shape=[1, 3])
85 | r3 = tf.tile(r3, [batch_size, 1])
86 | intrinsics = tf.stack([r1, r2, r3], axis=1)
87 | return intrinsics
88 |
89 | def data_augmentation(self, im, intrinsics, out_h, out_w):
90 | # Random scaling
91 | def random_scaling(im, intrinsics):
92 | batch_size, in_h, in_w, _ = im.get_shape().as_list()
93 | scaling = tf.random_uniform([2], 1, 1.15)
94 | x_scaling = scaling[0]
95 | y_scaling = scaling[1]
96 | out_h = tf.cast(in_h * y_scaling, dtype=tf.int32)
97 | out_w = tf.cast(in_w * x_scaling, dtype=tf.int32)
98 | im = tf.image.resize_area(im, [out_h, out_w])
99 | fx = intrinsics[:,0,0] * x_scaling
100 | fy = intrinsics[:,1,1] * y_scaling
101 | cx = intrinsics[:,0,2] * x_scaling
102 | cy = intrinsics[:,1,2] * y_scaling
103 | intrinsics = self.make_intrinsics_matrix(fx, fy, cx, cy)
104 | return im, intrinsics
105 |
106 | # Random cropping
107 | def random_cropping(im, intrinsics, out_h, out_w):
108 | # batch_size, in_h, in_w, _ = im.get_shape().as_list()
109 | batch_size, in_h, in_w, _ = tf.unstack(tf.shape(im))
110 | offset_y = tf.random_uniform([1], 0, in_h - out_h + 1, dtype=tf.int32)[0]
111 | offset_x = tf.random_uniform([1], 0, in_w - out_w + 1, dtype=tf.int32)[0]
112 | im = tf.image.crop_to_bounding_box(
113 | im, offset_y, offset_x, out_h, out_w)
114 | fx = intrinsics[:,0,0]
115 | fy = intrinsics[:,1,1]
116 | cx = intrinsics[:,0,2] - tf.cast(offset_x, dtype=tf.float32)
117 | cy = intrinsics[:,1,2] - tf.cast(offset_y, dtype=tf.float32)
118 | intrinsics = self.make_intrinsics_matrix(fx, fy, cx, cy)
119 | return im, intrinsics
120 |
121 | # Random photometric augmentation
122 | # Credit: https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/augment.py
123 | def random_photometric(im, noise_stddev=0.04, min_contrast=-0.2, max_contrast=0.2, brightness_stddev=0.02, min_colour=0.9, max_colour=1.1, min_gamma=0.8, max_gamma=1.2):
124 | """
125 | Applies photometric augmentations to a list of image batches.
126 | Args:
127 | im: list of 3-channel image batches normalized to [0, 1].
128 | Returns:
129 |                 The augmented image batches, clipped to [0, 1] and concatenated along the channel axis (so the channel count is the sum over the input list).
130 | """
131 | batch_size, in_h, in_w, _ = im[0].get_shape().as_list()
132 |
133 | contrast = tf.random_uniform([batch_size, 1], min_contrast, max_contrast)
134 | gamma = tf.random_uniform([batch_size, 1], min_gamma, max_gamma)
135 | gamma_inv = 1.0/gamma
136 | colour = tf.random_uniform([batch_size, 3], min_colour, max_colour)
137 | if noise_stddev > 0.0:
138 | noise = tf.random_normal([batch_size, 1], stddev=noise_stddev)
139 | else:
140 | noise = tf.zeros([batch_size, 1])
141 | if brightness_stddev > 0.0:
142 | brightness = tf.random_normal([batch_size, 1], stddev=brightness_stddev)
143 | else:
144 | brightness = tf.zeros([batch_size, 1])
145 |
146 | out = []
147 | for temp in im:
148 | # Transpose to [height, width, num_batch, channels]
149 | im_re = tf.transpose(temp, [1, 2, 0, 3])
150 | im_re = (im_re * (contrast + 1.0) + brightness) * colour
151 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re))
152 | im_re = tf.pow(im_re, gamma_inv)
153 | im_re = im_re + noise
154 | im_re = tf.maximum(0.0, tf.minimum(1.0, im_re))
155 |
156 | temp = tf.transpose(im_re, [2, 0, 1, 3])
157 | temp = tf.stop_gradient(temp)
158 | out.append(temp)
159 | return tf.concat(out, axis=-1)
160 |
161 | im, intrinsics = random_scaling(im, intrinsics)
162 | im, intrinsics = random_cropping(im, intrinsics, out_h, out_w)
163 | # [0, 255] -> [0, 1]
164 | im_photo = im/255.
165 | im_photo = [im_photo[:,:,:,3*i:3*(i+1)] for i in range(self.num_source+1)]
166 | im_photo = random_photometric(im_photo)
167 | # [0, 1] -> [0, 255]
168 | im_photo = im_photo*255.
169 | im = tf.cast(im, dtype=tf.uint8)
170 | im_photo = tf.cast(im_photo, dtype=tf.uint8)
171 | return im, intrinsics, im_photo
172 |
173 | def format_file_list(self, data_root, split):
174 | with open(data_root + '/%s.txt' % split, 'r') as f:
175 | frames = f.readlines()
176 | subfolders = [x.split(' ')[0] for x in frames]
177 | frame_ids = [x.split(' ')[1][:-1] for x in frames]
178 | image_file_list = [os.path.join(data_root, subfolders[i],
179 | frame_ids[i] + '.jpg') for i in range(len(frames))]
180 | cam_file_list = [os.path.join(data_root, subfolders[i],
181 | frame_ids[i] + '_cam.txt') for i in range(len(frames))]
182 | all_list = {}
183 | all_list['image_file_list'] = image_file_list
184 | all_list['cam_file_list'] = cam_file_list
185 | return all_list
186 |
187 | def unpack_image_sequence(self, image_seq, img_height, img_width, num_source):
188 | # Assuming the center image is the target frame
189 | tgt_start_idx = int(img_width * (num_source//2))
190 | tgt_image = tf.slice(image_seq,
191 | [0, tgt_start_idx, 0],
192 | [-1, img_width, -1])
193 | # Source frames before the target frame
194 | src_image_1 = tf.slice(image_seq,
195 | [0, 0, 0],
196 | [-1, int(img_width * (num_source//2)), -1])
197 | # Source frames after the target frame
198 | src_image_2 = tf.slice(image_seq,
199 | [0, int(tgt_start_idx + img_width), 0],
200 | [-1, int(img_width * (num_source//2)), -1])
201 | src_image_seq = tf.concat([src_image_1, src_image_2], axis=1)
202 | # Stack source frames along the color channels (i.e. [H, W, N*3])
203 | src_image_stack = tf.concat([tf.slice(src_image_seq,
204 | [0, i*img_width, 0],
205 | [-1, img_width, -1])
206 | for i in range(num_source)], axis=2)
207 | src_image_stack.set_shape([img_height,
208 | img_width,
209 | num_source * 3])
210 | tgt_image.set_shape([img_height, img_width, 3])
211 | return tgt_image, src_image_stack
212 |
213 | def batch_unpack_image_sequence(self, image_seq, img_height, img_width, num_source):
214 | # Assuming the center image is the target frame
215 | tgt_start_idx = int(img_width * (num_source//2))
216 | tgt_image = tf.slice(image_seq,
217 | [0, 0, tgt_start_idx, 0],
218 | [-1, -1, img_width, -1])
219 | # Source frames before the target frame
220 | src_image_1 = tf.slice(image_seq,
221 | [0, 0, 0, 0],
222 | [-1, -1, int(img_width * (num_source//2)), -1])
223 | # Source frames after the target frame
224 | src_image_2 = tf.slice(image_seq,
225 | [0, 0, int(tgt_start_idx + img_width), 0],
226 | [-1, -1, int(img_width * (num_source//2)), -1])
227 | src_image_seq = tf.concat([src_image_1, src_image_2], axis=2)
228 | # Stack source frames along the color channels (i.e. [B, H, W, N*3])
229 | src_image_stack = tf.concat([tf.slice(src_image_seq,
230 | [0, 0, i*img_width, 0],
231 | [-1, -1, img_width, -1])
232 | for i in range(num_source)], axis=3)
233 | return tgt_image, src_image_stack
234 |
235 | def get_multi_scale_intrinsics(self, intrinsics, num_scales):
236 | intrinsics_mscale = []
237 | # Scale the intrinsics accordingly for each scale
238 | for s in range(num_scales):
239 | fx = intrinsics[:,0,0]/(2 ** s)
240 | fy = intrinsics[:,1,1]/(2 ** s)
241 | cx = intrinsics[:,0,2]/(2 ** s)
242 | cy = intrinsics[:,1,2]/(2 ** s)
243 | intrinsics_mscale.append(
244 | self.make_intrinsics_matrix(fx, fy, cx, cy))
245 | intrinsics_mscale = tf.stack(intrinsics_mscale, axis=1)
246 | return intrinsics_mscale
247 |
--------------------------------------------------------------------------------
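A minimal sketch of driving the loader above under TF1 queue runners. The directory and hyper-parameters are placeholders; num_source=4 matches the 5-frame input that data/prepare_train_data.py produces by default.

import tensorflow as tf

loader = DataLoader(dataset_dir='./dataset/dfnet_train',
                    batch_size=4, img_height=320, img_width=1152,
                    num_source=4, num_scales=4)
tgt, src_stack, intrinsics, tgt_aug, src_stack_aug = loader.load_train_batch()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    t = sess.run(tgt)                      # uint8 batch, shape [4, 320, 1152, 3]
    coord.request_stop()
    coord.join(threads)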
/core/nets.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import tensorflow as tf
3 | import tensorflow.contrib.slim as slim
4 | from tensorflow.contrib.layers.python.layers import utils
5 | import numpy as np
6 |
7 | from .utils import flow_inverse_warp
8 |
9 | # Range of disparity/inverse depth values
10 | DISP_SCALING = 10
11 | MIN_DISP = 0.01
12 |
13 | def resize_like(inputs, ref, type='nearest'):
14 | iH, iW = inputs.get_shape()[1], inputs.get_shape()[2]
15 | rH, rW = ref.get_shape()[1], ref.get_shape()[2]
16 | if iH == rH and iW == rW:
17 | return inputs
18 | if type == 'nearest':
19 | return tf.image.resize_nearest_neighbor(inputs, [rH.value, rW.value])
20 | elif type == 'bilinear':
21 | return tf.image.resize_bilinear(inputs, [rH.value, rW.value])
22 |
23 | # Reference: https://github.com/sampepose/flownet2-tf/blob/master/src/utils.py
24 | def pad(tensor, num=1):
25 | """
26 | Pads the given tensor along the height and width dimensions with `num` 0s on each side
27 | """
28 | return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT")
29 |
30 | def pad_4(tensor, u, b, l, r):
31 | return tf.pad(tensor, [[0, 0], [u, b], [l, r], [0, 0]], 'CONSTANT')
32 |
33 | def antipad(tensor, num=1):
34 | """
35 |     Performs a crop. "Padding" for a deconvolutional layer (conv2d transpose) removes
36 | padding from the output rather than adding it to the input.
37 | """
38 | batch, h, w, c = tensor.shape.as_list()
39 | return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c])
40 |
41 | def antipad_4(tensor, u, b, l, r):
42 | batch, h, w, c = tensor.shape.as_list()
43 | return tf.slice(tensor, begin=[0, u, l, 0], size=[batch, h - u - b, w - l - r, c])
44 |
45 | # Reference: https://github.com/scaelles/OSVOS-TensorFlow/blob/master/osvos.py
46 | def crop_features(feature, out_size):
47 | """Crop the center of a feature map
48 | Args:
49 | feature: Feature map to crop
50 | out_size: Size of the output feature map
51 | Returns:
52 | Tensor that performs the cropping
53 | """
54 | up_size = tf.shape(feature)
55 | ini_w = tf.div(tf.subtract(up_size[1], out_size[1]), 2)
56 | ini_h = tf.div(tf.subtract(up_size[2], out_size[2]), 2)
57 | slice_input = tf.slice(feature, (0, ini_w, ini_h, 0), (-1, out_size[1], out_size[2], -1))
58 | return tf.reshape(slice_input, [int(feature.get_shape()[0]), out_size[1], out_size[2], int(feature.get_shape()[3])])
59 |
60 | # Reference: https://github.com/tensorflow/tensorflow/issues/4079
61 | def LeakyReLU(x, leak=0.1, name='lrelu'):
62 | with tf.variable_scope(name):
63 | f1 = 0.5 * (1.0 + leak)
64 | f2 = 0.5 * (1.0 - leak)
65 | return f1 * x + f2 * abs(x)
66 |
67 | # Both target->source and source->target
68 | def pose_net_fb(tgt_image, src_image_stack, is_training=True, reuse=False):
69 | inputs = tf.concat([tgt_image, src_image_stack], axis=3)
70 | H = inputs.get_shape()[1].value
71 | W = inputs.get_shape()[2].value
72 | num_source = int(src_image_stack.get_shape()[3].value//3)
73 | with tf.variable_scope('pose_net') as sc:
74 | if reuse:
75 | sc.reuse_variables()
76 | end_points_collection = sc.original_name_scope + '_end_points'
77 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
78 | normalizer_fn=None,
79 | weights_regularizer=slim.l2_regularizer(0.05),
80 | activation_fn=tf.nn.relu,
81 | outputs_collections=end_points_collection):
82 |             # Convolutional encoder (cnv1-cnv7) over the concatenated target and source frames
83 | cnv1 = slim.conv2d(inputs,16, [7, 7], stride=2, scope='cnv1')
84 | cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2')
85 | cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3')
86 | cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4')
87 | cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5')
88 | cnv6 = slim.conv2d(cnv5, 256, [3, 3], stride=2, scope='cnv6')
89 | cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7')
90 | # Double the number of channels
91 | pose_pred = slim.conv2d(cnv7, 6*num_source*2, [1, 1], scope='pred',
92 | stride=1, normalizer_fn=None, activation_fn=None)
93 | pose_avg = tf.reduce_mean(pose_pred, [1, 2])
94 | # Empirically we found that scaling by a small constant
95 | # facilitates training.
96 | # 1st half: target->source, 2nd half: source->target
97 | pose_final = 0.01 * tf.reshape(pose_avg, [-1, num_source, 6*2])
98 | end_points = utils.convert_collection_to_dict(end_points_collection)
99 | return pose_final, end_points
100 |
101 | # helper functions
102 | # Credit: https://github.com/mrharicot/monodepth/blob/master/monodepth_model.py
103 | def resconv(x, num_layers, stride):
104 | do_proj = tf.shape(x)[3] != num_layers or stride == 2
105 | conv1 = slim.conv2d(x, num_layers, [1, 1], stride=1, activation_fn=tf.nn.elu)
106 | conv2 = slim.conv2d(conv1, num_layers, [3, 3], stride=stride, activation_fn=tf.nn.elu)
107 | conv3 = slim.conv2d(conv2, 4 * num_layers, [1, 1], stride=1, activation_fn=None)
108 | if do_proj:
109 | shortcut = slim.conv2d(x, 4* num_layers, [1, 1], stride=stride, activation_fn=None)
110 | else:
111 | shortcut = x
112 | return tf.nn.elu(conv3 + shortcut)
113 |
114 | def resblock(x, num_layers, num_blocks):
115 | out = x
116 | for i in range(num_blocks - 1):
117 | out = resconv(out, num_layers, 1)
118 | out = resconv(out, num_layers, 2)
119 | return out
120 |
121 | def upsample_nn(x, ratio):
122 | s = tf.shape(x)
123 | h = s[1]
124 | w = s[2]
125 | return tf.image.resize_nearest_neighbor(x, [h*ratio, w*ratio])
126 |
127 | def upconv(x, num_layers, kernal, scale):
128 | upsample = upsample_nn(x, scale)
129 | conv = slim.conv2d(upsample, num_layers, [kernal, kernal], stride=1, activation_fn=tf.nn.elu)
130 | return conv
131 |
132 | def disp_net_res50(tgt_image, is_training=True, reuse=False, get_feature=False):
133 | H = tgt_image.get_shape()[1].value
134 | W = tgt_image.get_shape()[2].value
135 | with tf.variable_scope('depth_net_res50') as sc:
136 | if reuse:
137 | sc.reuse_variables()
138 | end_points_collection = sc.original_name_scope + '_end_points'
139 | with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
140 | activation_fn=tf.nn.elu,
141 | outputs_collections=end_points_collection):
142 | # Encoder
143 | conv1 = slim.conv2d(tgt_image, 64, [7, 7], stride=2) # 1/2
144 | pool1 = slim.max_pool2d(conv1, [3, 3], padding='SAME') # 1/4
145 | conv2 = resblock(pool1, 64, 3) # 1/8
146 | conv3 = resblock(conv2, 128, 4) # 1/16
147 | conv4 = resblock(conv3, 256, 6) # 1/32
148 | conv5 = resblock(conv4, 512, 3) # 1/64
149 |
150 | # Decoder
151 | upconv6 = upconv(conv5, 512, 3, 2) # 1/32
152 | #upconv6 = slim.conv2d_transpose(conv5, 512, [3, 3], stride=2)
153 | upconv6 = resize_like(upconv6, conv4)
154 | concat6 = tf.concat([upconv6, conv4], 3)
155 | iconv6 = slim.conv2d(concat6, 512, [3, 3], stride=1)
156 |
157 | upconv5 = upconv(iconv6, 256, 3, 2) # 1/16
158 | #upconv5 = slim.conv2d_transpose(iconv6, 256, [3, 3], stride=2)
159 | upconv5 = resize_like(upconv5, conv3)
160 | concat5 = tf.concat([upconv5, conv3], 3)
161 | iconv5 = slim.conv2d(concat5, 256, [3, 3], stride=1)
162 |
163 | upconv4 = upconv(iconv5, 128, 3, 2) # 1/8
164 | #upconv4 = slim.conv2d_transpose(iconv5, 128, [3, 3], stride=2)
165 | upconv4 = resize_like(upconv4, conv2)
166 | concat4 = tf.concat([upconv4, conv2], 3)
167 | iconv4 = slim.conv2d(concat4, 128, [3, 3], stride=1)
168 | disp4 = DISP_SCALING * slim.conv2d(iconv4, 1, [3, 3], stride=1,
169 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp4') + MIN_DISP
170 | disp4_up = tf.image.resize_bilinear(disp4, [np.int(H/4), np.int(W/4)])
171 |
172 | upconv3 = upconv(iconv4, 64, 3, 2) # 1/4
173 | #upconv3 = slim.conv2d_transpose(iconv4, 64, [3, 3], stride=2)
174 | upconv3 = resize_like(upconv3, pool1)
175 | disp4_up = resize_like(disp4_up, pool1)
176 | concat3 = tf.concat([upconv3, disp4_up, pool1], 3)
177 | iconv3 = slim.conv2d(concat3, 64, [3, 3], stride=1)
178 | disp3 = DISP_SCALING * slim.conv2d(iconv3, 1, [3, 3], stride=1,
179 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp3') + MIN_DISP
180 | disp3_up = tf.image.resize_bilinear(disp3, [np.int(H/2), np.int(W/2)])
181 |
182 | upconv2 = upconv(iconv3, 32, 3, 2) # 1/2
183 | #upconv2 = slim.conv2d_transpose(iconv3, 32, [3, 3], stride=2)
184 | upconv2 = resize_like(upconv2, conv1)
185 | disp3_up = resize_like(disp3_up, conv1)
186 | concat2 = tf.concat([upconv2, disp3_up, conv1], 3)
187 | iconv2 = slim.conv2d(concat2, 32, [3, 3], stride=1)
188 | disp2 = DISP_SCALING * slim.conv2d(iconv2, 1, [3, 3], stride=1,
189 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp2') + MIN_DISP
190 | disp2_up = tf.image.resize_bilinear(disp2, [H, W])
191 |
192 | upconv1 = upconv(iconv2, 16, 3, 2)
193 | #upconv1 = slim.conv2d_transpose(iconv2, 16, [3, 3], stride=2)
194 | upconv1 = resize_like(upconv1, disp2_up)
195 | concat1 = tf.concat([upconv1, disp2_up], 3)
196 | iconv1 = slim.conv2d(concat1, 16, [3, 3], stride=1)
197 | disp1 = DISP_SCALING * slim.conv2d(iconv1, 1, [3, 3], stride=1,
198 | activation_fn=tf.sigmoid, normalizer_fn=None, scope='disp1') + MIN_DISP
199 |
200 | end_points = utils.convert_collection_to_dict(end_points_collection)
201 |
202 | if not get_feature:
203 | return [disp1, disp2, disp3, disp4], end_points
204 | else:
205 | return [disp1, disp2, disp3, disp4], conv5, end_points
206 |
207 |
208 |
--------------------------------------------------------------------------------
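A small sketch of calling the disparity network above. The input shape is an assumption, and depth = 1/disp follows the inverse-depth convention also used by normalize_depth_for_display in core/utils.py.

import tensorflow as tf

tgt = tf.placeholder(tf.float32, [4, 320, 1152, 3])
disps, end_points = disp_net_res50(tgt, is_training=True)
# disps[0] is the full-resolution prediction; each value lies in (MIN_DISP, DISP_SCALING + MIN_DISP).
depth = 1.0 / disps[0]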
/core/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 | def gray2rgb(im, cmap='gray'):
7 | cmap = plt.get_cmap(cmap)
8 | rgba_img = cmap(im.astype(np.float32))
9 | rgb_img = np.delete(rgba_img, 3, 2)
10 | return rgb_img
11 |
12 | def normalize_depth_for_display(depth, pc=95, crop_percent=0, normalizer=None, cmap='gray'):
13 | # convert to disparity
14 | depth = 1./(depth + 1e-6)
15 | if normalizer is not None:
16 | depth = depth/normalizer
17 | else:
18 | depth = depth/(np.percentile(depth, pc) + 1e-6)
19 | depth = np.clip(depth, 0, 1)
20 | depth = gray2rgb(depth, cmap=cmap)
21 | keep_H = int(depth.shape[0] * (1-crop_percent))
22 | depth = depth[:keep_H]
23 | depth = depth
24 | return depth
25 |
26 | # Add inverse_pose flag
27 | def euler2mat(z, y, x, inverse_pose=False):
28 | """Converts euler angles to rotation matrix
29 | TODO: remove the dimension for 'N' (deprecated for converting all source
30 | poses altogether)
31 | Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174
32 | Args:
33 | z: rotation angle along z axis (in radians) -- size = [B, N]
34 | y: rotation angle along y axis (in radians) -- size = [B, N]
35 | x: rotation angle along x axis (in radians) -- size = [B, N]
36 | Returns:
37 | Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3]
38 | """
39 | B = tf.shape(z)[0]
40 | N = 1
41 | z = tf.clip_by_value(z, -np.pi, np.pi)
42 | y = tf.clip_by_value(y, -np.pi, np.pi)
43 | x = tf.clip_by_value(x, -np.pi, np.pi)
44 | if inverse_pose:
45 | z = -z
46 | y = -y
47 | x = -x
48 |
49 | # Expand to B x N x 1 x 1
50 | z = tf.expand_dims(tf.expand_dims(z, -1), -1)
51 | y = tf.expand_dims(tf.expand_dims(y, -1), -1)
52 | x = tf.expand_dims(tf.expand_dims(x, -1), -1)
53 |
54 | zeros = tf.zeros([B, N, 1, 1])
55 | ones = tf.ones([B, N, 1, 1])
56 |
57 | cosz = tf.cos(z)
58 | sinz = tf.sin(z)
59 | rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3)
60 | rotz_2 = tf.concat([sinz, cosz, zeros], axis=3)
61 | rotz_3 = tf.concat([zeros, zeros, ones], axis=3)
62 | zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2)
63 |
64 | cosy = tf.cos(y)
65 | siny = tf.sin(y)
66 | roty_1 = tf.concat([cosy, zeros, siny], axis=3)
67 | roty_2 = tf.concat([zeros, ones, zeros], axis=3)
68 | roty_3 = tf.concat([-siny,zeros, cosy], axis=3)
69 | ymat = tf.concat([roty_1, roty_2, roty_3], axis=2)
70 |
71 | cosx = tf.cos(x)
72 | sinx = tf.sin(x)
73 | rotx_1 = tf.concat([ones, zeros, zeros], axis=3)
74 | rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3)
75 | rotx_3 = tf.concat([zeros, sinx, cosx], axis=3)
76 | xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2)
77 |
78 | rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat)
79 | return rotMat
80 |
81 | # Add inverse_pose flag
82 | def pose_vec2mat(vec, inverse_pose=False):
83 | """Converts 6DoF parameters to transformation matrix
84 | Args:
85 | vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
86 | Returns:
87 | A transformation matrix -- [B, 4, 4]
88 | """
89 | batch_size, _ = vec.get_shape().as_list()
90 | translation = tf.slice(vec, [0, 0], [-1, 3])
91 | translation = tf.expand_dims(translation, -1)
92 | if inverse_pose:
93 | translation = -translation
94 | rx = tf.slice(vec, [0, 3], [-1, 1])
95 | ry = tf.slice(vec, [0, 4], [-1, 1])
96 | rz = tf.slice(vec, [0, 5], [-1, 1])
97 | rot_mat = euler2mat(rz, ry, rx, inverse_pose)
98 | rot_mat = tf.squeeze(rot_mat, axis=[1])
99 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
100 | filler = tf.tile(filler, [batch_size, 1, 1])
101 | transform_mat = tf.concat([rot_mat, translation], axis=2)
102 | transform_mat = tf.concat([transform_mat, filler], axis=1)
103 | return transform_mat
104 |
105 | def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True):
106 | """Transforms coordinates in the pixel frame to the camera frame.
107 |
108 | Args:
109 | depth: [batch, height, width]
110 | pixel_coords: homogeneous pixel coordinates [batch, 3, height, width]
111 | intrinsics: camera intrinsics [batch, 3, 3]
112 | is_homogeneous: return in homogeneous coordinates
113 | Returns:
114 | Coords in the camera frame [batch, 3 (4 if homogeneous), height, width]
115 | """
116 | batch, height, width = depth.get_shape().as_list()
117 | depth = tf.reshape(depth, [batch, 1, -1])
118 | pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1])
119 | cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth
120 | if is_homogeneous:
121 | ones = tf.ones([batch, 1, height*width])
122 | cam_coords = tf.concat([cam_coords, ones], axis=1)
123 | cam_coords = tf.reshape(cam_coords, [batch, -1, height, width])
124 | return cam_coords
125 |
126 | def cam2pixel(cam_coords, proj):
127 | """Transforms coordinates in a camera frame to the pixel frame.
128 |
129 | Args:
130 | cam_coords: [batch, 4, height, width]
131 | proj: [batch, 4, 4]
132 | Returns:
133 | Pixel coordinates projected from the camera frame [batch, height, width, 2]
134 | """
135 | batch, _, height, width = cam_coords.get_shape().as_list()
136 | cam_coords = tf.reshape(cam_coords, [batch, 4, -1])
137 | unnormalized_pixel_coords = tf.matmul(proj, cam_coords)
138 | x_u = tf.slice(unnormalized_pixel_coords, [0, 0, 0], [-1, 1, -1])
139 | y_u = tf.slice(unnormalized_pixel_coords, [0, 1, 0], [-1, 1, -1])
140 | z_u = tf.slice(unnormalized_pixel_coords, [0, 2, 0], [-1, 1, -1])
141 | x_n = x_u / (z_u + 1e-10)
142 | y_n = y_u / (z_u + 1e-10)
143 | pixel_coords = tf.concat([x_n, y_n], axis=1)
144 | pixel_coords = tf.reshape(pixel_coords, [batch, 2, height, width])
145 | return tf.transpose(pixel_coords, perm=[0, 2, 3, 1])
146 |
147 | def meshgrid(batch, height, width, is_homogeneous=True):
148 | """Construct a 2D meshgrid.
149 |
150 | Args:
151 | batch: batch size
152 | height: height of the grid
153 | width: width of the grid
154 | is_homogeneous: whether to return in homogeneous coordinates
155 | Returns:
156 | x,y grid coordinates [batch, 2 (3 if homogeneous), height, width]
157 | """
158 | x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])),
159 | tf.transpose(tf.expand_dims(
160 | tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
161 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
162 | tf.ones(shape=tf.stack([1, width])))
163 | x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32)
164 | y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32)
165 | if is_homogeneous:
166 | ones = tf.ones_like(x_t)
167 | coords = tf.stack([x_t, y_t, ones], axis=0)
168 | else:
169 | coords = tf.stack([x_t, y_t], axis=0)
170 | coords = tf.tile(tf.expand_dims(coords, 0), [batch, 1, 1, 1])
171 | return coords
172 |
173 | # Add inverse_pose flag
174 | def projective_inverse_warp(img, depth, pose, intrinsics, inverse_pose=False):
175 | """Inverse warp a source image to the target image plane based on projection.
176 |
177 | Args:
178 | img: the source image [batch, height_s, width_s, 3]
179 | depth: depth map of the target image [batch, height_t, width_t]
180 | pose: target to source camera transformation matrix [batch, 6], in the
181 | order of tx, ty, tz, rx, ry, rz
182 | intrinsics: camera intrinsics [batch, 3, 3]
183 | Returns:
184 | Source image inverse warped to the target image plane [batch, height_t,
185 | width_t, 3]
186 | """
187 | batch, height, width, _ = img.get_shape().as_list()
188 | # Convert pose vector to matrix
189 | pose = pose_vec2mat(pose, inverse_pose)
190 | # Construct pixel grid coordinates
191 | pixel_coords = meshgrid(batch, height, width)
192 | # Convert pixel coordinates to the camera frame
193 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics)
194 | # Construct a 4x4 intrinsic matrix (TODO: can it be 3x4?)
195 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
196 | filler = tf.tile(filler, [batch, 1, 1])
197 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2)
198 | intrinsics = tf.concat([intrinsics, filler], axis=1)
199 | # Get a 4x4 transformation matrix from 'target' camera frame to 'source'
200 | # pixel frame.
201 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose)
202 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel)
203 | output_img = bilinear_sampler(img, src_pixel_coords)
204 | return output_img, src_pixel_coords
205 |
206 | # (Yuliang) Inverse warp with flow
207 | def flow_inverse_warp(img, flow):
208 | batch, height, width, _ = img.get_shape().as_list()
209 | x_base = tf.range(width)
210 | y_base = tf.range(height)
211 | x_base = tf.stack([x_base]*height, axis=0)
212 | y_base = tf.transpose(tf.stack([y_base]*width, axis=0))
213 | flow0 = flow[:, :, :, 0]
214 | flow1 = flow[:, :, :, 1]
215 | flow0 = flow0 + tf.cast(x_base, tf.float32)
216 | flow1 = flow1 + tf.cast(y_base, tf.float32)
217 | coords = tf.stack([flow0, flow1], axis=-1)
218 | output_img = bilinear_sampler(img, coords)
219 | return output_img
220 |
221 | def depth_pose_inverse_warp(img, depth, pose, intrinsics):
222 | batch, height, width, _ = img.get_shape().as_list()
223 | pixel_coords = meshgrid(batch, height, width)
224 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics)
225 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
226 | filler = tf.tile(filler, [batch, 1, 1])
227 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2)
228 | intrinsics = tf.concat([intrinsics, filler], axis=1)
229 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose)
230 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel)
231 | output_img = bilinear_sampler(img, src_pixel_coords)
232 | return output_img, src_pixel_coords
233 |
234 | def bilinear_sampler(imgs, coords):
235 | """Construct a new image by bilinear sampling from the input image.
236 |
237 | Points falling outside the source image boundary have value 0.
238 |
239 | Args:
240 | imgs: source image to be sampled from [batch, height_s, width_s, channels]
241 | coords: coordinates of source pixels to sample from [batch, height_t,
242 | width_t, 2]. height_t/width_t correspond to the dimensions of the output
243 | image (don't need to be the same as height_s/width_s). The two channels
244 | correspond to x and y coordinates respectively.
245 | Returns:
246 | A new sampled image [batch, height_t, width_t, channels]
247 | """
248 | def _repeat(x, n_repeats):
249 | rep = tf.transpose(
250 | tf.expand_dims(tf.ones(shape=tf.stack([
251 | n_repeats,
252 | ])), 1), [1, 0])
253 | rep = tf.cast(rep, 'float32')
254 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
255 | return tf.reshape(x, [-1])
256 |
257 | with tf.name_scope('image_sampling'):
258 | coords_x, coords_y = tf.split(coords, [1, 1], axis=3)
259 | inp_size = imgs.get_shape()
260 | coord_size = coords.get_shape()
261 | out_size = coords.get_shape().as_list()
262 | out_size[3] = imgs.get_shape().as_list()[3]
263 |
264 | coords_x = tf.cast(coords_x, 'float32')
265 | coords_y = tf.cast(coords_y, 'float32')
266 |
267 | x0 = tf.floor(coords_x)
268 | x1 = x0 + 1
269 | y0 = tf.floor(coords_y)
270 | y1 = y0 + 1
271 |
272 | y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32')
273 | x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32')
274 | zero = tf.zeros([1], dtype='float32')
275 |
276 | x0_safe = tf.clip_by_value(x0, zero, x_max)
277 | y0_safe = tf.clip_by_value(y0, zero, y_max)
278 | x1_safe = tf.clip_by_value(x1, zero, x_max)
279 | y1_safe = tf.clip_by_value(y1, zero, y_max)
280 |
281 | wt_x0 = x1_safe - coords_x
282 | wt_x1 = coords_x - x0_safe
283 | wt_y0 = y1_safe - coords_y
284 | wt_y1 = coords_y - y0_safe
285 |
286 | ## indices in the flat image to sample from
287 | dim2 = tf.cast(inp_size[2], 'float32')
288 | dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32')
289 | base = tf.reshape(
290 | _repeat(
291 | tf.cast(tf.range(coord_size[0]), 'float32') * dim1,
292 | coord_size[1] * coord_size[2]),
293 | [out_size[0], out_size[1], out_size[2], 1])
294 |
295 | base_y0 = base + y0_safe * dim2
296 | base_y1 = base + y1_safe * dim2
297 | idx00 = tf.reshape(x0_safe + base_y0, [-1])
298 | idx01 = x0_safe + base_y1
299 | idx10 = x1_safe + base_y0
300 | idx11 = x1_safe + base_y1
301 |
302 | ## sample from imgs
303 | imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]]))
304 | imgs_flat = tf.cast(imgs_flat, 'float32')
305 | im00 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size)
306 | im01 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size)
307 | im10 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size)
308 | im11 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size)
309 |
310 | w00 = wt_x0 * wt_y0
311 | w01 = wt_x0 * wt_y1
312 | w10 = wt_x1 * wt_y0
313 | w11 = wt_x1 * wt_y1
314 |
315 | output = tf.add_n([
316 | w00 * im00, w01 * im01,
317 | w10 * im10, w11 * im11
318 | ])
319 | return output
320 |
--------------------------------------------------------------------------------
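A sketch of the two warping entry points above; the shapes are placeholders, and the pose layout (tx, ty, tz, rx, ry, rz) is the one documented in pose_vec2mat.

import tensorflow as tf

src_img = tf.placeholder(tf.float32, [4, 320, 1152, 3])
depth   = tf.placeholder(tf.float32, [4, 320, 1152])
pose    = tf.placeholder(tf.float32, [4, 6])            # tx, ty, tz, rx, ry, rz
K       = tf.placeholder(tf.float32, [4, 3, 3])
warped_rigid, src_coords = projective_inverse_warp(src_img, depth, pose, K)

flow = tf.placeholder(tf.float32, [4, 320, 1152, 2])    # per-pixel (dx, dy) in pixels
warped_by_flow = flow_inverse_warp(src_img, flow)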
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/data/__init__.py
--------------------------------------------------------------------------------
/data/kitti/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/data/kitti/__init__.py
--------------------------------------------------------------------------------
/data/kitti/kitti_odom_loader.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | from glob import glob
4 | import os
5 | import scipy.misc
6 | # import sys
7 | # sys.path.append('../../')
8 | # from utils.misc import *
9 |
10 | class kitti_odom_loader(object):
11 | def __init__(self,
12 | dataset_dir,
13 | img_height=128,
14 | img_width=416,
15 | seq_length=5):
16 | self.dataset_dir = dataset_dir
17 | self.img_height = img_height
18 | self.img_width = img_width
19 | self.seq_length = seq_length
20 | self.train_seqs = [0, 1, 2, 3, 4, 5, 6, 7, 8]
21 | self.test_seqs = [9, 10]
22 |
23 | self.collect_test_frames()
24 | self.collect_train_frames()
25 |
26 | def collect_test_frames(self):
27 | self.test_frames = []
28 | for seq in self.test_seqs:
29 | seq_dir = os.path.join(self.dataset_dir, 'sequences', '%.2d' % seq)
30 | img_dir = os.path.join(seq_dir, 'image_2')
31 | N = len(glob(img_dir + '/*.png'))
32 | for n in range(N):
33 | self.test_frames.append('%.2d %.6d' % (seq, n))
34 | self.num_test = len(self.test_frames)
35 |
36 | def collect_train_frames(self):
37 | self.train_frames = []
38 | for seq in self.train_seqs:
39 | seq_dir = os.path.join(self.dataset_dir, 'sequences', '%.2d' % seq)
40 | img_dir = os.path.join(seq_dir, 'image_2')
41 | N = len(glob(img_dir + '/*.png'))
42 | for n in range(N):
43 | self.train_frames.append('%.2d %.6d' % (seq, n))
44 | self.num_train = len(self.train_frames)
45 |
46 | def is_valid_sample(self, frames, tgt_idx):
47 | N = len(frames)
48 | tgt_drive, _ = frames[tgt_idx].split(' ')
49 | if self.seq_length % 2 != 0:
50 | half_offset = int((self.seq_length - 1)/2)
51 | min_src_idx = tgt_idx - half_offset
52 | max_src_idx = tgt_idx + half_offset
53 | if min_src_idx < 0 or max_src_idx >= N:
54 | return False
55 | min_src_drive, _ = frames[min_src_idx].split(' ')
56 | max_src_drive, _ = frames[max_src_idx].split(' ')
57 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive:
58 | return True
59 | return False
60 | else:
61 | left_offset = int(self.seq_length / 2 - 1)
62 | right_offset = int(self.seq_length / 2)
63 | min_src_idx = tgt_idx - left_offset
64 | max_src_idx = tgt_idx + right_offset
65 | if min_src_idx < 0 or max_src_idx >= N:
66 | return False
67 | min_src_drive, _ = frames[min_src_idx].split(' ')
68 | max_src_drive, _ = frames[max_src_idx].split(' ')
69 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive:
70 | return True
71 | return False
72 |
73 | def load_image_sequence(self, frames, tgt_idx, seq_length):
74 | if seq_length % 2 != 0:
75 | half_offset = int((seq_length - 1)/2)
76 | image_seq = []
77 | for o in range(-half_offset, half_offset+1):
78 | curr_idx = tgt_idx + o
79 | curr_drive, curr_frame_id = frames[curr_idx].split(' ')
80 | curr_img = self.load_image(curr_drive, curr_frame_id)
81 | if o == 0:
82 | zoom_y = self.img_height/curr_img.shape[0]
83 | zoom_x = self.img_width/curr_img.shape[1]
84 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width))
85 | image_seq.append(curr_img)
86 | return image_seq, zoom_x, zoom_y
87 | else:
88 | left_offset = int(seq_length / 2 - 1)
89 | right_offset = int(seq_length / 2)
90 | image_seq = []
91 | for o in range(-left_offset, right_offset + 1):
92 | curr_idx = tgt_idx + o
93 | curr_drive, curr_frame_id = frames[curr_idx].split(' ')
94 | curr_img = self.load_image(curr_drive, curr_frame_id)
95 | if o == 0:
96 | zoom_y = self.img_height/curr_img.shape[0]
97 | zoom_x = self.img_width/curr_img.shape[1]
98 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width))
99 | image_seq.append(curr_img)
100 | return image_seq, zoom_x, zoom_y
101 |
102 | def load_example(self, frames, tgt_idx, load_pose=False):
103 | image_seq, zoom_x, zoom_y = self.load_image_sequence(frames, tgt_idx, self.seq_length)
104 | tgt_drive, tgt_frame_id = frames[tgt_idx].split(' ')
105 | intrinsics = self.load_intrinsics(tgt_drive, tgt_frame_id)
106 | intrinsics = self.scale_intrinsics(intrinsics, zoom_x, zoom_y)
107 | example = {}
108 | example['intrinsics'] = intrinsics
109 | example['image_seq'] = image_seq
110 | example['folder_name'] = tgt_drive
111 | example['file_name'] = tgt_frame_id
112 | if load_pose:
113 | pass
114 | return example
115 |
116 | def get_train_example_with_idx(self, tgt_idx):
117 | if not self.is_valid_sample(self.train_frames, tgt_idx):
118 | return False
119 | example = self.load_example(self.train_frames, tgt_idx)
120 | return example
121 |
122 | # def load_frame(self, drive, frame_id):
123 | # img = self.load_image(drive, frame_id)
124 | # try:
125 | # scale_x = np.float(self.img_width)/img.shape[1]
126 | # except:
127 | # print("KITTI loading error!")
128 | # print("Drive = ", drive)
129 | # print("frame_id = ", frame_id)
130 | # raise
131 | # scale_y = np.float(self.img_height)/img.shape[0]
132 | # intrinsics = self.load_intrinsics(drive, frame_id)
133 | # intrinsics = self.scale_intrinsics(intrinsics, scale_x, scale_y)
134 | # img = self.crop_resize(img)
135 | # return img, intrinsics
136 |
137 | def load_image(self, drive, frame_id):
138 | img_file = os.path.join(self.dataset_dir, 'sequences', '%s/image_2/%s.png' % (drive, frame_id))
139 | img = scipy.misc.imread(img_file)
140 | return img
141 |
142 | def load_intrinsics(self, drive, frame_id):
143 | calib_file = os.path.join(self.dataset_dir, 'sequences', '%s/calib.txt' % drive)
144 | proj_c2p, _ = self.read_calib_file(calib_file)
145 | intrinsics = proj_c2p[:3, :3]
146 | return intrinsics
147 |
148 | # def load_gt_odom(self, drive, tgt_idx, src_idx):
149 | # pose_file = os.path.join(self.dataset_dir, 'poses', '%s.txt' % drive)
150 | # with open(pose_file, 'r') as f:
151 | # poses = f.readlines()
152 | # filler = np.array([0, 0, 0, 1]).reshape((1,4))
153 | # tgt_pose = np.array(poses[int(tgt_idx)][:-1].split(' ')).astype(np.float32).reshape(3,4)
154 | # tgt_pose = np.concatenate((tgt_pose, filler), axis=0)
155 | # src_pose = np.array(poses[int(src_idx)][:-1].split(' ')).astype(np.float32).reshape(3,4)
156 | # src_pose = np.concatenate((src_pose, filler), axis=0)
157 | # rel_pose = np.dot(np.linalg.inv(src_pose), tgt_pose)
158 | # rel_6DOF = pose_mat_to_6dof(rel_pose)
159 | # return rel_6DOF
160 |
161 | def read_calib_file(self, filepath, cid=2):
162 | """Read in a calibration file and parse into a dictionary."""
163 | with open(filepath, 'r') as f:
164 | C = f.readlines()
165 | def parseLine(L, shape):
166 | data = L.split()
167 | data = np.array(data[1:]).reshape(shape).astype(np.float32)
168 | return data
169 | proj_c2p = parseLine(C[cid], shape=(3,4))
170 | proj_v2c = parseLine(C[-1], shape=(3,4))
171 | filler = np.array([0, 0, 0, 1]).reshape((1,4))
172 | proj_v2c = np.concatenate((proj_v2c, filler), axis=0)
173 | return proj_c2p, proj_v2c
174 |
175 | def scale_intrinsics(self,mat, sx, sy):
176 | out = np.copy(mat)
177 | out[0,0] *= sx
178 | out[0,2] *= sx
179 | out[1,1] *= sy
180 | out[1,2] *= sy
181 | return out
182 |
183 |
184 |
--------------------------------------------------------------------------------
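Illustrative use of the odometry loader above; the dataset path is a placeholder and the default 128x416 image size is kept.

loader = kitti_odom_loader('./dataset/KITTI/odometry/', seq_length=5)
example = loader.get_train_example_with_idx(100)
if example:                            # False when the 5-frame window crosses a sequence boundary
    imgs = example['image_seq']        # list of 5 resized frames, target frame in the middle
    K = example['intrinsics']          # 3x3 intrinsics rescaled by the resize factors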
/data/kitti/kitti_raw_loader.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | from glob import glob
4 | import os
5 | import scipy.misc
6 |
7 | class kitti_raw_loader(object):
8 | def __init__(self,
9 | dataset_dir,
10 | split,
11 | img_height=256,
12 | img_width=256,
13 | seq_length=5):
14 | dir_path = os.path.dirname(os.path.realpath(__file__))
15 | static_frames_file = dir_path + '/static_frames.txt'
16 | excluded_frames_file = dir_path + '/excluded_frames.txt'
17 | test_scene_file = dir_path + '/test_scenes_' + split + '.txt'
18 | with open(test_scene_file, 'r') as f:
19 | test_scenes = f.readlines()
20 | self.test_scenes = [t[:-1] for t in test_scenes]
21 | self.dataset_dir = dataset_dir
22 | self.img_height = img_height
23 | self.img_width = img_width
24 | self.seq_length = seq_length
25 | self.cam_ids = ['02', '03']
26 | self.date_list = ['2011_09_26', '2011_09_28', '2011_09_29',
27 | '2011_09_30', '2011_10_03']
28 | self.collect_static_frames(static_frames_file)
29 | self.collect_excluded_frames(excluded_frames_file)
30 | self.collect_train_frames()
31 |
32 | def collect_static_frames(self, static_frames_file):
33 | with open(static_frames_file, 'r') as f:
34 | frames = f.readlines()
35 | self.static_frames = []
36 | for fr in frames:
37 | if fr == '\n':
38 | continue
39 | date, drive, frame_id = fr.split(' ')
40 | curr_fid = '%.10d' % (np.int(frame_id[:-1]))
41 | for cid in self.cam_ids:
42 | self.static_frames.append(drive + ' ' + cid + ' ' + curr_fid)
43 |
44 | # Exclude KITTI flow frames
45 | def collect_excluded_frames(self, excluded_frames_file):
46 | with open(excluded_frames_file, 'r') as f:
47 | frames = f.readlines()
48 | self.excluded_frames = []
49 | for fr in frames:
50 | if fr == '\n':
51 | continue
52 | date, drive, frame_id = fr.split(' ')
53 | curr_fid = '%.10d' % (np.int(frame_id[:-1]))
54 | for cid in self.cam_ids:
55 | self.excluded_frames.append(drive + ' ' + cid + ' ' + curr_fid)
56 |
57 | def collect_train_frames(self):
58 | all_frames = []
59 | for date in self.date_list:
60 | drive_set = os.listdir(self.dataset_dir + date + '/')
61 | for dr in drive_set:
62 | drive_dir = os.path.join(self.dataset_dir, date, dr)
63 | if os.path.isdir(drive_dir):
64 | if dr[:-5] in self.test_scenes:
65 | continue
66 | for cam in self.cam_ids:
67 | img_dir = os.path.join(drive_dir, 'image_' + cam, 'data')
68 | N = len(glob(img_dir + '/*.png'))
69 | for n in range(N):
70 | frame_id = '%.10d' % n
71 | all_frames.append(dr + ' ' + cam + ' ' + frame_id)
72 |
73 |
74 | for s in self.static_frames:
75 | try:
76 | all_frames.remove(s)
77 | # print('removed static frame from training: %s' % s)
78 | except:
79 | pass
80 |
81 | for s in self.excluded_frames:
82 | try:
83 | all_frames.remove(s)
84 | except:
85 | pass
86 |
87 | self.train_frames = all_frames
88 | self.num_train = len(self.train_frames)
89 |
90 | def is_valid_sample(self, frames, tgt_idx):
91 | N = len(frames)
92 | tgt_drive, cid, _ = frames[tgt_idx].split(' ')
93 | if self.seq_length % 2 != 0:
94 | half_offset = int((self.seq_length - 1)/2)
95 | min_src_idx = tgt_idx - half_offset
96 | max_src_idx = tgt_idx + half_offset
97 | if min_src_idx < 0 or max_src_idx >= N:
98 | return False
99 | min_src_drive, min_src_cid, _ = frames[min_src_idx].split(' ')
100 | max_src_drive, max_src_cid, _ = frames[max_src_idx].split(' ')
101 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive and cid == min_src_cid and cid == max_src_cid:
102 | return True
103 | return False
104 | else:
105 | left_offset = int(self.seq_length / 2 - 1)
106 | right_offset = int(self.seq_length / 2)
107 | min_src_idx = tgt_idx - left_offset
108 | max_src_idx = tgt_idx + right_offset
109 | if min_src_idx < 0 or max_src_idx >= N:
110 | return False
111 | min_src_drive, min_src_cid, _ = frames[min_src_idx].split(' ')
112 | max_src_drive, max_src_cid, _ = frames[max_src_idx].split(' ')
113 | if tgt_drive == min_src_drive and tgt_drive == max_src_drive and cid == min_src_cid and cid == max_src_cid:
114 | return True
115 | return False
116 |
117 | def get_train_example_with_idx(self, tgt_idx):
118 | if not self.is_valid_sample(self.train_frames, tgt_idx):
119 | return False
120 | example = self.load_example(self.train_frames, tgt_idx)
121 | return example
122 |
123 | def load_image_sequence(self, frames, tgt_idx, seq_length):
124 | if seq_length % 2 != 0:
125 | half_offset = int((seq_length - 1)/2)
126 | image_seq = []
127 | for o in range(-half_offset, half_offset + 1):
128 | curr_idx = tgt_idx + o
129 | curr_drive, curr_cid, curr_frame_id = frames[curr_idx].split(' ')
130 | curr_img = self.load_image_raw(curr_drive, curr_cid, curr_frame_id)
131 | if o == 0:
132 | zoom_y = self.img_height/curr_img.shape[0]
133 | zoom_x = self.img_width/curr_img.shape[1]
134 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width))
135 | image_seq.append(curr_img)
136 | return image_seq, zoom_x, zoom_y
137 | else:
138 | left_offset = int(seq_length / 2 - 1)
139 | right_offset = int(seq_length / 2)
140 | image_seq = []
141 | for o in range(-left_offset, right_offset + 1):
142 | curr_idx = tgt_idx + o
143 | curr_drive, curr_cid, curr_frame_id = frames[curr_idx].split(' ')
144 | curr_img = self.load_image_raw(curr_drive, curr_cid, curr_frame_id)
145 | if o == 0:
146 | zoom_y = self.img_height/curr_img.shape[0]
147 | zoom_x = self.img_width/curr_img.shape[1]
148 | curr_img = scipy.misc.imresize(curr_img, (self.img_height, self.img_width))
149 | image_seq.append(curr_img)
150 | return image_seq, zoom_x, zoom_y
151 |
152 | def load_example(self, frames, tgt_idx):
153 | image_seq, zoom_x, zoom_y = self.load_image_sequence(frames, tgt_idx, self.seq_length)
154 | tgt_drive, tgt_cid, tgt_frame_id = frames[tgt_idx].split(' ')
155 | intrinsics = self.load_intrinsics_raw(tgt_drive, tgt_cid, tgt_frame_id)
156 | intrinsics = self.scale_intrinsics(intrinsics, zoom_x, zoom_y)
157 | example = {}
158 | example['intrinsics'] = intrinsics
159 | example['image_seq'] = image_seq
160 | example['folder_name'] = tgt_drive + '_' + tgt_cid + '/'
161 | example['file_name'] = tgt_frame_id
162 | return example
163 |
164 | def load_image_raw(self, drive, cid, frame_id):
165 | date = drive[:10]
166 | img_file = os.path.join(self.dataset_dir, date, drive, 'image_' + cid, 'data', frame_id + '.png')
167 | img = scipy.misc.imread(img_file)
168 | return img
169 |
170 | def load_intrinsics_raw(self, drive, cid, frame_id):
171 | date = drive[:10]
172 | calib_file = os.path.join(self.dataset_dir, date, 'calib_cam_to_cam.txt')
173 |
174 | filedata = self.read_raw_calib_file(calib_file)
175 | P_rect = np.reshape(filedata['P_rect_' + cid], (3, 4))
176 | intrinsics = P_rect[:3, :3]
177 | return intrinsics
178 |
179 | def read_raw_calib_file(self,filepath):
180 | # From https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
181 | """Read in a calibration file and parse into a dictionary."""
182 | data = {}
183 |
184 | with open(filepath, 'r') as f:
185 | for line in f.readlines():
186 | key, value = line.split(':', 1)
187 | # The only non-float values in these files are dates, which
188 | # we don't care about anyway
189 | try:
190 | data[key] = np.array([float(x) for x in value.split()])
191 | except ValueError:
192 | pass
193 | return data
194 |
195 | def scale_intrinsics(self, mat, sx, sy):
196 | out = np.copy(mat)
197 | out[0,0] *= sx
198 | out[0,2] *= sx
199 | out[1,1] *= sy
200 | out[1,2] *= sy
201 | return out
202 |
203 |
204 |
--------------------------------------------------------------------------------
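A worked example of the intrinsics rescaling done by scale_intrinsics above. The calibration numbers and the raw frame size are illustrative, not read from any particular KITTI calibration file.

import numpy as np

K = np.array([[721.5,   0.0, 609.6],   # fx,  0, cx   (illustrative)
              [  0.0, 721.5, 172.9],   #  0, fy, cy
              [  0.0,   0.0,   1.0]])
sx, sy = 1152 / 1242, 320 / 375        # zoom factors when resizing a 1242x375 frame to 1152x320
K_scaled = K.copy()
K_scaled[0, 0] *= sx; K_scaled[0, 2] *= sx   # fx and cx scale with the width
K_scaled[1, 1] *= sy; K_scaled[1, 2] *= sy   # fy and cy scale with the height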
/data/kitti/test_files_stereo.txt:
--------------------------------------------------------------------------------
1 | training/image_2/000000_10.png
2 | training/image_2/000001_10.png
3 | training/image_2/000002_10.png
4 | training/image_2/000003_10.png
5 | training/image_2/000004_10.png
6 | training/image_2/000005_10.png
7 | training/image_2/000006_10.png
8 | training/image_2/000007_10.png
9 | training/image_2/000008_10.png
10 | training/image_2/000009_10.png
11 | training/image_2/000010_10.png
12 | training/image_2/000011_10.png
13 | training/image_2/000012_10.png
14 | training/image_2/000013_10.png
15 | training/image_2/000014_10.png
16 | training/image_2/000015_10.png
17 | training/image_2/000016_10.png
18 | training/image_2/000017_10.png
19 | training/image_2/000018_10.png
20 | training/image_2/000019_10.png
21 | training/image_2/000020_10.png
22 | training/image_2/000021_10.png
23 | training/image_2/000022_10.png
24 | training/image_2/000023_10.png
25 | training/image_2/000024_10.png
26 | training/image_2/000025_10.png
27 | training/image_2/000026_10.png
28 | training/image_2/000027_10.png
29 | training/image_2/000028_10.png
30 | training/image_2/000029_10.png
31 | training/image_2/000030_10.png
32 | training/image_2/000031_10.png
33 | training/image_2/000032_10.png
34 | training/image_2/000033_10.png
35 | training/image_2/000034_10.png
36 | training/image_2/000035_10.png
37 | training/image_2/000036_10.png
38 | training/image_2/000037_10.png
39 | training/image_2/000038_10.png
40 | training/image_2/000039_10.png
41 | training/image_2/000040_10.png
42 | training/image_2/000041_10.png
43 | training/image_2/000042_10.png
44 | training/image_2/000043_10.png
45 | training/image_2/000044_10.png
46 | training/image_2/000045_10.png
47 | training/image_2/000046_10.png
48 | training/image_2/000047_10.png
49 | training/image_2/000048_10.png
50 | training/image_2/000049_10.png
51 | training/image_2/000050_10.png
52 | training/image_2/000051_10.png
53 | training/image_2/000052_10.png
54 | training/image_2/000053_10.png
55 | training/image_2/000054_10.png
56 | training/image_2/000055_10.png
57 | training/image_2/000056_10.png
58 | training/image_2/000057_10.png
59 | training/image_2/000058_10.png
60 | training/image_2/000059_10.png
61 | training/image_2/000060_10.png
62 | training/image_2/000061_10.png
63 | training/image_2/000062_10.png
64 | training/image_2/000063_10.png
65 | training/image_2/000064_10.png
66 | training/image_2/000065_10.png
67 | training/image_2/000066_10.png
68 | training/image_2/000067_10.png
69 | training/image_2/000068_10.png
70 | training/image_2/000069_10.png
71 | training/image_2/000070_10.png
72 | training/image_2/000071_10.png
73 | training/image_2/000072_10.png
74 | training/image_2/000073_10.png
75 | training/image_2/000074_10.png
76 | training/image_2/000075_10.png
77 | training/image_2/000076_10.png
78 | training/image_2/000077_10.png
79 | training/image_2/000078_10.png
80 | training/image_2/000079_10.png
81 | training/image_2/000080_10.png
82 | training/image_2/000081_10.png
83 | training/image_2/000082_10.png
84 | training/image_2/000083_10.png
85 | training/image_2/000084_10.png
86 | training/image_2/000085_10.png
87 | training/image_2/000086_10.png
88 | training/image_2/000087_10.png
89 | training/image_2/000088_10.png
90 | training/image_2/000089_10.png
91 | training/image_2/000090_10.png
92 | training/image_2/000091_10.png
93 | training/image_2/000092_10.png
94 | training/image_2/000093_10.png
95 | training/image_2/000094_10.png
96 | training/image_2/000095_10.png
97 | training/image_2/000096_10.png
98 | training/image_2/000097_10.png
99 | training/image_2/000098_10.png
100 | training/image_2/000099_10.png
101 | training/image_2/000100_10.png
102 | training/image_2/000101_10.png
103 | training/image_2/000102_10.png
104 | training/image_2/000103_10.png
105 | training/image_2/000104_10.png
106 | training/image_2/000105_10.png
107 | training/image_2/000106_10.png
108 | training/image_2/000107_10.png
109 | training/image_2/000108_10.png
110 | training/image_2/000109_10.png
111 | training/image_2/000110_10.png
112 | training/image_2/000111_10.png
113 | training/image_2/000112_10.png
114 | training/image_2/000113_10.png
115 | training/image_2/000114_10.png
116 | training/image_2/000115_10.png
117 | training/image_2/000116_10.png
118 | training/image_2/000117_10.png
119 | training/image_2/000118_10.png
120 | training/image_2/000119_10.png
121 | training/image_2/000120_10.png
122 | training/image_2/000121_10.png
123 | training/image_2/000122_10.png
124 | training/image_2/000123_10.png
125 | training/image_2/000124_10.png
126 | training/image_2/000125_10.png
127 | training/image_2/000126_10.png
128 | training/image_2/000127_10.png
129 | training/image_2/000128_10.png
130 | training/image_2/000129_10.png
131 | training/image_2/000130_10.png
132 | training/image_2/000131_10.png
133 | training/image_2/000132_10.png
134 | training/image_2/000133_10.png
135 | training/image_2/000134_10.png
136 | training/image_2/000135_10.png
137 | training/image_2/000136_10.png
138 | training/image_2/000137_10.png
139 | training/image_2/000138_10.png
140 | training/image_2/000139_10.png
141 | training/image_2/000140_10.png
142 | training/image_2/000141_10.png
143 | training/image_2/000142_10.png
144 | training/image_2/000143_10.png
145 | training/image_2/000144_10.png
146 | training/image_2/000145_10.png
147 | training/image_2/000146_10.png
148 | training/image_2/000147_10.png
149 | training/image_2/000148_10.png
150 | training/image_2/000149_10.png
151 | training/image_2/000150_10.png
152 | training/image_2/000151_10.png
153 | training/image_2/000152_10.png
154 | training/image_2/000153_10.png
155 | training/image_2/000154_10.png
156 | training/image_2/000155_10.png
157 | training/image_2/000156_10.png
158 | training/image_2/000157_10.png
159 | training/image_2/000158_10.png
160 | training/image_2/000159_10.png
161 | training/image_2/000160_10.png
162 | training/image_2/000161_10.png
163 | training/image_2/000162_10.png
164 | training/image_2/000163_10.png
165 | training/image_2/000164_10.png
166 | training/image_2/000165_10.png
167 | training/image_2/000166_10.png
168 | training/image_2/000167_10.png
169 | training/image_2/000168_10.png
170 | training/image_2/000169_10.png
171 | training/image_2/000170_10.png
172 | training/image_2/000171_10.png
173 | training/image_2/000172_10.png
174 | training/image_2/000173_10.png
175 | training/image_2/000174_10.png
176 | training/image_2/000175_10.png
177 | training/image_2/000176_10.png
178 | training/image_2/000177_10.png
179 | training/image_2/000178_10.png
180 | training/image_2/000179_10.png
181 | training/image_2/000180_10.png
182 | training/image_2/000181_10.png
183 | training/image_2/000182_10.png
184 | training/image_2/000183_10.png
185 | training/image_2/000184_10.png
186 | training/image_2/000185_10.png
187 | training/image_2/000186_10.png
188 | training/image_2/000187_10.png
189 | training/image_2/000188_10.png
190 | training/image_2/000189_10.png
191 | training/image_2/000190_10.png
192 | training/image_2/000191_10.png
193 | training/image_2/000192_10.png
194 | training/image_2/000193_10.png
195 | training/image_2/000194_10.png
196 | training/image_2/000195_10.png
197 | training/image_2/000196_10.png
198 | training/image_2/000197_10.png
199 | training/image_2/000198_10.png
200 | training/image_2/000199_10.png
201 |
--------------------------------------------------------------------------------
/data/kitti/test_scenes_eigen.txt:
--------------------------------------------------------------------------------
1 | 2011_09_26_drive_0117
2 | 2011_09_28_drive_0002
3 | 2011_09_26_drive_0052
4 | 2011_09_30_drive_0016
5 | 2011_09_26_drive_0059
6 | 2011_09_26_drive_0027
7 | 2011_09_26_drive_0020
8 | 2011_09_26_drive_0009
9 | 2011_09_26_drive_0013
10 | 2011_09_26_drive_0101
11 | 2011_09_26_drive_0046
12 | 2011_09_26_drive_0029
13 | 2011_09_26_drive_0064
14 | 2011_09_26_drive_0048
15 | 2011_10_03_drive_0027
16 | 2011_09_26_drive_0002
17 | 2011_09_26_drive_0036
18 | 2011_09_29_drive_0071
19 | 2011_10_03_drive_0047
20 | 2011_09_30_drive_0027
21 | 2011_09_26_drive_0086
22 | 2011_09_26_drive_0084
23 | 2011_09_26_drive_0096
24 | 2011_09_30_drive_0018
25 | 2011_09_26_drive_0106
26 | 2011_09_26_drive_0056
27 | 2011_09_26_drive_0023
28 | 2011_09_26_drive_0093
29 |
--------------------------------------------------------------------------------
/data/kitti/test_scenes_stereo.txt:
--------------------------------------------------------------------------------
1 | 2011_09_26_drive_0005
2 | 2011_09_26_drive_0009
3 | 2011_09_26_drive_0011
4 | 2011_09_26_drive_0013
5 | 2011_09_26_drive_0014
6 | 2011_09_26_drive_0015
7 | 2011_09_26_drive_0017
8 | 2011_09_26_drive_0018
9 | 2011_09_26_drive_0019
10 | 2011_09_26_drive_0022
11 | 2011_09_26_drive_0027
12 | 2011_09_26_drive_0028
13 | 2011_09_26_drive_0029
14 | 2011_09_26_drive_0032
15 | 2011_09_26_drive_0036
16 | 2011_09_26_drive_0046
17 | 2011_09_26_drive_0051
18 | 2011_09_26_drive_0056
19 | 2011_09_26_drive_0057
20 | 2011_09_26_drive_0059
21 | 2011_09_26_drive_0070
22 | 2011_09_26_drive_0084
23 | 2011_09_26_drive_0096
24 | 2011_09_26_drive_0101
25 | 2011_09_26_drive_0104
26 | 2011_09_28_drive_0002
27 | 2011_09_29_drive_0004
28 | 2011_09_29_drive_0071
29 | 2011_10_03_drive_0047
30 |
--------------------------------------------------------------------------------
/data/prepare_train_data.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import argparse
3 | import scipy.misc
4 | import numpy as np
5 | from glob import glob
6 | from joblib import Parallel, delayed
7 | import os
8 |
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument("--dataset_dir", type=str, default='./dataset/KITTI/raw/data/', help="where the dataset is stored")
11 | parser.add_argument("--dataset_name", type=str, default="kitti_raw_eigen", choices=["kitti_raw_eigen", "kitti_odom"])
12 | parser.add_argument("--dump_root", type=str, default='./dataset/dfnet_train/', help="Where to dump the data")
13 | parser.add_argument("--seq_length", type=int, default=5, help="Length of each training sequence")
14 | parser.add_argument("--img_height", type=int, default=320, help="image height")
15 | parser.add_argument("--img_width", type=int, default=1152, help="image width")
16 | parser.add_argument("--num_threads", type=int, default=4, help="number of threads to use")
17 | args = parser.parse_args()
18 |
19 | def concat_image_seq(seq):
20 | for i, im in enumerate(seq):
21 | if i == 0:
22 | res = im
23 | else:
24 | res = np.hstack((res, im))
25 | return res
26 |
27 | def dump_example(n):
28 | if n % 2000 == 0:
29 | print('Progress %d/%d....' % (n, data_loader.num_train))
30 | example = data_loader.get_train_example_with_idx(n)
31 | if example == False:
32 | return
33 | image_seq = concat_image_seq(example['image_seq'])
34 | intrinsics = example['intrinsics']
35 | fx = intrinsics[0, 0]
36 | fy = intrinsics[1, 1]
37 | cx = intrinsics[0, 2]
38 | cy = intrinsics[1, 2]
39 | dump_dir = os.path.join(args.dump_root, example['folder_name'])
40 | # if not os.path.isdir(dump_dir):
41 | # os.makedirs(dump_dir, exist_ok=True)
42 | try:
43 | os.makedirs(dump_dir)
44 | except OSError:
45 | if not os.path.isdir(dump_dir):
46 | raise
47 | dump_img_file = os.path.join(dump_dir, '%s.jpg' % example['file_name'])
48 | scipy.misc.imsave(dump_img_file, image_seq.astype(np.uint8))
49 | dump_cam_file = os.path.join(dump_dir, '%s_cam.txt' % example['file_name'])
50 | with open(dump_cam_file, 'w') as f:
51 | f.write('%f,0.,%f,0.,%f,%f,0.,0.,1.' % (fx, cx, fy, cy))
52 | # Ground truth pose
53 | if 'pose_seq' in example.keys():
54 | pose_seq = example['pose_seq']
55 | dump_pose_file = os.path.join(dump_dir, '%s_pose.txt' % example['file_name'])
56 | tgt2src_6d, src2tgt_6d = pose_seq
57 | with open(dump_pose_file, 'w') as f:
58 | tx, ty, tz, rx, ry, rz = tgt2src_6d
59 | f.write('%f,%f,%f,%f,%f,%f,' % (tx, ty, tz, rx, ry, rz))
60 | tx, ty, tz, rx, ry, rz = src2tgt_6d
61 | f.write('%f,%f,%f,%f,%f,%f' % (tx, ty, tz, rx, ry, rz))
62 | else:
63 | pass
64 |
65 | def main():
66 | if not os.path.exists(args.dump_root):
67 | os.makedirs(args.dump_root)
68 |
69 | global data_loader
70 | if args.dataset_name == 'kitti_odom':
71 | from kitti.kitti_odom_loader import kitti_odom_loader
72 | data_loader = kitti_odom_loader(args.dataset_dir,
73 | img_height=args.img_height,
74 | img_width=args.img_width,
75 | seq_length=args.seq_length)
76 |
77 | if args.dataset_name == 'kitti_raw_eigen':
78 | from kitti.kitti_raw_loader import kitti_raw_loader
79 | data_loader = kitti_raw_loader(args.dataset_dir,
80 | split='eigen',
81 | img_height=args.img_height,
82 | img_width=args.img_width,
83 | seq_length=args.seq_length)
84 |
85 | #Parallel(n_jobs=args.num_threads)(delayed(dump_example)(n) for n in range(data_loader.num_train))
86 | # Debug only
87 | for n in range(data_loader.num_train):
88 | dump_example(n)
89 |
90 | # Split into train/val
91 | np.random.seed(8964)
92 | subfolders = os.listdir(args.dump_root)
93 | with open(os.path.join(args.dump_root, 'train.txt'), 'w') as tf:
94 | with open(os.path.join(args.dump_root, 'val.txt'), 'w') as vf:
95 | for s in subfolders:
96 | if not os.path.isdir(os.path.join(args.dump_root, '%s' % s)):
97 | continue
98 | imfiles = glob(os.path.join(args.dump_root, s, '*.jpg'))
99 | frame_ids = [os.path.basename(fi).split('.')[0] for fi in imfiles]
100 | for frame in frame_ids:
101 |
102 | if np.random.random() < 0.1:
103 | vf.write('%s %s\n' % (s, frame))
104 | else:
105 | tf.write('%s %s\n' % (s, frame))
106 |
107 |
108 | main()
109 |
110 |
--------------------------------------------------------------------------------
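Each example dumped by /data/prepare_train_data.py above is one wide JPEG holding the whole frame sequence stacked left-to-right (see concat_image_seq), plus a *_cam.txt holding the flattened 3x3 intrinsics. A minimal sketch of reading such an example back, assuming the default --img_width 1152 and --seq_length 5; the paths are placeholders:

# Illustrative only: recover the individual frames and the intrinsics matrix
# from one example produced by prepare_train_data.py.
import numpy as np
from PIL import Image

SEQ_LENGTH = 5      # --seq_length default
IMG_WIDTH = 1152    # --img_width default

def load_dumped_example(img_path, cam_path):
    strip = np.array(Image.open(img_path))                       # H x (SEQ_LENGTH*W) x 3
    frames = [strip[:, i * IMG_WIDTH:(i + 1) * IMG_WIDTH] for i in range(SEQ_LENGTH)]
    vals = np.genfromtxt(cam_path, delimiter=',')                # fx,0,cx,0,fy,cy,0,0,1
    return frames, vals.reshape(3, 3)

# frames, K = load_dumped_example('./dataset/dfnet_train/<scene>/<frame>.jpg',
#                                 './dataset/dfnet_train/<scene>/<frame>_cam.txt')

--------------------------------------------------------------------------------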
/kitti_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/kitti_eval/__init__.py
--------------------------------------------------------------------------------
/kitti_eval/depth_evaluation_utils.py:
--------------------------------------------------------------------------------
1 | # Mostly based on the code written by Clement Godard:
2 | # https://github.com/mrharicot/monodepth/blob/master/utils/evaluation_utils.py
3 | import numpy as np
4 | from scipy.interpolate import LinearNDInterpolator  # needed by lin_interp() below; this import was missing
5 | import os
6 | import cv2
7 | from collections import Counter
8 | import pickle
9 |
10 | def compute_errors(gt, pred):
11 | thresh = np.maximum((gt / pred), (pred / gt))
12 | a1 = (thresh < 1.25 ).mean()
13 | a2 = (thresh < 1.25 ** 2).mean()
14 | a3 = (thresh < 1.25 ** 3).mean()
15 |
16 | rmse = (gt - pred) ** 2
17 | rmse = np.sqrt(rmse.mean())
18 |
19 | rmse_log = (np.log(gt) - np.log(pred)) ** 2
20 | rmse_log = np.sqrt(rmse_log.mean())
21 |
22 | abs_rel = np.mean(np.abs(gt - pred) / gt)
23 |
24 | sq_rel = np.mean(((gt - pred)**2) / gt)
25 |
26 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
27 |
28 | ###############################################################################
29 | ####################### KITTI
30 |
31 | width_to_focal = dict()
32 | width_to_focal[1242] = 721.5377
33 | width_to_focal[1241] = 718.856
34 | width_to_focal[1224] = 707.0493
35 | width_to_focal[1238] = 718.3351
36 |
37 | def load_gt_disp_kitti(path):
38 | gt_disparities = []
39 | for i in range(200):
40 | disp = cv2.imread(path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1)
41 | disp = disp.astype(np.float32) / 256
42 | gt_disparities.append(disp)
43 | return gt_disparities
44 |
45 | def convert_disps_to_depths_kitti(gt_disparities, pred_disparities):
46 | gt_depths = []
47 | pred_depths = []
48 | pred_disparities_resized = []
49 |
50 | for i in range(len(gt_disparities)):
51 | gt_disp = gt_disparities[i]
52 | height, width = gt_disp.shape
53 |
54 | pred_disp = pred_disparities[i]
55 | pred_disp = width * cv2.resize(pred_disp, (width, height), interpolation=cv2.INTER_LINEAR)
56 |
57 | pred_disparities_resized.append(pred_disp)
58 |
59 | mask = gt_disp > 0
60 |
61 | gt_depth = width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask))
62 | pred_depth = width_to_focal[width] * 0.54 / pred_disp
63 |
64 | gt_depths.append(gt_depth)
65 | pred_depths.append(pred_depth)
66 | return gt_depths, pred_depths, pred_disparities_resized
67 |
68 |
69 | ###############################################################################
70 | ####################### EIGEN
71 |
72 | def read_text_lines(file_path):
73 | f = open(file_path, 'r')
74 | lines = f.readlines()
75 | f.close()
76 | lines = [l.rstrip() for l in lines]
77 | return lines
78 |
79 | def read_file_data(files, data_root):
80 | gt_files = []
81 | gt_calib = []
82 | im_sizes = []
83 | im_files = []
84 | cams = []
85 | num_probs = 0
86 | for filename in files:
87 | filename = filename.split()[0]
88 | splits = filename.split('/')
89 | # camera_id = filename[-1] # 2 is left, 3 is right
90 | date = splits[0]
91 | im_id = splits[4][:10]
92 | file_root = '{}/{}'
93 |
94 | im = filename
95 | vel = '{}/{}/velodyne_points/data/{}.bin'.format(splits[0], splits[1], im_id)
96 |
97 | if os.path.isfile(data_root + im):
98 | gt_files.append(data_root + vel)
99 | gt_calib.append(data_root + date + '/')
100 | im_sizes.append(cv2.imread(data_root + im).shape[:2])
101 | im_files.append(data_root + im)
102 | cams.append(2)
103 | else:
104 | num_probs += 1
105 | print('{} missing'.format(data_root + im))
106 | # print(num_probs, 'files missing')
107 |
108 | return gt_files, gt_calib, im_sizes, im_files, cams
109 |
110 | def load_velodyne_points(file_name):
111 | # adapted from https://github.com/hunse/kitti
112 | points = np.fromfile(file_name, dtype=np.float32).reshape(-1, 4)
113 | points[:, 3] = 1.0 # homogeneous
114 | return points
115 |
116 |
117 | def lin_interp(shape, xyd):
118 | # taken from https://github.com/hunse/kitti
119 | m, n = shape
120 | ij, d = xyd[:, 1::-1], xyd[:, 2]
121 | f = LinearNDInterpolator(ij, d, fill_value=0)
122 | J, I = np.meshgrid(np.arange(n), np.arange(m))
123 | IJ = np.vstack([I.flatten(), J.flatten()]).T
124 | disparity = f(IJ).reshape(shape)
125 | return disparity
126 |
127 |
128 | def read_calib_file(path):
129 | # taken from https://github.com/hunse/kitti
130 | float_chars = set("0123456789.e+- ")
131 | data = {}
132 | with open(path, 'r') as f:
133 | for line in f.readlines():
134 | key, value = line.split(':', 1)
135 | value = value.strip()
136 | data[key] = value
137 | if float_chars.issuperset(value):
138 | # try to cast to float array
139 | try:
140 | # (Yuliang) Add list() outside of map() for Python3.x
141 | data[key] = np.array(list(map(float, value.split(' '))))
142 | except ValueError:
143 | # casting error: data[key] already eq. value, so pass
144 | pass
145 |
146 | return data
147 |
148 |
149 | def get_focal_length_baseline(calib_dir, cam=2):
150 | cam2cam = read_calib_file(calib_dir + 'calib_cam_to_cam.txt')
151 | P2_rect = cam2cam['P_rect_02'].reshape(3,4)
152 | P3_rect = cam2cam['P_rect_03'].reshape(3,4)
153 |
154 | # cam 2 is left of camera 0 -6cm
155 | # cam 3 is to the right +54cm
156 | b2 = P2_rect[0,3] / -P2_rect[0,0]
157 | b3 = P3_rect[0,3] / -P3_rect[0,0]
158 | baseline = b3-b2
159 |
160 | if cam==2:
161 | focal_length = P2_rect[0,0]
162 | elif cam==3:
163 | focal_length = P3_rect[0,0]
164 |
165 | return focal_length, baseline
166 |
167 |
168 | def sub2ind(matrixSize, rowSub, colSub):
169 | m, n = matrixSize
170 | return rowSub * (n-1) + colSub - 1
171 |
172 | def generate_depth_map(calib_dir, velo_file_name, im_shape, cam=2, interp=False, vel_depth=False):
173 | # load calibration files
174 | cam2cam = read_calib_file(calib_dir + 'calib_cam_to_cam.txt')
175 | velo2cam = read_calib_file(calib_dir + 'calib_velo_to_cam.txt')
176 | velo2cam = np.hstack((velo2cam['R'].reshape(3,3), velo2cam['T'][..., np.newaxis]))
177 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0])))
178 |
179 | # compute projection matrix velodyne->image plane
180 | R_cam2rect = np.eye(4)
181 | R_cam2rect[:3,:3] = cam2cam['R_rect_00'].reshape(3,3)
182 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3,4)
183 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam)
184 |
185 | # load velodyne points and remove all behind image plane (approximation)
186 | # each row of the velodyne data is forward, left, up, reflectance
187 | velo = load_velodyne_points(velo_file_name)
188 | velo = velo[velo[:, 0] >= 0, :]
189 |
190 | # project the points to the camera
191 | velo_pts_im = np.dot(P_velo2im, velo.T).T
192 | velo_pts_im[:, :2] = velo_pts_im[:,:2] / velo_pts_im[:,2][..., np.newaxis]
193 |
194 | if vel_depth:
195 | velo_pts_im[:, 2] = velo[:, 0]
196 |
197 | # check if in bounds
198 | # use minus 1 to get the exact same value as KITTI matlab code
199 | velo_pts_im[:, 0] = np.round(velo_pts_im[:,0]) - 1
200 | velo_pts_im[:, 1] = np.round(velo_pts_im[:,1]) - 1
201 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0)
202 | val_inds = val_inds & (velo_pts_im[:,0] < im_shape[1]) & (velo_pts_im[:,1] < im_shape[0])
203 | velo_pts_im = velo_pts_im[val_inds, :]
204 |
205 | # project to image
206 | depth = np.zeros((im_shape))
207 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2]
208 |
209 | # find the duplicate points and choose the closest depth
210 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0])
211 | # (Yuliang) iteritems() -> items()
212 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1]
213 | for dd in dupe_inds:
214 | pts = np.where(inds==dd)[0]
215 | x_loc = int(velo_pts_im[pts[0], 0])
216 | y_loc = int(velo_pts_im[pts[0], 1])
217 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min()
218 | depth[depth<0] = 0
219 |
220 | if interp:
221 | # interpolate the depth map to fill in holes
222 | depth_interp = lin_interp(im_shape, velo_pts_im)
223 | return depth, depth_interp
224 | else:
225 | return depth
226 |
227 |
228 |
229 |
--------------------------------------------------------------------------------
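The stereo branch above (convert_disps_to_depths_kitti) turns disparity into metric depth with depth = f * B / d, using the KITTI baseline B = 0.54 m and a focal length looked up from the image width. A toy check of that relation on synthetic values:

# Toy check of the disparity-to-depth relation used in convert_disps_to_depths_kitti:
# depth = f * B / d, with B = 0.54 m and f taken from the width_to_focal table above.
import numpy as np

width_to_focal = {1242: 721.5377, 1241: 718.856, 1224: 707.0493, 1238: 718.3351}

def disp_to_depth(disp, width, baseline=0.54):
    disp = np.asarray(disp, dtype=np.float32)
    valid = disp > 0                                  # zero disparity marks missing ground truth
    depth = width_to_focal[width] * baseline / (disp + (1.0 - valid))
    depth[~valid] = 0.0
    return depth

print(disp_to_depth([0.0, 10.0, 20.0, 40.0], 1242))   # [0., ~38.96, ~19.48, ~9.74] metres

--------------------------------------------------------------------------------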
/kitti_eval/eval_depth.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import sys
3 | import cv2
4 | import os
5 | import numpy as np
6 | import argparse
7 | from depth_evaluation_utils import *
8 |
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument("--kitti_dir", type=str, default='./dataset/KITTI/raw/data/', help='Path to the KITTI dataset directory')
11 | parser.add_argument("--pred_file", type=str, help="Path to the prediction file")
12 | parser.add_argument("--split", type=str, default='val')
13 | parser.add_argument('--min_depth', type=float, default=1e-3, help="Threshold for minimum depth")
14 | parser.add_argument('--max_depth', type=float, default=80, help="Threshold for maximum depth")
15 | args = parser.parse_args()
16 |
17 | def main():
18 | if args.split == 'val':
19 | test_file_list = './data/kitti/val_files_eigen.txt'
20 | elif args.split == 'test':
21 | test_file_list = './data/kitti/test_files_eigen.txt'
22 | else:
23 | assert False
24 |
25 | pred_depths = np.load(args.pred_file)
26 | test_files = read_text_lines(test_file_list)
27 | gt_files, gt_calib, im_sizes, im_files, cams = \
28 | read_file_data(test_files, args.kitti_dir)
29 | num_test = len(im_files)
30 | gt_depths = []
31 | pred_depths_resized = []
32 | invalid_ids = []
33 | for t_id in range(num_test):
34 | camera_id = cams[t_id] # 2 is left, 3 is right
35 | # Some frames in val set do not have ground truth labels
36 | try:
37 | depth = generate_depth_map(gt_calib[t_id],
38 | gt_files[t_id],
39 | im_sizes[t_id],
40 | camera_id,
41 | False,
42 | True)
43 | gt_depths.append(depth.astype(np.float32))
44 |
45 | pred_depths_resized.append(
46 | cv2.resize(pred_depths[t_id],
47 | (im_sizes[t_id][1], im_sizes[t_id][0]),
48 | interpolation=cv2.INTER_LINEAR))
49 | except:
50 | invalid_ids.append(t_id)
51 | print(t_id)
52 | pred_depths = pred_depths_resized
53 | num_test -= len(invalid_ids)
54 |
55 | rms = np.zeros(num_test, np.float32)
56 | log_rms = np.zeros(num_test, np.float32)
57 | abs_rel = np.zeros(num_test, np.float32)
58 | sq_rel = np.zeros(num_test, np.float32)
59 | d1_all = np.zeros(num_test, np.float32)  # not computed for monocular depth; reported as 0
60 | a1 = np.zeros(num_test, np.float32)
61 | a2 = np.zeros(num_test, np.float32)
62 | a3 = np.zeros(num_test, np.float32)
63 | for i in range(num_test):
64 | gt_depth = gt_depths[i]
65 | pred_depth = np.copy(pred_depths[i])
66 |
67 | mask = np.logical_and(gt_depth > args.min_depth,
68 | gt_depth < args.max_depth)
69 | # crop used by Garg ECCV16 to reproduce Eigen NIPS14 results
70 | # if used on gt_size 370x1224 produces a crop of [-218, -3, 44, 1180]
71 | gt_height, gt_width = gt_depth.shape
72 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
73 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
74 |
75 | crop_mask = np.zeros(mask.shape)
76 | crop_mask[crop[0]:crop[1],crop[2]:crop[3]] = 1
77 | mask = np.logical_and(mask, crop_mask)
78 |
79 | # Scale matching
80 | scalor = np.median(gt_depth[mask])/np.median(pred_depth[mask])
81 | pred_depth[mask] *= scalor
82 |
83 | pred_depth[pred_depth < args.min_depth] = args.min_depth
84 | pred_depth[pred_depth > args.max_depth] = args.max_depth
85 | abs_rel[i], sq_rel[i], rms[i], log_rms[i], a1[i], a2[i], a3[i] = \
86 | compute_errors(gt_depth[mask], pred_depth[mask])
87 |
88 | print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format('abs_rel', 'sq_rel', 'rms', 'log_rms', 'd1_all', 'a1', 'a2', 'a3'))
89 | print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(abs_rel.mean(), sq_rel.mean(), rms.mean(), log_rms.mean(), d1_all.mean(), a1.mean(), a2.mean(), a3.mean()))
90 |
91 | main()
92 |
--------------------------------------------------------------------------------
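Because the depth network is trained without metric supervision, eval_depth.py above rescales each prediction by the median ratio (scalor) before clipping and computing the metrics. A small self-contained sketch of that step on synthetic data:

# Minimal sketch of the per-image median scale matching used in eval_depth.py above.
import numpy as np

np.random.seed(0)
gt = np.random.uniform(2.0, 60.0, size=(10, 10)).astype(np.float32)   # metric ground truth
pred = gt / 7.3                                                        # prediction off by an unknown scale
mask = (gt > 1e-3) & (gt < 80.0)                                       # same min/max depth thresholds as the flags

scalor = np.median(gt[mask]) / np.median(pred[mask])
pred_scaled = np.clip(pred * scalor, 1e-3, 80.0)

abs_rel = np.mean(np.abs(gt[mask] - pred_scaled[mask]) / gt[mask])
print('scale %.2f, abs_rel %.6f' % (scalor, abs_rel))                  # scale 7.30, abs_rel ~0

--------------------------------------------------------------------------------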
/kitti_eval/eval_pose.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import numpy as np
4 | import argparse
5 | from glob import glob
6 | from pose_evaluation_utils import *
7 |
8 | parser = argparse.ArgumentParser()
9 | parser.add_argument("--gtruth_dir", type=str,
10 | help='Path to the directory with ground-truth trajectories')
11 | parser.add_argument("--pred_dir", type=str,
12 | help="Path to the directory with predicted trajectories")
13 | args = parser.parse_args()
14 |
15 | def main():
16 | pred_files = glob(os.path.join(args.pred_dir, '*.txt'))
17 | ate_all = []
18 | for i in range(len(pred_files)):
19 | gtruth_file = os.path.join(args.gtruth_dir, os.path.basename(pred_files[i]))
20 | if not os.path.exists(gtruth_file):
21 | continue
22 | ate = compute_ate(gtruth_file, pred_files[i])
23 | if ate == False:
24 | continue
25 | ate_all.append(ate)
26 | ate_all = np.array(ate_all)
27 | print("Predictions dir: %s" % args.pred_dir)
28 | print("ATE mean: %.4f, std: %.4f" % (np.mean(ate_all), np.std(ate_all)))
29 | main()
30 |
--------------------------------------------------------------------------------
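compute_ate above is imported from pose_evaluation_utils.py, which is not reproduced in this excerpt. Purely to illustrate the metric being averaged, and not the repository's exact implementation (which also parses the trajectory files and handles any alignment), a generic absolute-trajectory-error computation between two matched position sequences might look like:

# Illustrative only: a generic absolute-trajectory-error (ATE) computation between
# two trajectories of matched 3D positions.
import numpy as np

def ate_rmse(gt_xyz, pred_xyz):
    gt_xyz = np.asarray(gt_xyz, dtype=np.float64)
    pred_xyz = np.asarray(pred_xyz, dtype=np.float64)
    # remove the translation offset so both trajectories start at the origin
    gt_xyz = gt_xyz - gt_xyz[0]
    pred_xyz = pred_xyz - pred_xyz[0]
    err = np.linalg.norm(gt_xyz - pred_xyz, axis=1)
    return np.sqrt(np.mean(err ** 2))

gt = [[0, 0, 0], [1, 0, 0], [2, 0, 0]]
pred = [[0, 0, 0], [1, 0.1, 0], [2, 0.2, 0]]
print('ATE (RMSE): %.3f m' % ate_rmse(gt, pred))   # ~0.129

--------------------------------------------------------------------------------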
/misc/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Download pre-trained models
4 | echo "Downloading pre-trained models"
5 | wget https://filebox.ece.vt.edu/~ylzou/eccv2018dfnet/pretrained.tar
6 | tar -xvf pretrained.tar
7 |
8 | # Download training data
9 | echo "Downloading training data"
10 | mkdir dataset
11 | cd dataset
12 | wget https://filebox.ece.vt.edu/~ylzou/eccv2018dfnet/kitti_5frame_1152_320.tar
13 | tar -xvf kitti_5frame_1152_320.tar
14 | cd ..
15 |
16 |
--------------------------------------------------------------------------------
/misc/zou2018dfnet.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vt-vl-lab/DF-Net/53f4e016b881d55624042f755235eb8d7d248209/misc/zou2018dfnet.gif
--------------------------------------------------------------------------------
/test_flownet_2012.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import cv2
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | import PIL.Image as pil
7 | import png
8 | import scipy
9 |
10 | from core import flow_to_image
11 | from core import flownet
12 |
13 | flags = tf.app.flags
14 | flags.DEFINE_integer("batch_size", 1, "The size of of a sample batch")
15 | flags.DEFINE_integer("img_height", 384, "Image height")
16 | flags.DEFINE_integer("img_width", 1280, "Image width")
17 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/flow2012/training/', "Dataset directory")
18 | flags.DEFINE_string("output_dir", None, "Output directory")
19 | flags.DEFINE_string("ckpt_file", 'pretrained/unflowc_pre', "checkpoint file")
20 | FLAGS = flags.FLAGS
21 |
22 | FLOW_SCALE = 5.0
23 |
24 | # kitti 2012 has 194 training pairs, 195 test pairs
25 | if 'train' in FLAGS.dataset_dir:
26 | NUM = 194
27 | elif 'test' in FLAGS.dataset_dir:
28 | NUM = 195
29 |
30 | def get_flow(path):
31 | bgr = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
32 | invalid = bgr[:, :, 0] == 0
33 | out_flow = (bgr[:, :, 2:0:-1].astype('f4') - 2**15) / 64.
34 | out_flow[invalid] = 0
35 | return out_flow, bgr[:, :, 0]
36 |
37 | def compute_flow_error(gt_flow, pred_flow, mask):
38 | H, W, _ = gt_flow.shape
39 | old_H, old_W, _ = pred_flow.shape
40 | # Resize predicted flow to the ground-truth resolution and rescale the flow values accordingly
41 | pred0 = cv2.resize(pred_flow[:,:,0], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*W/old_W)
42 | pred1 = cv2.resize(pred_flow[:,:,1], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*H/old_H)
43 | pred = np.stack((pred0, pred1), axis=-1) * FLOW_SCALE
44 |
45 | err = np.sqrt(np.sum(np.square(gt_flow - pred), axis=-1))
46 | err_valid = np.sum(err * mask) / np.sum(mask)
47 | return err_valid, pred
48 |
49 | def write_flow_png(name, flow):
50 | H, W, _ = flow.shape
51 | out = np.ones((H, W, 3), dtype=np.uint64)
52 | out[:,:,1] = np.minimum(np.maximum(flow[:,:,1]*64.+2**15, 0), 2**16-1).astype(np.uint64)  # clamp to the valid 16-bit range
53 | out[:,:,0] = np.minimum(np.maximum(flow[:,:,0]*64.+2**15, 0), 2**16-1).astype(np.uint64)
54 | with open(name, 'wb') as f:
55 | writer = png.Writer(width=W, height=H, bitdepth=16)
56 | im2list = out.reshape(-1, out.shape[1]*out.shape[2]).tolist()
57 | writer.write(f, im2list)
58 |
59 |
60 | def pick_frame(path):
61 | new_files = []
62 | # NUM is 194 for the flow2012 training split, 195 for the test split (set above)
63 | for i in range(NUM):
64 | frame1 = os.path.join(path, 'colored_0', '{:06d}'.format(i) + '_10.png')
65 | frame2 = os.path.join(path, 'colored_0', '{:06d}'.format(i) + '_11.png')
66 | new_files.append([frame1, frame2])
67 | return new_files
68 |
69 | def main(_):
70 | new_files = pick_frame(FLAGS.dataset_dir)
71 | basename = os.path.basename(FLAGS.ckpt_file)
72 |
73 | im1_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3))
74 | im2_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3))
75 | pred_flows = flownet(im1_pl, im2_pl, flownet_spec='C')
76 |
77 | saver = tf.train.Saver([var for var in tf.all_variables() if 'flow' in var.name])
78 | config = tf.ConfigProto()
79 | config.gpu_options.allow_growth = True
80 | errs = np.zeros(NUM)
81 |
82 | if not FLAGS.output_dir is None and not os.path.exists(FLAGS.output_dir):
83 | os.makedirs(FLAGS.output_dir)
84 |
85 | with tf.Session(config=config) as sess:
86 | saver.restore(sess, FLAGS.ckpt_file)
87 | # For val set
88 | for t in range(0, len(new_files)):
89 | if t % 100 == 0:
90 | print('processing %s: %d/%d' % (basename, t, len(new_files)))
91 | raw_im0 = pil.open(new_files[t][0])
92 | raw_im1 = pil.open(new_files[t][1])
93 | scaled_im0 = raw_im0.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS)
94 | scaled_im1 = raw_im1.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS)
95 | # Subtract the ImageNet channel mean
96 | channel_mean = np.array([104.920005, 110.1753, 114.785955])
97 | scaled_im0 = (np.expand_dims(np.array(scaled_im0), axis=0).astype(np.float32)-channel_mean)/255.
98 | scaled_im1 = (np.expand_dims(np.array(scaled_im1), axis=0).astype(np.float32)-channel_mean)/255.
99 | feed_dict = {im1_pl: scaled_im0, im2_pl: scaled_im1}
100 | pred_flows_val = sess.run(pred_flows, feed_dict=feed_dict)
101 | pred_flow_val = pred_flows_val[-1][0]
102 |
103 | # Only for training set
104 | if 'train' in FLAGS.dataset_dir:
105 | # no occlusion
106 | #gt_flow, mask = get_flow(new_files[t][0].replace('colored_0', 'flow_noc'))
107 | # all
108 | gt_flow, mask = get_flow(new_files[t][0].replace('colored_0', 'flow_occ'))
109 | errs[t], scaled_pred = compute_flow_error(gt_flow, pred_flow_val[0,:,:,:], mask)
110 |
111 | # Save for eval (test split has no ground truth; the raw image is passed only so the prediction is resized to the original resolution)
112 | if 'test' in FLAGS.dataset_dir:
113 | _, scaled_pred = compute_flow_error(np.array(raw_im0)[:,:,:2], pred_flow_val[0,:,:,:], np.array(raw_im0)[:,:,0])
114 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1])
115 | write_flow_png(png_name, scaled_pred)
116 |
117 | # Save for visual colormap
118 | if not 'test' in FLAGS.dataset_dir and not FLAGS.output_dir is None:
119 | flow_im = flow_to_image(scaled_pred)
120 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]).replace('png', 'jpg')
121 | cv2.imwrite(png_name, flow_im[:,:,::-1])
122 |
123 | print('{:>10}'.format('(valid) endpoint error'))
124 | print('{:10.4f}'.format(errs.mean()))
125 |
126 | if __name__ == '__main__':
127 | tf.app.run()
128 |
--------------------------------------------------------------------------------
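get_flow and write_flow_png above follow the KITTI flow PNG convention: u and v are stored as 16-bit values equal to flow*64 + 2^15, with a separate validity channel where 0 marks invalid pixels. A round-trip sketch of that encoding on a plain array, with no PNG I/O:

# Round-trip sketch of the KITTI flow encoding used by get_flow/write_flow_png:
# stored = flow * 64 + 2**15 (16-bit), decoded = (stored - 2**15) / 64.
import numpy as np

def encode_kitti_flow(flow):
    enc = np.clip(flow * 64.0 + 2 ** 15, 0, 2 ** 16 - 1)
    return enc.astype(np.uint16)

def decode_kitti_flow(enc, valid):
    flow = (enc.astype(np.float32) - 2 ** 15) / 64.0
    flow[valid == 0] = 0.0
    return flow

flow = np.array([[[3.25, -1.5]]], dtype=np.float32)   # one pixel, (u, v)
valid = np.ones((1, 1), dtype=np.uint8)
enc = encode_kitti_flow(flow)
print(decode_kitti_flow(enc, valid))                   # [[[ 3.25 -1.5 ]]]

--------------------------------------------------------------------------------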
/test_flownet_2015.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import cv2
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | import PIL.Image as pil
7 | import png
8 | import scipy
9 |
10 | from core import flow_to_image
11 | from core import flownet
12 |
13 | flags = tf.app.flags
14 | flags.DEFINE_integer("batch_size", 1, "The size of of a sample batch")
15 | flags.DEFINE_integer("img_height", 384, "Image height")
16 | flags.DEFINE_integer("img_width", 1280, "Image width")
17 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/flow2015/training/', "Dataset directory")
18 | flags.DEFINE_string("output_dir", None, "Output directory")
19 | flags.DEFINE_string("ckpt_file", 'pretrained/unflowc_pre', "checkpoint file")
20 | FLAGS = flags.FLAGS
21 |
22 | FLOW_SCALE = 5.0
23 |
24 | # kitti 2015 has 200 training pairs, 200 test pairs
25 | NUM = 200
26 |
27 | def get_flow(path):
28 | bgr = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
29 | invalid = bgr[:, :, 0] == 0
30 | out_flow = (bgr[:, :, 2:0:-1].astype('f4') - 2**15) / 64.
31 | out_flow[invalid] = 0
32 | return out_flow, bgr[:, :, 0]
33 |
34 | def compute_flow_error(gt_flow, pred_flow, mask):
35 | H, W, _ = gt_flow.shape
36 | old_H, old_W, _ = pred_flow.shape
37 | # Resize predicted flow to the ground-truth resolution and rescale the flow values accordingly
38 | pred0 = cv2.resize(pred_flow[:,:,0], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*W/old_W)
39 | pred1 = cv2.resize(pred_flow[:,:,1], (W, H), interpolation=cv2.INTER_LINEAR) * (1.0*H/old_H)
40 | pred = np.stack((pred0, pred1), axis=-1) * FLOW_SCALE
41 |
42 | err = np.sqrt(np.sum(np.square(gt_flow - pred), axis=-1))
43 | err_valid = np.sum(err * mask) / np.sum(mask)
44 |
45 | gt_mag = np.sqrt(np.sum(np.square(gt_flow), axis=-1))
46 | mask1 = err > 3.
47 | mask2 = err / (gt_mag+1e-12) > 0.05
48 | final_mask = np.logical_and(np.logical_and(mask1, mask2), mask)
49 | f1 = final_mask.sum()/mask.sum()
50 |
51 | return err_valid, pred, f1
52 |
53 | def write_flow_png(name, flow):
54 | H, W, _ = flow.shape
55 | out = np.ones((H, W, 3), dtype=np.uint64)
56 | out[:,:,1] = np.minimum(np.maximum(flow[:,:,1]*64.+2**15, 0), 2**16-1).astype(np.uint64)  # clamp to the valid 16-bit range
57 | out[:,:,0] = np.minimum(np.maximum(flow[:,:,0]*64.+2**15, 0), 2**16-1).astype(np.uint64)
58 | with open(name, 'wb') as f:
59 | writer = png.Writer(width=W, height=H, bitdepth=16)
60 | im2list = out.reshape(-1, out.shape[1]*out.shape[2]).tolist()
61 | writer.write(f, im2list)
62 |
63 |
64 | def pick_frame(path):
65 | new_files = []
66 | for i in range(NUM):
67 | frame1 = os.path.join(path, 'image_2', '{:06d}'.format(i) + '_10.png')
68 | frame2 = os.path.join(path, 'image_2', '{:06d}'.format(i) + '_11.png')
69 | new_files.append([frame1, frame2])
70 | return new_files
71 |
72 | def main(_):
73 | new_files = pick_frame(FLAGS.dataset_dir)
74 | basename = os.path.basename(FLAGS.ckpt_file)
75 |
76 | im1_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3))
77 | im2_pl = tf.placeholder(dtype=tf.float32, shape=(1, FLAGS.img_height, FLAGS.img_width, 3))
78 | pred_flows = flownet(im1_pl, im2_pl, flownet_spec='C')
79 |
80 | saver = tf.train.Saver([var for var in tf.all_variables()])
81 | config = tf.ConfigProto()
82 | config.gpu_options.allow_growth = True
83 | errs = np.zeros(NUM)
84 | f1 = np.zeros(NUM)
85 |
86 | if not FLAGS.output_dir is None and not os.path.exists(FLAGS.output_dir):
87 | os.makedirs(FLAGS.output_dir)
88 |
89 | with tf.Session(config=config) as sess:
90 | saver.restore(sess, FLAGS.ckpt_file)
91 | # For val set
92 | for t in range(0, len(new_files)):
93 | if t % 100 == 0:
94 | print('processing %s: %d/%d' % (basename, t, len(new_files)))
95 | raw_im0 = pil.open(new_files[t][0])
96 | raw_im1 = pil.open(new_files[t][1])
97 | scaled_im0 = raw_im0.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS)
98 | scaled_im1 = raw_im1.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS)
99 | # Subtract the ImageNet channel mean
100 | channel_mean = np.array([104.920005, 110.1753, 114.785955])
101 | scaled_im0 = (np.expand_dims(np.array(scaled_im0), axis=0).astype(np.float32)-channel_mean)/255.
102 | scaled_im1 = (np.expand_dims(np.array(scaled_im1), axis=0).astype(np.float32)-channel_mean)/255.
103 | feed_dict = {im1_pl: scaled_im0, im2_pl: scaled_im1}
104 | pred_flows_val = sess.run(pred_flows, feed_dict=feed_dict)
105 | pred_flow_val = pred_flows_val[-1][0]
106 |
107 | # Only for training set
108 | if 'train' in FLAGS.dataset_dir:
109 | # no occlusion
110 | #gt_flow, mask = get_flow(new_files[t][0].replace('image_2', 'flow_noc'))
111 | # all
112 | gt_flow, mask = get_flow(new_files[t][0].replace('image_2', 'flow_occ'))
113 | errs[t], scaled_pred, f1[t] = compute_flow_error(gt_flow, pred_flow_val[0,:,:,:], mask)
114 |
115 | # Save for eval (test split has no ground truth; the raw image is passed only so the prediction is resized to the original resolution)
116 | if 'test' in FLAGS.dataset_dir:
117 | _, scaled_pred, _ = compute_flow_error(np.array(raw_im0)[:,:,:2], pred_flow_val[0,:,:,:], np.array(raw_im0)[:,:,0])
118 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1])
119 | write_flow_png(png_name, scaled_pred)
120 |
121 | # Save for visual colormap
122 | if not 'test' in FLAGS.dataset_dir and not FLAGS.output_dir is None:
123 | flow_im = flow_to_image(scaled_pred)
124 | png_name = os.path.join(FLAGS.output_dir, new_files[t][0].split('/')[-1]).replace('png', 'jpg')
125 | cv2.imwrite(png_name, flow_im[:,:,::-1])
126 |
127 | print('{:>10}, {:>10}'.format('(valid) endpoint error', 'f1 score'))
128 | print('{:10.4f}, {:10.4f}'.format(errs.mean(), f1.mean()))
129 |
130 | if __name__ == '__main__':
131 | tf.app.run()
132 |
--------------------------------------------------------------------------------
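The f1 value computed in compute_flow_error above is the KITTI 2015 outlier rate: a valid pixel counts as an outlier when its endpoint error exceeds both 3 px and 5% of the ground-truth flow magnitude. A toy sketch of that rule:

# Toy sketch of the KITTI 2015 outlier (Fl) rule used in compute_flow_error above.
import numpy as np

def fl_outlier_rate(gt_flow, pred_flow, valid_mask):
    err = np.sqrt(np.sum((gt_flow - pred_flow) ** 2, axis=-1))
    gt_mag = np.sqrt(np.sum(gt_flow ** 2, axis=-1))
    outlier = (err > 3.0) & (err / (gt_mag + 1e-12) > 0.05) & (valid_mask > 0)
    return outlier.sum() / valid_mask.sum()

gt = np.zeros((2, 2, 2))
gt[..., 0] = 100.0                      # uniform 100 px horizontal flow
pred = gt.copy()
pred[0, 0, 0] = 90.0                    # 10 px error: > 3 px and > 5% of 100 px -> outlier
pred[0, 1, 0] = 96.0                    # 4 px error: > 3 px but only 4% of 100 px -> inlier
mask = np.ones((2, 2))
print(fl_outlier_rate(gt, pred, mask))  # 0.25

--------------------------------------------------------------------------------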
/test_kitti_depth.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import tensorflow as tf
3 | import numpy as np
4 | import os
5 | import PIL.Image as pil
6 | from core import DFLearner
7 |
8 | flags = tf.app.flags
9 | flags.DEFINE_integer("batch_size", 4, "The size of of a sample batch")
10 | flags.DEFINE_integer("img_height", 160, "Image height")
11 | flags.DEFINE_integer("img_width", 576, "Image width")
12 | flags.DEFINE_string("dataset_dir", './dataset/KITTI/raw/data/', "Dataset directory")
13 | flags.DEFINE_string("split", 'val', 'val or test')
14 | flags.DEFINE_string("output_dir", None, "Output directory")
15 | flags.DEFINE_string("ckpt_file", None, "checkpoint file")
16 | FLAGS = flags.FLAGS
17 |
18 | def main(_):
19 | if FLAGS.split == 'val':
20 | txt_file = 'data/kitti/val_files_eigen.txt'
21 | elif FLAGS.split == 'test':
22 | txt_file = 'data/kitti/test_files_eigen.txt'
23 | else:
24 | assert False
25 |
26 | with open(txt_file, 'r') as f:
27 | test_files = f.readlines()
28 | test_files = [FLAGS.dataset_dir + t[:-1] for t in test_files]
29 | if not os.path.exists(FLAGS.output_dir):
30 | os.makedirs(FLAGS.output_dir)
31 | basename = os.path.basename(FLAGS.ckpt_file)
32 | output_file = os.path.join(FLAGS.output_dir, basename)
33 | model = DFLearner()
34 | model.setup_inference(img_height=FLAGS.img_height,
35 | img_width=FLAGS.img_width,
36 | batch_size=FLAGS.batch_size,
37 | mode='depth')
38 | saver = tf.train.Saver([var for var in tf.model_variables()])
39 | config = tf.ConfigProto()
40 | config.gpu_options.allow_growth = True
41 | with tf.Session(config=config) as sess:
42 | saver.restore(sess, FLAGS.ckpt_file)
43 | pred_all = []
44 | for t in range(0, len(test_files), FLAGS.batch_size):
45 | if t % 100 == 0:
46 | print('processing %s: %d/%d' % (basename, t, len(test_files)))
47 | inputs = np.zeros(
48 | (FLAGS.batch_size, FLAGS.img_height, FLAGS.img_width, 3),
49 | dtype=np.uint8)
50 | for b in range(FLAGS.batch_size):
51 | idx = t + b
52 | if idx >= len(test_files):
53 | break
54 | raw_im = pil.open(test_files[idx])
55 | scaled_im = raw_im.resize((FLAGS.img_width, FLAGS.img_height), pil.ANTIALIAS)
56 | inputs[b] = np.array(scaled_im)
57 | pred = model.inference(inputs, sess, mode='depth')
58 | for b in range(FLAGS.batch_size):
59 | idx = t + b
60 | if idx >= len(test_files):
61 | break
62 | pred_all.append(pred['depth'][b,:,:,0])
63 | np.save(output_file, pred_all)
64 |
65 | if __name__ == '__main__':
66 | tf.app.run()
67 |
--------------------------------------------------------------------------------
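test_kitti_depth.py saves all per-frame depth maps into one .npy file via np.save; that file is what kitti_eval/eval_depth.py later loads through --pred_file. A quick sanity check of such a file (the path below is a placeholder for whatever --output_dir and checkpoint basename were used):

# Quick sanity check of the predictions saved by test_kitti_depth.py.
import numpy as np

preds = np.load('./results/model-100000.npy')          # stack of HxW depth maps saved with np.save
print(len(preds), preds[0].shape, preds[0].dtype)      # e.g. 697 (160, 576) float32
# This is the array that kitti_eval/eval_depth.py consumes via --pred_file.

--------------------------------------------------------------------------------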
/train_df.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import tensorflow as tf
3 | import pprint
4 | import random
5 | import numpy as np
6 | from core import DFLearner
7 | import os
8 |
9 | flags = tf.app.flags
10 | flags.DEFINE_string("dataset_dir", "./dataset/kitti_5frame_1152_320/", "Dataset directory")
11 | flags.DEFINE_string("checkpoint_dir", "./ckpt/dfnet", "Directory name to save the checkpoints")
12 | flags.DEFINE_string("ckpt_flow", "pretrained/unflowc_pre", "checkpoint for Flow Net")
13 | flags.DEFINE_string("ckpt_dp", "pretrained/cs_5frame_pre", "checkpoint for Depth Net and Pose Net")
14 | flags.DEFINE_string("ckpt_pose", None, "checkpoint for Pose Net, if not shared with Depth Net")
15 | flags.DEFINE_float("learning_rate", 0.0001, "Learning rate of for adam")
16 | flags.DEFINE_float("beta1", 0.9, "Momentum term of adam")
17 | flags.DEFINE_float("smooth_weight", 3.0, "Weight for smoothness")
18 | flags.DEFINE_float("alpha_image_loss", 0.85, "Weight between SSIM and L1 in the image loss")
19 | flags.DEFINE_float("depth_consistency", 0.2, "Weight for forward-backward depth consistency loss.")
20 | flags.DEFINE_float("flow_smooth_weight", 3.0, "Weight for flow smoothness")
21 | flags.DEFINE_float("flow_consistency", 0.2, "Weight for forward-backward flow consistency loss.")
22 | flags.DEFINE_float("cross_consistency", 0.5, "Weight for cross-network consistency loss")
23 | flags.DEFINE_integer("batch_size", 4, "The size of of a sample batch, must divisible by number of GPUs!")
24 | flags.DEFINE_integer("num_gpus", 4, "Number of GPUs for training, starting from 0.")
25 | flags.DEFINE_integer("img_height", 320, "Image height")
26 | flags.DEFINE_integer("img_width", 1152, "Image width")
27 | flags.DEFINE_integer("seq_length", 5, "Sequence length for each example") # Fixed. Don't change
28 | flags.DEFINE_integer("max_steps", 100000, "Maximum number of training iterations")
29 | flags.DEFINE_integer("summary_freq", 100, "Logging every log_freq iterations")
30 | flags.DEFINE_integer("save_latest_freq", 5000, \
31 | "Save the latest model every save_latest_freq iterations (overwrites the previous latest model)")
32 | flags.DEFINE_boolean("continue_train", True, "Continue training from previous checkpoint")
33 | flags.DEFINE_boolean("scale_normalize", False, "Scale normalization for disparity.") # Set to True will break the training.
34 | flags.DEFINE_boolean("fix_pose", False, "Fix pose network")
35 | FLAGS = flags.FLAGS
36 |
37 | def main(_):
38 | seed = 8964
39 | tf.set_random_seed(seed)
40 | np.random.seed(seed)
41 | random.seed(seed)
42 |
43 | pp = pprint.PrettyPrinter()
44 | pp.pprint(flags.FLAGS.__flags)
45 |
46 | if not os.path.exists(FLAGS.checkpoint_dir):
47 | os.makedirs(FLAGS.checkpoint_dir)
48 |
49 | learner = DFLearner()
50 | learner.train(FLAGS)
51 |
52 | if __name__ == '__main__':
53 | tf.app.run()
54 |
--------------------------------------------------------------------------------
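The batch_size help text in train_df.py above requires divisibility by num_gpus; the multi-GPU split itself happens inside core/DFLearner.py, which is not shown in this excerpt. A minimal sketch of the implied per-GPU batch arithmetic, using the flag defaults:

# Illustrative only: the batch/GPU arithmetic implied by the batch_size and num_gpus flags.
import numpy as np

def split_batch(batch, num_gpus):
    assert batch.shape[0] % num_gpus == 0, "batch_size must be divisible by num_gpus"
    return np.split(batch, num_gpus, axis=0)

batch = np.zeros((4, 320, 1152, 3), dtype=np.float32)   # batch_size=4, 320x1152 images (flag defaults)
shards = split_batch(batch, 4)                          # one 1-example shard per GPU
print(len(shards), shards[0].shape)                     # 4 (1, 320, 1152, 3)

--------------------------------------------------------------------------------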