├── .gitignore
├── LICENSE
├── README.md
├── dense_estimation
│   ├── __init__.py
│   ├── app
│   │   ├── experiment.py
│   │   └── gui.py
│   ├── data.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── image_utils.py
│   │   ├── nyu_depth_v2.py
│   │   └── util.py
│   ├── densenet.py
│   ├── distributions.py
│   ├── logger.py
│   ├── losses.py
│   ├── output.py
│   ├── resnet.py
│   └── trainer.py
├── nyud_raw_train_to_npy.py
├── nyud_test_to_npy.py
├── preview_dataset.py
├── process_raw.m
├── test.py
├── test_laina.py
└── train.py

/.gitignore:
--------------------------------------------------------------------------------
1 | /datasets
2 | /log
3 | /checkpoints
4 | /downloads
5 | *.pyc
6 | __pycache__
7 | octave-workspace
8 | /laina_models
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Simon Meister
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Monocular Depth Prediction
2 |
3 | This repository contains an unofficial PyTorch implementation of the monocular depth prediction model described in
4 | ["Deeper Depth Prediction with Fully Convolutional Residual Networks"](https://arxiv.org/abs/1606.00373) by [Iro Laina](http://campar.in.tum.de/Main/IroLaina) and others.
5 | For the official models, see the
6 | [FCRN-DepthPrediction](https://github.com/iro-cp/FCRN-DepthPrediction) repository.
7 | This implementation supports data pre-processing, training from scratch, and evaluation. The code currently only supports the NYU Depth v2 dataset, but it should be easy to add other datasets.
8 |
9 | Note that there is some code to support uncertainty (variance) prediction; however, some dependencies are missing from this repo and I didn't have time to document this. You don't need to worry about this code and can always leave the `--dist` argument set to `''` to use the code for standard depth prediction.
10 |
11 | ### TODO
12 | - upload evaluation performance numbers on NYU Depth
13 | - document test.py script
14 |
15 | ### License
16 | This project is licensed under the MIT License (refer to the LICENSE file for details).
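### Quick inference sketch
The snippet below is a minimal, untested sketch of how a trained model could be loaded for prediction; the checkpoint path is hypothetical, and the `Variable`-based calls match the PyTorch 0.x-era API used throughout this repo. Input height and width should be divisible by 32 so the decoder's skip connections line up.

```python
import torch
from torch.autograd import Variable
from dense_estimation.resnet import resnet50

# dropout_active=False disables the encoder dropout, which this implementation
# otherwise applies even in eval() mode
model = resnet50(pretrained=False, dropout_active=False)
model.load_state_dict(torch.load('checkpoints/my_test/model_79.pth'))  # hypothetical path
model.eval()

image = Variable(torch.randn(1, 3, 256, 320))  # normalized RGB batch (see NYUD_MEAN/NYUD_STD)
depth = model(image)                           # (1, 1, 256, 320) depth prediction
```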
17 |
18 | ## Setup (Python 3)
19 |
20 | ### Install prerequisites
21 | * install [pytorch](https://pytorch.org/)
22 | * install [tensorflow](https://www.tensorflow.org/) (for tensorboard visualization only - no gpu support required). The easiest way is to run `pip install tensorflow`.
23 | * install other python packages: `pip install scipy matplotlib h5py`
24 | * install MATLAB (the pre-processing script depends on the NYU Depth v2 MATLAB toolbox)
25 |
26 | ### Prepare datasets
27 | * `python nyud_test_to_npy.py` (modify the paths in that file to point to the correct dirs)
28 | * download the NYU Depth v2 raw dataset (~400GB) and the toolbox from https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html.
29 | * generate the training dataset with MATLAB - see process_raw.m
30 | * `python nyud_raw_train_to_npy.py` (modify the paths in that file to point to the correct dirs)
31 | * modify raw_root in train.py and test.py to point to the correct dir
32 |
33 |
34 | ## Usage examples
35 |
36 | ### Train and view results
37 | * `python train.py --ex my_test`
38 | * `tensorboard --logdir=log/my_test`
39 | * open `localhost:6006` in a browser
40 |
41 | ### Continue training from checkpoint
42 | Checkpoints are stored after each epoch.
43 |
44 | * `python train.py --ex my_test --epochs 80 --lr 0.01`
45 | * `python train.py --ex my_test --epochs 50 --lr 0.003`
46 |
47 | ### View all training options
48 | * `python train.py --help`
49 |
--------------------------------------------------------------------------------
/dense_estimation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonmeister/pytorch-mono-depth/713c70e2fdae6d9d6e0322febadfedcaee9470d3/dense_estimation/__init__.py
--------------------------------------------------------------------------------
/dense_estimation/app/experiment.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 |
5 | def get_experiment(name, overwrite, epoch=None):
6 |     log_dir = os.path.join('./log', name)
7 |     save_dir = os.path.join('/media/data/depth-prediction/checkpoints', name)
8 |     if overwrite:  # or (os.path.isdir(log_dir) and not os.path.isdir(save_dir)):
9 |         shutil.rmtree(log_dir, ignore_errors=True)
10 |         shutil.rmtree(save_dir, ignore_errors=True)
11 |     if not os.path.isdir(save_dir):
12 |         os.makedirs(log_dir, exist_ok=True)
13 |         os.makedirs(save_dir)
14 |     save_paths = sorted(os.listdir(save_dir),
15 |                         key=lambda s: int(s.split('.')[0].split('_')[1]))
16 |     if len(save_paths) > 0:
17 |         save_path = save_paths[-1] if epoch is None else 'model_{}.pth'.format(epoch)
18 |         restore_path = os.path.join(save_dir, save_path)
19 |         starting_epoch = int(save_path.split('.')[0].split('_')[1]) + 1
20 |     else:
21 |         restore_path = None
22 |         starting_epoch = 0
23 |     return log_dir, save_dir, restore_path, starting_epoch
24 |
--------------------------------------------------------------------------------
/dense_estimation/app/gui.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib
3 | import matplotlib.pyplot as plt
4 | from matplotlib.widgets import Slider, Button
5 |
6 |
7 | def display(results, image_names, title="Depth eval"):
8 |     # image_grids[i][col] holds the images of example i for result column col
9 |     num_images = len(results[0])
10 |     num_rows = len(results[0][0])
11 |     num_cols = len(results)
12 |
13 |     image_grids = []
14 |     for i in range(num_images):
15 |         image_grid = []
16 |         for image_lists in results:
17 |             image_grid.append(image_lists[i])
18 |         image_grids.append(image_grid)
19 |
20 |     fig = plt.figure(facecolor='grey')
21 |     fig.suptitle(title)
22 |     mng = plt.get_current_fig_manager()
23 |     mng.resize(*mng.window.maxsize())
24 |     imshow_images = []
25 |     plt.subplots_adjust(wspace=0, hspace=0.0)
26 |
27 |     imshow_image_lists = []
28 |     for j, image_col in enumerate(image_grids[0]):
29 |         imshow_images = []
30 |         for i, t in enumerate(zip(image_names, image_col)):
31 |             name, image = t
32 |             ax = fig.add_subplot(num_rows, num_cols, i * num_cols + j + 1)
33 |             if j == 0:
34 |                 ax.set_ylabel(name)
35 |             ax.set_yticks([])
36 |             ax.set_xticks([])
37 |             if np.size(image, 3) == 1:
38 |                 imshow_images.append(ax.imshow(image[0, :, :, 0], "gray"))
39 |             else:
40 |                 imshow_images.append(ax.imshow(image[0, :, :, :]))
41 |         imshow_image_lists.append(imshow_images)
42 |
43 |     def display_example(index):
44 |         for j, image_col in enumerate(image_grids[int(index)]):
45 |             imshow_images = imshow_image_lists[j]
46 |             for im, image in zip(imshow_images, image_col):
47 |                 if np.size(image, 3) == 1:
48 |                     im.set_data(image[0, :, :, 0])
49 |                 else:
50 |                     im.set_data(image[0, :, :, :])
51 |         plt.draw()
52 |
53 |     current_index = 0
54 |
55 |     next_button_ax = fig.add_axes([0.8, 0.025, 0.1, 0.04])
56 |     next_button = Button(next_button_ax, 'next')
57 |     prev_button_ax = fig.add_axes([0.7, 0.025, 0.1, 0.04])
58 |     prev_button = Button(prev_button_ax, 'previous')
59 |     slider_ax = fig.add_axes([0.1, 0.025, 0.55, 0.04])
60 |     slider = Slider(slider_ax, 'Page', 0, num_images - 1,
61 |                     valinit=0, valfmt='%0.0f')
62 |
63 |     def next_button_on_clicked(mouse_event):
64 |         nonlocal current_index
65 |         if current_index < num_images - 1:
66 |             current_index += 1
67 |             slider.set_val(current_index)
68 |
69 |     def prev_button_on_clicked(mouse_event):
70 |         nonlocal current_index
71 |         if current_index > 0:
72 |             current_index -= 1
73 |             slider.set_val(current_index)
74 |
75 |     def sliders_on_changed(val):
76 |         nonlocal current_index
77 |         current_index = int(val)
78 |         display_example(val)
79 |
80 |     slider.on_changed(sliders_on_changed)
81 |     prev_button.on_clicked(prev_button_on_clicked)
82 |     next_button.on_clicked(next_button_on_clicked)
83 |
84 |     plt.draw()
85 |     plt.show()
86 |
--------------------------------------------------------------------------------
/dense_estimation/data.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import DataLoader
2 |
3 |
4 | def get_training_loader(dset_class, root, batch_size, out_size,
5 |                         num_threads=1, limit=None, debug=False, shuffle=True):
6 |     dset = dset_class(root, split='train', transform=dset_class.get_transform(True, size=out_size),
7 |                       limit=limit, debug=debug)
8 |     return DataLoader(dset, shuffle=shuffle, batch_size=batch_size, pin_memory=True,
9 |                       num_workers=num_threads)
10 |
11 |
12 | def get_testing_loader(dset_class, root, batch_size, out_size,
13 |                        num_threads=1, limit=None, debug=False, training=False, shuffle=False):
14 |     dset = dset_class(root, split='test', transform=dset_class.get_transform(training, out_size),
15 |                       limit=limit, debug=debug)
16 |     return DataLoader(dset, shuffle=shuffle, batch_size=batch_size, num_workers=num_threads)
17 |
--------------------------------------------------------------------------------
/dense_estimation/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonmeister/pytorch-mono-depth/713c70e2fdae6d9d6e0322febadfedcaee9470d3/dense_estimation/datasets/__init__.py -------------------------------------------------------------------------------- /dense_estimation/datasets/image_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import scipy.ndimage 4 | from scipy.ndimage.filters import gaussian_filter 5 | from scipy.ndimage.interpolation import map_coordinates 6 | import collections 7 | from PIL import Image 8 | import numbers 9 | 10 | __author__ = "Wei OUYANG" 11 | __license__ = "GPL" 12 | __version__ = "0.1.0" 13 | __status__ = "Development" 14 | 15 | 16 | def center_crop(x, center_crop_size): 17 | assert x.ndim == 3 18 | centerw, centerh = x.shape[1] // 2, x.shape[2] // 2 19 | halfw, halfh = center_crop_size[0] // 2, center_crop_size[1] // 2 20 | return x[:, centerw - halfw:centerw + halfw, centerh - halfh:centerh + halfh] 21 | 22 | 23 | def to_tensor(x): 24 | import torch 25 | x = x.transpose((2, 0, 1)) 26 | return torch.from_numpy(x).float() 27 | 28 | 29 | def random_num_generator(config, random_state=np.random): 30 | if config[0] == 'uniform': 31 | ret = random_state.uniform(config[1], config[2], 1)[0] 32 | elif config[0] == 'lognormal': 33 | ret = random_state.lognormal(config[1], config[2], 1)[0] 34 | else: 35 | print(config) 36 | raise Exception('unsupported format') 37 | return ret 38 | 39 | 40 | def poisson_downsampling(image, peak, random_state=np.random): 41 | if not isinstance(image, np.ndarray): 42 | imgArr = np.array(image, dtype='float32') 43 | else: 44 | imgArr = image.astype('float32') 45 | Q = imgArr.max(axis=(0, 1)) / peak 46 | if Q[0] == 0: 47 | return imgArr 48 | ima_lambda = imgArr / Q 49 | noisy_img = random_state.poisson(lam=ima_lambda) 50 | return noisy_img.astype('float32') 51 | 52 | 53 | def elastic_transform(image, alpha=1000, sigma=30, spline_order=1, mode='nearest', random_state=np.random): 54 | """Elastic deformation of image as described in [Simard2003]_. 55 | .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for 56 | Convolutional Neural Networks applied to Visual Document Analysis", in 57 | Proc. of the International Conference on Document Analysis and 58 | Recognition, 2003. 
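    Here `alpha` scales the random displacement field (deformation strength) and
    `sigma` is the standard deviation of the Gaussian that smooths that field
    (deformation smoothness); `spline_order` and `mode` are passed to map_coordinates.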
59 |     """
60 |     assert image.ndim == 3
61 |     shape = image.shape[:2]
62 |
63 |     dx = gaussian_filter((random_state.rand(*shape) * 2 - 1),
64 |                          sigma, mode="constant", cval=0) * alpha
65 |     dy = gaussian_filter((random_state.rand(*shape) * 2 - 1),
66 |                          sigma, mode="constant", cval=0) * alpha
67 |
68 |     x, y = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
69 |     indices = [np.reshape(x + dx, (-1, 1)), np.reshape(y + dy, (-1, 1))]
70 |     result = np.empty_like(image)
71 |     for i in range(image.shape[2]):
72 |         result[:, :, i] = map_coordinates(
73 |             image[:, :, i], indices, order=spline_order, mode=mode).reshape(shape)
74 |     return result
75 |
76 |
77 | class Merge(object):
78 |     """Merge a group of images
79 |     """
80 |
81 |     def __init__(self, axis=-1):
82 |         self.axis = axis
83 |
84 |     def __call__(self, images):
85 |         if isinstance(images, collections.Sequence) or isinstance(images, np.ndarray):
86 |             assert all([isinstance(i, np.ndarray)
87 |                         for i in images]), 'only numpy array is supported'
88 |             shapes = [list(i.shape) for i in images]
89 |             for s in shapes:
90 |                 s[self.axis] = None
91 |             assert all([s == shapes[0] for s in shapes]
92 |                        ), 'shapes must be the same except the merge axis'
93 |             return np.concatenate(images, axis=self.axis)
94 |         else:
95 |             raise Exception("obj is not a sequence (list, tuple, etc)")
96 |
97 |
98 | class Split(object):
99 |     """Split images into individual arrays
100 |     """
101 |
102 |     def __init__(self, *slices, **kwargs):
103 |         assert isinstance(slices, collections.Sequence)
104 |         slices_ = []
105 |         for s in slices:
106 |             if isinstance(s, collections.Sequence):
107 |                 slices_.append(slice(*s))
108 |             else:
109 |                 slices_.append(s)
110 |         assert all([isinstance(s, slice) for s in slices_]
111 |                    ), 'slices must consist of slice instances'
112 |         self.slices = slices_
113 |         self.axis = kwargs.get('axis', -1)
114 |
115 |     def __call__(self, image):
116 |         if isinstance(image, np.ndarray):
117 |             ret = []
118 |             for s in self.slices:
119 |                 sl = [slice(None)] * image.ndim
120 |                 sl[self.axis] = s
121 |                 ret.append(image[sl])
122 |             return ret
123 |         else:
124 |             raise Exception("obj is not a numpy array")
125 |
126 |
127 | class ElasticTransform(object):
128 |     """Apply elastic transformation on a numpy.ndarray (H x W x C)
129 |     """
130 |
131 |     def __init__(self, alpha, sigma):
132 |         self.alpha = alpha
133 |         self.sigma = sigma
134 |
135 |     def __call__(self, image):
136 |         if isinstance(self.alpha, collections.Sequence):
137 |             alpha = random_num_generator(self.alpha)
138 |         else:
139 |             alpha = self.alpha
140 |         if isinstance(self.sigma, collections.Sequence):
141 |             sigma = random_num_generator(self.sigma)
142 |         else:
143 |             sigma = self.sigma
144 |         return elastic_transform(image, alpha=alpha, sigma=sigma)
145 |
146 |
147 | class PoissonSubsampling(object):
148 |     """Poisson subsampling on a numpy.ndarray (H x W x C)
149 |     """
150 |
151 |     def __init__(self, peak, random_state=np.random):
152 |         self.peak = peak
153 |         self.random_state = random_state
154 |
155 |     def __call__(self, image):
156 |         if isinstance(self.peak, collections.Sequence):
157 |             peak = random_num_generator(
158 |                 self.peak, random_state=self.random_state)
159 |         else:
160 |             peak = self.peak
161 |         return poisson_downsampling(image, peak, random_state=self.random_state)
162 |
163 |
164 | class AddGaussianNoise(object):
165 |     """Add gaussian noise to a numpy.ndarray (H x W x C)
166 |     """
167 |
168 |     def __init__(self, mean, sigma, random_state=np.random):
169 |         self.sigma = sigma
170 |         self.mean = mean
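        # mean and sigma may each be a scalar, or a ('uniform', low, high) /
        # ('lognormal', mean, sigma) spec that random_num_generator() samples
        # from on every call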
171 |         self.random_state = random_state
172 |
173 |     def __call__(self, image):
174 |         if isinstance(self.sigma, collections.Sequence):
175 |             sigma = random_num_generator(
176 |                 self.sigma, random_state=self.random_state)
177 |         else:
178 |             sigma = self.sigma
179 |         if isinstance(self.mean, collections.Sequence):
180 |             mean = random_num_generator(self.mean, random_state=self.random_state)
181 |         else:
182 |             mean = self.mean
183 |         row, col, ch = image.shape
184 |         gauss = self.random_state.normal(mean, sigma, (row, col, ch))
185 |         gauss = gauss.reshape(row, col, ch)
186 |         image += gauss
187 |         return image
188 |
189 |
190 | class AddSpeckleNoise(object):
191 |     """Add speckle noise to a numpy.ndarray (H x W x C)
192 |     """
193 |
194 |     def __init__(self, mean, sigma, random_state=np.random):
195 |         self.sigma = sigma
196 |         self.mean = mean
197 |         self.random_state = random_state
198 |
199 |     def __call__(self, image):
200 |         if isinstance(self.sigma, collections.Sequence):
201 |             sigma = random_num_generator(
202 |                 self.sigma, random_state=self.random_state)
203 |         else:
204 |             sigma = self.sigma
205 |         if isinstance(self.mean, collections.Sequence):
206 |             mean = random_num_generator(
207 |                 self.mean, random_state=self.random_state)
208 |         else:
209 |             mean = self.mean
210 |         row, col, ch = image.shape
211 |         gauss = self.random_state.normal(mean, sigma, (row, col, ch))
212 |         gauss = gauss.reshape(row, col, ch)
213 |         image += image * gauss
214 |         return image
215 |
216 |
217 | class GaussianBlurring(object):
218 |     """Apply gaussian blur to a numpy.ndarray (H x W x C)
219 |     """
220 |
221 |     def __init__(self, sigma, random_state=np.random):
222 |         self.sigma = sigma
223 |         self.random_state = random_state
224 |
225 |     def __call__(self, image):
226 |         if isinstance(self.sigma, collections.Sequence):
227 |             sigma = random_num_generator(
228 |                 self.sigma, random_state=self.random_state)
229 |         else:
230 |             sigma = self.sigma
231 |         image = gaussian_filter(image, sigma=(sigma, sigma, 0))
232 |         return image
233 |
234 |
235 | class AddGaussianPoissonNoise(object):
236 |     """Add poisson noise with gaussian blurred image to a numpy.ndarray (H x W x C)
237 |     """
238 |
239 |     def __init__(self, sigma, peak, random_state=np.random):
240 |         self.sigma = sigma
241 |         self.peak = peak
242 |         self.random_state = random_state
243 |
244 |     def __call__(self, image):
245 |         if isinstance(self.sigma, collections.Sequence):
246 |             sigma = random_num_generator(
247 |                 self.sigma, random_state=self.random_state)
248 |         else:
249 |             sigma = self.sigma
250 |         if isinstance(self.peak, collections.Sequence):
251 |             peak = random_num_generator(
252 |                 self.peak, random_state=self.random_state)
253 |         else:
254 |             peak = self.peak
255 |         bg = gaussian_filter(image, sigma=(sigma, sigma, 0))
256 |         bg = poisson_downsampling(
257 |             bg, peak=peak, random_state=self.random_state)
258 |         return image + bg
259 |
260 |
261 | class MaxScaleNumpy(object):
262 |     """Scale each channel of the numpy array to [range_min, range_max] using its min and max, i.e.
263 |     channel = range_min + (channel - min) * (range_max - range_min) / (max - min)
264 |     """
265 |
266 |     def __init__(self, range_min=0.0, range_max=1.0):
267 |         self.scale = (range_min, range_max)
268 |
269 |     def __call__(self, image):
270 |         mn = image.min(axis=(0, 1))
271 |         mx = image.max(axis=(0, 1))
272 |         return self.scale[0] + (image - mn) * (self.scale[1] - self.scale[0]) / (mx - mn)
273 |
274 |
275 | class MedianScaleNumpy(object):
276 |     """Scale each channel of the numpy array using its min and median, i.e.
277 |     channel = range_min + (channel - min) * (range_max - range_min) / (median - min)
278 |     """
279 |
280 |     def __init__(self, range_min=0.0, range_max=1.0):
281 |         self.scale = (range_min, range_max)
282 |
283 |     def __call__(self, image):
284 |         mn = image.min(axis=(0, 1))
285 |         md = np.median(image, axis=(0, 1))
286 |         return self.scale[0] + (image - mn) * (self.scale[1] - self.scale[0]) / (md - mn)
287 |
288 |
289 | class NormalizeNumpy(object):
290 |     """Normalize each channel of the numpy array i.e.
291 |     channel = (channel - mean) / std
292 |     """
293 |
294 |     def __call__(self, image):
295 |         image -= image.mean(axis=(0, 1))
296 |         s = image.std(axis=(0, 1))
297 |         s[s == 0] = 1.0
298 |         image /= s
299 |         return image
300 |
301 |
302 | class MutualExclude(object):
303 |     """Remove elements from one channel
304 |     """
305 |
306 |     def __init__(self, exclude_channel, from_channel):
307 |         self.from_channel = from_channel
308 |         self.exclude_channel = exclude_channel
309 |
310 |     def __call__(self, image):
311 |         mask = image[:, :, self.exclude_channel] > 0
312 |         image[:, :, self.from_channel][mask] = 0
313 |         return image
314 |
315 |
316 | class RandomCropNumpy(object):
317 |     """Crops the given numpy array at a random location to have a region of
318 |     the given size. size can be a tuple (target_height, target_width)
319 |     or an integer, in which case the target will be of a square shape (size, size)
320 |     """
321 |
322 |     def __init__(self, size, random_state=np.random):
323 |         if isinstance(size, numbers.Number):
324 |             self.size = (int(size), int(size))
325 |         else:
326 |             self.size = size
327 |         self.random_state = random_state
328 |
329 |     def __call__(self, img):
330 |         w, h = img.shape[:2]
331 |         th, tw = self.size
332 |         if w == tw and h == th:
333 |             return img
334 |         elif w == tw:
335 |             x1 = 0
336 |             y1 = self.random_state.randint(0, h - th)
337 |         elif h == th:
338 |             x1 = self.random_state.randint(0, w - tw)
339 |             y1 = 0
340 |         else:
341 |             x1 = self.random_state.randint(0, w - tw)
342 |             y1 = self.random_state.randint(0, h - th)
343 |
344 |         return img[x1:x1 + tw, y1: y1 + th, :]
345 |
346 |
347 | class CenterCropNumpy(object):
348 |     """Crops the given numpy array at the center to have a region of
349 |     the given size. size can be a tuple (target_height, target_width)
350 |     or an integer, in which case the target will be of a square shape (size, size)
351 |     """
352 |
353 |     def __init__(self, size):
354 |         if isinstance(size, numbers.Number):
355 |             self.size = (int(size), int(size))
356 |         else:
357 |             self.size = size
358 |
359 |     def __call__(self, img):
360 |         w, h = img.shape[:2]
361 |         th, tw = self.size
362 |         x1 = int(round((w - tw) / 2.))
363 |         y1 = int(round((h - th) / 2.))
364 |         return img[x1:x1 + tw, y1: y1 + th, :]
365 |
366 |
367 | class RandomRotate(object):
368 |     """Rotate a PIL.Image or numpy.ndarray (H x W x C) randomly
369 |     """
370 |
371 |     def __init__(self, angle_range=(0.0, 360.0), axes=(0, 1), mode='reflect', random_state=np.random):
372 |         assert isinstance(angle_range, tuple)
373 |         self.angle_range = angle_range
374 |         self.random_state = random_state
375 |         self.axes = axes
376 |         self.mode = mode
377 |
378 |     def __call__(self, image):
379 |         angle = self.random_state.uniform(
380 |             self.angle_range[0], self.angle_range[1])
381 |         if isinstance(image, np.ndarray):
382 |             mi, ma = image.min(), image.max()
383 |             image = scipy.ndimage.interpolation.rotate(
384 |                 image, angle, reshape=False, axes=self.axes, mode=self.mode)
385 |             return np.clip(image, mi, ma)
386 |         elif isinstance(image, Image.Image):
387 |             return image.rotate(angle)
388 |         else:
389 |             raise Exception('unsupported type')
390 |
391 |
392 | class RandomFlipHorizontal(object):
393 |     """Flip a numpy.ndarray (H x W x C) horizontally with probability 0.5
394 |     """
395 |
396 |     def __init__(self, random_state=np.random):
397 |         self.random_state = random_state
398 |
399 |     def __call__(self, image):
400 |         val = self.random_state.uniform()
401 |         if isinstance(image, np.ndarray):
402 |             if val > 0.5:
403 |                 return image[:, ::-1, :]
404 |             return image
405 |         else:
406 |             raise Exception('unsupported type')
407 |
408 |
409 | class RandomColor(object):
410 |     """Multiply numpy.ndarray (H x W x C) globally
411 |     """
412 |
413 |     def __init__(self, multiplier_range=(0.8, 1.2), random_state=np.random):
414 |         assert isinstance(multiplier_range, tuple)
415 |         self.multiplier_range = multiplier_range
416 |         self.random_state = random_state
417 |
418 |     def __call__(self, image):
419 |         mult = self.random_state.uniform(self.multiplier_range[0],
420 |                                          self.multiplier_range[1])
421 |         if isinstance(image, np.ndarray):
422 |             return np.clip(image * mult, 0, 255)
423 |         else:
424 |             raise Exception('unsupported type')
425 |
426 |
427 | class BilinearResize(object):
428 |     """Resize a PIL.Image or numpy.ndarray (H x W x C)
429 |     """
430 |
431 |     def __init__(self, zoom):
432 |         self.zoom = [zoom, zoom, 1]
433 |
434 |     def __call__(self, image):
435 |         if isinstance(image, np.ndarray):
436 |             return scipy.ndimage.interpolation.zoom(image, self.zoom, order=1)
437 |         elif isinstance(image, Image.Image):
438 |             return image.resize((int(image.size[0] * self.zoom[0]), int(image.size[1] * self.zoom[1])), Image.BILINEAR)
439 |         else:
440 |             raise Exception('unsupported type')
441 |
442 |
443 | class EnhancedCompose(object):
444 |     """Composes several transforms together.
445 |     Args:
446 |         transforms (List[Transform]): list of transforms to compose.
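            A list element may itself be a list of transforms, applied elementwise
            to a group of images; `None` entries pass their input through unchanged.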
447 | Example: 448 | >>> transforms.Compose([ 449 | >>> transforms.CenterCrop(10), 450 | >>> transforms.ToTensor(), 451 | >>> ]) 452 | """ 453 | 454 | def __init__(self, transforms): 455 | self.transforms = transforms 456 | 457 | def __call__(self, img): 458 | for t in self.transforms: 459 | if isinstance(t, collections.Sequence): 460 | assert isinstance(img, collections.Sequence) and len(img) == len( 461 | t), "size of image group and transform group does not fit" 462 | tmp_ = [] 463 | for i, im_ in enumerate(img): 464 | if callable(t[i]): 465 | tmp_.append(t[i](im_)) 466 | else: 467 | tmp_.append(im_) 468 | img = tmp_ 469 | elif callable(t): 470 | img = t(img) 471 | elif t is None: 472 | continue 473 | else: 474 | raise Exception('unexpected type') 475 | return img 476 | 477 | 478 | if __name__ == '__main__': 479 | from torchvision.transforms import Lambda 480 | input_channel = 3 481 | target_channel = 3 482 | 483 | # define a transform pipeline 484 | transform = EnhancedCompose([ 485 | Merge(), 486 | RandomCropNumpy(size=(512, 512)), 487 | RandomRotate(), 488 | Split([0, input_channel], [input_channel, input_channel+target_channel]), 489 | [CenterCropNumpy(size=(256, 256)), CenterCropNumpy(size=(256, 256))], 490 | [NormalizeNumpy(), MaxScaleNumpy(0, 1.0)], 491 | # for non-pytorch usage, remove to_tensor conversion 492 | [Lambda(to_tensor), Lambda(to_tensor)] 493 | ]) 494 | # read input data for test 495 | image_in = np.array(Image.open('input.jpg')) 496 | image_target = np.array(Image.open('target.jpg')) 497 | 498 | # apply the transform 499 | x, y = transform([image_in, image_target]) 500 | -------------------------------------------------------------------------------- /dense_estimation/datasets/nyu_depth_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from skimage.transform import warp, AffineTransform 4 | 5 | import torch 6 | import torch.utils.data as data 7 | import torchvision.utils 8 | from torchvision.transforms import Lambda, Normalize, ToTensor 9 | 10 | from .util import transform_chw 11 | from .image_utils import (EnhancedCompose, Merge, RandomCropNumpy, Split, to_tensor, 12 | BilinearResize, CenterCropNumpy, RandomRotate, AddGaussianNoise, 13 | RandomFlipHorizontal, RandomColor) 14 | 15 | NYUD_MEAN = [0.48056951, 0.41091299, 0.39225179] 16 | NYUD_STD = [0.28918225, 0.29590312, 0.3093034] 17 | 18 | 19 | class NYU_Depth_V2(data.Dataset): 20 | def __init__(self, root, split='test', transform=None, limit=None, debug=False): 21 | self.root = root 22 | self.split = split 23 | self.transform = transform 24 | self.limit = limit 25 | self.debug = debug 26 | 27 | if debug: 28 | self.images = np.random.rand(20, 3, 240, 320) * 255 29 | self.depths = np.random.rand(20, 1, 240, 320) * 10 30 | elif split == 'test': 31 | folder = os.path.join(root, 'nyu_depth_v2', 'labeled', 'npy') 32 | self.images = np.load(os.path.join(folder, 'images.npy')) 33 | self.depths = np.load(os.path.join(folder, 'depths.npy')) 34 | else: 35 | folder = os.path.join(root, 'nyu_depth_v2', 'npy') 36 | self.file_paths = [os.path.join(folder, n) for n in sorted(os.listdir(folder))] 37 | 38 | def __len__(self): 39 | if hasattr(self, 'images'): 40 | length = len(self.images) 41 | else: 42 | length = len(self.file_paths) 43 | if self.limit is not None: 44 | length = np.minimum(self.limit, length) 45 | return length 46 | 47 | def __getitem__(self, index): 48 | if self.split == 'test' or self.debug: 49 | image = self.images[index] 50 | depth 
= self.depths[index]
51 |         else:
52 |             stacked = np.load(self.file_paths[index])
53 |             image = stacked[0:3]
54 |             depth = stacked[3:5]
55 |
56 |         if self.transform is not None:
57 |             image, depth = transform_chw(self.transform, [image, depth])
58 |
59 |         return image, depth
60 |
61 |     def compute_image_mean(self):
62 |         return np.mean(self.images / 255, axis=(0, 2, 3))
63 |
64 |     def compute_image_std(self):
65 |         return np.std(self.images / 255, axis=(0, 2, 3))
66 |
67 |     @staticmethod
68 |     def get_transform(training=True, size=(256,192), normalize=True):
69 |         if training:
70 |             transforms = [
71 |                 Merge(),
72 |                 RandomFlipHorizontal(),
73 |                 RandomRotate(angle_range=(-5, 5), mode='constant'),
74 |                 RandomCropNumpy(size=size),
75 |                 RandomAffineZoom(scale_range=(1.0, 1.5)),
76 |                 Split([0, 3], [3, 5]),  # split back into (image, depth+mask)
77 |                 # color jitter applies to the RGB image only; depth and mask pass through
78 |                 [RandomColor(multiplier_range=(0.8, 1.2)), None],
79 |             ]
80 |         else:
81 |             transforms = [
82 |                 [BilinearResize(0.5), None],
83 |             ]
84 |
85 |         transforms.extend([
86 |             # Note: ToTensor maps from [0, 255] to [0, 1] while to_tensor does not
87 |             [ToTensor(), Lambda(to_tensor)],
88 |             [Normalize(mean=NYUD_MEAN, std=NYUD_STD), None] if normalize else None
89 |         ])
90 |
91 |         return EnhancedCompose(transforms)
92 |
93 |
94 | class RandomAffineZoom():
95 |     def __init__(self, scale_range=(1.0, 1.5), random_state=np.random):
96 |         assert isinstance(scale_range, tuple)
97 |         self.scale_range = scale_range
98 |         self.random_state = random_state
99 |
100 |     def __call__(self, image):
101 |         scale = self.random_state.uniform(self.scale_range[0],
102 |                                           self.scale_range[1])
103 |         if isinstance(image, np.ndarray):
104 |             af = AffineTransform(scale=(scale, scale))
105 |             image = warp(image, af.inverse)
106 |             rgb = image[:, :, 0:3]
107 |             depth = image[:, :, 3:4] / scale
108 |             mask = image[:, :, 4:5]
109 |             return np.concatenate([rgb, depth, mask], axis=2)
110 |         else:
111 |             raise Exception('unsupported type')
112 |
--------------------------------------------------------------------------------
/dense_estimation/datasets/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 | import sys
4 | from urllib.request import FancyURLopener
5 |
6 |
7 | def download(url, destination, tmp_dir='/tmp'):
8 |     def _progress(count, block_size, total_size):
9 |         sys.stdout.write('\rDownloading %s %.1f%%' % (url,
10 |             float(count * block_size) / float(total_size) * 100.0))
11 |         sys.stdout.flush()
12 |     urlretrieve = FancyURLopener().retrieve
13 |     if url.endswith('.zip'):
14 |         local_zip_path = os.path.join(tmp_dir, 'datasets_download.zip')
15 |         urlretrieve(url, local_zip_path, _progress)
16 |         with zipfile.ZipFile(local_zip_path, "r") as zip_ref:
17 |             zip_ref.extractall(destination)
18 |         os.remove(local_zip_path)
19 |     else:
20 |         urlretrieve(url, destination, _progress)
21 |
22 |
23 | def maybe_download(url, destination):
24 |     if not os.path.isfile(destination):
25 |         download(url, destination)
26 |
27 |
28 | def transform_chw(transform, lst):
29 |     """Convert each array in lst from CHW to HWC and apply the transform."""
30 |     return transform([x.transpose((1, 2, 0)) for x in lst])
31 |
--------------------------------------------------------------------------------
/dense_estimation/densenet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.init
4 | import torch.nn.functional as F
5 | import math
6 |
7 |
8 | class _BN_ReLU_Conv(nn.Sequential):
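    # Composite function in pre-activation order (BatchNorm -> ReLU -> Conv,
    # optionally followed by Dropout), as used in the DenseNet / FC-DenseNet papers.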
9 |     def __init__(self, num_input_features, num_output_features, dropout_p,
10 |                  kernel_size=3, stride=1, padding=1):
11 |         super().__init__()
12 |         self.add_module('bn', nn.BatchNorm2d(num_input_features))
13 |         self.add_module('relu', nn.ReLU(inplace=True))
14 |         self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
15 |                                           kernel_size=kernel_size, stride=stride, padding=padding))
16 |         if dropout_p > 0:
17 |             self.add_module('dropout', nn.Dropout(dropout_p))
18 |
19 |
20 | class _TransitionDown(nn.Sequential):
21 |     def __init__(self, num_features, dropout_p):
22 |         super().__init__()
23 |         self.add_module('bn_relu_conv', _BN_ReLU_Conv(num_features, num_features, dropout_p,
24 |                                                       kernel_size=1, padding=0))
25 |         self.add_module('pool', nn.MaxPool2d(kernel_size=2))
26 |
27 |
28 | class _TransitionUp(nn.Module):
29 |     def __init__(self, num_features):
30 |         super().__init__()
31 |         self.deconv = nn.ConvTranspose2d(num_features, num_features,
32 |                                          kernel_size=3, stride=2, padding=1)
33 |
34 |     def forward(self, x, skip):
35 |         self.deconv.padding = (
36 |             ((x.size(2) - 1) * self.deconv.stride[0] - skip.size(2)
37 |              + self.deconv.kernel_size[0] + 1) // 2,
38 |             ((x.size(3) - 1) * self.deconv.stride[1] - skip.size(3)
39 |              + self.deconv.kernel_size[1] + 1) // 2)
40 |         up = self.deconv(x, output_size=skip.size())
41 |         return torch.cat([up, skip], 1)
42 |
43 |
44 | class _DenseLayer(nn.Sequential):
45 |     def __init__(self, num_input_features, num_output_features, dropout_p):
46 |         super().__init__()
47 |         self.add_module('bn_relu_conv', _BN_ReLU_Conv(num_input_features,
48 |                                                       num_output_features, dropout_p))
49 |
50 |     def forward(self, x):
51 |         new_features = super().forward(x)
52 |         return torch.cat([x, new_features], 1)
53 |
54 |
55 | class _DenseBlock(nn.Sequential):
56 |     def __init__(self, num_layers, num_input_features, growth_rate, dropout_p):
57 |         super().__init__()
58 |         for i in range(num_layers):
59 |             layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate,
60 |                                 dropout_p)
61 |             self.add_module('denselayer{}'.format(i + 1), layer)
62 |
63 |
64 | class SoftmaxLayer(nn.Module):
65 |     def __init__(self):
66 |         super().__init__()
67 |
68 |     def forward(self, x):
69 |         b, c, h, w = x.size()
70 |
71 |         x = x.transpose(1, 3)
72 |         x = x.view(-1, c)
73 |         x = F.softmax(x)
74 |         x = x.view(b, w, h, c)
75 |         return x.transpose(1, 3)
76 |
77 |
78 | class DenseNet(nn.Module):
79 |     """Fully convolutional DenseNet ("The One Hundred Layers Tiramisu", Jegou et al., 2017).
80 |     Args:
81 |         num_input_features (int) - number of module input features
82 |         num_output_features (int) - number of module output features
83 |         growth_rate (int) - how many filters to add each layer (`k` in paper)
84 |         num_transitions (int) - number of transition-down steps (equals the number of transition-up steps)
85 |         block_config (int or list of size 2 * num_transitions + 1) - how many layers in each block
86 |         num_init_features (int) - number of filters to learn in the first convolution layer
87 |         dropout_p (float) - dropout rate after each dense layer
88 |     """
89 |     def __init__(self, num_input_features=3, num_output_features=1,
90 |                  growth_rate=16, num_transitions=5,
91 |                  block_config=[4, 5, 7, 10, 12, 15, 12, 10, 7, 5, 4],
92 |                  num_init_features=48, dropout_p=0.2):
93 |
94 |         block_config_size = 2 * num_transitions + 1
95 |         if isinstance(block_config, list):
96 |             assert len(block_config) == block_config_size
97 |         else:
98 |             block_config = [block_config] * block_config_size
99 |
100 |         super().__init__()
101 |
102 |         self.block_config = block_config
103 |         self.growth_rate = growth_rate
104 |
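        # block_config is laid out as: num_transitions downsampling blocks,
        # one bottleneck block, then num_transitions upsampling blocks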
self.num_transitions = num_transitions 105 | 106 | self.downsampling_blocks = nn.ModuleList() 107 | self.downsampling_transitions = nn.ModuleList() 108 | self.upsampling_blocks = nn.ModuleList() 109 | self.upsampling_transitions = nn.ModuleList() 110 | self.relu = torch.nn.ReLU() 111 | 112 | num_features = num_init_features 113 | num_features_skip = [] 114 | 115 | self.init_conv = nn.Conv2d(num_input_features, num_features, kernel_size=3, padding=1) 116 | 117 | for i in range(num_transitions): 118 | num_layers = block_config[i] 119 | dense = _DenseBlock(num_layers, num_features, growth_rate, dropout_p) 120 | num_features += growth_rate * num_layers 121 | transition = _TransitionDown(num_features, dropout_p) 122 | self.downsampling_blocks.append(dense) 123 | self.downsampling_transitions.append(transition) 124 | num_features_skip.append(num_features) 125 | 126 | num_layers = block_config[num_transitions] 127 | self.bottleneck_block = _DenseBlock(num_layers, num_features, growth_rate, dropout_p) 128 | num_features_skip = num_features_skip[::-1] 129 | 130 | for i in range(num_transitions): 131 | num_features_last_block = growth_rate * block_config[num_transitions + i] 132 | transition = _TransitionUp(num_features_last_block) 133 | num_features = num_features_last_block + num_features_skip[i] 134 | 135 | num_layers = block_config[num_transitions + 1 + i] 136 | dense = _DenseBlock(num_layers, num_features, growth_rate, dropout_p) 137 | self.upsampling_blocks.append(dense) 138 | self.upsampling_transitions.append(transition) 139 | num_features += growth_rate * num_layers 140 | 141 | self.output_conv = nn.Conv2d(num_features, num_output_features, kernel_size=1) 142 | 143 | for m in self.modules(): 144 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 145 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 146 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 147 | if m.bias is not None: 148 | m.bias.data.zero_() 149 | elif isinstance(m, nn.BatchNorm2d): 150 | m.weight.data.fill_(1) 151 | m.bias.data.zero_() 152 | 153 | # We have to initialize the output weights to keep predicted variance low, as it will be 154 | # exponentiated 155 | # self.output_conv.weight.data.normal_(0, 0.01) 156 | 157 | # for m in self.modules(): 158 | # if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 159 | # nn.init.kaiming_uniform(m.weight.data) 160 | # m.bias.data.zero_() 161 | # elif isinstance(m, nn.BatchNorm2d): 162 | # m.weight.data.fill_(1) 163 | # m.bias.data.zero_() 164 | 165 | def forward(self, x): 166 | features = self.init_conv(x) 167 | skip_features = [] 168 | 169 | for dense, transition in zip(self.downsampling_blocks, self.downsampling_transitions): 170 | block_features = dense(features) 171 | skip_features.append(block_features) 172 | features = transition(block_features) 173 | 174 | features = self.bottleneck_block(features) 175 | for i, block in enumerate(zip(self.upsampling_transitions, self.upsampling_blocks)): 176 | skip = skip_features.pop() 177 | num_features_last_block = self.growth_rate * self.block_config[self.num_transitions + i] 178 | transition, dense = block 179 | up_features = transition(features[:, -num_features_last_block:], skip) 180 | features = dense(up_features) 181 | 182 | return self.output_conv(features) 183 | -------------------------------------------------------------------------------- /dense_estimation/distributions.py: -------------------------------------------------------------------------------- 1 | ## Experimental research code - not part of the depth prediction re-implementation 2 | 3 | import torch 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import scipy.stats as ss 7 | import matplotlib.pyplot as plt 8 | import math 9 | 10 | from pytorch.contrib.distributions import BaseDistribution, MultivariateDiag 11 | 12 | 13 | class GaussianScaleMixture(BaseDistribution): 14 | def __init__(self, mean, variances, weights): 15 | self._mean = mean 16 | self._variances = variances 17 | self._weights = weights 18 | 19 | @property 20 | def mean(self): 21 | return self._mean 22 | 23 | @property 24 | def variance(self): 25 | return torch.sum(self._variances * self._weights, dim=1) 26 | 27 | @property 28 | def averages(self): 29 | variances_lst = torch.split(self._variances, 1, dim=1) 30 | weights_lst = torch.split(self._weights, 1, dim=1) 31 | avgs = [torch.squeeze(torch.mean(w)) for w in variances_lst] 32 | avgs += [torch.squeeze(torch.mean(v)) for v in weights_lst] 33 | return torch.cat(avgs) 34 | 35 | def log_prob(self, x): 36 | return math.log(1. 
/ (math.sqrt(2 * math.pi))) - self.log_loss(x) 37 | 38 | def log_loss(self, x): 39 | variances_lst = torch.split(self._variances, 1, dim=1) 40 | weights_lst = torch.split(self._weights, 1, dim=1) 41 | 42 | out = Variable(torch.zeros(*self._mean.size())) 43 | if x.is_cuda: 44 | out = out.cuda() 45 | 46 | exponent_lst = [] 47 | for var, weight in zip(variances_lst, weights_lst): 48 | exponent = - ((x - self._mean) ** 2) / (2 * var) 49 | exponent_lst.append(exponent) 50 | # Assuming # channels = 1, we can concat and take max along dim 1 51 | a = torch.max(torch.cat(exponent_lst, dim=1), dim=1)[0] 52 | 53 | for var, weight, exponent in zip(variances_lst, weights_lst, exponent_lst): 54 | exp = torch.exp(exponent - a) 55 | out += weight * exp / torch.sqrt(var) 56 | return - a - torch.log(out) 57 | 58 | @staticmethod 59 | def plot(averages, label): 60 | averages = averages.cpu().data.numpy() 61 | num_gaussians = int(len(averages) / 2) 62 | means = np.zeros((num_gaussians)) 63 | stdevs = np.sqrt(averages[:num_gaussians]) 64 | weights = averages[num_gaussians:] 65 | x = np.arange(-5., 5., 0.01) 66 | 67 | pdfs = [p * ss.norm.pdf(x, mu, sd) for mu, sd, p in zip(means, stdevs, weights)] 68 | 69 | density = np.sum(np.array(pdfs), axis=0) 70 | plt.plot(x, density, label=label) 71 | 72 | 73 | class PowerExponential(BaseDistribution): 74 | def __init__(self, mean, variance, k=0.5, eps=1e-6): 75 | self._mean = mean 76 | self._variance = variance 77 | self._dim = int(mean.size()[1]) 78 | self._k = k 79 | self._eps = eps 80 | 81 | @property 82 | def mean(self): 83 | return self._mean 84 | 85 | @property 86 | def averages(self): 87 | avgs = [torch.mean(self._variance)] 88 | return torch.cat(avgs) 89 | 90 | @property 91 | def variance(self): 92 | return self._variance 93 | 94 | def log_loss(self, x): 95 | vr = self._variance 96 | u = x - self._mean 97 | 98 | t2 = torch.sum(torch.log(vr), dim=1) 99 | t3 = (torch.sum((u ** 2) / vr, dim=1) + self._eps) ** self._k 100 | return t2 + t3 101 | 102 | @staticmethod 103 | def plot(averages, label): 104 | pass 105 | -------------------------------------------------------------------------------- /dense_estimation/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import numpy as np 4 | import tensorflow as tf 5 | import torch 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | class TensorBoardLogger(): 10 | def __init__(self, log_dir, visualizer, max_testing_images=3, 11 | testing_loss_names=[], run_options=None, 12 | starting_epoch=0): 13 | self.summary_writer = tf.summary.FileWriter(log_dir) 14 | self.sess = tf.Session() 15 | self.visualizer = visualizer 16 | self.image_placeholders = [] 17 | self.testing_loss_placeholders = [] 18 | 19 | for name in visualizer.names: 20 | p = tf.placeholder(tf.float32, name=name) 21 | self.image_placeholders.append(p) 22 | tf.summary.image(name, p, collections=['testing_images'], 23 | max_outputs=max_testing_images) 24 | 25 | self.training_loss_placeholder = tf.placeholder(tf.float32, name='training_loss') 26 | tf.summary.scalar('training/loss', self.training_loss_placeholder, 27 | collections=['training_loss']) 28 | 29 | self.learning_rate_placeholder = tf.placeholder(tf.float32, name='training_lr') 30 | tf.summary.scalar('training/learning_rate', self.learning_rate_placeholder, 31 | collections=['training_lr']) 32 | 33 | for name in testing_loss_names: 34 | p = tf.placeholder(tf.float32, name=name) 35 | self.testing_loss_placeholders.append(p) 36 | 
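            # one scalar placeholder per testing metric; they are merged from the
            # 'testing_losses' collection when log_testing_losses() is called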
            tf.summary.scalar('testing/' + name, p, collections=['testing_losses'])
37 |
38 |         if os.path.isdir(log_dir):
39 |             shutil.rmtree(log_dir)
40 |             self.summary_writer = tf.summary.FileWriter(log_dir)  # re-open after clearing stale logs
41 |
42 |         if False:  # run_options is not None: # TODO not out yet in pre-built tensorflow
43 |             run_options_placeholder = tf.placeholder(tf.string,
44 |                                                      name='training_run_options')
45 |             tf.summary.text('training/run_options',
46 |                             run_options_placeholder,
47 |                             collections=['training_run_options'])
48 |
49 |             feed_dict = {run_options_placeholder: run_options}
50 |             self._eval_and_add_summary(feed_dict, 'training_run_options',
51 |                                        starting_epoch)
52 |
53 |     def _eval_and_add_summary(self, feed_dict, key, step):
54 |         summaries = tf.get_collection(key)
55 |         summary_ = tf.summary.merge(summaries)
56 |         summary = self.sess.run(summary_, feed_dict=feed_dict)
57 |         self.summary_writer.add_summary(summary, step)
58 |
59 |     def log_training_loss(self, iteration, loss, learning_rate):
60 |         feed_dict = {self.training_loss_placeholder: loss}
61 |         self._eval_and_add_summary(feed_dict, 'training_loss', iteration)
62 |         feed_dict = {self.learning_rate_placeholder: learning_rate}
63 |         self._eval_and_add_summary(feed_dict, 'training_lr', iteration)
64 |
65 |     def log_testing_images(self, epoch, input, outputs, target):
66 |         arrays = self.visualizer(input, outputs, target)
67 |         images = visuals_to_numpy(arrays)
68 |
69 |         feed_dict = dict(zip(self.image_placeholders, images))
70 |         self._eval_and_add_summary(feed_dict, 'testing_images', epoch)
71 |
72 |     def log_testing_losses(self, epoch, losses):
73 |         feed_dict = dict(zip(self.testing_loss_placeholders, losses))
74 |         self._eval_and_add_summary(feed_dict, 'testing_losses', epoch)
75 |
76 |     def close(self):
77 |         self.summary_writer.close()
78 |         self.sess.close()
79 |
80 |
81 | def visuals_to_numpy(arrays):
82 |     images = []
83 |     for x in arrays:
84 |         if isinstance(x, tuple):
85 |             x, cmap_fn = x
86 |         else:
87 |             cmap_fn = None
88 |         x_np = np.transpose(x.numpy(), (0, 2, 3, 1))
89 |         if cmap_fn is not None:
90 |             x_np = cmap_fn(x_np[:, :, :, 0])
91 |         images.append(x_np)
92 |     return images
93 |
94 |
95 | class BasicVisualizer():
96 |     """Visualizes a single image element input, output and target."""
97 |     names = ['image', 'prediction', 'truth']
98 |
99 |     def __call__(self, input, outputs, target):
100 |         return [input, outputs[0], target]
101 |
102 |
103 | class DistributionVisualizer():
104 |     """Visualizes output distribution mean and variance."""
105 |     #names = ['image', 'mean', 'variance', 'truth']
106 |     names = ['mean', 'variance', 'error']
107 |
108 |     def __init__(self, distribution):
109 |         self.distribution = distribution
110 |
111 |     def __call__(self, input, outputs, target):
112 |         d = self.distribution(*outputs)
113 |
114 |         # TODO create distribution plot
115 |
116 |         #return [target,
117 |         #        d.mean,
118 |         #        (2*d.variance, plt.cm.jet),
119 |         #        (torch.abs(target-d.mean), plt.cm.jet)]
120 |         return [d.mean, d.variance, torch.abs(target-d.mean)]
121 |
--------------------------------------------------------------------------------
/dense_estimation/losses.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from math import log
5 |
6 |
7 | def _mask_input(input, mask=None):
8 |     if mask is not None:
9 |         input = input * mask
10 |         count = torch.sum(mask).data[0]
11 |     else:
12 |         count = np.prod(input.size(), dtype=np.float32).item()
13 |     return input, count
14 |
15 |
16 | class BerHuLoss(nn.Module):
17 |     def forward(self,
input, target, mask=None): 18 | x = input - target 19 | abs_x = torch.abs(x) 20 | c = torch.max(abs_x).data[0] / 5 21 | leq = (abs_x <= c).float() 22 | l2_losses = (x ** 2 + c ** 2) / (2 * c) 23 | losses = leq * abs_x + (1 - leq) * l2_losses 24 | losses, count = _mask_input(losses, mask) 25 | return torch.sum(losses) / count 26 | 27 | 28 | class HuberLoss(nn.Module): 29 | def __init__(self): 30 | super().__init__() 31 | self.loss = nn.SmoothL1Loss(size_average=False) 32 | 33 | def forward(self, input, target, mask=None): 34 | if mask is not None: 35 | loss = self.loss(input * mask, target * mask) 36 | count = torch.sum(mask).data[0] 37 | return loss / count 38 | 39 | count = np.prod(input.size(), dtype=np.float32).item() 40 | return self.loss(input, target) / count 41 | 42 | 43 | class DistributionLogLoss(nn.Module): 44 | def __init__(self, distribution): 45 | super().__init__() 46 | self.distribution = distribution 47 | 48 | def forward(self, input, target, mask=None): 49 | d = self.distribution(*input) 50 | loss = d.log_loss(target) 51 | loss, count = _mask_input(loss, mask) 52 | return torch.sum(loss) / count 53 | 54 | 55 | class RMSLoss(nn.Module): 56 | def forward(self, input, target, mask=None): 57 | loss = torch.pow(input - target, 2) 58 | loss, count = _mask_input(loss, mask) 59 | return torch.sqrt(torch.sum(loss) / count) 60 | 61 | 62 | class RelLoss(nn.Module): 63 | def forward(self, input, target, mask=None): 64 | loss = torch.abs(input - target) / target 65 | loss, count = _mask_input(loss, mask) 66 | return torch.sum(loss) / count 67 | 68 | 69 | class Log10Loss(nn.Module): 70 | def forward(self, input, target, mask=None): 71 | loss = torch.abs((torch.log(target) - torch.log(input)) / log(10)) 72 | loss, count = _mask_input(loss, mask) 73 | return torch.sum(loss) / count 74 | 75 | 76 | class TestingLosses(nn.Module): 77 | def __init__(self, scalar_losses): 78 | super().__init__() 79 | self.scalar_losses = nn.ModuleList(scalar_losses) 80 | 81 | def forward(self, input, target): 82 | scalars = [m(input, target) for m in self.scalar_losses] 83 | return torch.cat(scalars) 84 | -------------------------------------------------------------------------------- /dense_estimation/output.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | 6 | class GaussianScaleMixtureOutput(nn.Module): 7 | def __init__(self, num_gaussians): 8 | super().__init__() 9 | self.num_gaussians = num_gaussians 10 | self.num_channels = 2 * num_gaussians + 1 11 | self.softmax = nn.Softmax2d() 12 | 13 | def forward(self, x): 14 | assert x.size(1) == self.num_channels 15 | 16 | weights, variances, mean = torch.split(x, self.num_gaussians, dim=1) 17 | variances = torch.exp(variances) 18 | weights = self.softmax(weights) 19 | return mean, variances, weights 20 | 21 | 22 | class PowerExponentialOutput(nn.Module): 23 | def __init__(self): 24 | super().__init__() 25 | self.num_channels = 2 26 | self.relu = nn.ReLU() 27 | 28 | def forward(self, x): 29 | assert x.size(1) == 2 30 | 31 | mean, variance = torch.split(x, 1, dim=1) 32 | #mean = self.relu(mean) 33 | 34 | variance = torch.exp(variance) 35 | return mean, variance 36 | -------------------------------------------------------------------------------- /dense_estimation/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import math 5 | 
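# Fully convolutional ResNet-50: an ImageNet-pretrainable encoder with either the
# up-projection decoder of Laina et al. or an optional FPN-style decoder (fpn=True).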
import numpy as np 6 | from torchvision.models.resnet import Bottleneck, model_urls 7 | import torch.utils.model_zoo as model_zoo 8 | import torch.nn.functional as F 9 | 10 | 11 | class _ProjectUp(nn.Module): 12 | def __init__(self, num_input_features): 13 | super().__init__() 14 | num_output_features = int(num_input_features / 2) 15 | self.conv1 = nn.Conv2d(num_input_features, num_output_features, 16 | kernel_size=5, padding=2) 17 | self.bn1 = nn.BatchNorm2d(num_output_features) 18 | self.relu = nn.ReLU(inplace=True) 19 | self.conv2 = nn.Conv2d(num_output_features, num_output_features, 20 | kernel_size=3, padding=1) 21 | self.bn2 = nn.BatchNorm2d(num_output_features) 22 | self.conv_proj = nn.Conv2d(num_input_features, num_output_features, 23 | kernel_size=5, padding=2) 24 | self.upsample = nn.UpsamplingNearest2d(scale_factor=2) 25 | self._unpool_masks = dict() 26 | 27 | def _get_unpool_mask(self, x): 28 | size = x.size() 29 | n, c, h, w = size 30 | key = tuple(size) 31 | if not key in self._unpool_masks: 32 | unpool_mask = [[0.0 if x % 2 == 0 and y % 2 == 0 else 1.0 for x in range(w)] 33 | for y in range(h)] 34 | unpool_mask = np.tile(unpool_mask, (n, c, 1, 1)) 35 | unpool_mask = torch.Tensor(unpool_mask).byte() 36 | if x.is_cuda: 37 | unpool_mask = unpool_mask.cuda() 38 | self._unpool_masks[key] = unpool_mask 39 | return self._unpool_masks[key] 40 | 41 | def forward(self, x, skip): 42 | x = self.upsample(x) 43 | unpool_mask = self._get_unpool_mask(x) 44 | x = x.masked_fill(Variable(unpool_mask), 0.0) 45 | 46 | proj = self.conv_proj(x) 47 | 48 | out = self.conv1(x) 49 | out = self.bn1(out) 50 | out = self.relu(out) 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | out += proj 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class _FPNUp(nn.Module): 61 | def __init__(self, num_input_features, skip_channel_adjust=True): 62 | super().__init__() 63 | self.conv_channel_adjust = nn.Conv2d(num_input_features, 256, 64 | kernel_size=1) 65 | self.conv_fusion = nn.Conv2d(256, 256, 66 | kernel_size=3, padding=1) 67 | 68 | def forward(self, x, skip): 69 | upsample = nn.UpsamplingBilinear2d(size=skip.size()[2:]) 70 | x = upsample(x) 71 | skip = self.conv_channel_adjust(skip) 72 | fused = self.conv_fusion(x + skip) 73 | return fused 74 | 75 | 76 | class ResNet(nn.Module): 77 | 78 | def __init__(self, block, layers, num_classes=1, output=None, fpn=False, 79 | dropout_active=True): 80 | self.inplanes = 64 81 | 82 | super(ResNet, self).__init__() 83 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 84 | bias=False) 85 | self.bn1 = nn.BatchNorm2d(64) 86 | self.relu = nn.ReLU(inplace=True) 87 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 88 | self.layer1 = self._make_layer(block, 64, layers[0]) 89 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 90 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 91 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 92 | self.dropout_active = dropout_active 93 | 94 | # -- Adapted for fully convolutional operation 95 | Up = _FPNUp if fpn else _ProjectUp 96 | self.fpn = fpn 97 | self.up1 = Up(1024) 98 | self.up2 = Up(512) 99 | self.up3 = Up(256) 100 | self.up4 = Up(128) 101 | if fpn: 102 | self.conv_init_fpn = nn.Conv2d(2048, 256, kernel_size=1, stride=1, bias=True) 103 | out_channels = 256 104 | else: 105 | self.conv_up = nn.Conv2d(2048, 1024, kernel_size=1, stride=1, bias=True) 106 | self.bn_up = nn.BatchNorm2d(1024) 107 | out_channels = 64 108 | 109 | self.output 
= output 110 | if output is not None: 111 | num_classes = output.num_channels 112 | 113 | self.conv_out = nn.Conv2d(out_channels, num_classes, kernel_size=3, stride=1, padding=0, 114 | bias=True) 115 | self._upsamplings = dict() 116 | 117 | for m in self.modules(): 118 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 119 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 120 | m.weight.data.normal_(0, math.sqrt(2. / n)) 121 | if m.bias is not None: 122 | m.bias.data.zero_() 123 | elif isinstance(m, nn.BatchNorm2d): 124 | m.weight.data.fill_(1) 125 | m.bias.data.zero_() 126 | 127 | # We have to initialize the output weights to keep predicted variance low, as it will be 128 | # exponentiated 129 | self.conv_out.weight.data.normal_(0, 0.01) 130 | 131 | def _make_layer(self, block, planes, blocks, stride=1): 132 | downsample = None 133 | if stride != 1 or self.inplanes != planes * block.expansion: 134 | downsample = nn.Sequential( 135 | nn.Conv2d(self.inplanes, planes * block.expansion, 136 | kernel_size=1, stride=stride, bias=False), 137 | nn.BatchNorm2d(planes * block.expansion), 138 | ) 139 | 140 | layers = [] 141 | layers.append(block(self.inplanes, planes, stride, downsample)) 142 | self.inplanes = planes * block.expansion 143 | for i in range(1, blocks): 144 | layers.append(block(self.inplanes, planes)) 145 | 146 | return nn.Sequential(*layers) 147 | 148 | def _get_upsample(self, x): 149 | h, w = x.size()[2:] 150 | key = (h, w) 151 | if not key in self._upsamplings: 152 | upsample = nn.UpsamplingBilinear2d((h, w)) 153 | self._upsamplings[key] = upsample 154 | return self._upsamplings[key] 155 | 156 | def forward(self, x): 157 | upsample = self._get_upsample(x) 158 | 159 | x = self.conv1(x) 160 | x = self.bn1(x) 161 | l0 = self.relu(x) 162 | x = self.maxpool(l0) 163 | 164 | l1 = self.layer1(x) 165 | l2 = self.layer2(l1) 166 | l3 = self.layer3(l2) 167 | l4 = self.layer4(l3) 168 | 169 | l4 = F.dropout(l4, training=self.dropout_active) 170 | 171 | # -- Adapted for fully convolutional operation 172 | 173 | if self.fpn: 174 | x = self.conv_init_fpn(l4) 175 | else: 176 | x = self.conv_up(l4) 177 | x = self.bn_up(x) 178 | 179 | 180 | x = self.up1(x, l3) 181 | x = self.up2(x, l2) 182 | x = self.up3(x, l1) 183 | if not self.fpn: 184 | x = self.up4(x, l0) 185 | 186 | x = self.conv_out(x) 187 | x = upsample(x) 188 | 189 | if self.output is not None: 190 | x = self.output(x) 191 | else: 192 | x = self.relu(x) 193 | 194 | return x 195 | 196 | 197 | def resnet50(pretrained=True, **kwargs): 198 | """Constructs a ResNet-50 model. 
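    The classification head is not part of this model; with `pretrained`, all
    ImageNet weights except `fc.*` are copied into the encoder.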
--------------------------------------------------------------------------------
/dense_estimation/trainer.py:
--------------------------------------------------------------------------------
import os.path
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

from .losses import DistributionLogLoss


class Trainer():
    def __init__(self, model, training_criterion, testing_multi_criterion,
                 training_loader, testing_loader, display_interval=100, cuda=True,
                 save_dir=None, logger=None, logging_interval=100, lr=0.001,
                 distribution=None):
        if cuda:
            model = model.cuda()
            training_criterion = training_criterion.cuda()
            testing_multi_criterion = testing_multi_criterion.cuda()

        # A distribution is only valid together with its log-likelihood loss.
        assert distribution is None or isinstance(training_criterion,
                                                  DistributionLogLoss)

        self.cuda = cuda
        self.model = model
        self.training_criterion = training_criterion
        self.testing_multi_criterion = testing_multi_criterion
        self.testing_loader = testing_loader
        self.training_loader = training_loader
        self.display_interval = display_interval
        self.save_dir = save_dir
        self.logger = logger
        self.logging_interval = logging_interval
        self.optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                   weight_decay=1e-4)
        #self.optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=1e-4)
        self.distribution = distribution
        self.lr = lr

    def train(self, epochs, restore_path=None, starting_epoch=0):
        if restore_path:
            self.restore(restore_path)

        for epoch in range(epochs):
            self.train_epoch(epoch, starting_epoch)
            self.test(epoch, starting_epoch)
            if self.save_dir:
                self.save(epoch + starting_epoch)
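    # Usage sketch (hypothetical wiring; train.py at the repository root
    # assembles exactly these pieces from its command-line options):
    #
    #   trainer = Trainer(model, BerHuLoss(), TestingLosses([RMSLoss()]),
    #                     training_loader, testing_loader, cuda=True,
    #                     save_dir='checkpoints/my_test', logger=logger,
    #                     lr=0.01)
    #   trainer.train(epochs=50)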
    def train_epoch(self, epoch, starting_epoch):
        log_epoch = epoch + starting_epoch

        self.model.train()
        num_iterations = len(self.training_loader)

        epoch_loss = 0

        for i, batch in enumerate(self.training_loader):
            input = Variable(batch[0])
            target = Variable(batch[1])
            if self.cuda:
                input = input.cuda()
                target = target.cuda()

            output = self.model(input)

            if self.distribution is not None:
                pred_channels = self.distribution(*output).mean.size(1)
            else:
                pred_channels = output.size(1)

            # If the target carries extra channels, they form a validity mask.
            if pred_channels != target.size(1):
                target, mask = torch.split(target, pred_channels, dim=1)
                loss = self.training_criterion(output, target, mask=mask)
            else:
                loss = self.training_criterion(output, target)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.data[0]

            if i % self.display_interval == 0 or i == 0:
                print("===> Epoch[{}]({}/{}): Loss: {:.4f}"
                      .format(log_epoch,
                              i,
                              len(self.training_loader),
                              loss.data[0]))

            if self.logger is not None and (i % self.logging_interval == 0 or i == 0):
                self.logger.log_training_loss(log_epoch * num_iterations + i,
                                              loss.data[0],
                                              self.lr)

        print("===> Epoch {} Complete: Avg. Loss: {:.4f}"
              .format(log_epoch, epoch_loss / num_iterations))

    def test(self, epoch, starting_epoch):
        log_epoch = epoch + starting_epoch

        loss_names = [m.__class__.__name__
                      for m in self.testing_multi_criterion.scalar_losses]
        losses = np.zeros(len(loss_names))
        self.model.eval()

        for i, batch in enumerate(self.testing_loader):
            input = Variable(batch[0], volatile=True)
            target = Variable(batch[1], volatile=True)
            if self.cuda:
                input = input.cuda()
                target = target.cuda()

            # Predictions are computed at half resolution
            upsample = nn.UpsamplingBilinear2d(size=target.size()[2:])
            output = self.model(input)

            if self.distribution is not None:
                cpu_outputs = [x.cpu().data for x in output]
                output = self.distribution(*output).mean
            else:
                cpu_outputs = [output.cpu().data]

            output = upsample(output)

            losses += self.testing_multi_criterion(output, target).cpu().data.numpy()

            if self.logger is not None and i == 0:
                self.logger.log_testing_images(log_epoch, input.cpu().data,
                                               cpu_outputs,
                                               target.cpu().data)

        losses /= len(self.testing_loader)
        loss_strings = ["{}: {:.4f}".format(n, l)
                        for n, l in zip(loss_names, losses)]

        if self.logger is not None:
            self.logger.log_testing_losses(log_epoch, losses)

        print("===> Avg. Testing {}"
              .format(', '.join(loss_strings)))

    def save(self, epoch):
        assert self.save_dir is not None
        checkpoint_name = "model_{}.pth".format(epoch)
        save_path = os.path.join(self.save_dir, checkpoint_name)
        torch.save(self.model.state_dict(), save_path)
        print("Checkpoint saved to {}".format(save_path))

    def restore(self, path):
        state_dict = torch.load(path)
        self.model.load_state_dict(state_dict)
        print("Restored checkpoint from {}".format(path))
--------------------------------------------------------------------------------
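The raw-data pipeline stores depth as 16-bit PGM files: process_raw.m writes
uint16(round(depth_m / 10 * 65535)), and the script below maps those values
back to metres. A minimal round-trip sketch (values illustrative only):

import numpy as np

MAX_DEPTH = 10                                        # metres
stored = np.uint16(round(5.0 / MAX_DEPTH * 65535))    # as written by process_raw.m
metres = (np.float32(stored) / 2 ** 16) * MAX_DEPTH   # as decoded below
print(metres)  # ~5.0; the 65535-vs-65536 scale mismatch is a ~0.002% offset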
/nyud_raw_train_to_npy.py:
--------------------------------------------------------------------------------
import os
import shutil

from PIL import Image
import numpy as np
import scipy.ndimage

MAX_DEPTH = 10


def save_npy(source_dir, target_dir):
    image_folder = os.path.join(source_dir, '_rgb')
    depth_folder = os.path.join(source_dir, '_depth')
    mask_folder = os.path.join(source_dir, '_mask')
    npy_folder = os.path.join(target_dir, 'npy')
    if os.path.isdir(npy_folder):
        shutil.rmtree(npy_folder)
    os.makedirs(npy_folder)
    image_paths = [os.path.join(image_folder, n)
                   for n in sorted(os.listdir(image_folder))]
    depth_paths = [os.path.join(depth_folder, n)
                   for n in sorted(os.listdir(depth_folder))]
    mask_paths = [os.path.join(mask_folder, n)
                  for n in sorted(os.listdir(mask_folder))]
    for i, paths in enumerate(zip(image_paths, depth_paths, mask_paths)):
        image_path, depth_path, mask_path = paths
        image = np.array(Image.open(image_path), dtype=np.float32)
        depth = np.array(Image.open(depth_path), dtype=np.float32)
        depth = np.expand_dims(depth, 2)
        # Undo the uint16 encoding from process_raw.m (which scales by 65535;
        # dividing by 2 ** 16 here introduces a negligible ~0.002% offset).
        depth = (depth / 2 ** 16) * MAX_DEPTH
        mask = np.array(Image.open(mask_path), dtype=np.float32)
        mask = np.float32((np.expand_dims(mask, 2) / 255) > 0.5)
        # Stack RGB (3), depth (1) and validity mask (1) into one CHW array.
        stacked = np.transpose(np.concatenate((image, depth, mask), 2),
                               (2, 0, 1))
        # Downscale to half resolution with bilinear interpolation.
        stacked = scipy.ndimage.interpolation.zoom(stacked, (1, 0.5, 0.5), order=1)

        np.save(os.path.join(npy_folder, '{}.npy'.format(i)), stacked)


if __name__ == '__main__':
    save_npy('/media/data/datasets/nyu_depth_v2_raw',
             '/home/smeister/datasets/nyu_depth_v2')
--------------------------------------------------------------------------------
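Each training example produced by save_npy above is a single 5-channel CHW
array. A quick sketch of unpacking one (file path hypothetical):

import numpy as np

arr = np.load('npy/0.npy')                  # one example written by save_npy
rgb, depth, mask = arr[:3], arr[3], arr[4]  # 3 + 1 + 1 channels, CHW layout
print(rgb.shape, depth.shape, mask.shape)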
/nyud_test_to_npy.py:
--------------------------------------------------------------------------------
import os
import shutil
import numpy as np
import scipy.io
import h5py

from dense_estimation.datasets.util import maybe_download


NYUD_URL = 'http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat'
NYUD_SPLITS_URL = 'http://horatio.cs.nyu.edu/mit/silberman/indoor_seg_sup/splits.mat'


def save_npy(source_dir, target_dir):
    if not os.path.isdir(source_dir):
        os.makedirs(source_dir)
    nyud_file_path = os.path.join(source_dir, 'nyu_depth_v2_labeled.mat')
    splits_file_path = os.path.join(source_dir, 'splits.mat')

    maybe_download(NYUD_URL, nyud_file_path)
    maybe_download(NYUD_SPLITS_URL, splits_file_path)

    print("Loading dataset: NYU Depth V2")
    nyud_dict = h5py.File(nyud_file_path, 'r')
    splits_dict = scipy.io.loadmat(splits_file_path)

    images = np.asarray(nyud_dict['images'], dtype=np.float32)
    depths = np.asarray(nyud_dict['depths'], dtype=np.float32)

    # convert to NCHW arrays
    images = images.swapaxes(2, 3)
    depths = np.expand_dims(depths.swapaxes(1, 2), 1)

    # Only the official test split is exported here; for the train split one
    # would use splits_dict['trainNdxs'] instead (MATLAB indices are 1-based).
    indices = splits_dict['testNdxs'][:, 0] - 1

    images = np.take(images, indices, axis=0)
    depths = np.take(depths, indices, axis=0)

    npy_folder = os.path.join(target_dir, 'npy')
    if os.path.isdir(npy_folder):
        shutil.rmtree(npy_folder)
    os.makedirs(npy_folder)

    np.save(os.path.join(npy_folder, 'images.npy'), images)
    np.save(os.path.join(npy_folder, 'depths.npy'), depths)


if __name__ == '__main__':
    save_npy('/home/smeister/work/depth-prediction/datasets/nyu_depth_v2',
             '/home/smeister/datasets/nyu_depth_v2/labeled')
--------------------------------------------------------------------------------
/preview_dataset.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
import torch.utils.data as data
import matplotlib.pyplot as plt

from dense_estimation.datasets.nyu_depth_v2 import NYU_Depth_V2


dset = NYU_Depth_V2("/home/smeister/datasets", split='train',
                    transform=NYU_Depth_V2.get_transform(normalize=False))
#print(dset.compute_image_std(), dset.compute_image_mean())
trainloader = data.DataLoader(dset, batch_size=4)
for i, batch in enumerate(trainloader):
    imgs, labels = batch
    if i == 0:
        # TODO make_grid is currently broken
        #img = torchvision.utils.make_grid([imgs, labels]).numpy()
        #img = np.transpose(img, (1, 2, 0))
        #img = img[:, :, ::-1]
        #plt.imshow(img)
        print(labels[0, 1].numpy())
        plt.imshow(np.transpose(imgs.numpy()[1], (1, 2, 0)))  # CHW -> HWC
        plt.figure()
        plt.imshow(labels[1, 0].numpy(), cmap='gray')  # depth channel
        plt.figure()
        plt.imshow(labels[1, 1].numpy(), cmap='gray')  # mask channel
        plt.show()
--------------------------------------------------------------------------------
/process_raw.m:
--------------------------------------------------------------------------------
% Usage: copy to raw data directory along with toolbox files (inside of 'tools' subdirectory)

addpath('tools');

d = dir('.');
isub = [d(:).isdir]; % logical vector marking directories
nameFolds = {d(isub).name}';
nameFolds(ismember(nameFolds,{'.','..','tools'})) = [];
nameFolds(~cellfun(@isempty,(regexp(nameFolds,'._out')))) = [];
disp(numel(nameFolds));

count = 0;
dist = 40; % keep every 40th synchronized frame

rgbOutFolder = '_rgb';
if ~exist(rgbOutFolder, 'dir')
    mkdir(rgbOutFolder);
end

depthOutFolder = '_depth';
if ~exist(depthOutFolder, 'dir')
    mkdir(depthOutFolder);
end

maskOutFolder = '_mask';
if ~exist(maskOutFolder, 'dir')
    mkdir(maskOutFolder);
end

%filledDepthOutFolder = '_filled';
%if ~exist(filledDepthOutFolder, 'dir')
%    mkdir(filledDepthOutFolder);
%end

for f = 1:numel(nameFolds)
    disp(f);
    disp(nameFolds{f});
    files = get_synched_frames(nameFolds{f});
    c = numel(files);
    disp(strcat('filecount: ',int2str(c)));

    files = files(1:dist:c);
    c = numel(files);
    disp(strcat('filecount to process: ',int2str(c)));

    parfor idx = 1:c
        rgbFilename = strcat(nameFolds{f},'/',files(idx).rawRgbFilename);
        depthFilename = strcat(nameFolds{f},'/',files(idx).rawDepthFilename);
        outRGBFilename = strcat(rgbOutFolder,'/',num2str(count + idx - 1),'.ppm');
        outDepthFilename = strcat(depthOutFolder,'/',num2str(count + idx - 1),'.pgm');
        maskOutFilename = strcat(maskOutFolder,'/',num2str(count + idx - 1),'.pgm');
        %filledDepthFilename = strcat(filledDepthOutFolder,'/',num2str(count + idx - 1),'.pgm');
        rgb = imread(rgbFilename);
        depth = imread(depthFilename);
        depth = swapbytes(depth);
        [depthOut, rgbOut] = project_depth_map(depth, rgb);
        %filledImgDepth = fill_depth_colorization(double(rgbOut) / 255.0, depthOut, 0.8);
        imgDepth = depthOut;
        imgDepth = imgDepth / 10.0;
        imgDepth = crop_image(imgDepth);
        rgbOut = crop_image(rgbOut);
        maskOut = double(~(imgDepth == 0 | imgDepth == 1.0));

        %filledImgDepth = filledImgDepth / 10.0;
        %filledImgDepth = crop_image(filledImgDepth);

        imwrite(rgbOut, outRGBFilename);
        imwrite(uint16(round(imgDepth*65535)), outDepthFilename);
        imwrite(maskOut, maskOutFilename);
        %imwrite(filledImgDepth, filledDepthFilename);

    end
    count = count + c;
end
disp(count);

exit;
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import argparse
import os
import json

import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

from dense_estimation.resnet import resnet50
from dense_estimation.output import GaussianScaleMixtureOutput, PowerExponentialOutput
from dense_estimation.losses import (BerHuLoss, RMSLoss, RelLoss, TestingLosses, HuberLoss,
                                     Log10Loss, DistributionLogLoss)
#from dense_estimation.distributions import GaussianScaleMixture, PowerExponential
from dense_estimation.datasets.nyu_depth_v2 import NYU_Depth_V2
from dense_estimation.data import get_testing_loader
from dense_estimation.app.experiment import get_experiment
from dense_estimation.app.gui import display
from dense_estimation.logger import DistributionVisualizer, BasicVisualizer, visuals_to_numpy

parser = argparse.ArgumentParser(description='testing script')
parser.add_argument('--no_cuda', action='store_true', help='use cpu')
parser.add_argument('--threads', type=int, default=16, help='number of threads for data loader')
parser.add_argument('--seed', type=int, default=123, help='random seed to use')
parser.add_argument('--ex', type=str, default='default',
                    help='comma separated names of experiments to compare; use name:epoch to specify epoch to load')
parser.add_argument('--gpu', type=str, default='0', help='cuda device to use unless --no_cuda is given')
parser.add_argument('--max', type=int, default=20, help='max number of examples to visualize')
parser.add_argument('--samples', type=int, default=1, help='number of monte carlo dropout samples (sampling enabled if > 1)')
opt = parser.parse_args()


cuda = not opt.no_cuda
if cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    if not torch.cuda.is_available():
        raise Exception("No GPU found, please run with --no_cuda")

out_size = (208, 256)
transf_size = (out_size[1], out_size[0])

raw_root = '/home/smeister/datasets'
testing_loader = get_testing_loader(NYU_Depth_V2, raw_root, 1, transf_size,
                                    opt.threads, debug=False)


class BasicDist():
    """Minimal stand-in distribution holding Monte Carlo sample statistics."""

    def __init__(self, mean, var):
        self.mean = mean
        self.variance = var
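# Comment-only sketch of the Monte Carlo dropout path inside _test() below:
# with --samples > 1 the network keeps dropout active at test time, and the
# per-pixel sample statistics are wrapped in BasicDist so the distribution
# visualizer can display them like a predicted (mean, variance) pair:
#
#   samples = [model(input) for _ in range(opt.samples)]
#   stacked = torch.cat(samples, dim=1)   # one channel per sample
#   output = [torch.mean(stacked, dim=1), torch.var(stacked, dim=1)]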
def _test(ex, epoch):
    results = []
    with open('./log/{}/opts.txt'.format(ex), 'r') as f:
        ex_opt = json.load(f)

    if ex_opt['dist'] != '':
        # Note: this branch needs the distributions module, which is not
        # included in this repository (see the README).
        dist_map = {
            'gsm': (GaussianScaleMixture, lambda: GaussianScaleMixtureOutput(ex_opt['num_gaussians'])),
            'exp': (PowerExponential, lambda: PowerExponentialOutput()),
        }
        distribution, output_unit = dist_map[ex_opt['dist']]
        model = resnet50(output=output_unit(), fpn=ex_opt['fpn'], dropout_active=False)
        visualizer = DistributionVisualizer(distribution)
        dropout_active = False
    else:
        output_unit = None
        dropout_active = opt.samples > 1
        model = resnet50(fpn=ex_opt['fpn'], dropout_active=dropout_active)
        if dropout_active:
            distribution = BasicDist
            visualizer = DistributionVisualizer(BasicDist)
        else:
            distribution = None
            visualizer = BasicVisualizer()

    losses_clses = [RMSLoss(), RelLoss(), Log10Loss()]
    #if distribution is not None:
    #    losses_clses += [DistributionLogLoss(distribution)]

    testing_multi_criterion = TestingLosses(losses_clses)

    if cuda:
        model = model.cuda()
        testing_multi_criterion = testing_multi_criterion.cuda()

    _, _, restore_path, _ = get_experiment(ex, False, epoch=epoch)
    state_dict = torch.load(restore_path)
    model.load_state_dict(state_dict)

    loss_names = [m.__class__.__name__
                  for m in testing_multi_criterion.scalar_losses]
    losses = np.zeros(len(loss_names))
    model.eval()
    prob = 0

    num = opt.max if opt.max != -1 else len(testing_loader)
    # Metrics are averaged over the batches actually processed, which may be
    # fewer than the full test set when --max limits the run.
    num_processed = min(num, len(testing_loader))

    averages = []

    for i, batch in enumerate(testing_loader):
        print(i)
        if i >= num:
            break

        input = torch.autograd.Variable(batch[0], volatile=True)
        target = torch.autograd.Variable(batch[1], volatile=True)
        if cuda:
            input = input.cuda()
            target = target.cuda()

        # Predictions are computed at half resolution
        upsample = nn.UpsamplingBilinear2d(size=target.size()[2:])

        samples = []
        if dropout_active:
            for _ in range(opt.samples):
                sample = model(input)
                samples.append(sample)
            stacked = torch.cat(samples, dim=1)
            mean = torch.mean(stacked, dim=1)
            var = torch.var(stacked, dim=1)
            output = [mean, var]
        else:
            output = model(input)

        if isinstance(output, list):
            output = [upsample(x) for x in output]
            cpu_outputs = [x.cpu().data for x in output]
            d = distribution(*output)
            output = d.mean
            if output_unit:
                prob += torch.mean(d.prob(target[:, 0:1, :, :])).cpu().data[0]
                averages.append(d.averages)
        else:
            output = upsample(output)
            cpu_outputs = [output.cpu().data]

        losses += testing_multi_criterion(output, target).cpu().data.numpy()

        viz_pt = visualizer(input.cpu().data, cpu_outputs, target.cpu().data)
        images = visuals_to_numpy(viz_pt)
        results.append(images)

    losses /= num_processed
    loss_strings = ["{}: {:.4f}".format(n, l)
                    for n, l in zip(loss_names, losses)]

    print("===> [{}] Testing {}"
          .format(ex, ', '.join(loss_strings)))

    if output_unit:
        averages = torch.squeeze(torch.mean(torch.stack(averages, dim=1), dim=1))
        prob /= num_processed
        print("===> [{}] Avg. Likelihood {}".format(ex, prob))
        print("===> [{}] Dist. Averages {}"
              .format(ex, averages.cpu().data.numpy()))
        distribution.plot(averages, label=ex)

    return results, visualizer.names

if __name__ == '__main__':
    results = []
    plt.figure()
    for spec in opt.ex.split(','):
        splits = spec.split(':')
        ex = splits[0]
        epoch = int(splits[1]) if len(splits) == 2 else None

        result, image_names = _test(ex, epoch)
        results.append(result)
    plt.legend()
    plt.show()
    display(results, image_names)
--------------------------------------------------------------------------------
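test.py (above) compares any number of trained experiments side by side; each
entry of --ex may pin a checkpoint epoch with a name:epoch suffix. A
hypothetical invocation:

python test.py --ex baseline,fpn_run:40 --max 10
# "baseline"   -> latest checkpoint of experiment baseline
# "fpn_run:40" -> the epoch-40 checkpoint (model_40.pth) of experiment fpn_run,
#                 resolved via dense_estimation/app/experiment.py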
/test_laina.py:
--------------------------------------------------------------------------------
import os
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import h5py
import scipy.io
from torch.autograd import Variable

import laina_models

from dense_estimation.losses import RMSLoss, RelLoss, TestingLosses, Log10Loss
from dense_estimation.datasets.image_utils import BilinearResize


def test(model_data_path, images, targets):

    # Default input size
    height = 240
    width = 320
    channels = 3
    batch_size = 1

    # Create a placeholder for the input image
    input_node = tf.placeholder(tf.float32, shape=(batch_size, height, width, channels))

    # Construct the network
    net = laina_models.ResNet50UpProj({'data': input_node}, batch_size)

    testing_multi_criterion = TestingLosses([RMSLoss(), RelLoss(), Log10Loss()])

    losses = np.zeros(3)
    resize = BilinearResize(0.5)

    with tf.Session() as sess:

        # Load the converted parameters
        print('Loading the model')
        net.load(model_data_path, sess)

        # Initialize any variables the checkpoint did not cover.
        uninitialized_vars = []
        for var in tf.global_variables():
            try:
                sess.run(var)
            except tf.errors.FailedPreconditionError:
                uninitialized_vars.append(var)

        init_new_vars_op = tf.variables_initializer(uninitialized_vars)
        sess.run(init_new_vars_op)

        for i in range(images.shape[0]):
            image = np.expand_dims(resize(images[i, :, :, :]), 0)
            target = np.expand_dims(targets[i, :, :, :], 0)

            pred = sess.run(net.get_output(), feed_dict={input_node: image})
            target_pt = Variable(torch.Tensor(target).permute(0, 3, 1, 2))  # NHWC -> NCHW
            pred_pt = Variable(torch.Tensor(pred).permute(0, 3, 1, 2))  # NHWC -> NCHW

            upsample = nn.UpsamplingBilinear2d(size=target_pt.size()[2:])
            pred_pt = upsample(pred_pt)
            losses += testing_multi_criterion(pred_pt, target_pt).data.numpy()

            if i < 5:
                plt.figure()
                plt.imshow(pred_pt.data.numpy()[0, 0, :, :], cmap='gray')
                plt.show()
            print("{}/{}".format(i, images.shape[0]))

        losses /= len(images)
        print(losses)


def load_testing_data(root):
    folder = os.path.join(root, 'nyu_depth_v2')
    nyud_file_path = os.path.join(folder, 'nyu_depth_v2_labeled.mat')
    splits_file_path = os.path.join(folder, 'splits.mat')
    nyud_dict = h5py.File(nyud_file_path, 'r')
    splits_dict = scipy.io.loadmat(splits_file_path)
    images = np.asarray(nyud_dict['images'], dtype=np.float32)
    depths = np.asarray(nyud_dict['depths'], dtype=np.float32)

    # NCWH -> NHWC
    images = np.transpose(images, (0, 3, 2, 1))
    depths = np.transpose(np.expand_dims(depths, 1), (0, 3, 2, 1))

    indices = splits_dict['testNdxs'][:, 0] - 1
    images = np.take(images, indices, axis=0)
    depths = np.take(depths, indices, axis=0)
    return images, depths


def main():
    images, depths = load_testing_data('./datasets')
    #images = np.random.rand(5, 480, 640, 3)
    #depths = np.random.rand(5, 480, 640, 1)
    # test() reports the metrics itself and has no return value.
    test('./downloads/NYU_ResNet-UpProj.npy', images, depths)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
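train.py (next file) writes its argparse options to log/<name>/opts.txt so that
test.py can later rebuild the exact same architecture. A minimal sketch of that
round trip (file path hypothetical):

import argparse
import json

opt = argparse.Namespace(dist='', fpn=False, num_gaussians=2)
with open('/tmp/opts.txt', 'w') as f:
    json.dump(vars(opt), f)
with open('/tmp/opts.txt') as f:
    restored = json.load(f)
assert restored['fpn'] is False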
/train.py:
--------------------------------------------------------------------------------
import argparse
import os
import json

import torch

from dense_estimation.densenet import DenseNet
from dense_estimation.resnet import resnet50
from dense_estimation.output import GaussianScaleMixtureOutput, PowerExponentialOutput
from dense_estimation.losses import (BerHuLoss, RMSLoss, RelLoss, TestingLosses, HuberLoss,
                                     Log10Loss, DistributionLogLoss)
#from dense_estimation.distributions import GaussianScaleMixture, PowerExponential
from dense_estimation.datasets.nyu_depth_v2 import NYU_Depth_V2
from dense_estimation.data import get_testing_loader, get_training_loader
from dense_estimation.trainer import Trainer
from dense_estimation.logger import TensorBoardLogger, BasicVisualizer, DistributionVisualizer
from dense_estimation.app.experiment import get_experiment


parser = argparse.ArgumentParser(description='Monocular Depth Prediction + Uncertainty')
parser.add_argument('--batch', type=int, default=16, help='training batch size')
parser.add_argument('--epochs', type=int, default=100, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
parser.add_argument('--cuda', action='store_true', help='use cuda')
parser.add_argument('--threads', type=int, default=16, help='number of threads for data loader')
parser.add_argument('--seed', type=int, default=123, help='random seed to use')
parser.add_argument('--debug', action='store_true', help='load random fake data to run quickly')
parser.add_argument('--overfit', action='store_true', help='train on testing set to check model')
parser.add_argument('--ex', type=str, default='default',
                    help='name of experiment (continue training if existing)')
parser.add_argument('--ow', action='store_true', help='overwrite existing experiment')
parser.add_argument('--gpu', type=str, default='0', help='cuda device to use if using --cuda')
parser.add_argument('--dist', type=str, default='', help='gsm or exp')
parser.add_argument('--num_gaussians', type=int, default=2, help='number of gaussians for gsm distribution')
parser.add_argument('--limit', action='store_true', help='limit number of training examples per epoch')
parser.add_argument('--fpn', action='store_true', help='use resnet upsampling style from FPN paper')
parser.add_argument('--densenet', action='store_true', help='use DenseNet instead of ResNet')
#parser.add_argument('--ckpt', type=str, help='checkpoint epoch to run from if --ex is given and name exists')
opt = parser.parse_args()
print(opt)


cuda = opt.cuda
if cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    if not torch.cuda.is_available():
        raise Exception("No GPU found, please run without --cuda")

torch.manual_seed(opt.seed)
if cuda:
    torch.cuda.manual_seed(opt.seed)

out_size = (208, 256)
transf_size = (out_size[1], out_size[0])


if opt.dist != '':
    # Requires the distributions module, which is not included in this
    # repository (see the README).
    dist_map = {
        'gsm': (GaussianScaleMixture, lambda: GaussianScaleMixtureOutput(opt.num_gaussians)),
        'exp': (PowerExponential, lambda: PowerExponentialOutput())
    }
    distribution, output_unit = dist_map[opt.dist]
    model = resnet50(output=output_unit(), fpn=opt.fpn)
    visualizer = DistributionVisualizer(distribution)
    training_criterion = DistributionLogLoss(distribution)
else:
    distribution = None
    if opt.densenet:
        model = DenseNet()
    else:
        model = resnet50(fpn=opt.fpn)
    visualizer = BasicVisualizer()
    training_criterion = BerHuLoss()


dset_root = './datasets'
raw_root = '/home/smeister/datasets'


log_dir, save_dir, restore_path, starting_epoch = get_experiment(opt.ex,
                                                                 opt.ow)
with open('./log/{}/opts.txt'.format(opt.ex), 'w') as f:
    json.dump(vars(opt), f)

print("Training from epoch {}".format(starting_epoch))

testing_multi_criterion = TestingLosses([RMSLoss(), RelLoss(), Log10Loss()])


if opt.overfit:
    training_loader = get_testing_loader(NYU_Depth_V2, raw_root, opt.batch, transf_size,
                                         opt.threads, debug=opt.debug, shuffle=True,
                                         training=True)
else:
    training_loader = get_training_loader(NYU_Depth_V2, raw_root, opt.batch, transf_size,
                                          opt.threads, limit=30 if opt.limit else None)

testing_loader = get_testing_loader(NYU_Depth_V2, raw_root, opt.batch, transf_size,
                                    opt.threads, debug=opt.debug)

logger = TensorBoardLogger(log_dir, visualizer,
                           testing_loss_names=['RMS', 'Rel', 'Log10'],
                           max_testing_images=9,
                           run_options=str(opt),
                           starting_epoch=starting_epoch)

trainer = Trainer(model, training_criterion, testing_multi_criterion,
                  training_loader, testing_loader, save_dir=save_dir,
                  cuda=opt.cuda, display_interval=10, logger=logger,
                  logging_interval=10, lr=opt.lr, distribution=distribution)
trainer.train(opt.epochs, restore_path=restore_path, starting_epoch=starting_epoch)

# TODO config file for directories & environment setup
--------------------------------------------------------------------------------
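For reference, the RMS, Rel and Log10 metrics reported by trainer.py, test.py
and test_laina.py follow the standard depth-evaluation definitions. The actual
implementations live in dense_estimation/losses.py; this NumPy sketch is
illustrative only and assumes valid, strictly positive depths:

import numpy as np

def rms(pred, target):
    return np.sqrt(np.mean((pred - target) ** 2))

def rel(pred, target):
    return np.mean(np.abs(pred - target) / target)

def log10(pred, target):
    return np.mean(np.abs(np.log10(pred) - np.log10(target)))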