├── Examples
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── 4.png
│   ├── 5.png
│   ├── 6.png
│   ├── 7.png
│   └── .gitignore
├── imgs
│   ├── sample.png
│   ├── pipeLine.png
│   └── thumbnail.jpg
├── results
│   └── .gitignore
├── models
│   ├── __init__.py
│   ├── base_model.py
│   ├── test_model.py
│   └── networks.py
├── data
│   ├── base_data_loader.py
│   ├── base_dataset.py
│   ├── single_dataset.py
│   ├── __init__.py
│   └── image_folder.py
├── .gitignore
├── download_pretrained_models.sh
├── run_test.py
├── remove_running_stats.py
├── LICENSE
├── util.py
├── arguments.py
└── README.md
/Examples/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/1.png -------------------------------------------------------------------------------- /Examples/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/2.png -------------------------------------------------------------------------------- /Examples/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/3.png -------------------------------------------------------------------------------- /Examples/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/4.png -------------------------------------------------------------------------------- /Examples/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/5.png -------------------------------------------------------------------------------- /Examples/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/6.png -------------------------------------------------------------------------------- /Examples/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/Examples/7.png -------------------------------------------------------------------------------- /imgs/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/imgs/sample.png -------------------------------------------------------------------------------- /imgs/pipeLine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/imgs/pipeLine.png -------------------------------------------------------------------------------- /imgs/thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atapour/monocularDepth-Inference/HEAD/imgs/thumbnail.jpg -------------------------------------------------------------------------------- /results/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /Examples/.gitignore:
-------------------------------------------------------------------------------- 1 | # keep the png files in this directory 2 | !*.png 3 | # and this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | def create_model(args): 2 | model = None 3 | from .test_model import TestModel 4 | model = TestModel() 5 | model.initialize(args) 6 | print("The model has now been created") 7 | return model 8 | -------------------------------------------------------------------------------- /data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | class BaseDataLoader(): 2 | 3 | def __init__(self): 4 | pass 5 | 6 | def initialize(self, args): 7 | self.args = args 8 | pass 9 | 10 | def load_data(self): 11 | return None 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | !checkpoints 2 | checkpoints/*.pth 3 | !results 4 | !Examples 5 | *.zip 6 | unused 7 | __pycache__ 8 | *.pyc 9 | models/__pycache__ 10 | models/*.pyc 11 | data/__pycache__ 12 | data/*.pyc 13 | *.png 14 | !Examples/*.png 15 | !imgs/*.png 16 | 17 | -------------------------------------------------------------------------------- /download_pretrained_models.sh: -------------------------------------------------------------------------------- 1 | echo "downloading pretrained models..." 2 | 3 | mkdir -p ./checkpoints 4 | 5 | MODELS=./checkpoints/checkpoints.zip 6 | URL_MODELS=https://collections.durham.ac.uk/downloads/r2rf55z770q 7 | 8 | echo "downloading the style transfer and depth estimation models..." 9 | 10 | wget --quiet --no-check-certificate --show-progress $URL_MODELS -O $MODELS 11 | 12 | echo "checking the MD5 checksum for downloaded models..." 13 | 14 | cd checkpoints 15 | 16 | CHECK_SUM_CHECKPOINTS='b176b00450ce9aaf3ef812087ed3ef49 checkpoints.zip' 17 | 18 | echo $CHECK_SUM_CHECKPOINTS | md5sum -c 19 | 20 | echo "Unpacking the zip file..." 21 | 22 | unzip -q checkpoints.zip && rm checkpoints.zip && rm README.txt 23 | 24 | echo "All Done!!" 25 | 26 | -------------------------------------------------------------------------------- /run_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from arguments import Arguments 3 | from data import CreateDataLoader 4 | from models import create_model 5 | from util import save_images 6 | 7 | 8 | if __name__ == '__main__': 9 | args = Arguments().parse() 10 | 11 | data_loader = CreateDataLoader(args) 12 | dataset = data_loader.load_data() 13 | model = create_model(args) 14 | 15 | for i, data in enumerate(dataset): 16 | if i >= args.how_many: 17 | break 18 | model.set_input(data) 19 | model.test() 20 | visuals = model.get_current_visuals() 21 | img_path = model.get_image_paths() 22 | img_size = model.get_image_sizes() 23 | print('%04d: processing image...
%s' % (i, img_path)) 24 | save_images(args.results_dir, visuals, img_path, size=img_size) 25 | -------------------------------------------------------------------------------- /remove_running_stats.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | # This script removes the running stats (mean and var) from the InstanceNorm layers in the netG_A2B checkpoint, since PyTorch 0.4 no longer keeps these by default. It should therefore be run if and only if you are using PyTorch 0.4. 5 | 6 | checkpoint_name = './checkpoints/netG_A2B.pth' # path and name of the checkpoint we intend to remove the stats from 7 | 8 | checkpoint_in = torch.load(checkpoint_name) 9 | checkpoint_out = {} 10 | 11 | print('removing running means and variances from %s.' % checkpoint_name) 12 | for key in checkpoint_in.keys(): 13 | if 'model' in key and 'running' not in key: 14 | checkpoint_out[key] = checkpoint_in[key] 15 | torch.save(checkpoint_out, checkpoint_name) 16 | print('checkpoint %s has now been overwritten.' % checkpoint_name) 17 | -------------------------------------------------------------------------------- /data/base_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import torchvision.transforms as transforms 4 | 5 | class BaseDataset(data.Dataset): 6 | def __init__(self): 7 | super(BaseDataset, self).__init__() 8 | 9 | def name(self): 10 | return 'BaseDataset' 11 | 12 | def initialize(self, opt): 13 | pass 14 | 15 | def get_transform(opt): 16 | transform_list = [] 17 | 18 | transform_list.append(transforms.Lambda( 19 | lambda img: __scale_to_256_factor(img))) 20 | 21 | transform_list += [transforms.ToTensor(), 22 | transforms.Normalize((0.5, 0.5, 0.5), 23 | (0.5, 0.5, 0.5))] 24 | return transforms.Compose(transform_list) 25 | 26 | def __scale_to_256_factor(img): 27 | return img.resize((1024, 256), Image.BICUBIC) -------------------------------------------------------------------------------- /data/single_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from data.base_dataset import BaseDataset, get_transform 3 | from data.image_folder import make_dataset 4 | from PIL import Image 5 | 6 | class TestDataset(BaseDataset): 7 | def initialize(self, opt): 8 | self.opt = opt 9 | self.root = opt.data_directory 10 | self.dir_A = os.path.join(opt.data_directory) 11 | 12 | self.A_paths = make_dataset(self.dir_A) 13 | 14 | self.A_paths = sorted(self.A_paths) 15 | 16 | self.transform = get_transform(opt) 17 | 18 | def __getitem__(self, index): 19 | A_path = self.A_paths[index] 20 | A_img = Image.open(A_path).convert('RGB') 21 | A_size = A_img.size 22 | 23 | A = self.transform(A_img) 24 | input_nc = 3 25 | 26 | return {'A': A, 'A_paths': A_path, 'A_sizes': A_size} 27 | 28 | def __len__(self): 29 | return len(self.A_paths) 30 | 31 | def name(self): 32 | return 'TestDataset' 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Amir Atapour, Durham University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | 5 | class BaseModel(): 6 | def name(self): 7 | return 'BaseModel' 8 | 9 | def initialize(self, args): 10 | self.args = args 11 | self.gpu_ids = args.gpu_ids 12 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor 13 | self.save_dir = os.path.join(args.checkpoints_dir, 'inference') 14 | 15 | def set_input(self, input): 16 | self.input = input 17 | 18 | def forward(self): 19 | pass 20 | 21 | def test(self): 22 | pass 23 | 24 | def get_image_paths(self): 25 | pass 26 | 27 | def optimize_parameters(self): 28 | pass 29 | 30 | def get_current_visuals(self): 31 | return self.input 32 | 33 | def get_current_errors(self): 34 | return {} 35 | 36 | def save(self, label): 37 | pass 38 | 39 | # helper loading function that can be used by subclasses 40 | def load_network(self, network, network_label, epoch_label): 41 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 42 | save_path = os.path.join(self.save_dir, save_filename) 43 | network.load_state_dict(torch.load(save_path)) -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from data.base_data_loader import BaseDataLoader 3 | 4 | 5 | def CreateDataLoader(args): 6 | data_loader = CustomDatasetDataLoader() 7 | data_loader.initialize(args) 8 | return data_loader 9 | 10 | 11 | def CreateDataset(args): 12 | dataset = None 13 | from data.single_dataset import TestDataset 14 | dataset = TestDataset() 15 | print("The dataset has been created") 16 | dataset.initialize(args) 17 | return dataset 18 | 19 | 20 | class CustomDatasetDataLoader(BaseDataLoader): 21 | def name(self): 22 | return 'CustomDatasetDataLoader' 23 | 24 | def initialize(self, args): 25 | BaseDataLoader.initialize(self, args) 26 | self.dataset = CreateDataset(args) 27 | self.dataloader = torch.utils.data.DataLoader( 28 | self.dataset, 29 | batch_size=1, 30 | shuffle=False, 31 | num_workers=1) 32 | 33 | def load_data(self): 34 | return self 35 | 36 | def __len__(self): 37 | return min(len(self.dataset), self.args.max_dataset_size) 38 | 39 | def __iter__(self): 40 | for i, data in enumerate(self.dataloader): 41 | if i >= self.args.max_dataset_size: 42 | break 43 | yield data -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 
| from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import os 6 | import ntpath 7 | import cv2 8 | 9 | def tensor2im(image_tensor): 10 | image_numpy = image_tensor[0].cpu().float().numpy() 11 | if image_numpy.shape[0] == 3: 12 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 13 | return image_numpy.astype(np.uint8) 14 | 15 | elif image_numpy.shape[0] == 1: 16 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 65535.0 17 | return image_numpy.astype(np.uint16) 18 | 19 | def save_image_color(image_numpy, image_path): 20 | image_numpy = image_numpy.astype(np.uint8) 21 | image_pil = Image.fromarray(image_numpy) 22 | image_pil.save(image_path) 23 | 24 | def save_image_depth(image_numpy, image_path): 25 | cv2.imwrite(image_path,image_numpy) 26 | 27 | def mkdirs(paths): 28 | if isinstance(paths, list) and not isinstance(paths, str): 29 | for path in paths: 30 | mkdir(path) 31 | else: 32 | mkdir(paths) 33 | 34 | def mkdir(path): 35 | if not os.path.exists(path): 36 | os.makedirs(path) 37 | 38 | def save_images(results_dir, visuals, image_path, size=None): 39 | image_dir = results_dir 40 | if not os.path.exists(image_dir): 41 | os.makedirs(image_dir) 42 | 43 | short_path = ntpath.basename(image_path[0]) 44 | name = os.path.splitext(short_path)[0] 45 | 46 | for label, im in visuals.items(): 47 | 48 | image_name = '%s_%s.png' % (name, label) 49 | save_path = os.path.join(image_dir, image_name) 50 | h, w, _ = im.shape 51 | if size!=None: 52 | im = cv2.resize(im, size) 53 | 54 | if label == 'depth': 55 | save_image_depth(im, save_path) 56 | else: 57 | save_image_color(im, save_path) -------------------------------------------------------------------------------- /models/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import os 4 | from collections import OrderedDict 5 | from torch.autograd import Variable 6 | import itertools 7 | import util as util 8 | from .base_model import BaseModel 9 | from . 
import networks 10 | import sys 11 | 12 | class TestModel(BaseModel): 13 | def name(self): 14 | return 'TestModel' 15 | def initialize(self, args): 16 | BaseModel.initialize(self, args) 17 | self.input_A = self.Tensor(1, 3, 1024, 256) 18 | 19 | self.netG_AtoB = networks.define_G(3, 3, 64, 'resnet_9blocks', 'instance', False, args.init_type, self.gpu_ids) 20 | self.netG_BtoC = networks.define_G(3, 1, 64, 'unet_256', 'batch', False, args.init_type, self.gpu_ids) 21 | 22 | checkpoint_AtoB_filename = 'netG_A2B.pth' 23 | checkpoint_BtoC_filename = 'netG_B2C.pth' 24 | 25 | checkpoint_path_AtoB = os.path.join(args.checkpoints_dir, checkpoint_AtoB_filename) 26 | checkpoint_path_BtoC = os.path.join(args.checkpoints_dir, checkpoint_BtoC_filename) 27 | 28 | self.netG_AtoB.load_state_dict(torch.load(checkpoint_path_AtoB)) 29 | self.netG_BtoC.load_state_dict(torch.load(checkpoint_path_BtoC)) 30 | 31 | def set_input(self, input): 32 | self.image_sizes = input['A_sizes'] 33 | 34 | input_A = input['A'] 35 | self.input_A.resize_(input_A.size()).copy_(input_A) 36 | self.image_paths = input['A_paths'] 37 | 38 | self.size = (int(self.image_sizes[0]), int(self.image_sizes[1])) 39 | 40 | 41 | def test(self): 42 | self.real_A = Variable(self.input_A) 43 | self.fake_B = self.netG_AtoB(self.real_A) 44 | self.fake_C = self.netG_BtoC(self.fake_B) 45 | 46 | def get_image_paths(self): 47 | return self.image_paths 48 | 49 | def get_image_sizes(self): 50 | return self.size 51 | 52 | def get_current_visuals(self): 53 | real_A = util.tensor2im(self.real_A.data) 54 | fake_B = util.tensor2im(self.fake_B.data) 55 | fake_C = util.tensor2im(self.fake_C.data) 56 | 57 | return OrderedDict([('original', real_A), ('restyled', fake_B), ('depth', fake_C)]) -------------------------------------------------------------------------------- /data/image_folder.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ############################################################################### 7 | 8 | import torch.utils.data as data 9 | 10 | from PIL import Image 11 | import os 12 | import os.path 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir): 25 | images = [] 26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | 34 | return images 35 | 36 | def default_loader(path): 37 | return Image.open(path).convert('RGB') 38 | 39 | class ImageFolder(data.Dataset): 40 | 41 | def __init__(self, root, transform=None, return_paths=False, 42 | loader=default_loader): 43 | imgs = make_dataset(root) 44 | if len(imgs) == 0: 45 | raise(RuntimeError("Found 0 images in: " + root + "\n" 46 | "Supported image extensions are: " + 47 | ",".join(IMG_EXTENSIONS))) 48 | 49 | self.root = root 50 | self.imgs = imgs 51 | self.transform = transform 52 | self.return_paths = return_paths 53 | self.loader = loader 
54 | 55 | def __getitem__(self, index): 56 | path = self.imgs[index] 57 | img = self.loader(path) 58 | if self.transform is not None: 59 | img = self.transform(img) 60 | if self.return_paths: 61 | return img, path 62 | else: 63 | return img 64 | 65 | def __len__(self): 66 | return len(self.imgs) 67 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import util 4 | import torch 5 | 6 | 7 | class Arguments(): 8 | def __init__(self): 9 | self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 10 | self.initialized = False 11 | 12 | def initialize(self): 13 | self.parser.add_argument('--data_directory', default="./Examples", help='path to the directory containing the images') 14 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids, e.g. 0 or 0,1,2 or 0,2; use -1 for CPU') 15 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='the directory that contains the checkpoints') 16 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') 17 | self.parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal|xavier|kaiming|orthogonal]') 18 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 19 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 20 | self.parser.add_argument('--how_many', type=int, default=5000, help='how many test images to run') 21 | self.initialized = True 22 | 23 | def parse(self): 24 | if not self.initialized: 25 | self.initialize() 26 | self.args = self.parser.parse_args() 27 | 28 | str_ids = self.args.gpu_ids.split(',') 29 | self.args.gpu_ids = [] 30 | for str_id in str_ids: 31 | id = int(str_id) 32 | if id >= 0: 33 | self.args.gpu_ids.append(id) 34 | 35 | # set gpu ids 36 | if len(self.args.gpu_ids) > 0: 37 | torch.cuda.set_device(self.args.gpu_ids[0]) 38 | 39 | args = vars(self.args) 40 | 41 | 42 | print('------------ Arguments -------------') 43 | for k, v in sorted(args.items()): 44 | print('%s: %s' % (str(k), str(v))) 45 | print('------------------------------------') 46 | 47 | return self.args 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Real-Time Monocular Depth Estimation using Synthetic Data with Domain Adaptation via Image Style Transfer 2 | 3 | Requires an NVIDIA GPU, Python 2 or 3, [CUDA CuDNN](https://developer.nvidia.com/cudnn), [PyTorch 0.3.1](https://pytorch.org/previous-versions/) or [PyTorch 0.4.0](http://pytorch.org), and [OpenCV](http://www.opencv.org). 4 | 5 | ![General Pipeline](https://github.com/atapour/styleDepth-Inference/blob/master/imgs/pipeLine.png) 6 |                  7 |                  8 |                  9 |                  10 |                 General pipeline of the approach 11 | ## Method: 12 | 13 | _"Monocular depth estimation using learning-based approaches has become relevant and promising in recent years.
However, most monocular depth estimators either need to rely on large quantities of ground truth depth data, which is extremely expensive and difficult to obtain, or predict disparity as an intermediary step using a secondary supervisory signal, leading to blurring and other artefacts. Training a depth estimation model using pixel-perfect synthetic environment data can resolve most of these issues, but introduces the problem of domain bias. This is the inability 14 | to apply a model trained on synthetic data to real-world scenarios. With recent advances in image style transfer and its connections with domain adaptation (Maximum Mean Discrepancy), our approach takes advantage of style transfer and adversarial training to predict pixel-perfect depth from 15 | a single real-world color image based on training over a large corpus of synthetic environment data. Experimental results indicate the efficacy of our approach compared to contemporary state-of-the-art."_ 16 | 17 | [[Atapour-Abarghouei and Breckon, Proc. CVPR, 2018](http://breckon.eu/toby/publications/papers/abarghouei18monocular.pdf)] 18 | 19 | --- 20 | 21 | ## Reference implementation: 22 | Produces a depth map output image based on a monocular color image input. 23 | * The input RGB image is first transformed into the style of the images captured from a highly realistic synthetic virtual environment, on which the depth prediction network is trained. 24 | * The provided color image is used as the input to [CycleGAN](https://junyanz.github.io/CycleGAN/), which transforms the style of the image. Image style transfer is used as a method of domain adaptation. 25 | * The style-transferred image is used as the input to a model trained on synthetic images, which can produce pixel-perfect depth outputs. 26 | * The code provides an inference pipeline and can be run using the test harness run_test.py (a minimal sketch of the two-stage inference is given below). 27 | * Example images are provided in the 'Examples' directory. 28 | * The training was in part performed based on the code from [https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix), and we would like to thank the authors and contributors.
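The snippet below is a minimal sketch (not part of the repository) of the two-stage inference that run_test.py performs, chaining the two generators defined in models/networks.py. It assumes the pretrained checkpoints have already been downloaded to ./checkpoints and that everything runs on the CPU; as with run_test.py, PyTorch 0.4.0 users would need to run remove_running_stats.py first (see below). For actual use, prefer the test harness itself.

```
# Minimal sketch of the two-stage pipeline: style transfer, then depth estimation.
import torch
from torch.autograd import Variable
from models import networks

# Stage 1: real-world RGB -> synthetic-style RGB (CycleGAN ResNet generator).
netG_A2B = networks.define_G(3, 3, 64, 'resnet_9blocks', 'instance', False, 'normal', [])
netG_A2B.load_state_dict(torch.load('./checkpoints/netG_A2B.pth', map_location=lambda storage, loc: storage))

# Stage 2: synthetic-style RGB -> single-channel depth (U-Net generator).
netG_B2C = networks.define_G(3, 1, 64, 'unet_256', 'batch', False, 'normal', [])
netG_B2C.load_state_dict(torch.load('./checkpoints/netG_B2C.pth', map_location=lambda storage, loc: storage))

# Dummy input standing in for an RGB image resized to 1024x256 and normalised to [-1, 1].
image = Variable(torch.rand(1, 3, 256, 1024) * 2 - 1)

restyled = netG_A2B(image)   # style-transferred image
depth = netG_B2C(restyled)   # depth prediction in [-1, 1]
```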
29 | 30 | 31 | ![](https://github.com/atapour/styleDepth-Inference/blob/master/imgs/sample.png) 32 |                  33 |                  34 |                  35 |                  36 |       Example of the results of the approach 37 | 38 | --- 39 | ## Instructions to run the inference code using PyTorch 0.3.1: 40 | 41 | ``` 42 | $ git clone https://github.com/atapour/monocularDepth-Inference.git 43 | $ cd monocularDepth-Inference 44 | $ chmod +x ./download_pretrained_models.sh 45 | $ ./download_pretrained_models.sh 46 | $ python run_test.py --data_directory=./Examples --checkpoints_dir=./checkpoints --results_dir=./results 47 | ``` 48 | --- 49 | ## Instructions to run the inference code using PyTorch 0.4.0: 50 | 51 | ``` 52 | $ git clone https://github.com/atapour/monocularDepth-Inference.git 53 | $ cd monocularDepth-Inference 54 | $ chmod +x ./download_pretrained_models.sh 55 | $ ./download_pretrained_models.sh 56 | $ python remove_running_stats.py 57 | $ python run_test.py --data_directory=./Examples --checkpoints_dir=./checkpoints --results_dir=./results 58 | ``` 59 | --- 60 | 61 | The output results are written to the directory passed as an argument to the test harness ('./results' by default): 62 | * The script "download_pretrained_models.sh" downloads the required pre-trained models and checks the integrity of the downloaded file using an MD5 checksum. 63 | * The checkpoints available for direct download were created using PyTorch 0.3.1 and will not work with PyTorch 0.4.0. The provided Python script 'remove_running_stats.py' remedies this. 64 | * The file with the suffix "_original" is the original input image. 65 | * The file with the suffix "_restyled" is the style-transferred image. 66 | * The file with the suffix "_depth" is the output depth image. 67 | 68 | --- 69 | 70 | 71 | ## Example: 72 | [![Video Example](https://github.com/atapour/styleDepth-Inference/blob/master/imgs/thumbnail.jpg)](https://vimeo.com/260393753 "Video Example - Click to Play") 73 | 74 |                  75 |                  76 |                  77 |                  78 |       Video Example - click image above to play. 79 | 80 | --- 81 | 82 | ## Reference: 83 | 84 | [Real-Time Monocular Depth Estimation using Synthetic Data with Domain Adaptation via Image Style Transfer](http://breckon.eu/toby/publications/papers/abarghouei18monocular.pdf) 85 | (A. Atapour-Abarghouei, T.P. Breckon), In Proc. Conf. Computer Vision and Pattern Recognition, 2018. [[pdf](http://breckon.eu/toby/publications/papers/abarghouei18monocular.pdf)] [[demo](https://vimeo.com/260393753)] 86 | 87 | ``` 88 | @InProceedings{abarghouei18monocular, 89 | author = {Atapour-Abarghouei, A. and Breckon, T.P.}, 90 | title = {Real-Time Monocular Depth Estimation using Synthetic Data with Domain Adaptation}, 91 | booktitle = {Proc.
Computer Vision and Pattern Recognition}, 92 | pages = {1-8}, 93 | year = {2018}, 94 | month = {June}, 95 | publisher = {IEEE}, 96 | keywords = {monocular depth, generative adversarial network, GAN, depth map, disparity, depth from single image}, 97 | } 98 | 99 | ``` 100 | --- 101 | -------------------------------------------------------------------------------- /models/networks.py: -------------------------------------------------------------------------------- 1 | # based on https://junyanz.github.io/CycleGAN/ 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import init 6 | import functools 7 | from torch.autograd import Variable 8 | 9 | 10 | def get_norm_layer(norm_type='instance'): 11 | if norm_type == 'batch': 12 | norm_layer = functools.partial(nn.BatchNorm2d, affine=True) 13 | elif norm_type == 'instance': 14 | norm_layer = functools.partial(nn.InstanceNorm2d, affine=False) 15 | elif norm_type == 'none': 16 | norm_layer = None 17 | else: 18 | raise NotImplementedError('normalization layer [%s] is not found' % norm_type) 19 | return norm_layer 20 | 21 | def define_G(input_nc, output_nc, ngf, which_model_netG, norm='batch', use_dropout=False, init_type='normal', gpu_ids=[]): 22 | netG = None 23 | use_gpu = len(gpu_ids) > 0 24 | norm_layer = get_norm_layer(norm_type=norm) 25 | 26 | if use_gpu: 27 | assert(torch.cuda.is_available()) 28 | 29 | if which_model_netG == 'resnet_9blocks': 30 | netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9, gpu_ids=gpu_ids) 31 | elif which_model_netG == 'resnet_6blocks': 32 | netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6, gpu_ids=gpu_ids) 33 | elif which_model_netG == 'unet_128': 34 | netG = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout, gpu_ids=gpu_ids) 35 | elif which_model_netG == 'unet_256': 36 | netG = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout, gpu_ids=gpu_ids) 37 | else: 38 | raise NotImplementedError('Generator model name [%s] is not recognized' % which_model_netG) 39 | if len(gpu_ids) > 0: 40 | netG.cuda(gpu_ids[0]) 41 | return netG 42 | 43 | 44 | def define_D(input_nc, ndf, which_model_netD, 45 | n_layers_D=3, norm='batch', use_sigmoid=False, init_type='normal', gpu_ids=[]): 46 | netD = None 47 | use_gpu = len(gpu_ids) > 0 48 | norm_layer = get_norm_layer(norm_type=norm) 49 | 50 | if use_gpu: 51 | assert(torch.cuda.is_available()) 52 | if which_model_netD == 'basic': 53 | netD = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids) 54 | elif which_model_netD == 'n_layers': 55 | netD = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids) 56 | elif which_model_netD == 'pixel': 57 | netD = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids) 58 | else: 59 | raise NotImplementedError('Discriminator model name [%s] is not recognized' % 60 | which_model_netD) 61 | if use_gpu: 62 | netD.cuda(gpu_ids[0]) 63 | return netD 64 | 65 | 66 | class GANLoss(nn.Module): 67 | def __init__(self, use_lsgan=True, target_real_label=1.0, target_fake_label=0.0, 68 | tensor=torch.FloatTensor): 69 | super(GANLoss, self).__init__() 70 | self.real_label = target_real_label 71 | self.fake_label = target_fake_label 72 | self.real_label_var = None 73 | self.fake_label_var = None 74 | 
self.Tensor = tensor 75 | if use_lsgan: 76 | self.loss = nn.MSELoss() 77 | else: 78 | self.loss = nn.BCELoss() 79 | 80 | def get_target_tensor(self, input, target_is_real): 81 | target_tensor = None 82 | if target_is_real: 83 | create_label = ((self.real_label_var is None) or 84 | (self.real_label_var.numel() != input.numel())) 85 | if create_label: 86 | real_tensor = self.Tensor(input.size()).fill_(self.real_label) 87 | self.real_label_var = Variable(real_tensor, requires_grad=False) 88 | target_tensor = self.real_label_var 89 | else: 90 | create_label = ((self.fake_label_var is None) or 91 | (self.fake_label_var.numel() != input.numel())) 92 | if create_label: 93 | fake_tensor = self.Tensor(input.size()).fill_(self.fake_label) 94 | self.fake_label_var = Variable(fake_tensor, requires_grad=False) 95 | target_tensor = self.fake_label_var 96 | return target_tensor 97 | 98 | def __call__(self, input, target_is_real): 99 | target_tensor = self.get_target_tensor(input, target_is_real) 100 | return self.loss(input, target_tensor) 101 | 102 | class ResnetGenerator(nn.Module): 103 | def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'): 104 | assert(n_blocks >= 0) 105 | super(ResnetGenerator, self).__init__() 106 | self.input_nc = input_nc 107 | self.output_nc = output_nc 108 | self.ngf = ngf 109 | self.gpu_ids = gpu_ids 110 | if type(norm_layer) == functools.partial: 111 | use_bias = norm_layer.func == nn.InstanceNorm2d 112 | else: 113 | use_bias = norm_layer == nn.InstanceNorm2d 114 | 115 | model = [nn.ReflectionPad2d(3), 116 | nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, 117 | bias=use_bias), 118 | norm_layer(ngf), 119 | nn.ReLU(True)] 120 | 121 | n_downsampling = 2 122 | for i in range(n_downsampling): 123 | mult = 2**i 124 | model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, 125 | stride=2, padding=1, bias=use_bias), 126 | norm_layer(ngf * mult * 2), 127 | nn.ReLU(True)] 128 | 129 | mult = 2**n_downsampling 130 | for i in range(n_blocks): 131 | model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] 132 | 133 | for i in range(n_downsampling): 134 | mult = 2**(n_downsampling - i) 135 | model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), 136 | kernel_size=3, stride=2, 137 | padding=1, output_padding=1, 138 | bias=use_bias), 139 | norm_layer(int(ngf * mult / 2)), 140 | nn.ReLU(True)] 141 | model += [nn.ReflectionPad2d(3)] 142 | model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] 143 | model += [nn.Tanh()] 144 | 145 | self.model = nn.Sequential(*model) 146 | 147 | def forward(self, input): 148 | if self.gpu_ids and isinstance(input.data, torch.cuda.FloatTensor): 149 | return nn.parallel.data_parallel(self.model, input, self.gpu_ids) 150 | else: 151 | return self.model(input) 152 | 153 | 154 | # Define a resnet block 155 | class ResnetBlock(nn.Module): 156 | def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): 157 | super(ResnetBlock, self).__init__() 158 | self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias) 159 | 160 | def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias): 161 | conv_block = [] 162 | p = 0 163 | if padding_type == 'reflect': 164 | conv_block += [nn.ReflectionPad2d(1)] 165 | elif padding_type == 'replicate': 166 | conv_block += [nn.ReplicationPad2d(1)] 167 | elif padding_type == 'zero': 168 | p 
= 1 169 | else: 170 | raise NotImplementedError('padding [%s] is not implemented' % padding_type) 171 | 172 | conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), 173 | norm_layer(dim), 174 | nn.ReLU(True)] 175 | if use_dropout: 176 | conv_block += [nn.Dropout(0.5)] 177 | 178 | p = 0 179 | if padding_type == 'reflect': 180 | conv_block += [nn.ReflectionPad2d(1)] 181 | elif padding_type == 'replicate': 182 | conv_block += [nn.ReplicationPad2d(1)] 183 | elif padding_type == 'zero': 184 | p = 1 185 | else: 186 | raise NotImplementedError('padding [%s] is not implemented' % padding_type) 187 | conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), 188 | norm_layer(dim)] 189 | 190 | return nn.Sequential(*conv_block) 191 | 192 | def forward(self, x): 193 | out = x + self.conv_block(x) 194 | return out 195 | 196 | 197 | # Defines the Unet generator. 198 | # |num_downs|: number of downsamplings in UNet. For example, 199 | # if |num_downs| == 7, image of size 128x128 will become of size 1x1 200 | # at the bottleneck 201 | class UnetGenerator(nn.Module): 202 | def __init__(self, input_nc, output_nc, num_downs, ngf=64, 203 | norm_layer=nn.BatchNorm2d, use_dropout=False, gpu_ids=[]): 204 | super(UnetGenerator, self).__init__() 205 | self.gpu_ids = gpu_ids 206 | 207 | # construct unet structure 208 | unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) 209 | for i in range(num_downs - 5): 210 | unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) 211 | unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) 212 | unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) 213 | unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) 214 | unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) 215 | 216 | self.model = unet_block 217 | 218 | def forward(self, input): 219 | if self.gpu_ids and isinstance(input.data, torch.cuda.FloatTensor): 220 | return nn.parallel.data_parallel(self.model, input, self.gpu_ids) 221 | else: 222 | return self.model(input) 223 | 224 | 225 | # Defines the submodule with skip connection. 
226 | # X -------------------identity---------------------- X 227 | # |-- downsampling -- |submodule| -- upsampling --| 228 | class UnetSkipConnectionBlock(nn.Module): 229 | def __init__(self, outer_nc, inner_nc, input_nc=None, 230 | submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False): 231 | super(UnetSkipConnectionBlock, self).__init__() 232 | self.outermost = outermost 233 | if type(norm_layer) == functools.partial: 234 | use_bias = norm_layer.func == nn.InstanceNorm2d 235 | else: 236 | use_bias = norm_layer == nn.InstanceNorm2d 237 | if input_nc is None: 238 | input_nc = outer_nc 239 | downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, 240 | stride=2, padding=1, bias=use_bias) 241 | downrelu = nn.LeakyReLU(0.2, True) 242 | downnorm = norm_layer(inner_nc) 243 | uprelu = nn.ReLU(True) 244 | upnorm = norm_layer(outer_nc) 245 | 246 | if outermost: 247 | upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, 248 | kernel_size=4, stride=2, 249 | padding=1) 250 | down = [downconv] 251 | up = [uprelu, upconv, nn.Tanh()] 252 | model = down + [submodule] + up 253 | elif innermost: 254 | upconv = nn.ConvTranspose2d(inner_nc, outer_nc, 255 | kernel_size=4, stride=2, 256 | padding=1, bias=use_bias) 257 | down = [downrelu, downconv] 258 | up = [uprelu, upconv, upnorm] 259 | model = down + up 260 | else: 261 | upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, 262 | kernel_size=4, stride=2, 263 | padding=1, bias=use_bias) 264 | down = [downrelu, downconv, downnorm] 265 | up = [uprelu, upconv, upnorm] 266 | 267 | if use_dropout: 268 | model = down + [submodule] + up + [nn.Dropout(0.5)] 269 | else: 270 | model = down + [submodule] + up 271 | 272 | self.model = nn.Sequential(*model) 273 | 274 | def forward(self, x): 275 | if self.outermost: 276 | return self.model(x) 277 | else: 278 | return torch.cat([x, self.model(x)], 1) 279 | 280 | 281 | # Defines the PatchGAN discriminator with the specified arguments. 
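# It scores overlapping image patches as real or fake rather than the image as a whole; with the default n_layers=3 this is the standard 70x70 PatchGAN. (The discriminators are not used by the inference code in this repository.)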
282 | class NLayerDiscriminator(nn.Module): 283 | def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, gpu_ids=[]): 284 | super(NLayerDiscriminator, self).__init__() 285 | self.gpu_ids = gpu_ids 286 | if type(norm_layer) == functools.partial: 287 | use_bias = norm_layer.func == nn.InstanceNorm2d 288 | else: 289 | use_bias = norm_layer == nn.InstanceNorm2d 290 | 291 | kw = 4 292 | padw = 1 293 | sequence = [ 294 | nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), 295 | nn.LeakyReLU(0.2, True) 296 | ] 297 | 298 | nf_mult = 1 299 | nf_mult_prev = 1 300 | for n in range(1, n_layers): 301 | nf_mult_prev = nf_mult 302 | nf_mult = min(2**n, 8) 303 | sequence += [ 304 | nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, 305 | kernel_size=kw, stride=2, padding=padw, bias=use_bias), 306 | norm_layer(ndf * nf_mult), 307 | nn.LeakyReLU(0.2, True) 308 | ] 309 | 310 | nf_mult_prev = nf_mult 311 | nf_mult = min(2**n_layers, 8) 312 | sequence += [ 313 | nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, 314 | kernel_size=kw, stride=1, padding=padw, bias=use_bias), 315 | norm_layer(ndf * nf_mult), 316 | nn.LeakyReLU(0.2, True) 317 | ] 318 | 319 | sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] 320 | 321 | if use_sigmoid: 322 | sequence += [nn.Sigmoid()] 323 | 324 | self.model = nn.Sequential(*sequence) 325 | 326 | def forward(self, input): 327 | if len(self.gpu_ids) and isinstance(input.data, torch.cuda.FloatTensor): 328 | return nn.parallel.data_parallel(self.model, input, self.gpu_ids) 329 | else: 330 | return self.model(input) 331 | 332 | class PixelDiscriminator(nn.Module): 333 | def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d, use_sigmoid=False, gpu_ids=[]): 334 | super(PixelDiscriminator, self).__init__() 335 | self.gpu_ids = gpu_ids 336 | if type(norm_layer) == functools.partial: 337 | use_bias = norm_layer.func == nn.InstanceNorm2d 338 | else: 339 | use_bias = norm_layer == nn.InstanceNorm2d 340 | 341 | self.net = [ 342 | nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0), 343 | nn.LeakyReLU(0.2, True), 344 | nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias), 345 | norm_layer(ndf * 2), 346 | nn.LeakyReLU(0.2, True), 347 | nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias)] 348 | 349 | if use_sigmoid: 350 | self.net.append(nn.Sigmoid()) 351 | 352 | self.net = nn.Sequential(*self.net) 353 | 354 | def forward(self, input): 355 | if len(self.gpu_ids) and isinstance(input.data, torch.cuda.FloatTensor): 356 | return nn.parallel.data_parallel(self.net, input, self.gpu_ids) 357 | else: 358 | return self.net(input) --------------------------------------------------------------------------------