├── images
│   ├── lgc.png
│   ├── local.png
│   └── output.png
├── model
│   ├── ops.py
│   ├── main.py
│   ├── dataloader.py
│   └── model.py
└── README.md
/images/lgc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiotosi92/LGC-Tensorflow/HEAD/images/lgc.png -------------------------------------------------------------------------------- /images/local.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiotosi92/LGC-Tensorflow/HEAD/images/local.png -------------------------------------------------------------------------------- /images/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiotosi92/LGC-Tensorflow/HEAD/images/output.png -------------------------------------------------------------------------------- /model/ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | 5 | def conv2d(x, kernel_shape, strides=1, relu=True, padding='SAME'): 6 | W = tf.get_variable("weights", kernel_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False)) 7 | tf.add_to_collection(tf.GraphKeys.WEIGHTS, W) 8 | b = tf.get_variable("biases", kernel_shape[3], initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False)) 9 | with tf.name_scope("conv"): 10 | x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding=padding) 11 | x = tf.nn.bias_add(x, b) 12 | if relu: 13 | x = tf.nn.relu(x) 14 | return x 15 | 16 | 17 | def conv2d_transpose(x, kernel_shape, strides=1, relu=True): 18 | W = tf.get_variable("weights", kernel_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False)) 19 | tf.add_to_collection(tf.GraphKeys.WEIGHTS, W) 20 | b = tf.get_variable("biases", kernel_shape[2], initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False)) 21 | output_shape = [tf.shape(x)[0], tf.shape(x)[1] * strides, tf.shape(x)[2] * strides, kernel_shape[2]] 22 | with tf.name_scope("deconv"): 23 | x = tf.nn.conv2d_transpose(x, W, output_shape, strides=[1, strides, strides, 1], padding="SAME") 24 | x = tf.nn.bias_add(x, b) 25 | if relu: 26 | x = tf.nn.relu(x) 27 | return x 28 | 29 | 30 | def encoding_unit(name, inputs, num_outputs): 31 | with tf.variable_scope('encoding' + str(name)): 32 | conv = tf.contrib.layers.conv2d( 33 | inputs=inputs, 34 | num_outputs=num_outputs, 35 | kernel_size=3, 36 | activation_fn=None 37 | ) 38 | relu = tf.nn.relu(conv) 39 | pool = tf.contrib.layers.max_pool2d(relu, 2) 40 | 41 | forward = conv 42 | return pool, forward 43 | 44 | 45 | def decoding_unit(number, inputs, num_outputs, forwards=None): 46 | with tf.variable_scope('decoding' + number): 47 | conv_transpose = tf.contrib.layers.conv2d_transpose( 48 | inputs=inputs, 49 | num_outputs=num_outputs*2, 50 | kernel_size=3, 51 | stride=2, 52 | activation_fn=None 53 | ) 54 | 55 | if forwards is not None: 56 | if isinstance(forwards, (list, tuple)): 57 | for f in forwards: 58 | conv_transpose = tf.concat([conv_transpose, f], axis=3) 59 | else: 60 | conv_transpose = tf.concat([conv_transpose, forwards], axis=3) 61 | 62 | conv = tf.contrib.layers.conv2d( 63 | inputs=conv_transpose, 64 | num_outputs=num_outputs, 65 | kernel_size=3, 66 | activation_fn=None 67 | ) 68 | 69 | relu = tf.nn.relu(conv) 70 | 71 | return relu 72 | 73 | 74 | def pool_2d(x): 75 | return tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], "SAME")
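# NOTE: pad() below rounds height and width up to the next multiple of 16 because
# ConfNet's encoder pools four times (2**4 = 16), so a full-image forward pass
# needs both spatial dimensions divisible by 16; depad() undoes the padding.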
1], "SAME") 76 | 77 | 78 | def pad(img): 79 | hpad = (16 - img.shape[1]%16)%16 80 | wpad = (16 - img.shape[2]%16)%16 81 | if hpad+wpad==0: 82 | return img 83 | else: 84 | return np.pad(img, ((0,0),(0,hpad),(0,wpad),(0,0)), 'constant'),hpad,wpad 85 | 86 | 87 | def depad(img,hpad,wpad): 88 | return img[:,0:img.shape[1]-hpad,0:img.shape[2]-wpad,:] 89 | -------------------------------------------------------------------------------- /model/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | from model import LGC 4 | 5 | parser = argparse.ArgumentParser(description='Argument parser') 6 | 7 | """Arguments related to run mode""" 8 | parser.add_argument('--is_training', dest='is_training', type=str, default='False', help='train, test') 9 | 10 | """Arguments related to training""" 11 | parser.add_argument('--epoch', dest='epoch', type=int, default=14, help='# of epoch') 12 | parser.add_argument('--image_height', dest='image_height', type=int, default=384, help='# image height') 13 | parser.add_argument('--image_width', dest='image_width', type=int, default=1280, help='# image width') 14 | parser.add_argument('--crop_height', dest='crop_height', type=int, default=256, help='# crop height') 15 | parser.add_argument('--crop_width', dest='crop_width', type=int, default=512, help='# crop width') 16 | parser.add_argument('--batch_size', dest='batch_size', type=int, default=128, help='# images in batch') 17 | parser.add_argument('--patch_size', dest='patch_size', type=int, default=9, help='# images in patches') 18 | parser.add_argument('--dataset', dest='dataset', type=str, default='../utils/kitti_training_set.txt', help='dataset') 19 | parser.add_argument('--initial_learning_rate', dest='initial_learning_rate', type=float, default=0.003, help='initial learning rate for gradient descent') 20 | parser.add_argument('--threshold', dest='threshold', type=float, default=3, help='disparity error if absolute difference between disparity and groundtruth > threshold') 21 | parser.add_argument('--late_fusion', help='LFM as local network', action='store_true') 22 | 23 | """Arguments related to models""" 24 | parser.add_argument('--model', dest='model', type=str, default='', help='CCNN, EFN, LFM, ConfNet, LGC') 25 | 26 | """Arguments related to monitoring and outputs""" 27 | parser.add_argument('--log_directory', dest='log_directory', type=str, default='../log', help='directory to save checkpoints and summaries') 28 | parser.add_argument('--checkpoint_path', dest='checkpoint_path', nargs='*', help='path to a specific checkpoint to load') 29 | parser.add_argument('--save_epoch_freq', dest='save_epoch_freq', type=int, default=2, help='save a model every save_epoch_freq epochs (does not overwrite previously saved models)') 30 | parser.add_argument('--model_name', dest='model_name', type=str, default='CCNN.model', help='model name') 31 | parser.add_argument('--output_path', dest='output_path', type=str, default='../output/CCNN/', help='model name') 32 | 33 | args = parser.parse_args() 34 | 35 | def main(_): 36 | with tf.Session() as sess: 37 | model = LGC(sess, 38 | is_training=args.is_training, 39 | epoch=args.epoch, 40 | threshold=args.threshold, 41 | image_height=args.image_height, 42 | image_width=args.image_width, 43 | crop_height=args.crop_height, 44 | crop_width=args.crop_width, 45 | batch_size=args.batch_size, 46 | patch_size=args.patch_size, 47 | initial_learning_rate=args.initial_learning_rate, 48 | model=args.model, 49 | 
89 | -------------------------------------------------------------------------------- /model/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | from model import LGC 4 | 5 | parser = argparse.ArgumentParser(description='Argument parser') 6 | 7 | """Arguments related to run mode""" 8 | parser.add_argument('--is_training', dest='is_training', type=str, default='False', help='True for training, False for testing') 9 | 10 | """Arguments related to training""" 11 | parser.add_argument('--epoch', dest='epoch', type=int, default=14, help='# of epochs') 12 | parser.add_argument('--image_height', dest='image_height', type=int, default=384, help='image height') 13 | parser.add_argument('--image_width', dest='image_width', type=int, default=1280, help='image width') 14 | parser.add_argument('--crop_height', dest='crop_height', type=int, default=256, help='crop height') 15 | parser.add_argument('--crop_width', dest='crop_width', type=int, default=512, help='crop width') 16 | parser.add_argument('--batch_size', dest='batch_size', type=int, default=128, help='# images in batch') 17 | parser.add_argument('--patch_size', dest='patch_size', type=int, default=9, help='patch size') 18 | parser.add_argument('--dataset', dest='dataset', type=str, default='../utils/kitti_training_set.txt', help='dataset') 19 | parser.add_argument('--initial_learning_rate', dest='initial_learning_rate', type=float, default=0.003, help='initial learning rate for gradient descent') 20 | parser.add_argument('--threshold', dest='threshold', type=float, default=3, help='a disparity is labeled erroneous if its absolute difference from the groundtruth exceeds this threshold') 21 | parser.add_argument('--late_fusion', help='use LFN as local network', action='store_true') 22 | 23 | """Arguments related to models""" 24 | parser.add_argument('--model', dest='model', type=str, default='', help='CCNN, EFN, LFN, ConfNet, LGC') 25 | 26 | """Arguments related to monitoring and outputs""" 27 | parser.add_argument('--log_directory', dest='log_directory', type=str, default='../log', help='directory to save checkpoints and summaries') 28 | parser.add_argument('--checkpoint_path', dest='checkpoint_path', nargs='*', help='path to a specific checkpoint to load') 29 | parser.add_argument('--save_epoch_freq', dest='save_epoch_freq', type=int, default=2, help='save a model every save_epoch_freq epochs (does not overwrite previously saved models)') 30 | parser.add_argument('--model_name', dest='model_name', type=str, default='CCNN.model', help='model name') 31 | parser.add_argument('--output_path', dest='output_path', type=str, default='../output/CCNN/', help='output path') 32 | 33 | args = parser.parse_args() 34 | 35 | def main(_): 36 | with tf.Session() as sess: 37 | model = LGC(sess, 38 | is_training=args.is_training, 39 | epoch=args.epoch, 40 | threshold=args.threshold, 41 | image_height=args.image_height, 42 | image_width=args.image_width, 43 | crop_height=args.crop_height, 44 | crop_width=args.crop_width, 45 | batch_size=args.batch_size, 46 | patch_size=args.patch_size, 47 | initial_learning_rate=args.initial_learning_rate, 48 | model=args.model, 49 | model_name=args.model_name, 50 | late_fusion=args.late_fusion, 51 | dataset=args.dataset 52 | ) 53 | 54 | if args.is_training == 'True': 55 | model.train(args) 56 | else: 57 | model.test(args) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.app.run() 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LGCNet-Tensorflow 2 | 3 | Tensorflow implementation of a local-global framework for confidence estimation. 4 | 5 | **Beyond local reasoning for stereo confidence estimation with deep learning** 6 | 7 | [Fabio Tosi](https://vision.disi.unibo.it/~ftosi/), [Matteo Poggi](https://vision.disi.unibo.it/~mpoggi/), Antonio Benincasa and [Stefano Mattoccia](https://vision.disi.unibo.it/~smatt/Site/Home.html) 8 | ECCV 2018 9 | 10 | ## Qualitative results on KITTI 11 | ![Alt text](https://github.com/fabiotosi92/LGC-Tensorflow/blob/master/images/output.png "output") 12 | 13 | Example of confidence estimation. (a) Reference image from the KITTI 2015 dataset, (b) disparity map obtained with MC-CNN, (c) confidence estimated with a local approach (CCNN) and (d) the proposed local-global framework, highlighting regions in which the latter method provides more reliable predictions (red bounding boxes). 14 | 15 | For more details: 16 | [pdf](https://vision.disi.unibo.it/~ftosi/papers/eccv18_lgc.pdf) 17 | 18 | ## Training 19 | 20 | For training, the KITTI input images are padded to 384x1280. 21 | 22 | The __training file__ should be a _.txt_ file in which each line contains: [_path_image_left_];[_path_image_disparity_];[_path_image_groundtruth_]. Disparity maps and groundtruth must be 16-bit images. 23 | 24 | You can train the __local__ network as follows: 25 | 26 | ```shell 27 | python ./model/main.py --is_training True --epoch 14 --batch_size 128 --patch_size 9 --dataset [path_training_file] --initial_learning_rate 0.003 --log_directory [path_log] --model_name model --model [CCNN, EFN, LFN] 28 | ``` 29 | Use the _--model_ argument to choose the architecture: 30 | * [__CCNN__](https://github.com/fabiotosi92/CCNN-Tensorflow) ([Poggi et al.](https://vision.disi.unibo.it/~mpoggi/papers/bmvc2016.pdf)) 31 | * __EFN__ (Early Fusion Network) 32 | * __LFN__ (Late Fusion Network) 33 | 34 | Similarly, you can train the __global__ network (_ConfNet_): 35 | ```shell 36 | python ./model/main.py --is_training True --epoch 1600 --batch_size 1 --crop_height 256 --crop_width 512 --dataset [path_training_file] --initial_learning_rate 0.003 --log_directory [path_log] --model_name model --model ConfNet 37 | ``` 38 | Finally, you can load the weights of the local and global networks and then train __LGCNet__: 39 | 40 | ```shell 41 | python ./model/main.py --is_training True --epoch 14 --batch_size 128 --patch_size 9 --dataset [path_training_file] --initial_learning_rate 0.003 --log_directory [path_log] --model_name model --model LGC --checkpoint_path [path_checkpoint_ConfNet] [path_checkpoint_CCNN/LFN] --late_fusion 42 | ``` 43 | Use the _--late_fusion_ flag to select __LFN__ as the local network; omit it to use __CCNN__. 44 | 45 | **Warning:** set _checkpoint_CCNN/LFN_ to match the chosen local network. 46 | 47 | ## Testing 48 | 49 | The __testing file__ should be a _.txt_ file in which each line contains: [_path_image_left_];[_path_image_disparity_]. Disparity maps must be 16-bit images.
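For reference, a line of the __training file__ looks like this (the paths are only illustrative):

```
/data/kitti/image_2/000000_10.png;/data/kitti/disp/000000_10.png;/data/kitti/gt/000000_10.png
```

while a line of the __testing file__ simply omits the groundtruth field:

```
/data/kitti/image_2/000000_10.png;/data/kitti/disp/000000_10.png
```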
50 | 51 | If you want to test the __local__ network or the __global__ network independently: 52 | 53 | ```shell 54 | python ./model/main.py --is_training False --batch_size 1 --dataset [path_testing_file] --checkpoint_path [path_checkpoint] --output_path [path_output] --model [CCNN, EFN, LFN, ConfNet] 55 | ``` 56 | 57 | To test __LGCNet__ instead, run: 58 | 59 | ```shell 60 | python ./model/main.py --is_training False --batch_size 1 --dataset [path_testing_file] --checkpoint_path [path_checkpoint_ConfNet] [path_checkpoint_CCNN/LFN] [path_checkpoint_LGC] --output_path [path_output] --model LGC --late_fusion 61 | ``` 62 | 63 | ## Pretrained models 64 | 65 | You can download the pre-trained models, trained on 20 images of the KITTI 12 dataset for the AD-CENSUS, SGM and MC-CNN stereo algorithms, here: 66 | 67 | [Google Drive](https://drive.google.com/open?id=1gXThUY_6pRG2HozAyMB_tY4urd0rIPZh) 68 | 69 | ## Requirements 70 | This code was tested with Tensorflow 1.4, CUDA 8.0 and Ubuntu 16.04. 71 | -------------------------------------------------------------------------------- /model/dataloader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class Dataloader(object): 4 | 5 | def __init__(self, file, image_height, image_width, is_training): 6 | self.file = file 7 | self.image_height = image_height 8 | self.image_width = image_width 9 | self.is_training = is_training 10 | 11 | self.left = None 12 | self.disp = None 13 | self.gt = None 14 | 15 | input_queue = tf.train.string_input_producer([self.file], shuffle=False) 16 | line_reader = tf.TextLineReader() 17 | _, line = line_reader.read(input_queue) 18 | split_line = tf.string_split([line], ';').values 19 | 20 | if is_training == 'True': 21 | self.left = tf.cast(self.read_image(split_line[0], [None, None, 3]), tf.float32) 22 | self.disp = tf.cast(self.read_image(split_line[1], [None, None, 1], dtype=tf.uint16), tf.float32) / 256.0 23 | self.gt = tf.cast(self.read_image(split_line[2], [None, None, 1], dtype=tf.uint16), tf.float32) / 256.0 24 | self.left_filename = split_line[0] 25 | self.disp_filename = split_line[1] 26 | self.gt_filename = split_line[2] 27 | else: 28 | self.left = tf.stack([tf.cast(self.read_image(split_line[0], [None, None, 3]), tf.float32)], 0) 29 | self.disp = tf.stack([tf.cast(self.read_image(split_line[1], [None, None, 1], dtype=tf.uint16), tf.float32)], 0) / 256.0 30 | self.left_filename = split_line[0] 31 | self.disp_filename = split_line[1] 32 | 33 | def get_patches(self, patch_size, threshold, glob=None, loc=None): 34 | left_list = [] 35 | disp_list = [] 36 | gt_list = [] 37 | 38 | left_list.append(self.left) 39 | disp_list.append(self.disp) 40 | gt_list.append(self.gt) 41 | 42 | ksizes = [1, patch_size, patch_size, 1] 43 | strides = [1, 1, 1, 1] 44 | rates = [1, 1, 1, 1] 45 | padding = 'VALID' 46 | 47 | left_patches = tf.reshape( 48 | tf.extract_image_patches(left_list, ksizes, strides, rates, padding), [-1, patch_size, patch_size, 3]) 49 | 50 | disp_patches = tf.reshape( 51 | tf.extract_image_patches(disp_list, ksizes, strides, rates, padding), [-1, patch_size, patch_size, 1]) 52 | 53 | gt_patches = tf.reshape( 54 | tf.extract_image_patches(gt_list, ksizes, strides, rates, padding), [-1, patch_size, patch_size, 1]) 55 | 56 | mask = gt_patches[:, int(patch_size / 2):int(patch_size / 2) + 1, int(patch_size / 2):int(patch_size / 2) + 1, :] > 0 57 | valid = tf.tile(mask, [1, patch_size, patch_size, 1]) 58 |
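        # Patches whose central pixel has no groundtruth (gt == 0) are discarded:
        # the center-pixel mask is tiled to the full patch size so that
        # tf.boolean_mask below keeps or drops entire patches at once.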
59 | left_patches = tf.reshape(tf.boolean_mask(left_patches, tf.concat([valid, valid, valid], axis=3)), 60 | [-1, patch_size, patch_size, 3]) 61 | disp_patches = tf.reshape(tf.boolean_mask(disp_patches, valid), [-1, patch_size, patch_size, 1]) 62 | gt_patches = tf.reshape(tf.boolean_mask(gt_patches, valid), [-1, patch_size, patch_size, 1]) 63 | 64 | labels = tf.cast(tf.abs(disp_patches - gt_patches) <= threshold, tf.float32) 65 | 66 | if loc is not None and glob is not None: 67 | global_list = [] 68 | local_list = [] 69 | 70 | global_list.append(glob) 71 | local_list.append(loc) 72 | 73 | global_patches = tf.reshape( 74 | tf.extract_image_patches(global_list, ksizes, strides, rates, padding), [-1, patch_size, patch_size, 1]) 75 | 76 | local_patches = tf.reshape( 77 | tf.extract_image_patches(local_list, ksizes, strides, rates, padding), [-1, patch_size, patch_size, 1]) 78 | 79 | global_patches = tf.reshape(tf.boolean_mask(global_patches, valid), [-1, patch_size, patch_size, 1]) 80 | local_patches = tf.reshape(tf.boolean_mask(local_patches, valid), [-1, patch_size, patch_size, 1]) 81 | 82 | return left_patches, disp_patches, global_patches, local_patches, labels 83 | 84 | return left_patches, disp_patches, labels 85 | 86 | def get_crops(self, crop_height, crop_width, batch_size): 87 | crops = tf.random_crop(tf.concat([self.left, self.disp, self.gt], -1), [crop_height, crop_width, 5]) 88 | left_image, disp_image, gt_image = tf.split(crops, [3, 1, 1], axis=2) 89 | 90 | min_after_dequeue = 8 91 | num_threads = 8 92 | capacity = min_after_dequeue + 4 * batch_size 93 | left_image_batch, disp_image_batch, gt_image_batch = tf.train.shuffle_batch([left_image, disp_image, gt_image], 94 | batch_size, capacity, 95 | min_after_dequeue, num_threads) 96 | return left_image_batch, disp_image_batch, gt_image_batch 97 | 98 | def read_image(self, image_path, shape=None, dtype=tf.uint8): 99 | image_raw = tf.read_file(image_path) 100 | if dtype == tf.uint8: 101 | image = tf.image.decode_image(image_raw) 102 | else: 103 | image = tf.image.decode_png(image_raw, dtype=dtype) 104 | if shape is None: 105 | image.set_shape([None, None, 3]) 106 | else: 107 | image.set_shape(shape) 108 | if self.is_training == 'True': 109 | return tf.image.resize_image_with_crop_or_pad(image, self.image_height, self.image_width) 110 | return image 111 | 112 | 113 | 114 | 115 | 116 | 117 |
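# Worked example of the labeling rule above (threshold = 3): for disparities
# [10.0, 52.0, 33.0] against groundtruth [12.0, 40.0, 33.5], the absolute
# differences are [2.0, 12.0, 0.5], so the confidence labels are [1, 0, 1].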
-------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import ops 4 | import time 5 | import os 6 | import cv2 7 | from dataloader import Dataloader 8 | 9 | 10 | def count_text_lines(file_path): 11 | f = open(file_path, 'r') 12 | lines = f.readlines() 13 | f.close() 14 | return len(lines) 15 | 16 | 17 | class LGC(object): 18 | 19 | def __init__(self, sess, late_fusion, epoch=14, threshold=3, initial_learning_rate=0.003, image_height=384, image_width=1280, crop_height=256, crop_width=512, 20 | batch_size=128, patch_size=9, is_training=False, model='LGC', model_name='model', dataset='./fileutils/training.txt'): 21 | 22 | self.sess = sess 23 | self.is_training = is_training 24 | self.batch_size = batch_size 25 | self.patch_size = patch_size 26 | self.image_height = image_height 27 | self.image_width = image_width 28 | self.crop_height = crop_height 29 | self.crop_width = crop_width 30 | self.radius = int(patch_size/2) 31 | self.threshold = threshold 32 | self.initial_learning_rate = initial_learning_rate 33 | self.model = model 34 | self.model_name = model_name 35 | self.epoch = epoch 36 | self.model_collection = [self.model_name] 37 | self.dataset = dataset 38 | self.late_fusion = late_fusion 39 | 40 | self.build_dataloader() 41 | self.build_model() 42 | 43 | if self.is_training == 'True': 44 | self.build_losses() 45 | self.build_summaries() 46 | 47 | 48 | def build_dataloader(self): 49 | self.dataloader = Dataloader(file=self.dataset, image_height=self.image_height, image_width=self.image_width, is_training=self.is_training) 50 | 51 | def build_model(self): 52 | if self.is_training == 'True': #train 53 | if self.model == 'LFN' or self.model == 'CCNN' or self.model == 'EFN': 54 | self.left = tf.placeholder(tf.float32, [self.batch_size, self.patch_size, self.patch_size, 3], name='left') 55 | self.disp = tf.placeholder(tf.float32, [self.batch_size, self.patch_size, self.patch_size, 1], name='disparity') 56 | self.gt = tf.placeholder(tf.float32, [self.batch_size, 1, 1, 1], name='gt') 57 | elif self.model == 'ConfNet': 58 | self.left, self.disp, self.gt = self.dataloader.get_crops(self.crop_height, self.crop_width, self.batch_size) 59 | else: 60 | self.left_full = tf.expand_dims(self.dataloader.left, 0) 61 | self.disp_full = tf.expand_dims(self.dataloader.disp, 0) 62 | self.left = tf.placeholder(tf.float32, [self.batch_size, self.patch_size, self.patch_size, 3], name='left') 63 | self.disp = tf.placeholder(tf.float32, [self.batch_size, self.patch_size, self.patch_size, 1], name='disparity') 64 | self.glob = tf.placeholder(tf.float32, [self.batch_size, self.patch_size, self.patch_size, 1], name='global') 65 | self.local = tf.placeholder(tf.float32, [self.batch_size, self.patch_size, self.patch_size, 1], name='local') 66 | self.gt = tf.placeholder(tf.float32, [self.batch_size, 1, 1, 1], name='gt') 67 | else: #test 68 | self.left = tf.placeholder(tf.float32, name='left') 69 | self.disp = tf.placeholder(tf.float32, name='disparity') 70 | 71 | self.learning_rate = tf.placeholder(tf.float32, shape=[]) 72 | 73 | {'CCNN': self.EFN, 74 | 'EFN': self.EFN, 75 | 'LFN': self.LFN, 76 | 'ConfNet': self.ConfNet, 77 | 'LGC': self.LGC}[self.model]() 78 | 79 | def EFN(self): #CCNN/EFN 80 | 81 | kernel_size = 3 82 | filters = 64 83 | fc_filters = 100 84 | 85 | if self.model == "EFN": 86 | print(" [*] Building EFN model...") 87 | nchannels = 4 88 | model_input = tf.concat([self.disp, self.left], axis=3) 89 | else: #CCNN 90 | print(" [*] Building CCNN model...") 91 | nchannels = 1 92 | if self.model == 'LGC': 93 | disp = self.disp_full if self.is_training == 'True' else self.disp 94 | model_input = tf.pad(disp, [[0, 0], [self.radius, self.radius], [self.radius, self.radius], [0, 0]]) 95 | else: 96 | model_input = self.disp 97 | 98 | with tf.variable_scope('CCNN'): 99 | with tf.variable_scope("conv1"): 100 | conv1 = ops.conv2d(model_input, [kernel_size, kernel_size, nchannels, filters], 1, True, padding='VALID') 101 | 102 | with tf.variable_scope("conv2"): 103 | conv2 = ops.conv2d(conv1, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 104 | 105 | with tf.variable_scope("conv3"): 106 | conv3 = ops.conv2d(conv2, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 107 | 108 | with tf.variable_scope("conv4"): 109 | conv4 = ops.conv2d(conv3, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 110 | 111 | with tf.variable_scope("fully_connected_1"): 112 | fc1 = ops.conv2d(conv4, [1, 1, filters, fc_filters], 1, True, padding='VALID') 113 | 114
| with tf.variable_scope("fully_connected_2"): 115 | fc2 = ops.conv2d(fc1, [1, 1, fc_filters, fc_filters], 1, True, padding='VALID') 116 | 117 | with tf.variable_scope("prediction"): 118 | if self.model == 'LGC': 119 | self.local_prediction = tf.nn.sigmoid(ops.conv2d(fc2, [1, 1, fc_filters, 1], 1, False, padding='VALID')) 120 | else: 121 | self.prediction = ops.conv2d(fc2, [1, 1, fc_filters, 1], 1, False, padding='VALID') 122 | 123 | def LFN(self): 124 | print(" [*] Building LFN model...") 125 | 126 | kernel_size = 3 127 | filters = 64 128 | fc_filters = 100 129 | 130 | if self.model == 'LGC': 131 | disp, left = (self.disp_full, self.left_full) if self.is_training == 'True' else (self.disp, self.left) 132 | model_input_disp = tf.pad(disp, [[0, 0], [self.radius, self.radius], [self.radius, self.radius], [0, 0]]) 133 | model_input_left = tf.pad(left, [[0, 0], [self.radius, self.radius], [self.radius, self.radius], [0, 0]]) 134 | else: 135 | model_input_disp = self.disp 136 | model_input_left = self.left 137 | 138 | with tf.variable_scope('LFN'): 139 | with tf.variable_scope('disparity'): 140 | with tf.variable_scope("conv1"): 141 | conv1_disp = ops.conv2d(model_input_disp, [kernel_size, kernel_size, 1, filters], 1, True, padding='VALID') 142 | 143 | with tf.variable_scope("conv2"): 144 | conv2_disp = ops.conv2d(conv1_disp, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 145 | 146 | with tf.variable_scope("conv3"): 147 | conv3_disp = ops.conv2d(conv2_disp, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 148 | 149 | with tf.variable_scope("conv4"): 150 | conv4_disp = ops.conv2d(conv3_disp, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 151 | 152 | with tf.variable_scope('RGB'): 153 | with tf.variable_scope("conv1"): 154 | conv1_left = ops.conv2d(model_input_left, [kernel_size, kernel_size, 3, filters], 1, True, padding='VALID') 155 | 156 | with tf.variable_scope("conv2"): 157 | conv2_left = ops.conv2d(conv1_left, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 158 | 159 | with tf.variable_scope("conv3"): 160 | conv3_left = ops.conv2d(conv2_left, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 161 | 162 | with tf.variable_scope("conv4"): 163 | conv4_left = ops.conv2d(conv3_left, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 164 | 165 | with tf.variable_scope("fully_connected_1"): 166 | fc1 = ops.conv2d(tf.concat([conv4_left, conv4_disp], axis=3), [1, 1, 2 * filters, fc_filters], 1, True, padding='VALID') 167 | 168 | with tf.variable_scope("fully_connected_2"): 169 | fc2 = ops.conv2d(fc1, [1, 1, fc_filters, fc_filters], 1, True, padding='VALID') 170 | 171 | with tf.variable_scope("prediction"): 172 | if self.model == 'LGC': 173 | self.local_prediction = tf.nn.sigmoid(ops.conv2d(fc2, [1, 1, fc_filters, 1], 1, False, padding='VALID')) 174 | else: 175 | self.prediction = ops.conv2d(fc2, [1, 1, fc_filters, 1], 1, False, padding='VALID') 176 | 177 | def LGC(self): 178 | print(" [*] Building LGC model...") 179 | 180 | kernel_size = 3 181 | filters = 64 182 | fc_filters = 100 183 | scale=255.0 184 | 185 | self.LFN() if self.late_fusion else self.EFN() 186 | self.ConfNet() 187 | 188 | model_input_disp = self.disp 189 | model_input_local, model_input_global = (self.local, self.glob) if self.is_training == 'True' else (self.local_prediction, self.global_prediction) 190 | 191 | with tf.variable_scope('LGC'): 192 | with tf.variable_scope('disparity'): 193 | 194 | 
with tf.variable_scope("conv1"): 195 | conv1_disp = ops.conv2d(model_input_disp, [kernel_size, kernel_size, 1, filters], 1, True, padding='VALID') 196 | 197 | with tf.variable_scope("conv2"): 198 | conv2_disp = ops.conv2d(conv1_disp, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 199 | 200 | with tf.variable_scope("conv3"): 201 | conv3_disp = ops.conv2d(conv2_disp, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 202 | 203 | with tf.variable_scope("conv4"): 204 | conv4_disp = ops.conv2d(conv3_disp, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 205 | 206 | with tf.variable_scope('local'): 207 | with tf.variable_scope("conv1"): 208 | conv1_local = ops.conv2d(model_input_local*scale, [kernel_size, kernel_size, 1, filters], 1, True, padding='VALID') 209 | 210 | with tf.variable_scope("conv2"): 211 | conv2_local = ops.conv2d(conv1_local, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 212 | 213 | with tf.variable_scope("conv3"): 214 | conv3_local = ops.conv2d(conv2_local, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 215 | 216 | with tf.variable_scope("conv4"): 217 | conv4_local = ops.conv2d(conv3_local, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 218 | 219 | with tf.variable_scope('global'): 220 | with tf.variable_scope("conv1"): 221 | conv1_global = ops.conv2d(model_input_global*scale, [kernel_size, kernel_size, 1, filters], 1, True, padding='VALID') 222 | 223 | with tf.variable_scope("conv2"): 224 | conv2_global = ops.conv2d(conv1_global, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 225 | 226 | with tf.variable_scope("conv3"): 227 | conv3_global = ops.conv2d(conv2_global, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 228 | 229 | with tf.variable_scope("conv4"): 230 | conv4_global = ops.conv2d(conv3_global, [kernel_size, kernel_size, filters, filters], 1, True, padding='VALID') 231 | 232 | with tf.variable_scope("fully_connected_1"): 233 | fc1 = ops.conv2d(tf.concat([conv4_global, conv4_local, conv4_disp], axis=3), [1, 1, 3 * filters, fc_filters], 1, True, padding='VALID') 234 | 235 | with tf.variable_scope("fully_connected_2"): 236 | fc2 = ops.conv2d(fc1, [1, 1, fc_filters, fc_filters], 1, True, padding='VALID') 237 | 238 | with tf.variable_scope("prediction"): 239 | self.prediction = ops.conv2d(fc2, [1, 1, fc_filters, 1], 1, False, padding='VALID') 240 | 241 | def ConfNet(self): 242 | print(" [*] Building ConfNet model...") 243 | 244 | kernel_size = 3 245 | filters = 32 246 | 247 | if (self.model == "ConfNet") or (self.model == "LGC" and self.is_training == 'False'): 248 | left = self.left 249 | disp = self.disp 250 | else: 251 | left = self.left_full 252 | disp = self.disp_full 253 | 254 | with tf.variable_scope('ConfNet'): 255 | with tf.variable_scope('RGB'): 256 | with tf.variable_scope("conv1"): 257 | self.conv1_RGB = ops.conv2d(left, [kernel_size, kernel_size, 3, filters], 1, True, padding='SAME') 258 | 259 | with tf.variable_scope('disparity'): 260 | with tf.variable_scope("conv1"): 261 | self.conv1_disparity = ops.conv2d(disp, [kernel_size, kernel_size, 1, filters], 1, True, padding='SAME') 262 | 263 | model_input = tf.concat([self.conv1_RGB, self.conv1_disparity], axis=3) 264 | 265 | self.net1, self.scale1 = ops.encoding_unit('1', model_input, filters * 2) 266 | self.net2, self.scale2 = ops.encoding_unit('2', self.net1, filters * 4) 267 | self.net3, self.scale3 = 
ops.encoding_unit('3', self.net2, filters * 8) 268 | self.net4, self.scale4 = ops.encoding_unit('4', self.net3, filters * 16) 269 | 270 | self.net5 = ops.decoding_unit('4', self.net4, num_outputs=filters * 8, forwards=self.scale4) 271 | self.net6 = ops.decoding_unit('3', self.net5, num_outputs=filters * 4, forwards=self.scale3) 272 | self.net7 = ops.decoding_unit('2', self.net6, num_outputs=filters * 2, forwards=self.scale2) 273 | self.net8 = ops.decoding_unit('1', self.net7, num_outputs=filters, forwards=model_input) 274 | 275 | if self.model == 'LGC': 276 | self.global_prediction = tf.nn.sigmoid(ops.conv2d(self.net8, [kernel_size, kernel_size, filters, 1], 1, False, padding='SAME')) 277 | else: 278 | self.prediction = ops.conv2d(self.net8, [kernel_size, kernel_size, filters, 1], 1, False, padding='SAME') 279 | 280 | def build_losses(self): 281 | with tf.variable_scope('loss'): 282 | if self.model == 'ConfNet': 283 | self.mask = tf.cast(tf.not_equal(self.gt, 0.0), dtype=tf.float32) 284 | self.labels = tf.cast(tf.abs(tf.subtract(self.gt, self.disp)) <= self.threshold, dtype=tf.float32) 285 | self.loss = tf.losses.sigmoid_cross_entropy(self.labels, self.prediction, self.mask) 286 | else: 287 | self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.gt, logits=self.prediction)) 288 | 289 | def train(self, args): 290 | if self.model == 'LGC': 291 | self.train_LGC(args) 292 | elif self.model == 'ConfNet': 293 | self.train_global(args) 294 | else: 295 | self.train_local(args) 296 | 297 | def train_local(self, args): 298 | print("\n [*] Training....") 299 | 300 | if not os.path.exists(args.log_directory): 301 | os.makedirs(args.log_directory) 302 | 303 | self.vars = tf.all_variables() 304 | self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss, var_list=self.vars) 305 | self.saver = tf.train.Saver() 306 | self.summary_op = tf.summary.merge_all(self.model_collection[0]) 307 | self.writer = tf.summary.FileWriter(args.log_directory + "/summary/", graph=self.sess.graph) 308 | 309 | total_num_parameters = 0 310 | for variable in tf.trainable_variables(): 311 | total_num_parameters += np.array(variable.get_shape().as_list()).prod() 312 | print(" [*] Number of trainable parameters: {}".format(total_num_parameters)) 313 | 314 | self.sess.run(tf.global_variables_initializer()) 315 | self.sess.run(tf.local_variables_initializer()) 316 | 317 | print(' [*] Loading training set...') 318 | patches_left, patches_disp, patches_gt = self.dataloader.get_patches(self.patch_size, self.threshold) 319 | line = self.dataloader.disp_filename 320 | num_samples = count_text_lines(self.dataset) 321 | 322 | print(' [*] Training data loaded successfully') 323 | epoch = 0 324 | iteration = 0 325 | lr = self.initial_learning_rate 326 | 327 | coord = tf.train.Coordinator() 328 | threads = tf.train.start_queue_runners(coord=coord) 329 | 330 | print(" [*] Start Training...") 331 | while epoch < self.epoch: 332 | for i in range(num_samples): 333 | batch_left, batch_disp, batch_gt, filename = self.sess.run([patches_left, patches_disp, patches_gt, line]) 334 | print(" [*] Training image: " + filename) 335 | 336 | step_image = 0 337 | while step_image < len(batch_disp): 338 | offset = (step_image * self.batch_size) % (batch_disp.shape[0] - self.batch_size) 339 | batch_reference = batch_left[offset:(offset + self.batch_size), :, :, :] 340 | batch_data = batch_disp[offset:(offset + self.batch_size), :, :, :] 341 | batch_labels = batch_gt[offset:(offset + self.batch_size), 
self.radius:self.radius+1, self.radius:self.radius+1, :] 342 | 343 | _, loss, summary_str = self.sess.run([self.optimizer, self.loss, self.summary_op], feed_dict={self.left:batch_reference, self.disp:batch_data, self.gt:batch_labels, self.learning_rate: lr}) 344 | 345 | print("Epoch: [%2d]" % epoch + ", Image: [%2d]" % i + ", Iter: [%2d]" % iteration + ", Loss: [%2f]" % loss ) 346 | self.writer.add_summary(summary_str, global_step=iteration) 347 | iteration = iteration + 1 348 | step_image = step_image + self.batch_size 349 | 350 | epoch = epoch + 1 351 | 352 | if np.mod(epoch, args.save_epoch_freq) == 0: 353 | self.saver.save(self.sess, args.log_directory + '/' + self.model_name, global_step=iteration) 354 | 355 | if epoch == 10: 356 | lr = lr/10 357 | 358 | coord.request_stop() 359 | coord.join(threads) 360 | 361 | def train_global(self, args): 362 | print("\n [*] Training....") 363 | 364 | if not os.path.exists(args.log_directory): 365 | os.makedirs(args.log_directory) 366 | 367 | self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss, var_list=tf.all_variables()) 368 | self.saver = tf.train.Saver() 369 | self.summary_op = tf.summary.merge_all(self.model_collection[0]) 370 | self.writer = tf.summary.FileWriter(args.log_directory + "/summary/", graph=self.sess.graph) 371 | 372 | total_num_parameters = 0 373 | for variable in tf.trainable_variables(): 374 | total_num_parameters += np.array(variable.get_shape().as_list()).prod() 375 | print(" [*] Number of trainable parameters: {}".format(total_num_parameters)) 376 | 377 | self.sess.run(tf.global_variables_initializer()) 378 | self.sess.run(tf.local_variables_initializer()) 379 | 380 | print(' [*] Loading training set...') 381 | line = self.dataloader.disp_filename 382 | num_samples = count_text_lines(self.dataset) 383 | 384 | steps_per_epoch = np.ceil(num_samples / self.batch_size).astype(np.int32) 385 | num_total_steps = self.epoch * steps_per_epoch 386 | lr = self.initial_learning_rate 387 | 388 | coord = tf.train.Coordinator() 389 | threads = tf.train.start_queue_runners(coord=coord) 390 | 391 | print(" [*] Start Training...") 392 | for step in range(0, num_total_steps): 393 | 394 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.learning_rate: lr}) 395 | 396 | print("Step: [%2d]" % step + "/[%2d]" % num_total_steps + ", Loss: [%2f]" % loss) 397 | 398 | if step % 2 == 0: 399 | summary_str = self.sess.run(self.summary_op, feed_dict={self.learning_rate: lr}) 400 | self.writer.add_summary(summary_str, global_step=step) 401 | 402 | if step % 5000 == 0: 403 | self.saver.save(self.sess, args.log_directory + '/' + self.model_name, global_step=step) 404 | 405 | if step == steps_per_epoch * 1000: 406 | lr = lr/10 407 | 408 | self.saver.save(self.sess, args.log_directory + '/' + self.model_name, global_step=num_total_steps) 409 | 410 | coord.request_stop() 411 | coord.join(threads) 412 | 413 | def train_LGC(self, args): 414 | print("\n [*] Training....") 415 | 416 | if not os.path.exists(args.log_directory): 417 | os.makedirs(args.log_directory) 418 | 419 | self.vars = tf.all_variables() 420 | self.vars_global = [k for k in self.vars if k.name.startswith('ConfNet')] 421 | self.vars_local = [k for k in self.vars if (k.name.startswith('CCNN') or k.name.startswith('LFN'))] 422 | self.vars_lgc = [k for k in self.vars if k.name.startswith('LGC')] 423 | 424 | self.saver_global = tf.train.Saver(self.vars_global) 425 | self.saver_local = tf.train.Saver(self.vars_local) 426 | self.saver = 
tf.train.Saver(self.vars_lgc) 427 | 428 | self.summary_op = tf.summary.merge_all(self.model_collection[0]) 429 | self.writer = tf.summary.FileWriter(args.log_directory + "/summary/", graph=self.sess.graph) 430 | 431 | total_num_parameters = 0 432 | for variable in self.vars_lgc: 433 | total_num_parameters += np.array(variable.get_shape().as_list()).prod() 434 | print(" [*] Number of trainable parameters: {}".format(total_num_parameters)) 435 | 436 | self.sess.run(tf.global_variables_initializer()) 437 | self.sess.run(tf.local_variables_initializer()) 438 | 439 | self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss, var_list=self.vars_lgc) 440 | 441 | if args.checkpoint_path and len(args.checkpoint_path) == 2: 442 | self.saver_global.restore(self.sess, args.checkpoint_path[0]) 443 | self.saver_local.restore(self.sess, args.checkpoint_path[1]) 444 | print(" [*] Load model: SUCCESS") 445 | else: 446 | print(" [*] Load failed: training LGC requires the ConfNet and CCNN/LFN checkpoints") 447 | print(" [*] End Training...") 448 | raise ValueError('args.checkpoint_path must contain the ConfNet and CCNN/LFN checkpoints') 449 | 450 | print(' [*] Loading training set...') 451 | patches_left, patches_disp, patches_global, patches_local, patches_gt = self.dataloader.get_patches(self.patch_size, self.threshold, self.global_prediction[0], self.local_prediction[0]) 452 | left = self.dataloader.left 453 | disp = self.dataloader.disp 454 | gt = self.dataloader.gt 455 | line = self.dataloader.disp_filename 456 | num_samples = count_text_lines(self.dataset) 457 | 458 | print(' [*] Training data loaded successfully') 459 | epoch = 0 460 | iteration = 0 461 | lr = self.initial_learning_rate 462 | 463 | coord = tf.train.Coordinator() 464 | threads = tf.train.start_queue_runners(coord=coord) 465 | 466 | left = tf.expand_dims(left, 0) 467 | disp = tf.expand_dims(disp, 0) 468 | gt = tf.expand_dims(gt, 0) 469 | 470 | print(" [*] Start Training...") 471 | while epoch < self.epoch: 472 | for i in range(num_samples): 473 | batch_left, batch_disp, batch_gt, batch_global, batch_local, filename = self.sess.run([patches_left, patches_disp, patches_gt, patches_global, patches_local, line]) 474 | 475 | print(" [*] Training image: " + filename) 476 | 477 | step_image = 0 478 | while step_image < len(batch_disp): 479 | offset = (step_image * self.batch_size) % (batch_disp.shape[0] - self.batch_size) 480 | batch_reference = batch_left[offset:(offset + self.batch_size), :, :, :] 481 | batch_data = batch_disp[offset:(offset + self.batch_size), :, :, :] 482 | batch_glob = batch_global[offset:(offset + self.batch_size), :, :, :] 483 | batch_loc = batch_local[offset:(offset + self.batch_size), :, :, :] 484 | batch_labels = batch_gt[offset:(offset + self.batch_size), self.radius:self.radius+1, self.radius:self.radius+1, :] 485 | 486 | _, loss, summary_str = self.sess.run([self.optimizer, self.loss, self.summary_op], 487 | feed_dict={self.left:batch_reference, self.disp:batch_data, self.glob:batch_glob, self.local:batch_loc, self.gt:batch_labels, self.learning_rate: lr}) 488 | 489 | print("Epoch: [%2d]" % epoch + ", Image: [%2d]" % i + ", Iter: [%2d]" % iteration + ", Loss: [%2f]" % loss ) 490 | self.writer.add_summary(summary_str, global_step=iteration) 491 | iteration = iteration + 1 492 | step_image = step_image + self.batch_size 493 | 494 | epoch = epoch + 1 495 | 496 | if np.mod(epoch, args.save_epoch_freq) == 0: 497 | self.saver.save(self.sess, args.log_directory + '/' + self.model_name, global_step=iteration) 498 | 499 | if epoch == 10: 500 | lr = lr/10 501 | 502 | coord.request_stop() 503 | coord.join(threads)
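    # NOTE: at test time --checkpoint_path is positional: a single path for the
    # stand-alone networks, or exactly three paths for LGC, in the order
    # ConfNet (global), CCNN/LFN (local), LGC.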
504 | 505 | def test(self, args): 506 | print("[*] Testing....") 507 | 508 | if not os.path.exists(args.output_path): 509 | os.makedirs(args.output_path) 510 | 511 | self.sess.run(tf.global_variables_initializer()) 512 | self.sess.run(tf.local_variables_initializer()) 513 | 514 | if self.model == 'LGC': 515 | self.vars = tf.all_variables() 516 | self.vars_global = [k for k in self.vars if k.name.startswith('ConfNet')] 517 | self.vars_local = [k for k in self.vars if (k.name.startswith('CCNN') or k.name.startswith('LFN'))] 518 | self.vars_lgc = [k for k in self.vars if k.name.startswith('LGC')] 519 | 520 | self.saver_global = tf.train.Saver(self.vars_global) 521 | self.saver_local = tf.train.Saver(self.vars_local) 522 | self.saver_LGC = tf.train.Saver(self.vars_lgc) 523 | 524 | if args.checkpoint_path and len(args.checkpoint_path) == 3: 525 | self.saver_global.restore(self.sess, args.checkpoint_path[0]) 526 | self.saver_local.restore(self.sess, args.checkpoint_path[1]) 527 | self.saver_LGC.restore(self.sess, args.checkpoint_path[2]) 528 | 529 | print(" [*] Load model: SUCCESS") 530 | else: 531 | print(" [*] Load failed: testing LGC requires three checkpoints") 532 | print(" [*] End Testing...") 533 | raise ValueError('args.checkpoint_path must contain the ConfNet, CCNN/LFN and LGC checkpoints') 534 | else: 535 | self.saver = tf.train.Saver() 536 | 537 | if args.checkpoint_path: 538 | self.saver.restore(self.sess, args.checkpoint_path[0]) 539 | print(" [*] Load model: SUCCESS") 540 | else: 541 | print(" [*] Load failed: no checkpoint provided") 542 | print(" [*] End Testing...") 543 | raise ValueError('args.checkpoint_path is None') 544 | 545 | disp_batch = self.dataloader.disp 546 | left_batch = self.dataloader.left 547 | line = self.dataloader.disp_filename 548 | num_samples = count_text_lines(self.dataset) 549 | 550 | if self.model == 'ConfNet': 551 | prediction = tf.nn.sigmoid(self.prediction) 552 | else: 553 | prediction = tf.pad(tf.nn.sigmoid(self.prediction), tf.constant([[0, 0], [self.radius, self.radius], [self.radius, self.radius], [0, 0]]), "CONSTANT") 554 | 555 | coord = tf.train.Coordinator() 556 | threads = tf.train.start_queue_runners(coord=coord) 557 | 558 | print(" [*] Start Testing...") 559 | for step in range(num_samples): 560 | batch_left, batch_disp, filename = self.sess.run([left_batch, disp_batch, line]) 561 | if self.model == 'ConfNet' or self.model == 'LGC': 562 | val_disp, hpad, wpad = ops.pad(batch_disp) 563 | val_left, _, _ = ops.pad(batch_left) 564 | 565 | print(" [*] Test image: " + filename) 566 | start = time.time() 567 | if self.model == 'ConfNet' or self.model == 'LGC': 568 | confidence = self.sess.run(prediction, feed_dict={self.left: val_left, self.disp: val_disp}) 569 | confidence = ops.depad(confidence, hpad, wpad) 570 | else: 571 | confidence = self.sess.run(prediction, feed_dict={self.left: batch_left, self.disp: batch_disp}) 572 | current = time.time() 573 | output_file = args.output_path + filename.strip().split('/')[-1] 574 | 575 | cv2.imwrite(output_file, (confidence[0] * 65535.0).astype('uint16')) 576 | print(" [*] Confidence prediction saved in: " + output_file) 577 | print(" [*] Running time: " + str(current - start) + "s") 578 | 579 | coord.request_stop() 580 | coord.join(threads) 581 | 582 | def build_summaries(self): 583 | tf.summary.scalar('loss', self.loss, collections=self.model_collection) 584 | tf.summary.scalar('learning_rate', self.learning_rate, collections=self.model_collection) 585 | if self.model == 'ConfNet': 586 |
tf.summary.image('left', self.left, collections=self.model_collection) 587 | tf.summary.image('confidence', tf.nn.sigmoid(self.prediction), collections=self.model_collection) 588 | tf.summary.image('disparity', self.disp, collections=self.model_collection) 589 | tf.summary.image('labels', self.labels * self.mask, collections=self.model_collection) 590 | 591 | 592 | --------------------------------------------------------------------------------