├── DN ├── direction_model.py ├── ioUtils.py ├── lossFunction.py └── train_direction.py ├── E2E ├── e2e_model.py ├── forward.py ├── forward_e2e.py ├── io_utils.py ├── loss_function.py ├── main.py ├── model_io.py ├── network_init.py ├── post_process.py └── train.py ├── LICENSE ├── README ├── README.md ├── WTN ├── depth_model.py ├── ioUtils.py ├── lossFunction.py └── train_depth.py ├── example ├── PSPNet │ ├── frankfurt_000000_002196.png │ ├── frankfurt_000000_002963.png │ ├── frankfurt_000001_031266.png │ └── munster_000067_000019.png ├── inputImages │ ├── frankfurt_000000_002196_leftImg8bit.png │ ├── frankfurt_000000_002963_leftImg8bit.png │ ├── frankfurt_000001_031266_leftImg8bit.png │ └── munster_000067_000019_leftImg8bit.png ├── output │ ├── frankfurt_000000_002196.mat │ ├── frankfurt_000000_002196.png │ ├── frankfurt_000000_002963.mat │ ├── frankfurt_000000_002963.png │ ├── frankfurt_000001_031266.mat │ ├── frankfurt_000001_031266.png │ ├── munster_000067_000019.mat │ └── munster_000067_000019.png └── sample_list.txt └── matlab ├── batch_generate_cityscapes.m ├── batch_generate_cityscapes_PSP_ss.m └── generate_GT_cityscapes_unified.m /DN/direction_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import ceil 3 | import tensorflow as tf 4 | import scipy.io as sio 5 | 6 | VGG_MEAN = [103.939, 116.779, 123.68] 7 | 8 | class Network: 9 | def __init__(self, params, wd=5e-5, modelWeightPaths=None): 10 | self._params = params 11 | self._images = tf.placeholder("float") 12 | self._batch_images = tf.expand_dims(self._images, 0) 13 | self._gt = tf.placeholder("float") 14 | self._batch_gt = tf.expand_dims(self._gt, 0) 15 | self._wd = wd 16 | self.modelDict = {} 17 | 18 | if modelWeightPaths is not None: 19 | for path in modelWeightPaths: 20 | self.modelDict.update(sio.loadmat(path)) 21 | 22 | def build(self, inputData, ss=None, ssMask=None, keepProb=1.0): 23 | # ss_3 = tf.tile(tf.expand_dims(ss, -1), [1, 1, 1, 3]) 24 | inputData = inputData * tf.expand_dims(ss, -1) 25 | 26 | inputData = tf.concat(3, [inputData, tf.expand_dims(ssMask,-1)]) 27 | 28 | self.conv1_1 = self._conv_layer(inputData, params=self._params["direction/conv1_1"]) 29 | self.conv1_2 = self._conv_layer(self.conv1_1, params=self._params["direction/conv1_2"]) 30 | self.pool1 = self._max_pool(self.conv1_2, 'direction/pool1') 31 | 32 | self.conv2_1 = self._conv_layer(self.pool1, params=self._params["direction/conv2_1"]) 33 | self.conv2_2 = self._conv_layer(self.conv2_1, params=self._params["direction/conv2_2"]) 34 | self.pool2 = self._max_pool(self.conv2_2, 'direction/pool2') 35 | 36 | self.conv3_1 = self._conv_layer(self.pool2, params=self._params["direction/conv3_1"]) 37 | self.conv3_2 = self._conv_layer(self.conv3_1, params=self._params["direction/conv3_2"]) 38 | self.conv3_3 = self._conv_layer(self.conv3_2, params=self._params["direction/conv3_3"]) 39 | self.pool3 = self._average_pool(self.conv3_3, 'direction/pool3') 40 | 41 | self.conv4_1 = self._conv_layer(self.pool3, params=self._params["direction/conv4_1"]) 42 | self.conv4_2 = self._conv_layer(self.conv4_1, params=self._params["direction/conv4_2"]) 43 | self.conv4_3 = self._conv_layer(self.conv4_2, params=self._params["direction/conv4_3"]) 44 | self.pool4 = self._average_pool(self.conv4_3, 'direction/pool4') 45 | 46 | self.conv5_1 = self._conv_layer(self.pool4, params=self._params["direction/conv5_1"]) 47 | self.conv5_2 = self._conv_layer(self.conv5_1, params=self._params["direction/conv5_2"]) 48 | self.conv5_3 = self._conv_layer(self.conv5_2, params=self._params["direction/conv5_3"]) 49 | 50 | print "built all CNN layers!" 51 | 52 | self.fcn5_1 = self._conv_layer(self.conv5_3, params=self._params["direction/fcn5_1"]) 53 | self.fcn5_2 = self._conv_layer(self.fcn5_1, params=self._params["direction/fcn5_2"]) 54 | self.fcn5_3 = self._conv_layer(self.fcn5_2, params=self._params["direction/fcn5_3"]) 55 | 56 | self.fcn4_1 = self._conv_layer(self.conv4_3, params=self._params["direction/fcn4_1"]) 57 | self.fcn4_2 = self._conv_layer(self.fcn4_1, params=self._params["direction/fcn4_2"]) 58 | self.fcn4_3 = self._conv_layer(self.fcn4_2, params=self._params["direction/fcn4_3"]) 59 | 60 | self.fcn3_1 = self._conv_layer(self.conv3_3, params=self._params["direction/fcn3_1"]) 61 | self.fcn3_2 = self._conv_layer(self.fcn3_1, params=self._params["direction/fcn3_2"]) 62 | self.fcn3_3 = self._conv_layer(self.fcn3_2, params=self._params["direction/fcn3_3"]) 63 | 64 | print "built all FCN layers!" 65 | 66 | self.upscore5_3 = self._upscore_layer(self.fcn5_3, params=self._params["direction/upscore5_3"], 67 | shape=tf.shape(self.fcn3_3)) 68 | self.upscore4_3 = self._upscore_layer(self.fcn4_3, params=self._params["direction/upscore4_3"], 69 | shape=tf.shape(self.fcn3_3)) 70 | 71 | self.fuse3 = tf.concat(3, [self.fcn3_3, self.upscore5_3, self.upscore4_3], name="direction/fuse3") 72 | self.fuse3_1 = self._conv_layer(self.fuse3, params=self._params["direction/fuse3_1"]) 73 | self.fuse3_2 = self._conv_layer(self.fuse3_1, params=self._params["direction/fuse3_2"]) 74 | self.fuse3_3 = self._conv_layer(self.fuse3_2, params=self._params["direction/fuse3_3"]) 75 | print "built all fusing layers" 76 | 77 | self.output = self._upscore_layer(self.fuse3_3, params=self._params["direction/upscore3_1"], 78 | shape=tf.shape(inputData)) 79 | 80 | # ss_2 = tf.tile(tf.expand_dims(ss,-1),[1,1,1,2]) 81 | # self.output = self.output * ss_2 82 | self.output = self.output * tf.expand_dims(ss, -1) 83 | 84 | self.output = tf.nn.l2_normalize(self.output, 3, epsilon=1e-20) 85 | 86 | print "built the output layer!" 87 | # LAYER BUILDING 88 | 89 | def _max_pool(self, bottom, name): 90 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 91 | padding='SAME', name=name) 92 | 93 | def _average_pool(self, bottom, name): 94 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 95 | padding='SAME', name=name) 96 | 97 | def _conv_layer(self, bottom, params, keepProb=1.0): 98 | with tf.variable_scope(params["name"]) as scope: 99 | filt = self.get_conv_filter(params) 100 | 101 | if "dr" in params.keys(): 102 | conv = tf.nn.atrous_conv2d(bottom, filt, params["dr"], padding="SAME") 103 | else: 104 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 105 | 106 | conv_biases = self.get_bias(params) 107 | 108 | if params["act"] == "relu": 109 | activation = tf.nn.relu(tf.nn.bias_add(conv, conv_biases)) 110 | elif params["act"] == "lin": 111 | activation = tf.nn.bias_add(conv, conv_biases) 112 | elif params["act"] == "tanh": 113 | activation = tf.nn.tanh(tf.nn.bias_add(conv, conv_biases)) 114 | 115 | if not isinstance(keepProb, (int, long, float)): 116 | activation = tf.nn.dropout(activation, keep_prob=keepProb, seed=0) 117 | 118 | return activation 119 | 120 | # WEIGHTS GENERATION 121 | 122 | def get_bias(self, params): 123 | if params["name"]+"/biases" in self.modelDict: 124 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/biases"], dtype=tf.float32) 125 | print "loaded " + params["name"] + "/biases" 126 | else: 127 | init = tf.constant_initializer(value=0.0) 128 | print "generated " + params["name"] + "/biases" 129 | 130 | var = tf.get_variable(name="biases", initializer=init, shape=params["shape"][3]) 131 | 132 | return var 133 | 134 | def get_conv_filter(self, params): 135 | if params["name"]+"/weights" in self.modelDict: 136 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/weights"], dtype=tf.float32) 137 | var = tf.get_variable(name="weights", initializer=init, shape=params["shape"]) 138 | print "loaded " + params["name"]+"/weights" 139 | else: 140 | if params["std"]: 141 | stddev = params["std"] 142 | else: 143 | fanIn = params["shape"][0]*params["shape"][1]*params["shape"][2] 144 | stddev = (2/float(fanIn))**0.5 145 | 146 | init = tf.truncated_normal(shape=params["shape"], stddev=stddev, seed=0) 147 | var = tf.get_variable(name="weights", initializer=init) 148 | print "generated " + params["name"] + "/weights" 149 | 150 | if not tf.get_variable_scope().reuse: 151 | weightDecay = tf.mul(tf.nn.l2_loss(var), self._wd, 152 | name='weight_loss') 153 | tf.add_to_collection('losses', weightDecay) 154 | 155 | return var 156 | 157 | def _upscore_layer(self, bottom, shape, params): 158 | strides = [1, params["stride"], params["stride"], 1] 159 | with tf.variable_scope(params["name"]): 160 | in_features = bottom.get_shape()[3].value 161 | 162 | new_shape = [shape[0], shape[1], shape[2], params["outputChannels"]] 163 | output_shape = tf.pack(new_shape) 164 | 165 | f_shape = [params["ksize"], params["ksize"], params["outputChannels"], in_features] 166 | 167 | weights = self.get_deconv_filter(f_shape, params) 168 | deconv = tf.nn.conv2d_transpose(bottom, weights, output_shape, 169 | strides=strides, padding='SAME') 170 | 171 | return deconv 172 | 173 | def get_deconv_filter(self, f_shape, params): 174 | if params["name"]+"/up_filter" in self.modelDict: 175 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/up_filter"], dtype=tf.float32) 176 | else: 177 | width = f_shape[0] 178 | height = f_shape[0] 179 | f = ceil(width / 2.0) 180 | c = (2 * f - 1 - f % 2) / (2.0 * f) 181 | bilinear = np.zeros([f_shape[0], f_shape[1]]) 182 | for x in range(width): 183 | for y in range(height): 184 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 185 | bilinear[x, y] = value 186 | weights = np.zeros(f_shape) 187 | for i in range(f_shape[2]): 188 | weights[:, :, i, i] = bilinear 189 | 190 | init = tf.constant_initializer(value=weights, 191 | dtype=tf.float32) 192 | 193 | return tf.get_variable(name="up_filter", initializer=init, shape=f_shape) 194 | -------------------------------------------------------------------------------- /DN/ioUtils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import skimage 3 | import skimage.io 4 | import scipy.io as sio 5 | import skimage.transform 6 | import sys 7 | 8 | np.random.seed(0) 9 | 10 | VGG_MEAN = [103.939, 116.779, 123.68] 11 | 12 | 13 | def read_mat(path): 14 | return np.load(path) 15 | 16 | 17 | def write_mat(path, m): 18 | np.save(path, m) 19 | 20 | 21 | def read_ids(path): 22 | return [line.rstrip('\n') for line in open(path)] 23 | 24 | 25 | class Batch_Feeder: 26 | def __init__(self, dataset, indices, train, batchSize, padWidth=None, padHeight=None, flip=False, keepEmpty=True): 27 | self._epochs_completed = 0 28 | self._index_in_epoch = 0 29 | self._dataset = dataset 30 | self._indices = indices 31 | self._train = train 32 | self._batchSize = batchSize 33 | self._padWidth = padWidth 34 | self._padHeight = padHeight 35 | self._flip = flip 36 | self._keepEmpty = keepEmpty 37 | 38 | def set_paths(self, idList=None, imageDir=None, gtDir=None, ssDir=None): 39 | self._paths = [] 40 | 41 | if self._train: 42 | for id in idList: 43 | self._paths.append([id, imageDir + '/' + id + '_leftImg8bit.png', 44 | gtDir + '/' + id + '_unified_GT.mat', 45 | ssDir + '/' + id + '_unified_ss.mat']) 46 | self.shuffle() 47 | else: 48 | for id in idList: 49 | self._paths.append([id, imageDir + '/' + id + '_leftImg8bit.png', 50 | ssDir + '/' + id + '_unified_ss.mat']) 51 | 52 | self._numData = len(self._paths) 53 | 54 | if self._numData < self._batchSize: 55 | self._batchSize = self._numData 56 | 57 | def shuffle(self): 58 | np.random.shuffle(self._paths) 59 | 60 | def next_batch(self): 61 | idBatch = [] 62 | imageBatch = [] 63 | gtBatch = [] 64 | ssBatch = [] 65 | ssMaskBatch = [] 66 | weightBatch = [] 67 | 68 | if self._train: 69 | while(len(idBatch) < self._batchSize): 70 | ss = (sio.loadmat(self._paths[self._index_in_epoch][3])['mask']).astype(float) 71 | ssMask = ss 72 | ss = np.sum(ss[:,:,self._indices], 2) 73 | 74 | background = np.zeros(ssMask.shape[0:2] + (1,)) 75 | ssMask = np.concatenate((ssMask[:,:,[1,2,3,4]], background, ssMask[:,:,[0,5,6,7]]), axis=-1) 76 | ssMask = np.argmax(ssMask, axis=-1) 77 | ssMask = ssMask.astype(float) 78 | ssMask = (ssMask - 4) * 32 # centered at 0, with 0 being background, spaced 32 apart for classes 79 | 80 | if ss.sum() > 0 or self._keepEmpty: 81 | idBatch.append(self._paths[self._index_in_epoch][0]) 82 | 83 | image = (self.image_scaling(skimage.io.imread(self._paths[self._index_in_epoch][1]))).astype(float) 84 | gt = (sio.loadmat(self._paths[self._index_in_epoch][2])['dir_map']).astype(float) 85 | weight = (sio.loadmat(self._paths[self._index_in_epoch][2])['weight_map']).astype(float) 86 | 87 | imageBatch.append(self.pad(image)) 88 | gtBatch.append(self.pad(gt)) 89 | weightBatch.append(self.pad(weight)) 90 | ssBatch.append(self.pad(ss)) 91 | ssMaskBatch.append(self.pad(ssMask)) 92 | else: 93 | pass 94 | # raw_input("skipping " + self._paths[self._index_in_epoch][0]) 95 | self._index_in_epoch += 1 96 | if self._index_in_epoch == self._numData: 97 | self._index_in_epoch = 0 98 | self.shuffle() 99 | 100 | imageBatch = np.array(imageBatch) 101 | gtBatch = np.array(gtBatch) 102 | ssBatch = np.array(ssBatch) 103 | ssMaskBatch = np.array(ssMaskBatch) 104 | weightBatch = np.array(weightBatch) 105 | 106 | if self._flip and np.random.uniform() > 0.5: 107 | for i in range(len(imageBatch)): 108 | for j in range(3): 109 | imageBatch[i,:,:,j] = np.fliplr(imageBatch[i,:,:,j]) 110 | 111 | weightBatch[i] = np.fliplr(weightBatch[i]) 112 | ssBatch[i] = np.fliplr(ssBatch[i]) 113 | ssMaskBatch[i] = np.fliplr(ssMaskBatch[i]) 114 | 115 | for j in range(2): 116 | gtBatch[i,:,:,j] = np.fliplr(gtBatch[i,:,:,j]) 117 | 118 | gtBatch[i,:,:,0] = -1 * gtBatch[i,:,:,0] 119 | return imageBatch, gtBatch, weightBatch, ssBatch, ssMaskBatch, idBatch 120 | else: 121 | for example in self._paths[self._index_in_epoch:min(self._index_in_epoch+self._batchSize, self._numData)]: 122 | imageBatch.append(self.pad((self.image_scaling(skimage.io.imread(example[1]))).astype(float))) 123 | idBatch.append(example[0]) 124 | ss = (sio.loadmat(example[2])['mask']).astype(float) 125 | ssMask = ss 126 | ss = np.sum(ss[:, :, self._indices], 2) 127 | background = np.zeros(ssMask.shape[0:2] + (1,)) 128 | ssMask = np.concatenate((ssMask[:,:,[1,2,3,4]], background, ssMask[:,:,[0,5,6,7]]), axis=-1) 129 | ssMask = np.argmax(ssMask, axis=-1) 130 | ssMask = ssMask.astype(float) 131 | ssMask = (ssMask - 4) * 32 # centered at 0, with 0 being background, spaced 32 apart for classes 132 | ssBatch.append(self.pad(ss)) 133 | ssMaskBatch.append(self.pad(ssMask)) 134 | imageBatch = np.array(imageBatch) 135 | ssBatch = np.array(ssBatch) 136 | ssMaskBatch = np.array(ssMaskBatch) 137 | 138 | self._index_in_epoch += self._batchSize 139 | return imageBatch, ssBatch, ssMaskBatch, idBatch 140 | 141 | def total_samples(self): 142 | return self._numData 143 | 144 | def image_scaling(self, rgb_in): 145 | if rgb_in.dtype == np.float32: 146 | rgb_in = rgb_in*255 147 | elif rgb_in.dtype == np.uint8: 148 | rgb_in = rgb_in.astype(np.float32) 149 | 150 | # VGG16 was trained using opencv which reads images as BGR, but skimage reads images as RGB 151 | rgb_out = np.zeros(rgb_in.shape).astype(np.float32) 152 | rgb_out[:,:,0] = rgb_in[:,:,2] - VGG_MEAN[2] 153 | rgb_out[:,:,1] = rgb_in[:,:,1] - VGG_MEAN[1] 154 | rgb_out[:,:,2] = rgb_in[:,:,0] - VGG_MEAN[0] 155 | 156 | return rgb_out 157 | 158 | def pad(self, data): 159 | if self._padHeight and self._padWidth: 160 | if data.ndim == 3: 161 | npad = ((0,self._padHeight-data.shape[0]),(0,self._padWidth-data.shape[1]),(0,0)) 162 | elif data.ndim == 2: 163 | npad = ((0, self._padHeight - data.shape[0]), (0, self._padWidth - data.shape[1])) 164 | padData = np.pad(data, npad, mode='constant', constant_values=0) 165 | 166 | else: 167 | padData = data 168 | 169 | return padData 170 | 171 | -------------------------------------------------------------------------------- /DN/lossFunction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def angularErrorTotal(pred, gt, weight, ss, outputChannels=2): 4 | with tf.name_scope("angular_error"): 5 | pred = tf.reshape(pred, (-1, outputChannels)) 6 | gt = tf.to_float(tf.reshape(gt, (-1, outputChannels))) 7 | weight = tf.to_float(tf.reshape(weight, (-1, 1))) 8 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 9 | 10 | pred = tf.nn.l2_normalize(pred, 1) * 0.999999 11 | gt = tf.nn.l2_normalize(gt, 1) * 0.999999 12 | 13 | errorAngles = tf.acos(tf.reduce_sum(pred * gt, reduction_indices=[1], keep_dims=True)) 14 | 15 | lossAngleTotal = tf.reduce_sum((tf.abs(errorAngles*errorAngles))*ss*weight) 16 | 17 | return lossAngleTotal 18 | 19 | def angularErrorLoss(pred, gt, weight, ss, outputChannels=2): 20 | lossAngleTotal = angularErrorTotal(pred=pred, gt=gt, ss=ss, weight=weight, outputChannels=outputChannels) \ 21 | / (countTotal(ss)+1) 22 | 23 | tf.add_to_collection('losses', lossAngleTotal) 24 | 25 | totalLoss = tf.add_n(tf.get_collection('losses'), name='total_loss') 26 | 27 | return totalLoss 28 | 29 | 30 | def exceedingAngleThreshold(pred, gt, ss, threshold, outputChannels=2): 31 | with tf.name_scope("angular_error"): 32 | pred = tf.reshape(pred, (-1, outputChannels)) 33 | gt = tf.to_float(tf.reshape(gt, (-1, outputChannels))) 34 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 35 | 36 | pred = tf.nn.l2_normalize(pred, 1) * 0.999999 37 | gt = tf.nn.l2_normalize(gt, 1) * 0.999999 38 | 39 | errorAngles = tf.acos(tf.reduce_sum(pred * gt, reduction_indices=[1], keep_dims=True)) * ss 40 | 41 | exceedCount = tf.reduce_sum(tf.to_float(tf.less(threshold/180*3.14159, errorAngles))) 42 | 43 | return exceedCount 44 | 45 | def countCorrect(pred, gt, ss, k, outputChannels): 46 | with tf.name_scope("correct"): 47 | pred = tf.argmax(tf.reshape(pred, (-1, outputChannels)), 1) 48 | gt = tf.reshape(gt, (-1, outputChannels)) 49 | 50 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 51 | 52 | correct = tf.reduce_sum(tf.mul(tf.reshape(tf.to_float(tf.nn.in_top_k(gt, pred, k)), (-1, 1)), ss), reduction_indices=[0]) 53 | return correct 54 | 55 | 56 | def countTotal(ss): 57 | with tf.name_scope("total"): 58 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 59 | total = tf.reduce_sum(ss) 60 | 61 | return total 62 | 63 | def countTotalWeighted(ss, weight): 64 | with tf.name_scope("total"): 65 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 66 | weight = tf.to_float(tf.reshape(weight, (-1, 1))) 67 | total = tf.reduce_sum(ss * weight) 68 | 69 | return total -------------------------------------------------------------------------------- /DN/train_direction.py: -------------------------------------------------------------------------------- 1 | import direction_model 2 | from ioUtils import * 3 | import math 4 | import lossFunction 5 | import tensorflow as tf 6 | import numpy as np 7 | import sys 8 | import os 9 | import scipy.io as sio 10 | import re 11 | import time 12 | 13 | VGG_MEAN = [103.939, 116.779, 123.68] 14 | 15 | tf.set_random_seed(0) 16 | 17 | def initialize_model(outputChannels, wd=None, modelWeightPaths=None): 18 | fuseChannels=256 19 | params = {"direction/conv1_1": {"name": "direction/conv1_1", "shape": [3,3,4,64], "std": None, "act": "relu"}, 20 | "direction/conv1_2": {"name": "direction/conv1_2", "shape": [3,3,64,64], "std": None, "act": "relu"}, 21 | "direction/conv2_1": {"name": "direction/conv2_1", "shape": [3,3,64,128], "std": None, "act": "relu"}, 22 | "direction/conv2_2": {"name": "direction/conv2_2", "shape": [3,3,128,128], "std": None, "act": "relu"}, 23 | "direction/conv3_1": {"name": "direction/conv3_1", "shape": [3,3,128,256], "std": None, "act": "relu"}, 24 | "direction/conv3_2": {"name": "direction/conv3_2", "shape": [3,3,256,256], "std": None, "act": "relu"}, 25 | "direction/conv3_3": {"name": "direction/conv3_3", "shape": [3,3,256,256], "std": None, "act": "relu"}, 26 | "direction/conv4_1": {"name": "direction/conv4_1", "shape": [3,3,256,512], "std": None, "act": "relu"}, 27 | "direction/conv4_2": {"name": "direction/conv4_2", "shape": [3,3,512,512], "std": None, "act": "relu"}, 28 | "direction/conv4_3": {"name": "direction/conv4_3", "shape": [3,3,512,512], "std": None, "act": "relu"}, 29 | "direction/conv5_1": {"name": "direction/conv5_1", "shape": [3,3,512,512], "std": None, "act": "relu"}, 30 | "direction/conv5_2": {"name": "direction/conv5_2", "shape": [3,3,512,512], "std": None, "act": "relu"}, 31 | "direction/conv5_3": {"name": "direction/conv5_3", "shape": [3,3,512,512], "std": None, "act": "relu"}, 32 | "direction/fcn5_1": {"name": "direction/fcn5_1", "shape": [5,5,512,512], "std": None, "act": "relu"}, 33 | "direction/fcn5_2": {"name": "direction/fcn5_2", "shape": [1,1,512,512], "std": None, "act": "relu"}, 34 | "direction/fcn5_3": {"name": "direction/fcn5_3", "shape": [1,1,512,fuseChannels], "std": 1e-2, "act": "relu"}, 35 | "direction/upscore5_3": {"name": "direction/upscore5_3", "ksize": 8, "stride": 4, "outputChannels": fuseChannels}, 36 | "direction/fcn4_1": {"name": "direction/fcn4_1", "shape": [5,5,512,512], "std": None, "act": "relu"}, 37 | "direction/fcn4_2": {"name": "direction/fcn4_2", "shape": [1,1,512,512], "std": None, "act": "relu"}, 38 | "direction/fcn4_3": {"name": "direction/fcn4_3", "shape": [1,1,512,fuseChannels], "std": 1e-3, "act": "relu"}, 39 | "direction/upscore4_3": {"name": "direction/upscore4_3", "ksize": 4, "stride": 2, "outputChannels": fuseChannels}, 40 | "direction/fcn3_1": {"name": "direction/fcn3_1", "shape": [5,5,256,256], "std": None, "act": "relu"}, 41 | "direction/fcn3_2": {"name": "direction/fcn3_2", "shape": [1,1,256,256], "std": None, "act": "relu"}, 42 | "direction/fcn3_3": {"name": "direction/fcn3_3", "shape": [1,1,256,fuseChannels], "std": 1e-4, "act": "relu"}, 43 | "direction/fuse3_1": {"name": "direction/fuse_1", "shape": [1,1,fuseChannels*3, 512], "std": None, "act": "relu"}, 44 | "direction/fuse3_2": {"name": "direction/fuse_2", "shape": [1,1,512,512], "std": None, "act": "relu"}, 45 | "direction/fuse3_3": {"name": "direction/fuse_3", "shape": [1,1,512,outputChannels], "std": None, "act": "lin"}, 46 | "direction/upscore3_1": {"name": "direction/upscore3_1", "ksize": 8, "stride": 4, "outputChannels":outputChannels}} 47 | 48 | return direction_model.Network(params, wd=wd, modelWeightPaths=modelWeightPaths) 49 | 50 | def forward_model(model, feeder, outputSavePath): 51 | with tf.Session() as sess: 52 | tfBatchImages = tf.placeholder("float", shape=[None, 512, 1024, 3]) 53 | tfBatchSS = tf.placeholder("float", shape=[None, 512, 1024]) 54 | tfBatchSSMask = tf.placeholder("float", shape=[None, 512, 1024]) 55 | 56 | with tf.name_scope("model_builder"): 57 | print "attempting to build model" 58 | model.build(tfBatchImages, tfBatchSS, tfBatchSSMask) 59 | print "built the model" 60 | sys.stdout.flush() 61 | 62 | init = tf.initialize_all_variables() 63 | sess.run(init) 64 | 65 | for i in range(int(math.floor(feeder.total_samples() / batchSize))): 66 | imageBatch, ssBatch, ssMaskBatch, idBatch = feeder.next_batch() 67 | 68 | outputBatch = sess.run(model.output, feed_dict={tfBatchImages: imageBatch, tfBatchSS: ssBatch, tfBatchSSMask: ssMaskBatch}) 69 | 70 | for j in range(len(idBatch)): 71 | outputFilePath = os.path.join(outputSavePath, idBatch[j]+'.mat') 72 | outputFileDir = os.path.dirname(outputFilePath) 73 | 74 | if not os.path.exists(outputFileDir): 75 | os.makedirs(outputFileDir) 76 | 77 | sio.savemat(outputFilePath, {"dir_map": outputBatch[j]}, do_compression=True) 78 | 79 | print "processed image %d out of %d"%(j+batchSize*i, feeder.total_samples()) 80 | 81 | def train_model(model, outputChannels, learningRate, trainFeeder, valFeeder, modelSavePath=None, savePrefix=None, initialIteration=1): 82 | with tf.Session() as sess: 83 | tfBatchImages = tf.placeholder("float", shape=[None, 512, 1024, 3]) 84 | tfBatchGT = tf.placeholder("float", shape=[None, 512, 1024, 2]) 85 | tfBatchWeight = tf.placeholder("float", shape=[None, 512, 1024]) 86 | tfBatchSS = tf.placeholder("float", shape=[None, 512, 1024]) 87 | tfBatchSSMask = tf.placeholder("float", shape=[None, 512, 1024]) 88 | 89 | with tf.name_scope("model_builder"): 90 | print "attempting to build model" 91 | model.build(tfBatchImages, tfBatchSS, tfBatchSSMask) 92 | print "built the model" 93 | 94 | sys.stdout.flush() 95 | loss = lossFunction.angularErrorLoss(pred=model.output, gt=tfBatchGT, weight=tfBatchWeight, ss=tfBatchSS, outputChannels=outputChannels) 96 | 97 | angleError = lossFunction.angularErrorTotal(pred=model.output, gt=tfBatchGT, weight=tfBatchWeight, ss=tfBatchSS, outputChannels=outputChannels) 98 | numPredicted = lossFunction.countTotal(ss=tfBatchSS) 99 | numPredictedWeighted = lossFunction.countTotalWeighted(ss=tfBatchSS, weight=tfBatchWeight) 100 | exceed45 = lossFunction.exceedingAngleThreshold(pred=model.output, gt=tfBatchGT, 101 | ss=tfBatchSS, threshold=45.0, outputChannels=outputChannels) 102 | exceed225 = lossFunction.exceedingAngleThreshold(pred=model.output, gt=tfBatchGT, 103 | ss=tfBatchSS, threshold=22.5, outputChannels=outputChannels) 104 | 105 | train_op = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(loss=loss) 106 | 107 | init = tf.initialize_all_variables() 108 | 109 | sess.run(init) 110 | iteration = initialIteration 111 | 112 | while iteration < 1000: 113 | batchLosses = [] 114 | totalAngleError = 0 115 | totalExceed45 = 0 116 | totalExceed225 = 0 117 | totalPredicted = 0 118 | totalPredictedWeighted = 0 119 | 120 | for k in range(int(math.floor(valFeeder.total_samples() / batchSize))): 121 | imageBatch, gtBatch, weightBatch, ssBatch, ssMaskBatch, _ = valFeeder.next_batch() 122 | 123 | batchLoss, batchAngleError, batchPredicted, batchPredictedWeighted, batchExceed45, batchExceed225 = sess.run( 124 | [loss, angleError, numPredicted, numPredictedWeighted, exceed45, exceed225], 125 | feed_dict={tfBatchImages: imageBatch, 126 | tfBatchGT: gtBatch, 127 | tfBatchWeight: weightBatch, 128 | tfBatchSS: ssBatch, 129 | tfBatchSSMask: ssMaskBatch}) 130 | # print "ran iteration" 131 | batchLosses.append(batchLoss) 132 | totalAngleError += batchAngleError 133 | totalPredicted += batchPredicted 134 | totalPredictedWeighted += batchPredictedWeighted 135 | totalExceed45 += batchExceed45 136 | totalExceed225 += batchExceed225 137 | 138 | if np.isnan(np.mean(batchLosses)): 139 | print "LOSS RETURNED NaN" 140 | sys.stdout.flush() 141 | return 1 142 | 143 | print "%s Itr: %d - val loss: %.3f, angle MSE: %.3f, exceed45: %.3f, exceed22.5: %.3f" % ( 144 | time.strftime("%H:%M:%S"), iteration, 145 | float(np.mean(batchLosses)), totalAngleError / totalPredictedWeighted, 146 | totalExceed45 / totalPredicted, totalExceed225 / totalPredicted) 147 | sys.stdout.flush() 148 | 149 | if (iteration > 0 and iteration % 5 == 0) or checkSaveFlag(modelSavePath): 150 | modelSaver(sess, modelSavePath, savePrefix, iteration) 151 | 152 | for j in range(int(math.floor(trainFeeder.total_samples() / batchSize))): 153 | # print "running batch %d"%(j) 154 | # sys.stdout.flush() 155 | imageBatch, gtBatch, weightBatch, ssBatch, ssMaskBatch, _ = trainFeeder.next_batch() 156 | sess.run(train_op, feed_dict={tfBatchImages: imageBatch, 157 | tfBatchGT: gtBatch, 158 | tfBatchWeight: weightBatch, 159 | tfBatchSS: ssBatch, 160 | tfBatchSSMask: ssMaskBatch}) 161 | iteration += 1 162 | 163 | def modelSaver(sess, modelSavePath, savePrefix, iteration, maxToKeep=5): 164 | allWeights = {} 165 | for name in [n.name for n in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]: 166 | param = sess.run(name) 167 | nameParts = re.split('[:/]', name) 168 | saveName = nameParts[-4]+'/'+nameParts[-3]+'/'+nameParts[-2] 169 | allWeights[saveName] = param 170 | 171 | weightsFileName = os.path.join(modelSavePath, savePrefix+'_%03d'%iteration) 172 | 173 | sio.savemat(weightsFileName, allWeights) 174 | 175 | 176 | def checkSaveFlag(modelSavePath): 177 | flagPath = os.path.join(modelSavePath, 'saveme.flag') 178 | 179 | if os.path.exists(flagPath): 180 | return True 181 | else: 182 | return False 183 | 184 | 185 | if __name__ == "__main__": 186 | outputChannels = 2 187 | classType = 'unified_CR' 188 | indices = [0,1,2,3,4,5,6,7] 189 | # 0=car, 1=person, 2=rider, 3=motorcycle, 4=bicycle, 5=truck, 6=bus, 7=train 190 | savePrefix = "direction_" + classType + "_unified_CR_pretrain" 191 | train = True 192 | 193 | if train: 194 | batchSize = 4 195 | learningRate = 1e-5 196 | # learningRateActual = 1e-7 197 | wd = 1e-5 198 | 199 | modelWeightPaths = ["./cityscapes/models/direction/VGG16init_conv1_ch4.mat"] 200 | initialIteration = 1 201 | 202 | model = initialize_model(outputChannels=outputChannels, wd=wd, modelWeightPaths=modelWeightPaths) 203 | 204 | trainFeeder = Batch_Feeder(dataset="cityscapes", indices=indices, train=train, batchSize=batchSize, 205 | padWidth=None, padHeight=None, flip=True, keepEmpty=False) 206 | trainFeeder.set_paths(idList=read_ids('./cityscapes/splits/trainlist.txt'), 207 | imageDir="./cityscapes/inputImages/train", 208 | gtDir="./cityscapes/unified/iGTFine/train", 209 | ssDir="./cityscapes/unified/ssMaskFineGT/train") 210 | 211 | valFeeder = Batch_Feeder(dataset="cityscapes", indices=indices, train=train, batchSize=batchSize, 212 | padWidth=None, padHeight=None) 213 | 214 | valFeeder.set_paths(idList=read_ids('./cityscapes/splits/vallist.txt'), 215 | imageDir="./cityscapes/inputImages/val", 216 | gtDir="./cityscapes/unified/iGTFine/val", 217 | ssDir="./cityscapes/unified/ssMaskFineGT/val") 218 | 219 | train_model(model=model, outputChannels=outputChannels, 220 | learningRate=learningRate, 221 | trainFeeder=trainFeeder, valFeeder=valFeeder, 222 | modelSavePath="./cityscapes/models/direction", savePrefix=savePrefix, 223 | initialIteration=initialIteration) 224 | else: 225 | batchSize = 5 226 | modelWeightPaths = ["./cityscapes/models/direction/direction3_unified_ss_wide_pretrain_VGGFIX_020.mat"] 227 | 228 | model = initialize_model(outputChannels=outputChannels, wd=0, modelWeightPaths=modelWeightPaths) 229 | 230 | feeder = Batch_Feeder(dataset="cityscapes", indices=indices, train=train, batchSize=batchSize, padWidth=None, padHeight=None) 231 | feeder.set_paths(idList=read_ids("./cityscapes/splits/vallist.txt"), 232 | imageDir="./cityscapes/inputImages/val", 233 | ssDir="./cityscapes/unified/ssMaskFineGT/val") 234 | 235 | forward_model(model, feeder=feeder, 236 | outputSavePath="./training/output/direction_ss") 237 | -------------------------------------------------------------------------------- /E2E/e2e_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import ceil 3 | import tensorflow as tf 4 | import scipy.io as sio 5 | 6 | VGG_MEAN = [103.939, 116.779, 123.68] 7 | 8 | class Network: 9 | def __init__(self, params, wd=5e-5, modelWeightPaths=None): 10 | self._params = params 11 | self._wd = wd 12 | self.modelDict = {} 13 | 14 | if modelWeightPaths is not None: 15 | for path in modelWeightPaths: 16 | self.modelDict.update(sio.loadmat(path)) 17 | 18 | def build(self, inputData, ss=None, ssMask=None, keepProb=1.0): 19 | if ss is not None: 20 | ss = tf.expand_dims(ss,-1) 21 | inputData = inputData * ss 22 | 23 | inputData = tf.concat(3, [inputData, tf.expand_dims(ssMask,-1)]) 24 | 25 | print "building direction net" 26 | 27 | self.conv1_1 = self._conv_layer(inputData, params=self._params["direction/conv1_1"]) 28 | self.conv1_2 = self._conv_layer(self.conv1_1, params=self._params["direction/conv1_2"]) 29 | self.pool1 = self._max_pool(self.conv1_2, 'direction/pool1') 30 | 31 | self.conv2_1 = self._conv_layer(self.pool1, params=self._params["direction/conv2_1"]) 32 | self.conv2_2 = self._conv_layer(self.conv2_1, params=self._params["direction/conv2_2"]) 33 | self.pool2 = self._max_pool(self.conv2_2, 'direction/pool2') 34 | 35 | self.conv3_1 = self._conv_layer(self.pool2, params=self._params["direction/conv3_1"]) 36 | self.conv3_2 = self._conv_layer(self.conv3_1, params=self._params["direction/conv3_2"]) 37 | self.conv3_3 = self._conv_layer(self.conv3_2, params=self._params["direction/conv3_3"]) 38 | self.pool3 = self._average_pool(self.conv3_3, 'direction/pool3') 39 | 40 | self.conv4_1 = self._conv_layer(self.pool3, params=self._params["direction/conv4_1"]) 41 | self.conv4_2 = self._conv_layer(self.conv4_1, params=self._params["direction/conv4_2"]) 42 | self.conv4_3 = self._conv_layer(self.conv4_2, params=self._params["direction/conv4_3"]) 43 | self.pool4 = self._average_pool(self.conv4_3, 'direction/pool4') 44 | 45 | self.conv5_1 = self._conv_layer(self.pool4, params=self._params["direction/conv5_1"]) 46 | self.conv5_2 = self._conv_layer(self.conv5_1, params=self._params["direction/conv5_2"]) 47 | self.conv5_3 = self._conv_layer(self.conv5_2, params=self._params["direction/conv5_3"]) 48 | 49 | self.pool5 = self._average_pool(self.conv5_3, 'direction/pool5') 50 | 51 | self.fcn5_1 = self._conv_layer(self.conv5_3, params=self._params["direction/fcn5_1"]) 52 | self.fcn5_2 = self._conv_layer(self.fcn5_1, params=self._params["direction/fcn5_2"]) 53 | self.fcn5_3 = self._conv_layer(self.fcn5_2, params=self._params["direction/fcn5_3"]) 54 | 55 | self.fcn4_1 = self._conv_layer(self.conv4_3, params=self._params["direction/fcn4_1"]) 56 | self.fcn4_2 = self._conv_layer(self.fcn4_1, params=self._params["direction/fcn4_2"]) 57 | self.fcn4_3 = self._conv_layer(self.fcn4_2, params=self._params["direction/fcn4_3"]) 58 | 59 | self.fcn3_1 = self._conv_layer(self.conv3_3, params=self._params["direction/fcn3_1"]) 60 | self.fcn3_2 = self._conv_layer(self.fcn3_1, params=self._params["direction/fcn3_2"]) 61 | self.fcn3_3 = self._conv_layer(self.fcn3_2, params=self._params["direction/fcn3_3"]) 62 | 63 | self.upscore5_3 = self._upscore_layer(self.fcn5_3, params=self._params["direction/upscore5_3"], 64 | shape=tf.shape(self.fcn3_3)) 65 | self.upscore4_3 = self._upscore_layer(self.fcn4_3, params=self._params["direction/upscore4_3"], 66 | shape=tf.shape(self.fcn3_3)) 67 | 68 | self.fuse3 = tf.concat(3, [self.fcn3_3, self.upscore5_3, self.upscore4_3], name="direction/fuse3") 69 | self.fuse3_1 = self._conv_layer(self.fuse3, params=self._params["direction/fuse3_1"]) 70 | self.fuse3_2 = self._conv_layer(self.fuse3_1, params=self._params["direction/fuse3_2"]) 71 | self.fuse3_3 = self._conv_layer(self.fuse3_2, params=self._params["direction/fuse3_3"]) 72 | 73 | self.direction = self._upscore_layer(self.fuse3_3, params=self._params["direction/upscore3_1"], 74 | shape=tf.shape(inputData)) 75 | 76 | if ss is not None: 77 | self.direction = self.direction * ss 78 | 79 | self.direction = tf.nn.l2_normalize(self.direction, 3, epsilon=1e-20) 80 | 81 | print "built the direction net!" 82 | 83 | print "building depth net" 84 | 85 | self.conv1_1 = self._conv_layer(self.direction, params=self._params["depth/conv1_1"]) 86 | self.conv1_2 = self._conv_layer(self.conv1_1, params=self._params["depth/conv1_2"]) 87 | self.pool1 = self._average_pool(self.conv1_2, 'depth/pool') 88 | 89 | self.conv2_1 = self._conv_layer(self.pool1, params=self._params["depth/conv2_1"]) 90 | self.conv2_2 = self._conv_layer(self.conv2_1, params=self._params["depth/conv2_2"]) 91 | self.conv2_3 = self._conv_layer(self.conv2_2, params=self._params["depth/conv2_3"]) 92 | self.conv2_4 = self._conv_layer(self.conv2_3, params=self._params["depth/conv2_4"]) 93 | self.pool2 = self._average_pool(self.conv2_4, 'depth/pool') 94 | 95 | self.fcn1 = self._conv_layer(self.pool2, params=self._params["depth/fcn1"], keepProb=keepProb) 96 | self.fcn2 = self._conv_layer(self.fcn1, params=self._params["depth/fcn2"], keepProb=keepProb) 97 | 98 | self.outputData = self._upscore_layer(self.fcn2, params=self._params["depth/upscore"], 99 | shape=tf.shape(inputData)) 100 | 101 | if ss is not None: 102 | self.outputData = self.outputData * ss 103 | 104 | self.outputDataArgMax = tf.argmax(input=self.outputData, dimension=3) 105 | 106 | print "built the depth net!" 107 | 108 | # LAYER BUILDING 109 | 110 | def _max_pool(self, bottom, name): 111 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 112 | padding='SAME', name=name) 113 | 114 | def _average_pool(self, bottom, name): 115 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 116 | padding='SAME', name=name) 117 | 118 | def _conv_layer(self, bottom, params, keepProb=1.0): 119 | with tf.variable_scope(params["name"]) as scope: 120 | filt = self.get_conv_filter(params) 121 | 122 | if "dr" in params.keys(): 123 | conv = tf.nn.atrous_conv2d(bottom, filt, params["dr"], padding="SAME") 124 | else: 125 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 126 | 127 | conv_biases = self.get_bias(params) 128 | 129 | if params["act"] == "relu": 130 | activation = tf.nn.relu(tf.nn.bias_add(conv, conv_biases)) 131 | elif params["act"] == "lin": 132 | activation = tf.nn.bias_add(conv, conv_biases) 133 | elif params["act"] == "tanh": 134 | activation = tf.nn.tanh(tf.nn.bias_add(conv, conv_biases)) 135 | 136 | if not isinstance(keepProb, (int, long, float)): 137 | activation = tf.nn.dropout(activation, keep_prob=keepProb) 138 | 139 | return activation 140 | 141 | # WEIGHTS GENERATION 142 | 143 | def get_bias(self, params): 144 | if "trainable" in params.keys(): 145 | trainable = params["trainable"] 146 | else: 147 | trainable = True 148 | 149 | if params["name"] + "/biases" in self.modelDict: 150 | init = tf.constant_initializer(value=self.modelDict[params["name"] + "/biases"], dtype=tf.float32) 151 | print "loaded " + params["name"] + "/biases" 152 | else: 153 | init = tf.constant_initializer(value=0.0) 154 | print "generated " + params["name"] + "/biases" 155 | var = tf.get_variable(name="biases", initializer=init, shape=params["shape"][3], trainable=trainable) 156 | 157 | return var 158 | 159 | def get_conv_filter(self, params): 160 | if "trainable" in params.keys(): 161 | trainable = params["trainable"] 162 | else: 163 | trainable = True 164 | 165 | if params["name"] + "/weights" in self.modelDict: 166 | init = tf.constant_initializer(value=self.modelDict[params["name"] + "/weights"], dtype=tf.float32) 167 | print "loaded " + params["name"] + "/weights" 168 | else: 169 | if params["std"]: 170 | stddev = params["std"] 171 | else: 172 | fanIn = params["shape"][0] * params["shape"][1] * params["shape"][2] 173 | stddev = (2 / fanIn) ** 0.5 174 | 175 | init = tf.truncated_normal(shape=params["shape"], stddev=stddev) 176 | print "generated " + params["name"] + "/weights" 177 | 178 | var = tf.get_variable(name="weights", initializer=init, shape=params["shape"], trainable=trainable) 179 | 180 | if not tf.get_variable_scope().reuse and self._wd: 181 | weightDecay = tf.mul(tf.nn.l2_loss(var), self._wd, 182 | name='weight_loss') 183 | tf.add_to_collection('losses', weightDecay) 184 | 185 | return var 186 | 187 | def _upscore_layer(self, bottom, shape, params): 188 | strides = [1, params["stride"], params["stride"], 1] 189 | with tf.variable_scope(params["name"]): 190 | in_features = bottom.get_shape()[3].value 191 | 192 | if shape is None: 193 | in_shape = tf.shape(bottom) 194 | 195 | h = ((in_shape[1] - 1) * params["stride"]) + 1 196 | w = ((in_shape[2] - 1) * params["stride"]) + 1 197 | new_shape = [in_shape[0], h, w, params["outputChannels"]] 198 | else: 199 | new_shape = [shape[0], shape[1], shape[2], params["outputChannels"]] 200 | output_shape = tf.pack(new_shape) 201 | 202 | f_shape = [params["ksize"], params["ksize"], params["outputChannels"], in_features] 203 | 204 | weights = self.get_deconv_filter(f_shape, params) 205 | deconv = tf.nn.conv2d_transpose(bottom, weights, output_shape, 206 | strides=strides, padding='SAME') 207 | return deconv 208 | 209 | def get_deconv_filter(self, f_shape, params): 210 | if "trainable" in params.keys(): 211 | trainable = params["trainable"] 212 | else: 213 | trainable = True 214 | if params["name"]+"/up_filter" in self.modelDict: 215 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/up_filter"], dtype=tf.float32) 216 | print "loaded " + params["name"] + "/up_filter" 217 | else: 218 | width = f_shape[0] 219 | height = f_shape[0] 220 | f = ceil(width / 2.0) 221 | c = (2 * f - 1 - f % 2) / (2.0 * f) 222 | bilinear = np.zeros([f_shape[0], f_shape[1]]) 223 | for x in range(width): 224 | for y in range(height): 225 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 226 | bilinear[x, y] = value 227 | weights = np.zeros(f_shape) 228 | for i in range(f_shape[2]): 229 | weights[:, :, i, i] = bilinear 230 | 231 | init = tf.constant_initializer(value=weights, 232 | dtype=tf.float32) 233 | print "generated " + params["name"] + "/up_filter" 234 | return tf.get_variable(name="up_filter", initializer=init, shape=f_shape, trainable=trainable) 235 | -------------------------------------------------------------------------------- /E2E/forward.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | import math 4 | import sys 5 | import scipy.io as sio 6 | import skimage.io as skio 7 | import scipy.ndimage.interpolation 8 | from post_process import * 9 | 10 | def forward_model(model, feeder, outputSavePath, batchSize=1): 11 | with tf.Session() as sess: 12 | tfBatchImages = tf.placeholder("float") 13 | tfBatchSS = tf.placeholder("float") 14 | tfBatchSSMask = tf.placeholder("float") 15 | keepProb = tf.placeholder("float") 16 | 17 | with tf.name_scope("model_builder"): 18 | print "attempting to build model" 19 | model.build(tfBatchImages, tfBatchSS, tfBatchSSMask, keepProb=keepProb) 20 | print "built the model" 21 | 22 | init = tf.initialize_all_variables() 23 | 24 | sess.run(init) 25 | 26 | if not os.path.exists(outputSavePath): 27 | os.makedirs(outputSavePath) 28 | 29 | for i in range(int(math.floor(feeder.total_samples() / batchSize))): 30 | imageBatch, ssBatch, ssMaskBatch, idBatch = feeder.next_batch() 31 | 32 | outputBatch = sess.run(model.outputDataArgMax, feed_dict={tfBatchImages: imageBatch, 33 | tfBatchSS: ssBatch, 34 | tfBatchSSMask: ssMaskBatch, 35 | keepProb: 1.0}) 36 | outputBatch = outputBatch.astype(np.uint8) 37 | 38 | 39 | 40 | for j in range(len(idBatch)): 41 | outputFilePath = os.path.join(outputSavePath, idBatch[j] + '.png') 42 | outputFilePathMat = os.path.join(outputSavePath, idBatch[j] + '.mat') 43 | outputFileDir = os.path.dirname(outputFilePath) 44 | 45 | if not os.path.exists(outputFileDir): 46 | os.makedirs(outputFileDir) 47 | 48 | outputImage = watershed_cut(outputBatch[j], ssMaskBatch[j]) 49 | skio.imsave(outputFilePath, scipy.ndimage.interpolation.zoom(outputImage, 2.0, mode='nearest', order=0)) 50 | 51 | sio.savemat(outputFilePathMat, {"depth_map": outputBatch[j]}, do_compression=True) 52 | 53 | print "processed image %d out of %d" % (j + batchSize * i + 1, feeder.total_samples()) 54 | sys.stdout.flush() 55 | -------------------------------------------------------------------------------- /E2E/forward_e2e.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | import numpy as np 4 | import sys 5 | import os 6 | import scipy.io as sio 7 | import skimage.io 8 | 9 | VGG_MEAN = [103.939, 116.779, 123.68] 10 | 11 | tf.set_random_seed(0) 12 | 13 | def initialize_model(outputChannels, wd=None, modelWeightPaths=None): 14 | params = { 15 | "direction/conv1_1": {"name": "direction/conv1_1", "shape": [3, 3, 3, 64], "std": None, "act": "relu"}, 16 | "direction/conv1_2": {"name": "direction/conv1_2", "shape": [3, 3, 64, 64], "std": None, "act": "relu"}, 17 | "direction/conv2_1": {"name": "direction/conv2_1", "shape": [3, 3, 64, 128], "std": None, "act": "relu"}, 18 | "direction/conv2_2": {"name": "direction/conv2_2", "shape": [3, 3, 128, 128], "std": None, "act": "relu"}, 19 | "direction/conv3_1": {"name": "direction/conv3_1", "shape": [3, 3, 128, 256], "std": None, "act": "relu"}, 20 | "direction/conv3_2": {"name": "direction/conv3_2", "shape": [3, 3, 256, 256], "std": None, "act": "relu"}, 21 | "direction/conv3_3": {"name": "direction/conv3_3", "shape": [3, 3, 256, 256], "std": None, "act": "relu"}, 22 | "direction/conv4_1": {"name": "direction/conv4_1", "shape": [3, 3, 256, 512], "std": None, "act": "relu"}, 23 | "direction/conv4_2": {"name": "direction/conv4_2", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 24 | "direction/conv4_3": {"name": "direction/conv4_3", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 25 | "direction/conv5_1": {"name": "direction/conv5_1", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 26 | "direction/conv5_2": {"name": "direction/conv5_2", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 27 | "direction/conv5_3": {"name": "direction/conv5_3", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 28 | "direction/fcn5_1": {"name": "direction/fcn5_1", "shape": [5, 5, 512, 512], "std": 1e-4, "act": "relu"}, 29 | "direction/fcn5_2": {"name": "direction/fcn5_2", "shape": [1, 1, 512, 512], "std": 1e-4, "act": "relu"}, 30 | "direction/fcn5_3": {"name": "direction/fcn5_3", "shape": [1, 1, 512, 256], "std": 1e-4, "act": "relu"}, 31 | "direction/upscore5_3": {"name": "direction/upscore5_4", "ksize": 8, "stride": 4, "outputChannels": 256}, 32 | "direction/fcn4_1": {"name": "direction/fcn4_1", "shape": [5, 5, 512, 512], "std": 1e-4, "act": "relu"}, 33 | "direction/fcn4_2": {"name": "direction/fcn4_2", "shape": [1, 1, 512, 512], "std": 1e-4, "act": "relu"}, 34 | "direction/fcn4_3": {"name": "direction/fcn4_3", "shape": [1, 1, 512, 256], "std": 1e-4, "act": "relu"}, 35 | "direction/upscore4_3": {"name": "direction/upscore4_3", "ksize": 4, "stride": 2, "outputChannels": 256}, 36 | "direction/fcn3_1": {"name": "direction/fcn3_1", "shape": [5, 5, 256, 256], "std": 1e-5, "act": "relu"}, 37 | "direction/fcn3_2": {"name": "direction/fcn3_2", "shape": [1, 1, 256, 256], "std": 1e-5, "act": "relu"}, 38 | "direction/fcn3_3": {"name": "direction/fcn3_3", "shape": [1, 1, 256, 256], "std": 1e-5, "act": "relu"}, 39 | "direction/fuse3_1": {"name": "direction/fuse_1", "shape": [1,1,256*3,512], "std": 1e-5, "act": "relu"}, 40 | "direction/fuse3_2": {"name": "direction/fuse_2", "shape": [1, 1, 512, 512], "std": 1e-5, "act": "relu"}, 41 | "direction/fuse3_3": {"name": "direction/fuse_3", "shape": [1, 1, 512, 2], "std": 1e-5, "act": "lin"}, 42 | "direction/upscore3_1": {"name": "direction/upscore3_1", "ksize": 8, "stride": 4, "outputChannels": 2}, 43 | 44 | "depth/conv1_1": {"name": "depth/conv1_1", "shape": [5,5,2,64], "std": 1e-1, "act": "relu"}, 45 | "depth/conv1_2": {"name": "depth/conv1_2", "shape": [5,5,64,128], "std": 1e-1, "act": "relu"}, 46 | "depth/conv2_1": {"name": "depth/conv2_1", "shape": [5,5,128,128], "std": 1e-2, "act": "relu"}, 47 | "depth/conv2_2": {"name": "depth/conv2_2", "shape": [5,5,128,128], "std": 1e-2, "act": "relu"}, 48 | "depth/conv2_3": {"name": "depth/conv2_3", "shape": [5,5,128,128], "std": 1e-2, "act": "relu"}, 49 | "depth/conv2_4": {"name": "depth/conv2_4", "shape": [5,5,128,128], "std": 1e-2, "act": "relu"}, 50 | "depth/fcn1": {"name": "depth/fcn1", "shape": [1,1,128,128], "std": 1e-2, "act": "relu"}, 51 | "depth/fcn2": {"name": "depth/fcn2", "shape": [1,1,128,16], "std": 1e-1, "act": "relu"}, 52 | "depth/upscore": {"name": "depth/upscore", "ksize": 8, "stride": 4, "outputChannels": 16}, 53 | } 54 | 55 | return joint_model2_wide.Network(params, wd=wd, modelWeightPaths=modelWeightPaths) 56 | 57 | def forward_model(model, feeder, outputSavePath): 58 | with tf.Session() as sess: 59 | images = tf.placeholder("float") 60 | tfBatchImages = tf.expand_dims(images, 0) 61 | ss = tf.placeholder("float") 62 | tfBatchSS = tf.expand_dims(ss, 0) 63 | keepProb = tf.placeholder("float") 64 | 65 | with tf.name_scope("model_builder"): 66 | print "attempting to build model" 67 | model.build(tfBatchImages, tfBatchSS, keepProb=keepProb) 68 | print "built the model" 69 | 70 | init = tf.initialize_all_variables() 71 | 72 | sess.run(init) 73 | 74 | if not os.path.exists(outputSavePath): 75 | os.makedirs(outputSavePath) 76 | # for i in range(1): 77 | for i in range(int(math.floor(feeder.total_samples() / batchSize))): 78 | imageBatch, ssBatch, idBatch = feeder.next_batch() 79 | # skimage.io.imsave("/u/mbai/transfer/scaledimage.png",imageBatch[0,:,:,:]) 80 | # sio.savemat("/u/mbai/transfer/scaledimage.mat",{'image':imageBatch[0,:,:,:]}) 81 | # raw_input("saved") 82 | 83 | outputBatch = sess.run(model.outputDataArgMax, feed_dict={tfBatchImages: imageBatch, 84 | tfBatchSS: ssBatch, 85 | keepProb: 1.0}) 86 | outputBatch = outputBatch.astype(np.uint8) 87 | 88 | # outputBatch = sess.run(model.direction, feed_dict={tfBatchImages: imageBatch, 89 | # tfBatchSS: ssBatch, 90 | # keepProb: 1.0}) 91 | 92 | for j in range(len(idBatch)): 93 | outputFilePath = os.path.join(outputSavePath, idBatch[j]+'.mat') 94 | # outputFilePath = os.path.join(outputSavePath, idBatch[j] + '.png') 95 | outputFileDir = os.path.dirname(outputFilePath) 96 | 97 | if not os.path.exists(outputFileDir): 98 | os.makedirs(outputFileDir) 99 | 100 | sio.savemat(outputFilePath, {"depth_map": outputBatch[j]}, do_compression=True) 101 | 102 | # skimage.io.imsave(outputFilePath, outputBatch[j]) 103 | 104 | # sio.savemat(outputFilePath, {"dir_map": outputBatch[j]}) 105 | 106 | print "processed image %d to %d out of %d"%(i*batchSize+1, (i+1)*batchSize, feeder.total_samples()) 107 | sys.stdout.flush() 108 | 109 | if __name__ == "__main__": 110 | outputChannels = 16 111 | outputPrefix = "submission3" 112 | outputSet = 'val' 113 | batchSize = 10 114 | 115 | configurations = {'car': {"index":[0], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_vehicles_final_wideup_ssLRR_045.mat"]}, 116 | 'truck': {"index":[5], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_vehicles_final_wideup_ssLRR_045.mat"]}, 117 | 'bus': {"index": [6], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_vehicles_final_wideup_ssLRR_045.mat"]}, 118 | 'train': {"index": [7], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_vehicles_final_wideup_ssLRR_045.mat"]}, 119 | 'person': {"index": [1], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_humans_final_wideup_ssLRR_010.mat"]}, 120 | 'rider': {"index": [2], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_humans_final_wideup_ssLRR_010.mat"]}, 121 | 'motorcycle': {"index": [3], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_cycles_final_wideup_ssLRR_010.mat"]}, 122 | 'bicycle': {"index": [4], "model": ["/ais/gobi4/mbai/instance_seg/cityscapes/models/joint/joint2_cycles_final_wideup_ssLRR_010.mat"]}, 123 | } 124 | # 0=car, 1=person, 2=rider, 3=motorcycle, 4=bicycle, 5=truck, 6=bus, 7=train 125 | 126 | for type in configurations: 127 | model = initialize_model(outputChannels=outputChannels, modelWeightPaths=configurations[type]["model"]) 128 | 129 | feeder = Batch_Feeder(dataset="cityscapes", 130 | indices=configurations[type]["index"], 131 | train=False, 132 | batchSize=batchSize) 133 | 134 | feeder.set_paths(idList=read_ids('/ais/gobi4/mbai/instance_seg/cityscapes/splits/'+outputSet+'list.txt'), 135 | imageDir="/ais/gobi4/mbai/instance_seg/cityscapes/inputImages/"+outputSet, 136 | ssDir="/ais/gobi4/mbai/instance_seg/cityscapes/unified/ssMaskFineLRR/"+outputSet) 137 | 138 | forward_model(model, feeder=feeder, 139 | outputSavePath="/ais/gobi4/mbai/instance_seg/training/outputs/%s/%s/%s"%(outputPrefix, outputSet, type)) 140 | 141 | tf.reset_default_graph() 142 | -------------------------------------------------------------------------------- /E2E/io_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import skimage 3 | import skimage.io 4 | import scipy.io as sio 5 | import scipy.misc 6 | import skimage.transform 7 | 8 | np.random.seed(0) 9 | 10 | VGG_MEAN = [103.939, 116.779, 123.68] 11 | CLASS_TO_SS = {"person":11, "rider":12, "motorcycle":17, 12 | "bicycle":18, "car":13, "truck":14, "bus":15, "train":16} 13 | 14 | def read_mat(path): 15 | return np.load(path) 16 | 17 | 18 | def write_mat(path, m): 19 | np.save(path, m) 20 | 21 | class Batch_Feeder: 22 | def __init__(self, dataset, train, batchSize, padWidth=None, padHeight=None, flip=False, keepEmpty=True, shuffle=False): 23 | self._epochs_completed = 0 24 | self._index_in_epoch = 0 25 | self._dataset = dataset 26 | self._train = train 27 | self._batchSize = batchSize 28 | self._padWidth = padWidth 29 | self._padHeight = padHeight 30 | self._flip = flip 31 | self._keepEmpty = keepEmpty 32 | self._shuffle = shuffle 33 | 34 | def set_paths(self, idList=None, imageDir=None, gtDir=None, ssDir=None): 35 | self._paths = [] 36 | 37 | if self._train: 38 | for id in idList: 39 | self._paths.append([id, imageDir + '/' + id + '_leftImg8bit.png', 40 | gtDir + '/' + id + '_unified_GT.mat', 41 | ssDir + '/' + id + '.png']) 42 | if self._shuffle: 43 | self.shuffle() 44 | else: 45 | for id in idList: 46 | self._paths.append([id, imageDir + '/' + id + '_leftImg8bit.png', 47 | ssDir + '/' + id + '.png']) 48 | 49 | self._numData = len(self._paths) 50 | 51 | if self._numData < self._batchSize: 52 | self._batchSize = self._numData 53 | 54 | def shuffle(self): 55 | np.random.shuffle(self._paths) 56 | 57 | def next_batch(self): 58 | idBatch = [] 59 | imageBatch = [] 60 | gtBatch = [] 61 | ssBinaryBatch = [] 62 | ssMaskBatch = [] 63 | weightBatch = [] 64 | 65 | if self._train: 66 | while(len(idBatch) < self._batchSize): 67 | ssImage = skimage.io.imread(self._paths[self._index_in_epoch][3]) 68 | ssBinary, ssMask = ssProcess(ssImage) 69 | 70 | idBatch.append(self._paths[self._index_in_epoch][0]) 71 | image = (image_scaling(skimage.io.imread(self._paths[self._index_in_epoch][1]))).astype(float) 72 | image = scipy.misc.imresize(image, 50) 73 | gt = (sio.loadmat(self._paths[self._index_in_epoch][2])['depth_map']).astype(float) 74 | weight = (sio.loadmat(self._paths[self._index_in_epoch][2])['weight_map']).astype(float) 75 | 76 | imageBatch.append(pad(image, self._padHeight, self._padWidth)) 77 | gtBatch.append(pad(gt, self._padHeight, self._padWidth)) 78 | weightBatch.append(pad(weight, self._padHeight, self._padWidth)) 79 | ssBinaryBatch.append(pad(ssBinary, self._padHeight, self._padWidth)) 80 | ssMaskBatch.append(pad(ssMask, self._padHeight, self._padWidth)) 81 | 82 | self._index_in_epoch += 1 83 | 84 | if self._index_in_epoch == self._numData: 85 | self._index_in_epoch = 0 86 | if self._shuffle: 87 | self.shuffle() 88 | 89 | imageBatch = np.array(imageBatch) 90 | gtBatch = np.array(gtBatch) 91 | ssBinaryBatch = np.array(ssBinaryBatch) 92 | ssMaskBatch = np.array(ssMaskBatch) 93 | weightBatch = np.array(weightBatch) 94 | 95 | if self._flip and np.random.uniform() > 0.5: 96 | for i in range(len(imageBatch)): 97 | for j in range(3): 98 | imageBatch[i,:,:,j] = np.fliplr(imageBatch[i,:,:,j]) 99 | 100 | ssBinaryBatch[i] = np.fliplr(ssBinaryBatch[i]) 101 | ssMaskBatch[i] = np.fliplr(ssMaskBatch[i]) 102 | gtBatch[i] = np.fliplr(gtBatch[i]) 103 | weightBatch[i] = np.fliplr(weightBatch[i]) 104 | 105 | return imageBatch, gtBatch, weightBatch, ssBinaryBatch, ssMaskBatch, idBatch 106 | else: 107 | for example in self._paths[self._index_in_epoch:min(self._index_in_epoch+self._batchSize, self._numData)]: 108 | image = skimage.io.imread(example[1]) 109 | image = scipy.misc.imresize(image,50) 110 | image = pad(image_scaling(image), self._padHeight, self._padWidth).astype(float) 111 | 112 | imageBatch.append(image) 113 | 114 | idBatch.append(example[0]) 115 | ssImage = skimage.io.imread(example[2]) 116 | 117 | ssImage = scipy.misc.imresize(ssImage, 50, interp="nearest") 118 | 119 | ssBinary, ssMask = ssProcess(ssImage) 120 | 121 | ssMaskBatch.append(pad(ssMask, self._padHeight, self._padWidth)) 122 | ssBinaryBatch.append(pad(ssBinary, self._padHeight, self._padWidth)) 123 | 124 | imageBatch = np.array(imageBatch) 125 | ssBinaryBatch = np.array(ssBinaryBatch) 126 | ssMaskBatch = np.array(ssMaskBatch) 127 | 128 | self._index_in_epoch += self._batchSize 129 | 130 | return imageBatch, ssBinaryBatch, ssMaskBatch, idBatch 131 | 132 | def total_samples(self): 133 | return self._numData 134 | 135 | def read_ids(path): 136 | # return ['munster/munster_000071_000019'] 137 | return [line.rstrip('\n') for line in open(path)] 138 | 139 | def image_scaling(rgb_in): 140 | if rgb_in.dtype == np.float32: 141 | rgb_in = rgb_in*255 142 | elif rgb_in.dtype == np.uint8: 143 | rgb_in = rgb_in.astype(np.float32) 144 | 145 | # VGG16 was trained using opencv which reads images as BGR, but skimage reads images as RGB 146 | rgb_out = np.zeros(rgb_in.shape).astype(np.float32) 147 | rgb_out[:,:,0] = rgb_in[:,:,2] - VGG_MEAN[2] 148 | rgb_out[:,:,1] = rgb_in[:,:,1] - VGG_MEAN[1] 149 | rgb_out[:,:,2] = rgb_in[:,:,0] - VGG_MEAN[0] 150 | 151 | return rgb_out 152 | 153 | def pad(data, padHeight=None, padWidth=None): 154 | if padHeight and padWidth: 155 | if data.ndim == 3: 156 | npad = ((0,padHeight-data.shape[0]),(0,padWidth-data.shape[1]),(0,0)) 157 | elif data.ndim == 2: 158 | npad = ((0, padHeight - data.shape[0]), (0, padWidth - data.shape[1])) 159 | padData = np.pad(data, npad, mode='constant', constant_values=0) 160 | 161 | else: 162 | padData = data 163 | 164 | return padData 165 | 166 | def ssProcess(ssImage): 167 | ssMask = np.zeros(shape=ssImage.shape, dtype=np.float32) 168 | ssImageInt = ssImage 169 | 170 | if ssImageInt.dtype == np.float32: 171 | ssImageInt = (ssImageInt*255).astype(np.uint8) 172 | 173 | # order: Person, Rider, Motorcycle, Bicycle, Car, Truck, Bus, Train 174 | 175 | ssMask += (ssImageInt==CLASS_TO_SS['person']).astype(np.float32)*1 176 | ssMask += (ssImageInt==CLASS_TO_SS['rider']).astype(np.float32)*2 177 | ssMask += (ssImageInt==CLASS_TO_SS['motorcycle']).astype(np.float32)*3 178 | ssMask += (ssImageInt==CLASS_TO_SS['bicycle']).astype(np.float32)*4 179 | ssMask += (ssImageInt==CLASS_TO_SS['car']).astype(np.float32)*6 180 | ssMask += (ssImageInt==CLASS_TO_SS['truck']).astype(np.float32)*7 181 | ssMask += (ssImageInt==CLASS_TO_SS['bus']).astype(np.float32)*8 182 | ssMask += (ssImageInt==CLASS_TO_SS['train']).astype(np.float32)*9 183 | 184 | ssBinary = (ssMask != 0).astype(np.float32) 185 | 186 | ssMask[ssMask == 0] = 1 # temp fix 187 | 188 | ssMask = (ssMask - 5) * 32 189 | 190 | return ssBinary, ssMask 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /E2E/loss_function.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def depthCELoss2(pred, gt, weight, ss, outputChannels=16): 5 | with tf.name_scope("depth_CE_loss"): 6 | pred = tf.reshape(pred, (-1, outputChannels)) 7 | epsilon = tf.constant(value=1e-25) 8 | predSoftmax = tf.to_float(tf.nn.softmax(pred)) 9 | 10 | gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32) 11 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 12 | weight = tf.to_float(tf.reshape(weight, (-1, 1))) 13 | 14 | crossEntropyScaling = tf.to_float([3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) 15 | crossEntropy = -tf.reduce_sum(((1-gt)*tf.log(tf.maximum(1-predSoftmax, epsilon)) 16 | + gt*tf.log(tf.maximum(predSoftmax, epsilon)))*ss*crossEntropyScaling*weight, 17 | reduction_indices=[1]) 18 | 19 | crossEntropySum = tf.reduce_sum(crossEntropy, name="cross_entropy_sum") 20 | 21 | return crossEntropySum 22 | 23 | def depthCELoss(pred, gt, ss, outputChannels=16): 24 | with tf.name_scope("depth_CE_loss"): 25 | pred = tf.reshape(pred, (-1, outputChannels)) 26 | epsilon = tf.constant(value=1e-25) 27 | #pred = pred + epsilon 28 | predSoftmax = tf.to_float(tf.nn.softmax(pred)) 29 | predSoftmax = predSoftmax + epsilon 30 | 31 | gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32) 32 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 33 | 34 | crossEntropy = -tf.reduce_sum(gt * tf.log(predSoftmax) * ss, reduction_indices=[1]) 35 | 36 | crossEntropySum = tf.reduce_sum(crossEntropy, name="cross_entropy_sum") 37 | return crossEntropySum 38 | 39 | def modelTotalLoss(pred, gt, weight, ss, outputChannels=1): 40 | lossDepthTotal = depthCELoss2(pred=pred, gt=gt, weight=weight, ss=ss, 41 | outputChannels=outputChannels) / (countTotalWeighted(ss, weight)+1) 42 | 43 | tf.add_to_collection('losses', lossDepthTotal) 44 | 45 | totalLoss = tf.add_n(tf.get_collection('losses'), name='total_loss') 46 | 47 | return totalLoss 48 | 49 | def countCorrect(pred, gt, ss, k, outputChannels): 50 | with tf.name_scope("correct"): 51 | pred = tf.argmax(tf.reshape(pred, (-1, outputChannels)), 1) 52 | gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32) 53 | 54 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 55 | 56 | correct = tf.reduce_sum(tf.mul(tf.reshape(tf.to_float(tf.nn.in_top_k(gt, pred, k)), (-1, 1)), ss), reduction_indices=[0]) 57 | return correct 58 | 59 | def countTotal(ss): 60 | with tf.name_scope("total"): 61 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 62 | total = tf.reduce_sum(ss) 63 | 64 | return total 65 | 66 | def countTotalWeighted(ss, weight): 67 | with tf.name_scope("total"): 68 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 69 | weight = tf.to_float(tf.reshape(weight, (-1, 1))) 70 | total = tf.reduce_sum(ss * weight) 71 | 72 | return total 73 | -------------------------------------------------------------------------------- /E2E/main.py: -------------------------------------------------------------------------------- 1 | from network_init import get_model 2 | from io_utils import * 3 | import tensorflow as tf 4 | from forward import forward_model 5 | from train import train_model 6 | 7 | tf.set_random_seed(0) 8 | 9 | if __name__ == "__main__": 10 | outputChannels = 16 11 | savePrefix = "" 12 | outputPrefix = "" 13 | # 0=car, 1=person, 2=rider, 3=motorcycle, 4=bicycle, 5=truck, 6=bus, 7=train 14 | train = False 15 | 16 | if train: 17 | batchSize = 3 18 | learningRate = 5e-6 # usually i use 5e-6 19 | wd = 1e-6 20 | 21 | modelWeightPaths = [""] 22 | 23 | initialIteration = 1 24 | 25 | trainFeeder = Batch_Feeder(dataset="cityscapes", 26 | train=train, 27 | batchSize=batchSize, 28 | flip=True, keepEmpty=False, shuffle=True) 29 | 30 | trainFeeder.set_paths(idList=read_ids(''), 31 | imageDir="n", 32 | gtDir="", 33 | ssDir="") 34 | 35 | valFeeder = Batch_Feeder(dataset="cityscapes", 36 | train=train, 37 | batchSize=batchSize, shuffle=False) 38 | 39 | valFeeder.set_paths(idList=read_ids(''), 40 | imageDir="", 41 | gtDir="", 42 | ssDir="") 43 | 44 | model = get_model(wd=wd, modelWeightPaths=modelWeightPaths) 45 | 46 | train_model(model=model, outputChannels=outputChannels, 47 | learningRate=learningRate, 48 | trainFeeder=trainFeeder, 49 | valFeeder=valFeeder, 50 | modelSavePath="", 51 | savePrefix=savePrefix, 52 | initialIteration=initialIteration) 53 | 54 | else: 55 | batchSize = 1 56 | modelWeightPaths = ["../model/dwt_cityscapes_pspnet.mat"] 57 | 58 | model = get_model(modelWeightPaths=modelWeightPaths) 59 | 60 | feeder = Batch_Feeder(dataset="cityscapes", 61 | train=train, 62 | batchSize=batchSize) 63 | 64 | feeder.set_paths(idList=read_ids('../example/sample_list.txt'), 65 | imageDir="../example/inputImages", 66 | ssDir="../example/PSPNet") 67 | 68 | forward_model(model, feeder=feeder, 69 | outputSavePath="../example/output") 70 | -------------------------------------------------------------------------------- /E2E/model_io.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import scipy.io as sio 3 | import os 4 | from re import split 5 | 6 | def modelSaver(sess, modelSavePath, savePrefix, iteration, maxToKeep=5): 7 | allWeights = {} 8 | 9 | for name in [n.name for n in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]: 10 | param = sess.run(name) 11 | nameParts = split('[:/]', name) 12 | saveName = nameParts[-4]+'/'+nameParts[-3]+'/'+nameParts[-2] 13 | allWeights[saveName] = param 14 | 15 | savePath = os.path.join(modelSavePath, savePrefix+'_%03d'%iteration) 16 | sio.savemat(savePath, allWeights) 17 | print "saving model to %s" % savePath 18 | 19 | def checkSaveFlag(modelSavePath): 20 | flagPath = os.path.join(modelSavePath, 'saveme.flag') 21 | 22 | if os.path.exists(flagPath): 23 | return True 24 | else: 25 | return False -------------------------------------------------------------------------------- /E2E/network_init.py: -------------------------------------------------------------------------------- 1 | import e2e_model 2 | def get_model(wd=None, modelWeightPaths=None): 3 | params = { 4 | "direction/conv1_1": {"name": "direction/conv1_1", "shape": [3, 3, 4, 64], "std": None, "act": "relu"}, 5 | "direction/conv1_2": {"name": "direction/conv1_2", "shape": [3, 3, 64, 64], "std": None, "act": "relu"}, 6 | "direction/conv2_1": {"name": "direction/conv2_1", "shape": [3, 3, 64, 128], "std": None, "act": "relu"}, 7 | "direction/conv2_2": {"name": "direction/conv2_2", "shape": [3, 3, 128, 128], "std": None, "act": "relu"}, 8 | "direction/conv3_1": {"name": "direction/conv3_1", "shape": [3, 3, 128, 256], "std": None, "act": "relu"}, 9 | "direction/conv3_2": {"name": "direction/conv3_2", "shape": [3, 3, 256, 256], "std": None, "act": "relu"}, 10 | "direction/conv3_3": {"name": "direction/conv3_3", "shape": [3, 3, 256, 256], "std": None, "act": "relu"}, 11 | "direction/conv4_1": {"name": "direction/conv4_1", "shape": [3, 3, 256, 512], "std": None, "act": "relu"}, 12 | "direction/conv4_2": {"name": "direction/conv4_2", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 13 | "direction/conv4_3": {"name": "direction/conv4_3", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 14 | "direction/conv5_1": {"name": "direction/conv5_1", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 15 | "direction/conv5_2": {"name": "direction/conv5_2", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 16 | "direction/conv5_3": {"name": "direction/conv5_3", "shape": [3, 3, 512, 512], "std": None, "act": "relu"}, 17 | "direction/fcn5_1": {"name": "direction/fcn5_1", "shape": [5, 5, 512, 512], "std": None, "act": "relu"}, 18 | "direction/fcn5_2": {"name": "direction/fcn5_2", "shape": [1, 1, 512, 512], "std": None, "act": "relu"}, 19 | "direction/fcn5_3": {"name": "direction/fcn5_3", "shape": [1, 1, 512, 256], "std": None, "act": "relu"}, 20 | "direction/upscore5_3": {"name": "direction/upscore5_3", "ksize": 8, "stride": 4, "outputChannels": 256}, 21 | "direction/fcn4_1": {"name": "direction/fcn4_1", "shape": [5, 5, 512, 512], "std": None, "act": "relu"}, 22 | "direction/fcn4_2": {"name": "direction/fcn4_2", "shape": [1, 1, 512, 512], "std": None, "act": "relu"}, 23 | "direction/fcn4_3": {"name": "direction/fcn4_3", "shape": [1, 1, 512, 256], "std": None, "act": "relu"}, 24 | "direction/upscore4_3": {"name": "direction/upscore4_3", "ksize": 4, "stride": 2, "outputChannels": 256}, 25 | "direction/fcn3_1": {"name": "direction/fcn3_1", "shape": [5, 5, 256, 256], "std": None, "act": "relu"}, 26 | "direction/fcn3_2": {"name": "direction/fcn3_2", "shape": [1, 1, 256, 256], "std": None, "act": "relu"}, 27 | "direction/fcn3_3": {"name": "direction/fcn3_3", "shape": [1, 1, 256, 256], "std": None, "act": "relu"}, 28 | "direction/fuse3_1": {"name": "direction/fuse_1", "shape": [1,1,256*3,512], "std": None, "act": "relu"}, 29 | "direction/fuse3_2": {"name": "direction/fuse_2", "shape": [1, 1, 512, 512], "std": None, "act": "relu"}, 30 | "direction/fuse3_3": {"name": "direction/fuse_3", "shape": [1, 1, 512, 2], "std": None, "act": "lin"}, 31 | "direction/upscore3_1": {"name": "direction/upscore3_1", "ksize": 8, "stride": 4, "outputChannels": 2}, 32 | 33 | "depth/conv1_1": {"name": "depth/conv1_1", "shape": [5,5,2,64], "std": None, "act": "relu"}, 34 | "depth/conv1_2": {"name": "depth/conv1_2", "shape": [5,5,64,128], "std": None, "act": "relu"}, 35 | "depth/conv2_1": {"name": "depth/conv2_1", "shape": [5,5,128,128], "std": None, "act": "relu"}, 36 | "depth/conv2_2": {"name": "depth/conv2_2", "shape": [5,5,128,128], "std": None, "act": "relu"}, 37 | "depth/conv2_3": {"name": "depth/conv2_3", "shape": [5,5,128,128], "std": None, "act": "relu"}, 38 | "depth/conv2_4": {"name": "depth/conv2_4", "shape": [5,5,128,128], "std": None, "act": "relu"}, 39 | "depth/fcn1": {"name": "depth/fcn1", "shape": [1,1,128,128], "std": None, "act": "relu"}, 40 | "depth/fcn2": {"name": "depth/fcn2", "shape": [1,1,128,16], "std": None, "act": "relu"}, 41 | "depth/upscore": {"name": "depth/upscore", "ksize": 8, "stride": 4, "outputChannels": 16}, 42 | } 43 | 44 | return e2e_model.Network(params, wd=wd, modelWeightPaths=modelWeightPaths) 45 | -------------------------------------------------------------------------------- /E2E/post_process.py: -------------------------------------------------------------------------------- 1 | import scipy.ndimage.interpolation 2 | import scipy.misc 3 | import skimage.morphology 4 | import numpy as np 5 | 6 | #CS codes: 24: person, 25: rider, 32: motorcycle, 33: bicycle, 26: car, 27: truck, 28: bus, 31: train 7 | #PSP SS codes: 8 | # CLASS_TO_SS = {"person":12, "rider":13, "motorcycle":18, 9 | # "bicycle":19, "car":13, "truck":15, "bus":16, "train":17} 10 | CLASS_TO_SS = {"person":-128, "rider":-96, "motorcycle":-64, 11 | "bicycle":-32, "car":32, "truck":64, "bus":96, "train":128} 12 | CLASS_TO_CITYSCAPES = {"person":24, "rider":25, "motorcycle":32, 13 | "bicycle":33, "car":26, "truck":27, "bus":28, "train":31} 14 | THRESHOLD = {"person":1, "rider":1, "motorcycle":1, "bicycle":1, 15 | "car":2, "truck":2, "bus":2, "train":2} 16 | MIN_SIZE = {"person":20, "rider":20, "motorcycle":20, "bicycle":20, 17 | "car":25, "truck":45, "bus":45, "train":45} 18 | SELEM = {1: (np.ones((3,3))).astype(np.bool), 19 | 2: (np.ones((5,5))).astype(np.bool)} 20 | 21 | def watershed_cut(depthImage, ssMask): 22 | ssMask = ssMask.astype(np.int32) 23 | resultImage = np.zeros(shape=ssMask.shape, dtype=np.float32) 24 | 25 | for semClass in CLASS_TO_CITYSCAPES.keys(): 26 | csCode = CLASS_TO_CITYSCAPES[semClass] 27 | ssCode = CLASS_TO_SS[semClass] 28 | ssMaskClass = (ssMask == ssCode) 29 | 30 | ccImage = (depthImage > THRESHOLD[semClass]) * ssMaskClass 31 | ccImage = skimage.morphology.remove_small_objects(ccImage, min_size=MIN_SIZE[semClass]) 32 | ccImage = skimage.morphology.remove_small_holes(ccImage) 33 | ccLabels = skimage.morphology.label(ccImage) 34 | 35 | ccIDs = np.unique(ccLabels)[1:] 36 | for ccID in ccIDs: 37 | ccIDMask = (ccLabels == ccID) 38 | ccIDMask = skimage.morphology.binary_dilation(ccIDMask, SELEM[THRESHOLD[semClass]]) 39 | instanceID = 1000 * csCode + ccID 40 | resultImage[ccIDMask] = instanceID 41 | 42 | resultImage = resultImage.astype(np.uint16) 43 | return resultImage 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /E2E/train.py: -------------------------------------------------------------------------------- 1 | from model_io import * 2 | import sys 3 | from loss_function import * 4 | import math 5 | import time 6 | 7 | def train_model(model, outputChannels, learningRate, trainFeeder, valFeeder, 8 | modelSavePath=None, savePrefix=None, initialIteration=1, batchSize=1): 9 | with tf.Session() as sess: 10 | tfBatchImages = tf.placeholder("float") 11 | tfBatchGT = tf.placeholder("float") 12 | tfBatchWeight = tf.placeholder("float") 13 | tfBatchSS = tf.placeholder("float") 14 | tfBatchSSMask = tf.placeholder("float") 15 | keepProb = tf.placeholder("float") 16 | 17 | with tf.name_scope("model_builder"): 18 | print "attempting to build model" 19 | model.build(tfBatchImages, tfBatchSS, tfBatchSSMask, keepProb=keepProb) 20 | print "built the model" 21 | sys.stdout.flush() 22 | 23 | loss = modelTotalLoss(pred=model.outputData, gt=tfBatchGT, weight=tfBatchWeight, ss=tfBatchSS, outputChannels=outputChannels) 24 | numPredictedWeighted = countTotalWeighted(ss=tfBatchSS, weight=tfBatchWeight) 25 | numPredicted = countTotal(ss=tfBatchSS) 26 | numCorrect = countCorrect(pred=model.outputData, gt=tfBatchGT, ss=tfBatchSS, k=1, outputChannels=outputChannels) 27 | 28 | print "setting adam optimizer" 29 | sys.stdout.flush() 30 | 31 | train_op = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(loss=loss) 32 | 33 | init = tf.initialize_all_variables() 34 | print "attempting to run init" 35 | sys.stdout.flush() 36 | 37 | sess.run(init) 38 | print "completed init" 39 | sys.stdout.flush() 40 | 41 | iteration = initialIteration 42 | 43 | while iteration < 1000: 44 | batchLosses = [] 45 | totalPredictedWeighted = 0 46 | totalPredicted = 0 47 | totalCorrect = 0 48 | 49 | for k in range(int(math.floor(valFeeder.total_samples() / batchSize))): 50 | imageBatch, gtBatch, weightBatch, ssBatch, ssMaskBatch, _ = valFeeder.next_batch() 51 | 52 | batchLoss, batchPredicted, batchPredictedWeighted, batchCorrect = sess.run( 53 | [loss, numPredicted, numPredictedWeighted, numCorrect], 54 | feed_dict={tfBatchImages: imageBatch, 55 | tfBatchGT: gtBatch, 56 | tfBatchWeight: weightBatch, 57 | tfBatchSS: ssBatch, 58 | tfBatchSSMask: ssMaskBatch, 59 | keepProb: 1.0}) 60 | 61 | batchLosses.append(batchLoss) 62 | totalPredicted += batchPredicted 63 | totalPredictedWeighted += batchPredictedWeighted 64 | totalCorrect += batchCorrect 65 | 66 | if np.isnan(np.mean(batchLosses)): 67 | print "LOSS RETURNED NaN" 68 | sys.stdout.flush() 69 | return 1 70 | 71 | print "%s Itr: %d - val loss: %.6f, correct: %.6f" % (time.strftime("%H:%M:%S"), 72 | iteration, float(np.mean(batchLosses)), totalCorrect / totalPredicted) 73 | sys.stdout.flush() 74 | 75 | if (iteration > 0 and iteration % 5 == 0) or checkSaveFlag(modelSavePath): 76 | modelSaver(sess, modelSavePath, savePrefix, iteration) 77 | 78 | #for j in range(10): 79 | for j in range(int(math.floor(trainFeeder.total_samples() / batchSize))): 80 | # print "attempting to run train batch" 81 | # sys.stdout.flush() 82 | 83 | imageBatch, gtBatch, weightBatch, ssBatch, ssMaskBatch, _ = trainFeeder.next_batch() 84 | sess.run(train_op, feed_dict={tfBatchImages: imageBatch, 85 | tfBatchGT: gtBatch, 86 | tfBatchWeight: weightBatch, 87 | tfBatchSS: ssBatch, 88 | tfBatchSSMask: ssMaskBatch, 89 | keepProb: 0.7}) 90 | # print "ran one iteration" 91 | 92 | iteration += 1 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 min2209 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Performs instance level segmentation detailed in the following paper: 2 | 3 | Min Bai and Raquel Urtasun, Deep Watershed Transformation for Instance Segmentation, in CVPR 2017. Accessible at https://arxiv.org/abs/1611.08303. 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Deep Watershed Transform 2 | 3 | Performs instance level segmentation detailed in the following paper: 4 | 5 | Min Bai and Raquel Urtasun, Deep Watershed Transformation for Instance Segmentation, in CVPR 2017. Accessible at https://arxiv.org/abs/1611.08303. 6 | 7 | This page is still under construction. 8 | 9 | ## Dependencies 10 | 11 | Developed and tested on Ubuntu 14.04 and 16.04. 12 | 13 | 1) TensorFlow www.tensorflow.org 14 | 2) Numpy, Scipy, and Skimage (sudo apt-get install python-numpy python-scipy python-skimage) 15 | 16 | ## Inputs 17 | 18 | 1) Cityscapes images (www.cityscapes-dataset.com). 19 | 2) Semantic Segmentation for input images. In our case, we used the output from PSPNet (by H. Zhao et al. https://github.com/hszhao/PSPNet). These are uint8 images with pixel-wise semantic labels encoded with 'trainIDs' defined by Cityscapes. For more information, visit https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py 20 | 21 | ## Outputs 22 | 23 | The model produces pixel-wise instance labels as a uint16 image with the same formatting as the Cityscapes instance segmentation challenge ground truth. In particular, each pixel is labeled as 'id' * 1000 + instance_id, where 'id' is as defined by Cityscapes (for more information, consult labels.py in the above link), and instance_id is an integer indexing the object instance. 24 | 25 | ## Testing the Model 26 | 27 | 1) Clone repository into dwt/. 28 | 2) Download the model from www.cs.toronto.edu/~mbai/dwt_cityscapes_pspnet.mat and place into the "dwt/model" directory. 29 | 3) run "cd E2E" 30 | 4) run "python main.py" 31 | 5) The results will be available in "dwt/example/output". 32 | 33 | ## Training the Model 34 | 35 | 1) Will be available soon. 36 | -------------------------------------------------------------------------------- /WTN/depth_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import ceil 3 | import tensorflow as tf 4 | import math 5 | import scipy.io as sio 6 | 7 | VGG_MEAN = [103.939, 116.779, 123.68] 8 | 9 | class Network: 10 | def __init__(self, params, wd=5e-5, modelWeightPaths=None): 11 | self._params = params 12 | self._images = tf.placeholder("float") 13 | self._batch_images = tf.expand_dims(self._images, 0) 14 | self._gt = tf.placeholder("float") 15 | self._batch_gt = tf.expand_dims(self._gt, 0) 16 | self._wd = wd 17 | 18 | self.modelDict = {} 19 | 20 | if modelWeightPaths is not None: 21 | for path in modelWeightPaths: 22 | self.modelDict.update(sio.loadmat(path)) 23 | 24 | def build(self, inputData, ss, keepProb=1): 25 | self.conv1_1 = self._conv_layer(inputData, params=self._params["depth/conv1_1"]) 26 | self.conv1_2 = self._conv_layer(self.conv1_1, params=self._params["depth/conv1_2"]) 27 | self.pool1 = self._average_pool(self.conv1_2, 'depth/pool') 28 | 29 | self.conv2_1 = self._conv_layer(self.pool1, params=self._params["depth/conv2_1"]) 30 | self.conv2_2 = self._conv_layer(self.conv2_1, params=self._params["depth/conv2_2"]) 31 | self.conv2_3 = self._conv_layer(self.conv2_2, params=self._params["depth/conv2_3"]) 32 | self.conv2_4 = self._conv_layer(self.conv2_3, params=self._params["depth/conv2_4"]) 33 | self.pool2 = self._average_pool(self.conv2_4, 'depth/pool') 34 | 35 | self.fcn1 = self._conv_layer_dropout(self.pool2, params=self._params["depth/fcn1"], keepProb=keepProb) 36 | self.fcn2 = self._conv_layer_dropout(self.fcn1, params=self._params["depth/fcn2"], keepProb=keepProb) 37 | 38 | self.outputData = self._upscore_layer(self.fcn2, params=self._params["depth/upscore"], 39 | shape=tf.shape(inputData)) 40 | 41 | self.outputDataArgMax = tf.argmax(input=self.outputData, dimension=3) 42 | 43 | 44 | def _max_pool(self, bottom, name): 45 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 46 | padding='SAME', name=name) 47 | 48 | def _average_pool(self, bottom, name): 49 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 50 | padding='SAME', name=name) 51 | 52 | def _conv_layer(self, bottom, params): 53 | with tf.variable_scope(params["name"]) as scope: 54 | filt = self.get_conv_filter(params) 55 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 56 | 57 | conv_biases = self.get_bias(params) 58 | 59 | if params["act"] == "relu": 60 | activation = tf.nn.relu(tf.nn.bias_add(conv, conv_biases)) 61 | elif params["act"] == "lin": 62 | activation = tf.nn.bias_add(conv, conv_biases) 63 | elif params["act"] == "tanh": 64 | activation = tf.nn.tanh(tf.nn.bias_add(conv, conv_biases)) 65 | 66 | return activation 67 | 68 | def _conv_layer_dropout(self, bottom, params, keepProb): 69 | with tf.variable_scope(params["name"]) as scope: 70 | filt = self.get_conv_filter(params) 71 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 72 | 73 | conv_biases = self.get_bias(params) 74 | 75 | if params["act"] == "relu": 76 | activation = tf.nn.relu(tf.nn.bias_add(conv, conv_biases)) 77 | elif params["act"] == "lin": 78 | activation = tf.nn.bias_add(conv, conv_biases) 79 | elif params["act"] == "tanh": 80 | activation = tf.nn.tanh(tf.nn.bias_add(conv, conv_biases)) 81 | 82 | activation = tf.nn.dropout(activation, keepProb, seed=0) 83 | 84 | return activation 85 | 86 | # WEIGHTS GENERATION 87 | 88 | def get_bias(self, params): 89 | if params["name"]+"/biases" in self.modelDict: 90 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/biases"], dtype=tf.float32) 91 | print "loaded " + params["name"] + "/biases" 92 | else: 93 | init = tf.constant_initializer(value=0.0) 94 | print "generated " + params["name"] + "/biases" 95 | 96 | var = tf.get_variable(name="biases", initializer=init, shape=params["shape"][3]) 97 | 98 | return var 99 | 100 | def get_conv_filter(self, params): 101 | if params["name"]+"/weights" in self.modelDict: 102 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/weights"], dtype=tf.float32) 103 | var = tf.get_variable(name="weights", initializer=init, shape=params["shape"]) 104 | print "loaded " + params["name"]+"/weights" 105 | else: 106 | if params["std"]: 107 | stddev = params["std"] 108 | else: 109 | fanIn = params["shape"][0]*params["shape"][1]*params["shape"][2] 110 | stddev = (2/float(fanIn))**0.5 111 | 112 | init = tf.truncated_normal(shape=params["shape"], stddev=stddev, seed=0) 113 | var = tf.get_variable(name="weights", initializer=init) 114 | print "generated " + params["name"] + "/weights" 115 | 116 | if not tf.get_variable_scope().reuse: 117 | weightDecay = tf.mul(tf.nn.l2_loss(var), self._wd, 118 | name='weight_loss') 119 | tf.add_to_collection('losses', weightDecay) 120 | 121 | return var 122 | 123 | def _upscore_layer(self, bottom, shape, params): 124 | strides = [1, params["stride"], params["stride"], 1] 125 | with tf.variable_scope(params["name"]): 126 | in_features = bottom.get_shape()[3].value 127 | 128 | new_shape = [shape[0], shape[1], shape[2], params["outputChannels"]] 129 | output_shape = tf.pack(new_shape) 130 | 131 | f_shape = [params["ksize"], params["ksize"], params["outputChannels"], in_features] 132 | 133 | weights = self.get_deconv_filter(f_shape, params) 134 | deconv = tf.nn.conv2d_transpose(bottom, weights, output_shape, 135 | strides=strides, padding='SAME') 136 | return deconv 137 | 138 | def get_deconv_filter(self, f_shape, params): 139 | if params["name"]+"/up_filter" in self.modelDict: 140 | init = tf.constant_initializer(value=self.modelDict[params["name"]+"/up_filter"], dtype=tf.float32) 141 | print "loaded " + params["name"]+"/up_filter" 142 | else: 143 | width = f_shape[0] 144 | height = f_shape[0] 145 | f = ceil(width / 2.0) 146 | c = (2 * f - 1 - f % 2) / (2.0 * f) 147 | bilinear = np.zeros([f_shape[0], f_shape[1]]) 148 | for x in range(width): 149 | for y in range(height): 150 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 151 | bilinear[x, y] = value 152 | weights = np.zeros(f_shape) 153 | for i in range(f_shape[2]): 154 | weights[:, :, i, i] = bilinear 155 | 156 | init = tf.constant_initializer(value=weights, 157 | dtype=tf.float32) 158 | print "generated " + params["name"] + "/up_filter" 159 | 160 | return tf.get_variable(name="up_filter", initializer=init, shape=f_shape) -------------------------------------------------------------------------------- /WTN/ioUtils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io as sio 3 | 4 | np.random.seed(0) 5 | 6 | VGG_MEAN = [103.939, 116.779, 123.68] 7 | 8 | 9 | def read_mat(path): 10 | return np.load(path) 11 | 12 | 13 | def write_mat(path, m): 14 | np.save(path, m) 15 | 16 | 17 | def read_ids(path): 18 | return [line.rstrip('\n') for line in open(path)] 19 | 20 | 21 | class Batch_Feeder: 22 | def __init__(self, dataset, indices, train, batchSize, padWidth, padHeight, flip=False, keepEmpty=False): 23 | self._epochs_completed = 0 24 | self._index_in_epoch = 0 25 | self._dataset = dataset 26 | self._indices = indices 27 | self._train = train 28 | self._batchSize = batchSize 29 | self._padWidth = padWidth 30 | self._padHeight = padHeight 31 | self._flip = flip 32 | self._keepEmpty = keepEmpty 33 | 34 | def set_paths(self, idList=None, gtDir=None, ssDir=None): 35 | self._paths = [] 36 | 37 | if self._train: 38 | for id in idList: 39 | if self._dataset == "kitti": 40 | self._paths.append([id, gtDir+'/'+id+'.mat', ssDir+'/'+id+'.mat']) 41 | elif self._dataset == "cityscapes" or self._dataset == "pascal": 42 | self._paths.append([id, 43 | gtDir + '/' + id + '_unified_GT.mat', 44 | ssDir + '/' + id + '_unified_ss.mat']) 45 | else: 46 | for id in idList: 47 | if self._dataset == "kitti": 48 | self._paths.append([id, ssDir+'/'+id+'.mat']) 49 | elif self._dataset == "cityscapes" or self._dataset == "pascal": 50 | self._paths.append([id, 51 | ssDir + '/' + id + '_unified_ss.mat']) 52 | 53 | self._numData = len(self._paths) 54 | 55 | assert self._batchSize < self._numData 56 | 57 | def shuffle(self): 58 | np.random.shuffle(self._paths) 59 | 60 | def next_batch(self): 61 | 62 | idBatch = [] 63 | dirBatch = [] 64 | gtBatch = [] 65 | ssBatch = [] 66 | weightBatch = [] 67 | 68 | if self._train: 69 | while (len(idBatch) < self._batchSize): 70 | ss = (sio.loadmat(self._paths[self._index_in_epoch][2])['mask']).astype(float) 71 | ss = np.sum(ss[:,:,self._indices], 2) 72 | 73 | if ss.sum() > 0 or self._keepEmpty: 74 | idBatch.append(self._paths[self._index_in_epoch][0]) 75 | 76 | dir = (sio.loadmat(self._paths[self._index_in_epoch][1])['dir_map']).astype(float) 77 | gt = (sio.loadmat(self._paths[self._index_in_epoch][1])['depth_map']).astype(float) 78 | weight = (sio.loadmat(self._paths[self._index_in_epoch][1])['weight_map']).astype(float) 79 | 80 | dirBatch.append(self.pad(dir)) 81 | gtBatch.append(self.pad(gt)) 82 | weightBatch.append(self.pad(weight)) 83 | ssBatch.append(ss) 84 | 85 | self._index_in_epoch += 1 86 | 87 | if self._index_in_epoch == self._numData: 88 | self._index_in_epoch = 0 89 | self.shuffle() 90 | 91 | dirBatch = np.array(dirBatch) 92 | gtBatch = np.array(gtBatch) 93 | ssBatch = np.array(ssBatch) 94 | weightBatch = np.array(weightBatch) 95 | 96 | if self._flip and np.random.uniform() > 0.5: 97 | for i in range(len(dirBatch)): 98 | for j in range(2): 99 | dirBatch[i,:,:,j] = np.fliplr(dirBatch[i,:,:,j]) 100 | dirBatch[i, :, :, 0] = -1 * dirBatch[i, :, :, 0] 101 | ssBatch[i] = np.fliplr(ssBatch[i]) 102 | gtBatch[i] = np.fliplr(gtBatch[i]) 103 | weightBatch[i] = np.fliplr(weightBatch[i]) 104 | return dirBatch, gtBatch, weightBatch, ssBatch, idBatch 105 | else: 106 | for example in self._paths[self._index_in_epoch:min(self._index_in_epoch + self._batchSize, self._numData)]: 107 | dirBatch.append(self.pad((sio.loadmat(example[1])['dir_map']).astype(float))) 108 | idBatch.append(example[0]) 109 | ss = (sio.loadmat(example[2])['mask']).astype(float) 110 | ss = np.sum(ss[:, :, self._indices], 2) 111 | ssBatch.append(self.pad(ss)) 112 | # imageBatch = np.array(imageBatch) 113 | dirBatch = np.array(dirBatch) 114 | ssBatch = np.array(ssBatch) 115 | # return imageBatch, dirBatch, ssBatch, idBatch 116 | self._index_in_epoch += self._batchSize 117 | return dirBatch, ssBatch, idBatch 118 | 119 | def total_samples(self): 120 | return self._numData 121 | 122 | def image_scaling(self, rgb_scaled): 123 | # if self._dataset == "cityscapes": 124 | # rgb_scaled = skimage.transform.pyramid_reduce(rgb_scaled, sigma=0.001) 125 | #rgb_scaled = skimage.transform.rescale(rgb_scaled, 0.5) 126 | 127 | rgb_scaled[:,:,0] = (rgb_scaled[:,:,0] - VGG_MEAN[0])/128 128 | rgb_scaled[:,:,1] = (rgb_scaled[:,:,1] - VGG_MEAN[1])/128 129 | rgb_scaled[:,:,2] = (rgb_scaled[:,:,2] - VGG_MEAN[2])/128 130 | 131 | return rgb_scaled 132 | # Convert RGB to BGR 133 | red, green, blue = tf.split(3, 3, rgb_scaled) 134 | # assert red.get_shape().as_list()[1:] == [224, 224, 1] 135 | # assert green.get_shape().as_list()[1:] == [224, 224, 1] 136 | # assert blue.get_shape().as_list()[1:] == [224, 224, 1] 137 | #bgr = tf.concat(3, [ 138 | # blue - VGG_MEAN[0], 139 | # green - VGG_MEAN[1], 140 | # red - VGG_MEAN[2], 141 | #]) 142 | # assert bgr.get_shape().as_list()[1:] == [224, 224, 3] 143 | 144 | def pad(self, data): 145 | if self._padHeight and self._padWidth: 146 | if data.ndim == 3: 147 | npad = ((0,self._padHeight-data.shape[0]),(0,self._padWidth-data.shape[1]),(0,0)) 148 | elif data.ndim == 2: 149 | npad = ((0, self._padHeight - data.shape[0]), (0, self._padWidth - data.shape[1])) 150 | padData = np.pad(data, npad, mode='constant', constant_values=0) 151 | 152 | else: 153 | padData = data 154 | 155 | return padData 156 | -------------------------------------------------------------------------------- /WTN/lossFunction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def depthCELoss2(pred, gt, weight, ss, outputChannels=16): 4 | with tf.name_scope("depth_CE_loss"): 5 | pred = tf.reshape(pred, (-1, outputChannels)) 6 | epsilon = tf.constant(value=1e-25) 7 | predSoftmax = tf.to_float(tf.nn.softmax(pred)) 8 | 9 | gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32) 10 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 11 | weight = tf.to_float(tf.reshape(weight, (-1, 1))) 12 | 13 | crossEntropyScaling = tf.to_float([3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) 14 | 15 | crossEntropy = -tf.reduce_sum(((1-gt)*tf.log(tf.maximum(1-predSoftmax, epsilon)) 16 | + gt*tf.log(tf.maximum(predSoftmax, epsilon)))*ss*crossEntropyScaling*weight, 17 | reduction_indices=[1]) 18 | 19 | crossEntropySum = tf.reduce_sum(crossEntropy, name="cross_entropy_sum") 20 | return crossEntropySum 21 | 22 | def depthCELoss(pred, gt, ss, outputChannels=16): 23 | with tf.name_scope("depth_CE_loss"): 24 | pred = tf.reshape(pred, (-1, outputChannels)) 25 | epsilon = tf.constant(value=1e-25) 26 | #pred = pred + epsilon 27 | predSoftmax = tf.to_float(tf.nn.softmax(pred)) 28 | predSoftmax = predSoftmax + epsilon 29 | 30 | gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32) 31 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 32 | 33 | crossEntropy = -tf.reduce_sum(gt * tf.log(predSoftmax) * ss, reduction_indices=[1]) 34 | 35 | crossEntropySum = tf.reduce_sum(crossEntropy, name="cross_entropy_sum") 36 | return crossEntropySum 37 | 38 | def modelTotalLoss(pred, gt, weight, ss, outputChannels=1): 39 | lossDepthTotal = depthCELoss2(pred=pred, gt=gt, weight=weight, ss=ss, 40 | outputChannels=outputChannels) / (countTotalWeighted(ss, weight) + 1) 41 | 42 | tf.add_to_collection('losses', lossDepthTotal) 43 | 44 | totalLoss = tf.add_n(tf.get_collection('losses'), name='total_loss') 45 | 46 | return totalLoss 47 | 48 | def countTotal(ss): 49 | with tf.name_scope("total"): 50 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 51 | total = tf.reduce_sum(ss) 52 | 53 | return total 54 | 55 | def countCorrect(pred, gt, ss, k, outputChannels): 56 | with tf.name_scope("correct"): 57 | pred = tf.argmax(tf.reshape(pred, (-1, outputChannels)), 1) 58 | gt = tf.one_hot(indices=tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1)))), depth=outputChannels, dtype=tf.float32) 59 | 60 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 61 | 62 | correct = tf.reduce_sum(tf.mul(tf.reshape(tf.to_float(tf.nn.in_top_k(gt, pred, k)), (-1, 1)), ss), reduction_indices=[0]) 63 | return correct 64 | 65 | def countTotalWeighted(ss, weight): 66 | with tf.name_scope("total"): 67 | ss = tf.to_float(tf.reshape(ss, (-1, 1))) 68 | weight = tf.to_float(tf.reshape(weight, (-1, 1))) 69 | total = tf.reduce_sum(ss * weight) 70 | 71 | return total -------------------------------------------------------------------------------- /WTN/train_depth.py: -------------------------------------------------------------------------------- 1 | import depth_model 2 | from ioUtils import * 3 | import math 4 | import lossFunction 5 | import tensorflow as tf 6 | import numpy as np 7 | import sys 8 | import os 9 | import scipy.io as sio 10 | import re 11 | import time 12 | 13 | VGG_MEAN = [103.939, 116.779, 123.68] 14 | 15 | tf.set_random_seed(0) 16 | 17 | def initialize_model(outputChannels, wd=None, modelWeightPaths=None): 18 | params = {"depth/conv1_1": {"name": "depth/conv1_1", "shape": [5,5,2,64], "std": None, "act": "relu", "reuse": False}, 19 | "depth/conv1_2": {"name": "depth/conv1_2", "shape": [5,5,64,128], "std": None, "act": "relu", "reuse": False}, 20 | "depth/conv2_1": {"name": "depth/conv2_1", "shape": [5,5,128,128], "std": None, "act": "relu", "reuse": False}, 21 | "depth/conv2_2": {"name": "depth/conv2_2", "shape": [5,5,128,128], "std": None, "act": "relu", "reuse": False}, 22 | "depth/conv2_3": {"name": "depth/conv2_3", "shape": [5,5,128,128], "std": None, "act": "relu", "reuse": False}, 23 | "depth/conv2_4": {"name": "depth/conv2_4", "shape": [5,5,128,128], "std": None, "act": "relu", "reuse": False}, 24 | "depth/fcn1": {"name": "depth/fcn1", "shape": [1,1,128,128], "std": None, "act": "relu", "reuse": False}, 25 | "depth/fcn2": {"name": "depth/fcn2", "shape": [1,1,128,outputChannels], "std": None, "act": "relu", "reuse": False}, 26 | "depth/upscore": {"name": "depth/upscore", "ksize": 8, "stride": 4, "outputChannels": outputChannels}, 27 | } 28 | 29 | return depth_model.Network(params, wd=wd, modelWeightPaths=modelWeightPaths) 30 | 31 | def forward_model(model, feeder, outputSavePath): 32 | with tf.Session() as sess: 33 | tfBatchDirs = tf.placeholder("float") 34 | tfBatchSS = tf.placeholder("float") 35 | keepProb = tf.placeholder("float") 36 | 37 | with tf.name_scope("model_builder"): 38 | print "attempting to build model" 39 | model.build(tfBatchDirs, tfBatchSS, keepProb=keepProb) 40 | print "built the model" 41 | 42 | init = tf.initialize_all_variables() 43 | 44 | sess.run(init) 45 | 46 | if not os.path.exists(outputSavePath): 47 | os.makedirs(outputSavePath) 48 | 49 | for i in range(int(math.floor(feeder.total_samples() / batchSize))): 50 | dirBatch, ssBatch, idBatch = feeder.next_batch() 51 | 52 | outputBatch = sess.run(model.outputDataArgMax, feed_dict={tfBatchDirs: dirBatch, 53 | tfBatchSS: ssBatch, 54 | keepProb: 1.0}) 55 | outputBatch = outputBatch.astype(np.uint8) 56 | 57 | for j in range(len(idBatch)): 58 | outputFilePath = os.path.join(outputSavePath, idBatch[j]+'.mat') 59 | outputFileDir = os.path.dirname(outputFilePath) 60 | # print outputFileDir 61 | # print outputFilePath 62 | # raw_input("pause") 63 | 64 | if not os.path.exists(outputFileDir): 65 | os.makedirs(outputFileDir) 66 | 67 | sio.savemat(outputFilePath, {"depth_map": outputBatch[j]}) 68 | 69 | print "processed image %d out of %d"%(j+batchSize*i, feeder.total_samples()) 70 | 71 | def train_model(model, outputChannels, learningRate, trainFeeder, valFeeder, modelSavePath=None, savePrefix=None, initialIteration=1): 72 | with tf.Session() as sess: 73 | tfBatchDirs = tf.placeholder("float") 74 | tfBatchGT = tf.placeholder("float") 75 | tfBatchWeight = tf.placeholder("float") 76 | tfBatchSS = tf.placeholder("float") 77 | keepProb = tf.placeholder("float") 78 | 79 | with tf.name_scope("model_builder"): 80 | print "attempting to build model" 81 | model.build(tfBatchDirs, tfBatchSS, keepProb=keepProb) 82 | print "built the model" 83 | sys.stdout.flush() 84 | loss = lossFunction.modelTotalLoss(pred=model.outputData, gt=tfBatchGT, weight=tfBatchWeight, ss=tfBatchSS, outputChannels=outputChannels) 85 | numPredictedWeighted = lossFunction.countTotalWeighted(ss=tfBatchSS, weight=tfBatchWeight) 86 | numPredicted = lossFunction.countTotal(ss=tfBatchSS) 87 | numCorrect = lossFunction.countCorrect(pred=model.outputData, gt=tfBatchGT, ss=tfBatchSS, k=1, outputChannels=outputChannels) 88 | 89 | train_op = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(loss=loss) 90 | 91 | init = tf.initialize_all_variables() 92 | 93 | sess.run(init) 94 | iteration = initialIteration 95 | 96 | while iteration < 1000: 97 | batchLosses = [] 98 | totalPredicted = 0 99 | totalCorrect = 0 100 | totalPredictedWeighted = 0 101 | 102 | for k in range(int(math.floor(valFeeder.total_samples() / batchSize))): 103 | dirBatch, gtBatch, weightBatch, ssBatch, _ = valFeeder.next_batch() 104 | # batchLoss, batchDepthError, batchPredicted, batchExceed3, batchExceed5 = \ 105 | # sess.run([loss, depthError, numPredicted, exceed3, exceed5], 106 | # feed_dict={tfBatchDirs: dirBatch, 107 | # tfBatchGT: gtBatch, 108 | # tfBatchSS: ssBatch}) 109 | 110 | batchLoss, batchPredicted, batchPredictedWeighted, batchCorrect = sess.run([loss, numPredicted, numPredictedWeighted, numCorrect], 111 | feed_dict={tfBatchDirs: dirBatch, 112 | tfBatchGT: gtBatch, 113 | tfBatchWeight: weightBatch, 114 | tfBatchSS: ssBatch, 115 | keepProb: 1.0}) 116 | 117 | batchLosses.append(batchLoss) 118 | totalPredicted += batchPredicted 119 | totalPredictedWeighted += batchPredictedWeighted 120 | totalCorrect += batchCorrect 121 | 122 | if np.isnan(np.mean(batchLosses)): 123 | print "LOSS RETURNED NaN" 124 | sys.stdout.flush() 125 | return 1 126 | 127 | # print "Itr: %d - b %d - val loss: %.3f, depth MSE: %.3f, exceed3: %.3f, exceed5: %.3f"%(iteration,j, 128 | # float(np.mean(batchLosses)), totalDepthError/totalPredicted, 129 | # totalExceed3/totalPredicted, totalExceed5/totalPredicted) 130 | print "%s Itr: %d - val loss: %.6f, correct: %.6f" % (time.strftime("%H:%M:%S"), 131 | iteration, float(np.mean(batchLosses)), totalCorrect / totalPredicted) 132 | 133 | if (iteration > 0 and iteration % 5 == 0) or checkSaveFlag(modelSavePath): 134 | modelSaver(sess, modelSavePath, savePrefix, iteration) 135 | 136 | # print "Processed iteration %d, batch %d" % (i,j) 137 | # sys.stdout.flush() 138 | 139 | sys.stdout.flush() 140 | # raw_input("paused") 141 | #for j in range(10): 142 | for j in range(int(math.floor(trainFeeder.total_samples() / batchSize))): 143 | dirBatch, gtBatch, weightBatch, ssBatch, _ = trainFeeder.next_batch() 144 | sess.run(train_op, feed_dict={tfBatchDirs: dirBatch, 145 | tfBatchGT: gtBatch, 146 | tfBatchWeight: weightBatch, 147 | tfBatchSS: ssBatch, 148 | keepProb: 0.7}) 149 | 150 | iteration += 1 151 | 152 | 153 | def modelSaver(sess, modelSavePath, savePrefix, iteration, maxToKeep=5): 154 | allWeights = {} 155 | 156 | for name in [n.name for n in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]: 157 | param = sess.run(name) 158 | nameParts = re.split('[:/]', name) 159 | saveName = nameParts[-4]+'/'+nameParts[-3]+'/'+nameParts[-2] 160 | allWeights[saveName] = param 161 | 162 | # print "Name: %s Mean: %.3f Max: %.3f Min: %.3f std: %.3f" % (name, 163 | # param.mean(), 164 | # param.max(), 165 | # param.min(), 166 | # param.std()) 167 | # if name == "depth/fcn2/weights:0": 168 | # for j in range(outputChannels): 169 | # print "ch: %d, max %e, min %e, std %e" % ( 170 | # j, param[:, :, :, j].max(), param[:, :, :, j].min(), param[:, :, :, j].std()) 171 | 172 | # raw_input("done") 173 | 174 | sio.savemat(os.path.join(modelSavePath, savePrefix+'_%03d'%iteration), allWeights) 175 | 176 | 177 | def checkSaveFlag(modelSavePath): 178 | flagPath = os.path.join(modelSavePath, 'saveme.flag') 179 | 180 | if os.path.exists(flagPath): 181 | return True 182 | else: 183 | return False 184 | 185 | if __name__ == "__main__": 186 | outputChannels = 16 187 | classType = 'unified_CR' 188 | indices = [0,1,2,3,4,5,6,7] 189 | # 0=car, 1=person, 2=rider, 3=motorcycle, 4=bicycle, 5=truck, 6=bus, 7=train 190 | savePrefix = "depth_" + classType + "_CR_pretrain" 191 | 192 | train = True 193 | 194 | if train: 195 | batchSize = 6 196 | learningRate = 5e-4 197 | wd = 1e-6 198 | 199 | modelWeightPaths = [] 200 | initialIteration = 1 201 | 202 | model = initialize_model(outputChannels=outputChannels, wd=wd, modelWeightPaths=modelWeightPaths) 203 | trainFeeder = Batch_Feeder(dataset="cityscapes", 204 | indices=indices, 205 | train=train, 206 | batchSize=batchSize, 207 | padWidth=None, 208 | padHeight=None, flip=True, keepEmpty=False) 209 | trainFeeder.set_paths(idList=read_ids('./cityscapes/splits/trainlist.txt'), 210 | gtDir="./cityscapes/unified/iGTFine/train", 211 | ssDir="./cityscapes/unified/ssMaskFineGT/train") 212 | 213 | valFeeder = Batch_Feeder(dataset="cityscapes", indices=indices, train=train, batchSize=batchSize, padWidth=None, padHeight=None) 214 | valFeeder.set_paths(idList=read_ids('./cityscapes/splits/vallist.txt'), 215 | gtDir="./cityscapes/unified/iGTFine/val", 216 | ssDir="./cityscapes/unified/ssMaskFineGT/val") 217 | 218 | train_model(model=model, outputChannels=outputChannels, 219 | learningRate=learningRate, 220 | trainFeeder=trainFeeder, 221 | valFeeder=valFeeder, 222 | modelSavePath="./cityscapes/models/depth", 223 | savePrefix=savePrefix, 224 | initialIteration=initialIteration) 225 | 226 | else: 227 | batchSize = 5 228 | modelWeightPaths = [] 229 | model = initialize_model(outputChannels=outputChannels, wd=None, modelWeightPaths=modelWeightPaths) 230 | 231 | feeder = Batch_Feeder(dataset="cityscapes", train=train, indices=indices, batchSize=batchSize, padWidth=None, padHeight=None) 232 | feeder.set_paths(idList=read_ids('./cityscapes/splits/vallist.txt'), 233 | ssDir="./cityscapes/unified/ssMaskFineGT/val") 234 | 235 | forward_model(model, feeder=feeder, 236 | outputSavePath="" % ()) 237 | -------------------------------------------------------------------------------- /example/PSPNet/frankfurt_000000_002196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/PSPNet/frankfurt_000000_002196.png -------------------------------------------------------------------------------- /example/PSPNet/frankfurt_000000_002963.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/PSPNet/frankfurt_000000_002963.png -------------------------------------------------------------------------------- /example/PSPNet/frankfurt_000001_031266.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/PSPNet/frankfurt_000001_031266.png -------------------------------------------------------------------------------- /example/PSPNet/munster_000067_000019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/PSPNet/munster_000067_000019.png -------------------------------------------------------------------------------- /example/inputImages/frankfurt_000000_002196_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/inputImages/frankfurt_000000_002196_leftImg8bit.png -------------------------------------------------------------------------------- /example/inputImages/frankfurt_000000_002963_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/inputImages/frankfurt_000000_002963_leftImg8bit.png -------------------------------------------------------------------------------- /example/inputImages/frankfurt_000001_031266_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/inputImages/frankfurt_000001_031266_leftImg8bit.png -------------------------------------------------------------------------------- /example/inputImages/munster_000067_000019_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/inputImages/munster_000067_000019_leftImg8bit.png -------------------------------------------------------------------------------- /example/output/frankfurt_000000_002196.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/frankfurt_000000_002196.mat -------------------------------------------------------------------------------- /example/output/frankfurt_000000_002196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/frankfurt_000000_002196.png -------------------------------------------------------------------------------- /example/output/frankfurt_000000_002963.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/frankfurt_000000_002963.mat -------------------------------------------------------------------------------- /example/output/frankfurt_000000_002963.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/frankfurt_000000_002963.png -------------------------------------------------------------------------------- /example/output/frankfurt_000001_031266.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/frankfurt_000001_031266.mat -------------------------------------------------------------------------------- /example/output/frankfurt_000001_031266.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/frankfurt_000001_031266.png -------------------------------------------------------------------------------- /example/output/munster_000067_000019.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/munster_000067_000019.mat -------------------------------------------------------------------------------- /example/output/munster_000067_000019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/min2209/dwt/03fe206780ac6d72897ced75b38acb7c7635638c/example/output/munster_000067_000019.png -------------------------------------------------------------------------------- /example/sample_list.txt: -------------------------------------------------------------------------------- 1 | frankfurt_000000_002196 2 | munster_000067_000019 3 | frankfurt_000000_002963 4 | frankfurt_000001_031266 5 | -------------------------------------------------------------------------------- /matlab/batch_generate_cityscapes.m: -------------------------------------------------------------------------------- 1 | class = 'unified'; 2 | 3 | sets = {'val', 'train'}; 4 | 5 | for j = 1:2 6 | set = sets{j}; 7 | 8 | keys = [26000, 26999; 9 | 24000, 24999, 10 | 25000, 25999, 11 | 32000, 32999, 12 | 33000, 33999, 13 | 27000, 27999, 14 | 28000, 28999, 15 | 31000, 31999]; 16 | 17 | input_list_file = strcat('./cityscapes/splits/',set,'list.txt'); 18 | input_folder = strcat('./cityscapes/gtFine/', set); 19 | output_file_path = strcat('./cityscapes/unified/iGTFull/', set); 20 | 21 | boundaries = [0,1,2,3,4,5,7,9,12,15,19,24,30,37,45,54,Inf]; 22 | 23 | fid = fopen(input_list_file); 24 | input_file = fgetl(fid); 25 | processed = 0; 26 | 27 | while ischar(input_file) 28 | id = regexpi(input_file, '[a-z]+_\d\d\d\d\d\d_\d\d\d\d\d\d', 'match'); 29 | id = id{1}; 30 | city = regexpi(id, '^[a-z]+', 'match'); 31 | city = city{1}; 32 | output_file = fullfile(output_file_path, city, strcat(id, strcat('_',class,'_GT.mat'))); 33 | [output_file_dir, ~, ~] = fileparts(output_file); 34 | if ~exist(output_file_dir, 'dir') 35 | mkdir(output_file_dir); 36 | end 37 | 38 | generate_GT_cityscapes_unified(fullfile(input_folder, strcat(input_file, '_gtFine_instanceIds.png')), ... 39 | output_file, false, keys, 1, boundaries); 40 | 41 | input_file = fgetl(fid); 42 | processed = processed + 1; 43 | if mod(processed, 50) == 0 44 | disp(sprintf('Processed %d direction files', processed)); 45 | end 46 | end 47 | fclose(fid); 48 | 49 | end 50 | -------------------------------------------------------------------------------- /matlab/batch_generate_cityscapes_PSP_ss.m: -------------------------------------------------------------------------------- 1 | class = 'unified'; 2 | set = 'val'; 3 | 4 | keys = [13, 11, 12, 17, 18, 14, 15, 16]; 5 | 6 | input_list_file = strcat('./cityscapes/splits/', set,'list.txt'); 7 | output_dir = strcat('./cityscapes/unified/ssMaskFinePSP/', set); 8 | input_dir = './PSPNet/evaluation/mc_result/cityscapes/test/gray'; 9 | 10 | 11 | fid = fopen(input_list_file); 12 | input_file = fgetl(fid); 13 | processed = 0; 14 | while ischar(input_file) 15 | id = regexpi(input_file, '[a-z]+_\d\d\d\d\d\d_\d\d\d\d\d\d', 'match'); 16 | id = id{1}; 17 | city = regexpi(id, '^[a-z]+', 'match'); 18 | city = city{1}; 19 | output_file = fullfile(output_dir, city, strcat(id, '_', class, '_ss.mat')); 20 | %output_file = fullfile(output_dir, city, strcat(id, '_car_semantic_CF.png')); 21 | [output_file_dir, ~, ~] = fileparts(output_file); 22 | if ~exist(output_file_dir, 'dir') 23 | mkdir(output_file_dir); 24 | end 25 | 26 | raw_mask = imread(fullfile(input_dir, [id, '.png'])); 27 | 28 | [height, width] = size(raw_mask); 29 | 30 | mask = uint8(zeros(height/2, width/2, 8)); 31 | 32 | for i=1:length(keys) 33 | raw_mask_downsampled = uint8(raw_mask == keys(i)); 34 | raw_mask_downsampled = downsample(downsample(raw_mask_downsampled,2)', 2)'; 35 | mask(:,:,i) = uint8(raw_mask_downsampled); 36 | end 37 | 38 | save(output_file, 'mask'); 39 | 40 | input_file = fgetl(fid); 41 | processed = processed + 1; 42 | if mod(processed, 100) == 0 43 | disp(sprintf('Processed %d segmentation masks', processed)); 44 | end 45 | 46 | end 47 | fclose(fid); 48 | -------------------------------------------------------------------------------- /matlab/generate_GT_cityscapes_unified.m: -------------------------------------------------------------------------------- 1 | function generate_GT_cityscapes_unified(input_annotation, gt_output, figures, keys, downsampling, depth_bins) 2 | % generates 2 channel unitvec gt 3 | annotation = imread(input_annotation); 4 | 5 | if downsampling > 1 6 | annotation = downsample(downsample(annotation,downsampling)', downsampling)'; 7 | end 8 | 9 | height = size(annotation,1); 10 | width = size(annotation,2); 11 | depth_map = zeros(size(annotation)); 12 | dir_map = repmat(depth_map, 1, 1, 2); 13 | weight_map = depth_map; 14 | ss_map = depth_map; 15 | edge_map = depth_map; 16 | se = strel('disk', 1, 8); 17 | se3 = strel('disk', 3, 8); 18 | 19 | %annotation(annotation < classId | annotation > (classId + 1000)) = 0; 20 | annotation(~((annotation >= keys(1,1) & annotation <= keys(1,2)) | ... 21 | (annotation >= keys(2,1) & annotation <= keys(2,2)) | ... 22 | (annotation >= keys(3,1) & annotation <= keys(3,2)) | ... 23 | (annotation >= keys(4,1) & annotation <= keys(4,2)) | ... 24 | (annotation >= keys(5,1) & annotation <= keys(5,2)) | ... 25 | (annotation >= keys(6,1) & annotation <= keys(6,2)) | ... 26 | (annotation >= keys(7,1) & annotation <= keys(7,2)) | ... 27 | (annotation >= keys(8,1) & annotation <= keys(8,2)))) = 0; 28 | 29 | ss_map = annotation; 30 | ss_map(ss_map > 1) = 1; 31 | 32 | ids = unique(annotation); 33 | 34 | for i = 2:length(ids) 35 | annotation_i = annotation; 36 | annotation_i(annotation_i~=ids(i)) = 0; 37 | annotation_i(annotation_i>0) = 1; 38 | 39 | if sum(sum(annotation_i)) < 100 40 | continue; 41 | end 42 | 43 | depth_i = bwdist(1-annotation_i); 44 | depth_map = depth_map + depth_i; 45 | 46 | dir_i = zeros(size(dir_map)); 47 | 48 | [dir_i(:,:,1), dir_i(:,:,2)] = imgradientxy(depth_i); 49 | 50 | dir_i = dir_i / 8; 51 | 52 | dir_map = dir_map + dir_i; 53 | 54 | weight_map(annotation_i==1) = 200 / sqrt(sum(sum(annotation_i))); 55 | end 56 | 57 | edges = 1-double(edge(annotation)); 58 | dir_map = dir_map .* repmat(edges, 1, 1, 2); 59 | %depth_map = depth_map .* edges; 60 | 61 | for i=1:length(depth_bins)-1 62 | depth_map(depth_map > depth_bins(i) & depth_map <= depth_bins(i+1)) = i-1; 63 | end 64 | 65 | for i=1:size(keys,1) 66 | annotation_i = double(annotation); 67 | annotation_i(annotation_i < keys(i,1) | annotation_i > keys(i,2)) = 0; 68 | annotation_i = annotation_i - (keys(i,1)) + double(~annotation_i) * (keys(i,1)); 69 | edge_map_i = edge(annotation_i, 0.00001); 70 | 71 | annotation_i_inv = ~(imdilate(~annotation_i, se)); 72 | 73 | edge_map_i = edge_map_i .* annotation_i_inv; 74 | edge_map_i = imdilate(edge_map_i, se); 75 | edge_map = max(edge_map, edge_map_i); 76 | end 77 | 78 | dir_map = single(dir_map); 79 | depth_map = uint8(depth_map); 80 | weight_map = single(weight_map); 81 | ss_map = uint8(ss_map); 82 | edge_map = uint8(edge_map); 83 | 84 | save(gt_output, 'depth_map', 'dir_map', 'weight_map', 'edge_map'); 85 | if figures 86 | figure(1); 87 | imagesc(depth_map); 88 | figure(2); 89 | imagesc(dir_map(:,:,1)); 90 | figure(3); 91 | imagesc(dir_map(:,:,2)); 92 | figure(3); 93 | imagesc(weight_map); 94 | figure(4); 95 | imagesc(ss_map); 96 | end 97 | end 98 | 99 | --------------------------------------------------------------------------------