├── .gitignore_global ├── README.md ├── cpp ├── cpputils.cpp └── makefile ├── fr_occ.py ├── freiburg_dataloader.py ├── helpers ├── freiburg.py └── sintel.py ├── kitti2015_dataloader.py ├── kitti_dataloader.py ├── main.py ├── params ├── freiburg.json ├── kitti.json ├── kitti15.json ├── kitti15_test.json ├── kitti_test.json └── pretrain.json ├── pylibs ├── pfmutil.py └── tfutils.py └── tfmodel.py /.gitignore_global: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.o 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RecResNet 2 | RecResNet: A Recurrent Residual CNN Architecture for Disparity Map Enhancement 3DV 2018. If you use this code please cite our paper [ RecResNet: A Recurrent Residual CNN Architecture for Disparity Map 3 | Enhancement ] (http://personal.stevens.edu/~kbatsos/RecResNet.pdf) 4 | 5 | ``` 6 | @inproceedings{batsos2018recresnet, 7 | title={RecResNet: A Recurrent Residual CNN Architecture for Disparity Map Enhancement}, 8 | author={Batsos, Konstantinos and Mordohai, Philipos}, 9 | booktitle={ In International Conference on 3D Vision (3DV) }, 10 | year={2018} 11 | } 12 | 13 | ``` 14 | #Python 15 | 16 | The code is using python 2.7 and tensorflow version 1.6.0 17 | 18 | # CPP 19 | 20 | The code includes two helper functions in c++. To compile the c++ code you will need boost python. You can safely omit these functions and replace them with your own. 21 | 22 | #Training 23 | 24 | The parameters for the datasets are provided in JSON format in the params folder. Please replace the paths of the data to reflect your filesystem. All other parameters can be found in main.py. To train the network simply issue: 25 | 26 | ``` 27 | python main.py --params ./params/(dataset).json 28 | 29 | ``` 30 | 31 | The code saves data and variables to visualize the training process in tensorboard. 32 | 33 | #Testing 34 | 35 | As provided the code can be used to test the whole dataset specified in the corresponding JSON file. To test a trained model simply issue: 36 | 37 | ``` 38 | python main.py --params ./params/(dataset).json --model (model path to load) --mode test 39 | 40 | ``` 41 | 42 | # Computing occlusiong masks for the synthetic dataset 43 | 44 | If you would like to compute the occlusion masks for the synthetic dataset you can run the fr_occ.py code. 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /cpp/cpputils.cpp: -------------------------------------------------------------------------------- 1 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "boost/python/extract.hpp" 9 | #include "boost/python/numeric.hpp" 10 | #include 11 | 12 | using namespace std; 13 | using namespace boost::python; 14 | 15 | typedef uint16_t uint16; 16 | 17 | 18 | PyObject* DDSupport(PyObject* dm){ 19 | PyArrayObject* dmA = reinterpret_cast(dm); 20 | 21 | float * dmp = reinterpret_cast(PyArray_DATA(dmA)); 22 | 23 | npy_intp *shape = PyArray_DIMS(dmA); 24 | 25 | 26 | PyObject* res = PyArray_SimpleNew(2,PyArray_DIMS(dmA), NPY_FLOAT); 27 | float* res_data = static_cast(PyArray_DATA(reinterpret_cast(res))); 28 | 29 | bool* ddmap = (bool *)calloc((int)shape[0]*shape[1],sizeof(bool)); 30 | 31 | for(int j=0; j 1 || 75 | fabs( dmp[(i+1)*shape[1]+j] - val ) > 1 || 76 | fabs( dmp[i*shape[1]+(j-1)] - val ) > 1 || 77 | fabs( dmp[i*shape[1]+(j+1)] - val ) > 1){ 78 | ddmap[i*shape[1]+j] = true; 79 | 80 | } 81 | 82 | } 83 | } 84 | 85 | } 86 | 87 | 88 | #pragma omp parallel 89 | { 90 | 91 | #pragma omp for 92 | for(int i=0; i=0;k--){ 103 | if(ddmap[i*shape[1]+k]){ 104 | leftdist=j-k; 105 | break; 106 | } 107 | 108 | } 109 | int rightdist=0; 110 | for(int k=j; k=0;k--){ 119 | if(ddmap[k*shape[1]+j]){ 120 | topdist=i-k; 121 | break; 122 | } 123 | } 124 | 125 | int bottomdist=0; 126 | for(int k=i; k max) 137 | max = rightdist; 138 | if(topdist > max) 139 | max = topdist; 140 | if(bottomdist > max) 141 | max = bottomdist; 142 | 143 | res_data[ i*shape[1]+j] = max; 144 | 145 | 146 | } 147 | } 148 | 149 | } 150 | 151 | delete [] ddmap; 152 | 153 | return res; 154 | 155 | } 156 | 157 | 158 | 159 | 160 | 161 | template 162 | inline T getDisp (T* data_,const int width,const int32_t u,const int32_t v) { 163 | return data_[v*width+u]; 164 | } 165 | 166 | template 167 | // is disparity valid 168 | inline bool isValid (T* data_,const int32_t u,const int32_t v,const int width) { 169 | return data_[v*width+u]>=0; 170 | } 171 | 172 | void write2png(PyObject *disp, const std::string & path ){ 173 | 174 | PyArrayObject* dispA = reinterpret_cast(disp); 175 | float * dispD = reinterpret_cast(PyArray_DATA(dispA)); 176 | npy_intp *shape = PyArray_DIMS(dispA); 177 | 178 | int height = shape[0]; int width = shape[1]; 179 | 180 | png::image< png::gray_pixel_16 > image(width,height); 181 | for (int32_t v=0; v(l_gt); 196 | PyArrayObject* r_gtA = reinterpret_cast(r_gt); 197 | 198 | //Get the pointer to the data 199 | float * lgtD = reinterpret_cast(PyArray_DATA(l_gtA)); 200 | float * rgtD = reinterpret_cast(PyArray_DATA(r_gtA)); 201 | 202 | 203 | npy_intp *shape = PyArray_DIMS(l_gtA); 204 | npy_intp *shapeout = new npy_intp[2]; 205 | shapeout[0] = shape[0]; shapeout[1] = shape[1]; 206 | 207 | PyObject* res = PyArray_SimpleNew(2, shapeout, NPY_FLOAT); 208 | 209 | //Get the pointer to the data 210 | float* res_data = static_cast(PyArray_DATA(reinterpret_cast(res))); 211 | 212 | #pragma omp parallel for 213 | for(int i=0; i1 ){ 220 | res_data[ i*shape[1] +j ] =0; 221 | }else 222 | res_data[ i*shape[1] +j ] =lgtD[ i*shape[1] +j ] ; 223 | } 224 | } 225 | 226 | 227 | return res; 228 | 229 | 230 | } 231 | 232 | 233 | 234 | 235 | BOOST_PYTHON_MODULE(cpputils) { 236 | 237 | numeric::array::set_module_and_type("numpy", "ndarray"); 238 | 239 | def("write2png",write2png); 240 | def("make_occ",make_occ); 241 | 242 | 243 | import_array(); 244 | } 245 | 246 | -------------------------------------------------------------------------------- /cpp/makefile: -------------------------------------------------------------------------------- 1 | #Set the compiler 2 | 3 | CC = g++ 4 | 5 | #Set compile time flas 6 | 7 | CFLAGS= -fopenmp -shared -O3 -Wl,--export-dynamic 8 | 9 | # location of the Python header files 10 | 11 | PYTHON_VERSION = 2.7 12 | PYTHON_INCLUDE = /usr/include/python$(PYTHON_VERSION) 13 | 14 | # location of the Boost Python include files and library 15 | 16 | BOOST_INC = /usr/include 17 | BOOST_LIB = /usr/lib 18 | 19 | # compile mesh classes 20 | TARGET = cpputils 21 | 22 | $(TARGET).so: $(TARGET).o 23 | $(CC) $(CFLAGS) $(TARGET).o -L$(BOOST_LIB) -lboost_python -lpng16 -L/usr/lib/python$(PYTHON_VERSION)/config -lpython$(PYTHON_VERSION) -o $(TARGET).so 24 | 25 | $(TARGET).o: $(TARGET).cpp 26 | $(CC) -I$(PYTHON_INCLUDE) -I$(BOOST_INC) -fPIC -fopenmp -c $(TARGET).cpp 27 | 28 | clean: 29 | rm -rf *.so *.o 30 | 31 | 32 | -------------------------------------------------------------------------------- /fr_occ.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | import sys 7 | import math 8 | import random 9 | import os 10 | 11 | sys.path.insert(0,'../pylibs') 12 | sys.path.insert(0,'../src') 13 | 14 | import cpputils 15 | import pfmutil as pfm 16 | 17 | l_gt_p = "..../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/left/" 18 | r_gt_p = "..../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/right/" 19 | save_p = ".../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/left_nonocc/" 20 | 21 | ims =os.listdir(l_gt_p) 22 | 23 | for im in ims: 24 | l_gt = pfm.load(l_gt_p+im)[0] 25 | r_gt = pfm.load(r_gt_p+im)[0] 26 | occ = cpputils.make_occ( l_gt,r_gt ) 27 | pfm.save(save_p+im,occ) 28 | -------------------------------------------------------------------------------- /freiburg_dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | import scipy 6 | from sklearn.feature_extraction import image 7 | import matplotlib.pyplot as plt 8 | 9 | import os 10 | import sys 11 | sys.path.insert(0,'./pylibs') 12 | import pfmutil as pfm 13 | import time 14 | import random 15 | 16 | 17 | class Dataloader(object): 18 | 19 | def __init__(self,params): 20 | self.__params=params 21 | self.__img_contets=os.listdir(self.__params.left_path) 22 | self.__img_contets.sort() 23 | self.__contents = os.listdir(self.__params.gt_path) 24 | self.__contents.sort() 25 | self.__training_samples=len(self.__contents) 26 | self.__sample_index=0 27 | self.epoch=0 28 | self.maxwidth=0 29 | self.maxheight=0 30 | self.configure_input_size() 31 | self.__widthresize =self.maxwidth+ (self.__params.down_sample_ratio - self.maxwidth%self.__params.down_sample_ratio)%self.__params.down_sample_ratio 32 | self.__heightresize =self.maxheight+( self.__params.down_sample_ratio - self.maxheight%self.__params.down_sample_ratio)%self.__params.down_sample_ratio 33 | 34 | # self.shuffle_data() 35 | self.max_disp=356 36 | 37 | def get_training_data_size(self): 38 | return 256,256,2 39 | 40 | def shuffle_data(self): 41 | millis = int(round(time.time())) 42 | np.random.seed(millis) 43 | np.random.shuffle(self.__img_contets) 44 | np.random.seed(millis) 45 | np.random.shuffle(self.__contents) 46 | 47 | def get_sample_size(self): 48 | return self.__training_samples 49 | 50 | def get_sample_index(self): 51 | return self.__sample_index 52 | 53 | 54 | def get_data_size(self): 55 | return self.__heightresize,self.__widthresize,2 56 | 57 | def configure_input_size(self): 58 | 59 | for i in range(len(self.__img_contets)): 60 | img = scipy.misc.imread( self.__params.left_path+self.__img_contets[i]).astype(float); 61 | s = img.shape 62 | if self.maxheight < s[0]: 63 | self.maxheight = s[0] 64 | 65 | if self.maxwidth < s[1]: 66 | self.maxwidth = s[1] 67 | 68 | 69 | def load_training_sample(self): 70 | if self.__sample_index >= self.__training_samples: 71 | self.__sample_index=0 72 | self.epoch+=1 73 | self.shuffle_data() 74 | 75 | img = scipy.misc.imread( self.__params.left_path+self.__img_contets[self.__sample_index]).astype(np.float32); 76 | img = img[:,:,0]*0.299 + img[:,:,1]*0.587 + img[:,:,2]*0.114 77 | 78 | model=self.__params.kitti_disp_path 79 | if(bool(random.getrandbits(1))): 80 | model=self.__params.kitti15_disp_path 81 | 82 | disp = pfm.load(model+self.__contents[self.__sample_index])[0].astype(float) 83 | gt = pfm.load(self.__params.gt_path+self.__contents[self.__sample_index])[0].astype(float) 84 | gt_noc = pfm.load(self.__params.gt_path_noc+self.__contents[self.__sample_index])[0].astype(float) 85 | s = img.shape 86 | maxheight = s[0]-256 87 | maxwidth = s[1]-256 88 | x = random.randint(0,maxheight) 89 | y = random.randint(0,maxwidth) 90 | disp = disp[x:x+256,y:y+256] 91 | img = img[x:x+256,y:y+256] 92 | gt = gt[x:x+256,y:y+256] 93 | gt_noc = gt_noc[x:x+256,y:y+256] 94 | data = np.stack([disp,img],axis=2) 95 | data = np.reshape(data,[1,data.shape[0],data.shape[1],data.shape[2]]) 96 | gt = np.reshape(gt,[1,gt.shape[0],gt.shape[1],1]) 97 | gt_noc = np.reshape(gt_noc,[1,gt_noc.shape[0],gt_noc.shape[1],1]) 98 | 99 | self.__sample_index+=1 100 | 101 | return data,gt,gt_noc,self.__sample_index 102 | 103 | def load_verify_sample(self): 104 | if self.__sample_index >= self.__training_samples: 105 | self.__sample_index=0 106 | self.epoch+=1 107 | 108 | img = scipy.misc.imread( self.__params.left_path+self.__img_contets[self.__sample_index]).astype(np.float32); 109 | img = img[:,:,0]*0.299 + img[:,:,1]*0.587 + img[:,:,2]*0.114 110 | model=self.__params.kitti_disp_path 111 | 112 | disp = pfm.load(model+self.__contents[self.__sample_index])[0].astype(float) 113 | gt = pfm.load(self.__params.gt_path+self.__contents[self.__sample_index])[0].astype(float) 114 | gt_noc = pfm.load(self.__params.gt_path_noc+self.__contents[self.__sample_index])[0].astype(float) 115 | s = img.shape 116 | height,width= img.shape; 117 | if s[0] = self.__training_samples-self.__val_num: 74 | self.__sample_index=0 75 | self.epoch+=1 76 | self.shuffle_data() 77 | 78 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 79 | img = img[:,:,0]*0.299 + img[:,:,1]*0.587 + img[:,:,2]*0.114 80 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 81 | gt = scipy.misc.imread( self.__params.gt_path+self.__contents[self.__sample_index]).astype(float)/256; 82 | gt_noc = scipy.misc.imread( self.__params.gt_path_noc+self.__contents[self.__sample_index]).astype(float)/256; 83 | 84 | s = img.shape 85 | maxheight = s[0]-256 86 | maxwidth = s[1]-256 87 | x = random.randint(0,maxheight) 88 | y = random.randint(0,maxwidth) 89 | disp = disp[x:x+256,y:y+256] 90 | img = img[x:x+256,y:y+256] 91 | gt = gt[x:x+256,y:y+256] 92 | gt_noc = gt_noc[x:x+256,y:y+256] 93 | 94 | data = np.stack([disp,img],axis=2) 95 | data = np.reshape(data,[1,data.shape[0],data.shape[1],data.shape[2]]) 96 | gt = np.reshape(gt,[1,gt.shape[0],gt.shape[1],1]) 97 | gt_noc = np.reshape(gt_noc,[1,gt_noc.shape[0],gt_noc.shape[1],1]) 98 | 99 | self.__sample_index+=1 100 | 101 | return data,gt,gt_noc,self.__sample_index 102 | 103 | 104 | def load_validation_sample(self): 105 | if self.__sample_index >= self.__training_samples: 106 | self.__sample_index=self.__training_samples-40 107 | 108 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 109 | img = img[:,:,0]*0.299 + img[:,:,1]*0.587 + img[:,:,2]*0.114 110 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 111 | gt = scipy.misc.imread( self.__params.gt_path+self.__contents[self.__sample_index]).astype(float)/256; 112 | gt_noc = scipy.misc.imread( self.__params.gt_path_noc+self.__contents[self.__sample_index]).astype(float)/256; 113 | 114 | s = img.shape 115 | if s[0] = self.__training_samples: 139 | self.__sample_index=self.__training_samples-40 140 | 141 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 142 | img = img[:,:,0]*0.299 + img[:,:,1]*0.587 + img[:,:,2]*0.114 143 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 144 | gt = scipy.misc.imread( self.__params.gt_path+self.__contents[self.__sample_index]).astype(float)/256; 145 | gt_noc = scipy.misc.imread( self.__params.gt_path_noc+self.__contents[self.__sample_index]).astype(float)/256; 146 | 147 | s = img.shape 148 | height,width= img.shape; 149 | if s[0] = self.__training_samples: 174 | self.__sample_index=0 175 | 176 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 177 | img = img[:,:,0]*0.299 + img[:,:,1]*0.587 + img[:,:,2]*0.114 178 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 179 | 180 | height,width= img.shape; 181 | 182 | s = img.shape 183 | if s[0] = self.__training_samples-self.__val_num: 57 | self.__sample_index=0 58 | self.epoch+=1 59 | 60 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 61 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 62 | gt = scipy.misc.imread( self.__params.gt_path+self.__contents[self.__sample_index]).astype(float)/256; 63 | gt_noc = scipy.misc.imread( self.__params.gt_path_noc+self.__contents[self.__sample_index]).astype(float)/256; 64 | 65 | height,width = img.shape 66 | 67 | s = img.shape 68 | maxheight = s[0]-256 69 | maxwidth = s[1]-256 70 | x = random.randint(0,maxheight) 71 | y = random.randint(0,maxwidth) 72 | disp = disp[x:x+256,y:y+256] 73 | img = img[x:x+256,y:y+256] 74 | gt = gt[x:x+256,y:y+256] 75 | gt_noc = gt_noc[x:x+256,y:y+256] 76 | 77 | data = np.stack([disp,img],axis=2) 78 | data = np.reshape(data,[1,data.shape[0],data.shape[1],data.shape[2]]) 79 | gt = np.reshape(gt,[1,gt.shape[0],gt.shape[1],1]) 80 | gt_noc = np.reshape(gt_noc,[1,gt_noc.shape[0],gt_noc.shape[1],1]) 81 | 82 | self.__sample_index+=1 83 | 84 | return data,gt,gt_noc,self.__sample_index 85 | 86 | def load_validation_sample(self): 87 | if self.__sample_index >= self.__training_samples: 88 | self.__sample_index=self.__training_samples-40 89 | self.epoch+=1 90 | 91 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 92 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 93 | gt = scipy.misc.imread( self.__params.gt_path+self.__contents[self.__sample_index]).astype(float)/256; 94 | gt_noc = scipy.misc.imread( self.__params.gt_path_noc+self.__contents[self.__sample_index]).astype(float)/256; 95 | 96 | s = img.shape 97 | if s[0] = self.__training_samples: 121 | self.__sample_index=0 122 | self.epoch+=1 123 | 124 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 125 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 126 | gt = scipy.misc.imread( self.__params.gt_path+self.__contents[self.__sample_index]).astype(float)/256; 127 | gt_noc = scipy.misc.imread( self.__params.gt_path_noc+self.__contents[self.__sample_index]).astype(float)/256; 128 | 129 | s = img.shape 130 | height,width= img.shape; 131 | if s[0] = self.__training_samples: 157 | self.__sample_index=0 158 | 159 | img = scipy.misc.imread( self.__params.left_path+self.__contents[self.__sample_index]).astype(float); 160 | disp = scipy.misc.imread( self.__params.disp_path+self.__contents[self.__sample_index]).astype(float)/256; 161 | 162 | height,width = img.shape 163 | 164 | s = img.shape 165 | if s[0] 0 ):#and dataloader.epoch> 0 242 | 243 | print "########################### Running Validation ###########################################" 244 | 245 | def validate( validation_dataloader, v_height,v_width ): 246 | accumulate_pred = np.empty([0]) 247 | accumulate_pred_1 = np.empty([0]) 248 | accumulate_init = np.empty([0]) 249 | validation_dataloader.init_sample_index(validation_dataloader.get_sample_size()-40) 250 | 251 | while( validation_dataloader.get_sample_index() < validation_dataloader.get_sample_size()): 252 | 253 | data,gt,gt_noc,sindex = validation_dataloader.load_validation_sample(); 254 | outp,err_it1,err,ini_err,disp,errmp,errmi = sess.run([pred,error_it1,error_it2,init_error,x,error_map_it2,error_map], feed_dict={x: data,y: gt, y_noc:gt_noc, input_width:v_width,input_height:v_height,is_training:False, 255 | max_disp:validation_dataloader.max_disp,keep_prob:1}) 256 | 257 | print("Sample: "+ str(validation_dataloader.get_sample_index()) + " Step: " + str(sindex) + " Init Error " + str(ini_err) + "Error it1: " + str(err_it1) + " Error: " + str(err) ) 258 | accumulate_pred = np.append( accumulate_pred,[err ] ) 259 | accumulate_pred_1 = np.append( accumulate_pred_1,[err_it1 ] ) 260 | accumulate_init = np.append( accumulate_init,[ini_err]) 261 | 262 | 263 | 264 | 265 | mean_error_1 = np.mean(accumulate_pred_1) 266 | mean_error = np.mean(accumulate_pred) 267 | mean_init_error = np.mean(accumulate_init) 268 | print(' Validation Initial Error: '+ str(mean_init_error) + 'Validation Mean error 1: ' + str(mean_error_1) + 'Validation Mean error 2: ' + str(mean_error) ) 269 | return mean_error 270 | 271 | if loader_data["loader"] != "freiburg": 272 | if loader_data["loader"] == "kitti" or loader_data["loader"] == "pretrain": 273 | kt_height,kt_width,kt_channels = kitti_dataloader.get_data_size() 274 | mean_error=validate(kitti_dataloader, kt_height,kt_width ) 275 | v_e = sess.run( kt_val_merge, {kt_val_err:mean_error} ) 276 | val_writer.add_summary(v_e,1) 277 | val_writer.flush() 278 | 279 | if loader_data["loader"] == "kitti15" or loader_data["loader"] == "pretrain": 280 | kt15_height,kt15_width,kt15_channels = kitti15_dataloader.get_data_size() 281 | mean_error=validate(kitti15_dataloader, kt15_height,kt15_width ) 282 | v_e = sess.run( kt15_val_merge, {kt15_val_err:mean_error} ) 283 | val_writer.add_summary(v_e,1) 284 | val_writer.flush() 285 | 286 | # if (dataloader.epoch%5 == 0) : 287 | save_path = saver.save(sess, model_save,global_step=global_step) 288 | print("Model saved in file: %s" % save_path) 289 | 290 | 291 | 292 | elif args.mode == 'verify': 293 | with tf.Session() as sess: 294 | 295 | 296 | if loader_data["loader"] == "kitti": 297 | dataloader = ktdt(params) 298 | elif loader_data["loader"] == "kitti15": 299 | dataloader = kt2015dt(params) 300 | else: 301 | dataloader = frdt(params) 302 | 303 | height,width,channels = dataloader.get_data_size(); 304 | 305 | x=tf.placeholder(tf.float32, [1,None,None,channels], name="x_p") 306 | y=tf.placeholder(tf.float32, [1,None,None,1], name="y_p") 307 | y_noc=tf.placeholder(tf.float32, [1,None,None,1], name="y_noc_p") 308 | 309 | input_height = tf.Variable(0, name="input_height",dtype=tf.int32) 310 | input_width = tf.Variable(0, name="input_width",dtype=tf.int32) 311 | is_training = tf.Variable(False, name="is_training",dtype=tf.bool) 312 | keep_prob = tf.placeholder(tf.float32,name="keep_prob") 313 | error_map = tfmodel.get_error_map(x[:,:,:,0:1],y) 314 | init_error = tfmodel.gt_compare(x[:,:,:,0:1],y) 315 | 316 | pred = tf.to_float(tfmodel.h_net(x,input_height,input_width,is_training,reuse=False,keep_prob=keep_prob)) 317 | out1 = pred[:,:,:,4:5] 318 | r1_it1 = pred[:,:,:,1:2] 319 | r2_it1 = pred[:,:,:,2:3] 320 | r3_it1 = pred[:,:,:,3:4] 321 | 322 | error_map_it1 = tfmodel.get_error_map(pred[:,:,:,4:5],y) 323 | error_it1 = tfmodel.gt_compare(pred[:,:,:,4:5],y) 324 | 325 | pred = tf.concat([pred[:,:,:,4:5],x[:,:,:,1:2]],3) 326 | pred = tf.to_float(tfmodel.h_net(pred,input_height,input_width,is_training,reuse=True,keep_prob=keep_prob)) 327 | 328 | r1_it2 = pred[:,:,:,1:2] 329 | r2_it2 = pred[:,:,:,2:3] 330 | r3_it2 = pred[:,:,:,3:4] 331 | 332 | error_map_it2 = tfmodel.get_error_map(pred[:,:,:,4:5],y) 333 | error_it2 = tfmodel.gt_compare(pred[:,:,:,4:5],y) 334 | 335 | saver = tf.train.Saver() 336 | saver.restore(sess, args.model) 337 | print("Model restored") 338 | 339 | accumulate_it1 = np.empty([0]) 340 | accumulate_it2 = np.empty([0]) 341 | accumulate_init = np.empty([0]) 342 | 343 | while( dataloader.get_sample_index() < dataloader.get_sample_size() ): 344 | data,gt,gt_noc,sindex,o_height,o_width,name = dataloader.load_verify_sample(); 345 | n_h = data.shape[1] - o_height; 346 | n_w = data.shape[2] - o_width; 347 | 348 | disp_p = np.copy(data[0,n_h:data.shape[1],n_w:data.shape[2],0 ]) 349 | cpputils.write2png(disp_p.astype(np.float32),str("./validation/"+name)) 350 | outp,outp1,ini_err,err_it1,err_it2,init_error_map,err_map_it1,err_map_it2, r1_i1,r2_i1,r3_i1, r1_i2,r2_i2,r3_i2 = sess.run([pred,out1,init_error,error_it1,error_it2,error_map,error_map_it1,error_map_it2,r1_it1,r2_it1,r3_it1, 351 | r1_it2,r2_it2,r3_it2 ], feed_dict={x: data,y:gt,y_noc:gt_noc,input_width:width,input_height:height,is_training:False,keep_prob:1}) 352 | 353 | accumulate_it1 = np.append( accumulate_it1,[err_it1 ] ) 354 | accumulate_it2 = np.append( accumulate_it2,[err_it2 ] ) 355 | accumulate_init = np.append( accumulate_init,[ini_err]) 356 | print "Sample index: " + str(dataloader.get_sample_index()) + " Init error: " + str(ini_err) + " it1 error: " + str(err_it1)+ " it2 error: " + str(err_it2) 357 | 358 | disp_init = np.copy(data[0, n_h:data.shape[1],n_w:data.shape[2],0 ]) 359 | disp_p1 = np.copy(outp1[0, n_h:outp1.shape[1],n_w:outp1.shape[2],0 ]) 360 | disp_p = np.copy(outp[0, n_h:outp.shape[1],n_w:outp.shape[2],4 ]) 361 | 362 | 363 | ier = np.copy(init_error_map[0, n_h:init_error_map.shape[1],n_w:init_error_map.shape[2],0 ]) 364 | it_er1 = np.copy(err_map_it1[0, n_h:err_map_it1.shape[1],n_w:err_map_it1.shape[2],0 ]) 365 | it_er2 = np.copy(err_map_it2[0, n_h:err_map_it2.shape[1],n_w:err_map_it2.shape[2],0 ]) 366 | 367 | rit1= r1_i1[0,:,:,0]+r2_i1[0,:,:,0]+r3_i1[0,:,:,0] 368 | rit2= r1_i2[0,:,:,0]+r2_i2[0,:,:,0]+r3_i2[0,:,:,0] 369 | mean_error_it2 = np.mean(accumulate_it2) 370 | mean_error_it1 = np.mean(accumulate_it1) 371 | mean_init_error = np.mean(accumulate_init) 372 | print('Init mean error: ' + str(mean_init_error) + ' It 1 mean error : '+ str(mean_error_it1) + ' It 2 mean error : '+ str(mean_error_it2)) 373 | else: 374 | with tf.Session() as sess: 375 | 376 | if loader_data["loader"] == "kitti": 377 | dataloader = ktdt(params) 378 | elif loader_data["loader"] == "kitti15": 379 | dataloader = kt2015dt(params) 380 | else: 381 | dataloader = frdt(params) 382 | 383 | height,width,channels = dataloader.get_data_size(); 384 | 385 | x=tf.placeholder(tf.float32, [1,None,None,channels], name="x_p") 386 | 387 | input_height = tf.Variable(0, name="input_height",dtype=tf.int32) 388 | input_width = tf.Variable(0, name="input_width",dtype=tf.int32) 389 | is_training = tf.Variable(False, name="is_training",dtype=tf.bool) 390 | keep_prob = tf.placeholder(tf.float32,name="keep_prob") 391 | 392 | pred = tf.to_float(tfmodel.h_net(x,input_height,input_width,is_training,reuse=False,keep_prob=keep_prob)) 393 | pred = tf.concat([pred[:,:,:,4:5],x[:,:,:,1:2]],3) 394 | pred = tf.to_float(tfmodel.h_net(pred,input_height,input_width,is_training,reuse=True,keep_prob=keep_prob)) 395 | 396 | saver = tf.train.Saver() 397 | saver.restore(sess, args.model) 398 | print("Model restored") 399 | 400 | while( dataloader.get_sample_index() < dataloader.get_sample_size() ): 401 | 402 | data,sindex,o_height,o_width,name = dataloader.load_test_sample(); 403 | outp = sess.run(pred, feed_dict={x: data,input_width:width,input_height:height,is_training:False,keep_prob:1}) 404 | 405 | n_h = outp.shape[1] - o_height; 406 | n_w = outp.shape[2] - o_width; 407 | 408 | disp_p = np.copy(outp[0, n_h:outp.shape[1],n_w:outp.shape[2],4 ]) 409 | cpputils.write2png(disp_p,str("./test/"+name)) 410 | print("Res saved at: "+ "./test/"+name ) 411 | 412 | 413 | 414 | 415 | -------------------------------------------------------------------------------- /params/freiburg.json: -------------------------------------------------------------------------------- 1 | { 2 | "loader":"freiburg", 3 | "left_path":"..../Freiburg/driving/frames_cleanpass/15mm_focallength/scene_forwards/slow/left/", 4 | "kitti_disp_path":"(disparities computes with different models)..../freiburg/mc-cnn-fst/kitti/filtered/", 5 | "kitti15_disp_path":"(disparities computes with different models)..../freiburg/mc-cnn-fst/kitti15/filtered/", 6 | "gt_path":"...../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/left/", 7 | "gt_path_noc":"...../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/left_nonocc/", 8 | "down_sample_ratio":8, 9 | "epochs":400 10 | } -------------------------------------------------------------------------------- /params/kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "loader":"kitti", 3 | "left_path":"..../data.kitti/unzip/training/image_0/", 4 | "disp_path":"..../kitti12/training/filtered/", 5 | "gt_path":"..../data.kitti/unzip/training/disp_occ/", 6 | "gt_path_noc":"..../data.kitti/unzip/training/disp_noc/", 7 | "down_sample_ratio":8, 8 | "epochs":50 9 | } -------------------------------------------------------------------------------- /params/kitti15.json: -------------------------------------------------------------------------------- 1 | { 2 | "loader":"kitti15", 3 | "left_path":"..../data.kitti2015/unzip/training/image_2/", 4 | "disp_path":"..../kitti15/training/filtered/", 5 | "gt_path":"..../data.kitti2015/unzip/training/disp_occ_0/", 6 | "gt_path_noc":"..../data.kitti2015/unzip/training/disp_noc_0/", 7 | "down_sample_ratio":8, 8 | "epochs":50 9 | } -------------------------------------------------------------------------------- /params/kitti15_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "loader":"kitti15", 3 | "left_path":"..../data.kitti2015/unzip/testing/image_2/", 4 | "disp_path":"..../kitti15/testing/filtered/", 5 | "gt_path":"", 6 | "gt_path_noc":"", 7 | "down_sample_ratio":8, 8 | "epochs":0 9 | } -------------------------------------------------------------------------------- /params/kitti_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "loader":"kitti", 3 | "left_path":"..../data.kitti/unzip/testing/image_0/", 4 | "disp_path":"..../kitti12/testing/filtered/", 5 | "gt_path":"", 6 | "gt_path_noc":"", 7 | "down_sample_ratio":8, 8 | "epochs":0 9 | } -------------------------------------------------------------------------------- /params/pretrain.json: -------------------------------------------------------------------------------- 1 | { 2 | "loader":"pretrain", 3 | "data":{ 4 | 5 | "freiburg":{ 6 | "loader":"freiburg", 7 | "left_path":"..../Freiburg/driving/frames_cleanpass/15mm_focallength/scene_forwards/slow/left/", 8 | "kitti_disp_path":"..../freiburg/mc-cnn-fst/kitti/filtered/", 9 | "kitti15_disp_path":"..../freiburg/mc-cnn-fst/kitti15/filtered/", 10 | "gt_path":"..../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/left/", 11 | "gt_path_noc":"..../Freiburg/driving/disparity/15mm_focallength/scene_forwards/slow/left_nonocc/", 12 | "down_sample_ratio":8, 13 | "epochs":400 14 | }, 15 | "kitti":{ 16 | "loader":"kitti", 17 | "left_path":"..../data.kitti/unzip/training/image_0/", 18 | "disp_path":"..../kitti12/training/filtered/", 19 | "gt_path":"..../data.kitti/unzip/training/disp_occ/", 20 | "gt_path_noc":"..../data.kitti/unzip/training/disp_noc/", 21 | "down_sample_ratio":8, 22 | "epochs":50 23 | }, 24 | "kitti15":{ 25 | "loader":"kitti15", 26 | "left_path":"..../data.kitti2015/unzip/training/image_2/", 27 | "disp_path":"..../kitti15/training/filtered/", 28 | "gt_path":"..../data.kitti2015/unzip/training/disp_occ_0/", 29 | "gt_path_noc":"..../data.kitti2015/unzip/training/disp_noc_0/", 30 | "down_sample_ratio":8, 31 | "epochs":50 32 | } 33 | 34 | } 35 | } -------------------------------------------------------------------------------- /pylibs/pfmutil.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import re 4 | import sys 5 | import struct 6 | 7 | import matplotlib.pyplot as plt 8 | import matplotlib.image as mpimg 9 | import numpy as np 10 | 11 | def load(fname): 12 | color = None 13 | width = None 14 | height = None 15 | scale = None 16 | endian = None 17 | 18 | file = open(fname) 19 | header = file.readline().rstrip() 20 | if header == 'PF': 21 | color = True 22 | elif header == 'Pf': 23 | color = False 24 | else: 25 | raise Exception('Not a PFM file.') 26 | 27 | dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) 28 | if dim_match: 29 | width, height = map(int, dim_match.groups()) 30 | else: 31 | raise Exception('Malformed PFM header.') 32 | 33 | scale = float(file.readline().rstrip()) 34 | if scale < 0: # little-endian 35 | endian = '<' 36 | scale = -scale 37 | else: 38 | endian = '>' # big-endian 39 | 40 | data = np.fromfile(file, endian + 'f') 41 | shape = (height, width, 3) if color else (height, width) 42 | return np.flipud(np.reshape(data, shape)).astype(np.float32), scale 43 | 44 | def save(fname, image, scale=1): 45 | file = open(fname, 'w') 46 | color = None 47 | 48 | if image.dtype.name != 'float32': 49 | raise Exception('Image dtype must be float32.') 50 | 51 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 52 | color = True 53 | elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale 54 | color = False 55 | else: 56 | raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.') 57 | 58 | file.write('PF\n' if color else 'Pf\n') 59 | file.write('%d %d\n' % (image.shape[1], image.shape[0])) 60 | 61 | endian = image.dtype.byteorder 62 | 63 | if endian == '<' or endian == '=' and sys.byteorder == 'little': 64 | scale = -scale 65 | 66 | file.write('%f\n' % scale) 67 | 68 | np.flipud(image).tofile(file) 69 | 70 | def show(img): 71 | imgplot = plt.imshow(img.astype(np.float32), cmap='gray'); 72 | plt.show(); 73 | -------------------------------------------------------------------------------- /pylibs/tfutils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def _variable_on_cpu(name, shape, initializer, use_fp16=False): 6 | """Helper to create a Variable stored on CPU memory. 7 | Args: 8 | name: name of the variable 9 | shape: list of ints 10 | initializer: initializer for Variable 11 | Returns: 12 | Variable Tensor 13 | """ 14 | with tf.device('/cpu:0'): 15 | dtype = tf.float16 if use_fp16 else tf.float32 16 | var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype) 17 | return var 18 | 19 | def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True): 20 | """Helper to create an initialized Variable with weight decay. 21 | Note that the Variable is initialized with a truncated normal distribution. 22 | A weight decay is added only if one is specified. 23 | Args: 24 | name: name of the variable 25 | shape: list of ints 26 | stddev: standard deviation of a truncated Gaussian 27 | wd: add L2Loss weight decay multiplied by this float. If None, weight 28 | decay is not added for this Variable. 29 | use_xavier: bool, whether to use xavier initializer 30 | Returns: 31 | Variable Tensor 32 | """ 33 | if use_xavier: 34 | initializer = tf.contrib.layers.xavier_initializer() 35 | else: 36 | initializer = tf.truncated_normal_initializer(stddev=stddev) 37 | var = _variable_on_cpu(name, shape, initializer) 38 | if wd is not None: 39 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') 40 | tf.add_to_collection('losses', weight_decay) 41 | return var 42 | 43 | def max_out(inputs, num_units, axis=None): 44 | shape = inputs.get_shape().as_list() 45 | if shape[0] is None: 46 | shape[0] = -1 47 | if axis is None: # Assume that channel is the last dimension 48 | axis = -1 49 | num_channels = shape[axis] 50 | if num_channels % num_units: 51 | raise ValueError('number of features({}) is not ' 52 | 'a multiple of num_units({})'.format(num_channels, num_units)) 53 | shape[axis] = num_units 54 | shape += [num_channels // num_units] 55 | print shape 56 | outputs = tf.reduce_max(tf.reshape(inputs, shape), -1, keep_dims=False) 57 | return outputs 58 | 59 | 60 | def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope,reuse): 61 | """ Batch normalization on 2D convolutional maps. 62 | 63 | Args: 64 | inputs: Tensor, 4D BHWC input maps 65 | is_training: boolean tf.Varialbe, true indicates training phase 66 | bn_decay: float or float tensor variable, controling moving average weight 67 | scope: string, variable scope 68 | Return: 69 | normed: batch-normalized maps 70 | """ 71 | return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay,reuse) 72 | 73 | def batch_norm_template(inputs, is_training, scope, moments_dims, bn_decay,reuse=False): 74 | """ Batch normalization on convolutional maps and beyond... 75 | Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow 76 | 77 | Args: 78 | inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC 79 | is_training: boolean tf.Varialbe, true indicates training phase 80 | scope: string, variable scope 81 | moments_dims: a list of ints, indicating dimensions for moments calculation 82 | bn_decay: float or float tensor variable, controling moving average weight 83 | Return: 84 | normed: batch-normalized maps 85 | """ 86 | with tf.variable_scope(scope,reuse=reuse) as sc: 87 | num_channels = inputs.get_shape()[-1].value 88 | beta = tf.Variable(tf.constant(0.0, shape=[num_channels]), 89 | name='beta', trainable=True) 90 | gamma = tf.Variable(tf.constant(1.0, shape=[num_channels]), 91 | name='gamma', trainable=True) 92 | batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments') 93 | decay = 0.99 94 | # ema = tf.train.ExponentialMovingAverage(decay=decay) 95 | # # Operator that maintains moving averages of variables. 96 | # ema_apply_op = tf.cond(is_training, 97 | # lambda: ema.apply([batch_mean, batch_var]), 98 | # lambda: tf.no_op()) 99 | 100 | pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False) 101 | pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False) 102 | 103 | if is_training: 104 | train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 105 | train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 106 | with tf.control_dependencies([train_mean, train_var]): 107 | return tf.nn.batch_normalization(inputs, 108 | batch_mean, batch_var, beta, gamma, 1e-3) 109 | else: 110 | return tf.nn.batch_normalization(inputs, 111 | pop_mean, pop_var, beta, gamma, 1e-3) 112 | 113 | 114 | def batch_norm(inputs, is_training, scope, moments_dims, bn_decay,reuse=False): 115 | """ Batch normalization on convolutional maps and beyond... 116 | Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow 117 | 118 | Args: 119 | inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC 120 | is_training: boolean tf.Varialbe, true indicates training phase 121 | scope: string, variable scope 122 | moments_dims: a list of ints, indicating dimensions for moments calculation 123 | bn_decay: float or float tensor variable, controling moving average weight 124 | Return: 125 | normed: batch-normalized maps 126 | """ 127 | with tf.variable_scope(scope,reuse=reuse) as sc: 128 | num_channels = inputs.get_shape()[-1].value 129 | beta = tf.Variable(tf.constant(0.0, shape=[num_channels]), 130 | name='beta', trainable=True) 131 | gamma = tf.Variable(tf.constant(1.0, shape=[num_channels]), 132 | name='gamma', trainable=True) 133 | batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments') 134 | decay = bn_decay if bn_decay is not None else 0.9 135 | ema = tf.train.ExponentialMovingAverage(decay=decay) 136 | # Operator that maintains moving averages of variables. 137 | ema_apply_op = tf.cond(is_training, 138 | lambda: ema.apply([batch_mean, batch_var]), 139 | lambda: tf.no_op()) 140 | 141 | # Update moving average and return current batch's avg and var. 142 | def mean_var_with_update(): 143 | with tf.control_dependencies([ema_apply_op]): 144 | return tf.identity(batch_mean), tf.identity(batch_var) 145 | 146 | # ema.average returns the Variable holding the average of var. 147 | mean, var = tf.cond(is_training, 148 | mean_var_with_update, 149 | lambda: (ema.average(batch_mean), ema.average(batch_var))) 150 | normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3) 151 | return normed 152 | 153 | 154 | 155 | def conv2d_depth(inputs, 156 | num_output_channels, 157 | kernel_size, 158 | scope, 159 | stride=[1, 1], 160 | padding='SAME', 161 | rate=1, 162 | use_xavier=True, 163 | stddev=1e-3, 164 | weight_decay=0.0, 165 | activation_fn=tf.nn.relu, 166 | bn=False, 167 | bn_decay=None, 168 | is_training=None, 169 | dropout=None, 170 | reuse=False): 171 | """ 2D convolution with non-linear operation. 172 | Args: 173 | inputs: 4-D tensor variable BxHxWxC 174 | num_output_channels: int 175 | kernel_size: a list of 2 ints 176 | scope: string 177 | stride: a list of 2 ints 178 | padding: 'SAME' or 'VALID' 179 | use_xavier: bool, use xavier_initializer if true 180 | stddev: float, stddev for truncated_normal init 181 | weight_decay: float 182 | activation_fn: function 183 | bn: bool, whether to use batch norm 184 | bn_decay: float or float tensor variable in [0,1] 185 | is_training: bool Tensor variable 186 | Returns: 187 | Variable tensor 188 | """ 189 | with tf.variable_scope(scope,reuse=reuse) as sc: 190 | kernel_h, kernel_w = kernel_size 191 | num_in_channels = inputs.get_shape()[-1].value 192 | kernel_shape = [kernel_h, kernel_w, 193 | num_in_channels, num_output_channels] 194 | kernel = _variable_with_weight_decay('weights', 195 | shape=kernel_shape, 196 | use_xavier=use_xavier, 197 | stddev=stddev, 198 | wd=weight_decay) 199 | stride_h, stride_w = stride 200 | outputs = tf.nn.depthwise_conv2d(inputs, kernel, 201 | [1, stride_h, stride_w, 1], 202 | rate=rate, 203 | padding=padding) 204 | biases = _variable_on_cpu('biases', [num_in_channels*num_output_channels], 205 | tf.constant_initializer(0.0)) 206 | outputs = tf.nn.bias_add(outputs, biases) 207 | 208 | if bn: 209 | outputs = batch_norm_for_conv2d(outputs, is_training, 210 | bn_decay=bn_decay, scope='bn',reuse=reuse) 211 | 212 | 213 | if activation_fn is not None: 214 | outputs = activation_fn(outputs) 215 | 216 | if dropout is not None: 217 | outputs = tf.nn.dropout(outputs, dropout) 218 | 219 | return outputs 220 | 221 | 222 | def conv2d(inputs, 223 | num_output_channels, 224 | kernel_size, 225 | scope, 226 | stride=[1, 1], 227 | padding='SAME', 228 | use_xavier=True, 229 | stddev=1e-3, 230 | weight_decay=0.0, 231 | activation_fn=tf.nn.relu, 232 | bn=False, 233 | bn_decay=None, 234 | is_training=None, 235 | dropout=None, 236 | reuse=False): 237 | """ 2D convolution with non-linear operation. 238 | Args: 239 | inputs: 4-D tensor variable BxHxWxC 240 | num_output_channels: int 241 | kernel_size: a list of 2 ints 242 | scope: string 243 | stride: a list of 2 ints 244 | padding: 'SAME' or 'VALID' 245 | use_xavier: bool, use xavier_initializer if true 246 | stddev: float, stddev for truncated_normal init 247 | weight_decay: float 248 | activation_fn: function 249 | bn: bool, whether to use batch norm 250 | bn_decay: float or float tensor variable in [0,1] 251 | is_training: bool Tensor variable 252 | Returns: 253 | Variable tensor 254 | """ 255 | with tf.variable_scope(scope,reuse=reuse) as sc: 256 | kernel_h, kernel_w = kernel_size 257 | num_in_channels = inputs.get_shape()[-1].value 258 | kernel_shape = [kernel_h, kernel_w, 259 | num_in_channels, num_output_channels] 260 | kernel = _variable_with_weight_decay('weights', 261 | shape=kernel_shape, 262 | use_xavier=use_xavier, 263 | stddev=stddev, 264 | wd=weight_decay) 265 | stride_h, stride_w = stride 266 | outputs = tf.nn.conv2d(inputs, kernel, 267 | [1, stride_h, stride_w, 1], 268 | padding=padding) 269 | biases = _variable_on_cpu('biases', [num_output_channels], 270 | tf.constant_initializer(0.0)) 271 | outputs = tf.nn.bias_add(outputs, biases) 272 | 273 | if bn: 274 | outputs = batch_norm_for_conv2d(outputs, is_training, 275 | bn_decay=bn_decay, scope='bn',reuse=reuse) 276 | 277 | if activation_fn is not None: 278 | outputs = activation_fn(outputs) 279 | 280 | if dropout is not None: 281 | outputs = tf.nn.dropout(outputs, dropout) 282 | 283 | return outputs 284 | 285 | 286 | def conv2d_transpose(inputs, 287 | num_output_channels, 288 | kernel_size, 289 | scope, 290 | stride=[1, 1], 291 | padding='SAME', 292 | use_xavier=True, 293 | stddev=1e-3, 294 | weight_decay=0.0, 295 | activation_fn=tf.nn.relu, 296 | bn=False, 297 | bn_decay=None, 298 | is_training=None, 299 | dropout=None, 300 | height=None, 301 | width=None, 302 | reuse=False): 303 | """ 2D convolution transpose with non-linear operation. 304 | Args: 305 | inputs: 4-D tensor variable BxHxWxC 306 | num_output_channels: int 307 | kernel_size: a list of 2 ints 308 | scope: string 309 | stride: a list of 2 ints 310 | padding: 'SAME' or 'VALID' 311 | use_xavier: bool, use xavier_initializer if true 312 | stddev: float, stddev for truncated_normal init 313 | weight_decay: float 314 | activation_fn: function 315 | bn: bool, whether to use batch norm 316 | bn_decay: float or float tensor variable in [0,1] 317 | is_training: bool Tensor variable 318 | Returns: 319 | Variable tensor 320 | Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a 321 | """ 322 | with tf.variable_scope(scope,reuse=reuse) as sc: 323 | kernel_h, kernel_w = kernel_size 324 | num_in_channels = inputs.get_shape()[-1].value 325 | kernel_shape = [kernel_h, kernel_w, 326 | num_output_channels, num_in_channels] # reversed to conv2d 327 | kernel = _variable_with_weight_decay('weights', 328 | shape=kernel_shape, 329 | use_xavier=use_xavier, 330 | stddev=stddev, 331 | wd=weight_decay) 332 | stride_h, stride_w = stride 333 | 334 | # from slim.convolution2d_transpose 335 | def get_deconv_dim(dim_size, stride_size, kernel_size, padding): 336 | dim_size *= stride_size 337 | 338 | if padding == 'VALID' and dim_size is not None: 339 | dim_size += max(kernel_size - stride_size, 0) 340 | return dim_size 341 | 342 | # caculate output shape 343 | batch_size = inputs.get_shape()[0].value 344 | if height == None: 345 | height = inputs.get_shape()[1].value 346 | if width == None: 347 | width = inputs.get_shape()[2].value 348 | 349 | out_height = get_deconv_dim(height, stride_h, kernel_h, padding) 350 | out_width = get_deconv_dim(width, stride_w, kernel_w, padding) 351 | output_shape = [batch_size, out_height, out_width, num_output_channels] 352 | 353 | outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape, 354 | [1, stride_h, stride_w, 1], 355 | padding=padding) 356 | biases = _variable_on_cpu('biases', [num_output_channels], 357 | tf.constant_initializer(0.0)) 358 | outputs = tf.nn.bias_add(outputs, biases) 359 | 360 | 361 | if bn: 362 | outputs = batch_norm_for_conv2d(outputs, is_training, 363 | bn_decay=bn_decay, scope='bn',reuse=reuse) 364 | 365 | if activation_fn is not None: 366 | outputs = activation_fn(outputs) 367 | 368 | if dropout is not None: 369 | outputs = tf.nn.dropout(outputs, dropout) 370 | return outputs 371 | 372 | 373 | def upsample_nn(x, ratio): 374 | s = tf.shape(x) 375 | h = s[1] 376 | w = s[2] 377 | return tf.image.resize_nearest_neighbor(x, [h * ratio, w * ratio]) -------------------------------------------------------------------------------- /tfmodel.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import sys 3 | sys.path.insert(0,'./pylibs') 4 | import tfutils 5 | 6 | def get_error_map(x,gt): 7 | weights= tf.cast(tf.greater(gt,0),tf.float32) 8 | est = tf.multiply(x,weights) 9 | res = tf.cast( tf.abs(tf.subtract(est,gt)) ,tf.float32) 10 | error_map = tf.cast( tf.greater(res,3),tf.float32) 11 | return error_map 12 | 13 | def gt_compare(x,gt): 14 | weights= tf.cast(tf.greater(gt,0),tf.float32) 15 | valid = tf.cast(tf.count_nonzero(gt),tf.float32) 16 | est = tf.multiply(x,weights) 17 | res = tf.cast( tf.abs(tf.subtract(est,gt)) ,tf.float32) 18 | error = tf.reduce_sum(tf.cast( tf.greater(res,3),tf.float32)) 19 | return tf.divide(error,valid) 20 | 21 | def res_block(inputs,kernel,features,scope,dropout,bn,reuse,is_training): 22 | conv1 = tfutils.conv2d(inputs, 23 | features, 24 | kernel, 25 | scope+'_block_conv1', 26 | stride=[1, 1], 27 | padding='SAME', 28 | use_xavier=True, 29 | stddev=1e-3, 30 | weight_decay=0.0, 31 | activation_fn=tf.nn.relu, 32 | bn=bn, 33 | bn_decay=None, 34 | is_training=is_training, 35 | dropout=dropout, 36 | reuse=reuse) 37 | 38 | conv2 = tfutils.conv2d(conv1, 39 | features, 40 | kernel, 41 | scope+'_block_conv2', 42 | stride=[1, 1], 43 | padding='SAME', 44 | use_xavier=True, 45 | stddev=1e-3, 46 | weight_decay=0.0, 47 | activation_fn=tf.nn.relu, 48 | bn=bn, 49 | bn_decay=None, 50 | is_training=is_training, 51 | dropout=None, 52 | reuse=reuse) 53 | 54 | conv2 = tf.concat([inputs,conv2],3) 55 | 56 | conv3 = tfutils.conv2d(conv2, 57 | features, 58 | kernel, 59 | scope+'_block_conv3', 60 | stride=[1, 1], 61 | padding='SAME', 62 | use_xavier=True, 63 | stddev=1e-3, 64 | weight_decay=0.0, 65 | activation_fn=tf.nn.relu, 66 | bn=bn, 67 | bn_decay=None, 68 | is_training=is_training, 69 | dropout=dropout, 70 | reuse=reuse) 71 | 72 | conv4 = tfutils.conv2d(conv3, 73 | features, 74 | kernel, 75 | scope+'_block_conv4', 76 | stride=[1, 1], 77 | padding='SAME', 78 | use_xavier=True, 79 | stddev=1e-3, 80 | weight_decay=0.0, 81 | activation_fn=tf.nn.relu, 82 | bn=bn, 83 | bn_decay=None, 84 | is_training=is_training, 85 | dropout=None, 86 | reuse=reuse) 87 | 88 | conv4 = tf.concat([conv2,conv4],3) 89 | 90 | return conv4 91 | 92 | 93 | def h_net(inputs,height,width,is_training,reuse,keep_prob): 94 | 95 | bn=False 96 | 97 | conv1 = tfutils.conv2d_depth(inputs, 98 | 32, 99 | [5,5], 100 | 'First_Block_conv1', 101 | stride=[1, 1], 102 | padding='SAME', 103 | rate=[1], 104 | use_xavier=True, 105 | stddev=1e-3, 106 | weight_decay=0.0, 107 | activation_fn=tf.nn.relu, 108 | bn=False, 109 | bn_decay=None, 110 | is_training=is_training, 111 | dropout=None, 112 | reuse=reuse) 113 | 114 | skip1 = conv1 115 | 116 | conv1 = tfutils.conv2d(conv1, 117 | 32, 118 | [5,5], 119 | 'First_Block_conv1_down', 120 | stride=[2, 2], 121 | padding='SAME', 122 | use_xavier=True, 123 | stddev=1e-3, 124 | weight_decay=0.0, 125 | activation_fn=tf.nn.relu, 126 | bn=False, 127 | bn_decay=None, 128 | is_training=is_training, 129 | dropout=keep_prob, 130 | reuse=reuse) 131 | 132 | 133 | conv1_block = res_block(conv1,[3,3],32,'First_Block_block1',1,bn,reuse,is_training) 134 | skip2 = conv1_block 135 | 136 | 137 | 138 | ############################################################################### 139 | ######################## Transpose Convolution 1_step ######################## 140 | 141 | 142 | conv1_up_block = tfutils.conv2d_transpose(conv1_block, 143 | 32, 144 | [5,5], 145 | 'First_Block_inverted_conv1_up', 146 | stride=[2, 2], 147 | padding='SAME', 148 | use_xavier=True, 149 | stddev=1e-3, 150 | weight_decay=0.0, 151 | activation_fn=tf.nn.relu, 152 | bn=False, 153 | bn_decay=None, 154 | is_training=is_training, 155 | dropout=1, 156 | height=height//2, 157 | width=width//2, 158 | reuse=reuse) 159 | 160 | conv1_up_block = tf.concat([conv1_up_block,skip1],3) 161 | conv1_up_block = res_block(conv1_up_block,[3,3],32,'First_Block_inverted_block',1,bn,reuse,is_training) 162 | 163 | 164 | down2_skip = tfutils.conv2d(conv1_up_block, 165 | 1, 166 | [1,1], 167 | 'First_Block_inverted_out_output', 168 | stride=[1, 1], 169 | padding='SAME', 170 | use_xavier=True, 171 | stddev=1e-3, 172 | weight_decay=0.0, 173 | activation_fn=None, 174 | bn=False, 175 | bn_decay=None, 176 | is_training=is_training, 177 | dropout=1, 178 | reuse=reuse) 179 | 180 | 181 | 182 | ############################################################################### 183 | 184 | 185 | 186 | conv2_block = tfutils.conv2d(conv1_block, 187 | 64, 188 | [5,5], 189 | 'Second_Block_out_conv2', 190 | stride=[2, 2], 191 | padding='SAME', 192 | use_xavier=True, 193 | stddev=1e-3, 194 | weight_decay=0.0, 195 | activation_fn=tf.nn.relu, 196 | bn=False, 197 | bn_decay=None, 198 | is_training=is_training, 199 | dropout=keep_prob, 200 | reuse=reuse) 201 | 202 | skip3 = conv2_block 203 | conv2_block = res_block(conv2_block,[3,3],64,"Second_Block_block2",1,bn,reuse,is_training) 204 | 205 | 206 | ############################################################################### 207 | ######################## Transpose Convolution 2_steps ######################## 208 | 209 | conv2_up_block_1 = tfutils.conv2d_transpose(conv2_block, 210 | 32, 211 | [5,5], 212 | 'Second_Block_inverted_conv1_up', 213 | stride=[2, 2], 214 | padding='SAME', 215 | use_xavier=True, 216 | stddev=1e-3, 217 | weight_decay=0.0, 218 | activation_fn=tf.nn.relu, 219 | bn=False, 220 | bn_decay=None, 221 | is_training=is_training, 222 | dropout=1, 223 | height=height//4, 224 | width=width//4, 225 | reuse=reuse) 226 | 227 | conv2_up_block_1 = tf.concat([conv2_up_block_1,skip2],3) 228 | conv2_up_block_1 = res_block(conv2_up_block_1,[3,3],32,'Second_Block_inverted_block1',1,bn,reuse,is_training) 229 | 230 | 231 | conv2_up_block_2 = tfutils.conv2d_transpose(conv2_up_block_1, 232 | 32, 233 | [5,5], 234 | 'Second_Block_inverted_conv2_up', 235 | stride=[2, 2], 236 | padding='SAME', 237 | use_xavier=True, 238 | stddev=1e-3, 239 | weight_decay=0.0, 240 | activation_fn=tf.nn.relu, 241 | bn=False, 242 | bn_decay=None, 243 | is_training=is_training, 244 | dropout=1, 245 | height=height//2, 246 | width=width//2, 247 | reuse=reuse) 248 | 249 | conv2_up_block_2 = tf.concat([conv2_up_block_2,skip1],3) 250 | conv2_up_block_2 = res_block(conv2_up_block_2,[3,3],32,'Second_Block_inverted_block2',1,bn,reuse,is_training) 251 | 252 | 253 | 254 | down4_skip = tfutils.conv2d(conv2_up_block_2, 255 | 1, 256 | [1,1], 257 | 'Second_Block_inverted_out_output', 258 | stride=[1, 1], 259 | padding='SAME', 260 | use_xavier=True, 261 | stddev=1e-3, 262 | weight_decay=0.0, 263 | activation_fn=None, 264 | bn=False, 265 | bn_decay=None, 266 | is_training=is_training, 267 | dropout=1, 268 | reuse=reuse) 269 | 270 | 271 | 272 | 273 | ############################################################################### 274 | 275 | 276 | 277 | conv3_block = tfutils.conv2d(conv2_block, 278 | 128, 279 | [5,5], 280 | 'Third_Block_out_conv3', 281 | stride=[2, 2], 282 | padding='SAME', 283 | use_xavier=True, 284 | stddev=1e-3, 285 | weight_decay=0.0, 286 | activation_fn=tf.nn.relu, 287 | bn=False, 288 | bn_decay=None, 289 | is_training=is_training, 290 | dropout=keep_prob, 291 | reuse=reuse) 292 | 293 | conv3_block = res_block(conv3_block,[3,3],128,"Third_Block_block2",1,bn,reuse,is_training) 294 | 295 | 296 | conv3_up_block_1 = tfutils.conv2d_transpose(conv3_block, 297 | 128, 298 | [5,5], 299 | 'Third_Block_inverted_conv1_up', 300 | stride=[2, 2], 301 | padding='SAME', 302 | use_xavier=True, 303 | stddev=1e-3, 304 | weight_decay=0.0, 305 | activation_fn=tf.nn.relu, 306 | bn=False, 307 | bn_decay=None, 308 | is_training=is_training, 309 | dropout=1, 310 | height=height//8, 311 | width=width//8, 312 | reuse=reuse) 313 | 314 | conv3_up_block_1 = tf.concat([conv3_up_block_1,skip3],3) 315 | conv3_up_block_1 = res_block(conv3_up_block_1,[3,3],64,'Third_Block_inverted_block1',1,bn,reuse,is_training) 316 | 317 | 318 | conv3_up_block_2 = tfutils.conv2d_transpose(conv3_up_block_1, 319 | 64, 320 | [5,5], 321 | 'Third_Block_inverted_conv2_up', 322 | stride=[2, 2], 323 | padding='SAME', 324 | use_xavier=True, 325 | stddev=1e-3, 326 | weight_decay=0.0, 327 | activation_fn=tf.nn.relu, 328 | bn=False, 329 | bn_decay=None, 330 | is_training=is_training, 331 | dropout=1, 332 | height=height//4, 333 | width=width//4, 334 | reuse=reuse) 335 | 336 | conv3_up_block_2 = tf.concat([conv3_up_block_2,skip2],3) 337 | conv3_up_block_2 = res_block(conv3_up_block_2,[3,3],64,'Third_Block_inverted_block2',1,bn,reuse,is_training) 338 | 339 | 340 | conv3_up_block_3 = tfutils.conv2d_transpose(conv3_up_block_2, 341 | 32, 342 | [5,5], 343 | 'Third_Block_inverted_conv3_up', 344 | stride=[2, 2], 345 | padding='SAME', 346 | use_xavier=True, 347 | stddev=1e-3, 348 | weight_decay=0.0, 349 | activation_fn=tf.nn.relu, 350 | bn=False, 351 | bn_decay=None, 352 | is_training=is_training, 353 | dropout=1, 354 | height=height//2, 355 | width=width//2, 356 | reuse=reuse) 357 | 358 | conv3_up_block_3 = tf.concat([conv3_up_block_3,skip1],3) 359 | conv3_up_block_3 = res_block(conv3_up_block_3,[3,3],64,'Third_Block_inverted_block3',1,bn,reuse,is_training) 360 | 361 | 362 | 363 | down8_skip = tfutils.conv2d(conv2_up_block_2, 364 | 1, 365 | [1,1], 366 | 'Third_Block_inverted_output', 367 | stride=[1, 1], 368 | padding='SAME', 369 | use_xavier=True, 370 | stddev=1e-3, 371 | weight_decay=0.0, 372 | activation_fn=None, 373 | bn=False, 374 | bn_decay=None, 375 | is_training=is_training, 376 | dropout=1, 377 | reuse=reuse) 378 | 379 | all_preds = tf.concat( [ inputs[:,:,:,0:1],down2_skip,down4_skip,down8_skip ],3 ) 380 | 381 | output = tf.add( inputs[:,:,:,0:1], down8_skip ) 382 | output = tf.add( output ,down4_skip) 383 | output = tf.add( output ,down2_skip) 384 | 385 | 386 | output = tf.concat( [ output, skip1],3 ) 387 | output = res_block(output,[3,3],64,'Combined_Res_Block',1,bn,reuse,is_training) 388 | 389 | output = tfutils.conv2d(output, 390 | 1, 391 | [1,1], 392 | 'output_layer', 393 | stride=[1, 1], 394 | padding='SAME', 395 | use_xavier=True, 396 | stddev=1e-3, 397 | weight_decay=0.0, 398 | activation_fn=tf.nn.relu, 399 | bn=False, 400 | bn_decay=None, 401 | is_training=is_training, 402 | dropout=1, 403 | reuse=reuse) 404 | 405 | 406 | 407 | 408 | return tf.concat([all_preds,output],3) --------------------------------------------------------------------------------