├── README.md ├── data_extraction ├── pickle_kitti_dataset.py ├── pickle_nyu_dataset.py └── save_to_file_nyu.m ├── depth_estimate.PNG ├── depth_estimation_nunet.py ├── inference_timer.py ├── kitti.gif ├── models ├── losses.py └── models.py ├── prediction_comparison.py ├── requirements.txt └── utils ├── augmented_data_generator.py ├── deep_utils.py ├── fill_depth_colorization.py ├── image_utils.py ├── images_to_video.py ├── rgb2depth.py ├── rgb2depth_stream.py └── stack_videos.py /README.md: -------------------------------------------------------------------------------- 1 | # depth-estimation 2 | Practical Depth Estimation with Image Segmentation and Serial U-Nets 3 | 4 | ![Depth Estimate](depth_estimate.PNG) 5 | 6 | ![Car Depth Estimate](kitti.gif) 7 | 8 | **Depth Estimates on KITTI Validation Data** 9 | 10 | ``` 11 | depth-estimation 12 | | depth_estimation_nunet.py <--- main file 13 | | depth_estimate.png 14 | | inference_timer.py 15 | | kitti.gif 16 | | prediction_comparison.py 17 | | README.md 18 | | requirements.txt 19 | | 20 | +---data_extraction 21 | | pickle_kitti_dataset.py 22 | | pickle_nyu_dataset.py 23 | | save_to_file_nyu.m 24 | | 25 | +---models 26 | | losses.py 27 | | models.py 28 | | 29 | \---utils 30 | augmented_data_generator.py 31 | deep_utils.py 32 | fill_depth_colorization.py 33 | image_utils.py 34 | images_2_video.py 35 | rgb2depth.py 36 | rgb2depth_stream.py 37 | stack_videos.py 38 | 39 | ``` 40 | 41 | ### Initial Setup 42 | ``` 43 | git clone https://github.com/mech0ctopus/depth-estimation.git 44 | cd depth-estimation 45 | pip install -r requirements.txt 46 | ``` 47 | 48 | ### Use Pre-Trained Network on Webcam 49 | 1. Download & extract pre-trained weights from link below. Place in depth-estimation folder. 50 | 2. Run rgb2depth_stream. 51 | ``` 52 | cd depth-estimation 53 | python utils\rgb2depth_stream.py 54 | ``` 55 | 56 | ### Use Pre-Trained Network on RGB Video 57 | 1. Download & extract pre-trained weights from link below. Place in depth-estimation folder. 58 | 2. Run video_depth_writer. 59 | ``` 60 | cd depth-estimation 61 | python utils\video_depth_writer.py 62 | ``` 63 | 64 | ### Train Depth Estimation Network 65 | 1. Download NYU Depth V2 or KITTI images from link below 66 | 2. (Optional, for NYU Depth V2) Colorize depth images 67 | ``` 68 | python utils\fill_depth_colorization.py 69 | ``` 70 | 3. Update training & validation folderpaths 71 | 4. Verify input shapes are correct (NYU: 480x640, Re-sized KITTI: 192x640) 72 | ``` 73 | python depth_estimation_nunet.py 74 | ``` 75 | 5. View Results in Tensorboard. 76 | ``` 77 | cd depth-estimation 78 | tensorboard --logdir logs 79 | ``` 80 | 81 | ### Pre-trained Weights 82 | - [Download Pre-trained Weights (NYU Depth V2, ResNet34 Backbones, 480x640 Images)](https://mega.nz/#!y9E1lC7S!UATGE-izPvmzfm_bWeGTkPb9tmoAS8pP4P72iyTQ2pQ) 83 | 84 | - [Download Pre-trained Weights (KITTI, ResNet50 Backbones, 192x640 Images)](https://mega.nz/file/L8kHRZSQ#sbZyujgm9CUJL1vdw9D4L6JtTLfS7IzoLtT7mDzI63I) 85 | 86 | ### Download Pre-processed KITTI Dataset 87 | [Download Pre-processed KITTI RGB and Depth Images (Re-sized and colorized) Training Images (5.5GB)](https://mega.nz/file/O1sn3TQQ#fbXlhG5T8Ad30CTtfwvKyKfgDyH3Aa2tq_fSoYhTA0U) 88 | 89 | **Note:** Raw image data is from the [KITTI Raw Dataset (synced and rectified)](http://www.cvlibs.net/datasets/kitti/raw_data.php) and the [KITTI Depth Prediction Dataset (annotated depth maps)](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_prediction). 90 | 91 | ### Datasets 92 | - [FieldSAFE](https://vision.eng.au.dk/fieldsafe/) 93 | - [KITTI](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_prediction) 94 | - [NYU Depth Dataset V2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) 95 | 96 | ### Citation 97 | ``` 98 | @conference{vehits20, 99 | author={Kyle J. Cantrell. and Craig D. Miller. and Carlos W. Morato.}, 100 | title={Practical Depth Estimation with Image Segmentation and Serial U-Nets}, 101 | booktitle={Proceedings of the 6th International Conference on Vehicle Technology and Intelligent Transport Systems - Volume 1: VEHITS,}, 102 | year={2020}, 103 | pages={406-414}, 104 | publisher={SciTePress}, 105 | organization={INSTICC}, 106 | doi={10.5220/0009781804060414}, 107 | isbn={978-989-758-419-0}, 108 | } 109 | ``` -------------------------------------------------------------------------------- /data_extraction/pickle_kitti_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Reads and pickles KITTI dataset into multiple files. 4 | 5 | Expects dataset structured as follows: 6 | dataset 7 | X_rgb 8 | 2011_09_26_drive_0002_sync (default KITTI subtree) 9 | 2011_09_26_drive_0009_sync 10 | ... 11 | y_depth 12 | 2011_09_26_drive_0002_sync 13 | 2011_09_26_drive_0009_sync 14 | ... 15 | 16 | Usage: 17 | pickle_data(dataset,output_folderpath) 18 | 19 | """ 20 | import numpy as np 21 | from glob import glob 22 | from utils.image_utils import depth_read, rgb_read 23 | import pickle 24 | from math import ceil 25 | 26 | def append_folderpath(folderpath): 27 | '''Adds '\\' to folderpath end if needed''' 28 | if folderpath.endswith('\\')==False: 29 | folderpath=str(folderpath)+ '\\' 30 | 31 | return folderpath 32 | 33 | def get_X_y_paths(data_folderpath,subfolder): 34 | '''Builds X & y subfolderpaths.''' 35 | #Append folder paths if necessary 36 | data_folderpath=append_folderpath(data_folderpath) 37 | subfolder=append_folderpath(subfolder) 38 | #Establish X and y subfolder paths 39 | X_rgb_subpath=data_folderpath+'X_rgb'+'\\'+subfolder 40 | y_depth_subpath=data_folderpath+'y_depth'+'\\'+subfolder 41 | 42 | return X_rgb_subpath, y_depth_subpath 43 | 44 | def get_depth_paths(y_depth_subpath): 45 | '''Build lists of filepaths for left & right image depths .''' 46 | y_depth_subpath=append_folderpath(y_depth_subpath) 47 | #Point to correct location 48 | left_depth=y_depth_subpath+'proj_depth\groundtruth\image_02\\' 49 | right_depth=y_depth_subpath+'proj_depth\groundtruth\image_03\\' 50 | #Get all image filenames 51 | left_depth_paths=glob(left_depth+'*.PNG') 52 | right_depth_paths=glob(right_depth+'*.PNG') 53 | 54 | return left_depth_paths, right_depth_paths 55 | 56 | def pickle_depth_images(subfolder,depth_paths,output_folderpath,max_array_len=200): 57 | '''Generates pickle file of y_depth data''' 58 | #Read depth images and update np.array 59 | num_images=len(depth_paths) 60 | y=np.zeros((num_images,375,1242),dtype=np.uint8) 61 | for idx, depth_path in enumerate(depth_paths): 62 | y[idx]=depth_read(depth_path) 63 | 64 | #Split data into smaller pickle files if necessary 65 | num_intervals=ceil(num_images/max_array_len) 66 | if num_intervals>1: 67 | y_splits=np.array_split(y,num_intervals) 68 | #Clear y variable 69 | y=None 70 | for idx, y_split in enumerate(y_splits): 71 | #Save to pickle file 72 | pickle.dump(y_split, open(output_folderpath+r"y_"+str(subfolder)+f"_{idx}.p", "wb"), protocol=4) 73 | #Clear y_split variable 74 | y_splits=None 75 | else: 76 | #Save to pickle file 77 | pickle.dump(y, open(output_folderpath+r"y_"+str(subfolder)+".p", "wb"), protocol=4) 78 | #Clear y variable 79 | y=None 80 | 81 | def get_rgb_paths(X_rgb_subpath,left_depth_paths, right_depth_paths): 82 | '''Create list of RGB paths corresponding to input depth paths''' 83 | #Point to correct location 84 | left_rgb=X_rgb_subpath+'image_02\data\\' 85 | right_rgb=X_rgb_subpath+'image_03\data\\' 86 | #Build list of image names in left and right depth paths 87 | left_depth_image_names=[filepath.split('\\')[-1] for filepath in left_depth_paths] 88 | right_depth_image_names=[filepath.split('\\')[-1] for filepath in right_depth_paths] 89 | #Build list of left and right RGB paths corrseponding to depth images 90 | left_rgb_paths,right_rgb_paths=[],[] 91 | for left_depth_image_name in left_depth_image_names: 92 | left_rgb_paths.append(left_rgb+left_depth_image_name) 93 | for right_depth_image_name in right_depth_image_names: 94 | right_rgb_paths.append(right_rgb+right_depth_image_name) 95 | rgb_paths=left_rgb_paths+right_rgb_paths 96 | 97 | return rgb_paths 98 | 99 | def pickle_rgb_images(subfolder,rgb_paths,output_folderpath,max_array_len=200): 100 | '''Generates pickle file of X_rgb data''' 101 | #Read RGB images and update np.array 102 | num_images=len(rgb_paths) 103 | X=np.zeros((num_images,375,1242,3),dtype=np.uint8) 104 | for idx, rgb_path in enumerate(rgb_paths): 105 | X[idx]=rgb_read(rgb_path) 106 | 107 | #Split data into smaller pickle files if necessary 108 | num_intervals=ceil(num_images/max_array_len) 109 | if num_intervals>1: 110 | X_splits=np.array_split(X,num_intervals) 111 | #Clear X variable 112 | X=None 113 | for idx, X_split in enumerate(X_splits): 114 | #Save to pickle file 115 | pickle.dump(X_split, open(output_folderpath+r"X_"+str(subfolder)+f"_{idx}.p", "wb"), protocol=4) 116 | #Clear X_split variable 117 | X_splits=None 118 | else: 119 | #Save to pickle file 120 | pickle.dump(X, open(output_folderpath+r"X_"+str(subfolder)+".p", "wb"), protocol=4) 121 | #Clear X variable 122 | X=None 123 | 124 | def pickle_folder(data_folderpath,subfolder,output_folderpath): 125 | '''Reads and pickles one folder from KITTI. 126 | Save X, y folder pair as pickle files.''' 127 | #Identify where X and y data is located 128 | X_rgb_subpath, y_depth_subpath=get_X_y_paths(data_folderpath,subfolder) 129 | #Build list of filepaths for left & right image depths 130 | left_depth_paths, right_depth_paths=get_depth_paths(y_depth_subpath) 131 | depth_paths=left_depth_paths+right_depth_paths 132 | #Create pickle file of all depth images listed in depth_paths 133 | pickle_depth_images(subfolder,depth_paths,output_folderpath) 134 | #Create list of corresponding RGB paths 135 | rgb_paths=get_rgb_paths(X_rgb_subpath,left_depth_paths, right_depth_paths) 136 | #Create pickle file of all RGB images listed in rgb_paths 137 | pickle_rgb_images(subfolder,rgb_paths,output_folderpath) 138 | 139 | def pickle_dataset(data_folderpath,output_folderpath): 140 | '''Reads and pickles KITTI dataset into multiple files.''' 141 | output_folderpath=append_folderpath(output_folderpath) 142 | #Build list of subfolders in data_folderpath\y_depth 143 | data_folderpath=append_folderpath(data_folderpath) 144 | y_depth_path=data_folderpath+'y_depth' 145 | subfolders=glob(y_depth_path+'\\*\\') 146 | #Parse out foldername 147 | subfolders=[subfolder.split('\\')[-2] for subfolder in subfolders] 148 | #Pickle each subfolder 149 | for subfolder in subfolders: 150 | print(f'Pickling {subfolder}') 151 | pickle_folder(data_folderpath,subfolder,output_folderpath) 152 | 153 | if __name__ == '__main__': 154 | dataset=r"G:\Documents\KITTI\sandbox_val" 155 | output_folderpath=r"G:\Documents\KITTI\pickled_KITTI\validation" 156 | pickle_dataset(dataset,output_folderpath) -------------------------------------------------------------------------------- /data_extraction/pickle_nyu_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Pickles thin NYU Depth dataset 4 | """ 5 | import numpy as np 6 | from glob import glob 7 | from PIL import Image 8 | import pickle 9 | from utils.image_utils import depth_read 10 | 11 | def generate_pickle_files(X,y): 12 | '''Generates pickle file to compress whole dataset.''' 13 | pickle.dump(X, open(r"X.p", "wb"), protocol=4) 14 | pickle.dump(y, open(r"y.p", "wb"), protocol=4) 15 | 16 | def load_pickle_files(X_file, y_file): 17 | '''Reads data from pickle files''' 18 | X=pickle.load(open(X_file,'rb')) 19 | y=pickle.load(open(y_file,'rb')) 20 | return X, y 21 | 22 | def read_data(data_folderpath,output_folderpath,num_intervals=35): 23 | '''Reads full dataset. Assumes data has been resized. 24 | Assumes "data_folderpath" contains subfolders corresponding 25 | to class names and each containing jpg files for class.''' 26 | print('Initializing Matrices') 27 | X=np.zeros((7392,480,640,3),dtype=np.uint8) #Was 480,640 28 | y=np.zeros((7392,480,640),dtype=np.uint8) #Was 480,640 29 | 30 | #Append folderpaths if needed 31 | if data_folderpath.endswith('\\')==False: 32 | data_folderpath=str(data_folderpath)+ '\\' 33 | if output_folderpath.endswith('\\')==False: 34 | output_folderpath=str(output_folderpath)+ '\\' 35 | X_folderpath=data_folderpath+'X_rgb\\' 36 | y_folderpath=data_folderpath+'y_depth\\' 37 | 38 | #Build list of filenames 39 | X_filelist=glob(X_folderpath+'*.png') 40 | y_filelist=glob(y_folderpath+'*.png') 41 | 42 | X_filelist.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) 43 | y_filelist.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) 44 | 45 | for idx in range(len(X_filelist)): 46 | print(f'Reading file #{idx}') 47 | #Load images 48 | rgb_image=Image.open(X_filelist[idx]) 49 | #depth_image=Image.open(y_filelist[idx]) 50 | 51 | #store as np.arrays 52 | X[idx]=np.array(rgb_image) #.reshape(640,480,3) #Reshape is new 53 | #y[idx]=np.array(depth_image) #.reshape(640,480) #Reshape is new 54 | y[idx]=depth_read(y_filelist[idx]) 55 | # if idx==1: 56 | ## image_utils.image_from_np(y[idx],rgb=False) 57 | # image_utils.image_from_np(X[idx]) 58 | # plt.imshow(y[idx], cmap='gray', interpolation='nearest') 59 | # break 60 | rgb_image.close() 61 | #depth_image.close() 62 | 63 | print('Splitting Data') 64 | y_splits=np.array_split(y,num_intervals) 65 | X_splits=np.array_split(X,num_intervals) 66 | X=None 67 | y=None 68 | 69 | print('Pickling') 70 | for idx in range(len(y_splits)): 71 | #Save to pickle file 72 | pickle.dump(y_splits[idx], open(output_folderpath+f"y_{idx}.p", "wb"), protocol=4) 73 | pickle.dump(X_splits[idx], open(output_folderpath+f"X_{idx}.p", "wb"), protocol=4) 74 | 75 | if __name__ == '__main__': 76 | dataset=r"E:\NYU\nyud_raw_data\nyuv2-python-toolbox-master\colorized" 77 | output_folderpath=r"G:\Documents\NYU Depth Dataset\nyu_data\pickled_colorized" 78 | read_data(dataset,output_folderpath) -------------------------------------------------------------------------------- /data_extraction/save_to_file_nyu.m: -------------------------------------------------------------------------------- 1 | %Saves RGB and color images from nyu_depth_v2_labeled.mat to PNG files 2 | %Assumes depths and images variables have been loaded 3 | 4 | size_rgb_images=size(images); 5 | num_rgb_images=size_rgb_images(4); 6 | 7 | for i=1:num_rgb_images 8 | disp(strcat(num2str(i),'/',num2str(num_rgb_images))); 9 | rgb_filename=strcat('nyu_data/X_rgb/rgb_',num2str(i),'.png'); 10 | imwrite(uint8(images(:,:,:,i)),rgb_filename); 11 | 12 | d_filename=strcat('nyu_data/y_depth/d_',num2str(i),'.png'); 13 | imwrite(mat2gray(depths(:,:,i)),d_filename); 14 | end -------------------------------------------------------------------------------- /depth_estimate.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mech0ctopus/depth-estimation/89d0a3f2efcb16a85530694b476d9868b374a5e8/depth_estimate.PNG -------------------------------------------------------------------------------- /depth_estimation_nunet.py: -------------------------------------------------------------------------------- 1 | # Kyle J. Cantrell & Craig D. Miller 2 | # kjcantrell@wpi.edu & cdmiller@wpi.edu 3 | # Deep Learning for Advanced Robot Perception 4 | # 5 | # Depth Estimation from RGB Images 6 | 7 | import numpy as np 8 | from glob import glob 9 | from utils import deep_utils 10 | from utils.image_utils import depth_read, rgb_read, depth_read_kitti 11 | from models import models 12 | from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint 13 | from tensorflow.keras.optimizers import Adam 14 | import datetime 15 | from tensorflow.compat.v1 import ConfigProto 16 | from tensorflow.compat.v1 import InteractiveSession 17 | import segmentation_models 18 | 19 | config = ConfigProto() 20 | config.gpu_options.per_process_gpu_memory_fraction = 0.9 21 | config.gpu_options.allow_growth = True 22 | session = InteractiveSession(config=config) 23 | 24 | def _batchGenerator(X_filelist,y_filelist,batchSize): 25 | """ 26 | Yield X and Y data when the batch is filled. 27 | """ 28 | #Sort filelists to confirm they are same order 29 | X_filelist.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) 30 | y_filelist.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) 31 | #Shuffle order of filenames 32 | X_filelist,y_filelist=deep_utils.simul_shuffle(X_filelist,y_filelist) 33 | 34 | while True: 35 | idx=0 36 | 37 | while idx 1] = 1 40 | (H, W) = imgDepth.shape 41 | numPix = H * W 42 | indsM = np.arange(numPix).reshape((W, H)).transpose() 43 | knownValMask = (imgIsNoise == False).astype(int) 44 | grayImg = skimage.color.rgb2gray(imgRgb) 45 | winRad = 1 46 | len_ = 0 47 | absImgNdx = 0 48 | len_window = (2 * winRad + 1) ** 2 49 | len_zeros = numPix * len_window 50 | 51 | cols = np.zeros(len_zeros) - 1 52 | rows = np.zeros(len_zeros) - 1 53 | vals = np.zeros(len_zeros) - 1 54 | gvals = np.zeros(len_window) - 1 55 | 56 | for j in range(W): 57 | for i in range(H): 58 | nWin = 0 59 | for ii in range(max(0, i - winRad), min(i + winRad + 1, H)): 60 | for jj in range(max(0, j - winRad), min(j + winRad + 1, W)): 61 | if ii == i and jj == j: 62 | continue 63 | 64 | rows[len_] = absImgNdx 65 | cols[len_] = indsM[ii, jj] 66 | gvals[nWin] = grayImg[ii, jj] 67 | 68 | len_ = len_ + 1 69 | nWin = nWin + 1 70 | 71 | curVal = grayImg[i, j] 72 | gvals[nWin] = curVal 73 | c_var = np.mean((gvals[:nWin + 1] - np.mean(gvals[:nWin+ 1])) ** 2) 74 | 75 | csig = c_var * 0.6 76 | mgv = np.min((gvals[:nWin] - curVal) ** 2) 77 | if csig < -mgv / np.log(0.01): 78 | csig = -mgv / np.log(0.01) 79 | 80 | if csig < 2e-06: 81 | csig = 2e-06 82 | 83 | gvals[:nWin] = np.exp(-(gvals[:nWin] - curVal) ** 2 / csig) 84 | gvals[:nWin] = gvals[:nWin] / sum(gvals[:nWin]) 85 | vals[len_ - nWin:len_] = -gvals[:nWin] 86 | 87 | # Now the self-reference (along the diagonal). 88 | rows[len_] = absImgNdx 89 | cols[len_] = absImgNdx 90 | vals[len_] = 1 # sum(gvals(1:nWin)) 91 | 92 | len_ = len_ + 1 93 | absImgNdx = absImgNdx + 1 94 | 95 | vals = vals[:len_] 96 | cols = cols[:len_] 97 | rows = rows[:len_] 98 | A = scipy.sparse.csr_matrix((vals, (rows, cols)), (numPix, numPix)) 99 | 100 | rows = np.arange(0, numPix) 101 | cols = np.arange(0, numPix) 102 | vals = (knownValMask * alpha).transpose().reshape(numPix) 103 | G = scipy.sparse.csr_matrix((vals, (rows, cols)), (numPix, numPix)) 104 | 105 | A = A + G 106 | b = np.multiply(vals.reshape(numPix), imgDepth.flatten('F')) 107 | 108 | #print ('Solving system..') 109 | 110 | new_vals = spsolve(A, b) 111 | new_vals = np.reshape(new_vals, (H, W), 'F') 112 | 113 | #print ('Done.') 114 | 115 | denoisedDepthImg = new_vals * maxImgAbsDepth 116 | 117 | output = denoisedDepthImg.reshape((H, W)).astype('float32') 118 | 119 | output = np.multiply(output, (1-knownValMask)) + imgDepthInput 120 | 121 | return output 122 | 123 | if __name__=="__main__": 124 | X_files=glob(r"E:\NYU\nyud_raw_data\nyuv2-python-toolbox-master\X_rgb\\"+'*') 125 | y_files=glob(r"E:\NYU\nyud_raw_data\nyuv2-python-toolbox-master\y_depth\\"+'*') 126 | 127 | X_files.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) 128 | y_files.sort(key=lambda f: int(''.join(filter(str.isdigit, f)))) 129 | 130 | len_files=len(X_files) 131 | 132 | last_image=7171 133 | 134 | for idx in range(len_files): 135 | if idx>last_image: 136 | start=timer() 137 | #Read RGB and depth input images 138 | imgRgb=image_utils.rgb_read(X_files[idx]) 139 | imgDepthInput=image_utils.depth_read(y_files[idx]) 140 | #Colorize 141 | denoised_depth_img=fill_depth_colorization(imgRgb=imgRgb, imgDepthInput=imgDepthInput, alpha=0.8) 142 | #Save depth image 143 | image_utils.heatmap(denoised_depth_img,save=True, 144 | name=r'E:\NYU\nyud_raw_data\nyuv2-python-toolbox-master\colorized\y_depth\\'+f'c_depth_{idx}') 145 | end=timer() 146 | dt=end-start 147 | print(f'Saving {idx}/{len_files} in {dt} sec') 148 | else: 149 | pass -------------------------------------------------------------------------------- /utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Image Utility functions. 4 | """ 5 | import numpy as np 6 | from PIL import Image 7 | import matplotlib.pyplot as plt 8 | import cv2 9 | 10 | def crop_image(img,x=640,y=192,mode='middle'): 11 | '''Crops images starting at 'top', 'middle', or 'bottom'.''' 12 | 13 | if img.shape[0] != y: 14 | #Crop vertically 15 | if mode=='top': 16 | y_top, y_bottom = 0, y 17 | img=img[y_top:y_bottom,:] 18 | elif mode=='middle': 19 | y_mid=img.shape[0]/2 20 | y_top, y_bottom = int(y_mid-y/2), int(y_mid+y/2) 21 | img=img[y_top:y_bottom,:] 22 | elif mode=='bottom': 23 | y_top, y_bottom = img.shape[0]-y, img.shape[0] 24 | img=img[y_top:y_bottom,:] 25 | else: 26 | print('Unknown crop mode.') 27 | img=None 28 | 29 | if img.shape[1] != x: 30 | #Crop horizontally in the middle of image 31 | x_mid=img.shape[1]/2 32 | x_left, x_right = int(x_mid-x/2), int(x_mid+x/2) 33 | img=img[:,x_left:x_right] 34 | 35 | return img 36 | 37 | def image_from_np(image_array,save=False,rgb=True): 38 | '''Plots RGB or grayscale image from numpy array''' 39 | if rgb==True: 40 | img = Image.fromarray(image_array, 'RGB') 41 | img.show() 42 | else: 43 | #img = Image.fromarray(image_array, '1') 44 | img=cv2.imshow('image_from_np',image_array) 45 | cv2.waitKey(0) 46 | return img 47 | 48 | def add_blur(im_array,ksize=12,sigmaColor=400,sigmaMax=700): 49 | """ 50 | Adds bilateral filtering to blur objects but preserve edges 51 | """ 52 | return cv2.bilateralFiltering(im_array,ksize,sigmaColor,sigmaMax) 53 | 54 | def rgb_read(filename): 55 | '''Reads RGB image from png file and returns it as a numpy array''' 56 | #Load image 57 | image=Image.open(filename) 58 | #store as np.array 59 | rgb=np.array(image) 60 | image.close() 61 | return rgb 62 | 63 | def depth_read(filename): 64 | '''Loads depth map D from png file and returns it as a numpy array''' 65 | #Lower is closer 66 | # From KITTI devkit 67 | 68 | image=Image.open(filename) 69 | depth_png = np.array(image, dtype=int) 70 | # make sure we have a proper 16bit depth map here.. not 8bit! 71 | 72 | if depth_png.shape==(480,640,3): 73 | depth_png=(depth_png[:,:,0]+depth_png[:,:,1]+depth_png[:,:,2])/3 74 | 75 | #depth_png=depth_png[:,:,3] 76 | assert(np.max(depth_png) <= 255) 77 | depth=depth_png.astype(np.float) 78 | #depth = depth_png.astype(np.float) / 256. 79 | #depth[depth_png == 0] = -1. 80 | image.close() 81 | 82 | return depth 83 | 84 | def depth_read_kitti(filename): 85 | '''Loads depth map D from png file and returns it as a numpy array''' 86 | #Lower is closer 87 | # From KITTI devkit 88 | 89 | image=Image.open(filename) 90 | depth_png = np.array(image, dtype=int) 91 | 92 | #TODO: Determine if this if legitimate for getting depth values 93 | if depth_png.shape==(192,640,4): 94 | # print('it is') 95 | depth_png=(depth_png[:,:,0]+depth_png[:,:,1]+depth_png[:,:,2])/3 96 | 97 | assert(np.max(depth_png) <= 255) 98 | depth=depth_png.astype('int8') #np.float 99 | 100 | image.close() 101 | 102 | return depth 103 | def heatmap(image,save=False,name='heatmap',cmap='gray'): 104 | '''Plots heatmap of depth data from image or np.ndarray.''' 105 | if type(image)==np.ndarray: 106 | pic_array=image 107 | else: 108 | #Convert to np.ndarray 109 | pic=Image.open(image) 110 | pic_array=np.array(pic) 111 | #Plot heatmap 112 | print(pic_array.shape) 113 | plt.imshow(pic_array, cmap=cmap, interpolation='nearest') #cmap=binary, plasma, gray 114 | plt.show() 115 | if save==True: 116 | plt.imsave(name+'.png',pic_array, cmap=cmap) 117 | 118 | if __name__=='__main__': 119 | filename=r"G:\Documents\KITTI\sandbox\y_depth\2011_09_26_drive_0002_sync\proj_depth\groundtruth\image_02\0000000005.png" 120 | heatmap(filename) 121 | d=depth_read(filename) 122 | -------------------------------------------------------------------------------- /utils/images_to_video.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Build video from image sequence. 4 | """ 5 | import cv2 6 | from glob import glob 7 | 8 | def images_to_video(folderpath,output_filename=r'output_video.avi',out_FPS=10,width=640,height=480): 9 | '''Converts all images in a folder into video.''' 10 | 11 | images=glob(folderpath+'*.jpg') 12 | 13 | # Define the codec and create VideoWriter object. 14 | out = cv2.VideoWriter(output_filename,cv2.VideoWriter_fourcc('M','J','P','G'), 15 | out_FPS, (int(width),int(height))) 16 | 17 | for image in images: 18 | img=cv2.imread(image) 19 | out.write(img) 20 | 21 | cv2.destroyAllWindows() 22 | 23 | if __name__=='__main__': 24 | folderpath=r"D:\rpi_cal_files\\" 25 | images_to_video(folderpath,output_filename=r'camera_cal.avi',out_FPS=10,width=640,height=480) -------------------------------------------------------------------------------- /utils/rgb2depth.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Tool for creating a depth prediction video from an input RGB video. 4 | """ 5 | import cv2 6 | import numpy as np 7 | from models import models 8 | import image_utils 9 | import deep_utils 10 | 11 | def rgb2depth_video(filename, model, out_FPS=10, width=640, height=192, 12 | output_filename=r'depth_output.avi', gamma=1, 13 | mirror=False, crop_mode='middle'): 14 | '''Create depth prediction video from input RGB video. Adjust 15 | brightness (gamma) for viewing purposes only. Anything other than 16 | gamma=1 is distorting the numerical depth prediction.''' 17 | 18 | cam = cv2.VideoCapture(filename) 19 | 20 | # Define the codec and create VideoWriter object. 21 | out = cv2.VideoWriter(output_filename,cv2.VideoWriter_fourcc('M','J','P','G'), 22 | out_FPS, (int(width),int(height))) 23 | 24 | while(cam.isOpened()): 25 | ret_val, img = cam.read() 26 | 27 | try: 28 | if mirror: 29 | img = cv2.flip(img, 1) 30 | 31 | #If img doesn't match output height & width 32 | if (img.shape[0] != height) or (img.shape[1] != width): 33 | #Crop image 34 | img=image_utils.crop_image(img,width,height,mode=crop_mode) 35 | 36 | #Predict depth 37 | img=img.reshape(1,height,width,3) 38 | img=np.divide(img,255).astype(np.float16) #Normalize input 39 | y_est=model.predict(img) 40 | y_est=y_est.reshape((height,width)) 41 | 42 | #Map 2D grayscale to RGB equivalent 43 | vis = cv2.cvtColor((y_est*255*(1/gamma)).astype(np.uint8),cv2.COLOR_GRAY2BGR) 44 | vis = cv2.cvtColor(vis,cv2.COLOR_BGR2GRAY) 45 | #Map BGR to Rainbow 46 | vis=cv2.applyColorMap(vis,cv2.COLORMAP_RAINBOW) 47 | 48 | #Write prediction to video 49 | out.write(vis) 50 | except: 51 | break 52 | 53 | cam.release() 54 | cv2.destroyAllWindows() 55 | 56 | if __name__=='__main__': 57 | #Load pretrained & compiled model 58 | weights=r"E:\W-Net_Connected_weights_best_KITTI_35Epochs.hdf5" 59 | model=models.wnet_connected() 60 | model=deep_utils.load_model_weights(model,weights) 61 | #Define input RGB video (tested with .mp4, .mov, and .avi) 62 | rgb_video=r"G:\Program Files\MATLAB\R2018b\toolbox\vision\visiondata\atrium.mp4" 63 | #Create depth video 64 | rgb2depth_video(rgb_video,model,out_FPS=30, width=640, height=192, 65 | output_filename=r'depth_output.avi', gamma=0.6, 66 | mirror=False, crop_mode='middle') -------------------------------------------------------------------------------- /utils/rgb2depth_stream.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Tool for testing depth estimation models on live video stream. 4 | """ 5 | import cv2 6 | import numpy as np 7 | import image_utils 8 | import deep_utils 9 | import time 10 | from models import models 11 | 12 | def rgb2depth_stream(model,method='cv2',mirror=True,width=640,height=192, 13 | gamma=1,crop_mode='middle'): 14 | '''Runs depth estimation on live webcam video stream. Adjust 15 | brightness (gamma) for viewing purposes only. Anything other than 16 | gamma=1 is distorting the numerical depth prediction.''' 17 | cam = cv2.VideoCapture(0) 18 | 19 | while True: 20 | start=time.time() 21 | 22 | ret_val, img = cam.read() 23 | if mirror: 24 | img = cv2.flip(img, 1) 25 | 26 | #If img doesn't match output height & width 27 | if (img.shape[0] != height) or (img.shape[1] != width): 28 | #Crop image 29 | img=image_utils.crop_image(img,width,height,mode=crop_mode) 30 | 31 | img=img.reshape(1,height,width,3) 32 | img=np.divide(img,255).astype(np.float16) 33 | #Predict depth 34 | y_est=model.predict(img) 35 | y_est=y_est.reshape((height,width)) 36 | 37 | #Show depth prediction results 38 | if method=='cv2': 39 | #Map 2D grayscale to RGB equivalent 40 | vis = cv2.cvtColor((y_est*255*(1/gamma)).astype(np.uint8),cv2.COLOR_GRAY2BGR) 41 | vis = cv2.cvtColor(vis,cv2.COLOR_BGR2GRAY) 42 | #Map BGR to Rainbow 43 | vis=cv2.applyColorMap(vis,cv2.COLORMAP_RAINBOW) 44 | 45 | cv2.imshow('Depth Estimate', vis) 46 | elif method=='heatmap': 47 | image_utils.heatmap(y_est,cmap='plasma') 48 | else: 49 | print('Unknown display method.') 50 | 51 | #Estimate instantaneous frames per second 52 | end=time.time() 53 | fps=round(1/(end-start),2) 54 | print(f'FPS: {fps}') 55 | 56 | if cv2.waitKey(1) == 27: 57 | break # esc to quit 58 | cv2.destroyAllWindows() 59 | 60 | if __name__=='__main__': 61 | #Load pretrained & compiled model 62 | weights=r"E:\W-Net_Connected_weights_best_KITTI_35Epochs.hdf5" 63 | model=models.wnet_connected() 64 | model=deep_utils.load_model_weights(model,weights) 65 | 66 | display_methods=['cv2','heatmap'] 67 | 68 | rgb2depth_stream(model,method=display_methods[0],mirror=True, 69 | width=640,height=192,gamma=0.3,crop_mode='middle') -------------------------------------------------------------------------------- /utils/stack_videos.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Stacks videos vertically or horizontally. 4 | """ 5 | import cv2 6 | import numpy as np 7 | 8 | def stack_videos(filename1,filename2, out_FPS=10, output_filename=r'output_stack.avi', 9 | stack_direction='vertical'): 10 | '''Stacks videos vertically or horizontally.''' 11 | 12 | cam1 = cv2.VideoCapture(filename1) 13 | cam2 = cv2.VideoCapture(filename2) 14 | 15 | if stack_direction=='vertical': 16 | vert_mul=2 17 | hor_mul=1 18 | axis=0 19 | else: 20 | vert_mul=1 21 | hor_mul=2 22 | axis=1 23 | 24 | # Define the codec and create VideoWriter object. 25 | out = cv2.VideoWriter(output_filename,cv2.VideoWriter_fourcc('M','J','P','G'), 26 | out_FPS, (int(hor_mul*cam1.get(3)),int(vert_mul*cam1.get(4)))) 27 | 28 | while(cam1.isOpened()): 29 | ret_val1, img1 = cam1.read() 30 | ret_val2, img2 = cam2.read() 31 | 32 | axis 33 | if ret_val1 == True: 34 | # Write the frame into the file 'output.avi' 35 | vis=np.concatenate((img1, img2), axis=axis) #0:vertical, 1:horizontal 36 | out.write(vis) 37 | 38 | else: 39 | break 40 | 41 | cam1.release() 42 | cam2.release() 43 | cv2.destroyAllWindows() 44 | 45 | if __name__=='__main__': 46 | filename1=r"C:\Users\Craig\Documents\GitHub\depth-estimation\models\output_2011_09_26_drive_0009_unseen_10FPS.avi" 47 | filename2=r"C:\Users\Craig\Documents\GitHub\depth-estimation\models\2011_09_26_drive_0009_unseen_depth_10FPS.avi" 48 | stack_videos(filename1,filename2,stack_direction='horizontal') --------------------------------------------------------------------------------