├── .gitignore ├── LICENSE ├── README.md ├── config.py ├── custom_layers └── unpooling_layer.py ├── data_generator.py ├── data_generator_depth.py ├── demo.py ├── demo_depth.py ├── depth_model.py ├── images ├── 0_image.png ├── 0_label.png ├── 0_out.png ├── 1_image.png ├── 1_label.png ├── 1_out.png ├── 2_image.png ├── 2_label.png ├── 2_out.png ├── 3_image.png ├── 3_label.png ├── 3_out.png ├── 4_image.png ├── 4_label.png ├── 4_out.png ├── 5_image.png ├── 5_label.png ├── 5_out.png ├── 6_image.png ├── 6_label.png ├── 6_out.png ├── 7_image.png ├── 7_label.png ├── 7_out.png ├── 8_image.png ├── 8_label.png ├── 8_out.png ├── 9_image.png ├── 9_label.png ├── 9_out.png ├── dataset.png ├── legend.png └── segnet.png ├── migrate.py ├── model.py ├── pre-process.py ├── train.py ├── train_depth.py ├── train_names.txt ├── unit_tests.py ├── utils.py ├── valid_names.txt └── vgg16.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | temp/ 4 | data/ 5 | logs/ 6 | models/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 刘杨 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3D Object Detection 2 | 3 | This repository performs indoor semantic segmentation with SegNet. 4 | 5 | ## Dependencies 6 | - [NumPy](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html) 7 | - [TensorFlow](https://www.tensorflow.org/versions/r0.8/get_started/os_setup.html) 8 | - [Keras](https://keras.io/#installation) 9 | - [OpenCV](https://opencv-python-tutroals.readthedocs.io/en/latest/) 10 | 11 | ## Dataset 12 | 13 | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/dataset.png) 14 | 15 | Follow the [instructions](http://buildingparser.stanford.edu/dataset.html#overview) to download the 2D-3D-S dataset. 
16 | 17 | ```bash 18 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_1_no_xyz.tar 19 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_2_no_xyz.tar 20 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_3_no_xyz.tar 21 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_4_no_xyz.tar 22 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_5a_no_xyz.tar 23 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_5b_no_xyz.tar 24 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_6_no_xyz.tar 25 | ``` 26 | 27 | ## Architecture 28 | 29 | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/segnet.png) 30 | 31 | 32 | ## ImageNet Pretrained Models 33 | Download [VGG16](https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5) into models folder. 34 | 35 | ## Usage 36 | ### Data Pre-processing 37 | Extract training images: 38 | ```bash 39 | $ python pre-process.py 40 | ``` 41 | 42 | ### Train 43 | ```bash 44 | $ python train.py 45 | ``` 46 | 47 | If you want to visualize during training, run in your terminal: 48 | ```bash 49 | $ tensorboard --logdir path_to_current_dir/logs 50 | ``` 51 | 52 | ### Demo 53 | 54 | ```bash 55 | $ python demo.py 56 | ``` 57 | 58 | Input | GT | Output | 59 | |---|---|---| 60 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/0_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/0_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/0_out.png)| 61 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/1_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/1_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/1_out.png)| 62 | 
|![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/2_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/2_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/2_out.png)| 63 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/3_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/3_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/3_out.png)| 64 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/4_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/4_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/4_out.png)| 65 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/5_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/5_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/5_out.png)| 66 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/6_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/6_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/6_out.png)| 67 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/7_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/7_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/7_out.png)| 68 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/8_image.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/8_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/8_out.png)| 69 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/9_image.png) | 
![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/9_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/9_out.png)| 70 | 71 | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/legend.png) -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | img_rows, img_cols = 320, 320 2 | img_rows_half, img_cols_half = 160, 160 3 | channel = 4 4 | batch_size = 30 5 | epochs = 1000 6 | patience = 50 7 | num_samples = 43100 8 | num_train_samples = 34480 9 | # num_samples - num_train_samples 10 | num_valid_samples = 8620 11 | unknown = 128 12 | 13 | # bgr others ceiling floor wall column beam window door table chair bookcase sofa board clutter 14 | colors = [[0, 0, 0], [86, 233, 234], [218, 166, 104], [75, 123, 190], [163, 173, 89], [137, 156, 246], [72, 185, 81], [64, 144, 113], [83, 84, 84], [127, 48, 41], [45, 43, 238], [115, 39, 99], [127, 116, 84], [236, 235, 235]] 15 | 16 | num_classes = 14 17 | -------------------------------------------------------------------------------- /custom_layers/unpooling_layer.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from keras.engine.topology import Layer 3 | from keras.layers import Reshape, Concatenate, Lambda, Multiply 4 | 5 | 6 | class Unpooling(Layer): 7 | 8 | def __init__(self, **kwargs): 9 | super(Unpooling, self).__init__(**kwargs) 10 | 11 | def build(self, input_shape): 12 | super(Unpooling, self).build(input_shape) 13 | 14 | def call(self, inputs, **kwargs): 15 | x = inputs[:, 1] 16 | # print('x.shape: ' + str(K.int_shape(x))) 17 | bool_mask = Lambda(lambda t: K.greater_equal(t[:, 0], t[:, 1]), 18 | output_shape=K.int_shape(x)[1:])(inputs) 19 | # print('bool_mask.shape: ' + str(K.int_shape(bool_mask))) 20 | mask = Lambda(lambda t: K.cast(t, 
def get_y(semantic):
    """Map a colour-coded semantic image to per-pixel class indices.

    Every pixel is assigned the index of the entry of ``colors`` with the
    smallest L1 distance (sum of absolute channel differences), so a pixel
    whose colour is not exactly a palette colour still gets the nearest class.

    Args:
        semantic: array-like of shape (H, W, 3), as returned by ``cv.imread``
            or ``safe_crop``.

    Returns:
        Integer array of shape (H, W) with values in ``[0, num_classes)``.
    """
    semantic = np.asarray(semantic).astype(np.int32)
    # Size the scratch buffer from the input instead of assuming 320x320.
    height, width = semantic.shape[:2]
    distances = np.empty((height, width, num_classes), dtype=np.int32)
    for class_id in range(num_classes):
        distances[:, :, class_id] = np.abs(semantic - colors[class_id]).sum(axis=2)
    return np.argmin(distances, axis=2)


def to_bgr(y_pred):
    """Convert a map of class indices into a colour image using ``colors``.

    Args:
        y_pred: integer array of shape (H, W) whose values index ``colors``.

    Returns:
        ``np.uint8`` array of shape (H, W, 3).
    """
    # One palette lookup replaces the per-pixel Python loop and works for any
    # (H, W), not only 320x320.
    palette = np.asarray(colors, dtype=np.uint8)
    return palette[np.asarray(y_pred)]
def safe_crop(mat, x, y, crop_size):
    """Crop a window out of ``mat``, zero-padding past the borders, then resize.

    Args:
        mat: 2-D (H, W) or 3-D (H, W, C) image array.
        x: column of the window's left edge.
        y: row of the window's top edge.
        crop_size: ``(crop_height, crop_width)`` of the window.

    Returns:
        ``np.float32`` array. The window is resized to ``img_rows`` x
        ``img_cols`` unless ``crop_size`` already equals that size, in which
        case the zero-padded crop is returned as is.
    """
    crop_height, crop_width = crop_size
    # Keep whatever trailing (channel) axes the input has instead of assuming
    # exactly three channels.
    ret = np.zeros((crop_height, crop_width) + tuple(mat.shape[2:]), np.float32)
    crop = mat[y:y + crop_height, x:x + crop_width]
    h, w = crop.shape[:2]
    # The slice is clipped at the image border; the rest of ``ret`` stays zero.
    ret[0:h, 0:w] = crop
    if (crop_height, crop_width) != (img_rows, img_cols):
        # cv.resize expects dsize as (width, height), i.e. (cols, rows).
        ret = cv.resize(ret, dsize=(img_cols, img_rows), interpolation=cv.INTER_CUBIC)
    return ret
def random_choice(image_size, crop_size):
    """Pick a random top-left corner for a crop window.

    Args:
        image_size: ``(height, width)`` of the source image.
        crop_size: ``(crop_height, crop_width)`` of the window.

    Returns:
        ``(x, y)`` with ``0 <= x <= width - crop_width`` and
        ``0 <= y <= height - crop_height``. Along any axis where the window is
        larger than the image, the coordinate is 0.
    """
    height, width = image_size
    crop_height, crop_width = crop_size
    # Clamp the upper bound at 0: random.randint raises ValueError on an empty
    # range, which is what happened whenever the crop exceeded the image.
    x = random.randint(0, max(0, width - crop_width))
    y = random.randint(0, max(0, height - crop_height))
    return x, y
def split_data():
    """Randomly split the RGB image names into training and validation lists.

    Lists the ``.png`` files in ``data/rgb``, holds out 20% of them for
    validation, and writes the two shuffled name lists, one name per line, to
    ``train_names.txt`` and ``valid_names.txt`` in the current directory.
    """
    rgb_folder = 'data/rgb'
    names = [f for f in os.listdir(rgb_folder) if f.endswith('.png')]
    num_samples = len(names)
    print('num_samples: ' + str(num_samples))
    num_train_samples = int(num_samples * 0.8)
    print('num_train_samples: ' + str(num_train_samples))
    num_valid_samples = num_samples - num_train_samples
    print('num_valid_samples: ' + str(num_valid_samples))

    valid_names = random.sample(names, num_valid_samples)
    # Test membership against a set: filtering against the list made this step
    # quadratic in the number of images.
    held_out = set(valid_names)
    train_names = [n for n in names if n not in held_out]
    shuffle(valid_names)
    shuffle(train_names)

    with open('valid_names.txt', 'w') as f:
        f.write('\n'.join(valid_names))

    with open('train_names.txt', 'w') as f:
        f.write('\n'.join(train_names))
img_rows, img_cols = 320, 320 27 | channel = 3 28 | 29 | model_weights_path = 'models/model.64-2.1187.hdf5' 30 | model = build_encoder_decoder() 31 | model.load_weights(model_weights_path) 32 | 33 | print(model.summary()) 34 | 35 | rgb_test_path = 'data/rgb_test/' 36 | label_test_path = 'data/semantic_test/' 37 | test_images = [f for f in os.listdir(rgb_test_path) if 38 | os.path.isfile(os.path.join(rgb_test_path, f)) and f.endswith('.png')] 39 | 40 | samples = random.sample(test_images, 10) 41 | 42 | for i in range(len(samples)): 43 | image_name = samples[i] 44 | filename = os.path.join(rgb_test_path, image_name) 45 | image = cv.imread(filename) 46 | label = get_semantic(image_name) 47 | image_size = image.shape[:2] 48 | different_sizes = [(320, 320), (480, 480), (480, 480), (480, 480), (640, 640), (640, 640), (640, 640), 49 | (960, 960), (960, 960), (960, 960)] 50 | crop_size = random.choice(different_sizes) 51 | 52 | x, y = random_choice(image_size, crop_size) 53 | image = safe_crop(image, x, y, crop_size) 54 | label = safe_crop(label, x, y, crop_size) 55 | print('Start processing image: {}'.format(filename)) 56 | 57 | x_test = np.empty((1, img_rows, img_cols, 3), dtype=np.float32) 58 | x_test[0, :, :, 0:3] = image / 255. 
59 | 60 | out = model.predict(x_test) 61 | out = np.reshape(out, (img_rows, img_cols, num_classes)) 62 | out = np.argmax(out, axis=2) 63 | out = to_bgr(out) 64 | 65 | str_msg = 'crop_size: %s' % (str(crop_size)) 66 | draw_str(out, (20, 20), str_msg) 67 | 68 | if not os.path.exists('images'): 69 | os.makedirs('images') 70 | 71 | cv.imwrite('images/{}_image.png'.format(i), image) 72 | cv.imwrite('images/{}_out.png'.format(i), out) 73 | cv.imwrite('images/{}_label.png'.format(i), label) 74 | 75 | K.clear_session() 76 | -------------------------------------------------------------------------------- /demo_depth.py: -------------------------------------------------------------------------------- 1 | # import the necessary packages 2 | import os 3 | import random 4 | 5 | import cv2 as cv 6 | import keras.backend as K 7 | import numpy as np 8 | 9 | from data_generator_depth import random_choice, safe_crop 10 | from depth_model import build_encoder_decoder 11 | 12 | 13 | def get_depth(name): 14 | label_test_path = 'data/depth_test/' 15 | tokens = name.split('_') 16 | tokens[-1] = 'depth.png' 17 | name = '_'.join(tokens) 18 | filename = os.path.join(label_test_path, name) 19 | label = cv.imread(filename, 0) 20 | return label 21 | 22 | 23 | if __name__ == '__main__': 24 | img_rows, img_cols = 320, 320 25 | channel = 3 26 | 27 | model_weights_path = 'models/depth_model.32-0.0085.hdf5' 28 | model = build_encoder_decoder() 29 | model.load_weights(model_weights_path) 30 | 31 | print(model.summary()) 32 | 33 | rgb_test_path = 'data/rgb_test/' 34 | label_test_path = 'data/depth_test/' 35 | test_images = [f for f in os.listdir(rgb_test_path) if 36 | os.path.isfile(os.path.join(rgb_test_path, f)) and f.endswith('.png')] 37 | 38 | samples = random.sample(test_images, 10) 39 | 40 | for i in range(len(samples)): 41 | image_name = samples[i] 42 | filename = os.path.join(rgb_test_path, image_name) 43 | image = cv.imread(filename) 44 | label = get_depth(image_name) 45 | image_size = 
def _unpool(encoder_features, upsampled):
    """Apply ``Unpooling`` to an encoder feature map and an up-sampled tensor.

    ``Unpooling`` takes no constructor arguments. Its ``call`` reads a single
    tensor in which two equally shaped maps are stacked along a new axis 1:
    index 1 is the tensor that is passed through, and it is kept only where
    the map at index 0 is greater than or equal to it.
    """
    # Local import: Reshape is not among this module's top-level imports.
    from keras.layers import Reshape

    stacked_shape = (1,) + tuple(K.int_shape(encoder_features)[1:])
    stacked = Concatenate(axis=1)([Reshape(stacked_shape)(encoder_features),
                                   Reshape(stacked_shape)(upsampled)])
    return Unpooling()(stacked)


def build_encoder_decoder():
    """Build the depth-estimation encoder-decoder network.

    The encoder is five stages of zero-padded 3x3 ReLU convolutions
    (``conv1_1`` .. ``conv5_3``), each followed by 2x2 max pooling. The decoder
    mirrors it: every stage up-samples by 2, gates the result against the
    matching encoder feature map via ``Unpooling``, and applies 3x3
    convolutions (``deconv5_1`` .. ``deconv1_2``) with batch normalisation.
    A final 1x1 sigmoid convolution named ``pred`` emits one channel.

    Returns:
        A ``keras.models.Model`` mapping a (320, 320, 3) input to a
        (320, 320, 1) output.
    """
    kernel = 3
    # (filters, number of convolutions) per stage, in the order they are built.
    encoder_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    decoder_stages = [(512, 3), (256, 3), (128, 3), (64, 2), (64, 2)]

    # Encoder
    input_tensor = Input(shape=(320, 320, 3))
    x = input_tensor
    skips = []
    for stage, (filters, num_convs) in enumerate(encoder_stages, start=1):
        for i in range(1, num_convs + 1):
            x = ZeroPadding2D((1, 1))(x)
            x = Conv2D(filters, (kernel, kernel), activation='relu',
                       name='conv{}_{}'.format(stage, i))(x)
        # Remember the pre-pooling activations for the matching decoder stage.
        skips.append(x)
        x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    # Decoder (stage 5 down to stage 1)
    for stage, (filters, num_convs) in zip(range(5, 0, -1), decoder_stages):
        x = UpSampling2D(size=(2, 2))(x)
        # Unpooling must be fed one stacked tensor; calling it as
        # Unpooling(orig, shape)(x) raises TypeError because its constructor
        # accepts keyword arguments only.
        x = _unpool(skips[stage - 1], x)
        for i in range(1, num_convs + 1):
            x = Conv2D(filters, (kernel, kernel), activation='relu', padding='same',
                       name='deconv{}_{}'.format(stage, i), kernel_initializer='he_normal',
                       bias_initializer='zeros')(x)
            x = BatchNormalization()(x)

    x = Conv2D(1, (1, 1), activation='sigmoid', padding='valid', name='pred',
               kernel_initializer='he_normal', bias_initializer='zeros')(x)

    model = Model(inputs=input_tensor, outputs=x)
    return model
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/1_image.png -------------------------------------------------------------------------------- /images/1_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/1_label.png -------------------------------------------------------------------------------- /images/1_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/1_out.png -------------------------------------------------------------------------------- /images/2_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/2_image.png -------------------------------------------------------------------------------- /images/2_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/2_label.png -------------------------------------------------------------------------------- /images/2_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/2_out.png -------------------------------------------------------------------------------- /images/3_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/3_image.png 
-------------------------------------------------------------------------------- /images/3_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/3_label.png -------------------------------------------------------------------------------- /images/3_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/3_out.png -------------------------------------------------------------------------------- /images/4_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/4_image.png -------------------------------------------------------------------------------- /images/4_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/4_label.png -------------------------------------------------------------------------------- /images/4_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/4_out.png -------------------------------------------------------------------------------- /images/5_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/5_image.png -------------------------------------------------------------------------------- /images/5_label.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/5_label.png -------------------------------------------------------------------------------- /images/5_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/5_out.png -------------------------------------------------------------------------------- /images/6_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/6_image.png -------------------------------------------------------------------------------- /images/6_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/6_label.png -------------------------------------------------------------------------------- /images/6_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/6_out.png -------------------------------------------------------------------------------- /images/7_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/7_image.png -------------------------------------------------------------------------------- /images/7_label.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/7_label.png -------------------------------------------------------------------------------- /images/7_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/7_out.png -------------------------------------------------------------------------------- /images/8_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/8_image.png -------------------------------------------------------------------------------- /images/8_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/8_label.png -------------------------------------------------------------------------------- /images/8_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/8_out.png -------------------------------------------------------------------------------- /images/9_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/9_image.png -------------------------------------------------------------------------------- /images/9_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/9_label.png 
def migrate_model(new_model):
    """Copy pretrained VGG16 encoder weights into ``new_model`` in place.

    A 224x224x3 VGG16 is built through ``vgg16_model`` and used as the
    weight source. ``conv1_1`` is looked up by name in both models; every
    other layer is copied positionally, pairing source layer ``i`` with
    target layer ``i + 1`` for ``i`` in ``2..30``.

    :param new_model: model receiving the weights. It must expose a layer
        named ``conv1_1`` and have at least 32 layers whose weight shapes
        line up with the source layers.
    """
    vgg = vgg16_model(224, 224, 3)
    src_layers = list(vgg.layers)
    dst_layers = list(new_model.layers)

    # First convolution: the kernel is written into a fresh float32 buffer
    # of shape (3, 3, 3, 64) before being assigned; the bias is reused as is.
    src_weights = vgg.get_layer('conv1_1').get_weights()
    kernel = np.zeros((3, 3, 3, 64), dtype=np.float32)
    kernel[:, :, 0:3, :] = src_weights[0]
    new_model.get_layer('conv1_1').set_weights([kernel, src_weights[1]])

    # Remaining layers are matched purely by index.
    # NOTE(review): the +1 offset presumably accounts for an extra leading
    # Input layer in the target model - confirm against its builder.
    for idx in range(2, 31):
        dst_layers[idx + 1].set_weights(src_layers[idx].get_weights())

    del vgg


if __name__ == '__main__':
    encoder_decoder = build_encoder_decoder()
    migrate_model(encoder_decoder)
    print(encoder_decoder.summary())
    encoder_decoder.save_weights('models/model_weights.h5')

    K.clear_session()
def _encoder_stage(x, filters, names, kernel=3):
    """Apply one zero-padded ``kernel`` x ``kernel`` ReLU convolution per entry of ``names``."""
    for name in names:
        x = ZeroPadding2D((1, 1))(x)
        x = Conv2D(filters, (kernel, kernel), activation='relu', name=name)(x)
    return x


def _decoder_stage(x, skip, filters, names, kernel=3):
    """Upsample ``x``, unpool it against ``skip`` and refine it with Conv+BatchNorm pairs."""
    x = UpSampling2D(size=(2, 2))(x)
    skip_shape = K.int_shape(skip)
    # Both tensors get a leading axis of size 1 so they can be stacked on
    # axis 1 and handed to the Unpooling layer as a single tensor.
    stacked_shape = (1, skip_shape[1], skip_shape[2], skip_shape[3])
    skip_reshaped = Reshape(stacked_shape)(skip)
    x_reshaped = Reshape(stacked_shape)(x)
    together = Concatenate(axis=1)([skip_reshaped, x_reshaped])
    x = Unpooling()(together)
    for name in names:
        x = Conv2D(filters, (kernel, kernel), activation='relu', padding='same', name=name,
                   kernel_initializer='he_normal', bias_initializer='zeros')(x)
        x = BatchNormalization()(x)
    return x


def build_encoder_decoder(input_shape=(320, 320, 3), num_labels=14):
    """Build the SegNet-style encoder-decoder segmentation model.

    The encoder is the VGG16 convolutional stack (layers ``conv1_1`` ..
    ``conv5_3``); the pre-pooling output of each stage is kept and fed to
    the matching decoder stage (``deconv5_*`` .. ``deconv1_*``). The final
    ``pred`` layer is a 1x1 convolution with a softmax over ``num_labels``
    channels.

    Layers are created in exactly the same order as before so that code
    addressing ``model.layers`` by index keeps working.

    :param input_shape: ``(rows, cols, channels)`` of the input image.
        Rows and cols must be multiples of 32, otherwise the upsampled
        decoder tensors no longer match the stored encoder tensors.
    :param num_labels: number of output classes.
    :return: an uncompiled ``keras.models.Model``.
    """
    kernel = 3

    # Encoder: (filters, number of convolutions) per stage.
    encoder_spec = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    input_tensor = Input(shape=input_shape)
    x = input_tensor
    skips = []
    for stage, (filters, count) in enumerate(encoder_spec, start=1):
        names = ['conv{}_{}'.format(stage, i) for i in range(1, count + 1)]
        x = _encoder_stage(x, filters, names, kernel)
        skips.append(x)  # pre-pooling activations, consumed by the decoder
        x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    # Decoder: (encoder stage to unpool against, filters, number of convolutions).
    decoder_spec = [(5, 512, 3), (4, 256, 3), (3, 128, 3), (2, 64, 2), (1, 64, 2)]
    for stage, filters, count in decoder_spec:
        names = ['deconv{}_{}'.format(stage, i) for i in range(1, count + 1)]
        x = _decoder_stage(x, skips[stage - 1], filters, names, kernel)

    x = Conv2D(num_labels, (1, 1), activation='softmax', padding='valid', name='pred',
               kernel_initializer='he_normal', bias_initializer='zeros')(x)

    return Model(inputs=input_tensor, outputs=x)
# (sub-folder of an extracted area's ``data`` directory, target folder under ``data/``)
PNG_FOLDERS = (('rgb', 'rgb'), ('depth', 'depth'), ('semantic_pretty', 'semantic'))


def move_pngs(src_dir, dst_dir):
    """Move every ``.png`` file directly inside ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created when missing. As with a plain ``shutil.move``,
    an already existing file of the same name in ``dst_dir`` raises
    ``shutil.Error``.

    :return: number of files moved.
    """
    os.makedirs(dst_dir, exist_ok=True)
    names = [f for f in os.listdir(src_dir) if f.endswith('.png')]
    for name in names:
        shutil.move(os.path.join(src_dir, name), dst_dir)
    return len(names)


def count_pngs(directory):
    """Return the number of ``.png`` files directly inside ``directory``."""
    return sum(1 for f in os.listdir(directory) if f.endswith('.png'))


def extract_area(area_no, suffix='', cleanup=True):
    """Extract ``data/area_<area_no>_no_xyz.tar`` and collect its PNG files.

    The archive is unpacked into the current working directory, then the
    rgb / depth / semantic_pretty images are moved to ``data/rgb<suffix>``,
    ``data/depth<suffix>`` and ``data/semantic<suffix>``.

    :param area_no: area identifier used in the archive and folder names.
    :param suffix: appended to the three target folder names.
    :param cleanup: remove the extracted ``area_<area_no>`` folder afterwards.
    """
    tar_file = 'area_{}_no_xyz.tar'.format(area_no)
    filename = os.path.join('data', tar_file)
    print('Extracting {}...'.format(filename))

    # NOTE: extractall() does not sanitise member paths; only run this on
    # archives obtained from the official dataset download.
    with tarfile.open(filename) as tar:
        tar.extractall()

    folder = 'area_{}'.format(area_no)
    for src_name, dst_name in PNG_FOLDERS:
        move_pngs(os.path.join(folder, 'data', src_name),
                  os.path.join('data', dst_name + suffix))

    if cleanup:
        shutil.rmtree(folder)


def main():
    """Unpack the training and test areas, then print how many images each split has."""
    for area_no in [1, 2, 3, 4, 6]:
        extract_area(area_no)

    # NOTE(review): the extracted test-area folders have never been removed
    # by this script (unlike the training areas). Kept that way here; pass
    # cleanup=True if that was an oversight.
    for area_no in ['5a', '5b']:
        extract_area(area_no, suffix='_test', cleanup=False)

    print('{} images'.format(count_pngs('data/rgb')))
    print('{} depths'.format(count_pngs('data/depth')))
    print('{} semantics'.format(count_pngs('data/semantic')))

    print('{} test images'.format(count_pngs('data/rgb_test')))
    print('{} test depths'.format(count_pngs('data/depth_test')))
    print('{} test semantics'.format(count_pngs('data/semantic_test')))


if __name__ == '__main__':
    main()
def build_model(pretrained_path=None):
    """Build the encoder-decoder and initialise its weights.

    :param pretrained_path: weights file to load. When ``None`` the model is
        initialised through ``migrate.migrate_model`` instead.
    :return: the initialised, uncompiled model.
    """
    model = build_encoder_decoder()
    if pretrained_path is not None:
        model.load_weights(pretrained_path)
    else:
        migrate.migrate_model(model)
    return model


if __name__ == '__main__':
    # Parse arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-p", "--pretrained", help="path to pretrained model weights to load")
    args = vars(ap.parse_args())
    pretrained_path = args["pretrained"]
    checkpoint_models_path = 'models/'

    # Callbacks
    tensor_board = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)
    model_names = checkpoint_models_path + 'model.{epoch:02d}-{val_loss:.4f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_names, monitor='val_loss', verbose=1, save_best_only=True)
    early_stop = EarlyStopping('val_loss', patience=patience)
    reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, patience=int(patience / 4), verbose=1)


    class MyCbk(keras.callbacks.Callback):
        """Save a given model after every epoch.

        Used on the multi-GPU path so that the template model, not the
        ``multi_gpu_model`` wrapper, is what gets written to disk.
        """

        def __init__(self, model):
            keras.callbacks.Callback.__init__(self)
            self.model_to_save = model

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}  # Keras may call this without a logs dict
            fmt = checkpoint_models_path + 'model.%02d-%.4f.hdf5'
            # epoch + 1 matches the 1-based numbering ModelCheckpoint uses
            # in its file names, so both code paths name files alike.
            self.model_to_save.save(fmt % (epoch + 1, logs.get('val_loss', float('nan'))))


    # Load our model, added support for Multi-GPUs
    num_gpu = len(get_available_gpus())
    if num_gpu >= 2:
        # Build the template model under a CPU device scope, then replicate it.
        with tf.device("/cpu:0"):
            model = build_model(pretrained_path)

        new_model = multi_gpu_model(model, gpus=num_gpu)
        # rewrite the callback: saving through the original model and not the multi-gpu model.
        model_checkpoint = MyCbk(model)
    else:
        new_model = build_model(pretrained_path)

    # sgd = keras.optimizers.SGD(lr=0.001, momentum=0.9, decay=0.0005, nesterov=True)
    decoder_target = tf.placeholder(dtype='int32', shape=(None, None, None))
    new_model.compile(optimizer='nadam', loss=sparse_cross_entropy, target_tensors=[decoder_target])

    # summary() prints by itself and returns None, so it is not wrapped in print().
    new_model.summary()

    # Final callbacks
    callbacks = [tensor_board, model_checkpoint, early_stop, reduce_lr]

    # Start Fine-tuning
    new_model.fit_generator(train_gen(),
                            steps_per_epoch=num_train_samples // batch_size,
                            validation_data=valid_gen(),
                            validation_steps=num_valid_samples // batch_size,
                            epochs=epochs,
                            verbose=1,
                            callbacks=callbacks,
                            use_multiprocessing=True,
                            workers=int(get_available_cpus() * 0.80)
                            )
def build_model(pretrained_path=None):
    """Build the depth encoder-decoder and initialise its weights.

    :param pretrained_path: weights file to load. When ``None`` the model is
        initialised through ``migrate.migrate_model`` instead.
    :return: the initialised, uncompiled model.
    """
    model = build_encoder_decoder()
    if pretrained_path is not None:
        model.load_weights(pretrained_path)
    else:
        migrate.migrate_model(model)
    return model


if __name__ == '__main__':
    # Parse arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-p", "--pretrained", help="path to pretrained model weights to load")
    args = vars(ap.parse_args())
    pretrained_path = args["pretrained"]

    checkpoint_models_path = 'models/'

    # Callbacks
    tensor_board = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)
    model_names = checkpoint_models_path + 'depth_model.{epoch:02d}-{val_loss:.4f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_names, monitor='val_loss', verbose=1, save_best_only=True)
    early_stop = EarlyStopping('val_loss', patience=patience)
    reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, patience=int(patience / 4), verbose=1)

    model = build_model(pretrained_path)
    model.compile(optimizer='nadam', loss=depth_loss)

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    # Final callbacks
    callbacks = [tensor_board, model_checkpoint, early_stop, reduce_lr]

    # Batch size is set locally here rather than taken from config.
    batch_size = 14

    # Start Fine-tuning
    model.fit_generator(train_gen(batch_size),
                        steps_per_epoch=num_train_samples // batch_size,
                        validation_data=valid_gen(batch_size),
                        validation_steps=num_valid_samples // batch_size,
                        epochs=epochs,
                        verbose=1,
                        callbacks=callbacks,
                        use_multiprocessing=True,
                        workers=4
                        )
class TestStringMethods(unittest.TestCase):
    """Smoke test for the depth data generator."""

    def test_data_generator(self):
        """Pull one batch from ``train_gen`` and dump every sample to ``temp/`` as PNG.

        Fails when inputs and targets differ in length or when an image
        cannot be written. ``cv.imwrite`` signals failure only through its
        return value, so a missing ``temp/`` directory used to let this
        test pass without writing anything.
        """
        import os  # local import: this module has no file-level ``import os``

        os.makedirs('temp', exist_ok=True)

        # NOTE(review): train_depth.py calls train_gen(batch_size); confirm
        # that the generator can also be created without arguments.
        batches = train_gen()
        batch_x, batch_y = next(batches)
        self.assertEqual(len(batch_x), len(batch_y))

        for i, (x, y) in enumerate(zip(batch_x, batch_y)):
            # assumes samples are scaled to [0, 1] - TODO confirm against the generator
            x = (x * 255.).astype(np.uint8)
            y = (y * 255.).astype(np.uint8)
            self.assertTrue(cv.imwrite('temp/test_data_generator_x_{}.png'.format(i), x))
            self.assertTrue(cv.imwrite('temp/test_data_generator_y_{}.png'.format(i), y))


if __name__ == '__main__':
    unittest.main()
def sparse_cross_entropy(y_true, y_pred, from_logits=False):
    """Return the mean sparse cross-entropy between ``y_true`` and ``y_pred``.

    ``y_true`` is a 3-rank integer tensor of class indices with shape
    [batch_size, img_rows, img_cols].

    ``y_pred`` is the decoder's output, a 4-rank tensor with shape
    [batch_size, img_rows, img_cols, num_labels]. The decoder's last layer
    already applies a softmax, so by default ``y_pred`` is treated as
    per-class probabilities. Pass ``from_logits=True`` to feed unscaled
    logits straight into the TensorFlow op (the previous behaviour).

    :return: scalar tensor, the loss averaged over every pixel of the batch.
    """
    if from_logits:
        logits = y_pred
    else:
        # tf.nn.sparse_softmax_cross_entropy_with_logits applies a softmax
        # internally; handing it probabilities would apply softmax twice.
        # softmax(log(p)) == p for normalised p, so taking the log first
        # yields the plain -log(p[label]). Clipping avoids log(0).
        # tf.log is the TensorFlow 1.x name.
        logits = tf.log(tf.clip_by_value(y_pred, 1e-7, 1.0))

    # 3-rank tensor of shape [batch_size, img_rows, img_cols]
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
                                                          logits=logits)

    # Reduce explicitly to a single scalar so the result does not depend on
    # how Keras would reduce a per-pixel loss tensor.
    return tf.reduce_mean(loss)


# getting the number of GPUs
def get_available_gpus():
    """Return the names of all local devices whose type is ``'GPU'``."""
    devices = device_lib.list_local_devices()
    return [device.name for device in devices if device.device_type == 'GPU']


# getting the number of CPUs
def get_available_cpus():
    """Return the CPU count reported by ``multiprocessing.cpu_count()``."""
    return multiprocessing.cpu_count()


def draw_str(dst, target, s):
    """Draw the string ``s`` on image ``dst`` at ``target`` as white text with a black shadow.

    :param dst: image to draw on (modified in place by ``cv.putText``).
    :param target: ``(x, y)`` position handed to ``cv.putText``.
    :param s: text to draw.
    """
    x, y = target
    # Shadow first: black, thicker, shifted by one pixel right and down.
    cv.putText(dst, s, (x + 1, y + 1), cv.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness=2, lineType=cv.LINE_AA)
    cv.putText(dst, s, (x, y), cv.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv.LINE_AA)
def vgg16_model(img_rows, img_cols, channel=3):
    """Build a Sequential VGG16 classifier and load its pretrained weights.

    The network has five convolutional stages (layers ``conv1_1`` ..
    ``conv5_3``, each preceded by a zero-padding layer and each stage
    closed by 2x2 max pooling), followed by the ``flatten`` / ``dense1`` /
    ``dense2`` / ``softmax`` head. Weights are read from
    ``models/vgg16_weights_tf_dim_ordering_tf_kernels.h5``.

    :param img_rows: input height.
    :param img_cols: input width.
    :param channel: number of input channels.
    :return: the model with weights loaded.
    """
    # (filters, number of convolutions) for each encoder stage
    stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    net = Sequential()
    for stage_no, (filters, n_convs) in enumerate(stages, start=1):
        for conv_no in range(1, n_convs + 1):
            if stage_no == 1 and conv_no == 1:
                # The very first padding layer also declares the input shape.
                net.add(ZeroPadding2D((1, 1), input_shape=(img_rows, img_cols, channel), name='input'))
            else:
                net.add(ZeroPadding2D((1, 1)))
            net.add(Conv2D(filters, (3, 3), activation='relu',
                           name='conv{}_{}'.format(stage_no, conv_no)))
        net.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # Fully connected head
    net.add(Flatten(name='flatten'))
    net.add(Dense(4096, activation='relu', name='dense1'))
    net.add(Dropout(0.5))
    net.add(Dense(4096, activation='relu', name='dense2'))
    net.add(Dropout(0.5))
    net.add(Dense(1000, activation='softmax', name='softmax'))

    # Pretrained weights are loaded from a fixed path under models/
    net.load_weights('models/vgg16_weights_tf_dim_ordering_tf_kernels.h5')

    return net


if __name__ == '__main__':
    vgg = vgg16_model(224, 224, 3)
    print(vgg.summary())

    K.clear_session()