├── .gitignore
├── LICENSE
├── README.md
├── config.py
├── custom_layers
    └── unpooling_layer.py
├── data_generator.py
├── data_generator_depth.py
├── demo.py
├── demo_depth.py
├── depth_model.py
├── images
    ├── 0_image.png
    ├── 0_label.png
    ├── 0_out.png
    ├── 1_image.png
    ├── 1_label.png
    ├── 1_out.png
    ├── 2_image.png
    ├── 2_label.png
    ├── 2_out.png
    ├── 3_image.png
    ├── 3_label.png
    ├── 3_out.png
    ├── 4_image.png
    ├── 4_label.png
    ├── 4_out.png
    ├── 5_image.png
    ├── 5_label.png
    ├── 5_out.png
    ├── 6_image.png
    ├── 6_label.png
    ├── 6_out.png
    ├── 7_image.png
    ├── 7_label.png
    ├── 7_out.png
    ├── 8_image.png
    ├── 8_label.png
    ├── 8_out.png
    ├── 9_image.png
    ├── 9_label.png
    ├── 9_out.png
    ├── dataset.png
    ├── legend.png
    └── segnet.png
├── migrate.py
├── model.py
├── pre-process.py
├── train.py
├── train_depth.py
├── train_names.txt
├── unit_tests.py
├── utils.py
├── valid_names.txt
└── vgg16.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | __pycache__/
3 | temp/
4 | data/
5 | logs/
6 | models/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 刘杨
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 3D Object Detection
 2 | 
 3 | This repository performs indoor semantic segmentation with SegNet.
 4 | 
 5 | ## Dependencies
 6 | - [NumPy](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html)
 7 | - [Tensorflow](https://www.tensorflow.org/versions/r0.8/get_started/os_setup.html)
 8 | - [Keras](https://keras.io/#installation)
 9 | - [OpenCV](https://opencv-python-tutroals.readthedocs.io/en/latest/)
10 | 
11 | ## Dataset
12 | 
13 | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/dataset.png)
14 | 
15 | Follow the [instructions](http://buildingparser.stanford.edu/dataset.html#overview) to download the 2D-3D-S dataset.
16 | 
17 | ```bash
18 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_1_no_xyz.tar
19 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_2_no_xyz.tar
20 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_3_no_xyz.tar
21 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_4_no_xyz.tar
22 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_5a_no_xyz.tar
23 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_5b_no_xyz.tar
24 | $ wget https://storage.googleapis.com/3dsemantics/noXYZ/area_6_no_xyz.tar
25 | ```
26 | 
27 | ## Architecture
28 | 
29 | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/segnet.png)
30 | 
31 | 
32 | ## ImageNet Pretrained Models
33 | Download [VGG16](https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5) into the `models` folder.
34 | 
35 | ## Usage
36 | ### Data Pre-processing
37 | Extract training images:
38 | ```bash
39 | $ python pre-process.py
40 | ```
41 | 
42 | ### Train
43 | ```bash
44 | $ python train.py
45 | ```
46 | 
47 | If you want to visualize during training, run in your terminal:
48 | ```bash
49 | $ tensorboard --logdir path_to_current_dir/logs
50 | ```
51 | 
52 | ### Demo
53 | 
54 | ```bash
55 | $ python demo.py
56 | ```
57 | 
58 | |Input | GT | Output |
59 | |---|---|---|
60 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/0_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/0_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/0_out.png)|
61 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/1_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/1_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/1_out.png)|
62 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/2_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/2_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/2_out.png)|
63 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/3_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/3_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/3_out.png)|
64 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/4_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/4_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/4_out.png)|
65 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/5_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/5_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/5_out.png)|
66 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/6_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/6_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/6_out.png)|
67 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/7_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/7_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/7_out.png)|
68 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/8_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/8_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/8_out.png)|
69 | |![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/9_image.png)  | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/9_label.png) | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/9_out.png)|
70 | 
71 | ![image](https://github.com/foamliu/3D-Object-Detection/raw/master/images/legend.png)


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | img_rows, img_cols = 320, 320
 2 | img_rows_half, img_cols_half = 160, 160
 3 | channel = 4
 4 | batch_size = 30
 5 | epochs = 1000
 6 | patience = 50
 7 | num_samples = 43100
 8 | num_train_samples = 34480
 9 | # num_samples - num_train_samples
10 | num_valid_samples = 8620
11 | unknown = 128
12 | 
13 | # bgr     others      ceiling        floor            wall            column          beam             window         door            table         chair          bookcase       sofa           board           clutter
14 | colors = [[0, 0, 0], [86, 233, 234], [218, 166, 104], [75, 123, 190], [163, 173, 89], [137, 156, 246], [72, 185, 81], [64, 144, 113], [83, 84, 84], [127, 48, 41], [45, 43, 238], [115, 39, 99], [127, 116, 84], [236, 235, 235]]
15 | 
16 | num_classes = 14
17 | 


--------------------------------------------------------------------------------
/custom_layers/unpooling_layer.py:
--------------------------------------------------------------------------------
 1 | from keras import backend as K
 2 | from keras.engine.topology import Layer
 3 | from keras.layers import Reshape, Concatenate, Lambda, Multiply
 4 | 
 5 | 
 6 | class Unpooling(Layer):
 7 | 
 8 |     def __init__(self, **kwargs):
 9 |         super(Unpooling, self).__init__(**kwargs)
10 | 
11 |     def build(self, input_shape):
12 |         super(Unpooling, self).build(input_shape)
13 | 
14 |     def call(self, inputs, **kwargs):
15 |         x = inputs[:, 1]
16 |         # print('x.shape: ' + str(K.int_shape(x)))
17 |         bool_mask = Lambda(lambda t: K.greater_equal(t[:, 0], t[:, 1]),
18 |                            output_shape=K.int_shape(x)[1:])(inputs)
19 |         # print('bool_mask.shape: ' + str(K.int_shape(bool_mask)))
20 |         mask = Lambda(lambda t: K.cast(t, dtype='float32'))(bool_mask)
21 |         # print('mask.shape: ' + str(K.int_shape(mask)))
22 |         x = Multiply()([mask, x])
23 |         # print('x.shape: ' + str(K.int_shape(x)))
24 |         return x
25 | 
26 |     def compute_output_shape(self, input_shape):
27 |         return input_shape[0], input_shape[2], input_shape[3], input_shape[4]
28 | 


--------------------------------------------------------------------------------
/data_generator.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | from random import shuffle
  4 | 
  5 | import cv2 as cv
  6 | import numpy as np
  7 | from keras.utils import Sequence
  8 | 
  9 | from config import batch_size
 10 | from config import colors
 11 | from config import img_cols
 12 | from config import img_rows
 13 | from config import num_classes
 14 | 
 15 | train_folder = 'data/rgb'
 16 | depth_folder = 'data/depth'
 17 | semantic_folder = 'data/semantic'
 18 | 
 19 | 
 20 | def get_semantic(name):
 21 |     tokens = name.split('_')
 22 |     tokens[-1] = 'semantic_pretty.png'
 23 |     name = '_'.join(tokens)
 24 |     filename = os.path.join(semantic_folder, name)
 25 |     semantic = cv.imread(filename)
 26 |     return semantic
 27 | 
 28 | 
 29 | def get_y(semantic):
 30 |     temp = np.zeros(shape=(320, 320, num_classes), dtype=np.int32)
 31 |     semantic = np.array(semantic).astype(np.int32)
 32 |     for i in range(num_classes):
 33 |         temp[:, :, i] = np.sum(np.abs(semantic - colors[i]), axis=2)
 34 |     y = np.argmin(temp, axis=2)
 35 |     return y
 36 | 
 37 | 
 38 | def to_bgr(y_pred):
 39 |     ret = np.zeros((img_rows, img_cols, 3), np.float32)
 40 |     for r in range(320):
 41 |         for c in range(320):
 42 |             color_id = y_pred[r, c]
 43 |             # print("color_id: " + str(color_id))
 44 |             ret[r, c, :] = colors[color_id]
 45 |     ret = ret.astype(np.uint8)
 46 |     return ret
 47 | 
 48 | 
 49 | def random_choice(image_size, crop_size):
 50 |     height, width = image_size
 51 |     crop_height, crop_width = crop_size
 52 |     x = random.randint(0, max(0, width - crop_width))
 53 |     y = random.randint(0, max(0, height - crop_height))
 54 |     return x, y
 55 | 
 56 | 
 57 | def safe_crop(mat, x, y, crop_size):
 58 |     crop_height, crop_width = crop_size
 59 |     if len(mat.shape) == 2:
 60 |         ret = np.zeros((crop_height, crop_width), np.float32)
 61 |     else:
 62 |         ret = np.zeros((crop_height, crop_width, 3), np.float32)
 63 |     crop = mat[y:y + crop_height, x:x + crop_width]
 64 |     h, w = crop.shape[:2]
 65 |     ret[0:h, 0:w] = crop
 66 |     if crop_size != (320, 320):
 67 |         ret = cv.resize(ret, dsize=(img_rows, img_cols), interpolation=cv.INTER_CUBIC)
 68 |     return ret
 69 | 
 70 | 
 71 | class DataGenSequence(Sequence):
 72 |     def __init__(self, usage):
 73 |         self.usage = usage
 74 | 
 75 |         filename = '{}_names.txt'.format(usage)
 76 |         with open(filename, 'r') as f:
 77 |             self.names = f.read().splitlines()
 78 | 
 79 |         np.random.shuffle(self.names)
 80 | 
 81 |     def __len__(self):
 82 |         return int(np.ceil(len(self.names) / float(batch_size)))
 83 | 
 84 |     def __getitem__(self, idx):
 85 |         i = idx * batch_size
 86 | 
 87 |         length = min(batch_size, (len(self.names) - i))
 88 |         batch_x = np.empty((length, img_rows, img_cols, 3), dtype=np.float32)
 89 |         batch_y = np.empty((length, img_rows, img_cols), dtype=np.int32)
 90 | 
 91 |         for i_batch in range(length):
 92 |             name = self.names[i]
 93 |             filename = os.path.join(train_folder, name)
 94 |             image = cv.imread(filename)
 95 |             image_size = image.shape[:2]
 96 |             semantic = get_semantic(name)
 97 | 
 98 |             different_sizes = [(320, 320), (480, 480), (480, 480), (480, 480), (640, 640), (640, 640), (640, 640),
 99 |                                (960, 960), (960, 960), (960, 960)]
100 |             crop_size = random.choice(different_sizes)
101 | 
102 |             x, y = random_choice(image_size, crop_size)
103 |             image = safe_crop(image, x, y, crop_size)
104 |             semantic = safe_crop(semantic, x, y, crop_size)
105 | 
106 |             if np.random.random_sample() > 0.5:
107 |                 image = np.fliplr(image)
108 |                 semantic = np.fliplr(semantic)
109 | 
110 |             x = image / 255.
111 |             y = get_y(semantic)
112 | 
113 |             batch_x[i_batch, :, :, 0:3] = x
114 |             batch_y[i_batch, :, :] = y
115 | 
116 |             i += 1
117 | 
118 |         return batch_x, batch_y
119 | 
120 |     def on_epoch_end(self):
121 |         np.random.shuffle(self.names)
122 | 
123 | 
def train_gen():
    """Return a ``DataGenSequence`` built for the ``'train'`` usage."""
    return DataGenSequence(usage='train')
126 | 
127 | 
def valid_gen():
    """Return a ``DataGenSequence`` built for the ``'valid'`` usage."""
    return DataGenSequence(usage='valid')
130 | 
131 | 
def split_data():
    """Split the PNG files in ``data/rgb`` into training and validation lists.

    80% of the names (rounded down) go to ``train_names.txt`` and the rest to
    ``valid_names.txt``, one name per line, both in shuffled order.  The
    sample counts are printed along the way.
    """
    rgb_folder = 'data/rgb'
    names = [f for f in os.listdir(rgb_folder) if f.endswith('.png')]
    num_samples = len(names)
    print('num_samples: ' + str(num_samples))

    num_train_samples = int(num_samples * 0.8)
    print('num_train_samples: ' + str(num_train_samples))
    num_valid_samples = num_samples - num_train_samples
    print('num_valid_samples: ' + str(num_valid_samples))

    valid_names = random.sample(names, num_valid_samples)
    # Membership is tested against a set; checking each name against the
    # validation *list* is quadratic in the number of files.
    held_out = set(valid_names)
    train_names = [n for n in names if n not in held_out]
    random.shuffle(valid_names)
    random.shuffle(train_names)

    with open('valid_names.txt', 'w') as out_file:
        out_file.write('\n'.join(valid_names))

    with open('train_names.txt', 'w') as out_file:
        out_file.write('\n'.join(train_names))


if __name__ == '__main__':
    split_data()
156 | 


--------------------------------------------------------------------------------
/data_generator_depth.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | from random import shuffle
  4 | 
  5 | import cv2 as cv
  6 | import numpy as np
  7 | 
  8 | from config import img_cols
  9 | from config import img_rows
 10 | 
 11 | train_folder = 'data/rgb'
 12 | depth_folder = 'data/depth'
 13 | semantic_folder = 'data/semantic'
 14 | 
 15 | 
 16 | def get_depth(name):
 17 |     tokens = name.split('_')
 18 |     tokens[-1] = 'depth.png'
 19 |     name = '_'.join(tokens)
 20 |     filename = os.path.join(depth_folder, name)
 21 |     label = cv.imread(filename, 0)
 22 |     return label
 23 | 
 24 | 
 25 | def random_choice(image_size, crop_size):
 26 |     height, width = image_size
 27 |     crop_height, crop_width = crop_size
 28 |     x = random.randint(0, width - crop_width)
 29 |     y = random.randint(0, height - crop_height)
 30 |     return x, y
 31 | 
 32 | 
 33 | def safe_crop(mat, x, y, crop_size):
 34 |     crop_height, crop_width = crop_size
 35 |     if len(mat.shape) == 2:
 36 |         ret = np.zeros((crop_height, crop_width), np.float32)
 37 |     else:
 38 |         ret = np.zeros((crop_height, crop_width, 3), np.float32)
 39 |     crop = mat[y:y + crop_height, x:x + crop_width]
 40 |     h, w = crop.shape[:2]
 41 |     ret[0:h, 0:w] = crop
 42 |     if crop_size != (320, 320):
 43 |         ret = cv.resize(ret, dsize=(img_rows, img_cols), interpolation=cv.INTER_CUBIC)
 44 |     return ret
 45 | 
 46 | 
 47 | def data_gen(usage, batch_size):
 48 |     filename = '{}_names.txt'.format(usage)
 49 |     with open(filename, 'r') as f:
 50 |         names = f.read().splitlines()
 51 |     i = 0
 52 |     np.random.shuffle(names)
 53 |     while True:
 54 |         batch_x = np.empty((batch_size, img_rows, img_cols, 3), dtype=np.float32)
 55 |         batch_y = np.empty((batch_size, img_rows, img_cols, 1), dtype=np.float32)
 56 | 
 57 |         for i_batch in range(batch_size):
 58 |             name = names[i]
 59 |             filename = os.path.join(train_folder, name)
 60 |             image = cv.imread(filename)
 61 |             image_size = image.shape[:2]
 62 |             depth = get_depth(name)
 63 | 
 64 |             different_sizes = [(320, 320), (480, 480), (640, 640)]
 65 |             crop_size = random.choice(different_sizes)
 66 | 
 67 |             x, y = random_choice(image_size, crop_size)
 68 |             image = safe_crop(image, x, y, crop_size)
 69 |             depth = safe_crop(depth, x, y, crop_size)
 70 | 
 71 |             if np.random.random_sample() > 0.5:
 72 |                 image = np.fliplr(image)
 73 |                 depth = np.fliplr(depth)
 74 | 
 75 |             batch_x[i_batch, :, :, 0:3] = image / 255.
 76 |             batch_y[i_batch, :, :, 0] = depth / 255.
 77 | 
 78 |             i += 1
 79 |             if i >= len(names):
 80 |                 i = 0
 81 |                 np.random.shuffle(names)
 82 | 
 83 |         yield batch_x, batch_y
 84 | 
 85 | 
 86 | def train_gen(batch_size):
 87 |     return data_gen('train', batch_size)
 88 | 
 89 | 
 90 | def valid_gen(batch_size):
 91 |     return data_gen('valid', batch_size)
 92 | 
 93 | 
 94 | def split_data():
 95 |     train_folder = 'data/rgb'
 96 |     names = [f for f in os.listdir(train_folder) if f.endswith('.png')]
 97 |     num_samples = len(names)
 98 |     print('num_samples: ' + str(num_samples))
 99 |     num_train_samples = int(num_samples * 0.8)
100 |     print('num_train_samples: ' + str(num_train_samples))
101 |     num_valid_samples = num_samples - num_train_samples
102 |     print('num_valid_samples: ' + str(num_valid_samples))
103 |     valid_names = random.sample(names, num_valid_samples)
104 |     train_names = [n for n in names if n not in valid_names]
105 |     shuffle(valid_names)
106 |     shuffle(train_names)
107 | 
108 |     with open('valid_names.txt', 'w') as file:
109 |         file.write('\n'.join(valid_names))
110 | 
111 |     with open('train_names.txt', 'w') as file:
112 |         file.write('\n'.join(train_names))
113 | 
114 | 
115 | if __name__ == '__main__':
116 |     split_data()
117 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | # import the necessary packages
 2 | import os
 3 | import random
 4 | 
 5 | import cv2 as cv
 6 | import keras.backend as K
 7 | import numpy as np
 8 | 
 9 | from config import num_classes
10 | from data_generator import random_choice, safe_crop, to_bgr
11 | from model import build_encoder_decoder
12 | from utils import draw_str
13 | 
14 | 
def get_semantic(name):
    """Read the colour-coded test label image matching an RGB file name.

    The last ``_``-separated token of ``name`` is swapped for
    ``semantic_pretty.png`` and the resulting file is read from
    ``data/semantic_test/``.  The value returned is whatever ``cv.imread``
    produces for that path.
    """
    stem_tokens = name.split('_')[:-1]
    label_name = '_'.join(stem_tokens + ['semantic_pretty.png'])
    return cv.imread(os.path.join('data/semantic_test/', label_name))
23 | 
24 | 
if __name__ == '__main__':
    # Demo: run the segmentation model on up to ten random test images and
    # write the cropped input, the ground-truth label and the colourised
    # prediction to the ``images`` folder.
    img_rows, img_cols = 320, 320

    model_weights_path = 'models/model.64-2.1187.hdf5'
    model = build_encoder_decoder()
    model.load_weights(model_weights_path)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None".
    model.summary()

    rgb_test_path = 'data/rgb_test/'
    test_images = [f for f in os.listdir(rgb_test_path) if
                   os.path.isfile(os.path.join(rgb_test_path, f)) and f.endswith('.png')]

    # random.sample raises ValueError when asked for more items than exist.
    samples = random.sample(test_images, min(10, len(test_images)))

    different_sizes = [(320, 320), (480, 480), (480, 480), (480, 480), (640, 640), (640, 640), (640, 640),
                       (960, 960), (960, 960), (960, 960)]

    # Create the output folder once instead of checking on every iteration.
    if not os.path.exists('images'):
        os.makedirs('images')

    for i, image_name in enumerate(samples):
        filename = os.path.join(rgb_test_path, image_name)
        image = cv.imread(filename)
        label = get_semantic(image_name)
        image_size = image.shape[:2]
        crop_size = random.choice(different_sizes)

        # Crop image and label with the same window so they stay aligned.
        x, y = random_choice(image_size, crop_size)
        image = safe_crop(image, x, y, crop_size)
        label = safe_crop(label, x, y, crop_size)
        print('Start processing image: {}'.format(filename))

        x_test = np.empty((1, img_rows, img_cols, 3), dtype=np.float32)
        x_test[0, :, :, 0:3] = image / 255.

        # Highest-scoring class per pixel, rendered with the class palette.
        out = model.predict(x_test)
        out = np.reshape(out, (img_rows, img_cols, num_classes))
        out = np.argmax(out, axis=2)
        out = to_bgr(out)

        str_msg = 'crop_size: %s' % (str(crop_size))
        draw_str(out, (20, 20), str_msg)

        cv.imwrite('images/{}_image.png'.format(i), image)
        cv.imwrite('images/{}_out.png'.format(i), out)
        cv.imwrite('images/{}_label.png'.format(i), label)

    K.clear_session()
76 | 


--------------------------------------------------------------------------------
/demo_depth.py:
--------------------------------------------------------------------------------
 1 | # import the necessary packages
 2 | import os
 3 | import random
 4 | 
 5 | import cv2 as cv
 6 | import keras.backend as K
 7 | import numpy as np
 8 | 
 9 | from data_generator_depth import random_choice, safe_crop
10 | from depth_model import build_encoder_decoder
11 | 
12 | 
def get_depth(name):
    """Read the test depth image matching an RGB file name.

    The last ``_``-separated token of ``name`` is swapped for ``depth.png``
    and the resulting file is read from ``data/depth_test/`` with OpenCV read
    flag ``0``.  The value returned is whatever ``cv.imread`` produces.
    """
    stem_tokens = name.split('_')[:-1]
    depth_name = '_'.join(stem_tokens + ['depth.png'])
    return cv.imread(os.path.join('data/depth_test/', depth_name), 0)
21 | 
22 | 
if __name__ == '__main__':
    # Demo: run the depth model on up to ten random test images and write the
    # cropped input, the ground-truth depth and the predicted depth to the
    # ``images`` folder.
    img_rows, img_cols = 320, 320

    model_weights_path = 'models/depth_model.32-0.0085.hdf5'
    model = build_encoder_decoder()
    model.load_weights(model_weights_path)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None".
    model.summary()

    rgb_test_path = 'data/rgb_test/'
    test_images = [f for f in os.listdir(rgb_test_path) if
                   os.path.isfile(os.path.join(rgb_test_path, f)) and f.endswith('.png')]

    # random.sample raises ValueError when asked for more items than exist.
    samples = random.sample(test_images, min(10, len(test_images)))

    different_sizes = [(320, 320), (480, 480), (640, 640)]

    # Create the output folder once instead of checking on every iteration.
    if not os.path.exists('images'):
        os.makedirs('images')

    for i, image_name in enumerate(samples):
        filename = os.path.join(rgb_test_path, image_name)
        image = cv.imread(filename)
        label = get_depth(image_name)
        image_size = image.shape[:2]
        crop_size = random.choice(different_sizes)

        # Crop image and depth with the same window so they stay aligned.
        x, y = random_choice(image_size, crop_size)
        image = safe_crop(image, x, y, crop_size)
        label = safe_crop(label, x, y, crop_size)
        print('Start processing image: {}'.format(filename))

        x_test = np.empty((1, img_rows, img_cols, 3), dtype=np.float32)
        x_test[0, :, :, 0:3] = image / 255.

        # Scale the prediction by 255 and store it as an 8-bit image.
        out = model.predict(x_test)
        out = np.reshape(out, (img_rows, img_cols))
        out = out * 255.0
        out = out.astype(np.uint8)

        cv.imwrite('images/{}_depth_image.png'.format(i), image)
        cv.imwrite('images/{}_depth_out.png'.format(i), out)
        cv.imwrite('images/{}_depth_label.png'.format(i), label)

    K.clear_session()
72 | 


--------------------------------------------------------------------------------
/depth_model.py:
--------------------------------------------------------------------------------
  1 | import keras.backend as K
  2 | from keras.layers import Input, Conv2D, UpSampling2D, BatchNormalization, ZeroPadding2D, MaxPooling2D, Concatenate, Lambda
  3 | from keras.models import Model
  4 | from keras.utils import plot_model
  5 | 
  6 | from custom_layers.unpooling_layer import Unpooling
  7 | 
  8 | 
  9 | def build_encoder_decoder():
 10 |     kernel = 3
 11 | 
 12 |     # Encoder
 13 |     input_tensor = Input(shape=(320, 320, 3))
 14 |     x = ZeroPadding2D((1, 1))(input_tensor)
 15 |     x = Conv2D(64, (kernel, kernel), activation='relu', name='conv1_1')(x)
 16 |     x = ZeroPadding2D((1, 1))(x)
 17 |     x = Conv2D(64, (kernel, kernel), activation='relu', name='conv1_2')(x)
 18 |     orig_1 = x
 19 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 20 | 
 21 |     x = ZeroPadding2D((1, 1))(x)
 22 |     x = Conv2D(128, (kernel, kernel), activation='relu', name='conv2_1')(x)
 23 |     x = ZeroPadding2D((1, 1))(x)
 24 |     x = Conv2D(128, (kernel, kernel), activation='relu', name='conv2_2')(x)
 25 |     orig_2 = x
 26 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 27 | 
 28 |     x = ZeroPadding2D((1, 1))(x)
 29 |     x = Conv2D(256, (kernel, kernel), activation='relu', name='conv3_1')(x)
 30 |     x = ZeroPadding2D((1, 1))(x)
 31 |     x = Conv2D(256, (kernel, kernel), activation='relu', name='conv3_2')(x)
 32 |     x = ZeroPadding2D((1, 1))(x)
 33 |     x = Conv2D(256, (kernel, kernel), activation='relu', name='conv3_3')(x)
 34 |     orig_3 = x
 35 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 36 | 
 37 |     x = ZeroPadding2D((1, 1))(x)
 38 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv4_1')(x)
 39 |     x = ZeroPadding2D((1, 1))(x)
 40 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv4_2')(x)
 41 |     x = ZeroPadding2D((1, 1))(x)
 42 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv4_3')(x)
 43 |     orig_4 = x
 44 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 45 | 
 46 |     x = ZeroPadding2D((1, 1))(x)
 47 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv5_1')(x)
 48 |     x = ZeroPadding2D((1, 1))(x)
 49 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv5_2')(x)
 50 |     x = ZeroPadding2D((1, 1))(x)
 51 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv5_3')(x)
 52 |     orig_5 = x
 53 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 54 | 
 55 |     # Decoder
 56 |     x = UpSampling2D(size=(2, 2))(x)
 57 |     x = Unpooling(orig_5, (20, 20, 512))(x)
 58 |     x = Conv2D(512, (kernel, kernel), activation='relu', padding='same', name='deconv5_1', kernel_initializer='he_normal',
 59 |                bias_initializer='zeros')(x)
 60 |     x = BatchNormalization()(x)
 61 |     x = Conv2D(512, (kernel, kernel), activation='relu', padding='same', name='deconv5_2', kernel_initializer='he_normal',
 62 |                bias_initializer='zeros')(x)
 63 |     x = BatchNormalization()(x)
 64 |     x = Conv2D(512, (kernel, kernel), activation='relu', padding='same', name='deconv5_3', kernel_initializer='he_normal',
 65 |                bias_initializer='zeros')(x)
 66 |     x = BatchNormalization()(x)
 67 | 
 68 |     x = UpSampling2D(size=(2, 2))(x)
 69 |     x = Unpooling(orig_4, (40, 40, 512))(x)
 70 |     x = Conv2D(256, (kernel, kernel), activation='relu', padding='same', name='deconv4_1', kernel_initializer='he_normal',
 71 |                bias_initializer='zeros')(x)
 72 |     x = BatchNormalization()(x)
 73 |     x = Conv2D(256, (kernel, kernel), activation='relu', padding='same', name='deconv4_2', kernel_initializer='he_normal',
 74 |                bias_initializer='zeros')(x)
 75 |     x = BatchNormalization()(x)
 76 |     x = Conv2D(256, (kernel, kernel), activation='relu', padding='same', name='deconv4_3', kernel_initializer='he_normal',
 77 |                bias_initializer='zeros')(x)
 78 |     x = BatchNormalization()(x)
 79 | 
 80 |     x = UpSampling2D(size=(2, 2))(x)
 81 |     x = Unpooling(orig_3, (80, 80, 256))(x)
 82 |     x = Conv2D(128, (kernel, kernel), activation='relu', padding='same', name='deconv3_1', kernel_initializer='he_normal',
 83 |                bias_initializer='zeros')(x)
 84 |     x = BatchNormalization()(x)
 85 |     x = Conv2D(128, (kernel, kernel), activation='relu', padding='same', name='deconv3_2', kernel_initializer='he_normal',
 86 |                bias_initializer='zeros')(x)
 87 |     x = BatchNormalization()(x)
 88 |     x = Conv2D(128, (kernel, kernel), activation='relu', padding='same', name='deconv3_3', kernel_initializer='he_normal',
 89 |                bias_initializer='zeros')(x)
 90 |     x = BatchNormalization()(x)
 91 | 
 92 |     x = UpSampling2D(size=(2, 2))(x)
 93 |     x = Unpooling(orig_2, (160, 160, 128))(x)
 94 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv2_1', kernel_initializer='he_normal',
 95 |                bias_initializer='zeros')(x)
 96 |     x = BatchNormalization()(x)
 97 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv2_2', kernel_initializer='he_normal',
 98 |                bias_initializer='zeros')(x)
 99 |     x = BatchNormalization()(x)
100 | 
101 |     x = UpSampling2D(size=(2, 2))(x)
102 |     x = Unpooling(orig_1, (320, 320, 64))(x)
103 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv1_1', kernel_initializer='he_normal',
104 |                bias_initializer='zeros')(x)
105 |     x = BatchNormalization()(x)
106 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv1_2', kernel_initializer='he_normal',
107 |                bias_initializer='zeros')(x)
108 |     x = BatchNormalization()(x)
109 | 
110 |     x = Conv2D(1, (1, 1), activation='sigmoid', padding='valid', name='pred', kernel_initializer='he_normal',
111 |                bias_initializer='zeros')(x)
112 | 
113 |     model = Model(inputs=input_tensor, outputs=x)
114 |     return model
115 | 
116 | 
if __name__ == '__main__':
    # Build the network, print its layer table and export the graph as SVG.
    encoder_decoder = build_encoder_decoder()
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None".
    encoder_decoder.summary()
    plot_model(encoder_decoder, to_file='encoder_decoder.svg', show_layer_names=True, show_shapes=True)

    K.clear_session()
124 | 


--------------------------------------------------------------------------------
/images/0_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/0_image.png


--------------------------------------------------------------------------------
/images/0_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/0_label.png


--------------------------------------------------------------------------------
/images/0_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/0_out.png


--------------------------------------------------------------------------------
/images/1_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/1_image.png


--------------------------------------------------------------------------------
/images/1_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/1_label.png


--------------------------------------------------------------------------------
/images/1_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/1_out.png


--------------------------------------------------------------------------------
/images/2_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/2_image.png


--------------------------------------------------------------------------------
/images/2_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/2_label.png


--------------------------------------------------------------------------------
/images/2_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/2_out.png


--------------------------------------------------------------------------------
/images/3_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/3_image.png


--------------------------------------------------------------------------------
/images/3_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/3_label.png


--------------------------------------------------------------------------------
/images/3_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/3_out.png


--------------------------------------------------------------------------------
/images/4_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/4_image.png


--------------------------------------------------------------------------------
/images/4_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/4_label.png


--------------------------------------------------------------------------------
/images/4_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/4_out.png


--------------------------------------------------------------------------------
/images/5_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/5_image.png


--------------------------------------------------------------------------------
/images/5_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/5_label.png


--------------------------------------------------------------------------------
/images/5_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/5_out.png


--------------------------------------------------------------------------------
/images/6_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/6_image.png


--------------------------------------------------------------------------------
/images/6_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/6_label.png


--------------------------------------------------------------------------------
/images/6_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/6_out.png


--------------------------------------------------------------------------------
/images/7_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/7_image.png


--------------------------------------------------------------------------------
/images/7_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/7_label.png


--------------------------------------------------------------------------------
/images/7_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/7_out.png


--------------------------------------------------------------------------------
/images/8_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/8_image.png


--------------------------------------------------------------------------------
/images/8_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/8_label.png


--------------------------------------------------------------------------------
/images/8_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/8_out.png


--------------------------------------------------------------------------------
/images/9_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/9_image.png


--------------------------------------------------------------------------------
/images/9_label.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/9_label.png


--------------------------------------------------------------------------------
/images/9_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/9_out.png


--------------------------------------------------------------------------------
/images/dataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/dataset.png


--------------------------------------------------------------------------------
/images/legend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/legend.png


--------------------------------------------------------------------------------
/images/segnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/foamliu/3D-Object-Detection/1c5ae0b470f08abe652f38817393ebd0a8cf3ea9/images/segnet.png


--------------------------------------------------------------------------------
/migrate.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | import numpy as np
 3 | 
 4 | from depth_model import build_encoder_decoder
 5 | from vgg16 import vgg16_model
 6 | 
 7 | 
 8 | def migrate_model(new_model):
 9 |     old_model = vgg16_model(224, 224, 3)
10 |     # print(old_model.summary())
11 |     old_layers = [l for l in old_model.layers]
12 |     new_layers = [l for l in new_model.layers]
13 | 
14 |     old_conv1_1 = old_model.get_layer('conv1_1')
15 |     old_weights = old_conv1_1.get_weights()[0]
16 |     old_biases = old_conv1_1.get_weights()[1]
17 |     new_weights = np.zeros((3, 3, 3, 64), dtype=np.float32)
18 |     new_weights[:, :, 0:3, :] = old_weights
19 |     new_conv1_1 = new_model.get_layer('conv1_1')
20 |     new_conv1_1.set_weights([new_weights, old_biases])
21 | 
22 |     for i in range(2, 31):
23 |         old_layer = old_layers[i]
24 |         new_layer = new_layers[i + 1]
25 |         new_layer.set_weights(old_layer.get_weights())
26 | 
27 |     del old_model
28 | 
29 | 
if __name__ == '__main__':
    # Build the depth model, seed it with VGG16 weights and store the result on disk.
    import os  # local import: only this script entry point needs it

    model = build_encoder_decoder()
    migrate_model(model)
    # Model.summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()

    # save_weights() does not create missing directories; make sure the target exists.
    if not os.path.exists('models'):
        os.makedirs('models')
    model.save_weights('models/model_weights.h5')

    K.clear_session()
37 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import keras.backend as K
  2 | from keras.layers import Input, Conv2D, UpSampling2D, BatchNormalization, ZeroPadding2D, MaxPooling2D, Concatenate, Lambda, Reshape
  3 | from keras.models import Model
  4 | from keras.utils import plot_model
  5 | 
  6 | from custom_layers.unpooling_layer import Unpooling
  7 | 
  8 | 
  9 | def build_encoder_decoder():
 10 |     num_labels = 14
 11 |     kernel = 3
 12 | 
 13 |     # Encoder
 14 |     input_tensor = Input(shape=(320, 320, 3))
 15 |     x = ZeroPadding2D((1, 1))(input_tensor)
 16 |     x = Conv2D(64, (kernel, kernel), activation='relu', name='conv1_1')(x)
 17 |     x = ZeroPadding2D((1, 1))(x)
 18 |     x = Conv2D(64, (kernel, kernel), activation='relu', name='conv1_2')(x)
 19 |     orig_1 = x
 20 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 21 | 
 22 |     x = ZeroPadding2D((1, 1))(x)
 23 |     x = Conv2D(128, (kernel, kernel), activation='relu', name='conv2_1')(x)
 24 |     x = ZeroPadding2D((1, 1))(x)
 25 |     x = Conv2D(128, (kernel, kernel), activation='relu', name='conv2_2')(x)
 26 |     orig_2 = x
 27 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 28 | 
 29 |     x = ZeroPadding2D((1, 1))(x)
 30 |     x = Conv2D(256, (kernel, kernel), activation='relu', name='conv3_1')(x)
 31 |     x = ZeroPadding2D((1, 1))(x)
 32 |     x = Conv2D(256, (kernel, kernel), activation='relu', name='conv3_2')(x)
 33 |     x = ZeroPadding2D((1, 1))(x)
 34 |     x = Conv2D(256, (kernel, kernel), activation='relu', name='conv3_3')(x)
 35 |     orig_3 = x
 36 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 37 | 
 38 |     x = ZeroPadding2D((1, 1))(x)
 39 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv4_1')(x)
 40 |     x = ZeroPadding2D((1, 1))(x)
 41 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv4_2')(x)
 42 |     x = ZeroPadding2D((1, 1))(x)
 43 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv4_3')(x)
 44 |     orig_4 = x
 45 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 46 | 
 47 |     x = ZeroPadding2D((1, 1))(x)
 48 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv5_1')(x)
 49 |     x = ZeroPadding2D((1, 1))(x)
 50 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv5_2')(x)
 51 |     x = ZeroPadding2D((1, 1))(x)
 52 |     x = Conv2D(512, (kernel, kernel), activation='relu', name='conv5_3')(x)
 53 |     orig_5 = x
 54 |     x = MaxPooling2D((2, 2), strides=(2, 2))(x)
 55 | 
 56 |     # Decoder
 57 |     x = UpSampling2D(size=(2, 2))(x)
 58 |     the_shape = K.int_shape(orig_5)
 59 |     shape = (1, the_shape[1], the_shape[2], the_shape[3])
 60 |     origReshaped = Reshape(shape)(orig_5)
 61 |     xReshaped = Reshape(shape)(x)
 62 |     together = Concatenate(axis=1)([origReshaped, xReshaped])
 63 |     x = Unpooling()(together)
 64 |     x = Conv2D(512, (kernel, kernel), activation='relu', padding='same', name='deconv5_1', kernel_initializer='he_normal',
 65 |                bias_initializer='zeros')(x)
 66 |     x = BatchNormalization()(x)
 67 |     x = Conv2D(512, (kernel, kernel), activation='relu', padding='same', name='deconv5_2', kernel_initializer='he_normal',
 68 |                bias_initializer='zeros')(x)
 69 |     x = BatchNormalization()(x)
 70 |     x = Conv2D(512, (kernel, kernel), activation='relu', padding='same', name='deconv5_3', kernel_initializer='he_normal',
 71 |                bias_initializer='zeros')(x)
 72 |     x = BatchNormalization()(x)
 73 | 
 74 |     x = UpSampling2D(size=(2, 2))(x)
 75 |     the_shape = K.int_shape(orig_4)
 76 |     shape = (1, the_shape[1], the_shape[2], the_shape[3])
 77 |     origReshaped = Reshape(shape)(orig_4)
 78 |     xReshaped = Reshape(shape)(x)
 79 |     together = Concatenate(axis=1)([origReshaped, xReshaped])
 80 |     x = Unpooling()(together)
 81 |     x = Conv2D(256, (kernel, kernel), activation='relu', padding='same', name='deconv4_1', kernel_initializer='he_normal',
 82 |                bias_initializer='zeros')(x)
 83 |     x = BatchNormalization()(x)
 84 |     x = Conv2D(256, (kernel, kernel), activation='relu', padding='same', name='deconv4_2', kernel_initializer='he_normal',
 85 |                bias_initializer='zeros')(x)
 86 |     x = BatchNormalization()(x)
 87 |     x = Conv2D(256, (kernel, kernel), activation='relu', padding='same', name='deconv4_3', kernel_initializer='he_normal',
 88 |                bias_initializer='zeros')(x)
 89 |     x = BatchNormalization()(x)
 90 | 
 91 |     x = UpSampling2D(size=(2, 2))(x)
 92 |     the_shape = K.int_shape(orig_3)
 93 |     shape = (1, the_shape[1], the_shape[2], the_shape[3])
 94 |     origReshaped = Reshape(shape)(orig_3)
 95 |     xReshaped = Reshape(shape)(x)
 96 |     together = Concatenate(axis=1)([origReshaped, xReshaped])
 97 |     x = Unpooling()(together)
 98 |     x = Conv2D(128, (kernel, kernel), activation='relu', padding='same', name='deconv3_1', kernel_initializer='he_normal',
 99 |                bias_initializer='zeros')(x)
100 |     x = BatchNormalization()(x)
101 |     x = Conv2D(128, (kernel, kernel), activation='relu', padding='same', name='deconv3_2', kernel_initializer='he_normal',
102 |                bias_initializer='zeros')(x)
103 |     x = BatchNormalization()(x)
104 |     x = Conv2D(128, (kernel, kernel), activation='relu', padding='same', name='deconv3_3', kernel_initializer='he_normal',
105 |                bias_initializer='zeros')(x)
106 |     x = BatchNormalization()(x)
107 | 
108 |     x = UpSampling2D(size=(2, 2))(x)
109 |     the_shape = K.int_shape(orig_2)
110 |     shape = (1, the_shape[1], the_shape[2], the_shape[3])
111 |     origReshaped = Reshape(shape)(orig_2)
112 |     xReshaped = Reshape(shape)(x)
113 |     together = Concatenate(axis=1)([origReshaped, xReshaped])
114 |     x = Unpooling()(together)
115 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv2_1', kernel_initializer='he_normal',
116 |                bias_initializer='zeros')(x)
117 |     x = BatchNormalization()(x)
118 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv2_2', kernel_initializer='he_normal',
119 |                bias_initializer='zeros')(x)
120 |     x = BatchNormalization()(x)
121 | 
122 |     x = UpSampling2D(size=(2, 2))(x)
123 |     the_shape = K.int_shape(orig_1)
124 |     shape = (1, the_shape[1], the_shape[2], the_shape[3])
125 |     origReshaped = Reshape(shape)(orig_1)
126 |     xReshaped = Reshape(shape)(x)
127 |     together = Concatenate(axis=1)([origReshaped, xReshaped])
128 |     x = Unpooling()(together)
129 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv1_1', kernel_initializer='he_normal',
130 |                bias_initializer='zeros')(x)
131 |     x = BatchNormalization()(x)
132 |     x = Conv2D(64, (kernel, kernel), activation='relu', padding='same', name='deconv1_2', kernel_initializer='he_normal',
133 |                bias_initializer='zeros')(x)
134 |     x = BatchNormalization()(x)
135 | 
136 |     x = Conv2D(num_labels, (1, 1), activation='softmax', padding='valid', name='pred', kernel_initializer='he_normal',
137 |                bias_initializer='zeros')(x)
138 | 
139 |     model = Model(inputs=input_tensor, outputs=x)
140 |     return model
141 | 
142 | 
if __name__ == '__main__':
    # Build the segmentation network, print its layer table and render the graph to an SVG file.
    encoder_decoder = build_encoder_decoder()
    # Model.summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line to the output.
    encoder_decoder.summary()
    plot_model(encoder_decoder, to_file='encoder_decoder.svg', show_layer_names=True, show_shapes=True)

    K.clear_session()
150 | 


--------------------------------------------------------------------------------
/pre-process.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import shutil
 5 | import tarfile
 6 | 
 7 | if __name__ == '__main__':
 8 | 
 9 |     if not os.path.exists('data/rgb'):
10 |         os.makedirs('data/rgb')
11 |     area_nos = [1, 2, 3, 4, 6]
12 |     for area_no in area_nos:
13 |         tar_file = 'area_{}_no_xyz.tar'.format(area_no)
14 |         filename = os.path.join('data', tar_file)
15 |         print('Extracting {}...'.format(filename))
16 | 
17 |         with tarfile.open(filename) as tar:
18 |             tar.extractall()
19 | 
20 |         folder = 'area_{}'.format(area_no)
21 | 
22 |         for f in [f for f in os.listdir(os.path.join(folder, 'data/rgb')) if f.endswith('.png')]:
23 |             src_path = os.path.join(folder, 'data/rgb')
24 |             src_path = os.path.join(src_path, f)
25 |             dst_path = 'data/rgb/'
26 |             shutil.move(src_path, dst_path)
27 | 
28 |         if not os.path.exists('data/depth'):
29 |             os.makedirs('data/depth')
30 |         for f in [f for f in os.listdir(os.path.join(folder, 'data/depth')) if f.endswith('.png')]:
31 |             src_path = os.path.join(folder, 'data/depth')
32 |             src_path = os.path.join(src_path, f)
33 |             dst_path = 'data/depth/'
34 |             shutil.move(src_path, dst_path)
35 | 
36 |         if not os.path.exists('data/semantic'):
37 |             os.makedirs('data/semantic')
38 |         for f in [f for f in os.listdir(os.path.join(folder, 'data/semantic_pretty')) if f.endswith('.png')]:
39 |             src_path = os.path.join(folder, 'data/semantic_pretty')
40 |             src_path = os.path.join(src_path, f)
41 |             dst_path = 'data/semantic/'
42 |             shutil.move(src_path, dst_path)
43 | 
44 |         shutil.rmtree(folder)
45 | 
46 |     if not os.path.exists('data/rgb_test'):
47 |         os.makedirs('data/rgb_test')
48 |     area_nos = ['5a', '5b']
49 |     for area_no in area_nos:
50 |         tar_file = 'area_{}_no_xyz.tar'.format(area_no)
51 |         filename = os.path.join('data', tar_file)
52 |         print('Extracting {}...'.format(filename))
53 | 
54 |         with tarfile.open(filename) as tar:
55 |             tar.extractall()
56 | 
57 |         folder = 'area_{}'.format(area_no)
58 |         if not os.path.exists('data/rgb_test'):
59 |             os.makedirs('data/rgb_test')
60 |         for f in [f for f in os.listdir(os.path.join(folder, 'data/rgb')) if f.endswith('.png')]:
61 |             src_path = os.path.join(folder, 'data/rgb')
62 |             src_path = os.path.join(src_path, f)
63 |             dst_path = 'data/rgb_test/'
64 |             shutil.move(src_path, dst_path)
65 | 
66 |         if not os.path.exists('data/depth_test'):
67 |             os.makedirs('data/depth_test')
68 |         for f in [f for f in os.listdir(os.path.join(folder, 'data/depth')) if f.endswith('.png')]:
69 |             src_path = os.path.join(folder, 'data/depth')
70 |             src_path = os.path.join(src_path, f)
71 |             dst_path = 'data/depth_test/'
72 |             shutil.move(src_path, dst_path)
73 | 
74 |         if not os.path.exists('data/semantic_test'):
75 |             os.makedirs('data/semantic_test')
76 |         for f in [f for f in os.listdir(os.path.join(folder, 'data/semantic_pretty')) if f.endswith('.png')]:
77 |             src_path = os.path.join(folder, 'data/semantic_pretty')
78 |             src_path = os.path.join(src_path, f)
79 |             dst_path = 'data/semantic_test/'
80 |             shutil.move(src_path, dst_path)
81 | 
82 |     image_names = [f for f in os.listdir('data/rgb') if f.endswith('.png')]
83 |     print('{} images'.format(len(image_names)))
84 |     depth_names = [f for f in os.listdir('data/depth') if f.endswith('.png')]
85 |     print('{} depths'.format(len(depth_names)))
86 |     semantic_names = [f for f in os.listdir('data/semantic') if f.endswith('.png')]
87 |     print('{} semantics'.format(len(semantic_names)))
88 | 
89 |     image_names = [f for f in os.listdir('data/rgb_test') if f.endswith('.png')]
90 |     print('{} test images'.format(len(image_names)))
91 |     depth_names = [f for f in os.listdir('data/depth_test') if f.endswith('.png')]
92 |     print('{} test depths'.format(len(depth_names)))
93 |     semantic_names = [f for f in os.listdir('data/semantic_test') if f.endswith('.png')]
94 |     print('{} test semantics'.format(len(semantic_names)))
95 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import keras
 4 | import tensorflow as tf
 5 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
 6 | from keras.utils import multi_gpu_model
 7 | 
 8 | import migrate
 9 | from config import patience, epochs, num_train_samples, num_valid_samples, batch_size
10 | from data_generator import train_gen, valid_gen
11 | from model import build_encoder_decoder
12 | from utils import get_available_gpus, get_available_cpus, sparse_cross_entropy
13 | 
if __name__ == '__main__':
    # Parse arguments
    ap = argparse.ArgumentParser()
    # The path is only ever passed to load_weights(), so the help text says "load", not "save".
    ap.add_argument("-p", "--pretrained", help="path to pretrained model weights to load")
    args = vars(ap.parse_args())
    pretrained_path = args["pretrained"]
    checkpoint_models_path = 'models/'

    # Callbacks
    tensor_board = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)
    model_names = checkpoint_models_path + 'model.{epoch:02d}-{val_loss:.4f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_names, monitor='val_loss', verbose=1, save_best_only=True)
    early_stop = EarlyStopping('val_loss', patience=patience)
    reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, patience=int(patience / 4), verbose=1)


    class MyCbk(keras.callbacks.Callback):
        """Checkpoint callback that saves a given model after every epoch.

        Used in the multi-GPU branch so that the original template model is
        written to disk instead of the multi-GPU wrapper.
        """

        def __init__(self, model):
            keras.callbacks.Callback.__init__(self)
            self.model_to_save = model

        def on_epoch_end(self, epoch, logs=None):
            fmt = checkpoint_models_path + 'model.%02d-%.4f.hdf5'
            # Keras passes a 0-based epoch index, while ModelCheckpoint names its
            # files with epoch + 1; use the same numbering so both checkpoint
            # paths produce matching file names.
            self.model_to_save.save(fmt % (epoch + 1, logs['val_loss']))


    # Load our model, added support for Multi-GPUs
    num_gpu = len(get_available_gpus())
    if num_gpu >= 2:
        # Build the template model on the CPU, then replicate it across the GPUs.
        with tf.device("/cpu:0"):
            model = build_encoder_decoder()
            if pretrained_path is not None:
                model.load_weights(pretrained_path)
            else:
                migrate.migrate_model(model)

        new_model = multi_gpu_model(model, gpus=num_gpu)
        # rewrite the callback: saving through the original model and not the multi-gpu model.
        model_checkpoint = MyCbk(model)
    else:
        new_model = build_encoder_decoder()
        if pretrained_path is not None:
            new_model.load_weights(pretrained_path)
        else:
            migrate.migrate_model(new_model)

    # Integer label maps are fed through an explicit int32 placeholder of rank 3.
    decoder_target = tf.placeholder(dtype='int32', shape=(None, None, None))
    new_model.compile(optimizer='nadam', loss=sparse_cross_entropy, target_tensors=[decoder_target])

    # Model.summary() prints the table itself and returns None, so it is not wrapped in print().
    new_model.summary()

    # Final callbacks
    callbacks = [tensor_board, model_checkpoint, early_stop, reduce_lr]

    # Start Fine-tuning
    new_model.fit_generator(train_gen(),
                            steps_per_epoch=num_train_samples // batch_size,
                            validation_data=valid_gen(),
                            validation_steps=num_valid_samples // batch_size,
                            epochs=epochs,
                            verbose=1,
                            callbacks=callbacks,
                            use_multiprocessing=True,
                            workers=int(get_available_cpus() * 0.80)
                            )
82 | 


--------------------------------------------------------------------------------
/train_depth.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import keras
 4 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
 5 | 
 6 | import migrate
 7 | from config import patience, epochs, num_train_samples, num_valid_samples
 8 | from data_generator_depth import train_gen, valid_gen
 9 | from depth_model import build_encoder_decoder
10 | from utils import depth_loss
11 | 
if __name__ == '__main__':
    # Parse arguments
    ap = argparse.ArgumentParser()
    # The path is only ever passed to load_weights(), so the help text says "load", not "save".
    ap.add_argument("-p", "--pretrained", help="path to pretrained model weights to load")
    args = vars(ap.parse_args())
    pretrained_path = args["pretrained"]

    checkpoint_models_path = 'models/'

    # Callbacks
    tensor_board = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True)
    model_names = checkpoint_models_path + 'depth_model.{epoch:02d}-{val_loss:.4f}.hdf5'
    model_checkpoint = ModelCheckpoint(model_names, monitor='val_loss', verbose=1, save_best_only=True)
    early_stop = EarlyStopping('val_loss', patience=patience)
    reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, patience=int(patience / 4), verbose=1)

    # Start either from the given weights file or from migrated VGG16 weights.
    model = build_encoder_decoder()
    if pretrained_path is not None:
        model.load_weights(pretrained_path)
    else:
        migrate.migrate_model(model)

    model.compile(optimizer='nadam', loss=depth_loss)

    # Model.summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()

    # Final callbacks
    callbacks = [tensor_board, model_checkpoint, early_stop, reduce_lr]

    batch_size = 14

    # Start Fine-tuning
    model.fit_generator(train_gen(batch_size),
                        steps_per_epoch=num_train_samples // batch_size,
                        validation_data=valid_gen(batch_size),
                        validation_steps=num_valid_samples // batch_size,
                        epochs=epochs,
                        verbose=1,
                        callbacks=callbacks,
                        use_multiprocessing=True,
                        workers=4
                        )
55 | 


--------------------------------------------------------------------------------
/unit_tests.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import unittest
 3 | import cv2 as cv
 4 | from data_generator_depth import train_gen
 5 | 
 6 | 
 7 | class TestStringMethods(unittest.TestCase):
 8 | 
 9 |     def test_data_generator(self):
10 |         iter = train_gen()
11 |         batch_x, batch_y = next(iter)
12 |         for i in range(len(batch_x)):
13 |             x = batch_x[i]
14 |             y = batch_y[i]
15 |             x = (x * 255.).astype(np.uint8)
16 |             y = (y * 255.).astype(np.uint8)
17 |             cv.imwrite('temp/test_data_generator_x_{}.png'.format(i), x)
18 |             cv.imwrite('temp/test_data_generator_y_{}.png'.format(i), y)
19 | 
20 | 
# Run every test case defined in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
23 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import multiprocessing
 2 | 
 3 | import cv2 as cv
 4 | import tensorflow as tf
 5 | from tensorflow.python.client import device_lib
 6 | 
 7 | 
 8 | def sparse_cross_entropy(y_true, y_pred):
 9 |     """
10 |     Calculate the cross-entropy loss between y_true and y_pred.
11 | 
12 |     y_true is a 3-rank tensor with the desired output.
13 |     The shape is [batch_size, img_rows, img_cols].
14 | 
15 |     y_pred is the decoder's output which is a 4-rank tensor
16 |     with shape [batch_size, img_rows, img_cols, num_labels]
17 |     so that for each image in the batch there is a one-hot
18 |     encoded array of length num_labels.
19 |     """
20 | 
21 |     # Calculate the loss. This outputs a
22 |     # 3-rank tensor of shape [batch_size, img_rows, img_cols]
23 |     loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true,
24 |                                                           logits=y_pred)
25 | 
26 |     # Keras may reduce this across the first axis (the batch)
27 |     # but the semantics are unclear, so to be sure we use
28 |     # the loss across the entire 3-rank tensor, we reduce it
29 |     # to a single scalar with the mean function.
30 |     loss_mean = tf.reduce_mean(loss)
31 | 
32 |     return loss_mean
33 | 
34 | 
def get_available_gpus():
    """Return the names of all local devices whose device_type is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names
39 | 
40 | 
def get_available_cpus():
    """Return the number of CPUs as reported by multiprocessing.cpu_count()."""
    cpu_total = multiprocessing.cpu_count()
    return cpu_total
44 | 
45 | 
def draw_str(dst, target, s):
    """
    Draw the text s onto the image dst at target = (x, y).

    The text is rendered twice: first in black, offset by one pixel and
    with thickness 2, then in white at the requested position, so the
    white text sits on top of a dark outline/shadow.
    """
    x, y = target
    font = cv.FONT_HERSHEY_PLAIN
    scale = 1.0
    # Dark copy first so the light text drawn afterwards ends up on top.
    cv.putText(dst, s, (x + 1, y + 1), font, scale, (0, 0, 0),
               thickness=2, lineType=cv.LINE_AA)
    cv.putText(dst, s, (x, y), font, scale, (255, 255, 255),
               lineType=cv.LINE_AA)
50 | 


--------------------------------------------------------------------------------
/vgg16.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import keras.backend as K
 4 | from keras.layers import Conv2D, ZeroPadding2D, MaxPooling2D
 5 | from keras.layers import Dense, Dropout, Flatten
 6 | from keras.models import Sequential
 7 | 
 8 | 
 9 | def vgg16_model(img_rows, img_cols, channel=3):
10 |     model = Sequential()
11 |     # Encoder
12 |     model.add(ZeroPadding2D((1, 1), input_shape=(img_rows, img_cols, channel), name='input'))
13 |     model.add(Conv2D(64, (3, 3), activation='relu', name='conv1_1'))
14 |     model.add(ZeroPadding2D((1, 1)))
15 |     model.add(Conv2D(64, (3, 3), activation='relu', name='conv1_2'))
16 |     model.add(MaxPooling2D((2, 2), strides=(2, 2)))
17 | 
18 |     model.add(ZeroPadding2D((1, 1)))
19 |     model.add(Conv2D(128, (3, 3), activation='relu', name='conv2_1'))
20 |     model.add(ZeroPadding2D((1, 1)))
21 |     model.add(Conv2D(128, (3, 3), activation='relu', name='conv2_2'))
22 |     model.add(MaxPooling2D((2, 2), strides=(2, 2)))
23 | 
24 |     model.add(ZeroPadding2D((1, 1)))
25 |     model.add(Conv2D(256, (3, 3), activation='relu', name='conv3_1'))
26 |     model.add(ZeroPadding2D((1, 1)))
27 |     model.add(Conv2D(256, (3, 3), activation='relu', name='conv3_2'))
28 |     model.add(ZeroPadding2D((1, 1)))
29 |     model.add(Conv2D(256, (3, 3), activation='relu', name='conv3_3'))
30 |     model.add(MaxPooling2D((2, 2), strides=(2, 2)))
31 | 
32 |     model.add(ZeroPadding2D((1, 1)))
33 |     model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_1'))
34 |     model.add(ZeroPadding2D((1, 1)))
35 |     model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_2'))
36 |     model.add(ZeroPadding2D((1, 1)))
37 |     model.add(Conv2D(512, (3, 3), activation='relu', name='conv4_3'))
38 |     model.add(MaxPooling2D((2, 2), strides=(2, 2)))
39 | 
40 |     model.add(ZeroPadding2D((1, 1)))
41 |     model.add(Conv2D(512, (3, 3), activation='relu', name='conv5_1'))
42 |     model.add(ZeroPadding2D((1, 1)))
43 |     model.add(Conv2D(512, (3, 3), activation='relu', name='conv5_2'))
44 |     model.add(ZeroPadding2D((1, 1)))
45 |     model.add(Conv2D(512, (3, 3), activation='relu', name='conv5_3'))
46 |     model.add(MaxPooling2D((2, 2), strides=(2, 2)))
47 | 
48 |     # Add Fully Connected Layer
49 |     model.add(Flatten(name='flatten'))
50 |     model.add(Dense(4096, activation='relu', name='dense1'))
51 |     model.add(Dropout(0.5))
52 |     model.add(Dense(4096, activation='relu', name='dense2'))
53 |     model.add(Dropout(0.5))
54 |     model.add(Dense(1000, activation='softmax', name='softmax'))
55 | 
56 |     # Loads ImageNet pre-trained data
57 |     weights_path = 'models/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
58 |     model.load_weights(weights_path)
59 | 
60 |     return model
61 | 
62 | 
if __name__ == '__main__':
    # Build the network for 224x224 RGB input and show its layer table.
    model = vgg16_model(224, 224, 3)
    # summary() prints the table itself and returns None, so wrapping it
    # in print() would only add a stray "None" line to the output.
    model.summary()

    # Release the backend session/graph created for the model.
    K.clear_session()
69 | 


--------------------------------------------------------------------------------