├── log
│   └── events.out.tfevents.1521736042.Miguels-MacBook-Pro.local
├── README.md
├── util_test.py
├── generators_test.py
├── losses_test.py
├── losses.py
├── layers_test.py
├── LICENSE
├── layers.py
├── .gitignore
├── create_data_sym_links.py
├── autoencoder_image_loader.py
├── autoencoder_model.py
├── autoencoder_train.py
├── undeepvo_train.py
├── util.py
├── image_loader.py
└── undeepvo_model.py

/log/events.out.tfevents.1521736042.Miguels-MacBook-Pro.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maj-personal-repos/UnDeepVO/HEAD/log/events.out.tfevents.1521736042.Miguels-MacBook-Pro.local
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UnDeepVO
2 | UnDeepVO - a Keras implementation of monocular visual odometry through unsupervised deep learning
3 | 
4 | ## Work In Progress
5 | The implementation is not working yet. Once it is complete, this README will be updated with a full description and usage instructions.
6 | 
--------------------------------------------------------------------------------
/util_test.py:
--------------------------------------------------------------------------------
1 | import pykitti
2 | from math import pi
3 | from util import euler_to_rotation, position_to_translation
4 | 
5 | dataset = pykitti.odometry('data/dataset', '01')
6 | 
7 | # homogeneous rotation from Euler angles and translation from a position vector
8 | rotm = euler_to_rotation([0, 0, pi / 4])
9 | 
10 | transm = position_to_translation([1, 1, 0])
11 | 
12 | # intrinsics of the left colour camera
13 | K = dataset.calib.K_cam2
14 | 
15 | print(rotm)
16 | print(transm)
17 | print(K)
--------------------------------------------------------------------------------
/generators_test.py:
--------------------------------------------------------------------------------
1 | from image_loader import get_stereo_image_generators
2 | import matplotlib.pyplot as plt
3 | 
4 | # get_stereo_image_generators returns (train_gen, test_gen, train_samples, test_samples);
5 | # only the training generator is needed here
6 | train_gen, _, _, _ = get_stereo_image_generators('data/train', 'data/test', batch_size=1, shuffle=False)
7 | 
8 | # each batch is ([left, right], targets); show the first left and right images
9 | inputs, _ = next(train_gen)
10 | plt.imshow(inputs[0][0, :, :, :])
11 | plt.show()
12 | plt.imshow(inputs[1][0, :, :, :])
13 | plt.show()
14 | 
15 | inputs, _ = next(train_gen)
16 | plt.imshow(inputs[0][0, :, :, :])
17 | plt.show()
18 | 
19 | inputs, _ = next(train_gen)
20 | plt.imshow(inputs[0][0, :, :, :])
21 | plt.show()
22 | 
--------------------------------------------------------------------------------
/losses_test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras import backend as K
3 | 
4 | from losses import ssim, photometric_consistency_loss
5 | 
6 | x = np.ones((1, 10, 10, 1))
7 | y = np.ones((1, 10, 10, 1))
8 | 
9 | x_img1 = K.variable(x)
10 | y_img1 = K.variable(y)
11 | 
12 | # identical images -> SSIM distance of 0 everywhere
13 | ssim1 = ssim(x_img1, y_img1)
14 | 
15 | assert np.allclose(K.eval(ssim1), np.zeros((1, 10, 10)))
16 | 
17 | x_img2 = K.variable(255 * x)
18 | y_img2 = K.variable(-255 * y)
19 | 
20 | # maximally dissimilar images -> SSIM distance of 1 everywhere
21 | ssim2 = ssim(x_img2, y_img2)
22 | 
23 | assert np.allclose(K.eval(ssim2), np.ones((1, 10, 10)))
24 | 
25 | # photometric_consistency_loss(alpha) returns a loss function; identical images -> zero loss
26 | pcl = photometric_consistency_loss(0.85)(x_img1, y_img1)
27 | 
28 | assert np.allclose(K.eval(pcl), np.zeros((1, 10, 10)))
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
1 | from keras.losses import mean_absolute_error
2 | import keras.backend as K
3 | 
4 | 
5 | def 
ssim(x, y): 6 | c1 = 0.01 ** 2 7 | 8 | c2 = 0.03 ** 2 9 | 10 | mu_x = K.mean(x, axis=-1) 11 | 12 | mu_y = K.mean(y, axis=-1) 13 | 14 | sigma_x = K.mean(x ** 2, axis=-1) - mu_x ** 2 15 | 16 | sigma_y = K.mean(y ** 2, axis=-1) - mu_y ** 2 17 | 18 | sigma_xy = K.mean(x * y, axis=-1) - mu_x * mu_y 19 | 20 | ssim_n = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2) 21 | 22 | ssim_d = (mu_x ** 2 + mu_y ** 2 + c1) * (sigma_x + sigma_y + c2) 23 | 24 | ssim_out = ssim_n / ssim_d 25 | 26 | return K.clip((1 - ssim_out) / 2, 0, 1) 27 | 28 | 29 | def photometric_consistency_loss(alpha): 30 | def loss(y_true, y_pred): 31 | return alpha * ssim(y_true, y_pred) + (1 - alpha) * mean_absolute_error(y_true, y_pred) 32 | 33 | return loss 34 | -------------------------------------------------------------------------------- /layers_test.py: -------------------------------------------------------------------------------- 1 | from layers import spatial_transformation 2 | import matplotlib.image as mpimg 3 | import numpy as np 4 | from keras import backend as K 5 | from image_loader import get_stereo_image_generators 6 | import matplotlib.pyplot as plt 7 | 8 | img_rows, img_cols = 128, 512 9 | 10 | train_gen, test_gen, train_samples, test_samples = get_stereo_image_generators('data/train/', 'data/test', batch_size=1, shuffle=True) 11 | img = train_gen.__next__() 12 | 13 | # spatial transformation lambda layer test 14 | 15 | left_image = img[0][0] 16 | disparity = 0.1 * np.ones((1, img_rows, img_cols, 1)) 17 | 18 | disparity = K.variable(disparity) 19 | left_image = K.variable(left_image) 20 | 21 | right_image = spatial_transformation([left_image, disparity], 1, '') 22 | 23 | im_right = K.eval(right_image) 24 | plt.imshow(im_right[0]) 25 | plt.show() 26 | 27 | # TODO: add tests for remaining layers: expand_dims, depth_to_disparity, disparity_difference 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Miguel Alonso Jr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /layers.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Lambda 2 | from util import spatial_transform 3 | import keras.backend as K 4 | 5 | 6 | def spatial_transformation(inputs, sign, name): 7 | def output_shape(input_shape): 8 | 9 | return input_shape[0] 10 | 11 | return Lambda(lambda x: spatial_transform(x[0], sign*x[1]), output_shape=output_shape, name=name)(inputs) 12 | 13 | 14 | def expand_dims(inputs, dimension, name): 15 | def output_shape(input_shape): 16 | shape = list(input_shape) 17 | 18 | shape[3] = 1 19 | 20 | return tuple(shape) 21 | 22 | return Lambda(lambda x: K.expand_dims(inputs[:, :, :, dimension], 3), output_shape=output_shape, name=name)(inputs) 23 | 24 | 25 | def depth_to_disparity(inputs, baseline, focal_length, width, name): 26 | def output_shape(input_shape): 27 | return input_shape 28 | 29 | return Lambda(lambda x: width * baseline * focal_length / x, output_shape=output_shape, name=name)(inputs) 30 | 31 | 32 | def disparity_difference(disparities, name): 33 | def output_shape(input_shape): 34 | return input_shape 35 | 36 | return Lambda(lambda x: x[0] - x[1], output_shape=output_shape, name=name)(disparities) 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #data 2 | data 3 | 4 | #scratch 5 | scratch 6 | 7 | #visualization 8 | visualization 9 | 10 | #models 11 | models 12 | 13 | #IDEs 14 | .idea 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | env/ 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # dotenv 98 | .env 99 | 100 | # virtualenv 101 | .venv 102 | venv/ 103 | ENV/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | -------------------------------------------------------------------------------- /create_data_sym_links.py: -------------------------------------------------------------------------------- 1 | import os 2 | data_dir = 'data' 3 | left_image_dir = 'image_2' 4 | right_image_dir = 'image_3' 5 | 6 | # train_sequences = ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', 7 | # '11', '12', '13', '14', '15', '16', '17', '18', '19', '20'] 8 | 9 | train_sequences = ['05', '12'] 10 | 11 | test_sequences = ['10'] 12 | 13 | train_dir = os.path.join(data_dir, 'train') 14 | 15 | test_dir = os.path.join(data_dir, 'test') 16 | 17 | if not os.path.exists(train_dir): 18 | os.makedirs(train_dir) 19 | 20 | if not os.path.exists(test_dir): 21 | os.makedirs(test_dir) 22 | 23 | if not os.path.exists(os.path.join(train_dir, 'left')): 24 | os.makedirs(os.path.join(train_dir, 'left')) 25 | 26 | if not os.path.exists(os.path.join(train_dir, 'right')): 27 | os.makedirs(os.path.join(train_dir, 'right')) 28 | 29 | if not os.path.exists(os.path.join(test_dir, 'left')): 30 | os.makedirs(os.path.join(test_dir, 'left')) 31 | 32 | if not os.path.exists(os.path.join(test_dir, 'right')): 33 | os.makedirs(os.path.join(test_dir, 'right')) 34 | 35 | root_dir = os.getcwd() 36 | 37 | for seq in train_sequences: 38 | l_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, left_image_dir) 39 | 40 | r_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, right_image_dir) 41 | 42 | for (_, _, left_filenames) in os.walk(l_dir): 43 | break 44 | 45 | for (_, _, right_filenames) in os.walk(r_dir): 46 | break 47 | 48 | for file in left_filenames: 49 | src_path = os.path.join(root_dir, l_dir, file) 50 | 51 | dst_path = os.path.join(train_dir, 'left', seq+file) 52 | 53 | os.symlink(src_path, dst_path) 54 | 55 | for file in right_filenames: 56 | src_path = os.path.join(root_dir, r_dir, file) 57 | 58 | dst_path = os.path.join(train_dir, 'right', seq+file) 59 | 60 | os.symlink(src_path, dst_path) 61 | 62 | for seq in test_sequences: 63 | l_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, left_image_dir) 64 | 65 | r_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, right_image_dir) 66 | 67 | for (_, _, left_filenames) in os.walk(l_dir): 68 | break 69 | 70 | for (_, _, right_filenames) in os.walk(r_dir): 71 | break 72 | 73 | for file in left_filenames: 74 | src_path = os.path.join(root_dir, l_dir, file) 75 | 76 | dst_path = 
os.path.join(test_dir, 'left', seq+file) 77 | 78 | os.symlink(src_path, dst_path) 79 | 80 | for file in right_filenames: 81 | src_path = os.path.join(root_dir, r_dir, file) 82 | 83 | dst_path = os.path.join(test_dir, 'right', seq+file) 84 | 85 | os.symlink(src_path, dst_path) 86 | -------------------------------------------------------------------------------- /autoencoder_image_loader.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.image import ImageDataGenerator 2 | import numpy as np 3 | 4 | 5 | def get_stereo_image_generators(train_folder, test_folder, img_rows=128, img_cols=512, batch_size=16, shuffle=True): 6 | train_image_gen = ImageDataGenerator(rescale=1.0 / 255.0, 7 | # TODO: when network is complete, add image transformations to improve training 8 | # rotation_range=5, 9 | # shear_range=0.01, 10 | # zoom_range=0.01, 11 | # height_shift_range=0.01, 12 | # width_shift_range=0.01 13 | ) 14 | 15 | test_image_gen = ImageDataGenerator(rescale=1.0 / 255.0) 16 | 17 | train_generator_left = train_image_gen.flow_from_directory(train_folder, 18 | target_size=(img_rows, img_cols), 19 | batch_size=batch_size, 20 | seed=10, 21 | shuffle=shuffle, 22 | classes=['left'], 23 | class_mode=None, 24 | follow_links=True) 25 | 26 | test_generator_left = test_image_gen.flow_from_directory(test_folder, 27 | target_size=(img_rows, img_cols), 28 | batch_size=batch_size, 29 | seed=10, 30 | shuffle=shuffle, 31 | classes=['left'], 32 | class_mode=None, 33 | follow_links=True) 34 | 35 | def train_generator_func(): 36 | while True: 37 | left_image = train_generator_left.next() 38 | 39 | yield [left_image], [left_image] 40 | 41 | def test_generator_func(): 42 | while True: 43 | left_image = test_generator_left.next() 44 | 45 | yield [left_image], [left_image] 46 | 47 | train_generator = train_generator_func() 48 | 49 | test_generator = test_generator_func() 50 | 51 | train_length = len(train_generator_left.filenames) 52 | 53 | test_length = len(test_generator_left.filenames) 54 | 55 | return train_generator, test_generator, train_length, test_length 56 | -------------------------------------------------------------------------------- /autoencoder_model.py: -------------------------------------------------------------------------------- 1 | from keras import Model 2 | from keras.layers import Conv2D, Conv2DTranspose, concatenate 3 | from keras.optimizers import Adam 4 | 5 | 6 | class AutoEncoderModel(object): 7 | def __init__(self, left_input, right_input, lr=1e-4, rows=128, cols=512): 8 | self.rows = rows 9 | self.cols = cols 10 | self.left = left_input 11 | self.right = right_input 12 | self.left_est = None 13 | self.right_est = None 14 | self.output = None 15 | self.model = None 16 | self.lr = lr 17 | self.build_architecture() 18 | self.build_outputs() 19 | self.build_model() 20 | 21 | @staticmethod 22 | def conv(input, channels, kernel_size, strides, activation='elu'): 23 | 24 | return Conv2D(channels, kernel_size=kernel_size, strides=strides, padding='same', activation=activation)(input) 25 | 26 | @staticmethod 27 | def deconv(input, channels, kernel_size, scale): 28 | 29 | return Conv2DTranspose(channels, kernel_size=kernel_size, strides=scale, padding='same')(input) 30 | 31 | def conv_block(self, input, channels, kernel_size): 32 | conv1 = self.conv(input, channels, kernel_size, 1) 33 | 34 | conv2 = self.conv(conv1, channels, kernel_size, 2) 35 | 36 | return conv2 37 | 38 | def deconv_block(self, input, channels, kernel_size, skip): 39 | 
deconv1 = self.deconv(input, channels, kernel_size, 2) 40 | 41 | if skip is not None: 42 | concat1 = concatenate([deconv1, skip], 3) 43 | else: 44 | concat1 = deconv1 45 | 46 | iconv1 = self.conv(concat1, channels, kernel_size, 1) 47 | 48 | return iconv1 49 | 50 | def get_output(self, deconv): 51 | return self.conv(deconv, 3, 3, 1, 'sigmoid') 52 | 53 | def build_architecture(self): 54 | # encoder 55 | conv1 = self.conv_block(self.left, 32, 7) 56 | conv2 = self.conv_block(conv1, 64, 5) 57 | conv3 = self.conv_block(conv2, 128, 3) 58 | conv4 = self.conv_block(conv3, 256, 3) 59 | conv5 = self.conv_block(conv4, 512, 3) 60 | conv6 = self.conv_block(conv5, 512, 3) 61 | conv7 = self.conv_block(conv6, 512, 3) 62 | 63 | # skips 64 | skip1 = conv1 65 | skip2 = conv2 66 | skip3 = conv3 67 | skip4 = conv4 68 | skip5 = conv5 69 | skip6 = conv6 70 | 71 | deconv7 = self.deconv_block(conv7, 512, 3, skip6) 72 | deconv6 = self.deconv_block(deconv7, 512, 3, skip5) 73 | deconv5 = self.deconv_block(deconv6, 256, 3, skip4) 74 | deconv4 = self.deconv_block(deconv5, 128, 3, skip3) 75 | deconv3 = self.deconv_block(deconv4, 64, 3, skip2) 76 | deconv2 = self.deconv_block(deconv3, 32, 3, skip1) 77 | deconv1 = self.deconv_block(deconv2, 16, 3, None) 78 | 79 | self.output = self.get_output(deconv1) 80 | 81 | def build_outputs(self): 82 | self.left_est = self.output 83 | # self.right_est = expand_dims(self.output, 1, 'right_estimate') 84 | 85 | def build_model(self): 86 | self.model = Model(inputs=[self.left], outputs=[self.left_est]) 87 | self.model.compile(loss=['mae'], 88 | optimizer='adadelta', 89 | metrics=['mse']) 90 | -------------------------------------------------------------------------------- /autoencoder_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from keras import Input 4 | from keras.callbacks import TensorBoard, ModelCheckpoint, Callback 5 | from keras.utils import plot_model 6 | from autoencoder_model import AutoEncoderModel 7 | from autoencoder_image_loader import get_stereo_image_generators 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | parser = argparse.ArgumentParser(description='Stereo Autoencoder') 12 | parser.add_argument('--model_name', type=str, help='model name', default='ae') 13 | parser.add_argument('--models_dir', type=str, help='models directory', default='models') 14 | parser.add_argument('--data_path', type=str, help='path to the data', required=True) 15 | parser.add_argument('--input_height', type=int, help='input height', default=128) 16 | parser.add_argument('--input_width', type=int, help='input width', default=512) 17 | parser.add_argument('--batch_size', type=int, help='batch size', default=8) 18 | parser.add_argument('--num_epochs', type=int, help='number of epochs', default=100) 19 | parser.add_argument('--learning_rate', type=float, help='initial learning rate', default=1e-2) 20 | parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default='log') 21 | args = parser.parse_args() 22 | 23 | 24 | class VisualizeOutput(Callback): 25 | def __init__(self, input_image): 26 | super().__init__() 27 | self.input_image = input_image 28 | 29 | def on_epoch_begin(self, epoch, logs=None): 30 | self.visualize_input_output() 31 | 32 | def visualize_input_output(self): 33 | image = np.concatenate((self.input_image, self.model.predict(self.input_image)), axis=1)[0, :, :, :] 34 | plt.clf() 35 | plt.imshow(image) 36 | plt.pause(1) 37 | 38 | 39 | 
def main(args): 40 | train_gen, test_gen, train_samples, test_samples = get_stereo_image_generators(args.data_path + '/train', 41 | args.data_path + '/test', 42 | img_rows=args.input_height, 43 | img_cols=args.input_width, 44 | batch_size=args.batch_size, 45 | shuffle=False) 46 | image_generator = get_stereo_image_generators('data/train', 'data/test', batch_size=1, shuffle=False) 47 | 48 | input_image = image_generator[0].__next__()[0][0] 49 | 50 | input_shape = (args.input_height, args.input_width, 3) 51 | left_input = Input(input_shape) 52 | right_input = Input(input_shape) 53 | 54 | ae = AutoEncoderModel(left_input, right_input, args.learning_rate) 55 | ae.model.summary() 56 | plot_model(ae.model, show_shapes=True, to_file='scratch/ae.png') 57 | ae.model.fit_generator(train_gen, 58 | steps_per_epoch=train_samples // args.batch_size, 59 | # validation_data=test_gen, 60 | # validation_steps=test_samples // args.batch_size, 61 | epochs=args.num_epochs, 62 | verbose=1, 63 | callbacks=[VisualizeOutput(input_image), 64 | TensorBoard(log_dir=args.log_directory, 65 | batch_size=args.batch_size, 66 | write_graph=False), 67 | ModelCheckpoint(os.path.join(args.models_dir, args.model_name + '.h5'), 68 | monitor='loss', 69 | verbose=1, 70 | save_best_only=True)]) 71 | 72 | 73 | if __name__ == '__main__': 74 | main(args) 75 | -------------------------------------------------------------------------------- /undeepvo_train.py: -------------------------------------------------------------------------------- 1 | # uncomment this section to train on the CPU 2 | # import os 3 | # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 4 | # os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 5 | from keras.utils import plot_model 6 | from keras.layers import Input 7 | from image_loader import get_stereo_image_generators 8 | from undeepvo_model import UnDeepVOModel 9 | import argparse 10 | 11 | parser = argparse.ArgumentParser(description='UnDeepVo Keras implementation.') 12 | 13 | parser.add_argument('--mode', type=str, help='train or test', default='train') 14 | 15 | parser.add_argument('--models_dir', type=str, help='models directory', default='models') 16 | 17 | parser.add_argument('--model_name', type=str, help='model name', default='undeepvo') 18 | 19 | parser.add_argument('--data_path', type=str, help='path to the data', required=True) 20 | 21 | parser.add_argument('--input_height', type=int, help='input height', default=128) 22 | 23 | parser.add_argument('--input_width', type=int, help='input width', default=512) 24 | 25 | parser.add_argument('--batch_size', type=int, help='batch size', default=2) 26 | 27 | parser.add_argument('--num_epochs', type=int, help='number of epochs', default=50) 28 | 29 | parser.add_argument('--learning_rate', type=float, help='initial learning rate', default=0.1) 30 | 31 | parser.add_argument('--lr_loss_weight', type=float, help='left-right consistency weight', default=1.0) 32 | 33 | parser.add_argument('--alpha_image_loss', type=float, help='weight between SSIM and L1 in the image loss', default=0.85) 34 | 35 | parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default='log') 36 | 37 | parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default='') 38 | 39 | args = parser.parse_args() 40 | 41 | 42 | def main(args): 43 | batch_size = args.batch_size 44 | 45 | epochs = args.num_epochs 46 | 47 | img_rows, img_cols = args.input_height, args.input_width 48 | 49 | models_dir = 
args.models_dir 50 | 51 | model_name = args.model_name 52 | 53 | data_path = args.data_path 54 | 55 | train_gen, test_gen, train_samples, test_samples = get_stereo_image_generators(data_path + '/train', 56 | data_path + '/test', 57 | img_rows=img_rows, 58 | img_cols=img_cols, 59 | batch_size=batch_size, 60 | shuffle=True) 61 | 62 | # channels last by default 63 | input_shape = (img_rows, img_cols, 3) 64 | 65 | left_input = Input(input_shape) 66 | 67 | left_input_next = Input(input_shape) 68 | 69 | right_input = Input(input_shape) 70 | 71 | udvo = UnDeepVOModel(left_input_next, left_input, right_input, args.learning_rate) 72 | 73 | # for epoch in range(epochs): 74 | # # TODO: need to save model after each epoch 75 | # # model_path = os.path.join(models_dir, model_name + '_epoch_%d' % epoch) 76 | # 77 | # udvo.model.fit_generator(train_gen, 78 | # steps_per_epoch=train_samples // batch_size, 79 | # epochs=epochs, 80 | # validation_data=test_gen, 81 | # validation_steps=test_samples // batch_size, 82 | # verbose=1, 83 | # # callbacks=[TensorBoard(log_dir=args.log_directory, 84 | # # histogram_freq=True, 85 | # # batch_size=batch_size, 86 | # # write_graph=False, 87 | # # write_grads=True)] 88 | # ) 89 | 90 | udvo.model.summary() 91 | 92 | plot_model(udvo.model, show_shapes=True, to_file='scratch/model.png') 93 | 94 | 95 | if __name__ == '__main__': 96 | main(args) 97 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | from math import sin, cos 4 | from keras import backend as K 5 | 6 | 7 | def euler_to_rotation(theta): 8 | R_x = np.array([[1, 0, 0], 9 | [0, cos(theta[0]), -sin(theta[0])], 10 | [0, sin(theta[0]), cos(theta[0])] 11 | ]) 12 | 13 | R_y = np.array([[cos(theta[1]), 0, sin(theta[1])], 14 | [0, 1, 0], 15 | [-sin(theta[1]), 0, cos(theta[1])] 16 | ]) 17 | 18 | R_z = np.array([[cos(theta[2]), -sin(theta[2]), 0], 19 | [sin(theta[2]), cos(theta[2]), 0], 20 | [0, 0, 1] 21 | ]) 22 | 23 | R = np.dot(R_z, np.dot(R_y, R_x)) 24 | 25 | output = np.identity(4) 26 | 27 | output[:3, :3] = R 28 | 29 | return output 30 | 31 | 32 | def position_to_translation(position): 33 | translation_mat = np.identity(4) 34 | 35 | translation_mat[:3, 3] = position[:3] 36 | 37 | return translation_mat 38 | 39 | 40 | def warp(image, depthmap, pose, K): 41 | image_shape = K.shape(image) 42 | 43 | num_batch = image_shape[0] 44 | 45 | height = image_shape[1] 46 | 47 | width = image_shape[2] 48 | 49 | channels = image_shape[3] 50 | 51 | 52 | def spatial_transform(input_images, x_offset, wrap_mode='border', name='bilinear_sampler', **kwargs): 53 | def _repeat(x, n_repeats): 54 | rep = K.tile(K.expand_dims(x, 1), [1, n_repeats]) 55 | 56 | return rep 57 | 58 | def _interpolate(im, x, y): 59 | _edge_size = 0 60 | 61 | if _wrap_mode == 'border': 62 | _edge_size = 1 63 | 64 | im = K.spatial_2d_padding(im, padding=((1, 1), (1, 1))) 65 | 66 | x = x + _edge_size 67 | 68 | y = y + _edge_size 69 | 70 | elif _wrap_mode == 'edge': 71 | _edge_size = 0 72 | 73 | else: 74 | return None 75 | 76 | x = K.clip(x, 0.0, K.eval(_width_f) - 1 + 2 * _edge_size) 77 | 78 | x0_f = K.round(x) 79 | 80 | y0_f = K.round(y) 81 | 82 | x1_f = x0_f + 1 83 | 84 | x0 = K.cast(x0_f, 'int32') 85 | 86 | y0 = K.cast(y0_f, 'int32') 87 | 88 | x1 = K.cast(K.minimum(x1_f, K.eval(_width_f) - 1 + 2 * _edge_size), 'int32') 89 | 90 | dim2 = (_width + 2 * 
_edge_size) 91 | 92 | dim1 = (_width + 2 * _edge_size) * (_height + 2 * _edge_size) 93 | 94 | base = _repeat(K.arange(_num_batch) * dim1, _height * _width) 95 | 96 | base_y0 = base + y0 * dim2 97 | 98 | idx_l = base_y0 + x0 99 | 100 | idx_r = base_y0 + x1 101 | 102 | im_flat = K.reshape(im, K.stack([-1, _num_channels])) 103 | 104 | pix_l = K.gather(im_flat, idx_l) 105 | 106 | pix_r = K.gather(im_flat, idx_r) 107 | 108 | weight_l = K.expand_dims(x1_f - x, 1) 109 | 110 | weight_r = K.expand_dims(x - x0_f, 1) 111 | 112 | return weight_l * pix_l + weight_r * pix_r 113 | 114 | def _transform(input_images, x_offset): 115 | x_t, y_t = np.meshgrid(np.linspace(0.0, K.eval(_width_f) - 1.0, K.eval(_width)), 116 | np.linspace(0.0, K.eval(_height_f) - 1.0, K.eval(_height))) 117 | 118 | x_t = K.variable(x_t) 119 | 120 | y_t = K.variable(y_t) 121 | 122 | x_t_flat = K.reshape(x_t, (1, -1)) 123 | 124 | y_t_flat = K.reshape(y_t, (1, -1)) 125 | 126 | x_t_flat = K.tile(x_t_flat, K.stack([_num_batch, 1])) 127 | 128 | y_t_flat = K.tile(y_t_flat, K.stack([_num_batch, 1])) 129 | 130 | x_t_flat = K.reshape(x_t_flat, [-1]) 131 | 132 | y_t_flat = K.reshape(y_t_flat, [-1]) 133 | 134 | x_t_flat = x_t_flat + K.reshape(x_offset, [-1]) * _width_f 135 | 136 | input_transformed = _interpolate(input_images, x_t_flat, y_t_flat) 137 | 138 | output = K.reshape(input_transformed, K.stack([_num_batch, _height, _width, _num_channels])) 139 | 140 | return output 141 | 142 | _num_batch = K.shape(input_images)[0] 143 | 144 | _height = K.shape(input_images)[1] 145 | 146 | _width = K.shape(input_images)[2] 147 | 148 | _num_channels = K.shape(input_images)[3] 149 | 150 | _height_f = K.cast(_height, 'float32') 151 | 152 | _width_f = K.cast(_width, 'float32') 153 | 154 | _wrap_mode = wrap_mode 155 | 156 | output = _transform(input_images, x_offset) 157 | 158 | return output 159 | -------------------------------------------------------------------------------- /image_loader.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.image import ImageDataGenerator 2 | import numpy as np 3 | 4 | 5 | def get_stereo_image_generators(train_folder, test_folder, img_rows=128, img_cols=512, batch_size=16, shuffle=True): 6 | train_image_gen = ImageDataGenerator(rescale=1.0 / 255.0, 7 | # TODO: when network is complete, add image transformations to improve training 8 | # rotation_range=5, 9 | # shear_range=0.01, 10 | # zoom_range=0.01, 11 | # height_shift_range=0.01, 12 | # width_shift_range=0.01 13 | ) 14 | 15 | test_image_gen = ImageDataGenerator(rescale=1.0 / 255.0) 16 | 17 | train_generator_left = train_image_gen.flow_from_directory(train_folder, 18 | target_size=(img_rows, img_cols), 19 | batch_size=batch_size, 20 | seed=10, 21 | shuffle=shuffle, 22 | classes=['left'], 23 | class_mode=None, 24 | follow_links=True) 25 | 26 | train_generator_right = train_image_gen.flow_from_directory(train_folder, 27 | target_size=(img_rows, img_cols), 28 | batch_size=batch_size, 29 | seed=10, 30 | shuffle=shuffle, 31 | classes=['right'], 32 | class_mode=None, 33 | follow_links=True) 34 | 35 | test_generator_left = test_image_gen.flow_from_directory(test_folder, 36 | target_size=(img_rows, img_cols), 37 | batch_size=batch_size, 38 | seed=10, 39 | shuffle=shuffle, 40 | classes=['left'], 41 | class_mode=None, 42 | follow_links=True) 43 | 44 | test_generator_right = test_image_gen.flow_from_directory(test_folder, 45 | target_size=(img_rows, img_cols), 46 | batch_size=batch_size, 47 | seed=10, 48 | 
shuffle=shuffle, 49 | classes=['right'], 50 | class_mode=None, 51 | follow_links=True) 52 | 53 | def train_generator_func(): 54 | while True: 55 | left_image = train_generator_left.next() 56 | 57 | right_image = train_generator_right.next() 58 | 59 | yield [left_image, right_image], [left_image, 60 | right_image, 61 | np.zeros((left_image.shape[0], 62 | left_image.shape[1], 63 | left_image.shape[2], 64 | 1)), 65 | np.zeros((right_image.shape[0], 66 | right_image.shape[1], 67 | right_image.shape[2], 68 | 1))] 69 | 70 | def test_generator_func(): 71 | while True: 72 | left_image = test_generator_left.next() 73 | 74 | right_image = test_generator_right.next() 75 | 76 | yield [left_image, right_image], [left_image, 77 | right_image, 78 | np.zeros((left_image.shape[0], 79 | left_image.shape[1], 80 | left_image.shape[2], 81 | 1)), 82 | np.zeros((right_image.shape[0], 83 | right_image.shape[1], 84 | right_image.shape[2], 85 | 1))] 86 | 87 | train_generator = train_generator_func() 88 | 89 | test_generator = test_generator_func() 90 | 91 | train_length = len(train_generator_left.filenames) 92 | 93 | test_length = len(test_generator_left.filenames) 94 | 95 | return train_generator, test_generator, train_length, test_length 96 | -------------------------------------------------------------------------------- /undeepvo_model.py: -------------------------------------------------------------------------------- 1 | from keras.optimizers import Adam 2 | from keras.models import Model 3 | from keras.layers import Conv2D, Conv2DTranspose, concatenate, Cropping2D, Dense, Flatten 4 | from layers import depth_to_disparity, disparity_difference, expand_dims, spatial_transformation 5 | from losses import photometric_consistency_loss 6 | 7 | 8 | class UnDeepVOModel(object): 9 | def __init__(self, left_input_k_1, left_input_k, right_input_k, mode='train', lr=0.1, alpha_image_loss=0.85, 10 | img_rows=128, img_cols=512): 11 | # NOTE: disparity calculation 12 | # depth = baseline * focal / disparity 13 | # depth = 0.54 * 721 / (1242 * disp) 14 | 15 | self.img_rows = img_rows 16 | 17 | self.img_cols = img_cols 18 | 19 | self.baseline = 0.54 # meters 20 | 21 | self.focal_length = 718.856 / 1241 # image width = 1241 (note: must scale using this number) 22 | 23 | self.left = left_input_k 24 | 25 | self.right = right_input_k 26 | 27 | self.left_next = left_input_k_1 28 | 29 | self.left_est = None 30 | 31 | self.right_est = None 32 | 33 | self.depthmap = None 34 | 35 | self.depthmap_left = None 36 | 37 | self.depthmap_right = None 38 | 39 | self.disparity_left = None 40 | 41 | self.disparity_right = None 42 | 43 | self.disparity_diff_left = None 44 | 45 | self.disparity_diff_right = None 46 | 47 | self.right_to_left_disparity = None 48 | 49 | self.left_to_right_disparity = None 50 | 51 | self.model = None 52 | 53 | self.depthmap = None 54 | 55 | self.mode = mode 56 | 57 | self.lr = lr 58 | 59 | self.alpha_image_loss = alpha_image_loss 60 | 61 | self.build_depth_architecture() 62 | 63 | self.build_pose_architecture() 64 | 65 | self.build_outputs() 66 | 67 | self.build_model() 68 | 69 | if self.mode == 'test': 70 | return 71 | 72 | @staticmethod 73 | def conv(input, channels, kernel_size, strides, activation='elu'): 74 | 75 | return Conv2D(channels, kernel_size=kernel_size, strides=strides, padding='same', activation=activation)(input) 76 | 77 | @staticmethod 78 | def deconv(input, channels, kernel_size, scale): 79 | 80 | return Conv2DTranspose(channels, kernel_size=kernel_size, strides=scale, padding='same')(input) 81 | 82 
| def conv_block(self, input, channels, kernel_size): 83 | conv1 = self.conv(input, channels, kernel_size, 1) 84 | 85 | conv2 = self.conv(conv1, channels, kernel_size, 2) 86 | 87 | return conv2 88 | 89 | def deconv_block(self, input, channels, kernel_size, skip): 90 | deconv1 = self.deconv(input, channels, kernel_size, 2) 91 | 92 | if skip is not None: 93 | concat1 = concatenate([deconv1, skip], 3) 94 | else: 95 | concat1 = deconv1 96 | 97 | iconv1 = self.conv(concat1, channels, kernel_size, 1) 98 | 99 | return iconv1 100 | 101 | def get_depth(self, input): 102 | return self.conv(input, 2, 3, 1, 'sigmoid') 103 | 104 | def build_pose_architecture(self): 105 | input = concatenate([self.left, self.left_next], axis=3) 106 | 107 | conv1 = self.conv(input, 16, 7, 1, activation='relu') 108 | 109 | conv2 = self.conv(conv1, 32, 5, 1, activation='relu') 110 | 111 | conv3 = self.conv(conv2, 64, 3, 1, activation='relu') 112 | 113 | conv4 = self.conv(conv3, 128, 3, 1, activation='relu') 114 | 115 | conv5 = self.conv(conv4, 256, 3, 1, activation='relu') 116 | 117 | conv6 = self.conv(conv5, 512, 3, 1, activation='relu') 118 | 119 | flat1 = Flatten()(conv6) 120 | 121 | # translation 122 | 123 | fc1_tran = Dense(512, input_shape=(8192,))(flat1) 124 | 125 | fc2_tran = Dense(512, input_shape=(512,))(fc1_tran) 126 | 127 | fc3_tran = Dense(3, input_shape=(512,))(fc2_tran) 128 | 129 | self.translation = fc3_tran 130 | 131 | # rotation 132 | 133 | fc1_rot = Dense(512, input_shape=(512,))(flat1) 134 | 135 | fc2_rot = Dense(512, input_shape=(512,))(fc1_rot) 136 | 137 | fc3_rot = Dense(3, input_shape=(512,))(fc2_rot) 138 | 139 | self.rotation = fc3_rot 140 | 141 | def build_depth_architecture(self): 142 | # encoder 143 | conv1 = self.conv_block(self.left, 32, 7) 144 | 145 | conv2 = self.conv_block(conv1, 64, 5) 146 | 147 | conv3 = self.conv_block(conv2, 128, 3) 148 | 149 | conv4 = self.conv_block(conv3, 256, 3) 150 | 151 | conv5 = self.conv_block(conv4, 512, 3) 152 | 153 | conv6 = self.conv_block(conv5, 512, 3) 154 | 155 | conv7 = self.conv_block(conv6, 512, 3) 156 | 157 | # skips 158 | skip1 = conv1 159 | 160 | skip2 = conv2 161 | 162 | skip3 = conv3 163 | 164 | skip4 = conv4 165 | 166 | skip5 = conv5 167 | 168 | skip6 = conv6 169 | 170 | deconv7 = self.deconv_block(conv7, 512, 3, skip6) 171 | 172 | deconv6 = self.deconv_block(deconv7, 512, 3, skip5) 173 | 174 | deconv5 = self.deconv_block(deconv6, 256, 3, skip4) 175 | 176 | deconv4 = self.deconv_block(deconv5, 128, 3, skip3) 177 | 178 | deconv3 = self.deconv_block(deconv4, 64, 3, skip2) 179 | 180 | deconv2 = self.deconv_block(deconv3, 32, 3, skip1) 181 | 182 | deconv1 = self.deconv_block(deconv2, 16, 3, None) 183 | 184 | self.depthmap = self.get_depth(deconv1) 185 | 186 | def build_outputs(self): 187 | 188 | # store depthmaps 189 | 190 | self.depthmap_left = expand_dims(self.depthmap, 0, 'depth_map_exp_left') 191 | 192 | self.depthmap_right = expand_dims(self.depthmap, 1, 'depth_map_exp_right') 193 | 194 | if self.mode == 'test': 195 | return 196 | 197 | # generate disparities 198 | 199 | self.disparity_left = depth_to_disparity(self.depthmap_left, self.baseline, self.focal_length, 1, 200 | 'disparity_left') 201 | 202 | self.disparity_right = depth_to_disparity(self.depthmap_right, self.baseline, self.focal_length, 1, 203 | 'disparity_right') 204 | 205 | # generate estimates of left and right images 206 | 207 | self.left_est = spatial_transformation([self.right, self.disparity_right], -1, 'left_est') 208 | 209 | self.right_est = spatial_transformation([self.left, 
self.disparity_left], 1, 'right_est')
210 | 
211 |         # generate left-right consistency terms: project each disparity map into
212 |         # the opposite view so the two predictions can be compared directly
213 |         self.right_to_left_disparity = spatial_transformation([self.disparity_right, self.disparity_right], -1,
214 |                                                                'r2l_disparity')
215 | 
216 |         self.left_to_right_disparity = spatial_transformation([self.disparity_left, self.disparity_left], 1,
217 |                                                                'l2r_disparity')
218 | 
219 |         self.disparity_diff_left = disparity_difference([self.disparity_left, self.right_to_left_disparity],
220 |                                                          'disp_diff_left')
221 | 
222 |         self.disparity_diff_right = disparity_difference([self.disparity_right, self.left_to_right_disparity],
223 |                                                           'disp_diff_right')
224 | 
225 |     def build_model(self):
226 |         # NOTE: the generators in image_loader.py currently yield only four targets
227 |         # (left, right and two zero maps), so the translation and rotation outputs
228 |         # below have no matching targets yet; this is why fit_generator is still
229 |         # commented out in undeepvo_train.py
230 |         self.model = Model(inputs=[self.left_next, self.left, self.right], outputs=[self.left_est,
231 |                                                                                      self.right_est,
232 |                                                                                      self.disparity_diff_left,
233 |                                                                                      self.disparity_diff_right,
234 |                                                                                      self.translation,
235 |                                                                                      self.rotation])
236 |         self.model.compile(loss=[photometric_consistency_loss(self.alpha_image_loss),
237 |                                  photometric_consistency_loss(self.alpha_image_loss),
238 |                                  'mean_absolute_error',
239 |                                  'mean_absolute_error',
240 |                                  'mean_absolute_error',
241 |                                  'mean_absolute_error'],
242 |                            optimizer=Adam(lr=self.lr),
243 |                            # metrics=['accuracy']
244 |                            )
245 | 
--------------------------------------------------------------------------------
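
Note on the depth-to-disparity convention used above: depth_to_disparity in layers.py computes width * baseline * focal_length / depth, and undeepvo_model.py passes width=1 together with a focal length already divided by the image width (718.856 / 1241), so the predicted disparities are expressed as a fraction of the image width; spatial_transform in util.py then multiplies that offset by the image width before sampling. The short script below is a minimal numeric sanity check of this convention and is not part of the repository; the KITTI-style constants are the ones quoted in undeepvo_model.py, and the 10 m depth is purely illustrative.

# depth <-> disparity round trip with the constants quoted in undeepvo_model.py
baseline = 0.54                # stereo baseline in metres
focal_px = 718.856             # focal length in pixels
width_px = 1241.0              # original image width in pixels
focal = focal_px / width_px    # focal length in image-width units, as in the model

depth = 10.0                   # metres (illustrative value only)

disparity = baseline * focal / depth   # fraction of the image width (width argument = 1)
disparity_px = disparity * width_px    # the same disparity at the original 1241-px width

print(disparity)                       # ~0.0313 of the image width
print(disparity_px)                    # ~38.8 pixels
print(baseline * focal / disparity)    # recovers the 10 m depth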