├── log
│   └── events.out.tfevents.1521736042.Miguels-MacBook-Pro.local
├── README.md
├── util_test.py
├── generators_test.py
├── losses_test.py
├── losses.py
├── layers_test.py
├── LICENSE
├── layers.py
├── .gitignore
├── create_data_sym_links.py
├── autoencoder_image_loader.py
├── autoencoder_model.py
├── autoencoder_train.py
├── undeepvo_train.py
├── util.py
├── image_loader.py
└── undeepvo_model.py

/log/events.out.tfevents.1521736042.Miguels-MacBook-Pro.local:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maj-personal-repos/UnDeepVO/HEAD/log/events.out.tfevents.1521736042.Miguels-MacBook-Pro.local
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UnDeepVO
2 | UnDeepVO - a Keras implementation of monocular visual odometry through unsupervised deep learning
3 | 
4 | ## Work In Progress
5 | The implementation is not working yet. Once it is complete, this README will be updated with a full description and usage instructions.
6 | 
--------------------------------------------------------------------------------
/util_test.py:
--------------------------------------------------------------------------------
1 | import pykitti
2 | from math import pi
3 | from util import euler_to_rotation, position_to_translation
4 | 
5 | dataset = pykitti.odometry('data/dataset', '01')
6 | 
7 | # homogeneous rotation from Euler angles and translation from a position vector
8 | rotm = euler_to_rotation([0, 0, pi / 4])
9 | 
10 | transm = position_to_translation([1, 1, 0])
11 | 
12 | # intrinsics of the left colour camera
13 | K = dataset.calib.K_cam2
14 | 
15 | print(rotm)
16 | print(transm)
17 | print(K)
--------------------------------------------------------------------------------
/generators_test.py:
--------------------------------------------------------------------------------
1 | from image_loader import get_stereo_image_generators
2 | import matplotlib.pyplot as plt
3 | 
4 | # get_stereo_image_generators returns (train_gen, test_gen, train_samples, test_samples);
5 | # only the training generator is needed here
6 | train_gen, _, _, _ = get_stereo_image_generators('data/train', 'data/test', batch_size=1, shuffle=False)
7 | 
8 | # each batch is ([left, right], targets); show the first left and right images
9 | inputs, _ = next(train_gen)
10 | plt.imshow(inputs[0][0, :, :, :])
11 | plt.show()
12 | plt.imshow(inputs[1][0, :, :, :])
13 | plt.show()
14 | 
15 | inputs, _ = next(train_gen)
16 | plt.imshow(inputs[0][0, :, :, :])
17 | plt.show()
18 | 
19 | inputs, _ = next(train_gen)
20 | plt.imshow(inputs[0][0, :, :, :])
21 | plt.show()
22 | 
--------------------------------------------------------------------------------
/losses_test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras import backend as K
3 | 
4 | from losses import ssim, photometric_consistency_loss
5 | 
6 | x = np.ones((1, 10, 10, 1))
7 | y = np.ones((1, 10, 10, 1))
8 | 
9 | x_img1 = K.variable(x)
10 | y_img1 = K.variable(y)
11 | 
12 | # identical images -> SSIM distance of 0 everywhere
13 | ssim1 = ssim(x_img1, y_img1)
14 | 
15 | assert np.allclose(K.eval(ssim1), np.zeros((1, 10, 10)))
16 | 
17 | x_img2 = K.variable(255 * x)
18 | y_img2 = K.variable(-255 * y)
19 | 
20 | # maximally dissimilar images -> SSIM distance of 1 everywhere
21 | ssim2 = ssim(x_img2, y_img2)
22 | 
23 | assert np.allclose(K.eval(ssim2), np.ones((1, 10, 10)))
24 | 
25 | # photometric_consistency_loss(alpha) returns a loss function; identical images -> zero loss
26 | pcl = photometric_consistency_loss(0.85)(x_img1, y_img1)
27 | 
28 | assert np.allclose(K.eval(pcl), np.zeros((1, 10, 10)))
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
1 | from keras.losses import mean_absolute_error
2 | import keras.backend as K
3 | 
4 | 
5 | def 
ssim(x, y): 6 | c1 = 0.01 ** 2 7 | 8 | c2 = 0.03 ** 2 9 | 10 | mu_x = K.mean(x, axis=-1) 11 | 12 | mu_y = K.mean(y, axis=-1) 13 | 14 | sigma_x = K.mean(x ** 2, axis=-1) - mu_x ** 2 15 | 16 | sigma_y = K.mean(y ** 2, axis=-1) - mu_y ** 2 17 | 18 | sigma_xy = K.mean(x * y, axis=-1) - mu_x * mu_y 19 | 20 | ssim_n = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2) 21 | 22 | ssim_d = (mu_x ** 2 + mu_y ** 2 + c1) * (sigma_x + sigma_y + c2) 23 | 24 | ssim_out = ssim_n / ssim_d 25 | 26 | return K.clip((1 - ssim_out) / 2, 0, 1) 27 | 28 | 29 | def photometric_consistency_loss(alpha): 30 | def loss(y_true, y_pred): 31 | return alpha * ssim(y_true, y_pred) + (1 - alpha) * mean_absolute_error(y_true, y_pred) 32 | 33 | return loss 34 | -------------------------------------------------------------------------------- /layers_test.py: -------------------------------------------------------------------------------- 1 | from layers import spatial_transformation 2 | import matplotlib.image as mpimg 3 | import numpy as np 4 | from keras import backend as K 5 | from image_loader import get_stereo_image_generators 6 | import matplotlib.pyplot as plt 7 | 8 | img_rows, img_cols = 128, 512 9 | 10 | train_gen, test_gen, train_samples, test_samples = get_stereo_image_generators('data/train/', 'data/test', batch_size=1, shuffle=True) 11 | img = train_gen.__next__() 12 | 13 | # spatial transformation lambda layer test 14 | 15 | left_image = img[0][0] 16 | disparity = 0.1 * np.ones((1, img_rows, img_cols, 1)) 17 | 18 | disparity = K.variable(disparity) 19 | left_image = K.variable(left_image) 20 | 21 | right_image = spatial_transformation([left_image, disparity], 1, '') 22 | 23 | im_right = K.eval(right_image) 24 | plt.imshow(im_right[0]) 25 | plt.show() 26 | 27 | # TODO: add tests for remaining layers: expand_dims, depth_to_disparity, disparity_difference 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Miguel Alonso Jr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /layers.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Lambda 2 | from util import spatial_transform 3 | import keras.backend as K 4 | 5 | 6 | def spatial_transformation(inputs, sign, name): 7 | def output_shape(input_shape): 8 | 9 | return input_shape[0] 10 | 11 | return Lambda(lambda x: spatial_transform(x[0], sign*x[1]), output_shape=output_shape, name=name)(inputs) 12 | 13 | 14 | def expand_dims(inputs, dimension, name): 15 | def output_shape(input_shape): 16 | shape = list(input_shape) 17 | 18 | shape[3] = 1 19 | 20 | return tuple(shape) 21 | 22 | return Lambda(lambda x: K.expand_dims(inputs[:, :, :, dimension], 3), output_shape=output_shape, name=name)(inputs) 23 | 24 | 25 | def depth_to_disparity(inputs, baseline, focal_length, width, name): 26 | def output_shape(input_shape): 27 | return input_shape 28 | 29 | return Lambda(lambda x: width * baseline * focal_length / x, output_shape=output_shape, name=name)(inputs) 30 | 31 | 32 | def disparity_difference(disparities, name): 33 | def output_shape(input_shape): 34 | return input_shape 35 | 36 | return Lambda(lambda x: x[0] - x[1], output_shape=output_shape, name=name)(disparities) 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #data 2 | data 3 | 4 | #scratch 5 | scratch 6 | 7 | #visualization 8 | visualization 9 | 10 | #models 11 | models 12 | 13 | #IDEs 14 | .idea 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | env/ 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # dotenv 98 | .env 99 | 100 | # virtualenv 101 | .venv 102 | venv/ 103 | ENV/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | -------------------------------------------------------------------------------- /create_data_sym_links.py: -------------------------------------------------------------------------------- 1 | import os 2 | data_dir = 'data' 3 | left_image_dir = 'image_2' 4 | right_image_dir = 'image_3' 5 | 6 | # train_sequences = ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', 7 | # '11', '12', '13', '14', '15', '16', '17', '18', '19', '20'] 8 | 9 | train_sequences = ['05', '12'] 10 | 11 | test_sequences = ['10'] 12 | 13 | train_dir = os.path.join(data_dir, 'train') 14 | 15 | test_dir = os.path.join(data_dir, 'test') 16 | 17 | if not os.path.exists(train_dir): 18 | os.makedirs(train_dir) 19 | 20 | if not os.path.exists(test_dir): 21 | os.makedirs(test_dir) 22 | 23 | if not os.path.exists(os.path.join(train_dir, 'left')): 24 | os.makedirs(os.path.join(train_dir, 'left')) 25 | 26 | if not os.path.exists(os.path.join(train_dir, 'right')): 27 | os.makedirs(os.path.join(train_dir, 'right')) 28 | 29 | if not os.path.exists(os.path.join(test_dir, 'left')): 30 | os.makedirs(os.path.join(test_dir, 'left')) 31 | 32 | if not os.path.exists(os.path.join(test_dir, 'right')): 33 | os.makedirs(os.path.join(test_dir, 'right')) 34 | 35 | root_dir = os.getcwd() 36 | 37 | for seq in train_sequences: 38 | l_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, left_image_dir) 39 | 40 | r_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, right_image_dir) 41 | 42 | for (_, _, left_filenames) in os.walk(l_dir): 43 | break 44 | 45 | for (_, _, right_filenames) in os.walk(r_dir): 46 | break 47 | 48 | for file in left_filenames: 49 | src_path = os.path.join(root_dir, l_dir, file) 50 | 51 | dst_path = os.path.join(train_dir, 'left', seq+file) 52 | 53 | os.symlink(src_path, dst_path) 54 | 55 | for file in right_filenames: 56 | src_path = os.path.join(root_dir, r_dir, file) 57 | 58 | dst_path = os.path.join(train_dir, 'right', seq+file) 59 | 60 | os.symlink(src_path, dst_path) 61 | 62 | for seq in test_sequences: 63 | l_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, left_image_dir) 64 | 65 | r_dir = os.path.join(data_dir, 'dataset', 'sequences', seq, right_image_dir) 66 | 67 | for (_, _, left_filenames) in os.walk(l_dir): 68 | break 69 | 70 | for (_, _, right_filenames) in os.walk(r_dir): 71 | break 72 | 73 | for file in left_filenames: 74 | src_path = os.path.join(root_dir, l_dir, file) 75 | 76 | dst_path = 
os.path.join(test_dir, 'left', seq+file) 77 | 78 | os.symlink(src_path, dst_path) 79 | 80 | for file in right_filenames: 81 | src_path = os.path.join(root_dir, r_dir, file) 82 | 83 | dst_path = os.path.join(test_dir, 'right', seq+file) 84 | 85 | os.symlink(src_path, dst_path) 86 | -------------------------------------------------------------------------------- /autoencoder_image_loader.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.image import ImageDataGenerator 2 | import numpy as np 3 | 4 | 5 | def get_stereo_image_generators(train_folder, test_folder, img_rows=128, img_cols=512, batch_size=16, shuffle=True): 6 | train_image_gen = ImageDataGenerator(rescale=1.0 / 255.0, 7 | # TODO: when network is complete, add image transformations to improve training 8 | # rotation_range=5, 9 | # shear_range=0.01, 10 | # zoom_range=0.01, 11 | # height_shift_range=0.01, 12 | # width_shift_range=0.01 13 | ) 14 | 15 | test_image_gen = ImageDataGenerator(rescale=1.0 / 255.0) 16 | 17 | train_generator_left = train_image_gen.flow_from_directory(train_folder, 18 | target_size=(img_rows, img_cols), 19 | batch_size=batch_size, 20 | seed=10, 21 | shuffle=shuffle, 22 | classes=['left'], 23 | class_mode=None, 24 | follow_links=True) 25 | 26 | test_generator_left = test_image_gen.flow_from_directory(test_folder, 27 | target_size=(img_rows, img_cols), 28 | batch_size=batch_size, 29 | seed=10, 30 | shuffle=shuffle, 31 | classes=['left'], 32 | class_mode=None, 33 | follow_links=True) 34 | 35 | def train_generator_func(): 36 | while True: 37 | left_image = train_generator_left.next() 38 | 39 | yield [left_image], [left_image] 40 | 41 | def test_generator_func(): 42 | while True: 43 | left_image = test_generator_left.next() 44 | 45 | yield [left_image], [left_image] 46 | 47 | train_generator = train_generator_func() 48 | 49 | test_generator = test_generator_func() 50 | 51 | train_length = len(train_generator_left.filenames) 52 | 53 | test_length = len(test_generator_left.filenames) 54 | 55 | return train_generator, test_generator, train_length, test_length 56 | -------------------------------------------------------------------------------- /autoencoder_model.py: -------------------------------------------------------------------------------- 1 | from keras import Model 2 | from keras.layers import Conv2D, Conv2DTranspose, concatenate 3 | from keras.optimizers import Adam 4 | 5 | 6 | class AutoEncoderModel(object): 7 | def __init__(self, left_input, right_input, lr=1e-4, rows=128, cols=512): 8 | self.rows = rows 9 | self.cols = cols 10 | self.left = left_input 11 | self.right = right_input 12 | self.left_est = None 13 | self.right_est = None 14 | self.output = None 15 | self.model = None 16 | self.lr = lr 17 | self.build_architecture() 18 | self.build_outputs() 19 | self.build_model() 20 | 21 | @staticmethod 22 | def conv(input, channels, kernel_size, strides, activation='elu'): 23 | 24 | return Conv2D(channels, kernel_size=kernel_size, strides=strides, padding='same', activation=activation)(input) 25 | 26 | @staticmethod 27 | def deconv(input, channels, kernel_size, scale): 28 | 29 | return Conv2DTranspose(channels, kernel_size=kernel_size, strides=scale, padding='same')(input) 30 | 31 | def conv_block(self, input, channels, kernel_size): 32 | conv1 = self.conv(input, channels, kernel_size, 1) 33 | 34 | conv2 = self.conv(conv1, channels, kernel_size, 2) 35 | 36 | return conv2 37 | 38 | def deconv_block(self, input, channels, kernel_size, skip): 39 | 
deconv1 = self.deconv(input, channels, kernel_size, 2) 40 | 41 | if skip is not None: 42 | concat1 = concatenate([deconv1, skip], 3) 43 | else: 44 | concat1 = deconv1 45 | 46 | iconv1 = self.conv(concat1, channels, kernel_size, 1) 47 | 48 | return iconv1 49 | 50 | def get_output(self, deconv): 51 | return self.conv(deconv, 3, 3, 1, 'sigmoid') 52 | 53 | def build_architecture(self): 54 | # encoder 55 | conv1 = self.conv_block(self.left, 32, 7) 56 | conv2 = self.conv_block(conv1, 64, 5) 57 | conv3 = self.conv_block(conv2, 128, 3) 58 | conv4 = self.conv_block(conv3, 256, 3) 59 | conv5 = self.conv_block(conv4, 512, 3) 60 | conv6 = self.conv_block(conv5, 512, 3) 61 | conv7 = self.conv_block(conv6, 512, 3) 62 | 63 | # skips 64 | skip1 = conv1 65 | skip2 = conv2 66 | skip3 = conv3 67 | skip4 = conv4 68 | skip5 = conv5 69 | skip6 = conv6 70 | 71 | deconv7 = self.deconv_block(conv7, 512, 3, skip6) 72 | deconv6 = self.deconv_block(deconv7, 512, 3, skip5) 73 | deconv5 = self.deconv_block(deconv6, 256, 3, skip4) 74 | deconv4 = self.deconv_block(deconv5, 128, 3, skip3) 75 | deconv3 = self.deconv_block(deconv4, 64, 3, skip2) 76 | deconv2 = self.deconv_block(deconv3, 32, 3, skip1) 77 | deconv1 = self.deconv_block(deconv2, 16, 3, None) 78 | 79 | self.output = self.get_output(deconv1) 80 | 81 | def build_outputs(self): 82 | self.left_est = self.output 83 | # self.right_est = expand_dims(self.output, 1, 'right_estimate') 84 | 85 | def build_model(self): 86 | self.model = Model(inputs=[self.left], outputs=[self.left_est]) 87 | self.model.compile(loss=['mae'], 88 | optimizer='adadelta', 89 | metrics=['mse']) 90 | -------------------------------------------------------------------------------- /autoencoder_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from keras import Input 4 | from keras.callbacks import TensorBoard, ModelCheckpoint, Callback 5 | from keras.utils import plot_model 6 | from autoencoder_model import AutoEncoderModel 7 | from autoencoder_image_loader import get_stereo_image_generators 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | parser = argparse.ArgumentParser(description='Stereo Autoencoder') 12 | parser.add_argument('--model_name', type=str, help='model name', default='ae') 13 | parser.add_argument('--models_dir', type=str, help='models directory', default='models') 14 | parser.add_argument('--data_path', type=str, help='path to the data', required=True) 15 | parser.add_argument('--input_height', type=int, help='input height', default=128) 16 | parser.add_argument('--input_width', type=int, help='input width', default=512) 17 | parser.add_argument('--batch_size', type=int, help='batch size', default=8) 18 | parser.add_argument('--num_epochs', type=int, help='number of epochs', default=100) 19 | parser.add_argument('--learning_rate', type=float, help='initial learning rate', default=1e-2) 20 | parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default='log') 21 | args = parser.parse_args() 22 | 23 | 24 | class VisualizeOutput(Callback): 25 | def __init__(self, input_image): 26 | super().__init__() 27 | self.input_image = input_image 28 | 29 | def on_epoch_begin(self, epoch, logs=None): 30 | self.visualize_input_output() 31 | 32 | def visualize_input_output(self): 33 | image = np.concatenate((self.input_image, self.model.predict(self.input_image)), axis=1)[0, :, :, :] 34 | plt.clf() 35 | plt.imshow(image) 36 | plt.pause(1) 37 | 38 | 39 | 
def main(args): 40 | train_gen, test_gen, train_samples, test_samples = get_stereo_image_generators(args.data_path + '/train', 41 | args.data_path + '/test', 42 | img_rows=args.input_height, 43 | img_cols=args.input_width, 44 | batch_size=args.batch_size, 45 | shuffle=False) 46 | image_generator = get_stereo_image_generators('data/train', 'data/test', batch_size=1, shuffle=False) 47 | 48 | input_image = image_generator[0].__next__()[0][0] 49 | 50 | input_shape = (args.input_height, args.input_width, 3) 51 | left_input = Input(input_shape) 52 | right_input = Input(input_shape) 53 | 54 | ae = AutoEncoderModel(left_input, right_input, args.learning_rate) 55 | ae.model.summary() 56 | plot_model(ae.model, show_shapes=True, to_file='scratch/ae.png') 57 | ae.model.fit_generator(train_gen, 58 | steps_per_epoch=train_samples // args.batch_size, 59 | # validation_data=test_gen, 60 | # validation_steps=test_samples // args.batch_size, 61 | epochs=args.num_epochs, 62 | verbose=1, 63 | callbacks=[VisualizeOutput(input_image), 64 | TensorBoard(log_dir=args.log_directory, 65 | batch_size=args.batch_size, 66 | write_graph=False), 67 | ModelCheckpoint(os.path.join(args.models_dir, args.model_name + '.h5'), 68 | monitor='loss', 69 | verbose=1, 70 | save_best_only=True)]) 71 | 72 | 73 | if __name__ == '__main__': 74 | main(args) 75 | -------------------------------------------------------------------------------- /undeepvo_train.py: -------------------------------------------------------------------------------- 1 | # uncomment this section to train on the CPU 2 | # import os 3 | # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 4 | # os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 5 | from keras.utils import plot_model 6 | from keras.layers import Input 7 | from image_loader import get_stereo_image_generators 8 | from undeepvo_model import UnDeepVOModel 9 | import argparse 10 | 11 | parser = argparse.ArgumentParser(description='UnDeepVo Keras implementation.') 12 | 13 | parser.add_argument('--mode', type=str, help='train or test', default='train') 14 | 15 | parser.add_argument('--models_dir', type=str, help='models directory', default='models') 16 | 17 | parser.add_argument('--model_name', type=str, help='model name', default='undeepvo') 18 | 19 | parser.add_argument('--data_path', type=str, help='path to the data', required=True) 20 | 21 | parser.add_argument('--input_height', type=int, help='input height', default=128) 22 | 23 | parser.add_argument('--input_width', type=int, help='input width', default=512) 24 | 25 | parser.add_argument('--batch_size', type=int, help='batch size', default=2) 26 | 27 | parser.add_argument('--num_epochs', type=int, help='number of epochs', default=50) 28 | 29 | parser.add_argument('--learning_rate', type=float, help='initial learning rate', default=0.1) 30 | 31 | parser.add_argument('--lr_loss_weight', type=float, help='left-right consistency weight', default=1.0) 32 | 33 | parser.add_argument('--alpha_image_loss', type=float, help='weight between SSIM and L1 in the image loss', default=0.85) 34 | 35 | parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default='log') 36 | 37 | parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default='') 38 | 39 | args = parser.parse_args() 40 | 41 | 42 | def main(args): 43 | batch_size = args.batch_size 44 | 45 | epochs = args.num_epochs 46 | 47 | img_rows, img_cols = args.input_height, args.input_width 48 | 49 | models_dir = 
args.models_dir 50 | 51 | model_name = args.model_name 52 | 53 | data_path = args.data_path 54 | 55 | train_gen, test_gen, train_samples, test_samples = get_stereo_image_generators(data_path + '/train', 56 | data_path + '/test', 57 | img_rows=img_rows, 58 | img_cols=img_cols, 59 | batch_size=batch_size, 60 | shuffle=True) 61 | 62 | # channels last by default 63 | input_shape = (img_rows, img_cols, 3) 64 | 65 | left_input = Input(input_shape) 66 | 67 | left_input_next = Input(input_shape) 68 | 69 | right_input = Input(input_shape) 70 | 71 | udvo = UnDeepVOModel(left_input_next, left_input, right_input, args.learning_rate) 72 | 73 | # for epoch in range(epochs): 74 | # # TODO: need to save model after each epoch 75 | # # model_path = os.path.join(models_dir, model_name + '_epoch_%d' % epoch) 76 | # 77 | # udvo.model.fit_generator(train_gen, 78 | # steps_per_epoch=train_samples // batch_size, 79 | # epochs=epochs, 80 | # validation_data=test_gen, 81 | # validation_steps=test_samples // batch_size, 82 | # verbose=1, 83 | # # callbacks=[TensorBoard(log_dir=args.log_directory, 84 | # # histogram_freq=True, 85 | # # batch_size=batch_size, 86 | # # write_graph=False, 87 | # # write_grads=True)] 88 | # ) 89 | 90 | udvo.model.summary() 91 | 92 | plot_model(udvo.model, show_shapes=True, to_file='scratch/model.png') 93 | 94 | 95 | if __name__ == '__main__': 96 | main(args) 97 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | from math import sin, cos 4 | from keras import backend as K 5 | 6 | 7 | def euler_to_rotation(theta): 8 | R_x = np.array([[1, 0, 0], 9 | [0, cos(theta[0]), -sin(theta[0])], 10 | [0, sin(theta[0]), cos(theta[0])] 11 | ]) 12 | 13 | R_y = np.array([[cos(theta[1]), 0, sin(theta[1])], 14 | [0, 1, 0], 15 | [-sin(theta[1]), 0, cos(theta[1])] 16 | ]) 17 | 18 | R_z = np.array([[cos(theta[2]), -sin(theta[2]), 0], 19 | [sin(theta[2]), cos(theta[2]), 0], 20 | [0, 0, 1] 21 | ]) 22 | 23 | R = np.dot(R_z, np.dot(R_y, R_x)) 24 | 25 | output = np.identity(4) 26 | 27 | output[:3, :3] = R 28 | 29 | return output 30 | 31 | 32 | def position_to_translation(position): 33 | translation_mat = np.identity(4) 34 | 35 | translation_mat[:3, 3] = position[:3] 36 | 37 | return translation_mat 38 | 39 | 40 | def warp(image, depthmap, pose, K): 41 | image_shape = K.shape(image) 42 | 43 | num_batch = image_shape[0] 44 | 45 | height = image_shape[1] 46 | 47 | width = image_shape[2] 48 | 49 | channels = image_shape[3] 50 | 51 | 52 | def spatial_transform(input_images, x_offset, wrap_mode='border', name='bilinear_sampler', **kwargs): 53 | def _repeat(x, n_repeats): 54 | rep = K.tile(K.expand_dims(x, 1), [1, n_repeats]) 55 | 56 | return rep 57 | 58 | def _interpolate(im, x, y): 59 | _edge_size = 0 60 | 61 | if _wrap_mode == 'border': 62 | _edge_size = 1 63 | 64 | im = K.spatial_2d_padding(im, padding=((1, 1), (1, 1))) 65 | 66 | x = x + _edge_size 67 | 68 | y = y + _edge_size 69 | 70 | elif _wrap_mode == 'edge': 71 | _edge_size = 0 72 | 73 | else: 74 | return None 75 | 76 | x = K.clip(x, 0.0, K.eval(_width_f) - 1 + 2 * _edge_size) 77 | 78 | x0_f = K.round(x) 79 | 80 | y0_f = K.round(y) 81 | 82 | x1_f = x0_f + 1 83 | 84 | x0 = K.cast(x0_f, 'int32') 85 | 86 | y0 = K.cast(y0_f, 'int32') 87 | 88 | x1 = K.cast(K.minimum(x1_f, K.eval(_width_f) - 1 + 2 * _edge_size), 'int32') 89 | 90 | dim2 = (_width + 2 * 
_edge_size) 91 | 92 | dim1 = (_width + 2 * _edge_size) * (_height + 2 * _edge_size) 93 | 94 | base = _repeat(K.arange(_num_batch) * dim1, _height * _width) 95 | 96 | base_y0 = base + y0 * dim2 97 | 98 | idx_l = base_y0 + x0 99 | 100 | idx_r = base_y0 + x1 101 | 102 | im_flat = K.reshape(im, K.stack([-1, _num_channels])) 103 | 104 | pix_l = K.gather(im_flat, idx_l) 105 | 106 | pix_r = K.gather(im_flat, idx_r) 107 | 108 | weight_l = K.expand_dims(x1_f - x, 1) 109 | 110 | weight_r = K.expand_dims(x - x0_f, 1) 111 | 112 | return weight_l * pix_l + weight_r * pix_r 113 | 114 | def _transform(input_images, x_offset): 115 | x_t, y_t = np.meshgrid(np.linspace(0.0, K.eval(_width_f) - 1.0, K.eval(_width)), 116 | np.linspace(0.0, K.eval(_height_f) - 1.0, K.eval(_height))) 117 | 118 | x_t = K.variable(x_t) 119 | 120 | y_t = K.variable(y_t) 121 | 122 | x_t_flat = K.reshape(x_t, (1, -1)) 123 | 124 | y_t_flat = K.reshape(y_t, (1, -1)) 125 | 126 | x_t_flat = K.tile(x_t_flat, K.stack([_num_batch, 1])) 127 | 128 | y_t_flat = K.tile(y_t_flat, K.stack([_num_batch, 1])) 129 | 130 | x_t_flat = K.reshape(x_t_flat, [-1]) 131 | 132 | y_t_flat = K.reshape(y_t_flat, [-1]) 133 | 134 | x_t_flat = x_t_flat + K.reshape(x_offset, [-1]) * _width_f 135 | 136 | input_transformed = _interpolate(input_images, x_t_flat, y_t_flat) 137 | 138 | output = K.reshape(input_transformed, K.stack([_num_batch, _height, _width, _num_channels])) 139 | 140 | return output 141 | 142 | _num_batch = K.shape(input_images)[0] 143 | 144 | _height = K.shape(input_images)[1] 145 | 146 | _width = K.shape(input_images)[2] 147 | 148 | _num_channels = K.shape(input_images)[3] 149 | 150 | _height_f = K.cast(_height, 'float32') 151 | 152 | _width_f = K.cast(_width, 'float32') 153 | 154 | _wrap_mode = wrap_mode 155 | 156 | output = _transform(input_images, x_offset) 157 | 158 | return output 159 | -------------------------------------------------------------------------------- /image_loader.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.image import ImageDataGenerator 2 | import numpy as np 3 | 4 | 5 | def get_stereo_image_generators(train_folder, test_folder, img_rows=128, img_cols=512, batch_size=16, shuffle=True): 6 | train_image_gen = ImageDataGenerator(rescale=1.0 / 255.0, 7 | # TODO: when network is complete, add image transformations to improve training 8 | # rotation_range=5, 9 | # shear_range=0.01, 10 | # zoom_range=0.01, 11 | # height_shift_range=0.01, 12 | # width_shift_range=0.01 13 | ) 14 | 15 | test_image_gen = ImageDataGenerator(rescale=1.0 / 255.0) 16 | 17 | train_generator_left = train_image_gen.flow_from_directory(train_folder, 18 | target_size=(img_rows, img_cols), 19 | batch_size=batch_size, 20 | seed=10, 21 | shuffle=shuffle, 22 | classes=['left'], 23 | class_mode=None, 24 | follow_links=True) 25 | 26 | train_generator_right = train_image_gen.flow_from_directory(train_folder, 27 | target_size=(img_rows, img_cols), 28 | batch_size=batch_size, 29 | seed=10, 30 | shuffle=shuffle, 31 | classes=['right'], 32 | class_mode=None, 33 | follow_links=True) 34 | 35 | test_generator_left = test_image_gen.flow_from_directory(test_folder, 36 | target_size=(img_rows, img_cols), 37 | batch_size=batch_size, 38 | seed=10, 39 | shuffle=shuffle, 40 | classes=['left'], 41 | class_mode=None, 42 | follow_links=True) 43 | 44 | test_generator_right = test_image_gen.flow_from_directory(test_folder, 45 | target_size=(img_rows, img_cols), 46 | batch_size=batch_size, 47 | seed=10, 48 | 
shuffle=shuffle, 49 | classes=['right'], 50 | class_mode=None, 51 | follow_links=True) 52 | 53 | def train_generator_func(): 54 | while True: 55 | left_image = train_generator_left.next() 56 | 57 | right_image = train_generator_right.next() 58 | 59 | yield [left_image, right_image], [left_image, 60 | right_image, 61 | np.zeros((left_image.shape[0], 62 | left_image.shape[1], 63 | left_image.shape[2], 64 | 1)), 65 | np.zeros((right_image.shape[0], 66 | right_image.shape[1], 67 | right_image.shape[2], 68 | 1))] 69 | 70 | def test_generator_func(): 71 | while True: 72 | left_image = test_generator_left.next() 73 | 74 | right_image = test_generator_right.next() 75 | 76 | yield [left_image, right_image], [left_image, 77 | right_image, 78 | np.zeros((left_image.shape[0], 79 | left_image.shape[1], 80 | left_image.shape[2], 81 | 1)), 82 | np.zeros((right_image.shape[0], 83 | right_image.shape[1], 84 | right_image.shape[2], 85 | 1))] 86 | 87 | train_generator = train_generator_func() 88 | 89 | test_generator = test_generator_func() 90 | 91 | train_length = len(train_generator_left.filenames) 92 | 93 | test_length = len(test_generator_left.filenames) 94 | 95 | return train_generator, test_generator, train_length, test_length 96 | -------------------------------------------------------------------------------- /undeepvo_model.py: -------------------------------------------------------------------------------- 1 | from keras.optimizers import Adam 2 | from keras.models import Model 3 | from keras.layers import Conv2D, Conv2DTranspose, concatenate, Cropping2D, Dense, Flatten 4 | from layers import depth_to_disparity, disparity_difference, expand_dims, spatial_transformation 5 | from losses import photometric_consistency_loss 6 | 7 | 8 | class UnDeepVOModel(object): 9 | def __init__(self, left_input_k_1, left_input_k, right_input_k, mode='train', lr=0.1, alpha_image_loss=0.85, 10 | img_rows=128, img_cols=512): 11 | # NOTE: disparity calculation 12 | # depth = baseline * focal / disparity 13 | # depth = 0.54 * 721 / (1242 * disp) 14 | 15 | self.img_rows = img_rows 16 | 17 | self.img_cols = img_cols 18 | 19 | self.baseline = 0.54 # meters 20 | 21 | self.focal_length = 718.856 / 1241 # image width = 1241 (note: must scale using this number) 22 | 23 | self.left = left_input_k 24 | 25 | self.right = right_input_k 26 | 27 | self.left_next = left_input_k_1 28 | 29 | self.left_est = None 30 | 31 | self.right_est = None 32 | 33 | self.depthmap = None 34 | 35 | self.depthmap_left = None 36 | 37 | self.depthmap_right = None 38 | 39 | self.disparity_left = None 40 | 41 | self.disparity_right = None 42 | 43 | self.disparity_diff_left = None 44 | 45 | self.disparity_diff_right = None 46 | 47 | self.right_to_left_disparity = None 48 | 49 | self.left_to_right_disparity = None 50 | 51 | self.model = None 52 | 53 | self.depthmap = None 54 | 55 | self.mode = mode 56 | 57 | self.lr = lr 58 | 59 | self.alpha_image_loss = alpha_image_loss 60 | 61 | self.build_depth_architecture() 62 | 63 | self.build_pose_architecture() 64 | 65 | self.build_outputs() 66 | 67 | self.build_model() 68 | 69 | if self.mode == 'test': 70 | return 71 | 72 | @staticmethod 73 | def conv(input, channels, kernel_size, strides, activation='elu'): 74 | 75 | return Conv2D(channels, kernel_size=kernel_size, strides=strides, padding='same', activation=activation)(input) 76 | 77 | @staticmethod 78 | def deconv(input, channels, kernel_size, scale): 79 | 80 | return Conv2DTranspose(channels, kernel_size=kernel_size, strides=scale, padding='same')(input) 81 | 82 
| def conv_block(self, input, channels, kernel_size): 83 | conv1 = self.conv(input, channels, kernel_size, 1) 84 | 85 | conv2 = self.conv(conv1, channels, kernel_size, 2) 86 | 87 | return conv2 88 | 89 | def deconv_block(self, input, channels, kernel_size, skip): 90 | deconv1 = self.deconv(input, channels, kernel_size, 2) 91 | 92 | if skip is not None: 93 | concat1 = concatenate([deconv1, skip], 3) 94 | else: 95 | concat1 = deconv1 96 | 97 | iconv1 = self.conv(concat1, channels, kernel_size, 1) 98 | 99 | return iconv1 100 | 101 | def get_depth(self, input): 102 | return self.conv(input, 2, 3, 1, 'sigmoid') 103 | 104 | def build_pose_architecture(self): 105 | input = concatenate([self.left, self.left_next], axis=3) 106 | 107 | conv1 = self.conv(input, 16, 7, 1, activation='relu') 108 | 109 | conv2 = self.conv(conv1, 32, 5, 1, activation='relu') 110 | 111 | conv3 = self.conv(conv2, 64, 3, 1, activation='relu') 112 | 113 | conv4 = self.conv(conv3, 128, 3, 1, activation='relu') 114 | 115 | conv5 = self.conv(conv4, 256, 3, 1, activation='relu') 116 | 117 | conv6 = self.conv(conv5, 512, 3, 1, activation='relu') 118 | 119 | flat1 = Flatten()(conv6) 120 | 121 | # translation 122 | 123 | fc1_tran = Dense(512, input_shape=(8192,))(flat1) 124 | 125 | fc2_tran = Dense(512, input_shape=(512,))(fc1_tran) 126 | 127 | fc3_tran = Dense(3, input_shape=(512,))(fc2_tran) 128 | 129 | self.translation = fc3_tran 130 | 131 | # rotation 132 | 133 | fc1_rot = Dense(512, input_shape=(512,))(flat1) 134 | 135 | fc2_rot = Dense(512, input_shape=(512,))(fc1_rot) 136 | 137 | fc3_rot = Dense(3, input_shape=(512,))(fc2_rot) 138 | 139 | self.rotation = fc3_rot 140 | 141 | def build_depth_architecture(self): 142 | # encoder 143 | conv1 = self.conv_block(self.left, 32, 7) 144 | 145 | conv2 = self.conv_block(conv1, 64, 5) 146 | 147 | conv3 = self.conv_block(conv2, 128, 3) 148 | 149 | conv4 = self.conv_block(conv3, 256, 3) 150 | 151 | conv5 = self.conv_block(conv4, 512, 3) 152 | 153 | conv6 = self.conv_block(conv5, 512, 3) 154 | 155 | conv7 = self.conv_block(conv6, 512, 3) 156 | 157 | # skips 158 | skip1 = conv1 159 | 160 | skip2 = conv2 161 | 162 | skip3 = conv3 163 | 164 | skip4 = conv4 165 | 166 | skip5 = conv5 167 | 168 | skip6 = conv6 169 | 170 | deconv7 = self.deconv_block(conv7, 512, 3, skip6) 171 | 172 | deconv6 = self.deconv_block(deconv7, 512, 3, skip5) 173 | 174 | deconv5 = self.deconv_block(deconv6, 256, 3, skip4) 175 | 176 | deconv4 = self.deconv_block(deconv5, 128, 3, skip3) 177 | 178 | deconv3 = self.deconv_block(deconv4, 64, 3, skip2) 179 | 180 | deconv2 = self.deconv_block(deconv3, 32, 3, skip1) 181 | 182 | deconv1 = self.deconv_block(deconv2, 16, 3, None) 183 | 184 | self.depthmap = self.get_depth(deconv1) 185 | 186 | def build_outputs(self): 187 | 188 | # store depthmaps 189 | 190 | self.depthmap_left = expand_dims(self.depthmap, 0, 'depth_map_exp_left') 191 | 192 | self.depthmap_right = expand_dims(self.depthmap, 1, 'depth_map_exp_right') 193 | 194 | if self.mode == 'test': 195 | return 196 | 197 | # generate disparities 198 | 199 | self.disparity_left = depth_to_disparity(self.depthmap_left, self.baseline, self.focal_length, 1, 200 | 'disparity_left') 201 | 202 | self.disparity_right = depth_to_disparity(self.depthmap_right, self.baseline, self.focal_length, 1, 203 | 'disparity_right') 204 | 205 | # generate estimates of left and right images 206 | 207 | self.left_est = spatial_transformation([self.right, self.disparity_right], -1, 'left_est') 208 | 209 | self.right_est = spatial_transformation([self.left, 
self.disparity_left], 1, 'right_est')
210 | 
211 |         # generate left-right consistency terms: project each disparity map into
212 |         # the opposite view so the two predictions can be compared directly
213 |         self.right_to_left_disparity = spatial_transformation([self.disparity_right, self.disparity_right], -1,
214 |                                                                'r2l_disparity')
215 | 
216 |         self.left_to_right_disparity = spatial_transformation([self.disparity_left, self.disparity_left], 1,
217 |                                                                'l2r_disparity')
218 | 
219 |         self.disparity_diff_left = disparity_difference([self.disparity_left, self.right_to_left_disparity],
220 |                                                          'disp_diff_left')
221 | 
222 |         self.disparity_diff_right = disparity_difference([self.disparity_right, self.left_to_right_disparity],
223 |                                                           'disp_diff_right')
224 | 
225 |     def build_model(self):
226 |         # NOTE: the generators in image_loader.py currently yield only four targets
227 |         # (left, right and two zero maps), so the translation and rotation outputs
228 |         # below have no matching targets yet; this is why fit_generator is still
229 |         # commented out in undeepvo_train.py
230 |         self.model = Model(inputs=[self.left_next, self.left, self.right], outputs=[self.left_est,
231 |                                                                                      self.right_est,
232 |                                                                                      self.disparity_diff_left,
233 |                                                                                      self.disparity_diff_right,
234 |                                                                                      self.translation,
235 |                                                                                      self.rotation])
236 |         self.model.compile(loss=[photometric_consistency_loss(self.alpha_image_loss),
237 |                                  photometric_consistency_loss(self.alpha_image_loss),
238 |                                  'mean_absolute_error',
239 |                                  'mean_absolute_error',
240 |                                  'mean_absolute_error',
241 |                                  'mean_absolute_error'],
242 |                            optimizer=Adam(lr=self.lr),
243 |                            # metrics=['accuracy']
244 |                            )
245 | 
--------------------------------------------------------------------------------
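
Note on the depth-to-disparity convention used above: depth_to_disparity in layers.py computes width * baseline * focal_length / depth, and undeepvo_model.py passes width=1 together with a focal length already divided by the image width (718.856 / 1241), so the predicted disparities are expressed as a fraction of the image width; spatial_transform in util.py then multiplies that offset by the image width before sampling. The short script below is a minimal numeric sanity check of this convention and is not part of the repository; the KITTI-style constants are the ones quoted in undeepvo_model.py, and the 10 m depth is purely illustrative.

# depth <-> disparity round trip with the constants quoted in undeepvo_model.py
baseline = 0.54                # stereo baseline in metres
focal_px = 718.856             # focal length in pixels
width_px = 1241.0              # original image width in pixels
focal = focal_px / width_px    # focal length in image-width units, as in the model

depth = 10.0                   # metres (illustrative value only)

disparity = baseline * focal / depth   # fraction of the image width (width argument = 1)
disparity_px = disparity * width_px    # the same disparity at the original 1241-px width

print(disparity)                       # ~0.0313 of the image width
print(disparity_px)                    # ~38.8 pixels
print(baseline * focal / disparity)    # recovers the 10 m depth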