├── .gitignore
├── LICENSE
├── README.md
├── data.py
├── log
│   └── .gitkeep
├── net_builder.py
└── run_posenet.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.ckpt*
checkpoint
log
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Kent Sommer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Camera Relocalization

## Getting Started

* Download the Cambridge Landmarks King's College dataset from [here](https://www.repository.cam.ac.uk/handle/1810/251342).

* Download the starting and trained weights from [here](https://github.com/tensorflow/models/blob/master/slim/nets/mobilenet_v1.md).

* To run:
  * Extract the King's College dataset to wherever you prefer.
  * Extract the starting and trained weights to wherever you prefer.
  * Point the paths at the top of `run_posenet.py` to those locations, as shown in the snippet below.
  * To retrain, run `run_posenet.py` with `train = True`; to test only, set `train = False` and `test = True`.
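
All run-time options are plain variables at the top of `run_posenet.py`; the ones you will typically edit are shown below (the paths are placeholders for your own locations):

```python
# Example configuration from the top of run_posenet.py; adjust the paths.
data_dir = '/path/to/KingsCollege'                  # extracted dataset
train_data_file = 'dataset_train.txt'               # shipped with the dataset
test_data_file = 'dataset_test.txt'
model_path = '/path/to/mobilenet_v1_1.0_224.ckpt'   # MobileNet starting weights
train = True   # set False to skip training
test = True    # set False to skip evaluation
```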
## References

Ronald Clark, Sen Wang, Andrew Markham, Niki Trigoni and Hongkai Wen. VidLoc: A Deep Spatio-Temporal Model for 6-DoF Video-Clip Relocalization. CVPR, 2017.

Alex Kendall and Roberto Cipolla. Geometric Loss Functions for Camera Pose Regression with Deep Learning. CVPR, 2017.

Alex Kendall, Matthew Grimes and Roberto Cipolla. PoseNet: A Convolutional Network for Real-Time 6-DOF Camera Relocalization. ICCV, 2015.

## Acknowledgement

Original implementation of PoseNet: https://github.com/kentsommer/tensorflow-posenet
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
import os
import random

import cv2
import numpy as np
from tqdm import tqdm


class datasource(object):
  def __init__(self, images, poses):
    self.images = images
    self.poses = poses


def centeredCrop(img, output_side_length):
  """Returns the central output_side_length x output_side_length crop."""
  height, width, depth = img.shape
  new_height = output_side_length
  new_width = output_side_length
  # Integer division keeps the offsets usable as array indices.
  if height > width:
    new_height = output_side_length * height // width
  else:
    new_width = output_side_length * width // height
  height_offset = (new_height - output_side_length) // 2
  width_offset = (new_width - output_side_length) // 2
  cropped_img = img[height_offset:height_offset + output_side_length,
                    width_offset:width_offset + output_side_length]
  return cropped_img


def preprocess(images):
  images_out = []  # final result
  # Resize, center-crop and compute the mean.
  images_cropped = []
  for i in tqdm(range(len(images))):
    print('images[i]', i, images[i])
    X = cv2.imread(images[i])
    print('image size', X.shape)
    X = cv2.resize(X, (455, 256))
    X = centeredCrop(X, 224)
    images_cropped.append(X)
  # Compute the per-channel pixel mean over all images.
  N = 0
  mean = np.zeros((1, 3, 224, 224))
  for X in tqdm(images_cropped):
    mean[0][0] += X[:, :, 0]
    mean[0][1] += X[:, :, 1]
    mean[0][2] += X[:, :, 2]
    N += 1
  mean[0] /= N
  # Subtract the mean from all images.
  for X in tqdm(images_cropped):
    X = np.transpose(X, (2, 0, 1))
    X = X - mean
    X = np.squeeze(X)
    X = np.transpose(X, (1, 2, 0))
    images_out.append(X)
  return images_out


def get_data(data_path, image_dir, max_num_images=-1):
  poses = []
  images = []

  with open(data_path) as f:
    next(f)  # skip the 3 header lines
    next(f)
    next(f)
    for line in f:
      # Each line holds the file name, the xyz position and the wpqr
      # orientation quaternion.
      fname, p0, p1, p2, p3, p4, p5, p6 = line.split()
      poses.append(tuple(float(p) for p in (p0, p1, p2, p3, p4, p5, p6)))
      images.append(os.path.join(image_dir, fname))
      if max_num_images > 0 and len(images) >= max_num_images:
        break
  images = preprocess(images)
  return datasource(images, poses)


def gen_data(source):
  while True:
    indices = list(range(len(source.images)))
    random.shuffle(indices)
    for i in indices:
      image = source.images[i]
      pose_x = source.poses[i][0:3]
      pose_q = source.poses[i][3:7]
      yield image, pose_x, pose_q


def gen_data_batch(source, batch_size):
  data_gen = gen_data(source)
  while True:
    image_batch = []
    pose_x_batch = []
    pose_q_batch = []
    for _ in range(batch_size):
      image, pose_x, pose_q = next(data_gen)
      image_batch.append(image)
      pose_x_batch.append(pose_x)
      pose_q_batch.append(pose_q)
    yield np.array(image_batch), np.array(pose_x_batch), np.array(pose_q_batch)
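

# A minimal, optional smoke test of the pipeline above. The paths are
# placeholders for an extracted King's College dataset; this block is a
# sketch and is not used by run_posenet.py.
if __name__ == '__main__':
  source = get_data('/path/to/KingsCollege/dataset_train.txt',
                    '/path/to/KingsCollege', max_num_images=4)
  image_batch, pose_x_batch, pose_q_batch = next(gen_data_batch(source, 2))
  # Expected shapes: (2, 224, 224, 3), (2, 3) and (2, 4).
  print(image_batch.shape, pose_x_batch.shape, pose_q_batch.shape)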
--------------------------------------------------------------------------------
/log/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/futurely/deep-camera-relocalization/4a2698ed979bd20d4221547d13a9a9b76c4405ea/log/.gitkeep
--------------------------------------------------------------------------------
/net_builder.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from slim.nets import mobilenet_v1 as mobilenet
from tensorflow.contrib.layers import xavier_initializer


def add_predictions(net, end_points):
  # Regress the 3-D translation (xyz) and the 4-D orientation quaternion
  # (wpqr) from the backbone features.
  pose_xyz = tf.layers.dense(
      net, 3, name='cls3_fc_pose_xyz', kernel_initializer=xavier_initializer())
  end_points['cls3_fc_pose_xyz'] = pose_xyz
  pose_wpqr = tf.layers.dense(
      net, 4, name='cls3_fc_pose_wpqr', kernel_initializer=xavier_initializer())
  end_points['cls3_fc_pose_wpqr'] = pose_wpqr


def build_posenet(inputs, net_type):
  if not net_type.startswith('mobilenet'):
    raise ValueError('Unsupported net_type: %s' % net_type)
  logits, end_points = mobilenet.mobilenet_v1(inputs, num_classes=1001)
  # Attach the pose heads to the global average pooling output.
  net = end_points['AvgPool_1a']
  add_predictions(net, end_points)
  return end_points


def add_pose_loss(net, poses_x, poses_q):
  # Sum the weighted translation and rotation losses over whichever
  # classifier heads the network provides (GoogLeNet has three auxiliary
  # heads, MobileNet only 'cls3').
  loss = None
  for prefix in ('cls1', 'cls2', 'cls3'):
    key_x = prefix + '_fc_pose_xyz'
    key_q = prefix + '_fc_pose_wpqr'
    if key_x not in net or key_q not in net:
      continue
    l_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(net[key_x], poses_x)))) * 0.3
    l_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(net[key_q], poses_q)))) * 150
    loss = l_x + l_q if loss is None else loss + l_x + l_q
  return loss
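

# For reference, each head above contributes the fixed-weight PoseNet loss
#   L = 0.3 * ||x_pred - x||_2 + 150 * ||q_pred - q||_2
# with the norms taken over the whole batch. The large weight on the
# quaternion term balances the very different magnitudes of positional
# (metres) and rotational (unit quaternion) errors; Kendall et al.
# (ICCV 2015) discuss how to choose this balance per scene.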
--------------------------------------------------------------------------------
/run_posenet.py:
--------------------------------------------------------------------------------
import math
import os

from tensorflow.python.training import training_util
from tqdm import tqdm

import tensorflow as tf
import numpy as np

from data import get_data, gen_data_batch
from net_builder import build_posenet, add_pose_loss
# The GoogLeNet backbone from the original PoseNet is not included in this
# repository, so its import is kept commented out.
# from posenet import GoogLeNet as PoseNet

max_num_train_images = -1
max_num_test_images = -1
batch_size = 48
max_iterations = 30000  # set to 1 for a quick smoke test
display_interval = 20
save_interval = 1000
test_interval = 1000
# Set data_dir to the directory containing the dataset files.
data_dir = '/home/user/Datasets/camera_relocalization/KingsCollege'
train_data_file = 'dataset_train.txt'
test_data_file = 'dataset_test.txt'
model_path = '/home/user/Datasets/tensorflow/models/mobilenet/mobilenet_v1_1.0_224_2017_06_14/mobilenet_v1_1.0_224.ckpt'
checkpoint_dir = 'checkpoint'
output_checkpoint_dir = 'checkpoint'
checkpoint_file = 'posenet_mobilenet.ckpt'
train = True
test = True
test_first = True

debug = False


def should_load(name):
  # Skip the new pose-regression heads and the ImageNet classifier
  # variables, which are absent from the pretrained backbone checkpoint.
  if name.startswith('cls') and name.find('_fc_pose_') != -1:
    return False
  if name.find('Logits') != -1 or name.find('Predictions') != -1:
    return False
  return True


def load_data(data_dir, data_file, max_num_images=-1):
  data_path = os.path.join(data_dir, data_file)
  if max_num_images >= 0:
    data_source = get_data(data_path, data_dir, max_num_images)
  else:
    data_source = get_data(data_path, data_dir)
  num_images = len(data_source.images)
  num_batches = (num_images + batch_size - 1) // batch_size
  print('num_images', num_images, 'batch_size', batch_size, 'num_batches',
        num_batches)
  return data_source


def main():
  images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
  poses_x = tf.placeholder(tf.float32, [batch_size, 3])
  poses_q = tf.placeholder(tf.float32, [batch_size, 4])

  print('build_posenet')
  net = build_posenet(images, 'mobilenet')
  # net = PoseNet({'data': images})

  loss = add_pose_loss(net, poses_x, poses_q)
  print('loss', loss)

  global_step = training_util.create_global_step()
  opt = tf.train.AdamOptimizer(
      learning_rate=0.0001,
      beta1=0.9,
      beta2=0.999,
      epsilon=1e-8,
      use_locking=False,
      name='Adam').minimize(
          loss, global_step=global_step)

  # Set GPU options.
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6833)

  init = tf.global_variables_initializer()
  p3_x = net['cls3_fc_pose_xyz']
  p3_q = net['cls3_fc_pose_wpqr']

  variables_to_restore = tf.global_variables()
  if debug:
    print('\n variables_to_restore', variables_to_restore)
  variables_to_save = tf.global_variables()
  if debug:
    print('\n variables_to_save', variables_to_save)

  restorer = tf.train.Saver(variables_to_restore)
  saver = tf.train.Saver(variables_to_save)
  output_checkpoint = os.path.join(output_checkpoint_dir, checkpoint_file)

  checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
  if checkpoint is None:
    checkpoint = model_path
  print('checkpoint', checkpoint)

  if train:
    train_data_source = load_data(data_dir, train_data_file,
                                  max_num_train_images)
  if test:
    test_data_source = load_data(data_dir, test_data_file, max_num_test_images)

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    # Restore model weights from a previously saved checkpoint. When the
    # full graph cannot be restored (e.g. when starting from the ImageNet
    # checkpoint), fall back to restoring only the backbone variables.
    try:
      restorer.restore(sess, checkpoint)
    except Exception:
      print('Failed to restore all variables from:', checkpoint)
      variables_to_restore = [
          x for x in tf.global_variables() if should_load(x.name)
      ]
      restorer = tf.train.Saver(variables_to_restore)
      restorer.restore(sess, checkpoint)
    print('Model restored from file: %s' % checkpoint)

    if train:
      train_data_batch_generator = gen_data_batch(train_data_source,
                                                  batch_size)
    if test:
      test_data_batch_generator = gen_data_batch(test_data_source, batch_size)
      num_test_images = len(test_data_source.images)
      num_test_batches = (num_test_images + batch_size - 1) // batch_size
    last_iteration = -1
    for i in range(max_iterations):
      if test and (i > 0 or test_first) and i % test_interval == 0:
        print('Validating')
        results = np.zeros((num_test_images, 2))
        for j in tqdm(range(num_test_batches)):
          np_image, np_poses_x, np_poses_q = next(test_data_batch_generator)
          if debug:
            print('np_image', np_image.shape, np_poses_x.shape,
                  np_poses_q.shape)
          feed = {images: np_image}
          predicted_x, predicted_q = sess.run([p3_x, p3_q], feed_dict=feed)
          predicted_q = np.squeeze(predicted_q)
          predicted_x = np.squeeze(predicted_x)

          batch_start = batch_size * j
          batch_end = min(batch_start + batch_size, num_test_images)
          num_valid = batch_end - batch_start
          # The generator shuffles its samples, so compare against the
          # poses it actually yielded rather than slicing
          # test_data_source.poses in file order.
          pose_q = np_poses_q[:num_valid]
          pose_x = np_poses_x[:num_valid]
          predicted_q = predicted_q[:num_valid]
          predicted_x = predicted_x[:num_valid]

          # Compute individual sample errors, normalizing each quaternion
          # to unit length first.
          pose_q = pose_q / np.linalg.norm(pose_q, axis=1, keepdims=True)
          predicted_q = predicted_q / np.linalg.norm(
              predicted_q, axis=1, keepdims=True)
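          # Angular error between two unit quaternions q1 and q2:
          #   theta = 2 * arccos(|<q1, q2>|) * 180 / pi
          # where |.| accounts for the quaternion double cover (q and -q
          # represent the same rotation). The dot product is clipped so
          # floating-point rounding cannot push it outside arccos's
          # domain [-1, 1].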
          d = np.clip(
              np.abs(np.sum(np.multiply(pose_q, predicted_q), axis=1)), -1.0,
              1.0)
          theta = 2 * np.arccos(d) * 180 / math.pi
          error_x = np.linalg.norm(pose_x - predicted_x, axis=1)
          results[batch_start:batch_end, :] = np.column_stack((error_x, theta))
        median_result = np.median(results, axis=0)
        print('Median error ', median_result[0], 'm and ', median_result[1],
              'degrees.')
      if train:
        np_images, np_poses_x, np_poses_q = next(train_data_batch_generator)
        feed = {images: np_images, poses_x: np_poses_x, poses_q: np_poses_q}

        sess.run(opt, feed_dict=feed)
        np_loss = sess.run(loss, feed_dict=feed)
        if i > 0 and i % display_interval == 0:
          print('Iteration: ' + str(i) + '\n\t' + 'Loss is: ' + str(np_loss))
        if i > 0 and i % save_interval == 0:
          saver.save(sess, output_checkpoint, global_step=global_step)
          print('Intermediate checkpoint saved at: ' + output_checkpoint)
        last_iteration = i

    if last_iteration > 0 and last_iteration % save_interval != 0:
      saver.save(sess, output_checkpoint, global_step=global_step)
      print('Final checkpoint saved at: ' + output_checkpoint)


if __name__ == '__main__':
  main()
--------------------------------------------------------------------------------