├── .gitignore
├── LICENSE
├── README.md
├── data.py
├── log
│   └── .gitkeep
├── net_builder.py
└── run_posenet.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.ckpt*
checkpoint
log
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Kent Sommer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Camera Relocalization

## Getting Started

* Download the Cambridge Landmarks King's College dataset from [here](https://www.repository.cam.ac.uk/handle/1810/251342).

* Download the starting and trained weights from [here](https://github.com/tensorflow/models/blob/master/slim/nets/mobilenet_v1.md).

* To run:
  * Extract the King's College dataset to wherever you prefer.
  * Extract the starting and trained weights to wherever you prefer.
  * Point the paths at the top of `run_posenet.py` to those locations, as shown in the snippet below.
  * To retrain, run `run_posenet.py` with `train = True`; to test only, set `train = False` and `test = True`.
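
All run-time options are plain variables at the top of `run_posenet.py`; the ones you will typically edit are shown below (the paths are placeholders for your own locations):

```python
# Example configuration from the top of run_posenet.py; adjust the paths.
data_dir = '/path/to/KingsCollege'                  # extracted dataset
train_data_file = 'dataset_train.txt'               # shipped with the dataset
test_data_file = 'dataset_test.txt'
model_path = '/path/to/mobilenet_v1_1.0_224.ckpt'   # MobileNet starting weights
train = True   # set False to skip training
test = True    # set False to skip evaluation
```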
## References

Ronald Clark, Sen Wang, Andrew Markham, Niki Trigoni and Hongkai Wen. VidLoc: A Deep Spatio-Temporal Model for 6-DoF Video-Clip Relocalization. CVPR, 2017.

Alex Kendall and Roberto Cipolla. Geometric Loss Functions for Camera Pose Regression with Deep Learning. CVPR, 2017.

Alex Kendall, Matthew Grimes and Roberto Cipolla. PoseNet: A Convolutional Network for Real-Time 6-DOF Camera Relocalization. ICCV, 2015.

## Acknowledgement

Original implementation of PoseNet: https://github.com/kentsommer/tensorflow-posenet
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
import os
import random

import cv2
import numpy as np
from tqdm import tqdm


class datasource(object):
  def __init__(self, images, poses):
    self.images = images
    self.poses = poses


def centeredCrop(img, output_side_length):
  """Returns the central output_side_length x output_side_length crop."""
  height, width, depth = img.shape
  new_height = output_side_length
  new_width = output_side_length
  # Integer division keeps the offsets usable as array indices.
  if height > width:
    new_height = output_side_length * height // width
  else:
    new_width = output_side_length * width // height
  height_offset = (new_height - output_side_length) // 2
  width_offset = (new_width - output_side_length) // 2
  cropped_img = img[height_offset:height_offset + output_side_length,
                    width_offset:width_offset + output_side_length]
  return cropped_img


def preprocess(images):
  images_out = []  # final result
  # Resize, center-crop and compute the mean.
  images_cropped = []
  for i in tqdm(range(len(images))):
    print('images[i]', i, images[i])
    X = cv2.imread(images[i])
    print('image size', X.shape)
    X = cv2.resize(X, (455, 256))
    X = centeredCrop(X, 224)
    images_cropped.append(X)
  # Compute the per-channel pixel mean over all images.
  N = 0
  mean = np.zeros((1, 3, 224, 224))
  for X in tqdm(images_cropped):
    mean[0][0] += X[:, :, 0]
    mean[0][1] += X[:, :, 1]
    mean[0][2] += X[:, :, 2]
    N += 1
  mean[0] /= N
  # Subtract the mean from all images.
  for X in tqdm(images_cropped):
    X = np.transpose(X, (2, 0, 1))
    X = X - mean
    X = np.squeeze(X)
    X = np.transpose(X, (1, 2, 0))
    images_out.append(X)
  return images_out


def get_data(data_path, image_dir, max_num_images=-1):
  poses = []
  images = []

  with open(data_path) as f:
    next(f)  # skip the 3 header lines
    next(f)
    next(f)
    for line in f:
      # Each line holds the file name, the xyz position and the wpqr
      # orientation quaternion.
      fname, p0, p1, p2, p3, p4, p5, p6 = line.split()
      poses.append(tuple(float(p) for p in (p0, p1, p2, p3, p4, p5, p6)))
      images.append(os.path.join(image_dir, fname))
      if max_num_images > 0 and len(images) >= max_num_images:
        break
  images = preprocess(images)
  return datasource(images, poses)


def gen_data(source):
  while True:
    indices = list(range(len(source.images)))
    random.shuffle(indices)
    for i in indices:
      image = source.images[i]
      pose_x = source.poses[i][0:3]
      pose_q = source.poses[i][3:7]
      yield image, pose_x, pose_q


def gen_data_batch(source, batch_size):
  data_gen = gen_data(source)
  while True:
    image_batch = []
    pose_x_batch = []
    pose_q_batch = []
    for _ in range(batch_size):
      image, pose_x, pose_q = next(data_gen)
      image_batch.append(image)
      pose_x_batch.append(pose_x)
      pose_q_batch.append(pose_q)
    yield np.array(image_batch), np.array(pose_x_batch), np.array(pose_q_batch)
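

# A minimal, optional smoke test of the pipeline above. The paths are
# placeholders for an extracted King's College dataset; this block is a
# sketch and is not used by run_posenet.py.
if __name__ == '__main__':
  source = get_data('/path/to/KingsCollege/dataset_train.txt',
                    '/path/to/KingsCollege', max_num_images=4)
  image_batch, pose_x_batch, pose_q_batch = next(gen_data_batch(source, 2))
  # Expected shapes: (2, 224, 224, 3), (2, 3) and (2, 4).
  print(image_batch.shape, pose_x_batch.shape, pose_q_batch.shape)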
--------------------------------------------------------------------------------
/log/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/futurely/deep-camera-relocalization/4a2698ed979bd20d4221547d13a9a9b76c4405ea/log/.gitkeep
--------------------------------------------------------------------------------
/net_builder.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from slim.nets import mobilenet_v1 as mobilenet
from tensorflow.contrib.layers import xavier_initializer


def add_predictions(net, end_points):
  # Regress the 3-D translation (xyz) and the 4-D orientation quaternion
  # (wpqr) from the backbone features.
  pose_xyz = tf.layers.dense(
      net, 3, name='cls3_fc_pose_xyz', kernel_initializer=xavier_initializer())
  end_points['cls3_fc_pose_xyz'] = pose_xyz
  pose_wpqr = tf.layers.dense(
      net, 4, name='cls3_fc_pose_wpqr', kernel_initializer=xavier_initializer())
  end_points['cls3_fc_pose_wpqr'] = pose_wpqr


def build_posenet(inputs, net_type):
  if not net_type.startswith('mobilenet'):
    raise ValueError('Unsupported net_type: %s' % net_type)
  logits, end_points = mobilenet.mobilenet_v1(inputs, num_classes=1001)
  # Attach the pose heads to the global average pooling output.
  net = end_points['AvgPool_1a']
  add_predictions(net, end_points)
  return end_points


def add_pose_loss(net, poses_x, poses_q):
  # Sum the weighted translation and rotation losses over whichever
  # classifier heads the network provides (GoogLeNet has three auxiliary
  # heads, MobileNet only 'cls3').
  loss = None
  for prefix in ('cls1', 'cls2', 'cls3'):
    key_x = prefix + '_fc_pose_xyz'
    key_q = prefix + '_fc_pose_wpqr'
    if key_x not in net or key_q not in net:
      continue
    l_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(net[key_x], poses_x)))) * 0.3
    l_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(net[key_q], poses_q)))) * 150
    loss = l_x + l_q if loss is None else loss + l_x + l_q
  return loss
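

# For reference, each head above contributes the fixed-weight PoseNet loss
#   L = 0.3 * ||x_pred - x||_2 + 150 * ||q_pred - q||_2
# with the norms taken over the whole batch. The large weight on the
# quaternion term balances the very different magnitudes of positional
# (metres) and rotational (unit quaternion) errors; Kendall et al.
# (ICCV 2015) discuss how to choose this balance per scene.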
--------------------------------------------------------------------------------
/run_posenet.py:
--------------------------------------------------------------------------------
import math
import os

from tensorflow.python.training import training_util
from tqdm import tqdm

import tensorflow as tf
import numpy as np

from data import get_data, gen_data_batch
from net_builder import build_posenet, add_pose_loss
# The GoogLeNet backbone from the original PoseNet is not included in this
# repository, so its import is kept commented out.
# from posenet import GoogLeNet as PoseNet

max_num_train_images = -1
max_num_test_images = -1
batch_size = 48
max_iterations = 30000  # set to 1 for a quick smoke test
display_interval = 20
save_interval = 1000
test_interval = 1000
# Set data_dir to the directory containing the dataset files.
data_dir = '/home/user/Datasets/camera_relocalization/KingsCollege'
train_data_file = 'dataset_train.txt'
test_data_file = 'dataset_test.txt'
model_path = '/home/user/Datasets/tensorflow/models/mobilenet/mobilenet_v1_1.0_224_2017_06_14/mobilenet_v1_1.0_224.ckpt'
checkpoint_dir = 'checkpoint'
output_checkpoint_dir = 'checkpoint'
checkpoint_file = 'posenet_mobilenet.ckpt'
train = True
test = True
test_first = True

debug = False


def should_load(name):
  # Skip the new pose-regression heads and the ImageNet classifier
  # variables, which are absent from the pretrained backbone checkpoint.
  if name.startswith('cls') and name.find('_fc_pose_') != -1:
    return False
  if name.find('Logits') != -1 or name.find('Predictions') != -1:
    return False
  return True


def load_data(data_dir, data_file, max_num_images=-1):
  data_path = os.path.join(data_dir, data_file)
  if max_num_images >= 0:
    data_source = get_data(data_path, data_dir, max_num_images)
  else:
    data_source = get_data(data_path, data_dir)
  num_images = len(data_source.images)
  num_batches = (num_images + batch_size - 1) // batch_size
  print('num_images', num_images, 'batch_size', batch_size, 'num_batches',
        num_batches)
  return data_source


def main():
  images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
  poses_x = tf.placeholder(tf.float32, [batch_size, 3])
  poses_q = tf.placeholder(tf.float32, [batch_size, 4])

  print('build_posenet')
  net = build_posenet(images, 'mobilenet')
  # net = PoseNet({'data': images})

  loss = add_pose_loss(net, poses_x, poses_q)
  print('loss', loss)

  global_step = training_util.create_global_step()
  opt = tf.train.AdamOptimizer(
      learning_rate=0.0001,
      beta1=0.9,
      beta2=0.999,
      epsilon=1e-8,
      use_locking=False,
      name='Adam').minimize(
          loss, global_step=global_step)

  # Set GPU options.
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6833)

  init = tf.global_variables_initializer()
  p3_x = net['cls3_fc_pose_xyz']
  p3_q = net['cls3_fc_pose_wpqr']

  variables_to_restore = tf.global_variables()
  if debug:
    print('\n variables_to_restore', variables_to_restore)
  variables_to_save = tf.global_variables()
  if debug:
    print('\n variables_to_save', variables_to_save)

  restorer = tf.train.Saver(variables_to_restore)
  saver = tf.train.Saver(variables_to_save)
  output_checkpoint = os.path.join(output_checkpoint_dir, checkpoint_file)

  checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
  if checkpoint is None:
    checkpoint = model_path
  print('checkpoint', checkpoint)

  if train:
    train_data_source = load_data(data_dir, train_data_file,
                                  max_num_train_images)
  if test:
    test_data_source = load_data(data_dir, test_data_file, max_num_test_images)

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    # Restore model weights from a previously saved checkpoint. When the
    # full graph cannot be restored (e.g. when starting from the ImageNet
    # checkpoint), fall back to restoring only the backbone variables.
    try:
      restorer.restore(sess, checkpoint)
    except Exception:
      print('Failed to restore all variables from:', checkpoint)
      variables_to_restore = [
          x for x in tf.global_variables() if should_load(x.name)
      ]
      restorer = tf.train.Saver(variables_to_restore)
      restorer.restore(sess, checkpoint)
    print('Model restored from file: %s' % checkpoint)

    if train:
      train_data_batch_generator = gen_data_batch(train_data_source,
                                                  batch_size)
    if test:
      test_data_batch_generator = gen_data_batch(test_data_source, batch_size)
      num_test_images = len(test_data_source.images)
      num_test_batches = (num_test_images + batch_size - 1) // batch_size
    last_iteration = -1
    for i in range(max_iterations):
      if test and (i > 0 or test_first) and i % test_interval == 0:
        print('Validating')
        results = np.zeros((num_test_images, 2))
        for j in tqdm(range(num_test_batches)):
          np_image, np_poses_x, np_poses_q = next(test_data_batch_generator)
          if debug:
            print('np_image', np_image.shape, np_poses_x.shape,
                  np_poses_q.shape)
          feed = {images: np_image}
          predicted_x, predicted_q = sess.run([p3_x, p3_q], feed_dict=feed)
          predicted_q = np.squeeze(predicted_q)
          predicted_x = np.squeeze(predicted_x)

          batch_start = batch_size * j
          batch_end = min(batch_start + batch_size, num_test_images)
          num_valid = batch_end - batch_start
          # The generator shuffles its samples, so compare against the
          # poses it actually yielded rather than slicing
          # test_data_source.poses in file order.
          pose_q = np_poses_q[:num_valid]
          pose_x = np_poses_x[:num_valid]
          predicted_q = predicted_q[:num_valid]
          predicted_x = predicted_x[:num_valid]

          # Compute individual sample errors, normalizing each quaternion
          # to unit length first.
          pose_q = pose_q / np.linalg.norm(pose_q, axis=1, keepdims=True)
          predicted_q = predicted_q / np.linalg.norm(
              predicted_q, axis=1, keepdims=True)
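          # Angular error between two unit quaternions q1 and q2:
          #   theta = 2 * arccos(|<q1, q2>|) * 180 / pi
          # where |.| accounts for the quaternion double cover (q and -q
          # represent the same rotation). The dot product is clipped so
          # floating-point rounding cannot push it outside arccos's
          # domain [-1, 1].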
          d = np.clip(
              np.abs(np.sum(np.multiply(pose_q, predicted_q), axis=1)), -1.0,
              1.0)
          theta = 2 * np.arccos(d) * 180 / math.pi
          error_x = np.linalg.norm(pose_x - predicted_x, axis=1)
          results[batch_start:batch_end, :] = np.column_stack((error_x, theta))
        median_result = np.median(results, axis=0)
        print('Median error ', median_result[0], 'm and ', median_result[1],
              'degrees.')
      if train:
        np_images, np_poses_x, np_poses_q = next(train_data_batch_generator)
        feed = {images: np_images, poses_x: np_poses_x, poses_q: np_poses_q}

        sess.run(opt, feed_dict=feed)
        np_loss = sess.run(loss, feed_dict=feed)
        if i > 0 and i % display_interval == 0:
          print('Iteration: ' + str(i) + '\n\t' + 'Loss is: ' + str(np_loss))
        if i > 0 and i % save_interval == 0:
          saver.save(sess, output_checkpoint, global_step=global_step)
          print('Intermediate checkpoint saved at: ' + output_checkpoint)
        last_iteration = i

    if last_iteration > 0 and last_iteration % save_interval != 0:
      saver.save(sess, output_checkpoint, global_step=global_step)
      print('Final checkpoint saved at: ' + output_checkpoint)


if __name__ == '__main__':
  main()
--------------------------------------------------------------------------------