├── LICENSE ├── data_loader_7scenes.py ├── data_loader_robotcar.py ├── env.py ├── eval.py ├── eval_utils.py ├── gen_map.py ├── model.py ├── ops.py ├── readme.md ├── train.py └── utils.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mingpan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /data_loader_7scenes.py: -------------------------------------------------------------------------------- 1 | # The data loader for "TUM style" of dataset 2 | 3 | 4 | import os 5 | import numpy as np 6 | import random 7 | from PIL import Image 8 | import re 9 | 10 | from tqdm import tqdm 11 | 12 | from env import MobileRobot 13 | from utils import rotation_matrix_to_quaternion, log_quaternion 14 | from glob import glob 15 | 16 | 17 | class CallableDict(dict): 18 | def __getitem__(self, key): 19 | val = super().__getitem__(key) 20 | if callable(val): 21 | return val(key) 22 | return val 23 | 24 | 25 | class DataLoader: 26 | def __init__(self, root_dir, img_height=96, img_width=96, header_lines=0, dim_control=7, 27 | norm_factor=None, no_model=False): 28 | 29 | split_file = os.path.join(root_dir, "TrainSplit.txt") 30 | with open(split_file, 'r') as f: 31 | data = [re.findall(r'\d+', line)[0] for line in f] 32 | train_path = [] 33 | for idx in data: 34 | if len(idx) < 2: 35 | idx = '0' + idx 36 | train_path.append('seq-' + idx) 37 | 38 | split_file = os.path.join(root_dir, "TestSplit.txt") 39 | with open(split_file, 'r') as f: 40 | data = [re.findall(r'\d+', line)[0] for line in f] 41 | test_path = [] 42 | for idx in data: 43 | if len(idx) < 2: 44 | idx = '0' + idx 45 | test_path.append('seq-' + idx) 46 | 47 | self.train_dir = [os.path.join(root_dir, path) for path in train_path] 48 | self.valid_dir = [os.path.join(root_dir, path) for path in test_path] 49 | self.test_dir = [os.path.join(root_dir, path) for path in test_path] 50 | 51 | self.height = img_height 52 | self.width = img_width 53 | self.header_lines = header_lines 54 | self.no_model = no_model 55 | self.dim_control = dim_control 56 | 57 | # require output elements 58 | self.time2rgb = CallableDict() 59 | self.time2rgb_unknown = CallableDict() 60 | self.time2pos = {} 61 | 62 | self.last_timestamp = 0 63 | 64 | self.train_sequences = [] 65 | for path in self.train_dir: 66 | self.train_sequences.append(self.load_train(path)) 67 | 
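# flatten the per-sequence timestamp lists into one pool, used for random sampling in get_train_set()
        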
self.times_train = [t for sublist in self.train_sequences for t in sublist] 68 | 69 | if norm_factor is None: 70 | self.norm_xyz, self.norm_q = self.variance_norm() 71 | else: 72 | self.norm_xyz, self.norm_q = norm_factor 73 | 74 | self.valid_sequences = [] 75 | for path in self.valid_dir: 76 | self.valid_sequences.append(self.load_train(path)) 77 | self.times_valid = [t for sublist in self.valid_sequences for t in sublist] 78 | 79 | self.test_sequences = [] 80 | for path in self.test_dir: 81 | self.test_sequences.append(self.load_test(path)) 82 | self.times_test = [t for sublist in self.test_sequences for t in sublist] 83 | 84 | def load_rgb(self, rgb_path): 85 | img = Image.open(rgb_path) 86 | img = img.resize((self.width, self.height), Image.ANTIALIAS) 87 | data = np.array(img.getdata()).reshape(self.height, self.width, 3) 88 | data = data[..., ::-1].astype(np.float32) / 255. 89 | return data 90 | 91 | def load_train(self, train_dir): 92 | timestamps = [] 93 | time2rbgpath = {} 94 | 95 | # load images 96 | image_list = sorted(glob(os.path.join(train_dir, "*.color.png"))) 97 | for i, file_path in enumerate(image_list): 98 | timestamp = i + self.last_timestamp 99 | timestamps.append(timestamp) 100 | time2rbgpath[timestamp] = file_path 101 | self.time2rgb[timestamp] = lambda _timestamp: self.load_rgb(time2rbgpath[_timestamp]) 102 | 103 | # load states 104 | pose_list = sorted(glob(os.path.join(train_dir, "*.pose.txt"))) 105 | for i, truth_path in enumerate(pose_list): 106 | timestamp = i + self.last_timestamp 107 | with open(truth_path, 'r') as f_pos: 108 | # get transformation matrix H 109 | H = [list(map(float, line.strip().split())) for line in f_pos] 110 | H = np.array(H) 111 | # get euler angles 112 | R = H[:-1, :-1] 113 | # angles = rotation_matrix_to_euler_angle(R) 114 | q = rotation_matrix_to_quaternion(R) 115 | # get position 116 | xyz = H[:-1, -1] 117 | new_pos = np.concatenate((xyz, q)) 118 | # store it into the dict 119 | self.time2pos[timestamp] = new_pos 120 | 121 | self.last_timestamp = timestamp + 1 122 | 123 | return timestamps 124 | 125 | def load_test(self, test_dir): 126 | timestamps = [] 127 | time2rbgpath = {} 128 | 129 | # load images 130 | image_list = sorted(glob(os.path.join(test_dir, "*.color.png"))) 131 | for i, file_path in enumerate(image_list): 132 | timestamp = i + self.last_timestamp 133 | timestamps.append(timestamp) 134 | time2rbgpath[timestamp] = file_path 135 | self.time2rgb_unknown[timestamp] = lambda _timestamp: self.load_rgb(time2rbgpath[_timestamp]) 136 | 137 | self.last_timestamp = timestamp + 1 138 | 139 | return timestamps 140 | 141 | def get_train_set(self, size=None, sample=True): 142 | """ Get random sample pairs of (inputs, labels) from training times sequence""" 143 | inputs, labels = [], [] 144 | 145 | if size is None: 146 | size = len(self.times_train) 147 | 148 | def pos_to_state(p): 149 | xyz = p[:3] 150 | q = p[3:] 151 | log_q = log_quaternion(q) 152 | return np.concatenate((xyz, log_q)) 153 | 154 | # print("Collecting training data...") 155 | if sample: 156 | for i in range(size): # tqdm(range(size)): 157 | rand_idx = np.random.randint(len(self.times_train)) 158 | t = self.times_train[rand_idx] 159 | img = self.time2rgb[t] 160 | pos = self.time2pos[t] 161 | lab = pos_to_state(pos) 162 | inputs.append(img.copy()) 163 | labels.append(lab.copy()) 164 | else: 165 | inputs = [self.time2rgb[t] for t in self.times_train] 166 | labels = [pos_to_state(self.time2pos[t]) for t in self.times_train] 167 | 168 | # normalize 169 | labels = 
np.array(labels) 170 | labels[:, :3] /= self.norm_xyz 171 | labels[:, 3:] /= self.norm_q 172 | 173 | return np.array(inputs), labels 174 | 175 | def get_train_seq(self, idx=None): 176 | if idx is None: 177 | idx = np.random.randint(len(self.train_sequences)) 178 | time_seq = self.train_sequences[idx].copy() 179 | seq = MobileRobot(time_seq, self.time2rgb, self.dim_control, self.time2pos, no_model=self.no_model) 180 | return seq 181 | 182 | def get_valid_seq(self, idx=None): 183 | if idx is None: 184 | idx = np.random.randint(len(self.valid_sequences)) 185 | time_seq = self.valid_sequences[idx].copy() 186 | seq = MobileRobot(time_seq, self.time2rgb, self.dim_control, self.time2pos, 187 | no_model=self.no_model, norm_xyz=self.norm_xyz, norm_q=self.norm_q) 188 | return seq 189 | 190 | def get_test_seq(self, idx=None): 191 | if idx is None: 192 | idx = np.random.randint(len(self.test_sequences)) 193 | time_seq = self.test_sequences[idx].copy() 194 | seq = MobileRobot(time_seq, self.time2rgb_unknown, self.dim_control) 195 | return seq 196 | 197 | def variance_norm(self): 198 | print('Estimating the translation and rotational variance...', flush=True) 199 | poses = [] 200 | for idx in tqdm(range(len(self.train_sequences))): 201 | seq = self.get_train_seq(idx) 202 | poses.append(seq.get_pos()) 203 | for i in range(seq.horizon-1): 204 | seq.step() 205 | p = seq.get_pos() 206 | poses.append(p) 207 | poses = np.array(poses)[:, :, 0] 208 | norm_xyz = np.std(poses[:, :3]) 209 | norm_q = np.std(poses[:, 3:]) 210 | print("Std of xyz: {}; std of log q: {}".format(norm_xyz, norm_q), flush=True) 211 | return norm_xyz, norm_q 212 | -------------------------------------------------------------------------------- /data_loader_robotcar.py: -------------------------------------------------------------------------------- 1 | # The data loader for "TUM style" of dataset 2 | 3 | 4 | import os 5 | import numpy as np 6 | 7 | import pickle 8 | import skimage.transform as transform 9 | 10 | from env import MobileRobot 11 | from utils import log_quaternion 12 | 13 | 14 | class DataLoader: 15 | def __init__(self, root_dir, img_height=96, img_width=96, dim_control=7, 16 | no_model=False, norm_factor=None): 17 | train_datasets = ['2014-06-26-08-53-56', '2014-06-26-09-24-58'] 18 | valid_datasets = ['2014-06-23-15-36-04', '2014-06-23-15-41-25'] 19 | 20 | self.train_dir = [os.path.join(root_dir, path, path + '.pkl') for path in train_datasets] 21 | self.valid_dir = [os.path.join(root_dir, path, path + '.pkl') for path in valid_datasets] 22 | self.vo_dir = [os.path.join(root_dir, path, path + '_vo.pkl') for path in valid_datasets] 23 | self.test_dir = None 24 | 25 | self.height = img_height 26 | self.width = img_width 27 | self.no_model = no_model 28 | self.dim_control = dim_control 29 | 30 | # require output elements 31 | self.time2rgb = {} 32 | self.time2pos = {} 33 | 34 | self.train_sequences = [] 35 | for path in self.train_dir: 36 | self.train_sequences.append(self.load_train(path)) 37 | self.times_train = [t for sublist in self.train_sequences for t in sublist] 38 | 39 | if norm_factor is None: 40 | self.norm_xyz, self.norm_q = self.variance_norm() 41 | else: 42 | self.norm_xyz, self.norm_q = norm_factor 43 | 44 | @staticmethod 45 | def pos_to_state(p, norm_xyz=1., norm_q=1.): 46 | xyz = p[:3] 47 | q = p[3:] 48 | log_q = log_quaternion(q) 49 | xyz = xyz / norm_xyz 50 | log_q = log_q / norm_q 51 | return np.concatenate((xyz, log_q)) 52 | 53 | def variance_norm(self): 54 | print('Estimating the translation and rotational 
variance...', flush=True) 55 | states = np.array([self.pos_to_state(self.time2pos[t]) for t in self.times_train]) 56 | print(states.shape) 57 | norm_xyz = np.std(states[:, :3]) 58 | norm_q = np.std(states[:, 3:]) 59 | print("Std of xyz: {}; std of log q: {}".format(norm_xyz, norm_q), flush=True) 60 | return norm_xyz, norm_q 61 | 62 | def load_vo(self, vo_path): 63 | timestamps, vo_poses = pickle.load(open(vo_path, 'rb')) 64 | for ts, pose in vo_poses.items(): 65 | pose[2] = 0. 66 | return timestamps, vo_poses 67 | 68 | def load_file(self, file_path): 69 | timestamps, images, poses = pickle.load(open(file_path, 'rb')) 70 | 71 | for ts, pose in poses.items(): 72 | pose[2] = 0. 73 | 74 | return timestamps, images, poses 75 | 76 | def load_train(self, train_dir): 77 | (timestamps, images, poses) = self.load_file(train_dir) 78 | 79 | for ts, img in images.items(): 80 | img = transform.resize(img, (self.height, self.width), mode='constant', anti_aliasing=True) 81 | images[ts] = img 82 | 83 | self.time2rgb.update(images) 84 | self.time2pos.update(poses) 85 | 86 | return timestamps 87 | 88 | def get_train_set(self, size=None, sample=True): 89 | """ Get random sample pairs of (inputs, labels) from training times sequence""" 90 | inputs, poses = [], [] 91 | 92 | if size is None: 93 | size = len(self.times_train) 94 | 95 | # print("Collecting training data...") 96 | if sample: 97 | for i in range(size): # tqdm(range(size)): 98 | rand_idx = np.random.randint(len(self.times_train)) 99 | t = self.times_train[rand_idx] 100 | img = self.time2rgb[t] 101 | pos = self.time2pos[t] 102 | inputs.append(img.copy()) 103 | poses.append(pos.copy()) 104 | else: 105 | inputs = [self.time2rgb[t] for t in self.times_train] 106 | poses = [self.time2pos[t] for t in self.times_train] 107 | 108 | labels = np.array([self.pos_to_state(p, self.norm_xyz, self.norm_q) for p in poses]) 109 | 110 | return np.array(inputs), np.array(labels) 111 | 112 | def get_train_seq(self, idx=None): 113 | if idx is None: 114 | idx = np.random.randint(len(self.train_dir)) 115 | time_seq, time2rgb, time2pos = self.load_file(self.train_dir[idx]) 116 | 117 | for ts, img in time2rgb.items(): 118 | img = transform.resize(img, (self.height, self.width), mode='constant', anti_aliasing=True) 119 | time2rgb[ts] = img 120 | 121 | seq = MobileRobot(time_seq, time2rgb, self.dim_control, time2pos, no_model=self.no_model) 122 | return seq 123 | 124 | def get_valid_seq(self, idx=None): 125 | if idx is None: 126 | idx = np.random.randint(len(self.valid_dir)) 127 | time_seq, time2rgb, time2pos = self.load_file(self.valid_dir[idx]) 128 | _, time2vopos = self.load_vo(self.vo_dir[idx]) 129 | # fix the nan problem due to vo dataset 130 | time2vopos[time_seq[-1]] = time2vopos[time_seq[-2]] = time2vopos[time_seq[-3]] 131 | 132 | for ts, img in time2rgb.items(): 133 | img = transform.resize(img, (self.height, self.width), mode='constant', anti_aliasing=True) 134 | time2rgb[ts] = img 135 | 136 | seq = MobileRobot(time_seq, time2rgb, self.dim_control, time2pos, time2vopos, 137 | no_model=self.no_model, norm_xyz=self.norm_xyz, norm_q=self.norm_q) 138 | return seq 139 | -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | from utils import quaternion_multiply, quaternion_inverse, log_quaternion 6 | 7 | norm = np.linalg.norm 8 | 9 | 10 | class MobileRobot: 11 | """ 12 | Simulate a sequence as a mobile robot. 
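
    A state is the 6-D pose [xyz, log q] (position plus log-quaternion),
    optionally scaled by norm_xyz / norm_q. reset() rewinds to a given
    timestamp, render() returns the image at the current step, and step()
    advances one timestamp, returning the next image, the relative action
    (velocity + quaternion), and the time difference dt.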
13 | """ 14 | def __init__(self, times, time2rgb, dim_control, time2pos=None, time2vopos=None, 15 | no_model=False, norm_xyz=1., norm_q=1.): 16 | self.timestamps = times 17 | self.time2rgb = time2rgb 18 | self.dim_control = dim_control 19 | self.time2pos = time2pos 20 | self.time2vopos = time2vopos if time2vopos is not None else self.time2pos 21 | self.no_model = no_model 22 | self.norm_xyz = norm_xyz 23 | self.norm_q = norm_q 24 | 25 | self.pos = None 26 | self.time = None 27 | self.time_idx = 0 28 | self.reset() 29 | 30 | def render(self, step=None): 31 | if step is None: 32 | step = self.time_idx 33 | time = self.timestamps[step] 34 | return self.time2rgb[time] 35 | 36 | def reset(self, time_idx=0): 37 | self.time_idx = time_idx 38 | self.time = self.timestamps[time_idx] 39 | if self.time2pos is not None: 40 | self.pos = self.time2pos[self.time] 41 | return self.render() 42 | 43 | def get_pos(self): 44 | if self.time2pos is None: 45 | raise Exception("The ground truth position is not available!") 46 | self.pos = self.time2pos[self.time] 47 | xyz = self.pos[:3] / self.norm_xyz 48 | q = self.pos[3:] 49 | log_q = log_quaternion(q) / self.norm_q 50 | return np.concatenate((xyz, log_q)).reshape(-1, 1) 51 | 52 | def step(self): 53 | """ 54 | Return the next step image, the relative action (vel + quaternion), and time diff. 55 | 56 | If self.no_model is not set, the real velocity & real relative rotation (as a quaternion) will be returned. 57 | 58 | Otherwise, a vector of action = [0, 0, 0, 1, 0, 0, 0] is returned, indicating no 59 | translational movements (velocity = action[:3] = 0), and no orientation change, i.e., a constant transition. 60 | """ 61 | new_time = self.timestamps[self.time_idx + 1] 62 | dt = new_time - self.time 63 | 64 | new_pos = self.time2pos[new_time] 65 | vel = (self.time2vopos[new_time][:3] - self.time2vopos[self.time][:3]) / dt 66 | qt = self.time2vopos[self.time][3:] 67 | qt_1 = self.time2vopos[new_time][3:] 68 | # for quaternion, time is not considered 69 | q = quaternion_multiply(qt_1, quaternion_inverse(qt)) 70 | # combine 71 | action = np.concatenate((vel, q), axis=0) 72 | 73 | if self.no_model: 74 | action = np.zeros_like(action) 75 | action[3] = 1. # quaternion constant model 76 | self.pos = new_pos 77 | self.time_idx += 1 78 | self.time = new_time 79 | return self.render(), action.reshape(-1, 1), dt 80 | 81 | @property 82 | def horizon(self): 83 | return len(self.timestamps) 84 | 85 | @property 86 | def dim_state(self): 87 | return len(self.get_pos()) 88 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import json 4 | import argparse 5 | import pickle 6 | 7 | import numpy as np 8 | 9 | from tqdm import tqdm 10 | 11 | from data_loader_7scenes import DataLoader as DataLoader7Scenes 12 | from data_loader_robotcar import DataLoader as DataLoaderRobotCar 13 | 14 | from model import Model 15 | from utils import finite_diff, predict, correct 16 | from eval_utils import VideoRecorder, plot_result, equidistant_generate, translational_error, rotational_error 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--model_dir', type=str, default=None, 22 | help='directory to load model from. 
If not provided, the most recent dir' 23 | ' in the default path will be used, raise an Error if failed.') 24 | parser.add_argument('--state_var', type=float, default=0.001, 25 | help='estimated state transition variance for the model used by extended Kalman filter, ' 26 | 'it is assumed to be independent and stay the same for all dimension') 27 | parser.add_argument('--visualize_dir', type=str, default='visualize', 28 | help='directory to save the visualization output + video (if apply)') 29 | parser.add_argument('--video', default=False, action='store_true', 30 | help='if a video should be recorded comparing real and estimated ' 31 | 'reconstructed images, together with localization results') 32 | parser.add_argument('--no_model', default=False, action='store_true', 33 | help='if the true state transition will be hidden to test localization based only on images.') 34 | parser.add_argument('--generation_mode', type=int, default=1, 35 | help='0: equidistant sample image generation; 1: Kalman filter estimation for localization; ' 36 | '2: perform both') 37 | parser.add_argument('--all', default=False, action='store_true', 38 | help='evaluate all the validation sequences and report the performance w/o a picture.') 39 | parser.add_argument('--seq_idx', type=int, default=0, 40 | help='evaluate the given indexed sequence in the text.') 41 | parser.add_argument('--deviation', type=float, default=0., 42 | help='deviation across all state dimension for robust test.') 43 | args = parser.parse_args() 44 | 45 | evaluate(args) 46 | 47 | 48 | def ekf_estimate(system, model, state_var, save_video_dir=None, deviation=0.): 49 | """ Estimate the system with extended Kalman filter 50 | 51 | :param system: the system to estimate, 52 | :param model: the model we build for mimic the system, 53 | :param state_var: estimated state variance 54 | :param save_video_dir: if given, step wise images will be saved and recorded as a video 55 | :param deviation: deviation added upon each dimension of the initial state 56 | :returns 57 | x_est: estimated states by extended Kalman filter based on the images, 58 | Sigma_est: estimated observation uncertainties 59 | x_true: the recorded true system states 60 | o_list: list of obtained observation 61 | """ 62 | 63 | system.reset() 64 | x = system.get_pos() 65 | # set initial deviation 66 | x = x + (np.ones_like(x) * deviation) 67 | model.set_norm(system.norm_xyz, system.norm_q) 68 | 69 | Sigma = np.eye(model.dim_state) * state_var 70 | Q = np.eye(model.dim_state) * state_var 71 | 72 | x_true = [] # true system states 73 | o_list = [] # observations 74 | x_est = [] # estimated states 75 | Sigma_est = [] # estimated covariances 76 | R_list = [] # uncertainty matrix for observations 77 | 78 | # init the display for video recording 79 | recorder = None 80 | if save_video_dir is not None: 81 | recorder = VideoRecorder(save_video_dir, norm_xyz=system.norm_xyz) 82 | 83 | # init observation 84 | img = system.render() 85 | 86 | print("Simulating the system with EKF...") 87 | for t in tqdm(range(system.horizon)): 88 | o, cov_o = model.wrap(img) 89 | 90 | # estimate observation model H 91 | o_func = lambda _x: model.emit(_x) 92 | H = finite_diff(o_func, x, dim_out=model.dim_observation) 93 | 94 | # get the observation variance 95 | R = cov_o 96 | 97 | # correct the state using observation and H 98 | x, Sigma = correct(x, Sigma, o, R, H, model.emit) 99 | 100 | # collect current step data 101 | R_list.append(np.diag(R)) 102 | x_est.append(x) 103 | Sigma_est.append(Sigma) 104 | 
o_list.append(o) 105 | x_sys = system.get_pos() 106 | x_true.append(x_sys) 107 | 108 | if recorder is not None: 109 | emit = model.generate(x) 110 | recorder.record(img, emit, x_sys, x) 111 | 112 | if t < system.horizon - 1: 113 | # evolve the system by one step 114 | 115 | img, u, dt = system.step() 116 | dt = float(dt) 117 | 118 | # estimate state transition matrix A, B 119 | x_func = lambda _x: model.step(_x, u, dt) 120 | u_func = lambda _u: model.step(x, _u, dt) 121 | A = finite_diff(x_func, x, dim_out=model.dim_state) 122 | B = finite_diff(u_func, u, dim_out=model.dim_state) 123 | 124 | # get the transition mapping 125 | sim_step = lambda _x, _u: model.step(_x, _u, dt) 126 | 127 | # predict the next state, let Qt be correlated with dt, since longer time exhibit more uncertainty 128 | x, Sigma = predict(x, Sigma, u, A, B, Q*(dt**2), sim_step) 129 | 130 | if recorder is not None: 131 | recorder.finalize() 132 | 133 | # show the sigma of z 134 | print("Aver. inferred variance is {}".format(np.mean(R_list))) 135 | 136 | # revoke the normalization, prepare for evaluation 137 | x_est = np.array(x_est) 138 | x_est[:, :3, :] *= system.norm_xyz 139 | x_est[:, 3:, :] *= system.norm_q 140 | Sigma_est = np.array(Sigma_est) 141 | x_true = np.array(x_true) 142 | x_true[:, :3, :] *= system.norm_xyz 143 | x_true[:, 3:, :] *= system.norm_q 144 | o_list = np.array(o_list) 145 | 146 | return x_est, Sigma_est, x_true, o_list 147 | 148 | 149 | def load_model_specs(model_dir): 150 | """Look for the model specifications in the given directory""" 151 | if model_dir is None: 152 | # get the latest modified dir in the default dir, raise an Error if there's none 153 | default_dir = "checkpoints" 154 | all_dirs = [os.path.join(default_dir, d) 155 | for d in os.listdir(default_dir) if os.path.isdir(os.path.join(default_dir, d))] 156 | if len(all_dirs) == 0: 157 | raise FileNotFoundError("Model directory is not provided & no directory in default path!") 158 | model_dir = max(all_dirs, key=os.path.getmtime) 159 | 160 | if model_dir.endswith('.ckpt'): 161 | mdir = os.path.dirname(model_dir) 162 | json_path = os.path.join(mdir, "model_specs.json") 163 | else: 164 | json_path = os.path.join(model_dir, "model_specs.json") 165 | with open(json_path, 'r') as infile: 166 | data = json.load(infile) 167 | train_args = argparse.Namespace(**data) 168 | return train_args, model_dir 169 | 170 | 171 | def evaluate(eval_args): 172 | train_args, model_dir = load_model_specs(eval_args.model_dir) 173 | if len(eval_args.visualize_dir) > 0: 174 | eval_args.visualize_dir = os.path.join(model_dir, eval_args.visualize_dir) 175 | if not os.path.isdir(eval_args.visualize_dir): 176 | os.makedirs(eval_args.visualize_dir) 177 | else: 178 | eval_args.visualize_dir = None 179 | 180 | print('Model specification loaded.') 181 | print(eval_args) 182 | print(train_args) 183 | 184 | model = Model(dim_observation=train_args.dim_observation, 185 | batch_size=1, 186 | image_size=train_args.dim_input, 187 | reconstruct_size=train_args.dim_reconstruct) 188 | model.restore(model_dir) 189 | 190 | # get the dataset / simulated system 191 | DataLoader = DataLoaderRobotCar if train_args.use_robotcar else DataLoader7Scenes 192 | dataloader = DataLoader(train_args.data_dir, no_model=eval_args.no_model, 193 | img_width=train_args.dim_input[1], img_height=train_args.dim_input[0], 194 | norm_factor=train_args.scaling_factor) 195 | 196 | if not eval_args.all: 197 | sequence = dataloader.get_valid_seq(eval_args.seq_idx) 198 | 199 | if eval_args.generation_mode == 0: 
200 | equidistant_generate(sequence, model, 201 | save_path=eval_args.visualize_dir, 202 | use_robotcar=train_args.use_robotcar) 203 | else: 204 | video_dir = eval_args.visualize_dir if eval_args.video else None 205 | x_est, cov_est, x_true, ob = ekf_estimate(sequence, model, eval_args.state_var, 206 | save_video_dir=video_dir, 207 | deviation=eval_args.deviation) 208 | 209 | plot_result(x_est, x_true, 210 | save_dir=eval_args.visualize_dir, 211 | use_robotcar=train_args.use_robotcar) 212 | 213 | if eval_args.generation_mode > 1: 214 | equidistant_generate(sequence, model, 215 | model_traj=x_est, 216 | save_path=eval_args.visualize_dir, 217 | use_robotcar=train_args.use_robotcar) 218 | else: 219 | x_est_all, x_true_all, x_pred_all = [], [], [] 220 | for i in range(len(dataloader.valid_sequences)): 221 | sequence = dataloader.get_valid_seq(i) 222 | x_est, _, x_true, _ = ekf_estimate(sequence, model, eval_args.state_var, 223 | deviation=eval_args.deviation) 224 | x_est_all.extend(x_est) 225 | x_true_all.extend(x_true) 226 | 227 | x_est_all = np.array(x_est_all) 228 | x_true_all = np.array(x_true_all) 229 | 230 | mean_est_err_xyz, median_est_err_xyz = translational_error(x_true_all, x_est_all) 231 | mean_est_err_rot, median_est_err_rot = rotational_error(x_true_all, x_est_all) 232 | print("Estimator:") 233 | print("\t Mean Error: {} m, {} deg".format(mean_est_err_xyz, mean_est_err_rot)) 234 | print("\t Median Error: {} m, {} deg".format(median_est_err_xyz, median_est_err_rot)) 235 | 236 | with open(os.path.join(eval_args.visualize_dir, 'evaluations.pkl'), 'wb') as f: 237 | data = { 238 | 'true': x_true_all, 239 | 'estimate': x_est_all, 240 | 'prediction': x_pred_all 241 | } 242 | pickle.dump(data, f) 243 | 244 | 245 | if __name__ == '__main__': 246 | main() 247 | 248 | -------------------------------------------------------------------------------- /eval_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import matplotlib.animation as animation 7 | from mpl_toolkits.mplot3d import Axes3D 8 | from matplotlib import gridspec 9 | 10 | import skimage.transform as transform 11 | 12 | from tqdm import tqdm 13 | 14 | from utils import quaternion_distance, inv_log_quaternion 15 | 16 | 17 | def reverse_preprocess(img, to_int=False, shift=False): 18 | img = img[..., ::-1] 19 | if shift: 20 | img += 0.5 21 | if to_int: 22 | return img.astype(np.int) 23 | return img 24 | 25 | 26 | class VideoRecorder: 27 | def __init__(self, save_dir, dim_state=3, norm_xyz=1.): 28 | self.dim_state = dim_state 29 | self.save_dir = save_dir 30 | self.norm_xyz = norm_xyz 31 | self.traj_true = np.zeros((dim_state, 0)) 32 | self.traj_est = np.zeros((dim_state, 0)) 33 | self.counter = 1 34 | self.gs = gridspec.GridSpec(2, 3, width_ratios=[2, 2, 3]) 35 | 36 | def record(self, real, emit, xt, xe): 37 | real = reverse_preprocess(real) 38 | emit = reverse_preprocess(emit) 39 | x_true = xt.copy() 40 | x_true *= self.norm_xyz 41 | x_est = xe.copy() 42 | x_est *= self.norm_xyz 43 | self.traj_true = np.hstack((self.traj_true, x_true[:3, :])) 44 | self.traj_est = np.hstack((self.traj_est, x_est[:3, :])) 45 | 46 | fig = plt.figure(figsize=(12, 6)) 47 | plt.subplots_adjust(wspace=-0.2) 48 | 49 | ax = fig.add_subplot(self.gs[:2, :2]) 50 | ax.plot(self.traj_true[0, :], self.traj_true[1, :], '--b', label='true') 51 | ax.plot(self.traj_est[0, :], self.traj_est[1, :], '-r', label='estimate') 52 | 
ax.scatter(self.traj_true[0, -1], self.traj_true[1, -1], c='b', s=20) 53 | ax.scatter(self.traj_est[0, -1], self.traj_est[1, -1], c='r', s=20) 54 | ax.set_xlabel("x [m]") 55 | ax.set_ylabel("y [m]") 56 | ax.tick_params(pad=-0.5) 57 | ax.set_xlim([-200, 200]) 58 | ax.set_ylim([-250, 200]) 59 | ax.legend() 60 | 61 | ax = fig.add_subplot(self.gs[0, 2]) 62 | ax.imshow(real) 63 | ax.set_title("Real Image") 64 | ax.grid(False) 65 | plt.axis('off') 66 | 67 | ax = fig.add_subplot(self.gs[1, 2]) 68 | ax.imshow(emit) 69 | ax.set_title("Generated Image") 70 | ax.grid(False) 71 | plt.axis('off') 72 | 73 | path = os.path.join(self.save_dir, "{}.png".format(self.counter)) 74 | plt.savefig(path, bbox_inches='tight') 75 | plt.close(fig) 76 | self.counter += 1 77 | 78 | def finalize(self): 79 | 80 | def update_image(num, load_dir, ax, update): 81 | ax.clear() 82 | path = os.path.join(load_dir, "{}.png".format(num+1)) 83 | data = plt.imread(path) 84 | update(1) 85 | img = ax.imshow(data) 86 | plt.axis('off') 87 | return img, 88 | 89 | print("Generating video in {}...".format(self.save_dir)) 90 | with tqdm(total=self.counter) as tbar: 91 | writer = animation.writers['ffmpeg'](fps=10, metadata=dict(artist='Me'), bitrate=1800) 92 | f = plt.figure(figsize=(12, 5)) 93 | f.tight_layout() 94 | ax = f.add_subplot(1, 1, 1) 95 | line_ani = animation.FuncAnimation(f, update_image, self.counter - 1, 96 | fargs=(self.save_dir, ax, tbar.update), 97 | interval=50, blit=True) 98 | video_path = os.path.join(self.save_dir, "video.mp4") 99 | line_ani.save(video_path, writer=writer) 100 | plt.close(f) 101 | 102 | 103 | def translational_error(true, pred): 104 | error = np.sqrt(np.sum(np.square(true[:, :3, 0] - pred[:, :3, 0]), axis=1)) 105 | return np.mean(error), np.median(error) 106 | 107 | 108 | def rotational_error(true, pred): 109 | q_true = [inv_log_quaternion(true[i, 3:, 0]) for i in range(len(true))] 110 | q_pred = [inv_log_quaternion(pred[i, 3:, 0]) for i in range(len(pred))] 111 | error = [quaternion_distance(qt, qp) for qt, qp in zip(q_true, q_pred)] 112 | return np.mean(error) / np.pi * 180, np.median(error) / np.pi * 180 113 | 114 | 115 | def plot_result(x_est, x_true=None, save_dir=None, use_robotcar=False): 116 | """Plot the output trajectories, estimate the error if possible. """ 117 | if use_robotcar: 118 | x_est[:, 2:-1, 0] = 0. 
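        # RobotCar motion is planar (the loaders zero out z), so the uninformative state dimensions are suppressed above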
119 | 120 | f, axarr = plt.subplots(2, sharex='all', figsize=(12, 12)) 121 | for p in range(2): 122 | start_idx = p * 3 123 | end_idx = min(start_idx + 3, x_est.shape[1]) 124 | for i, idx in enumerate(range(start_idx, end_idx)): 125 | axarr[p].plot(x_est[:, idx, 0], '-', label='x%d' % idx, color='C%d' % i) 126 | if x_true is not None: 127 | axarr[p].plot(x_true[:, idx, 0], '--', label='x%d true' % idx, color='C%d' % i) 128 | axarr[p].legend() 129 | 130 | if x_true is not None: 131 | mu_xyz, median_xyz = translational_error(x_true, x_est) 132 | mu_rot, median_rot = rotational_error(x_true, x_est) 133 | if not use_robotcar: 134 | axarr[0].set_title("Extended Kalman Filter Error: est = %.3fm, %.3fdeg" % (median_xyz, median_rot)) 135 | else: 136 | axarr[0].set_title("Extended Kalman Filter Error (mean): est = %.3fm, %.3fdeg" % (mu_xyz, mu_rot)) 137 | else: 138 | axarr[0].set_title("Extended Kalman Filter Result") 139 | 140 | if save_dir is not None: 141 | path = os.path.join(save_dir, "filter") 142 | plt.savefig(path) 143 | 144 | 145 | def equidistant_generate(system, model, num_sample=10, model_traj=None, save_path=None, use_robotcar=False): 146 | # get system trajectory 147 | horizon = system.horizon 148 | model_traj = model_traj[:horizon, ...] if model_traj is not None else None 149 | system.reset() 150 | traj = [system.get_pos()] 151 | for i in tqdm(range(horizon - 1)): 152 | system.step() 153 | traj.append(system.get_pos()) 154 | traj = np.asarray(traj) 155 | 156 | plotting_gap = (horizon - 1) // (num_sample - 1) 157 | plotting_steps = list(range(0, horizon, plotting_gap)) 158 | 159 | real_images = [] 160 | generated_images = [] 161 | 162 | for i in plotting_steps: 163 | if model_traj is not None: 164 | pos = model_traj[i].copy() 165 | pos[:3, :] /= system.norm_xyz 166 | pos[3:, :] /= system.norm_q 167 | else: 168 | pos = traj[i] 169 | gen = reverse_preprocess(model.generate(pos)) 170 | real = reverse_preprocess(system.render(i)) 171 | real = transform.resize(real, gen.shape, 172 | anti_aliasing=True, mode='constant') 173 | real_images.append(real) 174 | generated_images.append(gen) 175 | if save_path: 176 | fig, ax = plt.subplots(1, frameon=False) 177 | ax.imshow(real) 178 | path = os.path.join(save_path, "real_{}".format(i)) 179 | ax.grid(False) 180 | ax.xaxis.set_visible(False) 181 | ax.yaxis.set_visible(False) 182 | plt.axis('off') 183 | plt.savefig(path, bbox_inches='tight') 184 | plt.close(fig) 185 | 186 | fig, ax = plt.subplots(1, frameon=False) 187 | ax.imshow(gen) 188 | path = os.path.join(save_path, "generated_{}".format(i)) 189 | ax.grid(False) 190 | ax.xaxis.set_visible(False) 191 | ax.yaxis.set_visible(False) 192 | plt.axis('off') 193 | plt.savefig(path, bbox_inches='tight') 194 | plt.close(fig) 195 | 196 | fig = plt.figure(figsize=(6, 6)) 197 | if not use_robotcar: 198 | ax = fig.gca(projection='3d') 199 | traj[:, :3, :] *= system.norm_xyz 200 | traj[:, 3:, :] *= system.norm_q 201 | ax.plot(traj[:, 0, 0], traj[:, 1, 0], traj[:, 2, 0], '--b') 202 | if model_traj is not None: 203 | ax.plot(model_traj[:, 0, 0], model_traj[:, 1, 0], model_traj[:, 2, 0], '-r') 204 | for i in plotting_steps[:-1]: 205 | ax.scatter(traj[i, 0, 0], traj[i, 1, 0], traj[i, 2, 0], c='b', s=20) 206 | if model_traj is not None: 207 | ax.scatter(model_traj[i, 0, 0], model_traj[i, 1, 0], model_traj[i, 2, 0], c='r', s=20) 208 | ax.set_xlabel('x[m]') 209 | ax.set_ylabel('y[m]') 210 | ax.set_zlabel('z[m]') 211 | ax.tick_params(pad=-0.5) 212 | else: 213 | ax = fig.gca() 214 | traj[:, :3, 0] *= system.norm_xyz 
215 | traj[:, 3:, 0] *= system.norm_q 216 | ax.plot(traj[:, 0, 0], traj[:, 1, 0], '--b') 217 | if model_traj is not None: 218 | ax.plot(model_traj[:, 0, 0], model_traj[:, 1, 0], '-r') 219 | for i in plotting_steps[:-1]: 220 | ax.scatter(traj[i, 0, 0], traj[i, 1, 0], c='b', s=20) 221 | if model_traj is not None: 222 | ax.scatter(model_traj[i, 0, 0], model_traj[i, 1, 0], c='r', s=20) 223 | 224 | ax.set_xlabel('x[m]') 225 | ax.set_ylabel('y[m]') 226 | ax.tick_params(pad=-0.5) 227 | ax.set_xlim([-200, 200]) 228 | ax.set_ylim([-250, 200]) 229 | 230 | if save_path is not None: 231 | path = os.path.join(save_path, "trajectory") 232 | plt.savefig(path, bbox_inches='tight') 233 | plt.close(fig) 234 | 235 | # joint figure 236 | if save_path: 237 | shape = model.generate(traj[0]).shape 238 | join_figure = np.zeros((shape[0] * 2, shape[1] * num_sample, 3)) 239 | for i, (real, gen) in enumerate(zip(real_images, generated_images)): 240 | s = i * shape[1] 241 | e = (i + 1) * shape[1] 242 | join_figure[0:shape[0], s:e, :] = real 243 | join_figure[shape[0]:2 * shape[0], s:e, :] = gen 244 | 245 | fig, ax = plt.subplots(1, frameon=False) 246 | ax.imshow(join_figure) 247 | path = os.path.join(save_path, "joint") 248 | ax.grid(False) 249 | ax.xaxis.set_visible(False) 250 | ax.yaxis.set_visible(False) 251 | plt.axis('off') 252 | plt.savefig(path, bbox_inches='tight') 253 | plt.close(fig) 254 | 255 | 256 | -------------------------------------------------------------------------------- /gen_map.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from ops import build_fnn, dcgan_decode, dcgan_encode 7 | from ops import loss_rec_normal, loss_reg_normal, loss_kl_normal 8 | 9 | 10 | class GenMap(object): 11 | def __init__(self, arch_encoder_attribute, image_size, dim_features=512, num_conv_layer=4, 12 | transfer_fct=tf.nn.relu, learning_rate=1e-3, batch_size=100, rec_log_var=0., 13 | training=False, reconstruct_size=(64, 64, 3)): 14 | self.transfer_fct = transfer_fct 15 | self.learning_rate = learning_rate 16 | self.batch_size = batch_size 17 | 18 | self.dim_attribute = arch_encoder_attribute[0] 19 | self.latent_dim = arch_encoder_attribute[-1] 20 | self.reconstruction_dim = np.prod(reconstruct_size) 21 | 22 | self.x = tf.placeholder(tf.float32, [None, self.dim_attribute], name="pose_input") 23 | y_shape = [None] 24 | y_shape.extend(image_size) 25 | self.y = tf.placeholder(tf.float32, y_shape, name="image_input") 26 | self.y_small = tf.placeholder(tf.float32, [None, self.reconstruction_dim], name="image_reconstruct") 27 | 28 | arch_encoder_x = arch_encoder_attribute.copy() 29 | arch_encoder_x[-1] = arch_encoder_x[-1] * 2 30 | z_x_all = build_fnn(self.x, arch_encoder_x, transfer_fct, scope="encoder_x") 31 | self.z_x_mean, z_x_log_sigma_sq = tf.split(z_x_all, num_or_size_splits=2, axis=1) 32 | 33 | self.z_x_log_sigma_sq = tf.zeros_like(z_x_log_sigma_sq) # set the z_x_sigma to be 1 34 | 35 | # reparameterization 36 | epsilon_x = tf.random_normal((self.batch_size, self.latent_dim)) 37 | self.z_x = self.z_x_mean + (tf.sqrt(tf.exp(self.z_x_log_sigma_sq)) * epsilon_x) 38 | 39 | self.z_y_mean, self.z_y_log_sigma_sq = dcgan_encode(final_channel=dim_features, 40 | num_layer=num_conv_layer, 41 | inputs=self.y, 42 | output_size=self.latent_dim, 43 | activation=transfer_fct, 44 | scope="encoder_y", 45 | training=training) 46 | 47 | # reparameterization 48 | epsilon_y = tf.random_normal((self.batch_size, self.latent_dim)) 49 | 
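# reparameterization trick: z = mu + sigma * eps with sigma = sqrt(exp(log_var)), keeping the sample differentiable w.r.t. the encoder
        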
self.z_y = self.z_y_mean + (tf.sqrt(tf.exp(self.z_y_log_sigma_sq)) * epsilon_y) 50 | 51 | # decode 52 | y_rec_pre = dcgan_decode(inputs=self.z_y, 53 | output_size=reconstruct_size, 54 | num_layer=num_conv_layer, 55 | channel_start=dim_features, 56 | activation=transfer_fct, 57 | scope="decoder_y", 58 | training=training) 59 | 60 | self.alpha = tf.get_variable("image_variance_log", initializer=rec_log_var * tf.ones((1,)), trainable=False) 61 | 62 | self.y_rec = (1 + 2e-6) * tf.sigmoid(y_rec_pre) - 1e-6 # to avoid numeric issues 63 | 64 | self._minimize_loss(self.y_small, self.y_rec, self.z_x_mean, self.z_x_log_sigma_sq, 65 | self.z_y_mean, self.z_y_log_sigma_sq, learning_rate, self.alpha) 66 | 67 | self.saver = tf.train.Saver(max_to_keep=1) 68 | config = tf.ConfigProto() 69 | config.gpu_options.allow_growth = True 70 | self.sess = tf.Session(config=config) 71 | self.sess.run(tf.global_variables_initializer()) 72 | 73 | def _minimize_loss(self, val, val_rec, z_s_mu, z_s_log_var, z_val_mu, z_val_log_var, learning_rate, val_log_var): 74 | """Minimize the loss of VAE, which contains kl-divergence term and reconstruction term.""" 75 | loss_rec = loss_rec_normal(val, val_rec, log_var=val_log_var) 76 | loss_kl = loss_kl_normal(z_val_mu, z_val_log_var, z_s_mu, z_s_log_var) 77 | self.cost_1 = tf.reduce_mean(loss_rec / self.reconstruction_dim) 78 | self.cost_2 = tf.reduce_mean(loss_kl / self.latent_dim) 79 | self.cost = self.cost_1 + self.cost_2 80 | self.train_op = tf.train.AdamOptimizer(learning_rate).minimize(self.cost) 81 | 82 | def batch_train(self, x, y, y_rec): 83 | _, cost, cost1, cost2 = self.sess.run((self.train_op, self.cost, self.cost_1, self.cost_2), 84 | feed_dict={self.x: x, self.y: y, self.y_small: y_rec}) 85 | return cost, cost1, cost2 86 | 87 | def infer_by_x(self, x, sample=False): 88 | """Infer the latent variable z given input x""" 89 | if not sample: 90 | return self.sess.run(self.z_x_mean, feed_dict={self.x: x}) 91 | else: 92 | return self.sess.run(self.z_x, feed_dict={self.x: x}) 93 | 94 | def cov_infer_by_y(self, y): 95 | log_var = self.sess.run(self.z_y_log_sigma_sq, feed_dict={self.y: y}) 96 | cov = np.diag(np.squeeze(np.exp(log_var))) 97 | return cov 98 | 99 | def cov_infer_by_x(self, x): 100 | log_var = self.sess.run(self.z_x_log_sigma_sq, feed_dict={self.x: x}) 101 | cov = np.diag(np.squeeze(np.exp(log_var))) 102 | return cov 103 | 104 | def sigma_infer_by_y(self, y): 105 | log_var = self.sess.run(self.z_y_log_sigma_sq, feed_dict={self.y: y}) 106 | return np.sqrt(np.exp(log_var)) 107 | 108 | def sigma_infer_by_x(self, x): 109 | log_var = self.sess.run(self.z_x_log_sigma_sq, feed_dict={self.x: x}) 110 | return np.sqrt(np.exp(log_var)) 111 | 112 | def infer_by_y(self, y, sample=False): 113 | """Infer the latent variable z given label y""" 114 | if not sample: 115 | return self.sess.run(self.z_y_mean, feed_dict={self.y: y}) 116 | else: 117 | return self.sess.run(self.z_y, feed_dict={self.y: y}) 118 | 119 | def construct_y_from_x(self, x): 120 | z = self.sess.run(self.z_x_mean, feed_dict={self.x: x}) 121 | y = self.sess.run(self.y_rec, feed_dict={self.z_y: z}) 122 | return y 123 | 124 | def reconstruct(self, y): 125 | z = self.sess.run(self.z_y_mean, feed_dict={self.y: y}) 126 | y = self.sess.run(self.y_rec, feed_dict={self.z_y: z}) 127 | return y 128 | 129 | def save(self, save_path): 130 | self.saver.save(self.sess, save_path) 131 | print("Model saved in path: {}".format(save_path), flush=True) 132 | 133 | def restore(self, path): 134 | # reinitialize adam 135 | # 
self.sess.run([v.initializer for v in self.adam_params])
136 |         if not path.endswith(".ckpt"):
137 |             path = tf.train.latest_checkpoint(path)
138 |         self.saver.restore(self.sess, path)
139 |         print("Model restored from path: {}".format(path))
140 | 
141 |     def destruct(self):
142 |         tf.reset_default_graph()
143 |         self.sess.close()
144 | 
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | 
2 | import os
3 | import numpy as np
4 | import skimage.transform as transform
5 | 
6 | import matplotlib
7 | matplotlib.use('agg')
8 | import matplotlib.pyplot as plt
9 | 
10 | from tqdm import tqdm
11 | 
12 | from utils import quaternion_multiply, inv_log_quaternion, log_quaternion
13 | 
14 | from gen_map import GenMap
15 | 
16 | plt.style.use("seaborn")
17 | 
18 | 
19 | class Model:
20 |     def __init__(self, batch_size=100, dim_state=6, dim_control=7, dim_observation=2,
21 |                  image_size=(96, 96, 3), reconstruct_size=(64, 64, 3), reconstruction_accuracy=0.,
22 |                  training=False, learning_rate=1e-3):
23 |         self.dim_state = dim_state
24 |         self.dim_control = dim_control
25 |         self.dim_observation = dim_observation
26 |         self.batch_size = batch_size
27 |         self.image_size_origin = tuple(image_size)
28 |         self.reconstruct_size_origin = tuple(reconstruct_size)
29 |         self.reconstruct_size = np.prod(reconstruct_size)
30 |         self.image_size = np.prod(image_size)
31 |         self.norm_xyz = self.norm_q = 1.
32 | 
33 |         self.network = GenMap(arch_encoder_attribute=[self.dim_state, 512, self.dim_observation],
34 |                               image_size=image_size,
35 |                               dim_features=512,
36 |                               num_conv_layer=4,
37 |                               batch_size=batch_size,
38 |                               rec_log_var=reconstruction_accuracy,
39 |                               training=training,
40 |                               reconstruct_size=self.reconstruct_size_origin,
41 |                               learning_rate=learning_rate)
42 | 
43 |     def set_norm(self, norm_xyz, norm_q):
44 |         self.norm_xyz = norm_xyz
45 |         self.norm_q = norm_q
46 | 
47 |     def step(self, x, u, dt):
48 |         """Advance the state one step: integrate the velocity and apply the relative quaternion."""
49 |         xyz = (x[:3, 0] * self.norm_xyz + u[:3, 0] * dt) / self.norm_xyz
50 |         qt = quaternion_multiply(u[3:, 0], inv_log_quaternion(x[3:, 0] * self.norm_q))
51 |         x1 = np.concatenate((xyz, log_quaternion(qt) / self.norm_q), axis=0).reshape(-1, 1)
52 |         return x1
53 | 
54 |     def emit(self, state):
55 |         state = state.reshape(-1, self.dim_state)
56 |         observation = self.network.infer_by_x(state)
57 |         observation = observation.reshape(self.dim_observation, -1)
58 |         return observation
59 | 
60 |     def generate(self, state):
61 |         state = state.reshape(-1, self.dim_state)
62 |         img = self.network.construct_y_from_x(state)
63 |         img = img.reshape(self.reconstruct_size_origin)
64 |         img = np.clip(img, 0, 1.)
65 |         return img
66 | 
67 |     def reconstruct(self, img):
68 |         img = img.reshape((-1,) + self.image_size_origin)
69 |         rec = self.network.reconstruct(img)
70 |         rec = rec.reshape(self.reconstruct_size_origin)
71 |         rec = np.clip(rec, 0, 1.)
72 |         return rec
73 | 
74 |     def wrap(self, img):
75 |         img = img.reshape((-1,) + self.image_size_origin)  # for CNN
76 |         observation = self.network.infer_by_y(img)
77 |         cov_observation = self.network.cov_infer_by_y(img)
78 |         observation = observation.reshape(self.dim_observation, -1)
79 |         return observation, cov_observation
80 | 
81 |     def train(self, data_loader, model_dir, save_every=5, epoch=100):
82 | 
83 |         def update(loss_list, loss_list1, loss_list2, ep):
84 |             """
85 |             Plot the intermediate training loss curve, save it to the model dir, and save the checkpoint.
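            The y-axis is clipped to the loss level reached after the first ~5% of batches, so the curve stays readable once the loss has dropped.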
86 | :param loss_list: overall loss 87 | :param loss_list1: reconstruction loss 88 | :param loss_list2: KL-divergence loss 89 | :param ep: current epoch index 90 | :return: None 91 | """ 92 | f = plt.figure() 93 | plt.plot(loss_list, label='overall') 94 | plt.plot(loss_list1, label='reconstruction') 95 | plt.plot(loss_list2, label='kl_div') 96 | plt.title("training curve") 97 | plt.xlabel("number of batch") 98 | plt.ylabel("-ELBO") 99 | p = int(0.05 * len(loss_list)) 100 | _max = np.max([loss_list[p], loss_list1[p], loss_list2[p]]) 101 | _min = np.min([loss_list, loss_list1, loss_list2]) 102 | _range = _max - _min 103 | print("Error: {}, Reconstruction: {}, KL-div: {}".format(np.mean(loss_list[-100:]), 104 | np.mean(loss_list1[-100:]), 105 | np.mean(loss_list2[-100:]))) 106 | plt.ylim([_min - 0.03 * _range, _max + _range]) 107 | plt.legend() 108 | path = os.path.join(model_dir, "training_curve.png") 109 | plt.savefig(path) 110 | checkpoint_path = os.path.join(model_dir, "model_{}.ckpt".format(ep)) 111 | self.save(checkpoint_path) 112 | plt.close(f) 113 | 114 | def get_epoch(dataset): 115 | train_inputs, train_targets = dataset.get_train_set(sample=False) 116 | reconstructs = [] 117 | for im in train_inputs: 118 | reconstructs.append(transform.resize(im, self.reconstruct_size_origin, 119 | anti_aliasing=True, mode='constant').reshape(-1)) # * 255.) 120 | reconstructs = np.array(reconstructs) 121 | return train_inputs, train_targets, reconstructs 122 | 123 | loss_all, loss_all1, loss_all2 = [], [], [] 124 | input_len = len(data_loader.times_train) 125 | 126 | inputs, targets, reconstructs = get_epoch(data_loader) 127 | 128 | print("Training the observer...") 129 | with tqdm(total=(epoch * input_len)) as tbar: 130 | for e in range(epoch): 131 | loss_list, loss_list1, loss_list2 = [], [], [] 132 | num_iters = int(input_len / self.batch_size) 133 | if num_iters * self.batch_size < input_len: 134 | num_iters += 1 135 | 136 | for i in range(num_iters): 137 | idx = np.random.choice(input_len, size=self.batch_size) 138 | batch_inputs = inputs[idx, ...] 139 | batch_targets = targets[idx, ...] 140 | batch_reconstructs = reconstructs[idx, ...] 
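                    # argument order: pose labels feed the attribute encoder (x), full-size images the image encoder (y), and downsampled images are the reconstruction target (y_small)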
141 | 
142 |                     loss, loss_1, loss_2 = self.network.batch_train(batch_targets, batch_inputs, batch_reconstructs)
143 |                     loss_list.append(loss)
144 |                     loss_list1.append(loss_1)
145 |                     loss_list2.append(loss_2)
146 | 
147 |                 final_mse = np.mean(loss_list)
148 |                 tbar.set_postfix(err="%.3f" % final_mse)
149 |                 tbar.update(input_len)
150 |                 loss_all.extend(loss_list)
151 |                 loss_all1.extend(loss_list1)
152 |                 loss_all2.extend(loss_list2)
153 |                 if (e+1) % save_every == 0:
154 |                     update(loss_all, loss_all1, loss_all2, e + 1)
155 |         # save the final model
156 |         checkpoint_path = os.path.join(model_dir, 'model.ckpt')
157 |         self.save(checkpoint_path)
158 |         return loss_all
159 | 
160 |     def save(self, path):
161 |         self.network.save(path)
162 | 
163 |     def restore(self, path):
164 |         self.network.restore(path)
165 | 
166 |     def close(self):
167 |         self.network.destruct()
168 | 
--------------------------------------------------------------------------------
/ops.py:
--------------------------------------------------------------------------------
1 | # Tensorflow related operations
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | 
6 | 
7 | def build_fnn(x, units_list, activation=tf.sigmoid, scope="fnn", reuse=False):
8 |     """Construct a FNN under the scope; if the scope already exists, reuse needs to be set to true."""
9 |     num_layers = len(units_list)
10 |     h = x
11 |     with tf.variable_scope(scope, reuse=reuse):
12 |         for i in range(num_layers - 1):
13 |             a = tf.layers.dense(h, units_list[i+1], activation=None,  # linear pre-activation
14 |                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
15 |                                 bias_initializer=tf.zeros_initializer())
16 |             if i < num_layers - 2:
17 |                 h = activation(a)  # hidden layers are activated here; the final layer stays linear
18 |             else:
19 |                 h = a
20 |     return h
21 | 
22 | 
23 | def loss_rec_bernoulli(ori, rec):
24 |     return -tf.reduce_sum(ori * tf.log_sigmoid(rec) + (1 - ori) * tf.log_sigmoid(-rec), reduction_indices=1)
25 | 
26 | 
27 | def loss_rec_categorical(ori, rec):
28 |     """Categorical reconstruction loss"""
29 |     return -tf.reduce_sum(ori * tf.nn.log_softmax(rec), reduction_indices=1)
30 | 
31 | 
32 | def loss_rec_normal(ori, mean, log_var=0.):
33 |     """Negative log-likelihood of a normal distribution"""
34 |     dim = ori.shape[1].value
35 |     return 0.5 * (dim * np.log(np.pi * 2) + tf.reduce_sum(log_var + tf.square(ori - mean) / tf.exp(log_var),
36 |                                                           reduction_indices=1))
37 | 
38 | 
39 | def loss_reg_normal(mean, log_var):
40 |     """(Positive) KL div from the given distribution to the standard normal distribution"""
41 |     return -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), reduction_indices=1)
42 | 
43 | 
44 | def loss_kl_normal(mean1, log_var1, mean2, log_var2):
45 |     """(Positive) KL divergence from 1 to 2"""
46 |     return 0.5 * tf.reduce_sum(-1 - log_var1 + log_var2
47 |                                + tf.square(mean1 - mean2) / tf.exp(log_var2)
48 |                                + tf.exp(log_var1 - log_var2),
49 |                                reduction_indices=1)
50 | 
51 | # Conv / FSConv
52 | 
53 | 
54 | def dcgan_decode(inputs, output_size, num_layer=3, channel_start=512, activation=tf.nn.relu,
55 |                  scope="deconv", reuse=False, training=False):
56 |     """Image decoder, taking ideas from DC-GAN.
57 | 
58 |     :param inputs: the input tensor, 2D
59 |     :param output_size: the size of the desired generated picture, a triple, e.g. (64, 64, 3)
60 |     :param num_layer: number of Conv layers
61 |     :param channel_start: number of initial features in the network
62 |     :param activation: the activation function used
63 |     :param scope: the tensorflow scope
64 |     :param reuse: set to true if this scope of params was created somewhere before and needs to be reused.
65 |     :param training: set to true if in training mode, for batch normalization only
66 |     :returns
67 |         h: the output tensor, 4D
68 |     """
69 |     y_w, y_h, y_c = output_size
70 |     original_h = y_h // (2 ** num_layer)
71 |     original_w = y_w // (2 ** num_layer)
72 |     with tf.variable_scope(scope, reuse=reuse):
73 |         # reshape layer --> cubic layer
74 |         net = tf.layers.dense(inputs, channel_start * original_h * original_w, activation=activation,
75 |                               kernel_initializer=tf.contrib.layers.xavier_initializer(),
76 |                               bias_initializer=tf.zeros_initializer())
77 |         net = tf.reshape(net, [-1, original_h, original_w, channel_start])
78 | 
79 |         for i in range(num_layer - 1):
80 |             num_filter = channel_start // (2 ** (i+1))
81 |             net = tf.layers.conv2d_transpose(net, filters=num_filter, kernel_size=[5, 5],
82 |                                              strides=2, padding='same')
83 |             # net = tf.layers.batch_normalization(net, training=training)
84 |             net = activation(net)
85 | 
86 |         net = tf.layers.conv2d_transpose(net, filters=y_c, kernel_size=[5, 5], strides=(2, 2),
87 |                                          padding='same')
88 | 
89 |         h = tf.reshape(net, [-1, np.prod(output_size)])
90 | 
91 |     return h
92 | 
93 | 
94 | def dcgan_encode(inputs, output_size, num_layer=3, final_channel=512, activation=tf.nn.relu,
95 |                  scope="encode", reuse=False, training=False):
96 |     """Image encoder, taking ideas from DC-GAN.
97 | 
98 |     :param inputs: the input tensor, should be 4D.
99 |     :param output_size: the output size of the encoder, one dimensional, identical for mean and log variance
100 |     :param num_layer: how many conv layers are stacked, excluding the final linear layer
101 |     :param final_channel: number of filters for the final conv layer, also defines the sizes for previous layers
102 |     :param activation: activation function in use for conv layers, excluding the final linear layer
103 |     :param scope: scope name for tensorflow graph
104 |     :param reuse: set to true if this scope was created somewhere before and needs to be reused
105 |     :param training: if in the training mode, for batch norm
106 |     :returns
107 |         mean: a tensor of size [inputs.shape[0], output_size]
108 |         log_variance: a tensor of size [inputs.shape[0], output_size]
109 |     """
110 |     with tf.variable_scope(scope, reuse=reuse):
111 | 
112 |         net = inputs
113 | 
114 |         for i in range(num_layer):
115 |             scale = 2**(num_layer - i - 1)  # e.g. 3-layer: 2**2, 2**1, 2**0
116 |             net = tf.layers.conv2d(net, filters=final_channel // scale, kernel_size=[5, 5],
117 |                                    strides=2, padding='same')
118 |             # net = tf.layers.batch_normalization(net, training=training)
119 |             net = activation(net)
120 | 
121 |         net = tf.reduce_mean(net, [1, 2], keepdims=False)
122 | 
123 |         mean = tf.layers.dense(inputs=net, units=output_size,
124 |                                kernel_initializer=tf.contrib.layers.xavier_initializer(),
125 |                                bias_initializer=tf.zeros_initializer())
126 |         log_variance = tf.layers.dense(inputs=net, units=output_size,
127 |                                        kernel_initializer=tf.contrib.layers.xavier_initializer(),
128 |                                        bias_initializer=tf.zeros_initializer())
129 |     return mean, log_variance
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | ## Introduction
3 | This is the code base for our work on Generative Map.
4 | It is an effort to combine a generative model (in particular, a [Variational Auto-Encoder](https://arxiv.org/abs/1312.6114)) with the classic Kalman filter for joint generation and localization; the sketch below shows how the two parts interact at inference time.
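
At inference time, the trained VAE serves as the observation model of an extended Kalman filter: the image encoder turns each camera frame into a latent "observation" with an associated covariance, while the pose is propagated between frames using the relative motion. Below is a condensed sketch of one predict/correct cycle, adapted from `ekf_estimate` in `eval.py`; it assumes `model` is a trained `Model` and `system` is a sequence obtained from one of the data loaders.

```python
import numpy as np
from utils import finite_diff, predict, correct

# one EKF cycle (condensed from ekf_estimate in eval.py)
model.set_norm(system.norm_xyz, system.norm_q)
x = system.get_pos()                        # pose: [xyz, log q], normalized
Sigma = Q = np.eye(model.dim_state) * 1e-3  # state / transition covariance

# correction: encode the image into a latent observation and fuse it
o, R = model.wrap(system.render())
H = finite_diff(model.emit, x, dim_out=model.dim_observation)
x, Sigma = correct(x, Sigma, o, R, H, model.emit)

# prediction: apply the relative motion (velocity + quaternion) over dt
img, u, dt = system.step()
dt = float(dt)
A = finite_diff(lambda _x: model.step(_x, u, dt), x, dim_out=model.dim_state)
B = finite_diff(lambda _u: model.step(x, _u, dt), u, dim_out=model.dim_state)
x, Sigma = predict(x, Sigma, u, A, B, Q * dt ** 2, lambda _x, _u: model.step(_x, _u, dt))
```

`eval.py` runs this loop over a whole sequence, un-normalizes the estimates, and reports the translational and rotational errors.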
5 | For more details, please refer to [our paper on arXiv](https://arxiv.org/abs/1902.11124).
6 | 
7 | ## Dependencies
8 | 1. Python3, and the following packages
9 | ```
10 | pip install numpy scikit-image tqdm tensorflow==1.13.1
11 | ```
12 | 
13 | 2. Datasets:
14 |     * 7-Scenes: Download from [their website](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/).
15 |     * RobotCar: Register & download from [here](https://robotcar-dataset.robots.ox.ac.uk/). Remark: we adopt the [preprocessing toolkit from the RobotCar authors](https://github.com/ori-mrg/robotcar-dataset-sdk) and first generate the data in `.pkl` format, then load it with `data_loader_robotcar.py`.
16 | 
17 | ## Usage
18 | For training, an example invocation can be found below. `PATH_TO_DATA_DIR` points to the data folder; for 7-Scenes, for example, it is the folder where you can find `TrainSplit.txt`, `TestSplit.txt`, and the `seq-xx` folders.
19 | ```
20 | python train.py --data_dir <PATH_TO_DATA_DIR>
21 | ```
22 | 
23 | For evaluation, an example of estimation with generation using a constant transition model is
24 | ```
25 | python eval.py --model_dir <PATH_TO_MODEL_DIR> --generation_mode 2 --no_model
26 | ```
27 | 
28 | ## Detailed Usage
29 | ```
30 | usage: train.py [-h] [--training_size TRAINING_SIZE]
31 |                 [--dim_observation DIM_OBSERVATION] [--batch_size BATCH_SIZE]
32 |                 [--num_epochs NUM_EPOCHS] [--save_every SAVE_EVERY]
33 |                 [--model_dir MODEL_DIR] [--data_dir DATA_DIR]
34 |                 [--reconstruct_accuracy RECONSTRUCT_ACCURACY]
35 |                 [--dim_input DIM_INPUT DIM_INPUT DIM_INPUT]
36 |                 [--dim_reconstruct DIM_RECONSTRUCT DIM_RECONSTRUCT DIM_RECONSTRUCT]
37 |                 [--learning_rate LEARNING_RATE] [--use_robotcar]
38 | 
39 | optional arguments:
40 |   -h, --help            show this help message and exit
41 |   --training_size TRAINING_SIZE
42 |                         size of the training set, sampled from the given
43 |                         sequences
44 |   --dim_observation DIM_OBSERVATION
45 |                         dimension of the latent representation ("observation")
46 |   --batch_size BATCH_SIZE
47 |                         mini-batch size for training
48 |   --num_epochs NUM_EPOCHS
49 |                         number of epochs for training
50 |   --save_every SAVE_EVERY
51 |                         intermediate saving frequency by epochs
52 |   --model_dir MODEL_DIR
53 |                         directory to save current model to
54 |   --data_dir DATA_DIR   directory to load the dataset
55 |   --reconstruct_accuracy RECONSTRUCT_ACCURACY
56 |                         how accurate should the reconstruction be, float,
57 |                         negative and positive, the smaller the more accurate
58 |   --dim_input DIM_INPUT DIM_INPUT DIM_INPUT
59 |                         the height, width, and number of channel for input
60 |                         image, separated by space
61 |   --dim_reconstruct DIM_RECONSTRUCT DIM_RECONSTRUCT DIM_RECONSTRUCT
62 |                         the height, width, and number of channel for
63 |                         reconstructed image, separated by space
64 |   --learning_rate LEARNING_RATE
65 |                         learning rate
66 |   --use_robotcar        if the robotcar data loader will be used, since it is
67 |                         stored in a different format.
68 | ```
69 | 
70 | ```
71 | usage: eval.py [-h] [--model_dir MODEL_DIR] [--state_var STATE_VAR]
72 |                [--visualize_dir VISUALIZE_DIR] [--video] [--no_model]
73 |                [--generation_mode GENERATION_MODE] [--all] [--seq_idx SEQ_IDX]
74 |                [--deviation DEVIATION]
75 | 
76 | optional arguments:
77 |   -h, --help            show this help message and exit
78 |   --model_dir MODEL_DIR
79 |                         directory to load model from. If not provided, the
80 |                         most recent dir in the default path will be used,
81 |                         raise an Error if failed.
103 | 
104 | 
105 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | 
2 | import os
3 | import time
4 | import json
5 | import argparse
6 | 
7 | from data_loader_7scenes import DataLoader as DataLoader7Scenes
8 | from data_loader_robotcar import DataLoader as DataLoaderRobotCar
9 | 
10 | from model import Model
11 | 
12 | 
13 | def main():
14 |     parser = argparse.ArgumentParser()
15 |     parser.add_argument('--training_size', type=int, default=3000,
16 |                         help='size of the training set, sampled from the given sequences')
17 |     parser.add_argument('--dim_observation', type=int, default=128,
18 |                         help='dimension of the latent representation ("observation")')
19 |     parser.add_argument('--batch_size', type=int, default=64,
20 |                         help='mini-batch size for training')
21 |     parser.add_argument('--num_epochs', type=int, default=5000,
22 |                         help='number of epochs for training')
23 |     parser.add_argument('--save_every', type=int, default=200,
24 |                         help='intermediate saving frequency, in epochs')
25 |     parser.add_argument('--model_dir', type=str, default="checkpoints/%s" % time.strftime("%Y-%m-%d_%H-%M-%S"),
26 |                         help='directory to save the current model to')
27 |     parser.add_argument('--data_dir', type=str, default='data/7scenes/chess',
28 |                         help='directory to load the dataset from')
29 |     parser.add_argument('--reconstruct_accuracy', type=float, default=-3.,
30 |                         help='how accurate the reconstruction should be; a float, '
31 |                              'negative or positive, the smaller the more accurate')
32 |     parser.add_argument('--dim_input', nargs=3, default=[96, 96, 3],
33 |                         help='the height, width, and number of channels of the input image, separated by spaces')
34 |     parser.add_argument('--dim_reconstruct', nargs=3, default=[64, 64, 3],
35 |                         help='the height, width, and number of channels of the reconstructed image, separated by spaces')
36 |     parser.add_argument('--learning_rate', type=float, default=0.0001,
37 |                         help='learning rate')
38 |     parser.add_argument('--use_robotcar', default=False, action='store_true',
39 |                         help='whether to use the RobotCar data loader, since that dataset is stored in a different format.')
40 |     # # TODO reload&train is not implemented yet, as it was never really needed
41 |     # parser.add_argument('--load_model', type=str, default=None,
42 |     #                     help='Reload a model checkpoint and restore training.')
43 |     args = parser.parse_args()
44 | 
45 |     # convert the list inputs into the required format
46 |     args.dim_input = tuple(map(int, args.dim_input))
47 |     args.dim_reconstruct = tuple(map(int, args.dim_reconstruct))
48 | 
49 |     train(args)
50 | 
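# Example (a sketch, not part of the CLI): train() can also be invoked
# programmatically, e.g. from a notebook, by building the same namespace
# that argparse would produce:
#
#   import argparse
#   args = argparse.Namespace(
#       training_size=3000, dim_observation=128, batch_size=64,
#       num_epochs=5000, save_every=200, model_dir="checkpoints/demo",
#       data_dir="data/7scenes/chess", reconstruct_accuracy=-3.,
#       dim_input=(96, 96, 3), dim_reconstruct=(64, 64, 3),
#       learning_rate=0.0001, use_robotcar=False)
#   train(args)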
51 | 
52 | def train(args):
53 |     if not os.path.isdir(args.model_dir):
54 |         os.makedirs(args.model_dir)
55 |     json_path = os.path.join(args.model_dir, "model_specs.json")
56 | 
57 |     # get the dataset / simulated system
58 |     DataLoader = DataLoaderRobotCar if args.use_robotcar else DataLoader7Scenes
59 |     dataloader = DataLoader(args.data_dir, img_width=args.dim_input[1], img_height=args.dim_input[0])
60 | 
61 |     # save the training flags
62 |     args_dict = vars(args)
63 |     args_dict['scaling_factor'] = [dataloader.norm_xyz, dataloader.norm_q]
64 |     with open(json_path, 'w') as outfile:
65 |         json.dump(args_dict, outfile, indent=4)
66 | 
67 |     # setup model
68 |     model = Model(dim_observation=args.dim_observation,
69 |                   batch_size=args.batch_size,
70 |                   reconstruction_accuracy=args.reconstruct_accuracy,
71 |                   image_size=args.dim_input,
72 |                   reconstruct_size=args.dim_reconstruct,
73 |                   training=True,
74 |                   learning_rate=args.learning_rate)
75 | 
76 |     # train model
77 |     model.train(dataloader,
78 |                 model_dir=args.model_dir,
79 |                 epoch=args.num_epochs,
80 |                 save_every=args.save_every)
81 |     model.close()
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     main()
86 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # Filter-related helper functions
2 | 
3 | import math
4 | import numpy as np
5 | 
6 | 
7 | MACHINE_EPS = 1e-9
8 | SINGULAR_FIX = True
9 | 
10 | 
11 | def sigmoid(x):
12 |     """Numerically stable sigmoid function"""
13 |     y = np.zeros_like(x)
14 |     y[x >= 0] = 1. / (1. + np.exp(- x[x >= 0]))
15 |     z = np.exp(x[x < 0])
16 |     y[x < 0] = z / (1. + z)
17 |     return y
18 | 
19 | 
20 | def predict(x, Sigma, u, A, B, Q, step_sim):
21 |     """Prediction step of the EKF; A is the Jacobian of step_sim (B is unused)."""
22 |     x1 = step_sim(x, u)
23 |     Sigma1 = A.dot(Sigma).dot(A.T) + Q
24 |     return x1, Sigma1
25 | 
26 | 
27 | def correct(x, Sigma, o, R, H, observe_sim):
28 |     """Correction step of the EKF; H is the Jacobian of observe_sim."""
29 |     S = H.dot(Sigma).dot(H.T) + R
30 |     y = o - observe_sim(x)
31 |     if SINGULAR_FIX:
32 |         S += np.eye(S.shape[0]) * MACHINE_EPS  # regularize S to keep the solve well-conditioned
33 |     K = np.linalg.solve(S.T, H.dot(Sigma.T)).T
34 |     x1 = x + K.dot(y)
35 |     Sigma1 = Sigma - K.dot(H).dot(Sigma)
36 |     return x1, Sigma1
37 | 
38 | 
39 | def finite_diff(simulate, x, e=1e-1, dim_out=None):
40 |     dim = x.shape[0]
41 |     if dim_out is None:
42 |         dim_out = dim
43 |     res = np.zeros((dim_out, 0))
44 |     for i in range(dim):
45 |         x1 = x.copy()
46 |         x1[i] += e
47 |         x1_inc = simulate(x1).copy()
48 |         x1[i] -= (2 * e)  # i.e. x[i] - e, for the central difference
49 |         x1_dec = simulate(x1)
50 |         res = np.hstack((res, (x1_inc - x1_dec) / (2 * e)))
51 |     return res
52 | 
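# finite_diff approximates the Jacobian of `simulate` at x by central
# differences, stacking one column per state dimension; `simulate` must
# therefore return a column vector. Example (a sketch): for
# f(x) = [x0**2, x0*x1], the analytic Jacobian at x = (1, 2) is
# [[2, 0], [2, 1]]:
#
#   f = lambda s: np.array([[s[0] ** 2], [s[0] * s[1]]])
#   J = finite_diff(f, np.array([1., 2.]), e=1e-3, dim_out=2)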
53 | 
54 | ## Rotation conversions ##
55 | 
56 | 
57 | def quaternion_to_euler_angle(q):
58 |     assert q.shape == (4, )
59 |     w, x, y, z = q
60 |     t0 = +2.0 * (w * x + y * z)
61 |     t1 = +1.0 - 2.0 * (x * x + y * y)
62 |     X = math.atan2(t0, t1)
63 | 
64 |     t2 = +2.0 * (w * y - z * x)
65 |     t2 = +1.0 if t2 > +1.0 else t2
66 |     t2 = -1.0 if t2 < -1.0 else t2
67 |     Y = math.asin(t2)
68 | 
69 |     t3 = +2.0 * (w * z + x * y)
70 |     t4 = +1.0 - 2.0 * (y * y + z * z)
71 |     Z = math.atan2(t3, t4)
72 | 
73 |     return np.array([X, Y, Z])
74 | 
75 | 
76 | def rotation_matrix_to_euler_angle(R):
77 |     X = math.atan2(R[1, 2], R[2, 2])
78 | 
79 |     s1 = math.sin(X)
80 |     c1 = math.cos(X)
81 |     c2 = math.sqrt(R[0, 0]*R[0, 0] + R[0, 1]*R[0, 1])
82 | 
83 |     Y = math.atan2(-1.0*R[0, 2], c2)
84 |     Z = math.atan2(s1 * R[2, 0] - c1 * R[1, 0], c1 * R[1, 1] - s1 * R[2, 1])
85 | 
86 |     return np.array([X, Y, Z])
87 | 
88 | 
89 | def rotation_matrix_to_quaternion(R):
90 |     w = np.sqrt( max(0, 1 + R[0, 0] + R[1, 1] + R[2, 2]) ) / 2
91 |     x = - np.sign(R[2, 1] - R[1, 2]) * np.sqrt( max(0, 1 + R[0, 0] - R[1, 1] - R[2, 2]) ) / 2
92 |     y = - np.sign(R[0, 2] - R[2, 0]) * np.sqrt( max(0, 1 - R[0, 0] + R[1, 1] - R[2, 2]) ) / 2
93 |     z = - np.sign(R[1, 0] - R[0, 1]) * np.sqrt( max(0, 1 - R[0, 0] - R[1, 1] + R[2, 2]) ) / 2
94 | 
95 |     return np.array([w, x, y, z])
96 | 
97 | 
98 | def quaternion_to_rotation_matrix(q):
99 |     R = [[q[0]**2 + q[1]**2 - q[2]**2 - q[3]**2, 2*(q[1]*q[2] - q[0]*q[3]), 2*(q[1]*q[3] + q[0]*q[2])],
100 |          [2*(q[1]*q[2] + q[0]*q[3]), q[0]**2 - q[1]**2 + q[2]**2 - q[3]**2, 2*(q[2]*q[3] - q[0]*q[1])],
101 |          [2*(q[1]*q[3] - q[0]*q[2]), 2*(q[2]*q[3] + q[0]*q[1]), q[0]**2 - q[1]**2 - q[2]**2 + q[3]**2]]
102 | 
103 |     return np.array(R)
104 | 
105 | 
106 | def rotation_matrix_to_log_quaternion(R):
107 |     u = math.acos(0.5 * math.sqrt(R[0, 0] + R[1, 1] + R[2, 2] + 1.0))
108 | 
109 |     v = np.array([R[1, 2] - R[2, 1],
110 |                   R[2, 0] - R[0, 2],
111 |                   R[0, 1] - R[1, 0]])
112 | 
113 |     log_quat = v * (u / np.sqrt(np.sum(v*v)))
114 | 
115 |     return log_quat
116 | 
117 | 
118 | ## Euler angles
119 | 
120 | 
121 | def wrap_angles_to_state(angles):
122 |     assert angles.shape == (3, )
123 |     theta, phi, psi = angles
124 |     res = np.array([np.cos(theta), np.sin(theta),
125 |                     np.cos(phi), np.sin(phi),
126 |                     np.cos(psi), np.sin(psi)])
127 |     return res
128 | 
129 | 
130 | def update_angles(ang_state, dangles):
131 |     c1, s1, c2, s2, c3, s3 = ang_state
132 |     dc1, ds1, dc2, ds2, dc3, ds3 = wrap_angles_to_state(dangles)
133 |     nc1 = c1 * dc1 - s1 * ds1
134 |     ns1 = s1 * dc1 + c1 * ds1
135 |     nc2 = c2 * dc2 - s2 * ds2
136 |     ns2 = s2 * dc2 + c2 * ds2
137 |     nc3 = c3 * dc3 - s3 * ds3
138 |     ns3 = s3 * dc3 + c3 * ds3
139 |     return np.array([nc1, ns1, nc2, ns2, nc3, ns3])
140 | 
141 | 
142 | def quaternion_to_state(q):
143 |     assert q.shape == (4, )
144 |     angles = quaternion_to_euler_angle(q)
145 |     return wrap_angles_to_state(angles)
146 | 
147 | 
148 | def pose_to_state(s):
149 |     xyz = np.array(s[:3])
150 |     angles = s[3:]
151 |     angles = wrap_angles_to_state(np.array(angles))
152 |     state = np.concatenate((xyz, angles))
153 |     return state
154 | 
155 | 
156 | ## Quaternions ##
157 | 
158 | def quaternion_distance(q0, q1):
159 |     assert q0.shape == q1.shape == (4,)
160 |     # clip to guard against NaN from floating-point error for (near-)identical quaternions
161 |     return np.arccos(np.clip(2 * (q0.dot(q1) ** 2) - 1, -1., 1.))
162 | 
163 | 
164 | def quaternion_multiply(q0, q1):
165 |     w0, x0, y0, z0 = q0
166 |     w1, x1, y1, z1 = q1
167 |     return np.array([-x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0,
168 |                      x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0,
169 |                      -x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0,
170 |                      x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0])
171 | 
172 | 
173 | def quaternion_inverse(q):
174 |     n = np.sum(q * q)
175 |     q_inv = np.array([q[0], -q[1], -q[2], -q[3]])
176 |     return q_inv / n
177 | 
178 | 
179 | def log_quaternion(q):
180 |     u, x, y, z = q
181 |     l2norm2 = x ** 2 + y ** 2 + z ** 2
182 |     if l2norm2 < MACHINE_EPS:
183 |         return np.zeros(3)
184 |     u = np.clip(u, -1., 1.)
185 |     factor = np.arccos(u) / np.sqrt(l2norm2)
186 |     return np.array([x, y, z]) * factor
187 | 
188 | 
189 | def inv_log_quaternion(log_q):
190 |     l2norm = np.sqrt(np.sum(np.square(log_q)))
191 |     if l2norm < MACHINE_EPS:
192 |         # zero rotation: return the identity quaternion
193 |         return np.array([1., 0., 0., 0.])
194 |     x, y, z = log_q * (np.sin(l2norm) / l2norm)
195 |     return np.array([np.cos(l2norm), x, y, z])
--------------------------------------------------------------------------------
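A quick sanity check for the quaternion helpers above (a sketch; run from the repository root so that `utils.py` is importable):
```python
import numpy as np
from utils import (log_quaternion, inv_log_quaternion,
                   quaternion_multiply, quaternion_inverse, quaternion_distance)

q = np.array([0.8, 0.2, 0.4, 0.4])  # a unit quaternion, (w, x, y, z)

# the quaternion log followed by its inverse recovers the unit quaternion
assert np.allclose(inv_log_quaternion(log_quaternion(q)), q)

# a quaternion times its inverse is the identity rotation
assert np.allclose(quaternion_multiply(q, quaternion_inverse(q)), [1., 0., 0., 0.])

# the rotational distance from a quaternion to itself is zero
assert np.isclose(quaternion_distance(q, q), 0.)
```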