├── LICENSE
├── data_loader_7scenes.py
├── data_loader_robotcar.py
├── env.py
├── eval.py
├── eval_utils.py
├── gen_map.py
├── model.py
├── ops.py
├── readme.md
├── train.py
└── utils.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Mingpan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/data_loader_7scenes.py:
--------------------------------------------------------------------------------
  1 | # The data loader for "TUM style" of dataset
  2 | 
  3 | 
  4 | import os
  5 | import numpy as np
  6 | import random
  7 | from PIL import Image
  8 | import re
  9 | 
 10 | from tqdm import tqdm
 11 | 
 12 | from env import MobileRobot
 13 | from utils import rotation_matrix_to_quaternion, log_quaternion
 14 | from glob import glob
 15 | 
 16 | 
 17 | class CallableDict(dict):
 18 |     def __getitem__(self, key):
 19 |         val = super().__getitem__(key)
 20 |         if callable(val):
 21 |             return val(key)
 22 |         return val
 23 | 
 24 | 
 25 | class DataLoader:
 26 |     def __init__(self, root_dir, img_height=96, img_width=96, header_lines=0, dim_control=7,
 27 |                  norm_factor=None, no_model=False):
 28 | 
 29 |         split_file = os.path.join(root_dir, "TrainSplit.txt")
 30 |         with open(split_file, 'r') as f:
 31 |             data = [re.findall(r'\d+', line)[0] for line in f]
 32 |         train_path = []
 33 |         for idx in data:
 34 |             if len(idx) < 2:
 35 |                 idx = '0' + idx
 36 |             train_path.append('seq-' + idx)
 37 | 
 38 |         split_file = os.path.join(root_dir, "TestSplit.txt")
 39 |         with open(split_file, 'r') as f:
 40 |             data = [re.findall(r'\d+', line)[0] for line in f]
 41 |         test_path = []
 42 |         for idx in data:
 43 |             if len(idx) < 2:
 44 |                 idx = '0' + idx
 45 |             test_path.append('seq-' + idx)
 46 | 
 47 |         self.train_dir = [os.path.join(root_dir, path) for path in train_path]
 48 |         self.valid_dir = [os.path.join(root_dir, path) for path in test_path]
 49 |         self.test_dir = [os.path.join(root_dir, path) for path in test_path]
 50 |         
 51 |         self.height = img_height
 52 |         self.width = img_width
 53 |         self.header_lines = header_lines
 54 |         self.no_model = no_model
 55 |         self.dim_control = dim_control
 56 | 
 57 |         # require output elements
 58 |         self.time2rgb = CallableDict()
 59 |         self.time2rgb_unknown = CallableDict()
 60 |         self.time2pos = {}
 61 | 
 62 |         self.last_timestamp = 0
 63 |         
 64 |         self.train_sequences = []
 65 |         for path in self.train_dir:
 66 |             self.train_sequences.append(self.load_train(path))
 67 |         self.times_train = [t for sublist in self.train_sequences for t in sublist]
 68 | 
 69 |         if norm_factor is None:
 70 |             self.norm_xyz, self.norm_q = self.variance_norm()
 71 |         else:
 72 |             self.norm_xyz, self.norm_q = norm_factor
 73 |         
 74 |         self.valid_sequences = []
 75 |         for path in self.valid_dir:
 76 |             self.valid_sequences.append(self.load_train(path))
 77 |         self.times_valid = [t for sublist in self.valid_sequences for t in sublist]
 78 |         
 79 |         self.test_sequences = []
 80 |         for path in self.test_dir:
 81 |             self.test_sequences.append(self.load_test(path))
 82 |         self.times_test = [t for sublist in self.test_sequences for t in sublist]
 83 | 
 84 |     def load_rgb(self, rgb_path):
 85 |         img = Image.open(rgb_path)
 86 |         img = img.resize((self.width, self.height), Image.ANTIALIAS)
 87 |         data = np.array(img.getdata()).reshape(self.height, self.width, 3)
 88 |         data = data[..., ::-1].astype(np.float32) / 255.
 89 |         return data
 90 | 
 91 |     def load_train(self, train_dir):
 92 |         timestamps = []
 93 |         time2rbgpath = {}
 94 |         
 95 |         # load images
 96 |         image_list = sorted(glob(os.path.join(train_dir, "*.color.png")))
 97 |         for i, file_path in enumerate(image_list):
 98 |             timestamp = i + self.last_timestamp
 99 |             timestamps.append(timestamp)
100 |             time2rbgpath[timestamp] = file_path
101 |             self.time2rgb[timestamp] = lambda _timestamp: self.load_rgb(time2rbgpath[_timestamp])
102 |         
103 |         # load states
104 |         pose_list = sorted(glob(os.path.join(train_dir, "*.pose.txt")))
105 |         for i, truth_path in enumerate(pose_list):
106 |             timestamp = i + self.last_timestamp
107 |             with open(truth_path, 'r') as f_pos:
108 |                 # get transformation matrix H
109 |                 H = [list(map(float, line.strip().split())) for line in f_pos]
110 |                 H = np.array(H)
111 |                 # get euler angles
112 |                 R = H[:-1, :-1]
113 |                 # angles = rotation_matrix_to_euler_angle(R)
114 |                 q = rotation_matrix_to_quaternion(R)
115 |                 # get position
116 |                 xyz = H[:-1, -1]
117 |                 new_pos = np.concatenate((xyz, q))
118 |                 # store it into the dict
119 |                 self.time2pos[timestamp] = new_pos
120 |         
121 |         self.last_timestamp = timestamp + 1
122 |         
123 |         return timestamps
124 | 
125 |     def load_test(self, test_dir):
126 |         timestamps = []
127 |         time2rbgpath = {}
128 |         
129 |         # load images
130 |         image_list = sorted(glob(os.path.join(test_dir, "*.color.png")))
131 |         for i, file_path in enumerate(image_list):
132 |             timestamp = i + self.last_timestamp
133 |             timestamps.append(timestamp)
134 |             time2rbgpath[timestamp] = file_path
135 |             self.time2rgb_unknown[timestamp] = lambda _timestamp: self.load_rgb(time2rbgpath[_timestamp])
136 | 
137 |         self.last_timestamp = timestamp + 1
138 |         
139 |         return timestamps
140 | 
141 |     def get_train_set(self, size=None, sample=True):
142 |         """ Get random sample pairs of (inputs, labels) from training times sequence"""
143 |         inputs, labels = [], []
144 | 
145 |         if size is None:
146 |             size = len(self.times_train)
147 | 
148 |         def pos_to_state(p):
149 |             xyz = p[:3]
150 |             q = p[3:]
151 |             log_q = log_quaternion(q)
152 |             return np.concatenate((xyz, log_q))
153 | 
154 |         # print("Collecting training data...")
155 |         if sample:
156 |             for i in range(size):  # tqdm(range(size)):
157 |                 rand_idx = np.random.randint(len(self.times_train))
158 |                 t = self.times_train[rand_idx]
159 |                 img = self.time2rgb[t]
160 |                 pos = self.time2pos[t]
161 |                 lab = pos_to_state(pos)
162 |                 inputs.append(img.copy())
163 |                 labels.append(lab.copy())
164 |         else:
165 |             inputs = [self.time2rgb[t] for t in self.times_train]
166 |             labels = [pos_to_state(self.time2pos[t]) for t in self.times_train]
167 | 
168 |         # normalize
169 |         labels = np.array(labels)
170 |         labels[:, :3] /= self.norm_xyz
171 |         labels[:, 3:] /= self.norm_q
172 | 
173 |         return np.array(inputs), labels
174 | 
175 |     def get_train_seq(self, idx=None):
176 |         if idx is None:
177 |             idx = np.random.randint(len(self.train_sequences))
178 |         time_seq = self.train_sequences[idx].copy()
179 |         seq = MobileRobot(time_seq, self.time2rgb, self.dim_control, self.time2pos, no_model=self.no_model)
180 |         return seq
181 | 
182 |     def get_valid_seq(self, idx=None):
183 |         if idx is None:
184 |             idx = np.random.randint(len(self.valid_sequences))
185 |         time_seq = self.valid_sequences[idx].copy()
186 |         seq = MobileRobot(time_seq, self.time2rgb, self.dim_control, self.time2pos,
187 |                           no_model=self.no_model, norm_xyz=self.norm_xyz, norm_q=self.norm_q)
188 |         return seq
189 | 
190 |     def get_test_seq(self, idx=None):
191 |         if idx is None:
192 |             idx = np.random.randint(len(self.test_sequences))
193 |         time_seq = self.test_sequences[idx].copy()
194 |         seq = MobileRobot(time_seq, self.time2rgb_unknown, self.dim_control)
195 |         return seq
196 | 
197 |     def variance_norm(self):
198 |         print('Estimating the translation and rotational variance...', flush=True)
199 |         poses = []
200 |         for idx in tqdm(range(len(self.train_sequences))):
201 |             seq = self.get_train_seq(idx)
202 |             poses.append(seq.get_pos())
203 |             for i in range(seq.horizon-1):
204 |                 seq.step()
205 |                 p = seq.get_pos()
206 |                 poses.append(p)
207 |         poses = np.array(poses)[:, :, 0]
208 |         norm_xyz = np.std(poses[:, :3])
209 |         norm_q = np.std(poses[:, 3:])
210 |         print("Std of xyz: {}; std of log q: {}".format(norm_xyz, norm_q), flush=True)
211 |         return norm_xyz, norm_q
212 | 


--------------------------------------------------------------------------------
/data_loader_robotcar.py:
--------------------------------------------------------------------------------
  1 | # The data loader for "TUM style" of dataset
  2 | 
  3 | 
  4 | import os
  5 | import numpy as np
  6 | 
  7 | import pickle
  8 | import skimage.transform as transform
  9 | 
 10 | from env import MobileRobot
 11 | from utils import log_quaternion
 12 | 
 13 | 
 14 | class DataLoader:
 15 |     def __init__(self, root_dir, img_height=96, img_width=96, dim_control=7,
 16 |                  no_model=False, norm_factor=None):
 17 |         train_datasets = ['2014-06-26-08-53-56', '2014-06-26-09-24-58']
 18 |         valid_datasets = ['2014-06-23-15-36-04', '2014-06-23-15-41-25']
 19 | 
 20 |         self.train_dir = [os.path.join(root_dir, path, path + '.pkl') for path in train_datasets]
 21 |         self.valid_dir = [os.path.join(root_dir, path, path + '.pkl') for path in valid_datasets]
 22 |         self.vo_dir = [os.path.join(root_dir, path, path + '_vo.pkl') for path in valid_datasets]
 23 |         self.test_dir = None
 24 | 
 25 |         self.height = img_height
 26 |         self.width = img_width
 27 |         self.no_model = no_model
 28 |         self.dim_control = dim_control
 29 | 
 30 |         # require output elements
 31 |         self.time2rgb = {}
 32 |         self.time2pos = {}
 33 | 
 34 |         self.train_sequences = []
 35 |         for path in self.train_dir:
 36 |             self.train_sequences.append(self.load_train(path))
 37 |         self.times_train = [t for sublist in self.train_sequences for t in sublist]
 38 | 
 39 |         if norm_factor is None:
 40 |             self.norm_xyz, self.norm_q = self.variance_norm()
 41 |         else:
 42 |             self.norm_xyz, self.norm_q = norm_factor
 43 | 
 44 |     @staticmethod
 45 |     def pos_to_state(p, norm_xyz=1., norm_q=1.):
 46 |         xyz = p[:3]
 47 |         q = p[3:]
 48 |         log_q = log_quaternion(q)
 49 |         xyz = xyz / norm_xyz
 50 |         log_q = log_q / norm_q
 51 |         return np.concatenate((xyz, log_q))
 52 | 
 53 |     def variance_norm(self):
 54 |         print('Estimating the translation and rotational variance...', flush=True)
 55 |         states = np.array([self.pos_to_state(self.time2pos[t]) for t in self.times_train])
 56 |         print(states.shape)
 57 |         norm_xyz = np.std(states[:, :3])
 58 |         norm_q = np.std(states[:, 3:])
 59 |         print("Std of xyz: {}; std of log q: {}".format(norm_xyz, norm_q), flush=True)
 60 |         return norm_xyz, norm_q
 61 | 
 62 |     def load_vo(self, vo_path):
 63 |         timestamps, vo_poses = pickle.load(open(vo_path, 'rb'))
 64 |         for ts, pose in vo_poses.items():
 65 |             pose[2] = 0.
 66 |         return timestamps, vo_poses
 67 | 
 68 |     def load_file(self, file_path):
 69 |         timestamps, images, poses = pickle.load(open(file_path, 'rb'))
 70 | 
 71 |         for ts, pose in poses.items():
 72 |             pose[2] = 0.
 73 | 
 74 |         return timestamps, images, poses
 75 | 
 76 |     def load_train(self, train_dir):
 77 |         (timestamps, images, poses) = self.load_file(train_dir)
 78 | 
 79 |         for ts, img in images.items():
 80 |             img = transform.resize(img, (self.height, self.width), mode='constant', anti_aliasing=True)
 81 |             images[ts] = img
 82 | 
 83 |         self.time2rgb.update(images)
 84 |         self.time2pos.update(poses)
 85 | 
 86 |         return timestamps
 87 | 
 88 |     def get_train_set(self, size=None, sample=True):
 89 |         """ Get random sample pairs of (inputs, labels) from training times sequence"""
 90 |         inputs, poses = [], []
 91 | 
 92 |         if size is None:
 93 |             size = len(self.times_train)
 94 | 
 95 |         # print("Collecting training data...")
 96 |         if sample:
 97 |             for i in range(size):  # tqdm(range(size)):
 98 |                 rand_idx = np.random.randint(len(self.times_train))
 99 |                 t = self.times_train[rand_idx]
100 |                 img = self.time2rgb[t]
101 |                 pos = self.time2pos[t]
102 |                 inputs.append(img.copy())
103 |                 poses.append(pos.copy())
104 |         else:
105 |             inputs = [self.time2rgb[t] for t in self.times_train]
106 |             poses = [self.time2pos[t] for t in self.times_train]
107 | 
108 |         labels = np.array([self.pos_to_state(p, self.norm_xyz, self.norm_q) for p in poses])
109 | 
110 |         return np.array(inputs), np.array(labels)
111 | 
112 |     def get_train_seq(self, idx=None):
113 |         if idx is None:
114 |             idx = np.random.randint(len(self.train_dir))
115 |         time_seq, time2rgb, time2pos = self.load_file(self.train_dir[idx])
116 | 
117 |         for ts, img in time2rgb.items():
118 |             img = transform.resize(img, (self.height, self.width), mode='constant', anti_aliasing=True)
119 |             time2rgb[ts] = img
120 | 
121 |         seq = MobileRobot(time_seq, time2rgb, self.dim_control, time2pos, no_model=self.no_model)
122 |         return seq
123 | 
124 |     def get_valid_seq(self, idx=None):
125 |         if idx is None:
126 |             idx = np.random.randint(len(self.valid_dir))
127 |         time_seq, time2rgb, time2pos = self.load_file(self.valid_dir[idx])
128 |         _, time2vopos = self.load_vo(self.vo_dir[idx])
129 |         # fix the nan problem due to vo dataset
130 |         time2vopos[time_seq[-1]] = time2vopos[time_seq[-2]] = time2vopos[time_seq[-3]]
131 | 
132 |         for ts, img in time2rgb.items():
133 |             img = transform.resize(img, (self.height, self.width), mode='constant', anti_aliasing=True)
134 |             time2rgb[ts] = img
135 | 
136 |         seq = MobileRobot(time_seq, time2rgb, self.dim_control, time2pos, time2vopos,
137 |                           no_model=self.no_model, norm_xyz=self.norm_xyz, norm_q=self.norm_q)
138 |         return seq
139 | 


--------------------------------------------------------------------------------
/env.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import numpy as np
 4 | 
 5 | from utils import quaternion_multiply, quaternion_inverse, log_quaternion
 6 | 
 7 | norm = np.linalg.norm
 8 | 
 9 | 
10 | class MobileRobot:
11 |     """
12 |     Simulate a sequence as a mobile robot.
13 |     """
14 |     def __init__(self, times, time2rgb, dim_control, time2pos=None, time2vopos=None,
15 |                  no_model=False, norm_xyz=1., norm_q=1.):
16 |         self.timestamps = times
17 |         self.time2rgb = time2rgb
18 |         self.dim_control = dim_control
19 |         self.time2pos = time2pos
20 |         self.time2vopos = time2vopos if time2vopos is not None else self.time2pos
21 |         self.no_model = no_model
22 |         self.norm_xyz = norm_xyz
23 |         self.norm_q = norm_q
24 | 
25 |         self.pos = None
26 |         self.time = None
27 |         self.time_idx = 0
28 |         self.reset()
29 | 
30 |     def render(self, step=None):
31 |         if step is None:
32 |             step = self.time_idx
33 |         time = self.timestamps[step]
34 |         return self.time2rgb[time]
35 | 
36 |     def reset(self, time_idx=0):
37 |         self.time_idx = time_idx
38 |         self.time = self.timestamps[time_idx]
39 |         if self.time2pos is not None:
40 |             self.pos = self.time2pos[self.time]
41 |         return self.render()
42 | 
43 |     def get_pos(self):
44 |         if self.time2pos is None:
45 |             raise Exception("The ground truth position is not available!")
46 |         self.pos = self.time2pos[self.time]
47 |         xyz = self.pos[:3] / self.norm_xyz
48 |         q = self.pos[3:]
49 |         log_q = log_quaternion(q) / self.norm_q
50 |         return np.concatenate((xyz, log_q)).reshape(-1, 1)
51 | 
52 |     def step(self):
53 |         """
54 |         Return the next step image, the relative action (vel + quaternion), and time diff.
55 | 
56 |         If self.no_model is not set, the real velocity & real relative rotation (as a quaternion) will be returned.
57 | 
58 |         Otherwise, a vector of action = [0, 0, 0, 1, 0, 0, 0] is returned, indicating no
59 |         translational movements (velocity  = action[:3] = 0), and no orientation change, i.e., a constant transition.
60 |         """
61 |         new_time = self.timestamps[self.time_idx + 1]
62 |         dt = new_time - self.time
63 | 
64 |         new_pos = self.time2pos[new_time]
65 |         vel = (self.time2vopos[new_time][:3] - self.time2vopos[self.time][:3]) / dt
66 |         qt = self.time2vopos[self.time][3:]
67 |         qt_1 = self.time2vopos[new_time][3:]
68 |         # for quaternion, time is not considered
69 |         q = quaternion_multiply(qt_1, quaternion_inverse(qt))
70 |         # combine
71 |         action = np.concatenate((vel, q), axis=0)
72 | 
73 |         if self.no_model:
74 |             action = np.zeros_like(action)
75 |             action[3] = 1.  # quaternion constant model
76 |         self.pos = new_pos
77 |         self.time_idx += 1
78 |         self.time = new_time
79 |         return self.render(), action.reshape(-1, 1), dt
80 | 
81 |     @property
82 |     def horizon(self):
83 |         return len(self.timestamps)
84 | 
85 |     @property
86 |     def dim_state(self):
87 |         return len(self.get_pos())
88 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | import json
  4 | import argparse
  5 | import pickle
  6 | 
  7 | import numpy as np
  8 | 
  9 | from tqdm import tqdm
 10 | 
 11 | from data_loader_7scenes import DataLoader as DataLoader7Scenes
 12 | from data_loader_robotcar import DataLoader as DataLoaderRobotCar
 13 | 
 14 | from model import Model
 15 | from utils import finite_diff, predict, correct
 16 | from eval_utils import VideoRecorder, plot_result, equidistant_generate, translational_error, rotational_error
 17 | 
 18 | 
 19 | def main():
 20 |     parser = argparse.ArgumentParser()
 21 |     parser.add_argument('--model_dir', type=str, default=None,
 22 |                         help='directory to load model from. If not provided, the most recent dir'
 23 |                         ' in the default path will be used, raise an Error if failed.')
 24 |     parser.add_argument('--state_var', type=float, default=0.001,
 25 |                         help='estimated state transition variance for the model used by extended Kalman filter, '
 26 |                              'it is assumed to be independent and stay the same for all dimension')
 27 |     parser.add_argument('--visualize_dir', type=str, default='visualize',
 28 |                         help='directory to save the visualization output + video (if apply)')
 29 |     parser.add_argument('--video', default=False, action='store_true',
 30 |                         help='if a video should be recorded comparing real and estimated '
 31 |                              'reconstructed images, together with localization results')
 32 |     parser.add_argument('--no_model', default=False, action='store_true',
 33 |                         help='if the true state transition will be hidden to test localization based only on images.')
 34 |     parser.add_argument('--generation_mode', type=int, default=1,
 35 |                         help='0: equidistant sample image generation; 1: Kalman filter estimation for localization; '
 36 |                              '2: perform both')
 37 |     parser.add_argument('--all', default=False, action='store_true',
 38 |                         help='evaluate all the validation sequences and report the performance w/o a picture.')
 39 |     parser.add_argument('--seq_idx', type=int, default=0,
 40 |                         help='evaluate the given indexed sequence in the text.')
 41 |     parser.add_argument('--deviation', type=float, default=0.,
 42 |                         help='deviation across all state dimension for robust test.')
 43 |     args = parser.parse_args()
 44 | 
 45 |     evaluate(args)
 46 | 
 47 | 
 48 | def ekf_estimate(system, model, state_var, save_video_dir=None, deviation=0.):
 49 |     """ Estimate the system with extended Kalman filter
 50 | 
 51 |         :param system: the system to estimate,
 52 |         :param model: the model we build for mimic the system,
 53 |         :param state_var: estimated state variance
 54 |         :param save_video_dir: if given, step wise images will be saved and recorded as a video
 55 |         :param deviation: deviation added upon each dimension of the initial state
 56 |         :returns
 57 |             x_est: estimated states by extended Kalman filter based on the images,
 58 |             Sigma_est: estimated observation uncertainties
 59 |             x_true: the recorded true system states
 60 |             o_list: list of obtained observation
 61 |     """
 62 | 
 63 |     system.reset()
 64 |     x = system.get_pos()
 65 |     # set initial deviation
 66 |     x = x + (np.ones_like(x) * deviation)
 67 |     model.set_norm(system.norm_xyz, system.norm_q)
 68 | 
 69 |     Sigma = np.eye(model.dim_state) * state_var
 70 |     Q = np.eye(model.dim_state) * state_var
 71 | 
 72 |     x_true = []  # true system states
 73 |     o_list = []  # observations
 74 |     x_est = []  # estimated states
 75 |     Sigma_est = []  # estimated covariances
 76 |     R_list = []  # uncertainty matrix for observations
 77 | 
 78 |     # init the display for video recording
 79 |     recorder = None
 80 |     if save_video_dir is not None:
 81 |         recorder = VideoRecorder(save_video_dir, norm_xyz=system.norm_xyz)
 82 | 
 83 |     # init observation
 84 |     img = system.render()
 85 | 
 86 |     print("Simulating the system with EKF...")
 87 |     for t in tqdm(range(system.horizon)):
 88 |         o, cov_o = model.wrap(img)
 89 | 
 90 |         # estimate observation model H
 91 |         o_func = lambda _x: model.emit(_x)
 92 |         H = finite_diff(o_func, x, dim_out=model.dim_observation)
 93 | 
 94 |         # get the observation variance
 95 |         R = cov_o
 96 | 
 97 |         # correct the state using observation and H
 98 |         x, Sigma = correct(x, Sigma, o, R, H, model.emit)
 99 | 
100 |         # collect current step data
101 |         R_list.append(np.diag(R))
102 |         x_est.append(x)
103 |         Sigma_est.append(Sigma)
104 |         o_list.append(o)
105 |         x_sys = system.get_pos()
106 |         x_true.append(x_sys)
107 | 
108 |         if recorder is not None:
109 |             emit = model.generate(x)
110 |             recorder.record(img, emit, x_sys, x)
111 | 
112 |         if t < system.horizon - 1:
113 |             # evolve the system by one step
114 | 
115 |             img, u, dt = system.step()
116 |             dt = float(dt)
117 | 
118 |             # estimate state transition matrix A, B
119 |             x_func = lambda _x: model.step(_x, u, dt)
120 |             u_func = lambda _u: model.step(x, _u, dt)
121 |             A = finite_diff(x_func, x, dim_out=model.dim_state)
122 |             B = finite_diff(u_func, u, dim_out=model.dim_state)
123 | 
124 |             # get the transition mapping
125 |             sim_step = lambda _x, _u: model.step(_x, _u, dt)
126 | 
127 |             # predict the next state, let Qt be correlated with dt, since longer time exhibit more uncertainty
128 |             x, Sigma = predict(x, Sigma, u, A, B, Q*(dt**2), sim_step)
129 | 
130 |     if recorder is not None:
131 |         recorder.finalize()
132 | 
133 |     # show the sigma of z
134 |     print("Aver. inferred variance is {}".format(np.mean(R_list)))
135 | 
136 |     # revoke the normalization, prepare for evaluation
137 |     x_est = np.array(x_est)
138 |     x_est[:, :3, :] *= system.norm_xyz
139 |     x_est[:, 3:, :] *= system.norm_q
140 |     Sigma_est = np.array(Sigma_est)
141 |     x_true = np.array(x_true)
142 |     x_true[:, :3, :] *= system.norm_xyz
143 |     x_true[:, 3:, :] *= system.norm_q
144 |     o_list = np.array(o_list)
145 | 
146 |     return x_est, Sigma_est, x_true, o_list
147 | 
148 | 
149 | def load_model_specs(model_dir):
150 |     """Look for the model specifications in the given directory"""
151 |     if model_dir is None:
152 |         # get the latest modified dir in the default dir, raise an Error if there's none
153 |         default_dir = "checkpoints"
154 |         all_dirs = [os.path.join(default_dir, d)
155 |                     for d in os.listdir(default_dir) if os.path.isdir(os.path.join(default_dir, d))]
156 |         if len(all_dirs) == 0:
157 |             raise FileNotFoundError("Model directory is not provided & no directory in default path!")
158 |         model_dir = max(all_dirs, key=os.path.getmtime)
159 | 
160 |     if model_dir.endswith('.ckpt'):
161 |         mdir = os.path.dirname(model_dir)
162 |         json_path = os.path.join(mdir, "model_specs.json")
163 |     else:
164 |         json_path = os.path.join(model_dir, "model_specs.json")
165 |     with open(json_path, 'r') as infile:
166 |         data = json.load(infile)
167 |     train_args = argparse.Namespace(**data)
168 |     return train_args, model_dir
169 | 
170 | 
171 | def evaluate(eval_args):
172 |     train_args, model_dir = load_model_specs(eval_args.model_dir)
173 |     if len(eval_args.visualize_dir) > 0:
174 |         eval_args.visualize_dir = os.path.join(model_dir, eval_args.visualize_dir)
175 |         if not os.path.isdir(eval_args.visualize_dir):
176 |             os.makedirs(eval_args.visualize_dir)
177 |     else:
178 |         eval_args.visualize_dir = None
179 | 
180 |     print('Model specification loaded.')
181 |     print(eval_args)
182 |     print(train_args)
183 | 
184 |     model = Model(dim_observation=train_args.dim_observation,
185 |                   batch_size=1,
186 |                   image_size=train_args.dim_input,
187 |                   reconstruct_size=train_args.dim_reconstruct)
188 |     model.restore(model_dir)
189 | 
190 |     # get the dataset / simulated system
191 |     DataLoader = DataLoaderRobotCar if train_args.use_robotcar else DataLoader7Scenes
192 |     dataloader = DataLoader(train_args.data_dir, no_model=eval_args.no_model,
193 |                             img_width=train_args.dim_input[1], img_height=train_args.dim_input[0],
194 |                             norm_factor=train_args.scaling_factor)
195 | 
196 |     if not eval_args.all:
197 |         sequence = dataloader.get_valid_seq(eval_args.seq_idx)
198 | 
199 |         if eval_args.generation_mode == 0:
200 |             equidistant_generate(sequence, model,
201 |                                  save_path=eval_args.visualize_dir,
202 |                                  use_robotcar=train_args.use_robotcar)
203 |         else:
204 |             video_dir = eval_args.visualize_dir if eval_args.video else None
205 |             x_est, cov_est, x_true, ob = ekf_estimate(sequence, model, eval_args.state_var,
206 |                                                       save_video_dir=video_dir,
207 |                                                       deviation=eval_args.deviation)
208 | 
209 |             plot_result(x_est, x_true,
210 |                         save_dir=eval_args.visualize_dir,
211 |                         use_robotcar=train_args.use_robotcar)
212 | 
213 |             if eval_args.generation_mode > 1:
214 |                 equidistant_generate(sequence, model,
215 |                                      model_traj=x_est,
216 |                                      save_path=eval_args.visualize_dir,
217 |                                      use_robotcar=train_args.use_robotcar)
218 |     else:
219 |         x_est_all, x_true_all, x_pred_all = [], [], []
220 |         for i in range(len(dataloader.valid_sequences)):
221 |             sequence = dataloader.get_valid_seq(i)
222 |             x_est, _, x_true, _ = ekf_estimate(sequence, model, eval_args.state_var,
223 |                                                deviation=eval_args.deviation)
224 |             x_est_all.extend(x_est)
225 |             x_true_all.extend(x_true)
226 | 
227 |         x_est_all = np.array(x_est_all)
228 |         x_true_all = np.array(x_true_all)
229 | 
230 |         mean_est_err_xyz, median_est_err_xyz = translational_error(x_true_all, x_est_all)
231 |         mean_est_err_rot, median_est_err_rot = rotational_error(x_true_all, x_est_all)
232 |         print("Estimator:")
233 |         print("\t Mean Error: {} m, {} deg".format(mean_est_err_xyz, mean_est_err_rot))
234 |         print("\t Median Error: {} m, {} deg".format(median_est_err_xyz, median_est_err_rot))
235 | 
236 |         with open(os.path.join(eval_args.visualize_dir, 'evaluations.pkl'), 'wb') as f:
237 |             data = {
238 |                 'true': x_true_all,
239 |                 'estimate': x_est_all,
240 |                 'prediction': x_pred_all
241 |             }
242 |             pickle.dump(data, f)
243 | 
244 | 
245 | if __name__ == '__main__':
246 |     main()
247 | 
248 | 


--------------------------------------------------------------------------------
/eval_utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | 
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | import matplotlib.animation as animation
  7 | from mpl_toolkits.mplot3d import Axes3D
  8 | from matplotlib import gridspec
  9 | 
 10 | import skimage.transform as transform
 11 | 
 12 | from tqdm import tqdm
 13 | 
 14 | from utils import quaternion_distance, inv_log_quaternion
 15 | 
 16 | 
 17 | def reverse_preprocess(img, to_int=False, shift=False):
 18 |     img = img[..., ::-1]
 19 |     if shift:
 20 |         img += 0.5
 21 |     if to_int:
 22 |         return img.astype(np.int)
 23 |     return img
 24 | 
 25 | 
 26 | class VideoRecorder:
 27 |     def __init__(self, save_dir, dim_state=3, norm_xyz=1.):
 28 |         self.dim_state = dim_state
 29 |         self.save_dir = save_dir
 30 |         self.norm_xyz = norm_xyz
 31 |         self.traj_true = np.zeros((dim_state, 0))
 32 |         self.traj_est = np.zeros((dim_state, 0))
 33 |         self.counter = 1
 34 |         self.gs = gridspec.GridSpec(2, 3, width_ratios=[2, 2, 3])
 35 | 
 36 |     def record(self, real, emit, xt, xe):
 37 |         real = reverse_preprocess(real)
 38 |         emit = reverse_preprocess(emit)
 39 |         x_true = xt.copy()
 40 |         x_true *= self.norm_xyz
 41 |         x_est = xe.copy()
 42 |         x_est *= self.norm_xyz
 43 |         self.traj_true = np.hstack((self.traj_true, x_true[:3, :]))
 44 |         self.traj_est = np.hstack((self.traj_est, x_est[:3, :]))
 45 | 
 46 |         fig = plt.figure(figsize=(12, 6))
 47 |         plt.subplots_adjust(wspace=-0.2)
 48 | 
 49 |         ax = fig.add_subplot(self.gs[:2, :2])
 50 |         ax.plot(self.traj_true[0, :], self.traj_true[1, :], '--b', label='true')
 51 |         ax.plot(self.traj_est[0, :], self.traj_est[1, :], '-r', label='estimate')
 52 |         ax.scatter(self.traj_true[0, -1], self.traj_true[1, -1], c='b', s=20)
 53 |         ax.scatter(self.traj_est[0, -1], self.traj_est[1, -1], c='r', s=20)
 54 |         ax.set_xlabel("x [m]")
 55 |         ax.set_ylabel("y [m]")
 56 |         ax.tick_params(pad=-0.5)
 57 |         ax.set_xlim([-200, 200])
 58 |         ax.set_ylim([-250, 200])
 59 |         ax.legend()
 60 | 
 61 |         ax = fig.add_subplot(self.gs[0, 2])
 62 |         ax.imshow(real)
 63 |         ax.set_title("Real Image")
 64 |         ax.grid(False)
 65 |         plt.axis('off')
 66 | 
 67 |         ax = fig.add_subplot(self.gs[1, 2])
 68 |         ax.imshow(emit)
 69 |         ax.set_title("Generated Image")
 70 |         ax.grid(False)
 71 |         plt.axis('off')
 72 | 
 73 |         path = os.path.join(self.save_dir, "{}.png".format(self.counter))
 74 |         plt.savefig(path, bbox_inches='tight')
 75 |         plt.close(fig)
 76 |         self.counter += 1
 77 | 
 78 |     def finalize(self):
 79 | 
 80 |         def update_image(num, load_dir, ax, update):
 81 |             ax.clear()
 82 |             path = os.path.join(load_dir, "{}.png".format(num+1))
 83 |             data = plt.imread(path)
 84 |             update(1)
 85 |             img = ax.imshow(data)
 86 |             plt.axis('off')
 87 |             return img,
 88 | 
 89 |         print("Generating video in {}...".format(self.save_dir))
 90 |         with tqdm(total=self.counter) as tbar:
 91 |             writer = animation.writers['ffmpeg'](fps=10, metadata=dict(artist='Me'), bitrate=1800)
 92 |             f = plt.figure(figsize=(12, 5))
 93 |             f.tight_layout()
 94 |             ax = f.add_subplot(1, 1, 1)
 95 |             line_ani = animation.FuncAnimation(f, update_image, self.counter - 1,
 96 |                                                fargs=(self.save_dir, ax, tbar.update),
 97 |                                                interval=50, blit=True)
 98 |             video_path = os.path.join(self.save_dir, "video.mp4")
 99 |             line_ani.save(video_path, writer=writer)
100 |         plt.close(f)
101 | 
102 | 
103 | def translational_error(true, pred):
104 |     error = np.sqrt(np.sum(np.square(true[:, :3, 0] - pred[:, :3, 0]), axis=1))
105 |     return np.mean(error), np.median(error)
106 | 
107 | 
108 | def rotational_error(true, pred):
109 |     q_true = [inv_log_quaternion(true[i, 3:, 0]) for i in range(len(true))]
110 |     q_pred = [inv_log_quaternion(pred[i, 3:, 0]) for i in range(len(pred))]
111 |     error = [quaternion_distance(qt, qp) for qt, qp in zip(q_true, q_pred)]
112 |     return np.mean(error) / np.pi * 180, np.median(error) / np.pi * 180
113 | 
114 | 
115 | def plot_result(x_est, x_true=None, save_dir=None, use_robotcar=False):
116 |     """Plot the output trajectories, estimate the error if possible. """
117 |     if use_robotcar:
118 |         x_est[:, 2:-1, 0] = 0.
119 | 
120 |     f, axarr = plt.subplots(2, sharex='all', figsize=(12, 12))
121 |     for p in range(2):
122 |         start_idx = p * 3
123 |         end_idx = min(start_idx + 3, x_est.shape[1])
124 |         for i, idx in enumerate(range(start_idx, end_idx)):
125 |             axarr[p].plot(x_est[:, idx, 0], '-', label='x%d' % idx, color='C%d' % i)
126 |             if x_true is not None:
127 |                 axarr[p].plot(x_true[:, idx, 0], '--', label='x%d true' % idx, color='C%d' % i)
128 |         axarr[p].legend()
129 | 
130 |     if x_true is not None:
131 |         mu_xyz, median_xyz = translational_error(x_true, x_est)
132 |         mu_rot, median_rot = rotational_error(x_true, x_est)
133 |         if not use_robotcar:
134 |             axarr[0].set_title("Extended Kalman Filter Error: est = %.3fm, %.3fdeg" % (median_xyz, median_rot))
135 |         else:
136 |             axarr[0].set_title("Extended Kalman Filter Error (mean): est = %.3fm, %.3fdeg" % (mu_xyz, mu_rot))
137 |     else:
138 |         axarr[0].set_title("Extended Kalman Filter Result")
139 | 
140 |     if save_dir is not None:
141 |         path = os.path.join(save_dir, "filter")
142 |         plt.savefig(path)
143 | 
144 | 
145 | def equidistant_generate(system, model, num_sample=10, model_traj=None, save_path=None, use_robotcar=False):
146 |     # get system trajectory
147 |     horizon = system.horizon
148 |     model_traj = model_traj[:horizon, ...] if model_traj is not None else None
149 |     system.reset()
150 |     traj = [system.get_pos()]
151 |     for i in tqdm(range(horizon - 1)):
152 |         system.step()
153 |         traj.append(system.get_pos())
154 |     traj = np.asarray(traj)
155 | 
156 |     plotting_gap = (horizon - 1) // (num_sample - 1)
157 |     plotting_steps = list(range(0, horizon, plotting_gap))
158 | 
159 |     real_images = []
160 |     generated_images = []
161 | 
162 |     for i in plotting_steps:
163 |         if model_traj is not None:
164 |             pos = model_traj[i].copy()
165 |             pos[:3, :] /= system.norm_xyz
166 |             pos[3:, :] /= system.norm_q
167 |         else:
168 |             pos = traj[i]
169 |         gen = reverse_preprocess(model.generate(pos))
170 |         real = reverse_preprocess(system.render(i))
171 |         real = transform.resize(real, gen.shape,
172 |                                 anti_aliasing=True, mode='constant')
173 |         real_images.append(real)
174 |         generated_images.append(gen)
175 |         if save_path:
176 |             fig, ax = plt.subplots(1, frameon=False)
177 |             ax.imshow(real)
178 |             path = os.path.join(save_path, "real_{}".format(i))
179 |             ax.grid(False)
180 |             ax.xaxis.set_visible(False)
181 |             ax.yaxis.set_visible(False)
182 |             plt.axis('off')
183 |             plt.savefig(path, bbox_inches='tight')
184 |             plt.close(fig)
185 | 
186 |             fig, ax = plt.subplots(1, frameon=False)
187 |             ax.imshow(gen)
188 |             path = os.path.join(save_path, "generated_{}".format(i))
189 |             ax.grid(False)
190 |             ax.xaxis.set_visible(False)
191 |             ax.yaxis.set_visible(False)
192 |             plt.axis('off')
193 |             plt.savefig(path, bbox_inches='tight')
194 |             plt.close(fig)
195 | 
196 |     fig = plt.figure(figsize=(6, 6))
197 |     if not use_robotcar:
198 |         ax = fig.gca(projection='3d')
199 |         traj[:, :3, :] *= system.norm_xyz
200 |         traj[:, 3:, :] *= system.norm_q
201 |         ax.plot(traj[:, 0, 0], traj[:, 1, 0], traj[:, 2, 0], '--b')
202 |         if model_traj is not None:
203 |             ax.plot(model_traj[:, 0, 0], model_traj[:, 1, 0], model_traj[:, 2, 0], '-r')
204 |         for i in plotting_steps[:-1]:
205 |             ax.scatter(traj[i, 0, 0], traj[i, 1, 0], traj[i, 2, 0], c='b', s=20)
206 |             if model_traj is not None:
207 |                 ax.scatter(model_traj[i, 0, 0], model_traj[i, 1, 0], model_traj[i, 2, 0], c='r', s=20)
208 |         ax.set_xlabel('x[m]')
209 |         ax.set_ylabel('y[m]')
210 |         ax.set_zlabel('z[m]')
211 |         ax.tick_params(pad=-0.5)
212 |     else:
213 |         ax = fig.gca()
214 |         traj[:, :3, 0] *= system.norm_xyz
215 |         traj[:, 3:, 0] *= system.norm_q
216 |         ax.plot(traj[:, 0, 0], traj[:, 1, 0], '--b')
217 |         if model_traj is not None:
218 |             ax.plot(model_traj[:, 0, 0], model_traj[:, 1, 0], '-r')
219 |         for i in plotting_steps[:-1]:
220 |             ax.scatter(traj[i, 0, 0], traj[i, 1, 0], c='b', s=20)
221 |             if model_traj is not None:
222 |                 ax.scatter(model_traj[i, 0, 0], model_traj[i, 1, 0], c='r', s=20)
223 | 
224 |         ax.set_xlabel('x[m]')
225 |         ax.set_ylabel('y[m]')
226 |         ax.tick_params(pad=-0.5)
227 |         ax.set_xlim([-200, 200])
228 |         ax.set_ylim([-250, 200])
229 | 
230 |     if save_path is not None:
231 |         path = os.path.join(save_path, "trajectory")
232 |         plt.savefig(path, bbox_inches='tight')
233 |     plt.close(fig)
234 | 
235 |     # joint figure
236 |     if save_path:
237 |         shape = model.generate(traj[0]).shape
238 |         join_figure = np.zeros((shape[0] * 2, shape[1] * num_sample, 3))
239 |         for i, (real, gen) in enumerate(zip(real_images, generated_images)):
240 |             s = i * shape[1]
241 |             e = (i + 1) * shape[1]
242 |             join_figure[0:shape[0], s:e, :] = real
243 |             join_figure[shape[0]:2 * shape[0], s:e, :] = gen
244 | 
245 |         fig, ax = plt.subplots(1, frameon=False)
246 |         ax.imshow(join_figure)
247 |         path = os.path.join(save_path, "joint")
248 |         ax.grid(False)
249 |         ax.xaxis.set_visible(False)
250 |         ax.yaxis.set_visible(False)
251 |         plt.axis('off')
252 |         plt.savefig(path, bbox_inches='tight')
253 |         plt.close(fig)
254 | 
255 | 
256 | 


--------------------------------------------------------------------------------
/gen_map.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | from ops import build_fnn, dcgan_decode, dcgan_encode
  7 | from ops import loss_rec_normal, loss_reg_normal, loss_kl_normal
  8 | 
  9 | 
 10 | class GenMap(object):
 11 |     def __init__(self, arch_encoder_attribute, image_size, dim_features=512, num_conv_layer=4,
 12 |                  transfer_fct=tf.nn.relu, learning_rate=1e-3, batch_size=100, rec_log_var=0.,
 13 |                  training=False, reconstruct_size=(64, 64, 3)):
 14 |         self.transfer_fct = transfer_fct
 15 |         self.learning_rate = learning_rate
 16 |         self.batch_size = batch_size
 17 | 
 18 |         self.dim_attribute = arch_encoder_attribute[0]
 19 |         self.latent_dim = arch_encoder_attribute[-1]
 20 |         self.reconstruction_dim = np.prod(reconstruct_size)
 21 | 
 22 |         self.x = tf.placeholder(tf.float32, [None, self.dim_attribute], name="pose_input")
 23 |         y_shape = [None]
 24 |         y_shape.extend(image_size)
 25 |         self.y = tf.placeholder(tf.float32, y_shape, name="image_input")
 26 |         self.y_small = tf.placeholder(tf.float32, [None, self.reconstruction_dim], name="image_reconstruct")
 27 | 
 28 |         arch_encoder_x = arch_encoder_attribute.copy()
 29 |         arch_encoder_x[-1] = arch_encoder_x[-1] * 2
 30 |         z_x_all = build_fnn(self.x, arch_encoder_x, transfer_fct, scope="encoder_x")
 31 |         self.z_x_mean, z_x_log_sigma_sq = tf.split(z_x_all, num_or_size_splits=2, axis=1)
 32 | 
 33 |         self.z_x_log_sigma_sq = tf.zeros_like(z_x_log_sigma_sq)  # set the z_x_sigma to be 1
 34 | 
 35 |         # reparameterization
 36 |         epsilon_x = tf.random_normal((self.batch_size, self.latent_dim))
 37 |         self.z_x = self.z_x_mean + (tf.sqrt(tf.exp(self.z_x_log_sigma_sq)) * epsilon_x)
 38 | 
 39 |         self.z_y_mean, self.z_y_log_sigma_sq = dcgan_encode(final_channel=dim_features,
 40 |                                                             num_layer=num_conv_layer,
 41 |                                                             inputs=self.y,
 42 |                                                             output_size=self.latent_dim,
 43 |                                                             activation=transfer_fct,
 44 |                                                             scope="encoder_y",
 45 |                                                             training=training)
 46 | 
 47 |         # reparameterization
 48 |         epsilon_y = tf.random_normal((self.batch_size, self.latent_dim))
 49 |         self.z_y = self.z_y_mean + (tf.sqrt(tf.exp(self.z_y_log_sigma_sq)) * epsilon_y)
 50 | 
 51 |         # decode
 52 |         y_rec_pre = dcgan_decode(inputs=self.z_y,
 53 |                                  output_size=reconstruct_size,
 54 |                                  num_layer=num_conv_layer,
 55 |                                  channel_start=dim_features,
 56 |                                  activation=transfer_fct,
 57 |                                  scope="decoder_y",
 58 |                                  training=training)
 59 | 
 60 |         self.alpha = tf.get_variable("image_variance_log", initializer=rec_log_var * tf.ones((1,)), trainable=False)
 61 | 
 62 |         self.y_rec = (1 + 2e-6) * tf.sigmoid(y_rec_pre) - 1e-6  # to avoid numeric issues
 63 | 
 64 |         self._minimize_loss(self.y_small, self.y_rec, self.z_x_mean, self.z_x_log_sigma_sq,
 65 |                             self.z_y_mean, self.z_y_log_sigma_sq, learning_rate, self.alpha)
 66 | 
 67 |         self.saver = tf.train.Saver(max_to_keep=1)
 68 |         config = tf.ConfigProto()
 69 |         config.gpu_options.allow_growth = True
 70 |         self.sess = tf.Session(config=config)
 71 |         self.sess.run(tf.global_variables_initializer())
 72 | 
 73 |     def _minimize_loss(self, val, val_rec, z_s_mu, z_s_log_var, z_val_mu, z_val_log_var, learning_rate, val_log_var):
 74 |         """Minimize the loss of VAE, which contains kl-divergence term and reconstruction term."""
 75 |         loss_rec = loss_rec_normal(val, val_rec, log_var=val_log_var)
 76 |         loss_kl = loss_kl_normal(z_val_mu, z_val_log_var, z_s_mu, z_s_log_var)
 77 |         self.cost_1 = tf.reduce_mean(loss_rec / self.reconstruction_dim)
 78 |         self.cost_2 = tf.reduce_mean(loss_kl / self.latent_dim)
 79 |         self.cost = self.cost_1 + self.cost_2
 80 |         self.train_op = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
 81 | 
 82 |     def batch_train(self, x, y, y_rec):
 83 |         _, cost, cost1, cost2 = self.sess.run((self.train_op, self.cost, self.cost_1, self.cost_2),
 84 |                                               feed_dict={self.x: x, self.y: y, self.y_small: y_rec})
 85 |         return cost, cost1, cost2
 86 | 
 87 |     def infer_by_x(self, x, sample=False):
 88 |         """Infer the latent variable z given input x"""
 89 |         if not sample:
 90 |             return self.sess.run(self.z_x_mean, feed_dict={self.x: x})
 91 |         else:
 92 |             return self.sess.run(self.z_x, feed_dict={self.x: x})
 93 | 
 94 |     def cov_infer_by_y(self, y):
 95 |         log_var = self.sess.run(self.z_y_log_sigma_sq, feed_dict={self.y: y})
 96 |         cov = np.diag(np.squeeze(np.exp(log_var)))
 97 |         return cov
 98 | 
 99 |     def cov_infer_by_x(self, x):
100 |         log_var = self.sess.run(self.z_x_log_sigma_sq, feed_dict={self.x: x})
101 |         cov = np.diag(np.squeeze(np.exp(log_var)))
102 |         return cov
103 | 
104 |     def sigma_infer_by_y(self, y):
105 |         log_var = self.sess.run(self.z_y_log_sigma_sq, feed_dict={self.y: y})
106 |         return np.sqrt(np.exp(log_var))
107 | 
108 |     def sigma_infer_by_x(self, x):
109 |         log_var = self.sess.run(self.z_x_log_sigma_sq, feed_dict={self.x: x})
110 |         return np.sqrt(np.exp(log_var))
111 | 
112 |     def infer_by_y(self, y, sample=False):
113 |         """Infer the latent variable z given label y"""
114 |         if not sample:
115 |             return self.sess.run(self.z_y_mean, feed_dict={self.y: y})
116 |         else:
117 |             return self.sess.run(self.z_y, feed_dict={self.y: y})
118 | 
119 |     def construct_y_from_x(self, x):
120 |         z = self.sess.run(self.z_x_mean, feed_dict={self.x: x})
121 |         y = self.sess.run(self.y_rec, feed_dict={self.z_y: z})
122 |         return y
123 | 
124 |     def reconstruct(self, y):
125 |         z = self.sess.run(self.z_y_mean, feed_dict={self.y: y})
126 |         y = self.sess.run(self.y_rec, feed_dict={self.z_y: z})
127 |         return y
128 | 
129 |     def save(self, save_path):
130 |         self.saver.save(self.sess, save_path)
131 |         print("Model saved in path: {}".format(save_path), flush=True)
132 | 
133 |     def restore(self, path):
134 |         # reinitialize adam
135 |         # self.sess.run([v.initializer for v in self.adam_params])
136 |         if not path.endswith(".ckpt"):
137 |             path = tf.train.latest_checkpoint(path)
138 |         self.saver.restore(self.sess, path)
139 |         print("Model restored from path: {}".format(path))
140 | 
141 |     def destruct(self):
142 |         tf.reset_default_graph()
143 |         self.sess.close()
144 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | import numpy as np
  4 | import skimage.transform as transform
  5 | 
  6 | import matplotlib
  7 | matplotlib.use('agg')
  8 | import matplotlib.pyplot as plt
  9 | 
 10 | from tqdm import tqdm
 11 | 
 12 | from utils import quaternion_multiply, inv_log_quaternion, log_quaternion
 13 | 
 14 | from gen_map import GenMap
 15 | 
 16 | plt.style.use("seaborn")
 17 | 
 18 | 
 19 | class Model:
 20 |     def __init__(self, batch_size=100, dim_state=6, dim_control=7, dim_observation=2,
 21 |                  image_size=(96, 96, 3), reconstruct_size=(64, 64, 3), reconstruction_accuracy=0.,
 22 |                  training=False, learning_rate=1e-3):
 23 |         self.dim_state = dim_state
 24 |         self.dim_control = dim_control
 25 |         self.dim_observation = dim_observation
 26 |         self.batch_size = batch_size
 27 |         self.image_size_origin = tuple(image_size)
 28 |         self.reconstruct_size_origin = tuple(reconstruct_size)
 29 |         self.reconstruct_size = np.prod(reconstruct_size)
 30 |         self.image_size = np.prod(image_size)
 31 |         self.norm_xyz = self.norm_q = 1.
 32 | 
 33 |         self.network = GenMap(arch_encoder_attribute=[self.dim_state, 512, self.dim_observation],
 34 |                               image_size=image_size,
 35 |                               dim_features=512,
 36 |                               num_conv_layer=4,
 37 |                               batch_size=batch_size,
 38 |                               rec_log_var=reconstruction_accuracy,
 39 |                               training=training,
 40 |                               reconstruct_size=self.reconstruct_size_origin,
 41 |                               learning_rate=learning_rate)
 42 | 
 43 |     def set_norm(self, norm_xyz, norm_q):
 44 |         self.norm_xyz = norm_xyz
 45 |         self.norm_q = norm_q
 46 | 
 47 |     def step(self, x, u, dt):
 48 |         """Apply quaternion for step"""
 49 |         xyz = (x[:3, 0] * self.norm_xyz + u[:3, 0] * dt) / self.norm_xyz
 50 |         qt = quaternion_multiply(u[3:, 0], inv_log_quaternion(x[3:, 0] * self.norm_q))
 51 |         x1 = np.concatenate((xyz, log_quaternion(qt) / self.norm_q), axis=0).reshape(-1, 1)
 52 |         return x1
 53 | 
 54 |     def emit(self, state):
 55 |         state = state.reshape(-1, self.dim_state)
 56 |         observation = self.network.infer_by_x(state)
 57 |         observation = observation.reshape(self.dim_observation, -1)
 58 |         return observation
 59 | 
 60 |     def generate(self, state):
 61 |         state = state.reshape(-1, self.dim_state)
 62 |         img = self.network.construct_y_from_x(state)
 63 |         img = img.reshape(self.reconstruct_size_origin)
 64 |         img = np.clip(img, 0, 1.)
 65 |         return img
 66 | 
 67 |     def reconstruct(self, img):
 68 |         img = img.reshape((-1,) + self.image_size_origin)
 69 |         rec = self.network.reconstruct(img)
 70 |         rec = rec.reshape(self.reconstruct_size_origin)
 71 |         rec = np.clip(rec, 0, 1.)
 72 |         return rec
 73 | 
 74 |     def wrap(self, img):
 75 |         img = img.reshape((-1,) + self.image_size_origin)  # for CNN
 76 |         observation = self.network.infer_by_y(img)
 77 |         cov_observation = self.network.cov_infer_by_y(img)
 78 |         observation = observation.reshape(self.dim_observation, -1)
 79 |         return observation, cov_observation
 80 | 
 81 |     def train(self, data_loader, model_dir, save_every=5, epoch=100):
 82 | 
 83 |         def update(loss_list, loss_list1, loss_list2, ep):
 84 |             """
 85 |             Plot the intemediate training loss curve, save it to the model dir, and save the checkpoint.
 86 |             :param loss_list: overall loss
 87 |             :param loss_list1: reconstruction loss
 88 |             :param loss_list2: KL-divergence loss
 89 |             :param ep: current epoch index
 90 |             :return: None
 91 |             """
 92 |             f = plt.figure()
 93 |             plt.plot(loss_list, label='overall')
 94 |             plt.plot(loss_list1, label='reconstruction')
 95 |             plt.plot(loss_list2, label='kl_div')
 96 |             plt.title("training curve")
 97 |             plt.xlabel("number of batch")
 98 |             plt.ylabel("-ELBO")
 99 |             p = int(0.05 * len(loss_list))
100 |             _max = np.max([loss_list[p], loss_list1[p], loss_list2[p]])
101 |             _min = np.min([loss_list, loss_list1, loss_list2])
102 |             _range = _max - _min
103 |             print("Error: {}, Reconstruction: {}, KL-div: {}".format(np.mean(loss_list[-100:]),
104 |                                                                      np.mean(loss_list1[-100:]),
105 |                                                                      np.mean(loss_list2[-100:])))
106 |             plt.ylim([_min - 0.03 * _range, _max + _range])
107 |             plt.legend()
108 |             path = os.path.join(model_dir, "training_curve.png")
109 |             plt.savefig(path)
110 |             checkpoint_path = os.path.join(model_dir, "model_{}.ckpt".format(ep))
111 |             self.save(checkpoint_path)
112 |             plt.close(f)
113 | 
114 |         def get_epoch(dataset):
115 |             train_inputs, train_targets = dataset.get_train_set(sample=False)
116 |             reconstructs = []
117 |             for im in train_inputs:
118 |                 reconstructs.append(transform.resize(im, self.reconstruct_size_origin,
119 |                                                      anti_aliasing=True, mode='constant').reshape(-1))  # * 255.)
120 |             reconstructs = np.array(reconstructs)
121 |             return train_inputs, train_targets, reconstructs
122 | 
123 |         loss_all, loss_all1, loss_all2 = [], [], []
124 |         input_len = len(data_loader.times_train)
125 | 
126 |         inputs, targets, reconstructs = get_epoch(data_loader)
127 | 
128 |         print("Training the observer...")
129 |         with tqdm(total=(epoch * input_len)) as tbar:
130 |             for e in range(epoch):
131 |                 loss_list, loss_list1, loss_list2 = [], [], []
132 |                 num_iters = int(input_len / self.batch_size)
133 |                 if num_iters * self.batch_size < input_len:
134 |                     num_iters += 1
135 | 
136 |                 for i in range(num_iters):
137 |                     idx = np.random.choice(input_len, size=self.batch_size)
138 |                     batch_inputs = inputs[idx, ...]
139 |                     batch_targets = targets[idx, ...]
140 |                     batch_reconstructs = reconstructs[idx, ...]
141 | 
142 |                     loss, loss_1, loss_2 = self.network.batch_train(batch_targets, batch_inputs, batch_reconstructs)
143 |                     loss_list.append(loss)
144 |                     loss_list1.append(loss_1)
145 |                     loss_list2.append(loss_2)
146 | 
147 |                 final_mse = np.mean(loss_list)
148 |                 tbar.set_postfix(err="%.3f" % final_mse)
149 |                 tbar.update(input_len)
150 |                 loss_all.extend(loss_list)
151 |                 loss_all1.extend(loss_list1)
152 |                 loss_all2.extend(loss_list2)
153 |                 if (e+1) % save_every == 0:
154 |                     update(loss_all, loss_all1, loss_all2, e + 1)
155 |         # save the final model
156 |         checkpoint_path = os.path.join(model_dir, 'model.ckpt')
157 |         self.save(checkpoint_path)
158 |         return loss_all
159 | 
160 |     def save(self, path):
161 |         self.network.save(path)
162 | 
163 |     def restore(self, path):
164 |         self.network.restore(path)
165 | 
166 |     def close(self):
167 |         self.network.destruct()
168 | 


--------------------------------------------------------------------------------
/ops.py:
--------------------------------------------------------------------------------
  1 | # Tensorflow related operations
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | 
  7 | def build_fnn(x, units_list, activation=tf.sigmoid, scope="fnn", reuse=False):
  8 |     """Construct a fnn under the scope, if the scope already exists, reuse needs to be set as true."""
  9 |     num_layers = len(units_list)
 10 |     h = x
 11 |     with tf.variable_scope(scope, reuse=reuse):
 12 |         for i in range(num_layers - 1):
 13 |             a = tf.layers.dense(h, units_list[i+1], activation=activation,
 14 |                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
 15 |                                 bias_initializer=tf.zeros_initializer())
 16 |             if i < num_layers - 2:
 17 |                 h = activation(a)
 18 |             else:
 19 |                 h = a
 20 |     return h
 21 | 
 22 | 
 23 | def loss_rec_bernoulli(ori, rec):
 24 |     return -tf.reduce_sum(ori * tf.log_sigmoid(rec) + (1 - ori) * tf.log_sigmoid(-rec), reduction_indices=1)
 25 | 
 26 | 
 27 | def loss_rec_categorical(ori, rec):
 28 |     """Categorial reconstrution """
 29 |     return -tf.reduce_sum(ori * tf.nn.log_softmax(rec), reduction_indices=1)
 30 | 
 31 | 
 32 | def loss_rec_normal(ori, mean, log_var=0.):
 33 |     """Negative log-likelihood of a normal distribution"""
 34 |     dim = ori.shape[1].value
 35 |     return 0.5 * (dim * np.log(np.pi * 2) + tf.reduce_sum(log_var + tf.square(ori - mean) / tf.exp(log_var),
 36 |                                                           reduction_indices=1))
 37 | 
 38 | 
 39 | def loss_reg_normal(mean, log_var):
 40 |     """(Positive) KL div from given distribution to standard normal distribution"""
 41 |     return -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), reduction_indices=1)
 42 | 
 43 | 
 44 | def loss_kl_normal(mean1, log_var1, mean2, log_var2):
 45 |     """(Positive) KL divergence from 1 to 2"""
 46 |     return 0.5 * tf.reduce_sum(-1 - log_var1 + log_var2
 47 |                                + tf.square(mean1 - mean2) / tf.exp(log_var2)
 48 |                                + tf.exp(log_var1 - log_var2),
 49 |                                reduction_indices=1)
 50 | 
 51 | # Conv / FSConv
 52 | 
 53 | 
 54 | def dcgan_decode(inputs, output_size, num_layer=3, channel_start=512, activation=tf.nn.relu,
 55 |                  scope="deconv", reuse=False, training=False):
 56 |     """Image decoder, taken ideas from the DC-GAN.
 57 | 
 58 |     :param inputs: the input tensor, 2D
 59 |     :param output_size: the size of the desired generated picture, a triple, e.g. (64, 64, 3)
 60 |     :param num_layer: number of Conv layers
 61 |     :param channel_start: number of initial features in the network
 62 |     :param activation: the activation function used
 63 |     :param scope: the tensorflow scope
 64 |     :param reuse: set to true if this scope of params were created somewhere before, and need to be reused.
 65 |     :param training: set to true if in training mode, for batch normalization only
 66 |     :returns
 67 |         h: the output tensor, 4D
 68 |     """
 69 |     y_w, y_h, y_c = output_size
 70 |     original_h = y_h // (2 ** num_layer)
 71 |     original_w = y_w // (2 ** num_layer)
 72 |     with tf.variable_scope(scope, reuse=reuse):
 73 |         # reshape layer --> cubic layer
 74 |         net = tf.layers.dense(inputs, channel_start * original_h * original_w, activation=activation,
 75 |                               kernel_initializer=tf.contrib.layers.xavier_initializer(),
 76 |                               bias_initializer=tf.zeros_initializer())
 77 |         net = tf.reshape(net, [-1, original_h, original_w, channel_start])
 78 | 
 79 |         for i in range(num_layer - 1):
 80 |             num_filter = channel_start // (2 ** (i+1))
 81 |             net = tf.layers.conv2d_transpose(net, filters=num_filter, kernel_size=[5, 5],
 82 |                                              strides=2, padding='same')
 83 |             # net = tf.layers.batch_normalization(net, training=training)
 84 |             net = activation(net)
 85 | 
 86 |         net = tf.layers.conv2d_transpose(net, filters=y_c, kernel_size=[5, 5], strides=(2, 2),
 87 |                                          padding='same')
 88 | 
 89 |         h = tf.reshape(net, [-1, np.prod(output_size)])
 90 | 
 91 |     return h
 92 | 
 93 | 
 94 | def dcgan_encode(inputs, output_size, num_layer=3, final_channel=512, activation=tf.nn.relu,
 95 |                  scope="encode", reuse=False, training=False):
 96 |     """Image encoder, taken ideas from the DC-GAN.
 97 | 
 98 |     :param inputs: the input tensor, should be 4D.
 99 |     :param output_size: the output size of the encoder, one dimensional, identical for mean and log variance
100 |     :param num_layer: how many conv layers are stacked, excluding the final linear layer
101 |     :param final_channel: number of filters for the final conv layer, also defines the sizes for previous layers
102 |     :param activation: activation function in use for conv layers, excluding the final linear layer
103 |     :param scope: scope name for tensorflow graph
104 |     :param reuse: if reuse parameters, set to true if this scope was created somewhere before and needs to be reused
105 |     :param training: if is in the training mode, for batch norm
106 |     :returns
107 |         mean: a tensor of size [inputs.shape[0], output_size]
108 |         log_variance: a tensor of size [inputs.shape[0], output_size]
109 |     """
110 |     with tf.variable_scope(scope, reuse=reuse):
111 | 
112 |         net = inputs
113 | 
114 |         for i in range(num_layer):
115 |             scale = 2**(num_layer - i - 1)  # e.g. 3-layer: 2**2, 2**1, 2**0
116 |             net = tf.layers.conv2d(net, filters=final_channel / scale, kernel_size=[5, 5],
117 |                                    strides=2, padding='same')
118 |             # net = tf.layers.batch_normalization(net, training=training)
119 |             net = activation(net)
120 | 
121 |         net = tf.reduce_mean(net, [1, 2], keepdims=False)
122 | 
123 |         mean = tf.layers.dense(inputs=net, units=output_size,
124 |                                kernel_initializer=tf.contrib.layers.xavier_initializer(),
125 |                                bias_initializer=tf.zeros_initializer())
126 |         log_variance = tf.layers.dense(inputs=net, units=output_size,
127 |                                        kernel_initializer=tf.contrib.layers.xavier_initializer(),
128 |                                        bias_initializer=tf.zeros_initializer())
129 |     return mean, log_variance


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Introduction
  3 | This is the code base for our work of Generative Map at <https://arxiv.org/abs/1902.11124>.  
  4 | It is an effort to combine generative model (in particular, [Variational Auto-Encoders](https://arxiv.org/abs/1312.6114)) and the classic Kalman filter for generation with localization.  
  5 | For more details, please refer to [our paper on arXiv](https://arxiv.org/abs/1902.11124).
  6 | 
  7 | ## Dependencies
  8 | 1. Python3, and the following packages  
  9 | ```
 10 | pip install numpy scikit-image tqdm tensorflow==1.13.1
 11 | ```
 12 | 
 13 | 2. Datasets:  
 14 |   * 7-Scenes: Download from [their website](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/).  
 15 |   * RobotCar: Register & Download from [here](https://robotcar-dataset.robots.ox.ac.uk/). Remark: we adopt the [preprocessing toolkit from the RobotCar authors](https://github.com/ori-mrg/robotcar-dataset-sdk) and generate data first in `.pkl` format, then load it with `data_loader_robotcar.py`. 
 16 | 
 17 | ## Usage
 18 | For training, one example use can be found below. The `PATH_TO_DATA_DIR` points to the data folder, for example in 7-Scenes it is the folder where you can find `TrainSplit.txt`, `TestSplit.txt`, and folders of `seq-xx`.  
 19 | ```
 20 | python train.py --data_dir <PATH_TO_DATA_DIR>
 21 | ```
 22 | 
 23 | For evaluation, one example of estimation with generation using a constant model is  
 24 | ```
 25 | python eval.py --model_dir <PATH_TO_YOUR_MODEL_DIR_WITH_CHECKPOINTS> --generation_mode 2 --no_model
 26 | ```
 27 | 
 28 | ## Detail Usage
 29 | ```
 30 | usage: train.py [-h] [--training_size TRAINING_SIZE]
 31 |                 [--dim_observation DIM_OBSERVATION] [--batch_size BATCH_SIZE]
 32 |                 [--num_epochs NUM_EPOCHS] [--save_every SAVE_EVERY]
 33 |                 [--model_dir MODEL_DIR] [--data_dir DATA_DIR]
 34 |                 [--reconstruct_accuracy RECONSTRUCT_ACCURACY]
 35 |                 [--dim_input DIM_INPUT DIM_INPUT DIM_INPUT]
 36 |                 [--dim_reconstruct DIM_RECONSTRUCT DIM_RECONSTRUCT DIM_RECONSTRUCT]
 37 |                 [--learning_rate LEARNING_RATE] [--use_robotcar]
 38 | 
 39 | optional arguments:
 40 |   -h, --help            show this help message and exit
 41 |   --training_size TRAINING_SIZE
 42 |                         size of the training set, sampled from the given
 43 |                         sequences
 44 |   --dim_observation DIM_OBSERVATION
 45 |                         dimension of the latent representation ("observation")
 46 |   --batch_size BATCH_SIZE
 47 |                         mini-batch size for training
 48 |   --num_epochs NUM_EPOCHS
 49 |                         number of epochs for training
 50 |   --save_every SAVE_EVERY
 51 |                         intermediate saving frequency by epochs
 52 |   --model_dir MODEL_DIR
 53 |                         directory to save current model to
 54 |   --data_dir DATA_DIR   directory to load the dataset
 55 |   --reconstruct_accuracy RECONSTRUCT_ACCURACY
 56 |                         how accurate should the reconstruction be, float,
 57 |                         negative and positive, the smaller the more accurate
 58 |   --dim_input DIM_INPUT DIM_INPUT DIM_INPUT
 59 |                         the height, width, and number of channel for input
 60 |                         image, separated by space
 61 |   --dim_reconstruct DIM_RECONSTRUCT DIM_RECONSTRUCT DIM_RECONSTRUCT
 62 |                         the height, width, and number of channel for
 63 |                         reconstructed image, separated by space
 64 |   --learning_rate LEARNING_RATE
 65 |                         learning rate
 66 |   --use_robotcar        if the robotcar data loader will be used, since it is
 67 |                         stored in a different format.
 68 | ```
 69 |   
 70 | ```
 71 | usage: eval.py [-h] [--model_dir MODEL_DIR] [--state_var STATE_VAR]
 72 |                [--visualize_dir VISUALIZE_DIR] [--video] [--no_model]
 73 |                [--generation_mode GENERATION_MODE] [--all] [--seq_idx SEQ_IDX]
 74 |                [--deviation DEVIATION]
 75 | 
 76 | optional arguments:
 77 |   -h, --help            show this help message and exit
 78 |   --model_dir MODEL_DIR
 79 |                         directory to load model from. If not provided, the
 80 |                         most recent dir in the default path will be used,
 81 |                         raise an Error if failed.
 82 |   --state_var STATE_VAR
 83 |                         estimated state transition variance for the model used
 84 |                         by extended Kalman filter, it is assumed to be
 85 |                         independent and stay the same for all dimension
 86 |   --visualize_dir VISUALIZE_DIR
 87 |                         directory to save the visualization output + video (if
 88 |                         apply)
 89 |   --video               if a video should be recorded comparing real and
 90 |                         estimated reconstructed images, together with
 91 |                         localization results
 92 |   --no_model            if the true state transition will be hidden to test
 93 |                         localization based only on images.
 94 |   --generation_mode GENERATION_MODE
 95 |                         0: equidistant sample image generation; 1: Kalman
 96 |                         filter estimation for localization; 2: perform both
 97 |   --all                 evaluate all the validation sequences and report the
 98 |                         performance w/o a picture.
 99 |   --seq_idx SEQ_IDX     evaluate the given indexed sequence in the text.
100 |   --deviation DEVIATION
101 |                         deviation across all state dimension for robust test.
102 | ```
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | import time
 4 | import json
 5 | import argparse
 6 | 
 7 | from data_loader_7scenes import DataLoader as DataLoader7Scenes
 8 | from data_loader_robotcar import DataLoader as DataLoaderRobotCar
 9 | 
10 | from model import Model
11 | 
12 | 
13 | def main():
14 |     parser = argparse.ArgumentParser()
15 |     parser.add_argument('--training_size', type=int, default=3000,
16 |                         help='size of the training set, sampled from the given sequences')
17 |     parser.add_argument('--dim_observation', type=int, default=128,
18 |                         help='dimension of the latent representation ("observation")')
19 |     parser.add_argument('--batch_size', type=int, default=64,
20 |                         help='mini-batch size for training')
21 |     parser.add_argument('--num_epochs', type=int, default=5000,
22 |                         help='number of epochs for training')
23 |     parser.add_argument('--save_every', type=int, default=200,
24 |                         help='intermediate saving frequency by epochs')
25 |     parser.add_argument('--model_dir', type=str, default="checkpoints/%s" % time.strftime("%Y-%m-%d_%H-%M-%S"),
26 |                         help='directory to save current model to')
27 |     parser.add_argument('--data_dir', type=str, default='data/7scenes/chess',
28 |                         help='directory to load the dataset')
29 |     parser.add_argument('--reconstruct_accuracy', type=float, default=-3.,
30 |                         help='how accurate should the reconstruction be, float, negative and positive, '
31 |                         'the smaller the more accurate')
32 |     parser.add_argument('--dim_input', nargs=3, default=[96, 96, 3],
33 |                         help='the height, width, and number of channel for input image, separated by space')
34 |     parser.add_argument('--dim_reconstruct', nargs=3, default=[64, 64, 3],
35 |                         help='the height, width, and number of channel for reconstructed image, separated by space')
36 |     parser.add_argument('--learning_rate', type=float, default=0.0001,
37 |                         help='learning rate')
38 |     parser.add_argument('--use_robotcar', default=False, action='store_true',
39 |                         help='if the robotcar data loader will be used, since it is stored in a different format.')
40 |     # # TODO reload&train is not implemented yet, as it was never really needed
41 |     # parser.add_argument('--load_model', type=str, default=None,
42 |     #                     help='Reload a model checkpoint and restore training.')
43 |     args = parser.parse_args()
44 | 
45 |     # convert the list inputs into require format
46 |     args.dim_input = tuple(map(int, args.dim_input))
47 |     args.dim_reconstruct = tuple(map(int, args.dim_reconstruct))
48 | 
49 |     train(args)
50 | 
51 | 
52 | def train(args):
53 |     if not os.path.isdir(args.model_dir):
54 |         os.makedirs(args.model_dir)
55 |     json_path = os.path.join(args.model_dir, "model_specs.json")
56 | 
57 |     # get the dataset / simulated system
58 |     DataLoader = DataLoaderRobotCar if args.use_robotcar else DataLoader7Scenes
59 |     dataloader = DataLoader(args.data_dir, img_width=args.dim_input[1], img_height=args.dim_input[0])
60 | 
61 |     # save the training flags
62 |     args_dict = vars(args)
63 |     args_dict['scaling_factor'] = [dataloader.norm_xyz, dataloader.norm_q]
64 |     with open(json_path, 'w') as outfile:
65 |         json.dump(args_dict, outfile, indent=4)
66 |     
67 |     # setup model
68 |     model = Model(dim_observation=args.dim_observation,
69 |                   batch_size=args.batch_size,
70 |                   reconstruction_accuracy=args.reconstruct_accuracy,
71 |                   image_size=args.dim_input,
72 |                   reconstruct_size=args.dim_reconstruct,
73 |                   training=True,
74 |                   learning_rate=args.learning_rate)
75 | 
76 |     # train model
77 |     model.train(dataloader,
78 |                 model_dir=args.model_dir,
79 |                 epoch=args.num_epochs,
80 |                 save_every=args.save_every)
81 |     model.close()
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     main()
86 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | # Fitler related helper functions
  2 | 
  3 | import math
  4 | import numpy as np
  5 | 
  6 | 
  7 | MACHINE_EPS = 1e-9
  8 | SINGULAR_FIX = True
  9 | 
 10 | 
 11 | def sigmoid(x):
 12 |     """Numerically stable sigmoid function"""
 13 |     y = np.zeros_like(x)
 14 |     y[x >= 0] = 1. / (1. + np.exp(- x[x >= 0]))
 15 |     z = np.exp(x[x < 0])
 16 |     y[x < 0] = z / (1. + z)
 17 |     return y
 18 | 
 19 | 
 20 | def predict(x, Sigma, u, A, B, Q, step_sim):
 21 |     """Prediction step of EKF"""
 22 |     x1 = step_sim(x, u)
 23 |     Sigma1 = A.dot(Sigma).dot(A.T) + Q
 24 |     return x1, Sigma1
 25 | 
 26 | 
 27 | def correct(x, Sigma, o, R, H, observe_sim):
 28 |     """Correction step of EKF"""
 29 |     S = H.dot(Sigma).dot(H.T) + R
 30 |     y = o - observe_sim(x)
 31 |     if SINGULAR_FIX:
 32 |         S += np.eye(S.shape[0]) * MACHINE_EPS
 33 |     K = np.linalg.solve(S.T, H.dot(Sigma.T)).T
 34 |     x1 = x + K.dot(y)
 35 |     Sigma1 = Sigma - K.dot(H).dot(Sigma)
 36 |     return x1, Sigma1
 37 | 
 38 | 
 39 | def finite_diff(simulate, x, e=1e-1, dim_out=None):
 40 |     dim = x.shape[0]
 41 |     if dim_out is None:
 42 |         dim_out = dim
 43 |     res = np.zeros((dim_out, 0))
 44 |     for i in range(dim):
 45 |         x1 = x.copy()
 46 |         x1[i] += e
 47 |         x1_inc = simulate(x1).copy()
 48 |         x1[i] -= (2 * e)
 49 |         x1_dec = simulate(x1)
 50 |         res = np.hstack((res, (x1_inc - x1_dec) / (2 * e)))
 51 |     return res
 52 | 
 53 | 
 54 | ### Test
 55 | 
 56 | 
 57 | def quaternion_to_euler_angle(q):
 58 |     assert q.shape == (4, )
 59 |     w, x, y, z = q
 60 |     t0 = +2.0 * (w * x + y * z)
 61 |     t1 = +1.0 - 2.0 * (x * x + y * y)
 62 |     X = math.atan2(t0, t1)
 63 | 
 64 |     t2 = +2.0 * (w * y - z * x)
 65 |     t2 = +1.0 if t2 > +1.0 else t2
 66 |     t2 = -1.0 if t2 < -1.0 else t2
 67 |     Y = math.asin(t2)
 68 | 
 69 |     t3 = +2.0 * (w * z + x * y)
 70 |     t4 = +1.0 - 2.0 * (y * y + z * z)
 71 |     Z = math.atan2(t3, t4)
 72 | 
 73 |     return np.array([X, Y, Z])
 74 | 
 75 | 
 76 | def rotation_matrix_to_euler_angle(R):
 77 |     X = math.atan2(R[1, 2], R[2, 2])
 78 | 
 79 |     s1 = math.sin(X)
 80 |     c1 = math.cos(X)
 81 |     c2 = math.sqrt(R[0, 0]*R[0, 0] + R[0, 1]*R[0, 1])
 82 |     
 83 |     Y = math.atan2(-1.0*R[0, 2], c2)
 84 |     Z = math.atan2(s1 * R[2, 0] - c1 * R[1, 0], c1 * R[1, 1] - s1 * R[2, 1])
 85 | 
 86 |     return np.array([X, Y, Z])
 87 |  
 88 |     
 89 | def rotation_matrix_to_quaternion(R):
 90 |     w = np.sqrt( max(0, 1 + R[0, 0] + R[1, 1] + R[2, 2]) ) / 2
 91 |     x = - np.sign(R[2,1] - R[1,2]) * np.sqrt( max(0, 1 + R[0,0] - R[1,1] - R[2,2]) ) / 2
 92 |     y = - np.sign(R[0,2] - R[2,0]) * np.sqrt( max(0, 1 - R[0,0] + R[1,1] - R[2,2]) ) / 2
 93 |     z = - np.sign(R[1,0] - R[0,1]) * np.sqrt( max(0, 1 - R[0,0] - R[1,1] + R[2,2]) ) / 2
 94 | 
 95 |     return np.array([w, x, y, z])
 96 | 
 97 | 
 98 | def quaternion_to_rotation_matrix(q):
 99 |     R = [[q[0]**2 + q[1]**2 - q[2]**2 - q[3]**2, 2*(q[1]*q[2] - q[0]*q[3]), 2*(q[1]*q[3] + q[0]*q[2])],
100 |          [2*(q[1]*q[2] + q[0]*q[3]), q[0]**2 - q[1]**2 + q[2]**2 - q[3]**2, 2*(q[2]*q[3] - q[0]*q[1])],
101 |          [2*(q[1]*q[3] - q[0]*q[2]), 2*(q[2]*q[3] + q[0]*q[1]), q[0]**2 - q[1]**2 - q[2]**2 + q[3]**2]]
102 | 
103 |     return np.array(R)
104 | 
105 | 
106 | def rotation_matrix_to_log_quaternion(R):
107 |     u = math.acos(0.5 * math.sqrt(R[0, 0] + R[1, 1] + R[2, 2] + 1.0))
108 |     
109 |     v = np.array([R[1, 2] - R[2, 1],
110 |                   R[2, 0] - R[0, 2],
111 |                   R[0, 1] - R[1, 0]])
112 | 
113 |     log_quat = v * (u / np.sqrt(np.sum(v*v)))
114 |     
115 |     return log_quat
116 | 
117 | 
118 | ## Euler angles
119 | 
120 | 
121 | def wrap_angles_to_state(angles):
122 |     assert angles.shape == (3, )
123 |     theta, phi, psi = angles
124 |     res = np.array([np.cos(theta), np.sin(theta),
125 |                     np.cos(phi), np.sin(phi),
126 |                     np.cos(psi), np.sin(psi)])
127 |     return res
128 | 
129 | 
130 | def update_angles(ang_state, dangles):
131 |     c1, s1, c2, s2, c3, s3 = ang_state
132 |     dc1, ds1, dc2, ds2, dc3, ds3 = wrap_angles_to_state(dangles)
133 |     nc1 = c1 * dc1 - s1 * ds1
134 |     ns1 = s1 * dc1 + c1 * ds1
135 |     nc2 = c2 * dc2 - s2 * ds2
136 |     ns2 = s2 * dc2 + c2 * ds2
137 |     nc3 = c3 * dc3 - s3 * ds3
138 |     ns3 = s3 * dc3 + c3 * ds3
139 |     return np.array([nc1, ns1, nc2, ns2, nc3, ns3])
140 | 
141 | 
142 | def quaternion_to_state(q):
143 |     assert q.shape == (4, )
144 |     angles = quaternion_to_euler_angle(q)
145 |     return wrap_angles_to_state(angles)
146 | 
147 | 
148 | def pose_to_state(s):
149 |     xyz = np.array(s[:3])
150 |     angles = s[3:]
151 |     angles = wrap_angles_to_state(np.array(angles))
152 |     state = np.concatenate((xyz, angles))
153 |     return state
154 | 
155 | 
156 | ## Quaternions ##
157 | 
158 | def quaternion_distance(q0, q1):
159 |     assert q0.shape == q1.shape == (4,)
160 |     return np.arccos(2 * (q0.dot(q1) ** 2) - 1)
161 | 
162 | 
163 | def quaternion_multiply(q0, q1):
164 |     w0, x0, y0, z0 = q0
165 |     w1, x1, y1, z1 = q1
166 |     return np.array([-x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0,
167 |                      x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0,
168 |                      -x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0,
169 |                      x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0])
170 | 
171 | 
172 | def quaternion_inverse(q):
173 |     n = np.sum(q * q)
174 |     q_inv = np.array([q[0], -q[1], -q[2], -q[3]])
175 |     return q_inv / n
176 | 
177 | 
178 | def log_quaternion(q):
179 |     u, x, y, z = q
180 |     l2norm2 = x ** 2 + y ** 2 + z ** 2
181 |     if l2norm2 < MACHINE_EPS:
182 |         return np.array([0, 0, 0])
183 |     u = np.clip(u, -1., 1.)
184 |     factor = np.arccos(u) / np.sqrt(l2norm2)
185 |     return np.array([x, y, z]) * factor
186 | 
187 | 
188 | def inv_log_quaternion(log_q):
189 |     l2norm = np.sqrt(np.sum(np.square(log_q)))
190 |     x, y, z = log_q * (np.sin(l2norm) / l2norm)
191 |     return np.array([np.cos(l2norm), x, y, z])
192 | 


--------------------------------------------------------------------------------