├── .gitignore ├── DatasetLidarCamera.py ├── README.md ├── evaluate_calib.py ├── models ├── LCCNet.py ├── __pycache__ │ └── LCCNet.cpython-36.pyc └── correlation_package │ ├── LICENSE │ ├── __pycache__ │ └── correlation.cpython-36.pyc │ ├── build │ ├── lib.linux-x86_64-3.6 │ │ └── correlation_cuda.cpython-36m-x86_64-linux-gnu.so │ └── temp.linux-x86_64-3.6 │ │ ├── correlation_cuda.o │ │ └── correlation_cuda_kernel.o │ ├── correlation.py │ ├── correlation_cuda.cc │ ├── correlation_cuda.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ └── top_level.txt │ ├── correlation_cuda_kernel.cu │ ├── correlation_cuda_kernel.cuh │ ├── dist │ └── correlation_cuda-0.0.0-py3.6-linux-x86_64.egg │ ├── pyproject.toml │ └── setup.py ├── pictures ├── input.pdf ├── performance.pdf └── pipline.pdf ├── quaternion_distances.py ├── requirements.txt ├── train_with_sacred.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints/ 2 | output/ 3 | **/__pycache__ 4 | results* -------------------------------------------------------------------------------- /DatasetLidarCamera.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Xudong Lv 10 | # based on github.com/cattaneod/CMRNet/blob/master/DatasetVisibilityKitti.py 11 | 12 | import csv 13 | import os 14 | import random 15 | import os.path as osp 16 | from math import radians 17 | import cv2 18 | 19 | import h5py 20 | import mathutils 21 | import numpy as np 22 | import pandas as pd 23 | import torch 24 | import torchvision.transforms.functional as TTF 25 | from PIL import Image 26 | from torch.utils.data import Dataset 27 | from torchvision import transforms 28 | 29 | from utils import invert_pose, rotate_forward, quaternion_from_matrix #, read_calib_file 30 | from pykitti import odometry 31 | import pykitti 32 | from secrets import choice 33 | 34 | class DatasetLidarCameraKittiOdometry(Dataset): 35 | 36 | def __init__(self, dataset_dir, transform=None, augmentation=False, use_reflectance=False, 37 | max_t=1.5, max_r=20., split='val', device='cpu', val_sequence='00', suf='.png'): 38 | super(DatasetLidarCameraKittiOdometry, self).__init__() 39 | self.use_reflectance = use_reflectance 40 | self.maps_folder = '' 41 | self.device = device 42 | self.max_r = max_r 43 | self.max_t = max_t 44 | self.augmentation = augmentation 45 | self.root_dir = dataset_dir 46 | self.transform = transform 47 | self.split = split 48 | self.GTs_R = {} 49 | self.GTs_T = {} 50 | self.GTs_T_cam02_velo = {} 51 | self.K = {} 52 | self.suf = suf 53 | 54 | self.all_files = [] 55 | self.sequence_list = ['00','01', '02', '03', '04', '05', '06', '07', '08', '09', '10', 56 | '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21'] 57 | # self.sequence_list = ['00', '01', '02'] 58 | # self.model = CameraModel() 59 | # self.model.focal_length = [7.18856e+02, 7.18856e+02] 60 | # self.model.principal_point = [6.071928e+02, 1.852157e+02] 61 | # for seq in ['00', '03', '05', '06', '07', '08', '09']: 62 | for seq 
in self.sequence_list: 63 | odom = odometry(self.root_dir, seq) 64 | calib = odom.calib 65 | T_cam02_velo_np = calib.T_cam2_velo #gt pose from cam02 to velo_lidar (T_cam02_velo: 4x4) 66 | self.K[seq] = calib.K_cam2 # 3x3 67 | # T_cam02_velo = torch.from_numpy(T_cam02_velo_np) 68 | # GT_R = quaternion_from_matrix(T_cam02_velo[:3, :3]) 69 | # GT_T = T_cam02_velo[3:, :3] 70 | # self.GTs_R[seq] = GT_R # GT_R = np.array([row['qw'], row['qx'], row['qy'], row['qz']]) 71 | # self.GTs_T[seq] = GT_T # GT_T = np.array([row['x'], row['y'], row['z']]) 72 | self.GTs_T_cam02_velo[seq] = T_cam02_velo_np #gt pose from cam02 to velo_lidar (T_cam02_velo: 4x4) 73 | 74 | image_list = os.listdir(os.path.join(dataset_dir, 'sequences', seq, 'image_2')) 75 | image_list.sort() 76 | 77 | for image_name in image_list: 78 | if not os.path.exists(os.path.join(dataset_dir, 'sequences', seq, 'velodyne', 79 | str(image_name.split('.')[0])+'.bin')): 80 | continue 81 | if not os.path.exists(os.path.join(dataset_dir, 'sequences', seq, 'image_2', 82 | str(image_name.split('.')[0])+suf)): 83 | continue 84 | if seq == val_sequence: 85 | if split.startswith('val') or split == 'test': 86 | self.all_files.append(os.path.join(seq, image_name.split('.')[0])) 87 | elif (not seq == val_sequence) and split == 'train': 88 | self.all_files.append(os.path.join(seq, image_name.split('.')[0])) 89 | # self.train_RT = [] 90 | # if split == 'train': 91 | 92 | # train_RT_file = osp.join(dataset_dir, 'sequences', 93 | # f'train_RT_left_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 94 | 95 | # if osp.exists(train_RT_file): 96 | # print(f'TRAIN SET: Using this file: {train_RT_file}') 97 | # df_train_RT = pd.read_csv(train_RT_file, sep=',') 98 | # for index, row in df_train_RT.iterrows(): 99 | # # print("row:",row) 100 | # self.train_RT.append(list(row)) 101 | # else: 102 | # print(f'TRAIN SET - Not found: {train_RT_file}') 103 | # print("Generating a new one") 104 | # train_RT_file = open(train_RT_file, 'w') 105 | # train_RT_file = csv.writer(train_RT_file, delimiter=',') 106 | # train_RT_file.writerow(['id', 'pair', 'Yaw', 'Pitch', 'Roll', 'tx', 'ty', 'tz', 'classify_label']) 107 | 108 | # for i in range(len(self.all_files)): 109 | # data_seed = random.randint(1,10) #大于5数据为配准失败(负样本),小于等于5为配准成功(正样本) 110 | # rotz,roty,rotx,transl_x,transl_y,transl_z,label = self.generate_initial_transformation(data_seed) 111 | # # if data_seed > 5 : 112 | # # max_angle = self.max_r 113 | # # rotz = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 114 | # # roty = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 115 | # # rotx = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 116 | # # transl_x = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 117 | # # transl_y = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 118 | # # transl_z = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 119 | # # label = 0 120 | # # else: 121 | # # rotz = np.random.uniform(-1, 1) * (3.141592 / 180.0) 122 | # # roty = np.random.uniform(-1, 1) * (3.141592 / 180.0) 123 | # # rotx = np.random.uniform(-1, 1) * (3.141592 / 180.0) 124 | # # transl_x = np.random.uniform(-0.1, 0.1) 125 | # # transl_y = np.random.uniform(-0.1, 0.1) 126 | # # transl_z = np.random.uniform(-0.1, 0.1) 127 | # # label = 1 128 | 129 | # train_RT_file.writerow([str(self.all_files[i][:6]),str(self.all_files[i][:-1]), rotx, roty, rotz, transl_x, transl_y, 
transl_z, label]) 130 | # self.train_RT.append([str(self.all_files[i][:6]), str(self.all_files[i][:-1]), float(rotx), float(roty), float(rotz), float(transl_x), float(transl_y), float(transl_z), int(label)]) 131 | 132 | # assert len(self.train_RT) == len(self.all_files), "Something wrong with train RTs" 133 | # self.metadata = self.all_files 134 | 135 | # # self.val_RT = [] 136 | 137 | self.val_RT = [] 138 | if split == 'val' or split == 'test': 139 | # val_RT_file = os.path.join(dataset_dir, 'sequences', 140 | # f'val_RT_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 141 | val_RT_file = os.path.join(dataset_dir, 'sequences', 142 | f'val_RT_left_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 143 | if os.path.exists(val_RT_file): 144 | print(f'VAL SET: Using this file: {val_RT_file}') 145 | df_test_RT = pd.read_csv(val_RT_file, sep=',') 146 | for index, row in df_test_RT.iterrows(): 147 | # print("row:",row) 148 | self.val_RT.append(list(row)) 149 | else: 150 | print(f'VAL SET - Not found: {val_RT_file}') 151 | print("Generating a new one") 152 | val_RT_file = open(val_RT_file, 'w') 153 | val_RT_file = csv.writer(val_RT_file, delimiter=',') 154 | val_RT_file.writerow(['id', 'tx', 'ty', 'tz', 'rx', 'ry', 'rz']) 155 | for i in range(len(self.all_files)): 156 | # rotz = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 157 | # roty = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 158 | # rotx = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 159 | # transl_x = np.random.uniform(-max_t, max_t) 160 | # transl_y = np.random.uniform(-max_t, max_t) 161 | # transl_z = np.random.uniform(-max_t, max_t) 162 | data_seed = random.randint(1,10) #大于5数据为配准失败(负样本),小于等于5为配准成功(正样本) 163 | rotz,roty,rotx,transl_x,transl_y,transl_z,label = self.generate_initial_transformation(data_seed) 164 | # if data_seed > 5 : 165 | # max_angle = self.max_r 166 | # rotz = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 167 | # roty = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 168 | # rotx = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 169 | # transl_x = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 170 | # transl_y = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 171 | # transl_z = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 172 | # label = 0 173 | # else: 174 | # rotz = np.random.uniform(-1, 1) * (3.141592 / 180.0) 175 | # roty = np.random.uniform(-1, 1) * (3.141592 / 180.0) 176 | # rotx = np.random.uniform(-1, 1) * (3.141592 / 180.0) 177 | # transl_x = np.random.uniform(-0.1, 0.1) 178 | # transl_y = np.random.uniform(-0.1, 0.1) 179 | # transl_z = np.random.uniform(-0.1, 0.1) 180 | # label = 1 181 | val_RT_file.writerow([str(self.all_files[i][:6]),str(self.all_files[i][:-1]), rotx, roty, rotz, transl_x, transl_y, transl_z, label]) 182 | self.val_RT.append([str(self.all_files[i][:6]), str(self.all_files[i][:-1]), float(rotx), float(roty), float(rotz), float(transl_x), float(transl_y), float(transl_z), int(label)]) 183 | # transl_z = np.random.uniform(-max_t, min(max_t, 1.)) 184 | # val_RT_file.writerow([i, transl_x, transl_y, transl_z, 185 | # rotx, roty, rotz]) 186 | # self.val_RT.append([float(i), float(transl_x), float(transl_y), float(transl_z), 187 | # float(rotx), float(roty), float(rotz)]) 188 | print("len(self.val_RT):",len(self.val_RT)) 189 | print("len(self.all_files):",len(self.all_files)) 190 | assert len(self.val_RT) 
== len(self.all_files), "Something wrong with test RTs" 191 | def generate_initial_transformation(self, data_seed): 192 | max_angle = self.max_r 193 | if data_seed > 5: # negative samples have 194 | while True: 195 | rotz_seed = random.randint(1,2) 196 | roty_seed = random.randint(1,2) 197 | rotx_seed = random.randint(1,2) 198 | transl_x_seed = random.randint(1,2) 199 | transl_y_seed = random.randint(1,2) 200 | transl_z_seed = random.randint(1,2) 201 | if rotz_seed==2 and roty_seed==2 and rotx_seed==2 and transl_x_seed==2 and transl_y_seed==2 and transl_z_seed==2 : 202 | continue 203 | else: 204 | break 205 | if rotz_seed > 1: 206 | rotz = np.random.uniform(*(choice([(-max_angle,-0.5),(0.5,max_angle)]))) * (3.141592 / 180.0) 207 | else: 208 | rotz = np.random.uniform(-1, 1) * (3.141592 / 180.0) 209 | 210 | 211 | if roty_seed > 1: 212 | roty = np.random.uniform(*(choice([(-max_angle,-0.5),(0.5,max_angle)]))) * (3.141592 / 180.0) 213 | else: 214 | roty = np.random.uniform(-1, 1) * (3.141592 / 180.0) 215 | 216 | 217 | if rotx_seed > 1: 218 | rotx = np.random.uniform(*(choice([(-max_angle,-0.5),(0.5,max_angle)]))) * (3.141592 / 180.0) 219 | else: 220 | rotx = np.random.uniform(-1, 1) * (3.141592 / 180.0) 221 | 222 | 223 | if transl_x_seed > 1: 224 | transl_x = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 225 | else: 226 | transl_x = np.random.uniform(-0.1, 0.1) 227 | 228 | 229 | if transl_y_seed > 1: 230 | transl_y = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 231 | else: 232 | transl_y = np.random.uniform(-0.1, 0.1) 233 | 234 | 235 | if transl_z_seed > 1: 236 | transl_z = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 237 | else: 238 | transl_z = np.random.uniform(-0.1, 0.1) 239 | 240 | label = 0 241 | else: 242 | rotz = np.random.uniform(-0.5, 0.5) * (3.141592 / 180.0) 243 | roty = np.random.uniform(-0.5, 0.5) * (3.141592 / 180.0) 244 | rotx = np.random.uniform(-0.5, 0.5) * (3.141592 / 180.0) 245 | transl_x = np.random.uniform(-0.1, 0.1) 246 | transl_y = np.random.uniform(-0.1, 0.1) 247 | transl_z = np.random.uniform(-0.1, 0.1) 248 | label = 1 249 | 250 | return rotz,roty,rotx,transl_x,transl_y,transl_z,label 251 | 252 | def get_ground_truth_poses(self, sequence, frame): 253 | return self.GTs_T[sequence][frame], self.GTs_R[sequence][frame] 254 | 255 | def custom_transform(self, rgb, img_rotation=0., flip=False): 256 | to_tensor = transforms.ToTensor() 257 | normalization = transforms.Normalize(mean=[0.485, 0.456, 0.406], 258 | std=[0.229, 0.224, 0.225]) 259 | 260 | #rgb = crop(rgb) 261 | if self.split == 'train': 262 | color_transform = transforms.ColorJitter(0.1, 0.1, 0.1) 263 | rgb = color_transform(rgb) 264 | if flip: 265 | rgb = TTF.hflip(rgb) 266 | rgb = TTF.rotate(rgb, img_rotation) 267 | #io.imshow(np.array(rgb)) 268 | #io.show() 269 | 270 | rgb = to_tensor(rgb) 271 | rgb = normalization(rgb) 272 | return rgb 273 | 274 | def __len__(self): 275 | return len(self.all_files) 276 | 277 | def __getitem__(self, idx): 278 | item = self.all_files[idx] 279 | seq = str(item.split('/')[0]) 280 | rgb_name = str(item.split('/')[1]) 281 | img_path = os.path.join(self.root_dir, 'sequences', seq, 'image_2', rgb_name+self.suf) 282 | lidar_path = os.path.join(self.root_dir, 'sequences', seq, 'velodyne', rgb_name+'.bin') 283 | lidar_scan = np.fromfile(lidar_path, dtype=np.float32) 284 | pc = lidar_scan.reshape((-1, 4)) 285 | valid_indices = pc[:, 0] < -3. 286 | valid_indices = valid_indices | (pc[:, 0] > 3.) 
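# note: the OR-chain above and below keeps only points with |x| > 3 m or |y| > 3 m, so LiDAR returns close to the sensor (roughly the ego-vehicle area) are discarded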
287 | valid_indices = valid_indices | (pc[:, 1] < -3.) 288 | valid_indices = valid_indices | (pc[:, 1] > 3.) 289 | pc = pc[valid_indices].copy() 290 | pc_org = torch.from_numpy(pc.astype(np.float32)) 291 | # if self.use_reflectance: 292 | # reflectance = pc[:, 3].copy() 293 | # reflectance = torch.from_numpy(reflectance).float() 294 | 295 | RT = self.GTs_T_cam02_velo[seq].astype(np.float32) 296 | 297 | if pc_org.shape[1] == 4 or pc_org.shape[1] == 3: 298 | pc_org = pc_org.t() 299 | if pc_org.shape[0] == 3: 300 | homogeneous = torch.ones(pc_org.shape[1]).unsqueeze(0) 301 | pc_org = torch.cat((pc_org, homogeneous), 0) 302 | elif pc_org.shape[0] == 4: 303 | if not torch.all(pc_org[3, :] == 1.): 304 | pc_org[3, :] = 1. 305 | else: 306 | raise TypeError("Wrong PointCloud shape") 307 | 308 | pc_rot = np.matmul(RT, pc_org.numpy()) 309 | pc_rot = pc_rot.astype(np.float32).copy() 310 | pc_in = torch.from_numpy(pc_rot) 311 | 312 | # pc_rot = np.matmul(RT, pc.T) 313 | # pc_rot = pc_rot.astype(np.float).T.copy() 314 | # pc_in = torch.from_numpy(pc_rot.astype(np.float32))#.float() 315 | 316 | # if pc_in.shape[1] == 4 or pc_in.shape[1] == 3: 317 | # pc_in = pc_in.t() 318 | # if pc_in.shape[0] == 3: 319 | # homogeneous = torch.ones(pc_in.shape[1]).unsqueeze(0) 320 | # pc_in = torch.cat((pc_in, homogeneous), 0) 321 | # elif pc_in.shape[0] == 4: 322 | # if not torch.all(pc_in[3,:] == 1.): 323 | # pc_in[3,:] = 1. 324 | # else: 325 | # raise TypeError("Wrong PointCloud shape") 326 | 327 | h_mirror = False 328 | # if np.random.rand() > 0.5 and self.split == 'train': 329 | # h_mirror = True 330 | # pc_in[1, :] *= -1 331 | 332 | img = Image.open(img_path) 333 | # img = cv2.imread(img_path) 334 | img_rotation = 0. 335 | # if self.split == 'train': 336 | # img_rotation = np.random.uniform(-5, 5) 337 | try: 338 | img = self.custom_transform(img, img_rotation, h_mirror) 339 | except OSError: 340 | new_idx = np.random.randint(0, self.__len__()) 341 | return self.__getitem__(new_idx) 342 | 343 | # Rotate PointCloud for img_rotation 344 | if self.split == 'train': 345 | R = mathutils.Euler((radians(img_rotation), 0, 0), 'XYZ') 346 | T = mathutils.Vector((0., 0., 0.)) 347 | pc_in = rotate_forward(pc_in, R, T) 348 | 349 | if self.split == 'train': 350 | # max_angle = self.max_r 351 | # rotz = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 352 | # roty = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 353 | # rotx = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 354 | # transl_x = np.random.uniform(-self.max_t, self.max_t) 355 | # transl_y = np.random.uniform(-self.max_t, self.max_t) 356 | # transl_z = np.random.uniform(-self.max_t, self.max_t) 357 | # transl_z = np.random.uniform(-self.max_t, min(self.max_t, 1.)) 358 | data_seed = random.randint(1,10) # data_seed > 5: mis-registered pair (negative sample); data_seed <= 5: correctly registered pair (positive sample) 359 | rotz,roty,rotx,transl_x,transl_y,transl_z,label = self.generate_initial_transformation(data_seed) 360 | # initial_RT = self.train_RT[idx] 361 | 362 | # rotz = initial_RT[4] 363 | # roty = initial_RT[3] 364 | # rotx = initial_RT[2] 365 | # transl_x = initial_RT[5] 366 | # transl_y = initial_RT[6] 367 | # transl_z = initial_RT[7] 368 | # label = initial_RT[8] 369 | else: 370 | initial_RT = self.val_RT[idx] 371 | # print("initial_RT:",initial_RT) 372 | rotz = initial_RT[2] 373 | roty = initial_RT[1] 374 | rotx = initial_RT[0] 375 | transl_x = initial_RT[3] 376 | transl_y = initial_RT[4] 377 | transl_z = initial_RT[5] 378 | label = initial_RT[6] 379 | 380 | # randomly generate a perturbation of the calibration parameters within the given range 381 | # 
during training a new perturbation is drawn for every sample, so each epoch uses different values 382 | # for val/test the perturbations are generated once at initialization, so every epoch uses the same values 383 | R = mathutils.Euler((rotx, roty, rotz), 'XYZ') 384 | T = mathutils.Vector((transl_x, transl_y, transl_z)) 385 | 386 | R, T = invert_pose(R, T) 387 | R, T = torch.tensor(R), torch.tensor(T) 388 | 389 | 390 | # # positive samples also need R and T, but both are set to zero 391 | # R_pos = mathutils.Euler((0.0, 0.0, 0.0), 'XYZ') 392 | # T_pos = mathutils.Vector((0.0, 0.0, 0.0)) 393 | 394 | # R_pos, T_pos = invert_pose(R_pos, T_pos) 395 | # R_pos, T_pos = torch.tensor(R_pos), torch.tensor(T_pos) 396 | #io.imshow(depth_img.numpy(), cmap='jet') 397 | #io.show() 398 | calib = self.K[seq] 399 | if h_mirror: 400 | calib[2] = (img.shape[2] / 2)*2 - calib[2] 401 | 402 | if self.split == 'test': 403 | sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 404 | 'tr_error': T, 'rot_error': R, 'seq': int(seq), 'img_path': img_path, 405 | 'rgb_name': rgb_name + '.png', 'item': item, 'extrin': RT,'label':label, 406 | 'initial_RT': initial_RT} 407 | else: 408 | sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 409 | 'tr_error': T, 'rot_error': R, 410 | # 'tr_pos': T_pos, 'rot_pos': R_pos, 411 | 'label_pos': 0, 'label_nag': 1, 'label':label, 412 | 'seq': int(seq), 413 | 'rgb_name': rgb_name, 'item': item, 'extrin': RT} 414 | 415 | return sample 416 | 417 | 418 | class DatasetLidarCameraKittiRaw(Dataset): 419 | 420 | def __init__(self, dataset_dir, transform=None, augmentation=False, use_reflectance=False, 421 | max_t=1.5, max_r=15.0, split='val', device='cpu', val_sequence='2011_09_26_drive_0117_sync'): 422 | super(DatasetLidarCameraKittiRaw, self).__init__() 423 | self.use_reflectance = use_reflectance 424 | self.maps_folder = '' 425 | self.device = device 426 | self.max_r = max_r 427 | self.max_t = max_t 428 | self.augmentation = augmentation 429 | self.root_dir = dataset_dir 430 | self.transform = transform 431 | self.split = split 432 | self.GTs_R = {} 433 | self.GTs_T = {} 434 | self.GTs_T_cam02_velo = {} 435 | self.max_depth = 80 436 | self.K_list = {} 437 | 438 | self.all_files = [] 439 | date_list = ['2011_09_26', '2011_09_28', '2011_09_29', '2011_09_30', '2011_10_03'] 440 | data_drive_list = ['0001', '0002', '0004', '0016', '0027'] 441 | self.calib_date = {} 442 | 443 | for i in range(len(date_list)): 444 | date = date_list[i] 445 | data_drive = data_drive_list[i] 446 | data = pykitti.raw(self.root_dir, date, data_drive) 447 | calib = {'K2': data.calib.K_cam2, 'K3': data.calib.K_cam3, 448 | 'RT2': data.calib.T_cam2_velo, 'RT3': data.calib.T_cam3_velo} 449 | self.calib_date[date] = calib 450 | 451 | # date = val_sequence[:10] 452 | # seq = val_sequence 453 | # image_list = os.listdir(os.path.join(dataset_dir, date, seq, 'image_02/data')) 454 | # image_list.sort() 455 | # 456 | # for image_name in image_list: 457 | # if not os.path.exists(os.path.join(dataset_dir, date, seq, 'velodyne_points/data', 458 | # str(image_name.split('.')[0]) + '.bin')): 459 | # continue 460 | # if not os.path.exists(os.path.join(dataset_dir, date, seq, 'image_02/data', 461 | # str(image_name.split('.')[0]) + '.jpg')): # png 462 | # continue 463 | # self.all_files.append(os.path.join(date, seq, 'image_02/data', image_name.split('.')[0])) 464 | 465 | date = val_sequence[:10] 466 | test_list = ['2011_09_26_drive_0005_sync', '2011_09_26_drive_0070_sync', '2011_10_03_drive_0027_sync'] 467 | seq_list = os.listdir(os.path.join(self.root_dir, date)) 468 | 469 | for seq in seq_list: 470 | if not os.path.isdir(os.path.join(dataset_dir, date, seq)): 471 | continue 472 | image_list = 
os.listdir(os.path.join(dataset_dir, date, seq, 'image_02/data')) 473 | image_list.sort() 474 | 475 | for image_name in image_list: 476 | if not os.path.exists(os.path.join(dataset_dir, date, seq, 'velodyne_points/data', 477 | str(image_name.split('.')[0])+'.bin')): 478 | continue 479 | if not os.path.exists(os.path.join(dataset_dir, date, seq, 'image_02/data', 480 | str(image_name.split('.')[0])+'.jpg')): # png 481 | continue 482 | if seq == val_sequence and (not split == 'train'): 483 | self.all_files.append(os.path.join(date, seq, 'image_02/data', image_name.split('.')[0])) 484 | elif (not seq == val_sequence) and split == 'train' and seq not in test_list: 485 | self.all_files.append(os.path.join(date, seq, 'image_02/data', image_name.split('.')[0])) 486 | 487 | self.val_RT = [] 488 | if split == 'val' or split == 'test': 489 | val_RT_file = os.path.join(dataset_dir, 490 | f'val_RT_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 491 | if os.path.exists(val_RT_file): 492 | print(f'VAL SET: Using this file: {val_RT_file}') 493 | df_test_RT = pd.read_csv(val_RT_file, sep=',') 494 | for index, row in df_test_RT.iterrows(): 495 | self.val_RT.append(list(row)) 496 | else: 497 | print(f'TEST SET - Not found: {val_RT_file}') 498 | print("Generating a new one") 499 | val_RT_file = open(val_RT_file, 'w') 500 | val_RT_file = csv.writer(val_RT_file, delimiter=',') 501 | val_RT_file.writerow(['id', 'tx', 'ty', 'tz', 'rx', 'ry', 'rz']) 502 | for i in range(len(self.all_files)): 503 | rotz = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 504 | roty = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 505 | rotx = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 506 | transl_x = np.random.uniform(-max_t, max_t) 507 | transl_y = np.random.uniform(-max_t, max_t) 508 | transl_z = np.random.uniform(-max_t, max_t) 509 | # transl_z = np.random.uniform(-max_t, min(max_t, 1.)) 510 | val_RT_file.writerow([i, transl_x, transl_y, transl_z, 511 | rotx, roty, rotz]) 512 | self.val_RT.append([float(i), transl_x, transl_y, transl_z, 513 | rotx, roty, rotz]) 514 | 515 | assert len(self.val_RT) == len(self.all_files), "Something wrong with test RTs" 516 | 517 | def get_ground_truth_poses(self, sequence, frame): 518 | return self.GTs_T[sequence][frame], self.GTs_R[sequence][frame] 519 | 520 | def custom_transform(self, rgb, img_rotation=0., flip=False): 521 | to_tensor = transforms.ToTensor() 522 | normalization = transforms.Normalize(mean=[0.485, 0.456, 0.406], 523 | std=[0.229, 0.224, 0.225]) 524 | 525 | #rgb = crop(rgb) 526 | if self.split == 'train': 527 | color_transform = transforms.ColorJitter(0.1, 0.1, 0.1) 528 | # color_transform = transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3 / 3.14) 529 | rgb = color_transform(rgb) 530 | if flip: 531 | rgb = TTF.hflip(rgb) 532 | rgb = TTF.rotate(rgb, img_rotation) 533 | #io.imshow(np.array(rgb)) 534 | #io.show() 535 | 536 | rgb = to_tensor(rgb) 537 | rgb = normalization(rgb) 538 | return rgb 539 | 540 | def __len__(self): 541 | return len(self.all_files) 542 | 543 | # self.all_files.append(os.path.join(date, seq, 'image_2/data', image_name.split('.')[0])) 544 | def __getitem__(self, idx): 545 | item = self.all_files[idx] 546 | date = str(item.split('/')[0]) 547 | seq = str(item.split('/')[1]) 548 | rgb_name = str(item.split('/')[4]) 549 | img_path = os.path.join(self.root_dir, date, seq, 'image_02/data', rgb_name+'.jpg') # png 550 | lidar_path = os.path.join(self.root_dir, date, seq, 'velodyne_points/data', rgb_name+'.bin') 551 | 
lidar_scan = np.fromfile(lidar_path, dtype=np.float32) 552 | pc = lidar_scan.reshape((-1, 4)) 553 | valid_indices = pc[:, 0] < -3. 554 | valid_indices = valid_indices | (pc[:, 0] > 3.) 555 | valid_indices = valid_indices | (pc[:, 1] < -3.) 556 | valid_indices = valid_indices | (pc[:, 1] > 3.) 557 | pc = pc[valid_indices].copy() 558 | pc_lidar = pc.copy() 559 | pc_org = torch.from_numpy(pc.astype(np.float32)) 560 | if self.use_reflectance: 561 | reflectance = pc[:, 3].copy() 562 | reflectance = torch.from_numpy(reflectance).float() 563 | 564 | calib = self.calib_date[date] 565 | RT_cam02 = calib['RT2'].astype(np.float32) 566 | # camera intrinsic parameters 567 | calib_cam02 = calib['K2'] # 3x3 568 | 569 | E_RT = RT_cam02 570 | 571 | if pc_org.shape[1] == 4 or pc_org.shape[1] == 3: 572 | pc_org = pc_org.t() 573 | if pc_org.shape[0] == 3: 574 | homogeneous = torch.ones(pc_org.shape[1]).unsqueeze(0) 575 | pc_org = torch.cat((pc_org, homogeneous), 0) 576 | elif pc_org.shape[0] == 4: 577 | if not torch.all(pc_org[3, :] == 1.): 578 | pc_org[3, :] = 1. 579 | else: 580 | raise TypeError("Wrong PointCloud shape") 581 | 582 | pc_rot = np.matmul(E_RT, pc_org.numpy()) 583 | pc_rot = pc_rot.astype(np.float32).copy() 584 | pc_in = torch.from_numpy(pc_rot) 585 | 586 | h_mirror = False 587 | # if np.random.rand() > 0.5 and self.split == 'train': 588 | # h_mirror = True 589 | # pc_in[0, :] *= -1 590 | 591 | img = Image.open(img_path) 592 | # print(img_path) 593 | # img = cv2.imread(img_path) 594 | img_rotation = 0. 595 | # if self.split == 'train': 596 | # img_rotation = np.random.uniform(-5, 5) 597 | try: 598 | img = self.custom_transform(img, img_rotation, h_mirror) 599 | except OSError: 600 | new_idx = np.random.randint(0, self.__len__()) 601 | return self.__getitem__(new_idx) 602 | 603 | # Rotate PointCloud for img_rotation 604 | # if self.split == 'train': 605 | # R = mathutils.Euler((radians(img_rotation), 0, 0), 'XYZ') 606 | # T = mathutils.Vector((0., 0., 0.)) 607 | # pc_in = rotate_forward(pc_in, R, T) 608 | 609 | if self.split == 'train': 610 | max_angle = self.max_r 611 | rotz = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 612 | roty = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 613 | rotx = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 614 | transl_x = np.random.uniform(-self.max_t, self.max_t) 615 | transl_y = np.random.uniform(-self.max_t, self.max_t) 616 | transl_z = np.random.uniform(-self.max_t, self.max_t) 617 | # transl_z = np.random.uniform(-self.max_t, min(self.max_t, 1.)) 618 | initial_RT = 0 619 | else: 620 | initial_RT = self.val_RT[idx] 621 | rotz = initial_RT[6] 622 | roty = initial_RT[5] 623 | rotx = initial_RT[4] 624 | transl_x = initial_RT[1] 625 | transl_y = initial_RT[2] 626 | transl_z = initial_RT[3] 627 | 628 | # randomly generate a perturbation of the calibration parameters within the given range 629 | # during training a new perturbation is drawn for every sample, so each epoch uses different values 630 | # for val/test the perturbations are generated once at initialization, so every epoch uses the same values 631 | R = mathutils.Euler((rotx, roty, rotz), 'XYZ') 632 | T = mathutils.Vector((transl_x, transl_y, transl_z)) 633 | 634 | R, T = invert_pose(R, T) 635 | R, T = torch.tensor(R), torch.tensor(T) 636 | 637 | #io.imshow(depth_img.numpy(), cmap='jet') 638 | #io.show() 639 | calib = calib_cam02 640 | # calib = get_calib_kitti_odom(int(seq)) 641 | if h_mirror: 642 | calib[2] = (img.shape[2] / 2)*2 - calib[2] 643 | 644 | # sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 'pc_org': pc_org, 'img_path': img_path, 645 | # 'tr_error': T, 'rot_error': R, 'seq': int(seq), 'rgb_name': rgb_name, 'item': item, 646 | # 
'extrin': E_RT, 'initial_RT': initial_RT} 647 | sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 'pc_org': pc_org, 'img_path': img_path, 648 | 'tr_error': T, 'rot_error': R, 'rgb_name': rgb_name + '.png', 'item': item, 649 | 'extrin': E_RT, 'initial_RT': initial_RT, 'pc_lidar': pc_lidar} 650 | 651 | return sample 652 | 653 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LiDAR2camera_self_check 2 | ![image](https://github.com/OpenCalib/LiDAR2camera_self-check/blob/master/pictures/pipline.pdf) 3 | ![image](https://github.com/OpenCalib/LiDAR2camera_self-check/blob/master/pictures/input.pdf) 4 | ![image](https://github.com/OpenCalib/LiDAR2camera_self-check/blob/master/pictures/performance.pdf) 5 | ## Table of Contents 6 | 7 | - [Requirements](#Requirements) 8 | - [Evaluation](#Evaluation) 9 | - [Train](#Train) 10 | - [Citation](#Citation) 11 | 12 | ## Requirements 13 | 14 | * Python 3.6 (we recommend using [Anaconda](https://www.anaconda.com/)) 15 | * PyTorch==1.0.1.post2 16 | * Torchvision==0.2.2 17 | * Install the requirements and dependencies 18 | ```commandline 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ## Evaluation 23 | 24 | 1. Download the [KITTI odometry dataset](http://www.cvlibs.net/datasets/kitti/eval_odometry.php). 25 | 2. Change the dataset path in `evaluate_calib.py`. 26 | ```python 27 | data_folder = '/path/to/the/KITTI/odometry_color/' 28 | ``` 29 | 3. Create a folder named `pretrained` in the root path to store the pre-trained models. 30 | 4. Download the pre-trained models and modify the weights paths in `evaluate_calib.py`. 31 | ```python 32 | weights = [ 33 | # './pretrained/final_checkpoint_r20.00_t1.50_e4_0.094.tar', 34 | # './pretrained/final_checkpoint_r2.00_t0.20_e4_0.228.tar', 35 | # './pretrained/final_checkpoint_r10.00_t1.00_e3_0.108.tar', 36 | './pretrained/final_checkpoint_r5.00_t0.50_e-1_0.145.tar', 37 | ] 38 | ``` 39 | 5. Run evaluation. 40 | ```commandline 41 | python evaluate_calib.py 42 | ``` 43 | 44 | ## Train 45 | ```commandline 46 | python train_with_sacred.py 47 | ``` 48 | 49 | 50 | 51 | ### Acknowledgments 52 | We are grateful to Daniele Cattaneo for the CMRNet [github repository](https://github.com/cattaneod/CMRNet) and the LCCNet [github repository](https://github.com/IIPCVLAB/LCCNet). We used them as our initial code base. 53 | 54 | 57 | 58 | --- 59 | -------------------------------------------------------------------------------- /evaluate_calib.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Xudong Lv 10 | # based on github.com/cattaneod/CMRNet/blob/master/evaluate_iterative_single_CALIB.py 11 | 12 | import csv 13 | import random 14 | import open3d as o3 15 | 16 | import cv2 17 | import mathutils 18 | # import matplotlib 19 | # matplotlib.use('Qt5Agg') 20 | import os 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import torch 24 | import torch.nn.functional as F 25 | import torch.nn.parallel 26 | import torch.utils.data 27 | from sacred import Experiment 28 | from sacred.utils import apply_backspaces_and_linefeeds 29 | from skimage import io 30 | from tqdm import tqdm 31 | import time 32 | 33 | from models.LCCNet import LCCNet 34 | from DatasetLidarCamera import DatasetLidarCameraKittiOdometry 35 | 36 | from quaternion_distances import quaternion_distance 37 | from utils import (mat2xyzrpy, merge_inputs, overlay_imgs, quat2mat, 38 | quaternion_from_matrix, rotate_back, rotate_forward, 39 | tvector2mat) 40 | 41 | from torch.utils.data import Dataset 42 | from pykitti import odometry 43 | import pandas as pd 44 | from PIL import Image 45 | from math import radians 46 | from utils import invert_pose 47 | from torchvision import transforms 48 | 49 | 50 | # import matplotlib 51 | # matplotlib.rc("font",family='AR PL UMing CN') 52 | plt.rcParams['axes.unicode_minus'] = False 53 | # plt.rc('font',family='Times New Roman') 54 | font_EN = {'family': 'Times New Roman', 'weight': 'normal', 'size': 16} 55 | font_CN = {'family': 'AR PL UMing CN', 'weight': 'normal', 'size': 16} 56 | plt_size = 10.5 57 | 58 | ex = Experiment("LCCNet-evaluate-iterative") 59 | ex.captured_out_filter = apply_backspaces_and_linefeeds 60 | 61 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 62 | 63 | # noinspection PyUnusedLocal 64 | @ex.config 65 | def config(): 66 | dataset = 'kitti/odom' 67 | # data_folder = '/home/wangshuo/Datasets/KITTI/odometry_color/' 68 | data_folder = '/mnt/petrelfs/weipengjin/file_trans/LCCNet_dataset/' 69 | test_sequence = 0 70 | use_prev_output = False 71 | max_t = 0.2 72 | max_r = 1.0 73 | occlusion_kernel = 5 74 | occlusion_threshold = 3.0 75 | network = 'Res_f1' 76 | norm = 'bn' 77 | show = False 78 | use_reflectance = False 79 | weight = None # List of weights' path, for iterative refinement 80 | save_name = True 81 | # Set to True only if you use two network, the first for rotation and the second for translation 82 | rot_transl_separated = False 83 | random_initial_pose = False 84 | save_log = False 85 | dropout = 0.0 86 | max_depth = 80. 
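# note: depth images built in main() below are divided by max_depth, and when max_depth is set below 80 the point clouds are additionally clipped to pc[0, :] < max_depth before projection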
87 | iterative_method = 'single' # ['multi_range', 'single_range', 'single'] 88 | output = './output' 89 | save_image = False 90 | outlier_filter = False 91 | outlier_filter_th = 10 92 | out_fig_lg = 'EN' # [EN, CN] 93 | 94 | weights = [ 95 | # # './pretrained/final_checkpoint_r20.00_t1.50_e4_0.094.tar', 96 | # # './pretrained/final_checkpoint_r2.00_t0.20_e4_0.228.tar', 97 | # # './pretrained/final_checkpoint_r10.00_t1.00_e3_0.108.tar', 98 | './checkpoints/kitti/odom/val_seq_00/models/checkpoint_r2.00_t0.20_e119_0.108.tar' 99 | ] 100 | 101 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 102 | 103 | EPOCH = 1 104 | 105 | 106 | def _init_fn(worker_id, seed): 107 | seed = seed + worker_id + EPOCH * 100 108 | print(f"Init worker {worker_id} with seed {seed}") 109 | torch.manual_seed(seed) 110 | np.random.seed(seed) 111 | random.seed(seed) 112 | 113 | 114 | def get_2D_lidar_projection(pcl, cam_intrinsic): 115 | pcl_xyz = cam_intrinsic @ pcl.T 116 | pcl_xyz = pcl_xyz.T 117 | pcl_z = pcl_xyz[:, 2] 118 | pcl_xyz = pcl_xyz / (pcl_xyz[:, 2, None] + 1e-10) 119 | pcl_uv = pcl_xyz[:, :2] 120 | 121 | return pcl_uv, pcl_z 122 | 123 | 124 | def lidar_project_depth(pc_rotated, cam_calib, img_shape): 125 | pc_rotated = pc_rotated[:3, :].detach().cpu().numpy() 126 | cam_intrinsic = cam_calib.numpy() 127 | pcl_uv, pcl_z = get_2D_lidar_projection(pc_rotated.T, cam_intrinsic) 128 | mask = (pcl_uv[:, 0] > 0) & (pcl_uv[:, 0] < img_shape[1]) & (pcl_uv[:, 1] > 0) & ( 129 | pcl_uv[:, 1] < img_shape[0]) & (pcl_z > 0) 130 | pcl_uv = pcl_uv[mask] 131 | pcl_z = pcl_z[mask] 132 | pcl_uv = pcl_uv.astype(np.uint32) 133 | pcl_z = pcl_z.reshape(-1, 1) 134 | depth_img = np.zeros((img_shape[0], img_shape[1], 1)) 135 | depth_img[pcl_uv[:, 1], pcl_uv[:, 0]] = pcl_z 136 | depth_img = torch.from_numpy(depth_img.astype(np.float32)) 137 | depth_img = depth_img.cuda() 138 | depth_img = depth_img.permute(2, 0, 1) 139 | pc_valid = pc_rotated.T[mask] 140 | 141 | return depth_img, pcl_uv, pc_valid 142 | 143 | 144 | @ex.automain 145 | def main(_config, seed): 146 | global EPOCH, weights 147 | if _config['weight'] is not None: 148 | weights = _config['weight'] 149 | 150 | # if _config['iterative_method'] == 'single': 151 | # weights = [weights[0]] 152 | 153 | dataset_class = DatasetLidarCameraKittiOdometry 154 | # dataset_class = DatasetTest 155 | img_shape = (384, 1280) 156 | input_size = (256, 512) 157 | 158 | # split = 'test' 159 | if _config['random_initial_pose']: 160 | split = 'test_random' 161 | 162 | 163 | if _config['test_sequence'] is None: 164 | raise TypeError('test_sequences cannot be None') 165 | else: 166 | if isinstance(_config['test_sequence'], int): 167 | _config['test_sequence'] = f"{_config['test_sequence']:02d}" 168 | dataset_val = dataset_class(_config['data_folder'], max_r=_config['max_r'], max_t=_config['max_t'], 169 | split='test', use_reflectance=_config['use_reflectance'], 170 | val_sequence=_config['test_sequence']) 171 | 172 | np.random.seed(seed) 173 | torch.random.manual_seed(seed) 174 | 175 | def init_fn(x): 176 | return _init_fn(x, seed) 177 | 178 | num_worker = 6 179 | batch_size = 1 180 | 181 | 182 | TestImgLoader = torch.utils.data.DataLoader(dataset=dataset_val, 183 | shuffle=False, 184 | batch_size=batch_size, 185 | num_workers=num_worker, 186 | worker_init_fn=init_fn, 187 | collate_fn=merge_inputs, 188 | drop_last=False, 189 | pin_memory=False) 190 | 191 | print(len(TestImgLoader)) 192 | if _config['network'].startswith('Res'): 193 | feat = 1 194 | md = 4 195 | split = 
_config['network'].split('_') 196 | for item in split[1:]: 197 | if item.startswith('f'): 198 | feat = int(item[-1]) 199 | elif item.startswith('md'): 200 | md = int(item[2:]) 201 | assert 0 < feat < 7, "Feature Number from PWC have to be between 1 and 6" 202 | assert 0 < md, "md must be positive" 203 | 204 | 205 | models = [] # iterative model 206 | for i in range(len(weights)): 207 | # network choice and settings 208 | if _config['network'].startswith('Res'): 209 | feat = 1 210 | md = 4 211 | split = _config['network'].split('_') 212 | for item in split[1:]: 213 | if item.startswith('f'): 214 | feat = int(item[-1]) 215 | elif item.startswith('md'): 216 | md = int(item[2:]) 217 | assert 0 < feat < 7, "Feature Number from PWC have to be between 1 and 6" 218 | assert 0 < md, "md must be positive" 219 | model = LCCNet(input_size, use_feat_from=feat, md=md, 220 | use_reflectance=_config['use_reflectance'], dropout=_config['dropout']) 221 | from thop import profile 222 | input1 = torch.randn(1, 3, 256, 512) 223 | input2 = torch.randn(1, 1, 256, 512) 224 | flops, params = profile(model, inputs=(input1, input2)) 225 | print('FLOPs = ' + str(flops/1000**3) + 'G') 226 | print('Params = ' + str(params/1000**2) + 'M') 227 | else: 228 | raise TypeError("Network unknown") 229 | 230 | checkpoint = torch.load(weights[i], map_location='cpu') 231 | saved_state_dict = checkpoint['state_dict'] 232 | model.load_state_dict(saved_state_dict,False) 233 | model = model.to(device) 234 | model.eval() 235 | models.append(model) 236 | 237 | 238 | if _config['save_log']: 239 | log_file = f'./results_for_paper/log_seq{_config["test_sequence"]}.csv' 240 | log_file = open(log_file, 'w') 241 | log_file = csv.writer(log_file) 242 | header = ['frame'] 243 | for i in range(len(weights) + 1): 244 | header += [f'iter{i}_error_t', f'iter{i}_error_r', f'iter{i}_error_x', f'iter{i}_error_y', 245 | f'iter{i}_error_z', f'iter{i}_error_r', f'iter{i}_error_p', f'iter{i}_error_y'] 246 | log_file.writerow(header) 247 | 248 | show = _config['show'] 249 | # save image to the output path 250 | _config['output'] = os.path.join(_config['output'], _config['iterative_method']) 251 | rgb_path = os.path.join(_config['output'], 'rgb') 252 | if not os.path.exists(rgb_path): 253 | os.makedirs(rgb_path) 254 | depth_path = os.path.join(_config['output'], 'depth') 255 | if not os.path.exists(depth_path): 256 | os.makedirs(depth_path) 257 | input_path = os.path.join(_config['output'], 'input') 258 | if not os.path.exists(input_path): 259 | os.makedirs(input_path) 260 | gt_path = os.path.join(_config['output'], 'gt') 261 | if not os.path.exists(gt_path): 262 | os.makedirs(gt_path) 263 | if _config['out_fig_lg'] == 'EN': 264 | results_path = os.path.join(_config['output'], 'results_en') 265 | elif _config['out_fig_lg'] == 'CN': 266 | results_path = os.path.join(_config['output'], 'results_cn') 267 | if not os.path.exists(results_path): 268 | os.makedirs(results_path) 269 | pred_path = os.path.join(_config['output'], 'pred') 270 | for it in range(len(weights)): 271 | if not os.path.exists(os.path.join(pred_path, 'iteration_'+str(it+1))): 272 | os.makedirs(os.path.join(pred_path, 'iteration_'+str(it+1))) 273 | 274 | # save pointcloud to the output path 275 | pc_lidar_path = os.path.join(_config['output'], 'pointcloud', 'lidar') 276 | if not os.path.exists(pc_lidar_path): 277 | os.makedirs(pc_lidar_path) 278 | pc_input_path = os.path.join(_config['output'], 'pointcloud', 'input') 279 | if not os.path.exists(pc_input_path): 280 | os.makedirs(pc_input_path) 
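# note: when save_image is enabled, the evaluation loop below also writes the raw LiDAR cloud, the miscalibrated input cloud and the predicted cloud into these 'pointcloud' sub-folders as .pcd files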
281 | pc_pred_path = os.path.join(_config['output'], 'pointcloud', 'pred') 282 | if not os.path.exists(pc_pred_path): 283 | os.makedirs(pc_pred_path) 284 | 285 | 286 | errors_r = [] 287 | errors_t = [] 288 | errors_t2 = [] 289 | errors_xyz = [] 290 | errors_rpy = [] 291 | all_RTs = [] 292 | mis_calib_list = [] 293 | total_time = 0 294 | 295 | prev_tr_error = None 296 | prev_rot_error = None 297 | 298 | for i in range(len(weights) + 1): 299 | errors_r.append([]) 300 | errors_t.append([]) 301 | errors_t2.append([]) 302 | errors_rpy.append([]) 303 | 304 | correct_samples = 0 305 | wrong_samples = 0 306 | network_results = open("./results"+"_"+str(_config['max_r'])+"_"+str(_config['max_t']), 'a') 307 | network_results = csv.writer(network_results, delimiter=',') 308 | network_results.writerow(['gt', 'predict']) 309 | for batch_idx, sample in enumerate(tqdm(TestImgLoader)): 310 | N = 100 # 500 311 | # if batch_idx > 200: 312 | # break 313 | log_string = [str(batch_idx)] 314 | 315 | lidar_input = [] 316 | rgb_input = [] 317 | lidar_gt = [] 318 | shape_pad_input = [] 319 | real_shape_input = [] 320 | pc_rotated_input = [] 321 | RTs = [] 322 | shape_pad = [0, 0, 0, 0] 323 | outlier_filter = False 324 | 325 | if batch_idx == 0 or not _config['use_prev_output']: 326 | # Qui dare posizione di input del frame corrente rispetto alla GT 327 | sample['tr_error'] = sample['tr_error'].cuda() 328 | sample['rot_error'] = sample['rot_error'].cuda() 329 | else: 330 | sample['tr_error'] = prev_tr_error 331 | sample['rot_error'] = prev_rot_error 332 | sample['label'] = sample['label'].cuda() 333 | for idx in range(len(sample['rgb'])): 334 | # ProjectPointCloud in RT-pose 335 | real_shape = [sample['rgb'][idx].shape[1], sample['rgb'][idx].shape[2], sample['rgb'][idx].shape[0]] 336 | 337 | sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda() # 变换到相机坐标系下的激光雷达点云 338 | pc_lidar = sample['point_cloud'][idx].clone() 339 | 340 | if _config['max_depth'] < 80.: 341 | pc_lidar = pc_lidar[:, pc_lidar[0, :] < _config['max_depth']].clone() 342 | 343 | depth_gt, uv_gt, pc_gt_valid = lidar_project_depth(pc_lidar, sample['calib'][idx], real_shape) # image_shape 344 | depth_gt /= _config['max_depth'] 345 | 346 | if _config['save_image']: 347 | # save the Lidar pointcloud 348 | pcl_lidar = o3.PointCloud() 349 | pc_lidar = pc_lidar.detach().cpu().numpy() 350 | pcl_lidar.points = o3.Vector3dVector(pc_lidar.T[:, :3]) 351 | 352 | # o3.draw_geometries(downpcd) 353 | o3.write_point_cloud(pc_lidar_path + '/{}.pcd'.format(batch_idx), pcl_lidar) 354 | 355 | 356 | R = quat2mat(sample['rot_error'][idx]) 357 | T = tvector2mat(sample['tr_error'][idx]) 358 | RT_inv = torch.mm(T, R) 359 | RT = RT_inv.clone().inverse() 360 | 361 | pc_rotated = rotate_back(sample['point_cloud'][idx], RT_inv) # Pc` = RT * Pc 362 | 363 | if _config['max_depth'] < 80.: 364 | pc_rotated = pc_rotated[:, pc_rotated[0, :] < _config['max_depth']].clone() 365 | 366 | depth_img, uv_input, pc_input_valid = lidar_project_depth(pc_rotated, sample['calib'][idx], real_shape) # image_shape 367 | depth_img /= _config['max_depth'] 368 | 369 | if _config['outlier_filter'] and uv_input.shape[0] <= _config['outlier_filter_th']: 370 | outlier_filter = True 371 | else: 372 | outlier_filter = False 373 | 374 | if _config['save_image']: 375 | # save the RGB input pointcloud 376 | img = cv2.imread(sample['img_path'][0]) 377 | R = img[uv_input[:, 1], uv_input[:, 0], 0] / 255 378 | G = img[uv_input[:, 1], uv_input[:, 0], 1] / 255 379 | B = img[uv_input[:, 1], uv_input[:, 0], 2] / 
255 380 | pcl_input = o3.PointCloud() 381 | pcl_input.points = o3.Vector3dVector(pc_input_valid[:, :3]) 382 | pcl_input.colors = o3.Vector3dVector(np.vstack((R, G, B)).T) 383 | 384 | # o3.draw_geometries(downpcd) 385 | o3.write_point_cloud(pc_input_path + '/{}.pcd'.format(batch_idx), pcl_input) 386 | 387 | # PAD ONLY ON RIGHT AND BOTTOM SIDE 388 | rgb = sample['rgb'][idx].cuda() 389 | shape_pad = [0, 0, 0, 0] 390 | 391 | shape_pad[3] = (img_shape[0] - rgb.shape[1]) # // 2 392 | shape_pad[1] = (img_shape[1] - rgb.shape[2]) # // 2 + 1 393 | 394 | rgb = F.pad(rgb, shape_pad) 395 | depth_img = F.pad(depth_img, shape_pad) 396 | depth_gt = F.pad(depth_gt, shape_pad) 397 | 398 | rgb_input.append(rgb) 399 | lidar_input.append(depth_img) 400 | lidar_gt.append(depth_gt) 401 | real_shape_input.append(real_shape) 402 | shape_pad_input.append(shape_pad) 403 | pc_rotated_input.append(pc_rotated) 404 | RTs.append(RT) 405 | 406 | if outlier_filter: 407 | continue 408 | 409 | lidar_input = torch.stack(lidar_input) 410 | rgb_input = torch.stack(rgb_input) 411 | rgb_resize = F.interpolate(rgb_input, size=[256, 512], mode="bilinear") 412 | lidar_resize = F.interpolate(lidar_input, size=[256, 512], mode="bilinear") 413 | 414 | 415 | if _config['save_image']: 416 | out0 = overlay_imgs(rgb_input[0], lidar_input) 417 | out0 = out0[:376, :1241, :] 418 | cv2.imwrite(os.path.join(input_path, sample['rgb_name'][0]), out0[:, :, [2, 1, 0]]*255) 419 | out1 = overlay_imgs(rgb_input[0], lidar_gt[0].unsqueeze(0)) 420 | out1 = out1[:376, :1241, :] 421 | cv2.imwrite(os.path.join(gt_path, sample['rgb_name'][0]), out1[:, :, [2, 1, 0]]*255) 422 | 423 | depth_img = depth_img.detach().cpu().numpy() 424 | depth_img = (depth_img / np.max(depth_img)) * 255 425 | cv2.imwrite(os.path.join(depth_path, sample['rgb_name'][0]), depth_img[0, :376, :1241]) 426 | 427 | if show: 428 | out0 = overlay_imgs(rgb_input[0], lidar_input) 429 | out1 = overlay_imgs(rgb_input[0], lidar_gt[0].unsqueeze(0)) 430 | cv2.imshow("INPUT", out0[:, :, [2, 1, 0]]) 431 | cv2.imshow("GT", out1[:, :, [2, 1, 0]]) 432 | cv2.waitKey(1) 433 | 434 | rgb = rgb_input.to(device) 435 | lidar = lidar_input.to(device) 436 | rgb_resize = rgb_resize.to(device) 437 | lidar_resize = lidar_resize.to(device) 438 | 439 | target_transl = sample['tr_error'].to(device) 440 | target_rot = sample['rot_error'].to(device) 441 | 442 | # the initial calibration errors before sensor calibration 443 | RT1 = RTs[0] 444 | mis_calib = torch.stack(sample['initial_RT'])[1:] 445 | mis_calib_list.append(mis_calib) 446 | 447 | T_composed = RT1[:3, 3] 448 | R_composed = quaternion_from_matrix(RT1) 449 | errors_t[0].append(T_composed.norm().item()) 450 | errors_t2[0].append(T_composed) 451 | errors_r[0].append(quaternion_distance(R_composed.unsqueeze(0), 452 | torch.tensor([1., 0., 0., 0.], device=R_composed.device).unsqueeze(0), 453 | R_composed.device)) 454 | rpy_error = mat2xyzrpy(RT1)[3:] 455 | 456 | rpy_error *= (180.0 / 3.141592) 457 | errors_rpy[0].append(rpy_error) 458 | log_string += [str(errors_t[0][-1]), str(errors_r[0][-1]), str(errors_t2[0][-1][0].item()), 459 | str(errors_t2[0][-1][1].item()), str(errors_t2[0][-1][2].item()), 460 | str(errors_rpy[0][-1][0].item()), str(errors_rpy[0][-1][1].item()), 461 | str(errors_rpy[0][-1][2].item())] 462 | 463 | start = 0 464 | # t1 = time.time() 465 | 466 | # Run model 467 | with torch.no_grad(): 468 | for iteration in range(start, len(weights)): 469 | # Run the i-th network 470 | t1 = time.time() 471 | classification_results = 
models[iteration](rgb_resize, lidar_resize) 472 | run_time = time.time() - t1 473 | _, classify_reaults = torch.max(classification_results,dim=1) 474 | statistic_results = torch.eq(classify_reaults,sample['label'].long()) 475 | correct_samples += torch.sum(statistic_results, dim=0) 476 | wrong_samples += statistic_results.shape[0] - torch.sum(statistic_results, dim=0) 477 | for i in range(classify_reaults.shape[0]): 478 | network_results.writerow([str(int(sample['label'][i].cpu().numpy())), str(classify_reaults[i].cpu().numpy())]) 479 | 480 | R_predicted = torch.tensor([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]]).float().cuda() 481 | T_predicted = torch.tensor([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]]).float().cuda() 482 | RT_predicted = torch.mm(T_predicted, R_predicted) 483 | RTs.append(torch.mm(RTs[iteration], RT_predicted)) # inv(H_gt)*H_pred_1*H_pred_2*.....H_pred_n 484 | if iteration == 0: 485 | rotated_point_cloud = pc_rotated_input[0] 486 | else: 487 | rotated_point_cloud = rotated_point_cloud 488 | 489 | rotated_point_cloud = rotate_forward(rotated_point_cloud, RT_predicted) # H_pred*X_init 490 | 491 | depth_img_pred, uv_pred, pc_pred_valid = lidar_project_depth(rotated_point_cloud, sample['calib'][0], real_shape_input[0]) # image_shape 492 | depth_img_pred /= _config['max_depth'] 493 | depth_pred = F.pad(depth_img_pred, shape_pad_input[0]) 494 | lidar = depth_pred.unsqueeze(0) 495 | lidar_resize = F.interpolate(lidar, size=[256, 512], mode="bilinear") 496 | 497 | if iteration == len(weights)-1 and _config['save_image']: 498 | # save the RGB pointcloud 499 | img = cv2.imread(sample['img_path'][0]) 500 | R = img[uv_pred[:, 1], uv_pred[:, 0], 0] / 255 501 | G = img[uv_pred[:, 1], uv_pred[:, 0], 1] / 255 502 | B = img[uv_pred[:, 1], uv_pred[:, 0], 2] / 255 503 | pcl_pred = o3.PointCloud() 504 | pcl_pred.points = o3.Vector3dVector(pc_pred_valid[:, :3]) 505 | pcl_pred.colors = o3.Vector3dVector(np.vstack((R, G, B)).T) 506 | 507 | # o3.draw_geometries(downpcd) 508 | o3.write_point_cloud(pc_pred_path + '/{}.pcd'.format(batch_idx), pcl_pred) 509 | 510 | 511 | if _config['save_image']: 512 | out2 = overlay_imgs(rgb_input[0], lidar) 513 | out2 = out2[:376, :1241, :] 514 | cv2.imwrite(os.path.join(os.path.join(pred_path, 'iteration_'+str(iteration+1)), 515 | sample['rgb_name'][0]), out2[:, :, [2, 1, 0]]*255) 516 | if show: 517 | out2 = overlay_imgs(rgb_input[0], lidar) 518 | cv2.imshow(f'Pred_Iter_{iteration}', out2[:, :, [2, 1, 0]]) 519 | cv2.waitKey(1) 520 | 521 | # inv(H_init)*H_pred 522 | T_composed = RTs[iteration + 1][:3, 3] 523 | R_composed = quaternion_from_matrix(RTs[iteration + 1]) 524 | errors_t[iteration + 1].append(T_composed.norm().item()) 525 | errors_t2[iteration + 1].append(T_composed) 526 | errors_r[iteration + 1].append(quaternion_distance(R_composed.unsqueeze(0), 527 | torch.tensor([1., 0., 0., 0.], device=R_composed.device).unsqueeze(0), 528 | R_composed.device)) 529 | 530 | # rpy_error = quaternion_to_tait_bryan(R_composed) 531 | rpy_error = mat2xyzrpy(RTs[iteration + 1])[3:] 532 | rpy_error *= (180.0 / 3.141592) 533 | errors_rpy[iteration + 1].append(rpy_error) 534 | log_string += [str(errors_t[iteration + 1][-1]), str(errors_r[iteration + 1][-1]), 535 | str(errors_t2[iteration + 1][-1][0].item()), str(errors_t2[iteration + 1][-1][1].item()), 536 | str(errors_t2[iteration + 1][-1][2].item()), str(errors_rpy[iteration + 1][-1][0].item()), 537 | str(errors_rpy[iteration + 1][-1][1].item()), str(errors_rpy[iteration + 1][-1][2].item())] 538 | 539 | # run_time = time.time() 
- t1 540 | total_time += run_time 541 | 542 | # final calibration error 543 | all_RTs.append(RTs[-1]) 544 | prev_RT = RTs[-1].inverse() 545 | prev_tr_error = prev_RT[:3, 3].unsqueeze(0) 546 | prev_rot_error = quaternion_from_matrix(prev_RT).unsqueeze(0) 547 | 548 | if _config['save_log']: 549 | log_file.writerow(log_string) 550 | 551 | # Yaw: the y component of the Euler-angle vector 552 | # Pitch: the x component of the Euler-angle vector 553 | # Roll: the z component of the Euler-angle vector 554 | # mis_calib_input[transl_x, transl_y, transl_z, rotx, roty, rotz] Nx6 555 | mis_calib_input = torch.stack(mis_calib_list)[:, :, 0] 556 | 557 | if _config['save_log']: 558 | log_file.close() 559 | print("Iterative refinement: ") 560 | for i in range(len(weights) + 1): 561 | errors_r[i] = torch.tensor(errors_r[i]).abs() * (180.0 / 3.141592) 562 | errors_t[i] = torch.tensor(errors_t[i]).abs() * 100 563 | 564 | for k in range(len(errors_rpy[i])): 565 | # errors_rpy[i][k] = torch.tensor(errors_rpy[i][k]) 566 | # errors_t2[i][k] = torch.tensor(errors_t2[i][k]) * 100 567 | errors_rpy[i][k] = errors_rpy[i][k].clone().detach().abs() 568 | errors_t2[i][k] = errors_t2[i][k].clone().detach().abs() * 100 569 | 570 | print(f"Iteration {i}: \tMean Translation Error: {errors_t[i].mean():.4f} cm " 571 | f" Mean Rotation Error: {errors_r[i].mean():.4f} °") 572 | print(f"Iteration {i}: \tMedian Translation Error: {errors_t[i].median():.4f} cm " 573 | f" Median Rotation Error: {errors_r[i].median():.4f} °") 574 | print(f"Iteration {i}: \tStd. Translation Error: {errors_t[i].std():.4f} cm " 575 | f" Std. Rotation Error: {errors_r[i].std():.4f} °\n") 576 | 577 | # translation xyz 578 | print(f"Iteration {i}: \tMean Translation X Error: {errors_t2[i][0].mean():.4f} cm " 579 | f" Median Translation X Error: {errors_t2[i][0].median():.4f} cm " 580 | f" Std. Translation X Error: {errors_t2[i][0].std():.4f} cm ") 581 | print(f"Iteration {i}: \tMean Translation Y Error: {errors_t2[i][1].mean():.4f} cm " 582 | f" Median Translation Y Error: {errors_t2[i][1].median():.4f} cm " 583 | f" Std. Translation Y Error: {errors_t2[i][1].std():.4f} cm ") 584 | print(f"Iteration {i}: \tMean Translation Z Error: {errors_t2[i][2].mean():.4f} cm " 585 | f" Median Translation Z Error: {errors_t2[i][2].median():.4f} cm " 586 | f" Std. Translation Z Error: {errors_t2[i][2].std():.4f} cm \n") 587 | 588 | # rotation rpy 589 | print(f"Iteration {i}: \tMean Rotation Roll Error: {errors_rpy[i][0].mean(): .4f} °" 590 | f" Median Rotation Roll Error: {errors_rpy[i][0].median():.4f} °" 591 | f" Std. Rotation Roll Error: {errors_rpy[i][0].std():.4f} °") 592 | print(f"Iteration {i}: \tMean Rotation Pitch Error: {errors_rpy[i][1].mean(): .4f} °" 593 | f" Median Rotation Pitch Error: {errors_rpy[i][1].median():.4f} °" 594 | f" Std. Rotation Pitch Error: {errors_rpy[i][1].std():.4f} °") 595 | print(f"Iteration {i}: \tMean Rotation Yaw Error: {errors_rpy[i][2].mean(): .4f} °" 596 | f" Median Rotation Yaw Error: {errors_rpy[i][2].median():.4f} °" 597 | f" Std. 
Rotation Yaw Error: {errors_rpy[i][2].std():.4f} °\n") 598 | 599 | 600 | with open(os.path.join(_config['output'], 'results.txt'), 601 | 'a', encoding='utf-8') as f: 602 | f.write(f"Iteration {i}: \n") 603 | f.write("Translation Error && Rotation Error:\n") 604 | f.write(f"Iteration {i}: \tMean Translation Error: {errors_t[i].mean():.4f} cm " 605 | f" Mean Rotation Error: {errors_r[i].mean():.4f} °\n") 606 | f.write(f"Iteration {i}: \tMedian Translation Error: {errors_t[i].median():.4f} cm " 607 | f" Median Rotation Error: {errors_r[i].median():.4f} °\n") 608 | f.write(f"Iteration {i}: \tStd. Translation Error: {errors_t[i].std():.4f} cm " 609 | f" Std. Rotation Error: {errors_r[i].std():.4f} °\n\n") 610 | 611 | # translation xyz 612 | f.write("Translation Error XYZ:\n") 613 | f.write(f"Iteration {i}: \tMean Translation X Error: {errors_t2[i][0].mean():.4f} cm " 614 | f" Median Translation X Error: {errors_t2[i][0].median():.4f} cm " 615 | f" Std. Translation X Error: {errors_t2[i][0].std():.4f} cm \n") 616 | f.write(f"Iteration {i}: \tMean Translation Y Error: {errors_t2[i][1].mean():.4f} cm " 617 | f" Median Translation Y Error: {errors_t2[i][1].median():.4f} cm " 618 | f" Std. Translation Y Error: {errors_t2[i][1].std():.4f} cm \n") 619 | f.write(f"Iteration {i}: \tMean Translation Z Error: {errors_t2[i][2].mean():.4f} cm " 620 | f" Median Translation Z Error: {errors_t2[i][2].median():.4f} cm " 621 | f" Std. Translation Z Error: {errors_t2[i][2].std():.4f} cm \n\n") 622 | 623 | # rotation rpy 624 | f.write("Rotation Error RPY:\n") 625 | f.write(f"Iteration {i}: \tMean Rotation Roll Error: {errors_rpy[i][0].mean(): .4f} °" 626 | f" Median Rotation Roll Error: {errors_rpy[i][0].median():.4f} °" 627 | f" Std. Rotation Roll Error: {errors_rpy[i][0].std():.4f} °\n") 628 | f.write(f"Iteration {i}: \tMean Rotation Pitch Error: {errors_rpy[i][1].mean(): .4f} °" 629 | f" Median Rotation Pitch Error: {errors_rpy[i][1].median():.4f} °" 630 | f" Std. Rotation Pitch Error: {errors_rpy[i][1].std():.4f} °\n") 631 | f.write(f"Iteration {i}: \tMean Rotation Yaw Error: {errors_rpy[i][2].mean(): .4f} °" 632 | f" Median Rotation Yaw Error: {errors_rpy[i][2].median():.4f} °" 633 | f" Std. 
Rotation Yaw Error: {errors_rpy[i][2].std():.4f} °\n\n\n") 634 | 635 | for i in range(len(errors_t2)): 636 | errors_t2[i] = torch.stack(errors_t2[i]).abs() / 100 637 | errors_rpy[i] = torch.stack(errors_rpy[i]).abs() 638 | 639 | plot_x = np.zeros((mis_calib_input.shape[0], 2)) 640 | plot_x[:, 0] = mis_calib_input[:, 0].cpu().numpy() 641 | plot_x[:, 1] = errors_t2[-1][:, 0].cpu().numpy() 642 | plot_x = plot_x[np.lexsort(plot_x[:, ::-1].T)] 643 | 644 | plot_y = np.zeros((mis_calib_input.shape[0], 2)) 645 | plot_y[:, 0] = mis_calib_input[:, 1].cpu().numpy() 646 | plot_y[:, 1] = errors_t2[-1][:, 1].cpu().numpy() 647 | plot_y = plot_y[np.lexsort(plot_y[:, ::-1].T)] 648 | 649 | plot_z = np.zeros((mis_calib_input.shape[0], 2)) 650 | plot_z[:, 0] = mis_calib_input[:, 2].cpu().numpy() 651 | plot_z[:, 1] = errors_t2[-1][:, 2].cpu().numpy() 652 | plot_z = plot_z[np.lexsort(plot_z[:, ::-1].T)] 653 | 654 | N_interval = plot_x.shape[0] // N 655 | plot_x = plot_x[::N_interval] 656 | plot_y = plot_y[::N_interval] 657 | plot_z = plot_z[::N_interval] 658 | 659 | plt.plot(plot_x[:, 0], plot_x[:, 1], c='red', label='X') 660 | plt.plot(plot_y[:, 0], plot_y[:, 1], c='blue', label='Y') 661 | plt.plot(plot_z[:, 0], plot_z[:, 1], c='green', label='Z') 662 | # plt.legend(loc='best') 663 | 664 | if _config['out_fig_lg'] == 'EN': 665 | plt.xlabel('Miscalibration (m)', font_EN) 666 | plt.ylabel('Absolute Error (m)', font_EN) 667 | plt.legend(loc='best', prop=font_EN) 668 | elif _config['out_fig_lg'] == 'CN': 669 | plt.xlabel('初始标定外参偏差/米', font_CN) 670 | plt.ylabel('绝对误差/米', font_CN) 671 | plt.legend(loc='best', prop=font_CN) 672 | 673 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 674 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 675 | 676 | plt.savefig(os.path.join(results_path, 'xyz_plot.png')) 677 | plt.close('all') 678 | 679 | errors_t = errors_t[-1].numpy() 680 | errors_t = np.sort(errors_t, axis=0)[:-10] # 去掉一些异常值 681 | # plt.title('Calibration Translation Error Distribution') 682 | plt.hist(errors_t / 100, bins=50) 683 | # ax = plt.gca() 684 | # ax.set_xlabel('Absolute Translation Error (m)') 685 | # ax.set_ylabel('Number of instances') 686 | # ax.set_xticks([0.00, 0.25, 0.00, 0.25, 0.50]) 687 | 688 | if _config['out_fig_lg'] == 'EN': 689 | plt.xlabel('Absolute Translation Error (m)', font_EN) 690 | plt.ylabel('Number of instances', font_EN) 691 | elif _config['out_fig_lg'] == 'CN': 692 | plt.xlabel('绝对平移误差/米', font_CN) 693 | plt.ylabel('实验序列数目/个', font_CN) 694 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 695 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 696 | 697 | plt.savefig(os.path.join(results_path, 'translation_error_distribution.png')) 698 | plt.close('all') 699 | 700 | # rotation error 701 | # fig = plt.figure(figsize=(6, 3)) # 设置图大小 figsize=(6,3) 702 | # plt.title('Calibration Rotation Error') 703 | plot_pitch = np.zeros((mis_calib_input.shape[0], 2)) 704 | plot_pitch[:, 0] = mis_calib_input[:, 3].cpu().numpy() * (180.0 / 3.141592) 705 | plot_pitch[:, 1] = errors_rpy[-1][:, 1].cpu().numpy() 706 | plot_pitch = plot_pitch[np.lexsort(plot_pitch[:, ::-1].T)] 707 | 708 | plot_yaw = np.zeros((mis_calib_input.shape[0], 2)) 709 | plot_yaw[:, 0] = mis_calib_input[:, 4].cpu().numpy() * (180.0 / 3.141592) 710 | plot_yaw[:, 1] = errors_rpy[-1][:, 2].cpu().numpy() 711 | plot_yaw = plot_yaw[np.lexsort(plot_yaw[:, ::-1].T)] 712 | 713 | plot_roll = np.zeros((mis_calib_input.shape[0], 2)) 714 | plot_roll[:, 0] = mis_calib_input[:, 5].cpu().numpy() * (180.0 
/ 3.141592) 715 | plot_roll[:, 1] = errors_rpy[-1][:, 0].cpu().numpy() 716 | plot_roll = plot_roll[np.lexsort(plot_roll[:, ::-1].T)] 717 | 718 | N_interval = plot_roll.shape[0] // N 719 | plot_pitch = plot_pitch[::N_interval] 720 | plot_yaw = plot_yaw[::N_interval] 721 | plot_roll = plot_roll[::N_interval] 722 | 723 | # Yaw(偏航):欧拉角向量的y轴 724 | # Pitch(俯仰):欧拉角向量的x轴 725 | # Roll(翻滚): 欧拉角向量的z轴 726 | 727 | if _config['out_fig_lg'] == 'EN': 728 | plt.plot(plot_yaw[:, 0], plot_yaw[:, 1], c='red', label='Yaw(Y)') 729 | plt.plot(plot_pitch[:, 0], plot_pitch[:, 1], c='blue', label='Pitch(X)') 730 | plt.plot(plot_roll[:, 0], plot_roll[:, 1], c='green', label='Roll(Z)') 731 | plt.xlabel('Miscalibration (°)', font_EN) 732 | plt.ylabel('Absolute Error (°)', font_EN) 733 | plt.legend(loc='best', prop=font_EN) 734 | elif _config['out_fig_lg'] == 'CN': 735 | plt.plot(plot_yaw[:, 0], plot_yaw[:, 1], c='red', label='偏航角') 736 | plt.plot(plot_pitch[:, 0], plot_pitch[:, 1], c='blue', label='俯仰角') 737 | plt.plot(plot_roll[:, 0], plot_roll[:, 1], c='green', label='翻滚角') 738 | plt.xlabel('初始标定外参偏差/度', font_CN) 739 | plt.ylabel('绝对误差/度', font_CN) 740 | plt.legend(loc='best', prop=font_CN) 741 | 742 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 743 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 744 | plt.savefig(os.path.join(results_path, 'rpy_plot.png')) 745 | plt.close('all') 746 | 747 | errors_r = errors_r[-1].numpy() 748 | errors_r = np.sort(errors_r, axis=0)[:-10] # 去掉一些异常值 749 | # np.savetxt('rot_error.txt', arr_, fmt='%0.8f') 750 | # print('max rotation_error: {}'.format(max(errors_r))) 751 | # plt.title('Calibration Rotation Error Distribution') 752 | plt.hist(errors_r, bins=50) 753 | #plt.xlim([0, 1.5]) # x轴边界 754 | #plt.xticks([0.0, 0.3, 0.6, 0.9, 1.2, 1.5]) # 设置x刻度 755 | # ax = plt.gca() 756 | 757 | if _config['out_fig_lg'] == 'EN': 758 | plt.xlabel('Absolute Rotation Error (°)', font_EN) 759 | plt.ylabel('Number of instances', font_EN) 760 | elif _config['out_fig_lg'] == 'CN': 761 | plt.xlabel('绝对旋转误差/度', font_CN) 762 | plt.ylabel('实验序列数目/个', font_CN) 763 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 764 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 765 | plt.savefig(os.path.join(results_path, 'rotation_error_distribution.png')) 766 | plt.close('all') 767 | 768 | print("total accuracy:",float(correct_samples)/float(correct_samples + wrong_samples)) 769 | if _config["save_name"] is not None: 770 | torch.save(torch.stack(errors_t).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_t') 771 | torch.save(torch.stack(errors_r).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_r') 772 | torch.save(torch.stack(errors_t2).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_t2') 773 | torch.save(torch.stack(errors_rpy).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_rpy') 774 | 775 | avg_time = total_time / len(TestImgLoader) 776 | print("average runing time on {} iteration: {} s".format(len(weights), avg_time)) 777 | print("End!") 778 | 779 | 780 | -------------------------------------------------------------------------------- /models/LCCNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Original implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018 3 | Jinwei Gu and Zhile Ren 4 | Modified version (CMRNet) by Daniele Cattaneo 5 | Modified version (LCCNet) by Xudong Lv 6 | Modified version (LCCNet) by 
Pengjin Wei 7 | """ 8 | 9 | import torch 10 | import torchvision 11 | import torchvision.transforms as transforms 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | from torch.autograd import Variable 15 | import torchvision.models as models 16 | import torch.utils.model_zoo as model_zoo 17 | #from models.CMRNet.modules.attention import * 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | import torch.optim as optim 21 | import math 22 | import argparse 23 | import os 24 | import os.path 25 | import matplotlib.pyplot as plt 26 | import matplotlib.image as mpimg 27 | from PIL import Image 28 | 29 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 30 | 31 | # from .networks.submodules import * 32 | # from .networks.correlation_package.correlation import Correlation 33 | from models.correlation_package.correlation import Correlation 34 | 35 | 36 | 37 | # __all__ = [ 38 | # 'calib_net' 39 | # ] 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | identity = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | identity = self.downsample(x) 67 | 68 | out += identity 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = conv1x1(inplanes, planes) 80 | self.bn1 = nn.BatchNorm2d(planes) 81 | self.conv2 = conv3x3(planes, planes, stride) 82 | self.bn2 = nn.BatchNorm2d(planes) 83 | self.conv3 = conv1x1(planes, planes * self.expansion) 84 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 85 | self.elu = nn.ELU() 86 | self.leakyRELU = nn.LeakyReLU(0.1) 87 | self.downsample = downsample 88 | self.stride = stride 89 | 90 | def forward(self, x): 91 | identity = x 92 | 93 | out = self.conv1(x) 94 | out = self.bn1(out) 95 | out = self.leakyRELU(out) 96 | 97 | out = self.conv2(out) 98 | out = self.bn2(out) 99 | out = self.leakyRELU(out) 100 | 101 | out = self.conv3(out) 102 | out = self.bn3(out) 103 | 104 | if self.downsample is not None: 105 | identity = self.downsample(x) 106 | 107 | out += identity 108 | out = self.leakyRELU(out) 109 | 110 | return out 111 | 112 | 113 | class SEBottleneck(nn.Module): 114 | expansion = 4 115 | __constants__ = ['downsample'] 116 | 117 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 118 | base_width=64, dilation=1, norm_layer=None, reduction=16): 119 | super(SEBottleneck, self).__init__() 120 | if norm_layer is None: 121 | norm_layer = nn.BatchNorm2d 122 | width = int(planes * (base_width / 64.)) * groups 123 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 124 | self.conv1 = conv1x1(inplanes, width) 125 | self.bn1 = norm_layer(width) 126 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 127 | self.bn2 = norm_layer(width) 128 | self.conv3 = conv1x1(width, planes * self.expansion) 129 | self.bn3 = norm_layer(planes * self.expansion) 
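# Editor note (hedged sketch): SEBottleneck.forward() below applies self.attention(out),
# but every attention-module assignment in this constructor is commented out, so building
# SEBottleneck with attention=True would fail with an AttributeError. One minimal
# squeeze-and-excitation gate that would satisfy that call is sketched here
# (hypothetical, not part of this repository):
#
#   self.attention = nn.Sequential(
#       nn.AdaptiveAvgPool2d(1),   # squeeze: global average pool to 1x1
#       nn.Conv2d(planes * self.expansion, (planes * self.expansion) // reduction, kernel_size=1),
#       nn.ReLU(inplace=True),
#       nn.Conv2d((planes * self.expansion) // reduction, planes * self.expansion, kernel_size=1),
#       nn.Sigmoid())              # excitation: per-channel gate in (0, 1)
#
#   # forward() would then rescale rather than replace the features:
#   #   out = out * self.attention(out)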
130 | self.relu = nn.ReLU(inplace=True) 131 | self.leakyRELU = nn.LeakyReLU(0.1) 132 | # self.attention = SCSElayer(planes * self.expansion, ratio=reduction) 133 | # self.attention = SElayer(planes * self.expansion, ratio=reduction) 134 | # self.attention = ECAlayer(planes * self.expansion, ratio=reduction) 135 | # self.attention = SElayer_conv(planes * self.expansion, ratio=reduction) 136 | # self.attention = SCSElayer(planes * self.expansion, ratio=reduction) 137 | # self.attention = ModifiedSCSElayer(planes * self.expansion, ratio=reduction) 138 | # self.attention = DPCSAlayer(planes * self.expansion, ratio=reduction) 139 | # self.attention = PAlayer(planes * self.expansion, ratio=reduction) 140 | # self.attention = CAlayer(planes * self.expansion, ratio=reduction) 141 | self.downsample = downsample 142 | self.stride = stride 143 | 144 | def forward(self, x): 145 | identity = x 146 | 147 | out = self.conv1(x) 148 | out = self.bn1(out) 149 | out = self.leakyRELU(out) 150 | 151 | out = self.conv2(out) 152 | out = self.bn2(out) 153 | out = self.leakyRELU(out) 154 | 155 | out = self.conv3(out) 156 | out = self.bn3(out) 157 | out = self.attention(out) 158 | 159 | if self.downsample is not None: 160 | identity = self.downsample(x) 161 | 162 | out += identity 163 | out = self.leakyRELU(out) 164 | 165 | return out 166 | 167 | 168 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 169 | """3x3 convolution with padding""" 170 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 171 | padding=dilation, groups=groups, bias=False, dilation=dilation) 172 | 173 | 174 | def conv1x1(in_planes, out_planes, stride=1): 175 | """1x1 convolution""" 176 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 177 | 178 | 179 | def myconv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1): 180 | return nn.Sequential( 181 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, 182 | groups=1, bias=True), 183 | nn.LeakyReLU(0.1)) 184 | 185 | 186 | def predict_flow(in_planes): 187 | return nn.Conv2d(in_planes, 2, kernel_size=3, stride=1, padding=1, bias=True) 188 | 189 | 190 | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1): 191 | return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True) 192 | 193 | 194 | class ResnetEncoder(nn.Module): 195 | """Pytorch module for a resnet encoder 196 | """ 197 | def __init__(self, num_layers, pretrained, num_input_images=1): 198 | super(ResnetEncoder, self).__init__() 199 | 200 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 201 | 202 | resnets = {18: models.resnet18, 203 | 34: models.resnet34, 204 | 50: models.resnet50, 205 | 101: models.resnet101, 206 | 152: models.resnet152} 207 | 208 | if num_layers not in resnets: 209 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 210 | 211 | self.encoder = resnets[num_layers](pretrained) 212 | 213 | if num_layers > 34: 214 | self.num_ch_enc[1:] *= 4 215 | 216 | def forward(self, input_image): 217 | self.features = [] 218 | x = (input_image - 0.45) / 0.225 219 | x = self.encoder.conv1(x) 220 | x = self.encoder.bn1(x) 221 | self.features.append(self.encoder.relu(x)) 222 | # self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 223 | self.features.append(self.encoder.maxpool(self.features[-1])) 224 | self.features.append(self.encoder.layer1(self.features[-1])) 225 | 
self.features.append(self.encoder.layer2(self.features[-1])) 226 | self.features.append(self.encoder.layer3(self.features[-1])) 227 | self.features.append(self.encoder.layer4(self.features[-1])) 228 | 229 | return self.features 230 | 231 | 232 | class LCCNet(nn.Module): 233 | """ 234 | Based on the PWC-DC net. add resnet encoder, dilation convolution and densenet connections 235 | """ 236 | 237 | def __init__(self, image_size, use_feat_from=1, md=4, use_reflectance=False, dropout=0.0, 238 | Action_Func='leakyrelu', attention=False, res_num=18): 239 | """ 240 | input: md --- maximum displacement (for correlation. default: 4), after warpping 241 | """ 242 | super(LCCNet, self).__init__() 243 | input_lidar = 1 244 | self.res_num = res_num 245 | self.use_feat_from = use_feat_from 246 | if use_reflectance: 247 | input_lidar = 2 248 | 249 | # original resnet 250 | self.pretrained_encoder = False 251 | self.net_encoder = ResnetEncoder(num_layers=self.res_num, pretrained=True, num_input_images=1) 252 | 253 | # resnet with leakyRELU 254 | self.Action_Func = Action_Func 255 | self.attention = attention 256 | self.inplanes = 4 257 | if self.res_num == 50: 258 | layers = [3, 4, 6, 3] 259 | add_list = [1024, 512, 256, 64] 260 | elif self.res_num == 18: 261 | layers = [2, 2, 2, 2] 262 | add_list = [256, 128, 64, 64] 263 | 264 | if self.attention: 265 | block = SEBottleneck 266 | else: 267 | if self.res_num == 50: 268 | block = Bottleneck 269 | elif self.res_num == 18: 270 | block = BasicBlock 271 | 272 | 273 | # rgb_image 274 | self.conv1_rgb = nn.Conv2d(3, 4, kernel_size=7, stride=2, padding=3) 275 | self.elu_rgb = nn.ELU() 276 | self.leakyRELU_rgb = nn.LeakyReLU(0.1) 277 | self.maxpool_rgb = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 278 | self.layer1_rgb = self._make_layer(block, 4, layers[0]) 279 | self.layer2_rgb = self._make_layer(block, 8, layers[1], stride=2) 280 | self.layer3_rgb = self._make_layer(block, 16, layers[2], stride=2) 281 | self.layer4_rgb = self._make_layer(block, 32, layers[3], stride=2) 282 | 283 | # lidar_image 284 | self.inplanes = 4 285 | self.conv1_lidar = nn.Conv2d(input_lidar, 4, kernel_size=7, stride=2, padding=3) 286 | self.elu_lidar = nn.ELU() 287 | self.leakyRELU_lidar = nn.LeakyReLU(0.1) 288 | self.maxpool_lidar = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 289 | self.layer1_lidar = self._make_layer(block, 4, layers[0]) 290 | self.layer2_lidar = self._make_layer(block, 8, layers[1], stride=2) 291 | self.layer3_lidar = self._make_layer(block, 16, layers[2], stride=2) 292 | self.layer4_lidar = self._make_layer(block, 32, layers[3], stride=2) 293 | 294 | self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1) 295 | self.leakyRELU = nn.LeakyReLU(0.1) 296 | 297 | nd = (2 * md + 1) ** 2 298 | dd = np.cumsum([128, 128, 96, 64, 32]) 299 | 300 | od = nd 301 | self.conv6_0 = myconv(od, 128, kernel_size=3, stride=1) 302 | self.conv6_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 303 | self.conv6_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 304 | self.conv6_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 305 | self.conv6_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 306 | 307 | if use_feat_from > 1: 308 | self.predict_flow6 = predict_flow(od + dd[4]) 309 | self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 310 | self.upfeat6 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 311 | 312 | od = nd + add_list[0] + 4 313 | self.conv5_0 = myconv(od, 128, kernel_size=3, 
stride=1) 314 | self.conv5_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 315 | self.conv5_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 316 | self.conv5_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 317 | self.conv5_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 318 | 319 | if use_feat_from > 2: 320 | self.predict_flow5 = predict_flow(od + dd[4]) 321 | self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 322 | self.upfeat5 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 323 | 324 | od = nd + add_list[1] + 4 325 | self.conv4_0 = myconv(od, 128, kernel_size=3, stride=1) 326 | self.conv4_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 327 | self.conv4_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 328 | self.conv4_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 329 | self.conv4_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 330 | 331 | if use_feat_from > 3: 332 | self.predict_flow4 = predict_flow(od + dd[4]) 333 | self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 334 | self.upfeat4 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 335 | 336 | od = nd + add_list[2] + 4 337 | self.conv3_0 = myconv(od, 128, kernel_size=3, stride=1) 338 | self.conv3_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 339 | self.conv3_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 340 | self.conv3_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 341 | self.conv3_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 342 | 343 | if use_feat_from > 4: 344 | self.predict_flow3 = predict_flow(od + dd[4]) 345 | self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 346 | self.upfeat3 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 347 | 348 | od = nd + add_list[3] + 4 349 | self.conv2_0 = myconv(od, 128, kernel_size=3, stride=1) 350 | self.conv2_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 351 | self.conv2_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 352 | self.conv2_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 353 | self.conv2_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 354 | 355 | if use_feat_from > 5: 356 | self.predict_flow2 = predict_flow(od + dd[4]) 357 | self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 358 | 359 | self.dc_conv1 = myconv(od + dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1) 360 | self.dc_conv2 = myconv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2) 361 | self.dc_conv3 = myconv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4) 362 | self.dc_conv4 = myconv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8) 363 | self.dc_conv5 = myconv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16) 364 | self.dc_conv6 = myconv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1) 365 | self.dc_conv7 = predict_flow(32) 366 | 367 | fc_size = od + dd[4] 368 | downsample = 128 // (2**use_feat_from) 369 | if image_size[0] % downsample == 0: 370 | fc_size *= image_size[0] // downsample 371 | else: 372 | fc_size *= (image_size[0] // downsample)+1 373 | if image_size[1] % downsample == 0: 374 | fc_size *= image_size[1] // downsample 375 | else: 376 | fc_size *= (image_size[1] // downsample)+1 377 | self.fc1 = nn.Linear(fc_size * 4, 512) 378 | 379 | self.fc1_trasl = nn.Linear(512, 256) 380 | self.fc1_rot = nn.Linear(512, 256) 381 | 382 | self.fc2_trasl = nn.Linear(256, 3) 383 | self.fc2_rot = nn.Linear(256, 4) 384 | 385 | self.dropout = nn.Dropout(dropout) 386 | 387 | self.classification1 = nn.Linear(512,256) 388 | 
self.classification2 = nn.Linear(256,2) 389 | 390 | for m in self.modules(): 391 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 392 | nn.init.kaiming_normal_(m.weight.data, mode='fan_in') 393 | if m.bias is not None: 394 | m.bias.data.zero_() 395 | 396 | # def _make_layer(self, block, planes, blocks, stride=1): 397 | # layers = [] 398 | # layers.append(block(self.inplanes, planes, 1)) 399 | # self.inplanes = planes * block.expansion 400 | # for i in range(1, blocks - 1): 401 | # layers.append(block(self.inplanes, planes, 1)) 402 | # layers.append(block(self.inplanes, planes, 2)) 403 | # 404 | # return nn.Sequential(*layers) 405 | 406 | 407 | def _make_layer(self, block, planes, blocks, stride=1): 408 | downsample = None 409 | if stride != 1 or self.inplanes != planes * block.expansion: 410 | downsample = nn.Sequential( 411 | conv1x1(self.inplanes, planes * block.expansion, stride), 412 | nn.BatchNorm2d(planes * block.expansion), 413 | ) 414 | 415 | layers = [] 416 | layers.append(block(self.inplanes, planes, stride, downsample)) 417 | self.inplanes = planes * block.expansion 418 | for _ in range(1, blocks): 419 | layers.append(block(self.inplanes, planes)) 420 | 421 | return nn.Sequential(*layers) 422 | 423 | 424 | def warp(self, x, flo): 425 | """ 426 | warp an image/tensor (im2) back to im1, according to the optical flow 427 | x: [B, C, H, W] (im2) 428 | flo: [B, 2, H, W] flow 429 | """ 430 | B, C, H, W = x.size() 431 | # mesh grid 432 | xx = torch.arange(0, W).view(1, -1).repeat(H, 1) 433 | yy = torch.arange(0, H).view(-1, 1).repeat(1, W) 434 | xx = xx.view(1, 1, H, W).repeat(B, 1, 1, 1) 435 | yy = yy.view(1, 1, H, W).repeat(B, 1, 1, 1) 436 | grid = torch.cat((xx, yy), 1).float() 437 | 438 | if x.is_cuda: 439 | grid = grid.cuda() 440 | vgrid = Variable(grid) + flo 441 | 442 | # scale grid to [-1,1] 443 | vgrid[:, 0, :, :] = 2.0 * vgrid[:, 0, :, :].clone() / max(W - 1, 1) - 1.0 444 | vgrid[:, 1, :, :] = 2.0 * vgrid[:, 1, :, :].clone() / max(H - 1, 1) - 1.0 445 | 446 | vgrid = vgrid.permute(0, 2, 3, 1) 447 | output = nn.functional.grid_sample(x, vgrid) 448 | mask = torch.autograd.Variable(torch.ones(x.size())).cuda() 449 | mask = nn.functional.grid_sample(mask, vgrid) 450 | 451 | # if W==128: 452 | # np.save('mask.npy', mask.cpu().data.numpy()) 453 | # np.save('warp.npy', output.cpu().data.numpy()) 454 | 455 | # mask[mask<0.9999] = 0.0 456 | # mask[mask>0] = 1.0 457 | mask = torch.floor(torch.clamp(mask, 0, 1)) 458 | 459 | return output * mask 460 | 461 | def forward(self, rgb, lidar): 462 | H, W = rgb.shape[2:4] 463 | 464 | #encoder 465 | if self.pretrained_encoder: 466 | # rgb_image 467 | features1 = self.net_encoder(rgb) 468 | c12 = features1[0] # 2 469 | c13 = features1[2] # 4 470 | c14 = features1[3] # 8 471 | c15 = features1[4] # 16 472 | c16 = features1[5] # 32 473 | # lidar_image 474 | x2 = self.conv1_lidar(lidar) 475 | if self.Action_Func == 'leakyrelu': 476 | c22 = self.leakyRELU_lidar(x2) # 2 477 | elif self.Action_Func == 'elu': 478 | c22 = self.elu_lidar(x2) # 2 479 | c23 = self.layer1_lidar(self.maxpool_lidar(c22)) # 4 480 | c24 = self.layer2_lidar(c23) # 8 481 | c25 = self.layer3_lidar(c24) # 16 482 | c26 = self.layer4_lidar(c25) # 32 483 | 484 | else: 485 | x1 = self.conv1_rgb(rgb) 486 | x2 = self.conv1_lidar(lidar) 487 | if self.Action_Func == 'leakyrelu': 488 | c12 = self.leakyRELU_rgb(x1) # 2 489 | c22 = self.leakyRELU_lidar(x2) # 2 490 | elif self.Action_Func == 'elu': 491 | c12 = self.elu_rgb(x1) # 2 492 | c22 = self.elu_lidar(x2) # 2 493 | c13 = 
self.layer1_rgb(self.maxpool_rgb(c12)) # 4 494 | c23 = self.layer1_lidar(self.maxpool_lidar(c22)) # 4 495 | c14 = self.layer2_rgb(c13) # 8 496 | c24 = self.layer2_lidar(c23) # 8 497 | c15 = self.layer3_rgb(c14) # 16 498 | c25 = self.layer3_lidar(c24) # 16 499 | c16 = self.layer4_rgb(c15) # 32 500 | c26 = self.layer4_lidar(c25) # 32 501 | 502 | corr6 = self.corr(c16, c26) 503 | corr6 = self.leakyRELU(corr6) 504 | x = torch.cat((self.conv6_0(corr6), corr6), 1) 505 | x = torch.cat((self.conv6_1(x), x), 1) 506 | x = torch.cat((self.conv6_2(x), x), 1) 507 | x = torch.cat((self.conv6_3(x), x), 1) 508 | x = torch.cat((self.conv6_4(x), x), 1) 509 | 510 | if self.use_feat_from > 1: 511 | flow6 = self.predict_flow6(x) 512 | up_flow6 = self.deconv6(flow6) 513 | up_feat6 = self.upfeat6(x) 514 | 515 | warp5 = self.warp(c25, up_flow6*0.625) 516 | corr5 = self.corr(c15, warp5) 517 | corr5 = self.leakyRELU(corr5) 518 | x = torch.cat((corr5, c15, up_flow6, up_feat6), 1) 519 | x = torch.cat((self.conv5_0(x), x), 1) 520 | x = torch.cat((self.conv5_1(x), x), 1) 521 | x = torch.cat((self.conv5_2(x), x), 1) 522 | x = torch.cat((self.conv5_3(x), x), 1) 523 | x = torch.cat((self.conv5_4(x), x), 1) 524 | 525 | if self.use_feat_from > 2: 526 | flow5 = self.predict_flow5(x) 527 | up_flow5 = self.deconv5(flow5) 528 | up_feat5 = self.upfeat5(x) 529 | 530 | warp4 = self.warp(c24, up_flow5*1.25) 531 | corr4 = self.corr(c14, warp4) 532 | corr4 = self.leakyRELU(corr4) 533 | x = torch.cat((corr4, c14, up_flow5, up_feat5), 1) 534 | x = torch.cat((self.conv4_0(x), x), 1) 535 | x = torch.cat((self.conv4_1(x), x), 1) 536 | x = torch.cat((self.conv4_2(x), x), 1) 537 | x = torch.cat((self.conv4_3(x), x), 1) 538 | x = torch.cat((self.conv4_4(x), x), 1) 539 | 540 | if self.use_feat_from > 3: 541 | flow4 = self.predict_flow4(x) 542 | up_flow4 = self.deconv4(flow4) 543 | up_feat4 = self.upfeat4(x) 544 | 545 | warp3 = self.warp(c23, up_flow4*2.5) 546 | corr3 = self.corr(c13, warp3) 547 | corr3 = self.leakyRELU(corr3) 548 | x = torch.cat((corr3, c13, up_flow4, up_feat4), 1) 549 | x = torch.cat((self.conv3_0(x), x),1) 550 | x = torch.cat((self.conv3_1(x), x),1) 551 | x = torch.cat((self.conv3_2(x), x),1) 552 | x = torch.cat((self.conv3_3(x), x),1) 553 | x = torch.cat((self.conv3_4(x), x),1) 554 | 555 | if self.use_feat_from > 4: 556 | flow3 = self.predict_flow3(x) 557 | up_flow3 = self.deconv3(flow3) 558 | up_feat3 = self.upfeat3(x) 559 | 560 | 561 | warp2 = self.warp(c22, up_flow3*5.0) 562 | corr2 = self.corr(c12, warp2) 563 | corr2 = self.leakyRELU(corr2) 564 | x = torch.cat((corr2, c12, up_flow3, up_feat3), 1) 565 | x = torch.cat((self.conv2_0(x), x), 1) 566 | x = torch.cat((self.conv2_1(x), x), 1) 567 | x = torch.cat((self.conv2_2(x), x), 1) 568 | x = torch.cat((self.conv2_3(x), x), 1) 569 | x = torch.cat((self.conv2_4(x), x), 1) 570 | 571 | if self.use_feat_from > 5: 572 | flow2 = self.predict_flow2(x) 573 | 574 | x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x)))) 575 | #flow2 = flow2 + self.dc_conv7(self.dc_conv6(self.dc_conv5(x))) 576 | 577 | x = x.view(x.shape[0], -1) 578 | x = self.dropout(x) 579 | x = self.leakyRELU(self.fc1(x)) 580 | 581 | classification_results = self.classification2(self.classification1(x)) 582 | return classification_results 583 | 584 | 585 | -------------------------------------------------------------------------------- /models/__pycache__/LCCNet.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/__pycache__/LCCNet.cpython-36.pyc -------------------------------------------------------------------------------- /models/correlation_package/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 NVIDIA CORPORATION 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /models/correlation_package/__pycache__/correlation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/__pycache__/correlation.cpython-36.pyc -------------------------------------------------------------------------------- /models/correlation_package/build/lib.linux-x86_64-3.6/correlation_cuda.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/build/lib.linux-x86_64-3.6/correlation_cuda.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda.o -------------------------------------------------------------------------------- /models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda_kernel.o -------------------------------------------------------------------------------- /models/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class CorrelationFunction(Function): 7 | @staticmethod 8 | def forward(ctx, input1, input2, pad_size, kernel_size, max_displacement,stride1, stride2, corr_multiply): 9 | ctx.save_for_backward(input1, input2) 10 | ctx.pad_size = pad_size 11 | ctx.kernel_size = kernel_size 12 | ctx.max_displacement = max_displacement 13 | ctx.stride1 = stride1 14 | ctx.stride2 = stride2 15 | ctx.corr_multiply = corr_multiply 16 | 17 | with torch.cuda.device_of(input1): 18 | rbot1 = input1.new() 19 | rbot2 = input2.new() 20 | 
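# Editor note: tensor.new() with no arguments (legacy PyTorch API) returns an empty,
# zero-element tensor with the same dtype and device as the source tensor. The buffers
# allocated here (rbot1, rbot2, output) carry no shape information; correlation_cuda.forward
# below resizes and fills them on the C++ side (see the resize_/fill_ calls in
# correlation_cuda.cc).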
output = input1.new() 21 | 22 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 23 | pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply) 24 | 25 | return output 26 | 27 | @staticmethod 28 | def backward(ctx, grad_output): 29 | input1, input2 = ctx.saved_tensors 30 | 31 | with torch.cuda.device_of(input1): 32 | rbot1 = input1.new() 33 | rbot2 = input2.new() 34 | 35 | grad_input1 = input1.new() 36 | grad_input2 = input2.new() 37 | 38 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 39 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply) 40 | 41 | return grad_input1, grad_input2, None, None, None, None, None, None 42 | 43 | 44 | class Correlation(Module): 45 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 46 | super(Correlation, self).__init__() 47 | self.pad_size = pad_size 48 | self.kernel_size = kernel_size 49 | self.max_displacement = max_displacement 50 | self.stride1 = stride1 51 | self.stride2 = stride2 52 | self.corr_multiply = corr_multiply 53 | 54 | def forward(self, input1, input2): 55 | 56 | input1 = input1.contiguous() 57 | input2 = input2.contiguous() 58 | 59 | result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "correlation_cuda_kernel.cuh" 8 | 9 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 10 | int pad_size, 11 | int kernel_size, 12 | int max_displacement, 13 | int stride1, 14 | int stride2, 15 | int corr_type_multiply) 16 | { 17 | 18 | int batchSize = input1.size(0); 19 | 20 | int nInputChannels = input1.size(1); 21 | int inputHeight = input1.size(2); 22 | int inputWidth = input1.size(3); 23 | 24 | int kernel_radius = (kernel_size - 1) / 2; 25 | int border_radius = kernel_radius + max_displacement; 26 | 27 | int paddedInputHeight = inputHeight + 2 * pad_size; 28 | int paddedInputWidth = inputWidth + 2 * pad_size; 29 | 30 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 31 | 32 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 33 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 34 | 35 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 36 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 38 | 39 | rInput1.fill_(0); 40 | rInput2.fill_(0); 41 | output.fill_(0); 42 | 43 | int success = correlation_forward_cuda_kernel( 44 | output, 45 | output.size(0), 46 | output.size(1), 47 | output.size(2), 48 | output.size(3), 49 | output.stride(0), 50 | output.stride(1), 51 | output.stride(2), 52 | output.stride(3), 53 | input1, 54 | input1.size(1), 55 | input1.size(2), 56 | input1.size(3), 57 | input1.stride(0), 58 | input1.stride(1), 59 | input1.stride(2), 60 | input1.stride(3), 61 | input2, 62 | 
input2.size(1), 63 | input2.stride(0), 64 | input2.stride(1), 65 | input2.stride(2), 66 | input2.stride(3), 67 | rInput1, 68 | rInput2, 69 | pad_size, 70 | kernel_size, 71 | max_displacement, 72 | stride1, 73 | stride2, 74 | corr_type_multiply, 75 | at::cuda::getCurrentCUDAStream() 76 | ); 77 | 78 | //check for errors 79 | if (!success) { 80 | AT_ERROR("CUDA call failed"); 81 | } 82 | 83 | return 1; 84 | 85 | } 86 | 87 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 88 | at::Tensor& gradInput1, at::Tensor& gradInput2, 89 | int pad_size, 90 | int kernel_size, 91 | int max_displacement, 92 | int stride1, 93 | int stride2, 94 | int corr_type_multiply) 95 | { 96 | 97 | int batchSize = input1.size(0); 98 | int nInputChannels = input1.size(1); 99 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 100 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 101 | 102 | int height = input1.size(2); 103 | int width = input1.size(3); 104 | 105 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 106 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 107 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 108 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 109 | 110 | rInput1.fill_(0); 111 | rInput2.fill_(0); 112 | gradInput1.fill_(0); 113 | gradInput2.fill_(0); 114 | 115 | int success = correlation_backward_cuda_kernel(gradOutput, 116 | gradOutput.size(0), 117 | gradOutput.size(1), 118 | gradOutput.size(2), 119 | gradOutput.size(3), 120 | gradOutput.stride(0), 121 | gradOutput.stride(1), 122 | gradOutput.stride(2), 123 | gradOutput.stride(3), 124 | input1, 125 | input1.size(1), 126 | input1.size(2), 127 | input1.size(3), 128 | input1.stride(0), 129 | input1.stride(1), 130 | input1.stride(2), 131 | input1.stride(3), 132 | input2, 133 | input2.stride(0), 134 | input2.stride(1), 135 | input2.stride(2), 136 | input2.stride(3), 137 | gradInput1, 138 | gradInput1.stride(0), 139 | gradInput1.stride(1), 140 | gradInput1.stride(2), 141 | gradInput1.stride(3), 142 | gradInput2, 143 | gradInput2.size(1), 144 | gradInput2.stride(0), 145 | gradInput2.stride(1), 146 | gradInput2.stride(2), 147 | gradInput2.stride(3), 148 | rInput1, 149 | rInput2, 150 | pad_size, 151 | kernel_size, 152 | max_displacement, 153 | stride1, 154 | stride2, 155 | corr_type_multiply, 156 | at::cuda::getCurrentCUDAStream() 157 | ); 158 | 159 | if (!success) { 160 | AT_ERROR("CUDA call failed"); 161 | } 162 | 163 | return 1; 164 | } 165 | 166 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 167 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); 168 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 169 | } 170 | 171 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: correlation-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | License: UNKNOWN 7 | Platform: UNKNOWN 8 | License-File: LICENSE 9 | 10 | UNKNOWN 11 | 12 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | 
correlation_cuda.cc 3 | correlation_cuda_kernel.cu 4 | pyproject.toml 5 | setup.py 6 | correlation_cuda.egg-info/PKG-INFO 7 | correlation_cuda.egg-info/SOURCES.txt 8 | correlation_cuda.egg-info/dependency_links.txt 9 | correlation_cuda.egg-info/top_level.txt -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | correlation_cuda 2 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "correlation_cuda_kernel.cuh" 4 | 5 | #define CUDA_NUM_THREADS 1024 6 | #define THREADS_PER_BLOCK 32 7 | #define FULL_MASK 0xffffffff 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using at::Half; 15 | 16 | template 17 | __forceinline__ __device__ scalar_t warpReduceSum(scalar_t val) { 18 | for (int offset = 16; offset > 0; offset /= 2) 19 | val += __shfl_down_sync(FULL_MASK, val, offset); 20 | return val; 21 | } 22 | 23 | template 24 | __forceinline__ __device__ scalar_t blockReduceSum(scalar_t val) { 25 | 26 | static __shared__ scalar_t shared[32]; 27 | int lane = threadIdx.x % warpSize; 28 | int wid = threadIdx.x / warpSize; 29 | 30 | val = warpReduceSum(val); 31 | 32 | if (lane == 0) 33 | shared[wid] = val; 34 | 35 | __syncthreads(); 36 | 37 | val = (threadIdx.x < blockDim.x / warpSize) ? 
shared[lane] : 0; 38 | 39 | if (wid == 0) 40 | val = warpReduceSum(val); 41 | 42 | return val; 43 | } 44 | 45 | 46 | template 47 | __global__ void channels_first(const scalar_t* __restrict__ input, scalar_t* rinput, int channels, int height, int width, int pad_size) 48 | { 49 | 50 | // n (batch size), c (num of channels), y (height), x (width) 51 | int n = blockIdx.x; 52 | int y = blockIdx.y; 53 | int x = blockIdx.z; 54 | 55 | int ch_off = threadIdx.x; 56 | scalar_t value; 57 | 58 | int dimcyx = channels * height * width; 59 | int dimyx = height * width; 60 | 61 | int p_dimx = (width + 2 * pad_size); 62 | int p_dimy = (height + 2 * pad_size); 63 | int p_dimyxc = channels * p_dimy * p_dimx; 64 | int p_dimxc = p_dimx * channels; 65 | 66 | for (int c = ch_off; c < channels; c += THREADS_PER_BLOCK) { 67 | value = input[n * dimcyx + c * dimyx + y * width + x]; 68 | rinput[n * p_dimyxc + (y + pad_size) * p_dimxc + (x + pad_size) * channels + c] = value; 69 | } 70 | } 71 | 72 | 73 | template 74 | __global__ void correlation_forward(scalar_t* __restrict__ output, const int nOutputChannels, 75 | const int outputHeight, const int outputWidth, const scalar_t* __restrict__ rInput1, 76 | const int nInputChannels, const int inputHeight, const int inputWidth, 77 | const scalar_t* __restrict__ rInput2, const int pad_size, const int kernel_size, 78 | const int max_displacement, const int stride1, const int stride2) { 79 | 80 | int32_t pInputWidth = inputWidth + 2 * pad_size; 81 | int32_t pInputHeight = inputHeight + 2 * pad_size; 82 | 83 | int32_t kernel_rad = (kernel_size - 1) / 2; 84 | 85 | int32_t displacement_rad = max_displacement / stride2; 86 | 87 | int32_t displacement_size = 2 * displacement_rad + 1; 88 | 89 | int32_t n = blockIdx.x; 90 | int32_t y1 = blockIdx.y * stride1 + max_displacement; 91 | int32_t x1 = blockIdx.z * stride1 + max_displacement; 92 | int32_t c = threadIdx.x; 93 | 94 | int32_t pdimyxc = pInputHeight * pInputWidth * nInputChannels; 95 | 96 | int32_t pdimxc = pInputWidth * nInputChannels; 97 | 98 | int32_t pdimc = nInputChannels; 99 | 100 | int32_t tdimcyx = nOutputChannels * outputHeight * outputWidth; 101 | int32_t tdimyx = outputHeight * outputWidth; 102 | int32_t tdimx = outputWidth; 103 | 104 | int32_t nelems = kernel_size * kernel_size * pdimc; 105 | 106 | // element-wise product along channel axis 107 | for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) { 108 | for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) { 109 | int x2 = x1 + ti * stride2; 110 | int y2 = y1 + tj * stride2; 111 | 112 | float acc0 = 0.0f; 113 | 114 | for (int j = -kernel_rad; j <= kernel_rad; ++j) { 115 | for (int i = -kernel_rad; i <= kernel_rad; ++i) { 116 | // THREADS_PER_BLOCK 117 | #pragma unroll 118 | for (int ch = c; ch < pdimc; ch += blockDim.x) { 119 | 120 | int indx1 = n * pdimyxc + (y1 + j) * pdimxc 121 | + (x1 + i) * pdimc + ch; 122 | int indx2 = n * pdimyxc + (y2 + j) * pdimxc 123 | + (x2 + i) * pdimc + ch; 124 | acc0 += static_cast(rInput1[indx1] * rInput2[indx2]); 125 | } 126 | } 127 | } 128 | 129 | if (blockDim.x == warpSize) { 130 | __syncwarp(); 131 | acc0 = warpReduceSum(acc0); 132 | } else { 133 | __syncthreads(); 134 | acc0 = blockReduceSum(acc0); 135 | } 136 | 137 | if (threadIdx.x == 0) { 138 | 139 | int tc = (tj + displacement_rad) * displacement_size 140 | + (ti + displacement_rad); 141 | const int tindx = n * tdimcyx + tc * tdimyx + blockIdx.y * tdimx 142 | + blockIdx.z; 143 | output[tindx] = static_cast(acc0 / nelems); 144 | } 145 | } 146 | } 147 | } 
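// Editor note (worked example): the host wrapper in correlation_cuda.cc sizes the output as
// nOutputChannels = ((max_displacement/stride2)*2 + 1)^2. With the arguments LCCNet passes to
// Correlation (max_displacement = md = 4, stride1 = stride2 = 1), displacement_rad = 4 and
// displacement_size = 9, so this kernel produces 9 * 9 = 81 correlation channels per output
// location -- the same nd = (2*md + 1)**2 = 81 used to size the conv6_* layers in models/LCCNet.py.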
148 | 149 | 150 | template 151 | __global__ void correlation_backward_input1(int item, scalar_t* gradInput1, int nInputChannels, int inputHeight, int inputWidth, 152 | const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth, 153 | const scalar_t* __restrict__ rInput2, 154 | int pad_size, 155 | int kernel_size, 156 | int max_displacement, 157 | int stride1, 158 | int stride2) 159 | { 160 | // n (batch size), c (num of channels), y (height), x (width) 161 | 162 | int n = item; 163 | int y = blockIdx.x * stride1 + pad_size; 164 | int x = blockIdx.y * stride1 + pad_size; 165 | int c = blockIdx.z; 166 | int tch_off = threadIdx.x; 167 | 168 | int kernel_rad = (kernel_size - 1) / 2; 169 | int displacement_rad = max_displacement / stride2; 170 | int displacement_size = 2 * displacement_rad + 1; 171 | 172 | int xmin = (x - kernel_rad - max_displacement) / stride1; 173 | int ymin = (y - kernel_rad - max_displacement) / stride1; 174 | 175 | int xmax = (x + kernel_rad - max_displacement) / stride1; 176 | int ymax = (y + kernel_rad - max_displacement) / stride1; 177 | 178 | if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) { 179 | // assumes gradInput1 is pre-allocated and zero filled 180 | return; 181 | } 182 | 183 | if (xmin > xmax || ymin > ymax) { 184 | // assumes gradInput1 is pre-allocated and zero filled 185 | return; 186 | } 187 | 188 | xmin = max(0,xmin); 189 | xmax = min(outputWidth-1,xmax); 190 | 191 | ymin = max(0,ymin); 192 | ymax = min(outputHeight-1,ymax); 193 | 194 | int pInputWidth = inputWidth + 2 * pad_size; 195 | int pInputHeight = inputHeight + 2 * pad_size; 196 | 197 | int pdimyxc = pInputHeight * pInputWidth * nInputChannels; 198 | int pdimxc = pInputWidth * nInputChannels; 199 | int pdimc = nInputChannels; 200 | 201 | int tdimcyx = nOutputChannels * outputHeight * outputWidth; 202 | int tdimyx = outputHeight * outputWidth; 203 | int tdimx = outputWidth; 204 | 205 | int odimcyx = nInputChannels * inputHeight* inputWidth; 206 | int odimyx = inputHeight * inputWidth; 207 | int odimx = inputWidth; 208 | 209 | scalar_t nelems = kernel_size * kernel_size * nInputChannels; 210 | 211 | __shared__ scalar_t prod_sum[THREADS_PER_BLOCK]; 212 | prod_sum[tch_off] = 0; 213 | 214 | for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) { 215 | 216 | int i2 = (tc % displacement_size - displacement_rad) * stride2; 217 | int j2 = (tc / displacement_size - displacement_rad) * stride2; 218 | 219 | int indx2 = n * pdimyxc + (y + j2)* pdimxc + (x + i2) * pdimc + c; 220 | 221 | scalar_t val2 = rInput2[indx2]; 222 | 223 | for (int j = ymin; j <= ymax; ++j) { 224 | for (int i = xmin; i <= xmax; ++i) { 225 | int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i; 226 | prod_sum[tch_off] += gradOutput[tindx] * val2; 227 | } 228 | } 229 | } 230 | __syncthreads(); 231 | 232 | if(tch_off == 0) { 233 | scalar_t reduce_sum = 0; 234 | for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) { 235 | reduce_sum += prod_sum[idx]; 236 | } 237 | const int indx1 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size); 238 | gradInput1[indx1] = reduce_sum / nelems; 239 | } 240 | 241 | } 242 | 243 | template 244 | __global__ void correlation_backward_input2(int item, scalar_t* gradInput2, int nInputChannels, int inputHeight, int inputWidth, 245 | const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth, 246 | const scalar_t* __restrict__ rInput1, 247 | int pad_size, 248 | int kernel_size, 249 | int 
max_displacement, 250 | int stride1, 251 | int stride2) 252 | { 253 | // n (batch size), c (num of channels), y (height), x (width) 254 | 255 | int n = item; 256 | int y = blockIdx.x * stride1 + pad_size; 257 | int x = blockIdx.y * stride1 + pad_size; 258 | int c = blockIdx.z; 259 | 260 | int tch_off = threadIdx.x; 261 | 262 | int kernel_rad = (kernel_size - 1) / 2; 263 | int displacement_rad = max_displacement / stride2; 264 | int displacement_size = 2 * displacement_rad + 1; 265 | 266 | int pInputWidth = inputWidth + 2 * pad_size; 267 | int pInputHeight = inputHeight + 2 * pad_size; 268 | 269 | int pdimyxc = pInputHeight * pInputWidth * nInputChannels; 270 | int pdimxc = pInputWidth * nInputChannels; 271 | int pdimc = nInputChannels; 272 | 273 | int tdimcyx = nOutputChannels * outputHeight * outputWidth; 274 | int tdimyx = outputHeight * outputWidth; 275 | int tdimx = outputWidth; 276 | 277 | int odimcyx = nInputChannels * inputHeight* inputWidth; 278 | int odimyx = inputHeight * inputWidth; 279 | int odimx = inputWidth; 280 | 281 | scalar_t nelems = kernel_size * kernel_size * nInputChannels; 282 | 283 | __shared__ scalar_t prod_sum[THREADS_PER_BLOCK]; 284 | prod_sum[tch_off] = 0; 285 | 286 | for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) { 287 | int i2 = (tc % displacement_size - displacement_rad) * stride2; 288 | int j2 = (tc / displacement_size - displacement_rad) * stride2; 289 | 290 | int xmin = (x - kernel_rad - max_displacement - i2) / stride1; 291 | int ymin = (y - kernel_rad - max_displacement - j2) / stride1; 292 | 293 | int xmax = (x + kernel_rad - max_displacement - i2) / stride1; 294 | int ymax = (y + kernel_rad - max_displacement - j2) / stride1; 295 | 296 | if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) { 297 | // assumes gradInput2 is pre-allocated and zero filled 298 | continue; 299 | } 300 | 301 | if (xmin > xmax || ymin > ymax) { 302 | // assumes gradInput2 is pre-allocated and zero filled 303 | continue; 304 | } 305 | 306 | xmin = max(0,xmin); 307 | xmax = min(outputWidth-1,xmax); 308 | 309 | ymin = max(0,ymin); 310 | ymax = min(outputHeight-1,ymax); 311 | 312 | int indx1 = n * pdimyxc + (y - j2)* pdimxc + (x - i2) * pdimc + c; 313 | scalar_t val1 = rInput1[indx1]; 314 | 315 | for (int j = ymin; j <= ymax; ++j) { 316 | for (int i = xmin; i <= xmax; ++i) { 317 | int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i; 318 | prod_sum[tch_off] += gradOutput[tindx] * val1; 319 | } 320 | } 321 | } 322 | 323 | __syncthreads(); 324 | 325 | if(tch_off == 0) { 326 | scalar_t reduce_sum = 0; 327 | for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) { 328 | reduce_sum += prod_sum[idx]; 329 | } 330 | const int indx2 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size); 331 | gradInput2[indx2] = reduce_sum / nelems; 332 | } 333 | 334 | } 335 | 336 | int correlation_forward_cuda_kernel(at::Tensor& output, 337 | int ob, 338 | int oc, 339 | int oh, 340 | int ow, 341 | int osb, 342 | int osc, 343 | int osh, 344 | int osw, 345 | 346 | at::Tensor& input1, 347 | int ic, 348 | int ih, 349 | int iw, 350 | int isb, 351 | int isc, 352 | int ish, 353 | int isw, 354 | 355 | at::Tensor& input2, 356 | int gc, 357 | int gsb, 358 | int gsc, 359 | int gsh, 360 | int gsw, 361 | 362 | at::Tensor& rInput1, 363 | at::Tensor& rInput2, 364 | int pad_size, 365 | int kernel_size, 366 | int max_displacement, 367 | int stride1, 368 | int stride2, 369 | int corr_type_multiply, 370 | cudaStream_t stream) 371 | { 372 | 373 | int batchSize = ob; 374 | 
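// Editor note: this host wrapper runs in two stages. channels_first first repacks each padded
// input from NCHW into an NHWC buffer (rInput1/rInput2), with one block per (batch, y, x) and
// THREADS_PER_BLOCK threads striding over the channel dimension; correlation_forward is then
// launched with one block per (batch, output row, output column), whose threads accumulate the
// per-channel products before a warp/block reduction writes each output value.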
375 | int nInputChannels = ic; 376 | int inputWidth = iw; 377 | int inputHeight = ih; 378 | 379 | int nOutputChannels = oc; 380 | int outputWidth = ow; 381 | int outputHeight = oh; 382 | 383 | dim3 blocks_grid(batchSize, inputHeight, inputWidth); 384 | dim3 threads_block(THREADS_PER_BLOCK); 385 | 386 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channels_first_fwd_1", ([&] { 387 | 388 | channels_first<<>>( 389 | input1.data(), rInput1.data(), nInputChannels, inputHeight, inputWidth, pad_size); 390 | 391 | })); 392 | 393 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "channels_first_fwd_2", ([&] { 394 | 395 | channels_first<<>> ( 396 | input2.data(), rInput2.data(), nInputChannels, inputHeight, inputWidth, pad_size); 397 | 398 | })); 399 | 400 | dim3 threadsPerBlock(THREADS_PER_BLOCK); 401 | dim3 totalBlocksCorr(batchSize, outputHeight, outputWidth); 402 | 403 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "correlation_forward", ([&] { 404 | 405 | correlation_forward<<>> 406 | (output.data(), nOutputChannels, outputHeight, outputWidth, 407 | rInput1.data(), nInputChannels, inputHeight, inputWidth, 408 | rInput2.data(), 409 | pad_size, 410 | kernel_size, 411 | max_displacement, 412 | stride1, 413 | stride2); 414 | 415 | })); 416 | 417 | cudaError_t err = cudaGetLastError(); 418 | 419 | 420 | // check for errors 421 | if (err != cudaSuccess) { 422 | printf("error in correlation_forward_cuda_kernel: %s\n", cudaGetErrorString(err)); 423 | return 0; 424 | } 425 | 426 | return 1; 427 | } 428 | 429 | 430 | int correlation_backward_cuda_kernel( 431 | at::Tensor& gradOutput, 432 | int gob, 433 | int goc, 434 | int goh, 435 | int gow, 436 | int gosb, 437 | int gosc, 438 | int gosh, 439 | int gosw, 440 | 441 | at::Tensor& input1, 442 | int ic, 443 | int ih, 444 | int iw, 445 | int isb, 446 | int isc, 447 | int ish, 448 | int isw, 449 | 450 | at::Tensor& input2, 451 | int gsb, 452 | int gsc, 453 | int gsh, 454 | int gsw, 455 | 456 | at::Tensor& gradInput1, 457 | int gisb, 458 | int gisc, 459 | int gish, 460 | int gisw, 461 | 462 | at::Tensor& gradInput2, 463 | int ggc, 464 | int ggsb, 465 | int ggsc, 466 | int ggsh, 467 | int ggsw, 468 | 469 | at::Tensor& rInput1, 470 | at::Tensor& rInput2, 471 | int pad_size, 472 | int kernel_size, 473 | int max_displacement, 474 | int stride1, 475 | int stride2, 476 | int corr_type_multiply, 477 | cudaStream_t stream) 478 | { 479 | 480 | int batchSize = gob; 481 | int num = batchSize; 482 | 483 | int nInputChannels = ic; 484 | int inputWidth = iw; 485 | int inputHeight = ih; 486 | 487 | int nOutputChannels = goc; 488 | int outputWidth = gow; 489 | int outputHeight = goh; 490 | 491 | dim3 blocks_grid(batchSize, inputHeight, inputWidth); 492 | dim3 threads_block(THREADS_PER_BLOCK); 493 | 494 | 495 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "lltm_forward_cuda", ([&] { 496 | 497 | channels_first<<>>( 498 | input1.data(), 499 | rInput1.data(), 500 | nInputChannels, 501 | inputHeight, 502 | inputWidth, 503 | pad_size 504 | ); 505 | })); 506 | 507 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] { 508 | 509 | channels_first<<>>( 510 | input2.data(), 511 | rInput2.data(), 512 | nInputChannels, 513 | inputHeight, 514 | inputWidth, 515 | pad_size 516 | ); 517 | })); 518 | 519 | dim3 threadsPerBlock(THREADS_PER_BLOCK); 520 | dim3 totalBlocksCorr(inputHeight, inputWidth, nInputChannels); 521 | 522 | for (int n = 0; n < num; ++n) { 523 | 524 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] { 
525 | 526 | 527 | correlation_backward_input1<<>> ( 528 | n, gradInput1.data(), nInputChannels, inputHeight, inputWidth, 529 | gradOutput.data(), nOutputChannels, outputHeight, outputWidth, 530 | rInput2.data(), 531 | pad_size, 532 | kernel_size, 533 | max_displacement, 534 | stride1, 535 | stride2); 536 | })); 537 | } 538 | 539 | for(int n = 0; n < batchSize; n++) { 540 | 541 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(rInput1.type(), "lltm_forward_cuda", ([&] { 542 | 543 | correlation_backward_input2<<>>( 544 | n, gradInput2.data(), nInputChannels, inputHeight, inputWidth, 545 | gradOutput.data(), nOutputChannels, outputHeight, outputWidth, 546 | rInput1.data(), 547 | pad_size, 548 | kernel_size, 549 | max_displacement, 550 | stride1, 551 | stride2); 552 | 553 | })); 554 | } 555 | 556 | // check for errors 557 | cudaError_t err = cudaGetLastError(); 558 | if (err != cudaSuccess) { 559 | printf("error in correlation_backward_cuda_kernel: %s\n", cudaGetErrorString(err)); 560 | return 0; 561 | } 562 | 563 | return 1; 564 | } 565 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- /models/correlation_package/dist/correlation_cuda-0.0.0-py3.6-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/dist/correlation_cuda-0.0.0-py3.6-linux-x86_64.egg -------------------------------------------------------------------------------- /models/correlation_package/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 
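# Editor note: the torch version below is only a build-time pin for compiling the CUDA extension;
# in general the resulting correlation_cuda module must be rebuilt whenever the runtime
# PyTorch/CUDA toolkit combination changes. The supported GPU architectures are governed by the
# nvcc -gencode flags in setup.py.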
3 | requires = ["setuptools", "wheel", "numpy", "torch==1.0.1.post2"] # PEP 508 specifications. 4 | -------------------------------------------------------------------------------- /models/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | 4 | import torch 5 | from setuptools import find_packages, setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++14'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=sm_70', 17 | '-gencode', 'arch=compute_80,code=sm_80', 18 | ] 19 | 20 | setup( 21 | name='correlation_cuda', 22 | ext_modules=[ 23 | CUDAExtension('correlation_cuda', [ 24 | 'correlation_cuda.cc', 25 | 'correlation_cuda_kernel.cu' 26 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | }) 31 | -------------------------------------------------------------------------------- /pictures/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/pictures/input.pdf -------------------------------------------------------------------------------- /pictures/performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/pictures/performance.pdf -------------------------------------------------------------------------------- /pictures/pipline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/pictures/pipline.pdf -------------------------------------------------------------------------------- /quaternion_distances.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/
7 | # -------------------------------------------------------------------
8 | 
9 | # Modified Author: Xudong Lv
10 | # based on github.com/cattaneod/CMRNet/blob/master/quaternion_distances.py
11 | 
12 | import numpy as np
13 | import torch
14 | 
15 | 
16 | def quatmultiply(q, r, device='cpu'):
17 | """
18 | Batch quaternion multiplication
19 | Args:
20 | q (torch.Tensor/np.ndarray): shape=[Nx4]
21 | r (torch.Tensor/np.ndarray): shape=[Nx4]
22 | device (str): 'cuda' or 'cpu'
23 | 
24 | Returns:
25 | torch.Tensor: shape=[Nx4]
26 | """
27 | if isinstance(q, torch.Tensor):
28 | t = torch.zeros(q.shape[0], 4, device=device)
29 | elif isinstance(q, np.ndarray):
30 | t = np.zeros((q.shape[0], 4))
31 | else:
32 | raise TypeError("Type not supported")
33 | t[:, 0] = r[:, 0] * q[:, 0] - r[:, 1] * q[:, 1] - r[:, 2] * q[:, 2] - r[:, 3] * q[:, 3]
34 | t[:, 1] = r[:, 0] * q[:, 1] + r[:, 1] * q[:, 0] - r[:, 2] * q[:, 3] + r[:, 3] * q[:, 2]
35 | t[:, 2] = r[:, 0] * q[:, 2] + r[:, 1] * q[:, 3] + r[:, 2] * q[:, 0] - r[:, 3] * q[:, 1]
36 | t[:, 3] = r[:, 0] * q[:, 3] - r[:, 1] * q[:, 2] + r[:, 2] * q[:, 1] + r[:, 3] * q[:, 0]
37 | return t
38 | 
39 | 
40 | def quatinv(q):
41 | """
42 | Batch quaternion inversion
43 | Args:
44 | q (torch.Tensor/np.ndarray): shape=[Nx4]
45 | 
46 | Returns:
47 | torch.Tensor/np.ndarray: shape=[Nx4]
48 | """
49 | if isinstance(q, torch.Tensor):
50 | t = q.clone()
51 | elif isinstance(q, np.ndarray):
52 | t = q.copy()
53 | else:
54 | raise TypeError("Type not supported")
55 | t *= -1
56 | t[:, 0] *= -1
57 | return t
58 | 
59 | 
60 | def quaternion_distance(q, r, device):
61 | """
62 | Batch quaternion distances, used as loss
63 | Args:
64 | q (torch.Tensor): shape=[Nx4]
65 | r (torch.Tensor): shape=[Nx4]
66 | device (str): 'cuda' or 'cpu'
67 | 
68 | Returns:
69 | torch.Tensor: shape=[N]
70 | """
71 | t = quatmultiply(q, quatinv(r), device)
72 | return 2 * torch.atan2(torch.norm(t[:, 1:], dim=1), torch.abs(t[:, 0]))
73 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit_image
2 | git+https://gitlab.com/m1lhaus/blender-mathutils.git
3 | tqdm==4.19.9
4 | pandas
5 | h5py
6 | matplotlib
7 | scipy
8 | pyquaternion
9 | opencv-python
10 | pykitti
11 | numpy
12 | open3d-python==0.7.0.0
13 | Pillow
14 | scikit-image
15 | sacred==0.7.4
--------------------------------------------------------------------------------
/train_with_sacred.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------
2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab
3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it)
4 | # Released under Creative Commons
5 | # Attribution-NonCommercial-ShareAlike 4.0 International License.
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Pengjin Wei 10 | # based on https://github.com/IIPCVLAB/LCCNet/blob/main/train_with_sacred.py 11 | 12 | import math 13 | import os 14 | import random 15 | import time 16 | 17 | # import apex 18 | import mathutils 19 | import numpy as np 20 | import torch 21 | import torch.nn.functional as F 22 | import torch.nn.parallel 23 | import torch.optim as optim 24 | import torch.utils.data 25 | import torch.nn as nn 26 | 27 | from sacred import Experiment 28 | from sacred.utils import apply_backspaces_and_linefeeds 29 | 30 | from DatasetLidarCamera import DatasetLidarCameraKittiOdometry 31 | from models.LCCNet import LCCNet 32 | 33 | from quaternion_distances import quaternion_distance 34 | 35 | from tensorboardX import SummaryWriter 36 | from utils import (mat2xyzrpy, merge_inputs, overlay_imgs, quat2mat, 37 | quaternion_from_matrix, rotate_back, rotate_forward, 38 | tvector2mat) 39 | from sacred import SETTINGS 40 | SETTINGS.CONFIG.READ_ONLY_CONFIG=False 41 | 42 | 43 | torch.backends.cudnn.enabled = True 44 | torch.backends.cudnn.benchmark = False 45 | 46 | ex = Experiment("LCCNet") 47 | ex.captured_out_filter = apply_backspaces_and_linefeeds 48 | 49 | 50 | # noinspection PyUnusedLocal 51 | @ex.config 52 | def config(): 53 | checkpoints = './checkpoints/' 54 | dataset = 'kitti/odom' # 'kitti/raw' 55 | # data_folder = '/home/wangshuo/Datasets/KITTI/odometry/data_odometry_full/' 56 | data_folder = '/mnt/petrelfs/weipengjin/file_trans/LCCNet_dataset/' 57 | use_reflectance = False 58 | val_sequence = 0 59 | epochs = 120 60 | BASE_LEARNING_RATE = 3e-4 # 1e-4 61 | loss = 'CrossEntropy' #'combined' 62 | max_t = 0.2#1.5 # 1.5, 1.0, 0.5, 0.2, 0.1 63 | max_r = 2.0#20. # 20.0, 10.0, 5.0, 2.0, 1.0 64 | batch_size = 240 # 240 65 | num_worker = 12 66 | network = 'Res_f1' 67 | optimizer = 'adam' 68 | resume = False 69 | weights = None #'./pretrained/kitti_iter5.tar' 70 | rescale_rot = 1.0 71 | rescale_transl = 2.0 72 | precision = "O0" 73 | norm = 'bn' 74 | dropout = 0.0 75 | max_depth = 80. 
76 | weight_point_cloud = 0.5 77 | log_frequency = 10 78 | print_frequency = 50 79 | starting_epoch = -1 80 | 81 | 82 | # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 83 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 84 | 85 | 86 | EPOCH = 1 87 | def _init_fn(worker_id, seed): 88 | seed = seed + worker_id + EPOCH*100 89 | print(f"Init worker {worker_id} with seed {seed}") 90 | torch.manual_seed(seed) 91 | np.random.seed(seed) 92 | random.seed(seed) 93 | 94 | 95 | def get_2D_lidar_projection(pcl, cam_intrinsic): 96 | pcl_xyz = cam_intrinsic @ pcl.T 97 | pcl_xyz = pcl_xyz.T 98 | pcl_z = pcl_xyz[:, 2] 99 | pcl_xyz = pcl_xyz / (pcl_xyz[:, 2, None] + 1e-10) 100 | pcl_uv = pcl_xyz[:, :2] 101 | 102 | return pcl_uv, pcl_z 103 | 104 | 105 | def lidar_project_depth(pc_rotated, cam_calib, img_shape): 106 | pc_rotated = pc_rotated[:3, :].detach().cpu().numpy() 107 | cam_intrinsic = cam_calib.numpy() 108 | pcl_uv, pcl_z = get_2D_lidar_projection(pc_rotated.T, cam_intrinsic) 109 | mask = (pcl_uv[:, 0] > 0) & (pcl_uv[:, 0] < img_shape[1]) & (pcl_uv[:, 1] > 0) & ( 110 | pcl_uv[:, 1] < img_shape[0]) & (pcl_z > 0) 111 | pcl_uv = pcl_uv[mask] 112 | pcl_z = pcl_z[mask] 113 | pcl_uv = pcl_uv.astype(np.uint32) 114 | pcl_z = pcl_z.reshape(-1, 1) 115 | depth_img = np.zeros((img_shape[0], img_shape[1], 1)) 116 | depth_img[pcl_uv[:, 1], pcl_uv[:, 0]] = pcl_z 117 | depth_img = torch.from_numpy(depth_img.astype(np.float32)) 118 | depth_img = depth_img.cuda() 119 | depth_img = depth_img.permute(2, 0, 1) 120 | 121 | return depth_img, pcl_uv 122 | 123 | 124 | # CCN training 125 | @ex.capture 126 | def train(model, optimizer, rgb_img, refl_img, target_transl, target_rot, loss_fn, point_clouds, loss): 127 | model.train() 128 | 129 | optimizer.zero_grad() 130 | 131 | # Run model 132 | transl_err, rot_err = model(rgb_img, refl_img) 133 | 134 | if loss == 'points_distance' or loss == 'combined': 135 | losses = loss_fn(point_clouds, target_transl, target_rot, transl_err, rot_err) 136 | else: 137 | losses = loss_fn(target_transl, target_rot, transl_err, rot_err) 138 | 139 | losses['total_loss'].backward() 140 | optimizer.step() 141 | 142 | return losses, rot_err, transl_err 143 | 144 | # CNN Classification training 145 | @ex.capture 146 | def train1(model, optimizer, rgb_img, refl_img, label, loss_fn): 147 | model.train() 148 | 149 | optimizer.zero_grad() 150 | 151 | # Run model 152 | classification_results = model(rgb_img, refl_img) 153 | 154 | crossentropyloss_output = loss_fn(classification_results,label) 155 | 156 | crossentropyloss_output.backward() 157 | optimizer.step() 158 | 159 | return crossentropyloss_output, classification_results 160 | 161 | 162 | # CNN test 163 | @ex.capture 164 | def val(model, rgb_img, refl_img, target_transl, target_rot, loss_fn, point_clouds, loss): 165 | model.eval() 166 | 167 | # Run model 168 | with torch.no_grad(): 169 | transl_err, rot_err = model(rgb_img, refl_img) 170 | 171 | if loss == 'points_distance' or loss == 'combined': 172 | losses = loss_fn(point_clouds, target_transl, target_rot, transl_err, rot_err) 173 | else: 174 | losses = loss_fn(target_transl, target_rot, transl_err, rot_err) 175 | 176 | 177 | total_trasl_error = torch.tensor(0.0) 178 | total_rot_error = quaternion_distance(target_rot, rot_err, target_rot.device) 179 | total_rot_error = total_rot_error * 180. / math.pi 180 | for j in range(rgb_img.shape[0]): 181 | total_trasl_error += torch.norm(target_transl[j] - transl_err[j]) * 100. 
182 | 183 | 184 | return losses, total_trasl_error.item(), total_rot_error.sum().item(), rot_err, transl_err 185 | 186 | # CNN test 187 | @ex.capture 188 | def val1(model, optimizer, rgb_img, refl_img, label, loss_fn): 189 | model.eval() 190 | 191 | # Run model 192 | with torch.no_grad(): 193 | classification_results = model(rgb_img, refl_img) 194 | 195 | crossentropyloss_output = loss_fn(classification_results,label) 196 | 197 | return crossentropyloss_output, classification_results 198 | 199 | @ex.automain 200 | def main(_config, _run, seed): 201 | global EPOCH 202 | print('Loss Function Choice: {}'.format(_config['loss'])) 203 | 204 | if _config['val_sequence'] is None: 205 | raise TypeError('val_sequences cannot be None') 206 | else: 207 | _config['val_sequence'] = f"{_config['val_sequence']:02d}" 208 | print("Val Sequence: ", _config['val_sequence']) 209 | dataset_class = DatasetLidarCameraKittiOdometry 210 | img_shape = (384, 1280) # 网络的输入尺度 211 | input_size = (256, 512) 212 | _config["checkpoints"] = os.path.join(_config["checkpoints"], _config['dataset']) 213 | 214 | dataset_train = dataset_class(_config['data_folder'], max_r=_config['max_r'], max_t=_config['max_t'], 215 | split='train', use_reflectance=_config['use_reflectance'], 216 | val_sequence=_config['val_sequence']) 217 | dataset_val = dataset_class(_config['data_folder'], max_r=_config['max_r'], max_t=_config['max_t'], 218 | split='val', use_reflectance=_config['use_reflectance'], 219 | val_sequence=_config['val_sequence']) 220 | model_savepath = os.path.join(_config['checkpoints'], 'val_seq_' + _config['val_sequence'], 'models') 221 | if not os.path.exists(model_savepath): 222 | os.makedirs(model_savepath) 223 | log_savepath = os.path.join(_config['checkpoints'], 'val_seq_' + _config['val_sequence'], 'log') 224 | if not os.path.exists(log_savepath): 225 | os.makedirs(log_savepath) 226 | train_writer = SummaryWriter(os.path.join(log_savepath, 'train')) 227 | val_writer = SummaryWriter(os.path.join(log_savepath, 'val')) 228 | 229 | np.random.seed(seed) 230 | torch.random.manual_seed(seed) 231 | 232 | def init_fn(x): return _init_fn(x, seed) 233 | 234 | train_dataset_size = len(dataset_train) 235 | val_dataset_size = len(dataset_val) 236 | print('Number of the train dataset: {}'.format(train_dataset_size)) 237 | print('Number of the val dataset: {}'.format(val_dataset_size)) 238 | 239 | # Training and validation set creation 240 | num_worker = _config['num_worker'] 241 | batch_size = _config['batch_size'] 242 | TrainImgLoader = torch.utils.data.DataLoader(dataset=dataset_train, 243 | shuffle=True, 244 | batch_size=batch_size, 245 | num_workers=num_worker, 246 | worker_init_fn=init_fn, 247 | collate_fn=merge_inputs, 248 | drop_last=False, 249 | pin_memory=True) 250 | 251 | ValImgLoader = torch.utils.data.DataLoader(dataset=dataset_val, 252 | shuffle=False, 253 | batch_size=batch_size, 254 | num_workers=num_worker, 255 | worker_init_fn=init_fn, 256 | collate_fn=merge_inputs, 257 | drop_last=False, 258 | pin_memory=True) 259 | 260 | print(len(TrainImgLoader)) 261 | print(len(ValImgLoader)) 262 | 263 | if _config['loss'] == 'CrossEntropy': 264 | loss_fn = nn.CrossEntropyLoss() 265 | else: 266 | raise ValueError("Unknown Loss Function") 267 | 268 | # network choice and settings 269 | if _config['network'].startswith('Res'): 270 | feat = 1 271 | md = 4 272 | split = _config['network'].split('_') 273 | for item in split[1:]: 274 | if item.startswith('f'): 275 | feat = int(item[-1]) 276 | elif item.startswith('md'): 277 | md = 
int(item[2:]) 278 | assert 0 < feat < 7, "Feature Number from PWC have to be between 1 and 6" 279 | assert 0 < md, "md must be positive" 280 | model = LCCNet(input_size, use_feat_from=feat, md=md, 281 | use_reflectance=_config['use_reflectance'], dropout=_config['dropout'], 282 | Action_Func='leakyrelu', attention=False, res_num=18) 283 | else: 284 | raise TypeError("Network unknown") 285 | if _config['weights'] is not None: 286 | print(f"Loading weights from {_config['weights']}") 287 | checkpoint = torch.load(_config['weights'], map_location='cpu') 288 | saved_state_dict = checkpoint['state_dict'] 289 | model.load_state_dict(saved_state_dict, strict=False) 290 | 291 | # model = model.to(device) 292 | model = nn.DataParallel(model) 293 | model = model.cuda() 294 | 295 | print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) 296 | 297 | parameters = list(filter(lambda p: p.requires_grad, model.parameters())) 298 | if _config['loss'] == 'geometric': 299 | parameters += list(loss_fn.parameters()) 300 | if _config['optimizer'] == 'adam': 301 | optimizer = optim.Adam(parameters, lr=_config['BASE_LEARNING_RATE'], weight_decay=5e-6) 302 | # Probably this scheduler is not used 303 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 50, 70], gamma=0.5) 304 | else: 305 | optimizer = optim.SGD(parameters, lr=_config['BASE_LEARNING_RATE'], momentum=0.9, 306 | weight_decay=5e-6, nesterov=True) 307 | 308 | starting_epoch = _config['starting_epoch'] 309 | if _config['weights'] is not None and _config['resume']: 310 | checkpoint = torch.load(_config['weights'], map_location='cpu') 311 | opt_state_dict = checkpoint['optimizer'] 312 | optimizer.load_state_dict(opt_state_dict) 313 | if starting_epoch != 0: 314 | starting_epoch = checkpoint['epoch'] 315 | 316 | 317 | start_full_time = time.time() 318 | BEST_VAL_LOSS = 10000. 319 | old_save_filename = None 320 | 321 | train_iter = 0 322 | val_iter = 0 323 | for epoch in range(starting_epoch, _config['epochs'] + 1): 324 | EPOCH = epoch 325 | print('This is %d-th epoch' % epoch) 326 | epoch_start_time = time.time() 327 | total_train_loss = 0 328 | local_loss = 0. 
329 | if _config['optimizer'] != 'adam': 330 | _run.log_scalar("LR", _config['BASE_LEARNING_RATE'] * 331 | math.exp((1 - epoch) * 4e-2), epoch) 332 | for param_group in optimizer.param_groups: 333 | param_group['lr'] = _config['BASE_LEARNING_RATE'] * \ 334 | math.exp((1 - epoch) * 4e-2) 335 | else: 336 | #scheduler.step(epoch%100) 337 | _run.log_scalar("LR", scheduler.get_lr()[0]) 338 | 339 | 340 | ## Training ## 341 | time_for_50ep = time.time() 342 | for batch_idx, sample in enumerate(TrainImgLoader): 343 | #print(f'batch {batch_idx+1}/{len(TrainImgLoader)}', end='\r') 344 | start_time = time.time() 345 | lidar_input = [] 346 | rgb_input = [] 347 | lidar_gt = [] 348 | shape_pad_input = [] 349 | real_shape_input = [] 350 | pc_rotated_input = [] 351 | 352 | # gt pose 353 | sample['tr_error'] = sample['tr_error'].cuda() 354 | sample['rot_error'] = sample['rot_error'].cuda() 355 | 356 | sample['label_nag'] = torch.from_numpy(np.array(sample['label_nag'])).cuda() 357 | sample['label_pos'] = torch.from_numpy(np.array(sample['label_pos'])).cuda() 358 | 359 | sample['label'] = sample['label'].cuda() 360 | 361 | start_preprocess = time.time() 362 | for idx in range(len(sample['rgb'])): 363 | # ProjectPointCloud in RT-pose 364 | real_shape = [sample['rgb'][idx].shape[1], sample['rgb'][idx].shape[2], sample['rgb'][idx].shape[0]] 365 | 366 | sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda() # 变换到相机坐标系下的激光雷达点云 367 | pc_lidar = sample['point_cloud'][idx].clone() 368 | 369 | if _config['max_depth'] < 80.: 370 | pc_lidar = pc_lidar[:, pc_lidar[0, :] < _config['max_depth']].clone() 371 | 372 | depth_gt, uv = lidar_project_depth(pc_lidar, sample['calib'][idx], real_shape) # image_shape 373 | depth_gt /= _config['max_depth'] 374 | 375 | R = mathutils.Quaternion(sample['rot_error'][idx]).to_matrix() 376 | R.resize_4x4() 377 | T = mathutils.Matrix.Translation(sample['tr_error'][idx]) 378 | RT = T * R 379 | 380 | pc_rotated = rotate_back(sample['point_cloud'][idx], RT) # Pc` = RT * Pc 381 | 382 | if _config['max_depth'] < 80.: 383 | pc_rotated = pc_rotated[:, pc_rotated[0, :] < _config['max_depth']].clone() 384 | 385 | depth_img, uv = lidar_project_depth(pc_rotated, sample['calib'][idx], real_shape) # image_shape 386 | depth_img /= _config['max_depth'] 387 | 388 | # PAD ONLY ON RIGHT AND BOTTOM SIDE 389 | rgb = sample['rgb'][idx].cuda() 390 | shape_pad = [0, 0, 0, 0] 391 | 392 | shape_pad[3] = (img_shape[0] - rgb.shape[1]) # // 2 393 | shape_pad[1] = (img_shape[1] - rgb.shape[2]) # // 2 + 1 394 | 395 | rgb = F.pad(rgb, shape_pad) 396 | depth_img = F.pad(depth_img, shape_pad) 397 | depth_gt = F.pad(depth_gt, shape_pad) 398 | 399 | rgb_input.append(rgb) 400 | lidar_input.append(depth_img) 401 | lidar_gt.append(depth_gt) 402 | real_shape_input.append(real_shape) 403 | shape_pad_input.append(shape_pad) 404 | pc_rotated_input.append(pc_rotated) 405 | 406 | lidar_input = torch.stack(lidar_input) 407 | rgb_input = torch.stack(rgb_input) 408 | rgb_show = rgb_input.clone() 409 | lidar_show = lidar_input.clone() 410 | rgb_input = F.interpolate(rgb_input, size=[256, 512], mode="bilinear") 411 | lidar_input = F.interpolate(lidar_input, size=[256, 512], mode="bilinear") 412 | end_preprocess = time.time() 413 | 414 | loss, predict_label = train1(model, optimizer, rgb_input, lidar_input, sample['label'], loss_fn) 415 | 416 | _, classify_reaults = torch.max(predict_label,dim=1) 417 | statistic_results = torch.eq(classify_reaults,sample['label']) 418 | 419 | if batch_idx % _config['log_frequency'] == 0: 420 | 
train_writer.add_scalar("loss", loss.item(), train_iter) 421 | local_loss += loss.item() 422 | 423 | if batch_idx % 50 == 0 and batch_idx != 0: 424 | 425 | print(f'Iter {batch_idx}/{len(TrainImgLoader)} training loss = {local_loss/50:.3f}, ' 426 | f'time = {(time.time() - start_time)/lidar_input.shape[0]:.4f}, ' 427 | #f'time_preprocess = {(end_preprocess-start_preprocess)/lidar_input.shape[0]:.4f}, ' 428 | f'time for 50 iter: {time.time()-time_for_50ep:.4f}') 429 | time_for_50ep = time.time() 430 | _run.log_scalar("Loss", local_loss/50, train_iter) 431 | local_loss = 0. 432 | # total_train_loss += loss['total_loss'].item() * len(sample['rgb']) 433 | total_train_loss += loss.item() * len(sample['rgb']) 434 | train_iter += 1 435 | # total_iter += len(sample['rgb']) 436 | 437 | print("------------------------------------") 438 | print('epoch %d total training loss = %.3f' % (epoch, total_train_loss / len(dataset_train))) 439 | print('Total epoch time = %.2f' % (time.time() - epoch_start_time)) 440 | print("------------------------------------") 441 | _run.log_scalar("Total training loss", total_train_loss / len(dataset_train), epoch) 442 | 443 | ## Validation ## 444 | total_val_loss = 0. 445 | total_val_t = 0. 446 | total_val_r = 0. 447 | 448 | local_loss = 0.0 449 | correct_samples = 0 450 | wrong_samples = 0 451 | for batch_idx, sample in enumerate(ValImgLoader): 452 | #print(f'batch {batch_idx+1}/{len(TrainImgLoader)}', end='\r') 453 | start_time = time.time() 454 | lidar_input = [] 455 | rgb_input = [] 456 | lidar_gt = [] 457 | shape_pad_input = [] 458 | real_shape_input = [] 459 | pc_rotated_input = [] 460 | 461 | # gt pose 462 | sample['tr_error'] = sample['tr_error'].cuda() 463 | sample['rot_error'] = sample['rot_error'].cuda() 464 | sample['label'] = sample['label'].cuda() 465 | sample['label_nag'] = torch.from_numpy(np.array(sample['label_nag'])).cuda() 466 | sample['label_pos'] = torch.from_numpy(np.array(sample['label_pos'])).cuda() 467 | 468 | for idx in range(len(sample['rgb'])): 469 | # ProjectPointCloud in RT-pose 470 | real_shape = [sample['rgb'][idx].shape[1], sample['rgb'][idx].shape[2], sample['rgb'][idx].shape[0]] 471 | 472 | sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda() # 变换到相机坐标系下的激光雷达点云 473 | pc_lidar = sample['point_cloud'][idx].clone() 474 | 475 | if _config['max_depth'] < 80.: 476 | pc_lidar = pc_lidar[:, pc_lidar[0, :] < _config['max_depth']].clone() 477 | 478 | depth_gt, uv = lidar_project_depth(pc_lidar, sample['calib'][idx], real_shape) # image_shape 479 | depth_gt /= _config['max_depth'] 480 | 481 | reflectance = None 482 | if _config['use_reflectance']: 483 | reflectance = sample['reflectance'][idx].cuda() 484 | 485 | R = mathutils.Quaternion(sample['rot_error'][idx]).to_matrix() 486 | R.resize_4x4() 487 | T = mathutils.Matrix.Translation(sample['tr_error'][idx]) 488 | RT = T * R 489 | 490 | pc_rotated = rotate_back(sample['point_cloud'][idx], RT) # Pc` = RT * Pc 491 | 492 | if _config['max_depth'] < 80.: 493 | pc_rotated = pc_rotated[:, pc_rotated[0, :] < _config['max_depth']].clone() 494 | 495 | depth_img, uv = lidar_project_depth(pc_rotated, sample['calib'][idx], real_shape) # image_shape 496 | depth_img /= _config['max_depth'] 497 | 498 | if _config['use_reflectance']: 499 | refl_img = None 500 | 501 | # PAD ONLY ON RIGHT AND BOTTOM SIDE 502 | rgb = sample['rgb'][idx].cuda() 503 | shape_pad = [0, 0, 0, 0] 504 | 505 | shape_pad[3] = (img_shape[0] - rgb.shape[1]) # // 2 506 | shape_pad[1] = (img_shape[1] - rgb.shape[2]) # // 2 + 1 507 | 508 
| rgb = F.pad(rgb, shape_pad) 509 | depth_img = F.pad(depth_img, shape_pad) 510 | depth_gt = F.pad(depth_gt, shape_pad) 511 | 512 | rgb_input.append(rgb) 513 | lidar_input.append(depth_img) 514 | lidar_gt.append(depth_gt) 515 | real_shape_input.append(real_shape) 516 | shape_pad_input.append(shape_pad) 517 | pc_rotated_input.append(pc_rotated) 518 | 519 | lidar_input = torch.stack(lidar_input) 520 | rgb_input = torch.stack(rgb_input) 521 | rgb_show = rgb_input.clone() 522 | lidar_show = lidar_input.clone() 523 | rgb_input = F.interpolate(rgb_input, size=[256, 512], mode="bilinear") 524 | lidar_input = F.interpolate(lidar_input, size=[256, 512], mode="bilinear") 525 | 526 | loss, predict_label = val1(model, optimizer, rgb_input, lidar_input, sample['label'].long(), loss_fn) 527 | _, classify_reaults = torch.max(predict_label,dim=1) 528 | statistic_results = torch.eq(classify_reaults,sample['label'].long()) 529 | correct_samples += torch.sum(statistic_results, dim=0) 530 | wrong_samples += statistic_results.shape[0] - torch.sum(statistic_results, dim=0) 531 | 532 | total_val_loss += loss.item() * len(sample['rgb']) 533 | val_iter += 1 534 | 535 | print("------------------------------------") 536 | print('total val loss = %.3f' % (total_val_loss / len(dataset_val))) 537 | print(f'total traslation error: {total_val_t / len(dataset_val)} cm') 538 | print(f'total rotation error: {total_val_r / len(dataset_val)} °') 539 | print("total accuracy:",float(correct_samples)/float(correct_samples + wrong_samples)) 540 | print("------------------------------------") 541 | 542 | _run.log_scalar("Val_Loss", total_val_loss / len(dataset_val), epoch) 543 | _run.log_scalar("Val_t_error", total_val_t / len(dataset_val), epoch) 544 | _run.log_scalar("Val_r_error", total_val_r / len(dataset_val), epoch) 545 | 546 | # SAVE 547 | val_loss = total_val_loss / len(dataset_val) 548 | if val_loss < BEST_VAL_LOSS: 549 | BEST_VAL_LOSS = val_loss 550 | #_run.result = BEST_VAL_LOSS 551 | if _config['rescale_transl'] > 0: 552 | _run.result = total_val_t / len(dataset_val) 553 | else: 554 | _run.result = total_val_r / len(dataset_val) 555 | savefilename = f'{model_savepath}/checkpoint_r{_config["max_r"]:.2f}_t{_config["max_t"]:.2f}_e{epoch}_{val_loss:.3f}.tar' 556 | torch.save({ 557 | 'config': _config, 558 | 'epoch': epoch, 559 | # 'state_dict': model.state_dict(), # single gpu 560 | 'state_dict': model.module.state_dict(), # multi gpu 561 | 'optimizer': optimizer.state_dict(), 562 | 'train_loss': total_train_loss / len(dataset_train), 563 | 'val_loss': total_val_loss / len(dataset_val), 564 | }, savefilename) 565 | print(f'Model saved as {savefilename}') 566 | if old_save_filename is not None: 567 | if os.path.exists(old_save_filename): 568 | os.remove(old_save_filename) 569 | old_save_filename = savefilename 570 | 571 | print('full training time = %.2f HR' % ((time.time() - start_full_time) / 3600)) 572 | return _run.result 573 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Xudong Lv 10 | # based on github.com/cattaneod/CMRNet/blob/master/utils.py 11 | 12 | import math 13 | 14 | import mathutils 15 | import numpy as np 16 | import torch 17 | import torch.nn.functional as F 18 | from matplotlib import cm 19 | from torch.utils.data.dataloader import default_collate 20 | 21 | 22 | def rotate_points(PC, R, T=None, inverse=True): 23 | if T is not None: 24 | R = R.to_matrix() 25 | R.resize_4x4() 26 | T = mathutils.Matrix.Translation(T) 27 | RT = T*R 28 | else: 29 | RT=R.copy() 30 | if inverse: 31 | RT.invert_safe() 32 | RT = torch.tensor(RT, device=PC.device, dtype=torch.float) 33 | 34 | if PC.shape[0] == 4: 35 | PC = torch.mm(RT, PC) 36 | elif PC.shape[1] == 4: 37 | PC = torch.mm(RT, PC.t()) 38 | PC = PC.t() 39 | else: 40 | raise TypeError("Point cloud must have shape [Nx4] or [4xN] (homogeneous coordinates)") 41 | return PC 42 | 43 | 44 | def rotate_points_torch(PC, R, T=None, inverse=True): 45 | if T is not None: 46 | R = quat2mat(R) 47 | T = tvector2mat(T) 48 | RT = torch.mm(T, R) 49 | else: 50 | RT = R.clone() 51 | if inverse: 52 | RT = RT.inverse() 53 | 54 | if PC.shape[0] == 4: 55 | PC = torch.mm(RT, PC) 56 | elif PC.shape[1] == 4: 57 | PC = torch.mm(RT, PC.t()) 58 | PC = PC.t() 59 | else: 60 | raise TypeError("Point cloud must have shape [Nx4] or [4xN] (homogeneous coordinates)") 61 | return PC 62 | 63 | 64 | def rotate_forward(PC, R, T=None): 65 | """ 66 | Transform the point cloud PC, so to have the points 'as seen from' the new 67 | pose T*R 68 | Args: 69 | PC (torch.Tensor): Point Cloud to be transformed, shape [4xN] or [Nx4] 70 | R (torch.Tensor/mathutils.Euler): can be either: 71 | * (mathutils.Euler) euler angles of the rotation part, in this case T cannot be None 72 | * (torch.Tensor shape [4]) quaternion representation of the rotation part, in this case T cannot be None 73 | * (mathutils.Matrix shape [4x4]) Rotation matrix, 74 | in this case it should contains the translation part, and T should be None 75 | * (torch.Tensor shape [4x4]) Rotation matrix, 76 | in this case it should contains the translation part, and T should be None 77 | T (torch.Tensor/mathutils.Vector): Translation of the new pose, shape [3], or None (depending on R) 78 | 79 | Returns: 80 | torch.Tensor: Transformed Point Cloud 'as seen from' pose T*R 81 | """ 82 | if isinstance(R, torch.Tensor): 83 | return rotate_points_torch(PC, R, T, inverse=True) 84 | else: 85 | return rotate_points(PC, R, T, inverse=True) 86 | 87 | 88 | def rotate_back(PC_ROTATED, R, T=None): 89 | """ 90 | Inverse of :func:`~utils.rotate_forward`. 91 | """ 92 | if isinstance(R, torch.Tensor): 93 | return rotate_points_torch(PC_ROTATED, R, T, inverse=False) 94 | else: 95 | return rotate_points(PC_ROTATED, R, T, inverse=False) 96 | 97 | 98 | def invert_pose(R, T): 99 | """ 100 | Given the 'sampled pose' (aka H_init), we want CMRNet to predict inv(H_init). 101 | inv(T*R) will be used as ground truth for the network. 
102 | Args: 103 | R (mathutils.Euler): Rotation of 'sampled pose' 104 | T (mathutils.Vector): Translation of 'sampled pose' 105 | 106 | Returns: 107 | (R_GT, T_GT) = (mathutils.Quaternion, mathutils.Vector) 108 | """ 109 | R = R.to_matrix() 110 | R.resize_4x4() 111 | T = mathutils.Matrix.Translation(T) 112 | RT = T * R 113 | RT.invert_safe() 114 | T_GT, R_GT, _ = RT.decompose() 115 | return R_GT.normalized(), T_GT 116 | 117 | 118 | def merge_inputs(queries): 119 | point_clouds = [] 120 | imgs = [] 121 | reflectances = [] 122 | returns = {key: default_collate([d[key] for d in queries]) for key in queries[0] 123 | if key != 'point_cloud' and key != 'rgb' and key != 'reflectance'} 124 | for input in queries: 125 | point_clouds.append(input['point_cloud']) 126 | imgs.append(input['rgb']) 127 | if 'reflectance' in input: 128 | reflectances.append(input['reflectance']) 129 | returns['point_cloud'] = point_clouds 130 | returns['rgb'] = imgs 131 | if len(reflectances) > 0: 132 | returns['reflectance'] = reflectances 133 | return returns 134 | 135 | 136 | def quaternion_from_matrix(matrix): 137 | """ 138 | Convert a rotation matrix to quaternion. 139 | Args: 140 | matrix (torch.Tensor): [4x4] transformation matrix or [3,3] rotation matrix. 141 | 142 | Returns: 143 | torch.Tensor: shape [4], normalized quaternion 144 | """ 145 | if matrix.shape == (4, 4): 146 | R = matrix[:-1, :-1] 147 | elif matrix.shape == (3, 3): 148 | R = matrix 149 | else: 150 | raise TypeError("Not a valid rotation matrix") 151 | tr = R[0, 0] + R[1, 1] + R[2, 2] 152 | q = torch.zeros(4, device=matrix.device) 153 | if tr > 0.: 154 | S = (tr+1.0).sqrt() * 2 155 | q[0] = 0.25 * S 156 | q[1] = (R[2, 1] - R[1, 2]) / S 157 | q[2] = (R[0, 2] - R[2, 0]) / S 158 | q[3] = (R[1, 0] - R[0, 1]) / S 159 | elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]: 160 | S = (1.0 + R[0, 0] - R[1, 1] - R[2, 2]).sqrt() * 2 161 | q[0] = (R[2, 1] - R[1, 2]) / S 162 | q[1] = 0.25 * S 163 | q[2] = (R[0, 1] + R[1, 0]) / S 164 | q[3] = (R[0, 2] + R[2, 0]) / S 165 | elif R[1, 1] > R[2, 2]: 166 | S = (1.0 + R[1, 1] - R[0, 0] - R[2, 2]).sqrt() * 2 167 | q[0] = (R[0, 2] - R[2, 0]) / S 168 | q[1] = (R[0, 1] + R[1, 0]) / S 169 | q[2] = 0.25 * S 170 | q[3] = (R[1, 2] + R[2, 1]) / S 171 | else: 172 | S = (1.0 + R[2, 2] - R[0, 0] - R[1, 1]).sqrt() * 2 173 | q[0] = (R[1, 0] - R[0, 1]) / S 174 | q[1] = (R[0, 2] + R[2, 0]) / S 175 | q[2] = (R[1, 2] + R[2, 1]) / S 176 | q[3] = 0.25 * S 177 | return q / q.norm() 178 | 179 | 180 | def quatmultiply(q, r): 181 | """ 182 | Multiply two quaternions 183 | Args: 184 | q (torch.Tensor/nd.ndarray): shape=[4], first quaternion 185 | r (torch.Tensor/nd.ndarray): shape=[4], second quaternion 186 | 187 | Returns: 188 | torch.Tensor: shape=[4], normalized quaternion q*r 189 | """ 190 | t = torch.zeros(4, device=q.device) 191 | t[0] = r[0] * q[0] - r[1] * q[1] - r[2] * q[2] - r[3] * q[3] 192 | t[1] = r[0] * q[1] + r[1] * q[0] - r[2] * q[3] + r[3] * q[2] 193 | t[2] = r[0] * q[2] + r[1] * q[3] + r[2] * q[0] - r[3] * q[1] 194 | t[3] = r[0] * q[3] - r[1] * q[2] + r[2] * q[1] + r[3] * q[0] 195 | return t / t.norm() 196 | 197 | 198 | def quat2mat(q): 199 | """ 200 | Convert a quaternion to a rotation matrix 201 | Args: 202 | q (torch.Tensor): shape [4], input quaternion 203 | 204 | Returns: 205 | torch.Tensor: [4x4] homogeneous rotation matrix 206 | """ 207 | assert q.shape == torch.Size([4]), "Not a valid quaternion" 208 | if q.norm() != 1.: 209 | q = q / q.norm() 210 | mat = torch.zeros((4, 4), device=q.device) 211 | mat[0, 0] = 1 - 2*q[2]**2 - 
2*q[3]**2 212 | mat[0, 1] = 2*q[1]*q[2] - 2*q[3]*q[0] 213 | mat[0, 2] = 2*q[1]*q[3] + 2*q[2]*q[0] 214 | mat[1, 0] = 2*q[1]*q[2] + 2*q[3]*q[0] 215 | mat[1, 1] = 1 - 2*q[1]**2 - 2*q[3]**2 216 | mat[1, 2] = 2*q[2]*q[3] - 2*q[1]*q[0] 217 | mat[2, 0] = 2*q[1]*q[3] - 2*q[2]*q[0] 218 | mat[2, 1] = 2*q[2]*q[3] + 2*q[1]*q[0] 219 | mat[2, 2] = 1 - 2*q[1]**2 - 2*q[2]**2 220 | mat[3, 3] = 1. 221 | return mat 222 | 223 | 224 | def tvector2mat(t): 225 | """ 226 | Translation vector to homogeneous transformation matrix with identity rotation 227 | Args: 228 | t (torch.Tensor): shape=[3], translation vector 229 | 230 | Returns: 231 | torch.Tensor: [4x4] homogeneous transformation matrix 232 | 233 | """ 234 | assert t.shape == torch.Size([3]), "Not a valid translation" 235 | mat = torch.eye(4, device=t.device) 236 | mat[0, 3] = t[0] 237 | mat[1, 3] = t[1] 238 | mat[2, 3] = t[2] 239 | return mat 240 | 241 | 242 | def mat2xyzrpy(rotmatrix): 243 | """ 244 | Decompose transformation matrix into components 245 | Args: 246 | rotmatrix (torch.Tensor/np.ndarray): [4x4] transformation matrix 247 | 248 | Returns: 249 | torch.Tensor: shape=[6], contains xyzrpy 250 | """ 251 | roll = math.atan2(-rotmatrix[1, 2], rotmatrix[2, 2]) 252 | pitch = math.asin ( rotmatrix[0, 2]) 253 | yaw = math.atan2(-rotmatrix[0, 1], rotmatrix[0, 0]) 254 | x = rotmatrix[:3, 3][0] 255 | y = rotmatrix[:3, 3][1] 256 | z = rotmatrix[:3, 3][2] 257 | 258 | return torch.tensor([x, y, z, roll, pitch, yaw], device=rotmatrix.device, dtype=rotmatrix.dtype) 259 | 260 | 261 | def to_rotation_matrix(R, T): 262 | R = quat2mat(R) 263 | T = tvector2mat(T) 264 | RT = torch.mm(T, R) 265 | return RT 266 | 267 | 268 | def overlay_imgs(rgb, lidar, idx=0): 269 | std = [0.229, 0.224, 0.225] 270 | mean = [0.485, 0.456, 0.406] 271 | 272 | rgb = rgb.clone().cpu().permute(1,2,0).numpy() 273 | rgb = rgb*std+mean 274 | lidar = lidar.clone() 275 | 276 | lidar[lidar == 0] = 1000. 277 | lidar = -lidar 278 | #lidar = F.max_pool2d(lidar, 3, 1, 1) 279 | lidar = F.max_pool2d(lidar, 3, 1, 1) 280 | lidar = -lidar 281 | lidar[lidar == 1000.] = 0. 282 | 283 | #lidar = lidar.squeeze() 284 | lidar = lidar[0][0] 285 | lidar = (lidar*255).int().cpu().numpy() 286 | lidar_color = cm.jet(lidar) 287 | lidar_color[:, :, 3] = 0.5 288 | lidar_color[lidar == 0] = [0, 0, 0, 0] 289 | blended_img = lidar_color[:, :, :3] * (np.expand_dims(lidar_color[:, :, 3], 2)) + \ 290 | rgb * (1. - np.expand_dims(lidar_color[:, :, 3], 2)) 291 | blended_img = blended_img.clip(min=0., max=1.) 292 | #io.imshow(blended_img) 293 | #io.show() 294 | #plt.figure() 295 | #plt.imshow(blended_img) 296 | #io.imsave(f'./IMGS/{idx:06d}.png', blended_img) 297 | return blended_img 298 | --------------------------------------------------------------------------------
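
A minimal usage sketch (not part of the repository files above) showing how the pose helpers in utils.py compose: quat2mat and tvector2mat build a 4x4 homogeneous transform as T * R, rotate_forward applies its inverse to a homogeneous [4xN] point cloud, and rotate_back undoes it. The quaternion, translation, and point cloud below are illustrative values only, not values used by the training code.

# Illustrative sketch only -- assumes utils.py from this repository is on the import path.
import torch
from utils import quat2mat, tvector2mat, rotate_forward, rotate_back, quaternion_from_matrix

pc = torch.rand(4, 100)                      # homogeneous point cloud, shape [4xN]
pc[3, :] = 1.0

q = torch.tensor([1.0, 0.0, 0.0, 0.0])       # unit quaternion (w, x, y, z): identity rotation
t = torch.tensor([0.5, -0.2, 1.0])           # translation vector
RT = torch.mm(tvector2mat(t), quat2mat(q))   # 4x4 pose, composed as T * R as in train_with_sacred.py

pc_seen = rotate_forward(pc, RT)             # points 'as seen from' the pose, i.e. inv(RT) applied to pc
pc_back = rotate_back(pc_seen, RT)           # RT applied back, recovering the original cloud
assert torch.allclose(pc, pc_back, atol=1e-5)

print(quaternion_from_matrix(RT))            # normalized quaternion recovered from the 4x4 matrix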