├── .gitignore ├── DatasetLidarCamera.py ├── README.md ├── evaluate_calib.py ├── models ├── LCCNet.py ├── __pycache__ │ └── LCCNet.cpython-36.pyc └── correlation_package │ ├── LICENSE │ ├── __pycache__ │ └── correlation.cpython-36.pyc │ ├── build │ ├── lib.linux-x86_64-3.6 │ │ └── correlation_cuda.cpython-36m-x86_64-linux-gnu.so │ └── temp.linux-x86_64-3.6 │ │ ├── correlation_cuda.o │ │ └── correlation_cuda_kernel.o │ ├── correlation.py │ ├── correlation_cuda.cc │ ├── correlation_cuda.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ └── top_level.txt │ ├── correlation_cuda_kernel.cu │ ├── correlation_cuda_kernel.cuh │ ├── dist │ └── correlation_cuda-0.0.0-py3.6-linux-x86_64.egg │ ├── pyproject.toml │ └── setup.py ├── pictures ├── input.pdf ├── performance.pdf └── pipline.pdf ├── quaternion_distances.py ├── requirements.txt ├── train_with_sacred.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints/ 2 | output/ 3 | **/__pycache__ 4 | results* -------------------------------------------------------------------------------- /DatasetLidarCamera.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Xudong Lv 10 | # based on github.com/cattaneod/CMRNet/blob/master/DatasetVisibilityKitti.py 11 | 12 | import csv 13 | import os 14 | import random 15 | import os.path as osp 16 | from math import radians 17 | import cv2 18 | 19 | import h5py 20 | import mathutils 21 | import numpy as np 22 | import pandas as pd 23 | import torch 24 | import torchvision.transforms.functional as TTF 25 | from PIL import Image 26 | from torch.utils.data import Dataset 27 | from torchvision import transforms 28 | 29 | from utils import invert_pose, rotate_forward, quaternion_from_matrix #, read_calib_file 30 | from pykitti import odometry 31 | import pykitti 32 | from secrets import choice 33 | 34 | class DatasetLidarCameraKittiOdometry(Dataset): 35 | 36 | def __init__(self, dataset_dir, transform=None, augmentation=False, use_reflectance=False, 37 | max_t=1.5, max_r=20., split='val', device='cpu', val_sequence='00', suf='.png'): 38 | super(DatasetLidarCameraKittiOdometry, self).__init__() 39 | self.use_reflectance = use_reflectance 40 | self.maps_folder = '' 41 | self.device = device 42 | self.max_r = max_r 43 | self.max_t = max_t 44 | self.augmentation = augmentation 45 | self.root_dir = dataset_dir 46 | self.transform = transform 47 | self.split = split 48 | self.GTs_R = {} 49 | self.GTs_T = {} 50 | self.GTs_T_cam02_velo = {} 51 | self.K = {} 52 | self.suf = suf 53 | 54 | self.all_files = [] 55 | self.sequence_list = ['00','01', '02', '03', '04', '05', '06', '07', '08', '09', '10', 56 | '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21'] 57 | # self.sequence_list = ['00', '01', '02'] 58 | # self.model = CameraModel() 59 | # self.model.focal_length = [7.18856e+02, 7.18856e+02] 60 | # self.model.principal_point = [6.071928e+02, 1.852157e+02] 61 | # for seq in ['00', '03', '05', '06', '07', '08', '09']: 62 | for seq 
in self.sequence_list: 63 | odom = odometry(self.root_dir, seq) 64 | calib = odom.calib 65 | T_cam02_velo_np = calib.T_cam2_velo #gt pose from cam02 to velo_lidar (T_cam02_velo: 4x4) 66 | self.K[seq] = calib.K_cam2 # 3x3 67 | # T_cam02_velo = torch.from_numpy(T_cam02_velo_np) 68 | # GT_R = quaternion_from_matrix(T_cam02_velo[:3, :3]) 69 | # GT_T = T_cam02_velo[3:, :3] 70 | # self.GTs_R[seq] = GT_R # GT_R = np.array([row['qw'], row['qx'], row['qy'], row['qz']]) 71 | # self.GTs_T[seq] = GT_T # GT_T = np.array([row['x'], row['y'], row['z']]) 72 | self.GTs_T_cam02_velo[seq] = T_cam02_velo_np #gt pose from cam02 to velo_lidar (T_cam02_velo: 4x4) 73 | 74 | image_list = os.listdir(os.path.join(dataset_dir, 'sequences', seq, 'image_2')) 75 | image_list.sort() 76 | 77 | for image_name in image_list: 78 | if not os.path.exists(os.path.join(dataset_dir, 'sequences', seq, 'velodyne', 79 | str(image_name.split('.')[0])+'.bin')): 80 | continue 81 | if not os.path.exists(os.path.join(dataset_dir, 'sequences', seq, 'image_2', 82 | str(image_name.split('.')[0])+suf)): 83 | continue 84 | if seq == val_sequence: 85 | if split.startswith('val') or split == 'test': 86 | self.all_files.append(os.path.join(seq, image_name.split('.')[0])) 87 | elif (not seq == val_sequence) and split == 'train': 88 | self.all_files.append(os.path.join(seq, image_name.split('.')[0])) 89 | # self.train_RT = [] 90 | # if split == 'train': 91 | 92 | # train_RT_file = osp.join(dataset_dir, 'sequences', 93 | # f'train_RT_left_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 94 | 95 | # if osp.exists(train_RT_file): 96 | # print(f'TRAIN SET: Using this file: {train_RT_file}') 97 | # df_train_RT = pd.read_csv(train_RT_file, sep=',') 98 | # for index, row in df_train_RT.iterrows(): 99 | # # print("row:",row) 100 | # self.train_RT.append(list(row)) 101 | # else: 102 | # print(f'TRAIN SET - Not found: {train_RT_file}') 103 | # print("Generating a new one") 104 | # train_RT_file = open(train_RT_file, 'w') 105 | # train_RT_file = csv.writer(train_RT_file, delimiter=',') 106 | # train_RT_file.writerow(['id', 'pair', 'Yaw', 'Pitch', 'Roll', 'tx', 'ty', 'tz', 'classify_label']) 107 | 108 | # for i in range(len(self.all_files)): 109 | # data_seed = random.randint(1,10) #大于5数据为配准失败(负样本),小于等于5为配准成功(正样本) 110 | # rotz,roty,rotx,transl_x,transl_y,transl_z,label = self.generate_initial_transformation(data_seed) 111 | # # if data_seed > 5 : 112 | # # max_angle = self.max_r 113 | # # rotz = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 114 | # # roty = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 115 | # # rotx = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 116 | # # transl_x = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 117 | # # transl_y = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 118 | # # transl_z = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 119 | # # label = 0 120 | # # else: 121 | # # rotz = np.random.uniform(-1, 1) * (3.141592 / 180.0) 122 | # # roty = np.random.uniform(-1, 1) * (3.141592 / 180.0) 123 | # # rotx = np.random.uniform(-1, 1) * (3.141592 / 180.0) 124 | # # transl_x = np.random.uniform(-0.1, 0.1) 125 | # # transl_y = np.random.uniform(-0.1, 0.1) 126 | # # transl_z = np.random.uniform(-0.1, 0.1) 127 | # # label = 1 128 | 129 | # train_RT_file.writerow([str(self.all_files[i][:6]),str(self.all_files[i][:-1]), rotx, roty, rotz, transl_x, transl_y, 
transl_z, label]) 130 | # self.train_RT.append([str(self.all_files[i][:6]), str(self.all_files[i][:-1]), float(rotx), float(roty), float(rotz), float(transl_x), float(transl_y), float(transl_z), int(label)]) 131 | 132 | # assert len(self.train_RT) == len(self.all_files), "Something wrong with train RTs" 133 | # self.metadata = self.all_files 134 | 135 | # # self.val_RT = [] 136 | 137 | self.val_RT = [] 138 | if split == 'val' or split == 'test': 139 | # val_RT_file = os.path.join(dataset_dir, 'sequences', 140 | # f'val_RT_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 141 | val_RT_file = os.path.join(dataset_dir, 'sequences', 142 | f'val_RT_left_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 143 | if os.path.exists(val_RT_file): 144 | print(f'VAL SET: Using this file: {val_RT_file}') 145 | df_test_RT = pd.read_csv(val_RT_file, sep=',') 146 | for index, row in df_test_RT.iterrows(): 147 | # print("row:",row) 148 | self.val_RT.append(list(row)) 149 | else: 150 | print(f'VAL SET - Not found: {val_RT_file}') 151 | print("Generating a new one") 152 | val_RT_file = open(val_RT_file, 'w') 153 | val_RT_file = csv.writer(val_RT_file, delimiter=',') 154 | val_RT_file.writerow(['id', 'tx', 'ty', 'tz', 'rx', 'ry', 'rz']) 155 | for i in range(len(self.all_files)): 156 | # rotz = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 157 | # roty = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 158 | # rotx = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 159 | # transl_x = np.random.uniform(-max_t, max_t) 160 | # transl_y = np.random.uniform(-max_t, max_t) 161 | # transl_z = np.random.uniform(-max_t, max_t) 162 | data_seed = random.randint(1,10) #大于5数据为配准失败(负样本),小于等于5为配准成功(正样本) 163 | rotz,roty,rotx,transl_x,transl_y,transl_z,label = self.generate_initial_transformation(data_seed) 164 | # if data_seed > 5 : 165 | # max_angle = self.max_r 166 | # rotz = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 167 | # roty = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 168 | # rotx = np.random.uniform(*(choice([(-max_angle,-1),(1,max_angle)]))) * (3.141592 / 180.0) 169 | # transl_x = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 170 | # transl_y = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 171 | # transl_z = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 172 | # label = 0 173 | # else: 174 | # rotz = np.random.uniform(-1, 1) * (3.141592 / 180.0) 175 | # roty = np.random.uniform(-1, 1) * (3.141592 / 180.0) 176 | # rotx = np.random.uniform(-1, 1) * (3.141592 / 180.0) 177 | # transl_x = np.random.uniform(-0.1, 0.1) 178 | # transl_y = np.random.uniform(-0.1, 0.1) 179 | # transl_z = np.random.uniform(-0.1, 0.1) 180 | # label = 1 181 | val_RT_file.writerow([str(self.all_files[i][:6]),str(self.all_files[i][:-1]), rotx, roty, rotz, transl_x, transl_y, transl_z, label]) 182 | self.val_RT.append([str(self.all_files[i][:6]), str(self.all_files[i][:-1]), float(rotx), float(roty), float(rotz), float(transl_x), float(transl_y), float(transl_z), int(label)]) 183 | # transl_z = np.random.uniform(-max_t, min(max_t, 1.)) 184 | # val_RT_file.writerow([i, transl_x, transl_y, transl_z, 185 | # rotx, roty, rotz]) 186 | # self.val_RT.append([float(i), float(transl_x), float(transl_y), float(transl_z), 187 | # float(rotx), float(roty), float(rotz)]) 188 | print("len(self.val_RT):",len(self.val_RT)) 189 | print("len(self.all_files):",len(self.all_files)) 190 | assert len(self.val_RT) 
== len(self.all_files), "Something wrong with test RTs" 191 | def generate_initial_transformation(self, data_seed): 192 | max_angle = self.max_r 193 | if data_seed > 5: # negative samples have 194 | while True: 195 | rotz_seed = random.randint(1,2) 196 | roty_seed = random.randint(1,2) 197 | rotx_seed = random.randint(1,2) 198 | transl_x_seed = random.randint(1,2) 199 | transl_y_seed = random.randint(1,2) 200 | transl_z_seed = random.randint(1,2) 201 | if rotz_seed==2 and roty_seed==2 and rotx_seed==2 and transl_x_seed==2 and transl_y_seed==2 and transl_z_seed==2 : 202 | continue 203 | else: 204 | break 205 | if rotz_seed > 1: 206 | rotz = np.random.uniform(*(choice([(-max_angle,-0.5),(0.5,max_angle)]))) * (3.141592 / 180.0) 207 | else: 208 | rotz = np.random.uniform(-1, 1) * (3.141592 / 180.0) 209 | 210 | 211 | if roty_seed > 1: 212 | roty = np.random.uniform(*(choice([(-max_angle,-0.5),(0.5,max_angle)]))) * (3.141592 / 180.0) 213 | else: 214 | roty = np.random.uniform(-1, 1) * (3.141592 / 180.0) 215 | 216 | 217 | if rotx_seed > 1: 218 | rotx = np.random.uniform(*(choice([(-max_angle,-0.5),(0.5,max_angle)]))) * (3.141592 / 180.0) 219 | else: 220 | rotx = np.random.uniform(-1, 1) * (3.141592 / 180.0) 221 | 222 | 223 | if transl_x_seed > 1: 224 | transl_x = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 225 | else: 226 | transl_x = np.random.uniform(-0.1, 0.1) 227 | 228 | 229 | if transl_y_seed > 1: 230 | transl_y = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 231 | else: 232 | transl_y = np.random.uniform(-0.1, 0.1) 233 | 234 | 235 | if transl_z_seed > 1: 236 | transl_z = np.random.uniform(*(choice([(-self.max_t,-0.1),(0.1,self.max_t)]))) 237 | else: 238 | transl_z = np.random.uniform(-0.1, 0.1) 239 | 240 | label = 0 241 | else: 242 | rotz = np.random.uniform(-0.5, 0.5) * (3.141592 / 180.0) 243 | roty = np.random.uniform(-0.5, 0.5) * (3.141592 / 180.0) 244 | rotx = np.random.uniform(-0.5, 0.5) * (3.141592 / 180.0) 245 | transl_x = np.random.uniform(-0.1, 0.1) 246 | transl_y = np.random.uniform(-0.1, 0.1) 247 | transl_z = np.random.uniform(-0.1, 0.1) 248 | label = 1 249 | 250 | return rotz,roty,rotx,transl_x,transl_y,transl_z,label 251 | 252 | def get_ground_truth_poses(self, sequence, frame): 253 | return self.GTs_T[sequence][frame], self.GTs_R[sequence][frame] 254 | 255 | def custom_transform(self, rgb, img_rotation=0., flip=False): 256 | to_tensor = transforms.ToTensor() 257 | normalization = transforms.Normalize(mean=[0.485, 0.456, 0.406], 258 | std=[0.229, 0.224, 0.225]) 259 | 260 | #rgb = crop(rgb) 261 | if self.split == 'train': 262 | color_transform = transforms.ColorJitter(0.1, 0.1, 0.1) 263 | rgb = color_transform(rgb) 264 | if flip: 265 | rgb = TTF.hflip(rgb) 266 | rgb = TTF.rotate(rgb, img_rotation) 267 | #io.imshow(np.array(rgb)) 268 | #io.show() 269 | 270 | rgb = to_tensor(rgb) 271 | rgb = normalization(rgb) 272 | return rgb 273 | 274 | def __len__(self): 275 | return len(self.all_files) 276 | 277 | def __getitem__(self, idx): 278 | item = self.all_files[idx] 279 | seq = str(item.split('/')[0]) 280 | rgb_name = str(item.split('/')[1]) 281 | img_path = os.path.join(self.root_dir, 'sequences', seq, 'image_2', rgb_name+self.suf) 282 | lidar_path = os.path.join(self.root_dir, 'sequences', seq, 'velodyne', rgb_name+'.bin') 283 | lidar_scan = np.fromfile(lidar_path, dtype=np.float32) 284 | pc = lidar_scan.reshape((-1, 4)) 285 | valid_indices = pc[:, 0] < -3. 286 | valid_indices = valid_indices | (pc[:, 0] > 3.) 
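# note: the OR-chain above and below keeps only points with |x| > 3 m or |y| > 3 m, so LiDAR returns close to the sensor (roughly the ego-vehicle area) are discarded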
287 | valid_indices = valid_indices | (pc[:, 1] < -3.) 288 | valid_indices = valid_indices | (pc[:, 1] > 3.) 289 | pc = pc[valid_indices].copy() 290 | pc_org = torch.from_numpy(pc.astype(np.float32)) 291 | # if self.use_reflectance: 292 | # reflectance = pc[:, 3].copy() 293 | # reflectance = torch.from_numpy(reflectance).float() 294 | 295 | RT = self.GTs_T_cam02_velo[seq].astype(np.float32) 296 | 297 | if pc_org.shape[1] == 4 or pc_org.shape[1] == 3: 298 | pc_org = pc_org.t() 299 | if pc_org.shape[0] == 3: 300 | homogeneous = torch.ones(pc_org.shape[1]).unsqueeze(0) 301 | pc_org = torch.cat((pc_org, homogeneous), 0) 302 | elif pc_org.shape[0] == 4: 303 | if not torch.all(pc_org[3, :] == 1.): 304 | pc_org[3, :] = 1. 305 | else: 306 | raise TypeError("Wrong PointCloud shape") 307 | 308 | pc_rot = np.matmul(RT, pc_org.numpy()) 309 | pc_rot = pc_rot.astype(np.float32).copy() 310 | pc_in = torch.from_numpy(pc_rot) 311 | 312 | # pc_rot = np.matmul(RT, pc.T) 313 | # pc_rot = pc_rot.astype(np.float).T.copy() 314 | # pc_in = torch.from_numpy(pc_rot.astype(np.float32))#.float() 315 | 316 | # if pc_in.shape[1] == 4 or pc_in.shape[1] == 3: 317 | # pc_in = pc_in.t() 318 | # if pc_in.shape[0] == 3: 319 | # homogeneous = torch.ones(pc_in.shape[1]).unsqueeze(0) 320 | # pc_in = torch.cat((pc_in, homogeneous), 0) 321 | # elif pc_in.shape[0] == 4: 322 | # if not torch.all(pc_in[3,:] == 1.): 323 | # pc_in[3,:] = 1. 324 | # else: 325 | # raise TypeError("Wrong PointCloud shape") 326 | 327 | h_mirror = False 328 | # if np.random.rand() > 0.5 and self.split == 'train': 329 | # h_mirror = True 330 | # pc_in[1, :] *= -1 331 | 332 | img = Image.open(img_path) 333 | # img = cv2.imread(img_path) 334 | img_rotation = 0. 335 | # if self.split == 'train': 336 | # img_rotation = np.random.uniform(-5, 5) 337 | try: 338 | img = self.custom_transform(img, img_rotation, h_mirror) 339 | except OSError: 340 | new_idx = np.random.randint(0, self.__len__()) 341 | return self.__getitem__(new_idx) 342 | 343 | # Rotate PointCloud for img_rotation 344 | if self.split == 'train': 345 | R = mathutils.Euler((radians(img_rotation), 0, 0), 'XYZ') 346 | T = mathutils.Vector((0., 0., 0.)) 347 | pc_in = rotate_forward(pc_in, R, T) 348 | 349 | if self.split == 'train': 350 | # max_angle = self.max_r 351 | # rotz = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 352 | # roty = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 353 | # rotx = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 354 | # transl_x = np.random.uniform(-self.max_t, self.max_t) 355 | # transl_y = np.random.uniform(-self.max_t, self.max_t) 356 | # transl_z = np.random.uniform(-self.max_t, self.max_t) 357 | # transl_z = np.random.uniform(-self.max_t, min(self.max_t, 1.)) 358 | data_seed = random.randint(1,10) # data_seed > 5: mis-registered pair (negative sample); data_seed <= 5: correctly registered pair (positive sample) 359 | rotz,roty,rotx,transl_x,transl_y,transl_z,label = self.generate_initial_transformation(data_seed) 360 | # initial_RT = self.train_RT[idx] 361 | 362 | # rotz = initial_RT[4] 363 | # roty = initial_RT[3] 364 | # rotx = initial_RT[2] 365 | # transl_x = initial_RT[5] 366 | # transl_y = initial_RT[6] 367 | # transl_z = initial_RT[7] 368 | # label = initial_RT[8] 369 | else: 370 | initial_RT = self.val_RT[idx] 371 | # print("initial_RT:",initial_RT) 372 | rotz = initial_RT[2] 373 | roty = initial_RT[1] 374 | rotx = initial_RT[0] 375 | transl_x = initial_RT[3] 376 | transl_y = initial_RT[4] 377 | transl_z = initial_RT[5] 378 | label = initial_RT[6] 379 | 380 | # randomly generate a perturbation of the calibration parameters within the given range 381 | # 
during training a new perturbation is drawn for every sample, so each epoch uses different values 382 | # for val/test the perturbations are generated once at initialization, so every epoch uses the same values 383 | R = mathutils.Euler((rotx, roty, rotz), 'XYZ') 384 | T = mathutils.Vector((transl_x, transl_y, transl_z)) 385 | 386 | R, T = invert_pose(R, T) 387 | R, T = torch.tensor(R), torch.tensor(T) 388 | 389 | 390 | # # positive samples also need R and T, but both are set to zero 391 | # R_pos = mathutils.Euler((0.0, 0.0, 0.0), 'XYZ') 392 | # T_pos = mathutils.Vector((0.0, 0.0, 0.0)) 393 | 394 | # R_pos, T_pos = invert_pose(R_pos, T_pos) 395 | # R_pos, T_pos = torch.tensor(R_pos), torch.tensor(T_pos) 396 | #io.imshow(depth_img.numpy(), cmap='jet') 397 | #io.show() 398 | calib = self.K[seq] 399 | if h_mirror: 400 | calib[2] = (img.shape[2] / 2)*2 - calib[2] 401 | 402 | if self.split == 'test': 403 | sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 404 | 'tr_error': T, 'rot_error': R, 'seq': int(seq), 'img_path': img_path, 405 | 'rgb_name': rgb_name + '.png', 'item': item, 'extrin': RT,'label':label, 406 | 'initial_RT': initial_RT} 407 | else: 408 | sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 409 | 'tr_error': T, 'rot_error': R, 410 | # 'tr_pos': T_pos, 'rot_pos': R_pos, 411 | 'label_pos': 0, 'label_nag': 1, 'label':label, 412 | 'seq': int(seq), 413 | 'rgb_name': rgb_name, 'item': item, 'extrin': RT} 414 | 415 | return sample 416 | 417 | 418 | class DatasetLidarCameraKittiRaw(Dataset): 419 | 420 | def __init__(self, dataset_dir, transform=None, augmentation=False, use_reflectance=False, 421 | max_t=1.5, max_r=15.0, split='val', device='cpu', val_sequence='2011_09_26_drive_0117_sync'): 422 | super(DatasetLidarCameraKittiRaw, self).__init__() 423 | self.use_reflectance = use_reflectance 424 | self.maps_folder = '' 425 | self.device = device 426 | self.max_r = max_r 427 | self.max_t = max_t 428 | self.augmentation = augmentation 429 | self.root_dir = dataset_dir 430 | self.transform = transform 431 | self.split = split 432 | self.GTs_R = {} 433 | self.GTs_T = {} 434 | self.GTs_T_cam02_velo = {} 435 | self.max_depth = 80 436 | self.K_list = {} 437 | 438 | self.all_files = [] 439 | date_list = ['2011_09_26', '2011_09_28', '2011_09_29', '2011_09_30', '2011_10_03'] 440 | data_drive_list = ['0001', '0002', '0004', '0016', '0027'] 441 | self.calib_date = {} 442 | 443 | for i in range(len(date_list)): 444 | date = date_list[i] 445 | data_drive = data_drive_list[i] 446 | data = pykitti.raw(self.root_dir, date, data_drive) 447 | calib = {'K2': data.calib.K_cam2, 'K3': data.calib.K_cam3, 448 | 'RT2': data.calib.T_cam2_velo, 'RT3': data.calib.T_cam3_velo} 449 | self.calib_date[date] = calib 450 | 451 | # date = val_sequence[:10] 452 | # seq = val_sequence 453 | # image_list = os.listdir(os.path.join(dataset_dir, date, seq, 'image_02/data')) 454 | # image_list.sort() 455 | # 456 | # for image_name in image_list: 457 | # if not os.path.exists(os.path.join(dataset_dir, date, seq, 'velodyne_points/data', 458 | # str(image_name.split('.')[0]) + '.bin')): 459 | # continue 460 | # if not os.path.exists(os.path.join(dataset_dir, date, seq, 'image_02/data', 461 | # str(image_name.split('.')[0]) + '.jpg')): # png 462 | # continue 463 | # self.all_files.append(os.path.join(date, seq, 'image_02/data', image_name.split('.')[0])) 464 | 465 | date = val_sequence[:10] 466 | test_list = ['2011_09_26_drive_0005_sync', '2011_09_26_drive_0070_sync', '2011_10_03_drive_0027_sync'] 467 | seq_list = os.listdir(os.path.join(self.root_dir, date)) 468 | 469 | for seq in seq_list: 470 | if not os.path.isdir(os.path.join(dataset_dir, date, seq)): 471 | continue 472 | image_list = 
os.listdir(os.path.join(dataset_dir, date, seq, 'image_02/data')) 473 | image_list.sort() 474 | 475 | for image_name in image_list: 476 | if not os.path.exists(os.path.join(dataset_dir, date, seq, 'velodyne_points/data', 477 | str(image_name.split('.')[0])+'.bin')): 478 | continue 479 | if not os.path.exists(os.path.join(dataset_dir, date, seq, 'image_02/data', 480 | str(image_name.split('.')[0])+'.jpg')): # png 481 | continue 482 | if seq == val_sequence and (not split == 'train'): 483 | self.all_files.append(os.path.join(date, seq, 'image_02/data', image_name.split('.')[0])) 484 | elif (not seq == val_sequence) and split == 'train' and seq not in test_list: 485 | self.all_files.append(os.path.join(date, seq, 'image_02/data', image_name.split('.')[0])) 486 | 487 | self.val_RT = [] 488 | if split == 'val' or split == 'test': 489 | val_RT_file = os.path.join(dataset_dir, 490 | f'val_RT_seq{val_sequence}_{max_r:.2f}_{max_t:.2f}.csv') 491 | if os.path.exists(val_RT_file): 492 | print(f'VAL SET: Using this file: {val_RT_file}') 493 | df_test_RT = pd.read_csv(val_RT_file, sep=',') 494 | for index, row in df_test_RT.iterrows(): 495 | self.val_RT.append(list(row)) 496 | else: 497 | print(f'TEST SET - Not found: {val_RT_file}') 498 | print("Generating a new one") 499 | val_RT_file = open(val_RT_file, 'w') 500 | val_RT_file = csv.writer(val_RT_file, delimiter=',') 501 | val_RT_file.writerow(['id', 'tx', 'ty', 'tz', 'rx', 'ry', 'rz']) 502 | for i in range(len(self.all_files)): 503 | rotz = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 504 | roty = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 505 | rotx = np.random.uniform(-max_r, max_r) * (3.141592 / 180.0) 506 | transl_x = np.random.uniform(-max_t, max_t) 507 | transl_y = np.random.uniform(-max_t, max_t) 508 | transl_z = np.random.uniform(-max_t, max_t) 509 | # transl_z = np.random.uniform(-max_t, min(max_t, 1.)) 510 | val_RT_file.writerow([i, transl_x, transl_y, transl_z, 511 | rotx, roty, rotz]) 512 | self.val_RT.append([float(i), transl_x, transl_y, transl_z, 513 | rotx, roty, rotz]) 514 | 515 | assert len(self.val_RT) == len(self.all_files), "Something wrong with test RTs" 516 | 517 | def get_ground_truth_poses(self, sequence, frame): 518 | return self.GTs_T[sequence][frame], self.GTs_R[sequence][frame] 519 | 520 | def custom_transform(self, rgb, img_rotation=0., flip=False): 521 | to_tensor = transforms.ToTensor() 522 | normalization = transforms.Normalize(mean=[0.485, 0.456, 0.406], 523 | std=[0.229, 0.224, 0.225]) 524 | 525 | #rgb = crop(rgb) 526 | if self.split == 'train': 527 | color_transform = transforms.ColorJitter(0.1, 0.1, 0.1) 528 | # color_transform = transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3 / 3.14) 529 | rgb = color_transform(rgb) 530 | if flip: 531 | rgb = TTF.hflip(rgb) 532 | rgb = TTF.rotate(rgb, img_rotation) 533 | #io.imshow(np.array(rgb)) 534 | #io.show() 535 | 536 | rgb = to_tensor(rgb) 537 | rgb = normalization(rgb) 538 | return rgb 539 | 540 | def __len__(self): 541 | return len(self.all_files) 542 | 543 | # self.all_files.append(os.path.join(date, seq, 'image_2/data', image_name.split('.')[0])) 544 | def __getitem__(self, idx): 545 | item = self.all_files[idx] 546 | date = str(item.split('/')[0]) 547 | seq = str(item.split('/')[1]) 548 | rgb_name = str(item.split('/')[4]) 549 | img_path = os.path.join(self.root_dir, date, seq, 'image_02/data', rgb_name+'.jpg') # png 550 | lidar_path = os.path.join(self.root_dir, date, seq, 'velodyne_points/data', rgb_name+'.bin') 551 | 
lidar_scan = np.fromfile(lidar_path, dtype=np.float32) 552 | pc = lidar_scan.reshape((-1, 4)) 553 | valid_indices = pc[:, 0] < -3. 554 | valid_indices = valid_indices | (pc[:, 0] > 3.) 555 | valid_indices = valid_indices | (pc[:, 1] < -3.) 556 | valid_indices = valid_indices | (pc[:, 1] > 3.) 557 | pc = pc[valid_indices].copy() 558 | pc_lidar = pc.copy() 559 | pc_org = torch.from_numpy(pc.astype(np.float32)) 560 | if self.use_reflectance: 561 | reflectance = pc[:, 3].copy() 562 | reflectance = torch.from_numpy(reflectance).float() 563 | 564 | calib = self.calib_date[date] 565 | RT_cam02 = calib['RT2'].astype(np.float32) 566 | # camera intrinsic parameters 567 | calib_cam02 = calib['K2'] # 3x3 568 | 569 | E_RT = RT_cam02 570 | 571 | if pc_org.shape[1] == 4 or pc_org.shape[1] == 3: 572 | pc_org = pc_org.t() 573 | if pc_org.shape[0] == 3: 574 | homogeneous = torch.ones(pc_org.shape[1]).unsqueeze(0) 575 | pc_org = torch.cat((pc_org, homogeneous), 0) 576 | elif pc_org.shape[0] == 4: 577 | if not torch.all(pc_org[3, :] == 1.): 578 | pc_org[3, :] = 1. 579 | else: 580 | raise TypeError("Wrong PointCloud shape") 581 | 582 | pc_rot = np.matmul(E_RT, pc_org.numpy()) 583 | pc_rot = pc_rot.astype(np.float32).copy() 584 | pc_in = torch.from_numpy(pc_rot) 585 | 586 | h_mirror = False 587 | # if np.random.rand() > 0.5 and self.split == 'train': 588 | # h_mirror = True 589 | # pc_in[0, :] *= -1 590 | 591 | img = Image.open(img_path) 592 | # print(img_path) 593 | # img = cv2.imread(img_path) 594 | img_rotation = 0. 595 | # if self.split == 'train': 596 | # img_rotation = np.random.uniform(-5, 5) 597 | try: 598 | img = self.custom_transform(img, img_rotation, h_mirror) 599 | except OSError: 600 | new_idx = np.random.randint(0, self.__len__()) 601 | return self.__getitem__(new_idx) 602 | 603 | # Rotate PointCloud for img_rotation 604 | # if self.split == 'train': 605 | # R = mathutils.Euler((radians(img_rotation), 0, 0), 'XYZ') 606 | # T = mathutils.Vector((0., 0., 0.)) 607 | # pc_in = rotate_forward(pc_in, R, T) 608 | 609 | if self.split == 'train': 610 | max_angle = self.max_r 611 | rotz = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 612 | roty = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 613 | rotx = np.random.uniform(-max_angle, max_angle) * (3.141592 / 180.0) 614 | transl_x = np.random.uniform(-self.max_t, self.max_t) 615 | transl_y = np.random.uniform(-self.max_t, self.max_t) 616 | transl_z = np.random.uniform(-self.max_t, self.max_t) 617 | # transl_z = np.random.uniform(-self.max_t, min(self.max_t, 1.)) 618 | initial_RT = 0 619 | else: 620 | initial_RT = self.val_RT[idx] 621 | rotz = initial_RT[6] 622 | roty = initial_RT[5] 623 | rotx = initial_RT[4] 624 | transl_x = initial_RT[1] 625 | transl_y = initial_RT[2] 626 | transl_z = initial_RT[3] 627 | 628 | # randomly generate a perturbation of the calibration parameters within the given range 629 | # during training a new perturbation is drawn for every sample, so each epoch uses different values 630 | # for val/test the perturbations are generated once at initialization, so every epoch uses the same values 631 | R = mathutils.Euler((rotx, roty, rotz), 'XYZ') 632 | T = mathutils.Vector((transl_x, transl_y, transl_z)) 633 | 634 | R, T = invert_pose(R, T) 635 | R, T = torch.tensor(R), torch.tensor(T) 636 | 637 | #io.imshow(depth_img.numpy(), cmap='jet') 638 | #io.show() 639 | calib = calib_cam02 640 | # calib = get_calib_kitti_odom(int(seq)) 641 | if h_mirror: 642 | calib[2] = (img.shape[2] / 2)*2 - calib[2] 643 | 644 | # sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 'pc_org': pc_org, 'img_path': img_path, 645 | # 'tr_error': T, 'rot_error': R, 'seq': int(seq), 'rgb_name': rgb_name, 'item': item, 646 | # 
'extrin': E_RT, 'initial_RT': initial_RT} 647 | sample = {'rgb': img, 'point_cloud': pc_in, 'calib': calib, 'pc_org': pc_org, 'img_path': img_path, 648 | 'tr_error': T, 'rot_error': R, 'rgb_name': rgb_name + '.png', 'item': item, 649 | 'extrin': E_RT, 'initial_RT': initial_RT, 'pc_lidar': pc_lidar} 650 | 651 | return sample 652 | 653 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LiDAR2camera_self_check 2 | ![image](https://github.com/OpenCalib/LiDAR2camera_self-check/blob/master/pictures/pipline.pdf) 3 | ![image](https://github.com/OpenCalib/LiDAR2camera_self-check/blob/master/pictures/input.pdf) 4 | ![image](https://github.com/OpenCalib/LiDAR2camera_self-check/blob/master/pictures/performance.pdf) 5 | ## Table of Contents 6 | 7 | - [Requirements](#Requirements) 8 | - [Evaluation](#Evaluation) 9 | - [Train](#Train) 10 | - [Citation](#Citation) 11 | 12 | ## Requirements 13 | 14 | * Python 3.6 (we recommend using [Anaconda](https://www.anaconda.com/)) 15 | * PyTorch==1.0.1.post2 16 | * Torchvision==0.2.2 17 | * Install the requirements and dependencies 18 | ```commandline 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ## Evaluation 23 | 24 | 1. Download the [KITTI odometry dataset](http://www.cvlibs.net/datasets/kitti/eval_odometry.php). 25 | 2. Change the dataset path in `evaluate_calib.py`. 26 | ```python 27 | data_folder = '/path/to/the/KITTI/odometry_color/' 28 | ``` 29 | 3. Create a folder named `pretrained` in the root path to store the pre-trained models. 30 | 4. Download the pre-trained models and modify the weights paths in `evaluate_calib.py`. 31 | ```python 32 | weights = [ 33 | # './pretrained/final_checkpoint_r20.00_t1.50_e4_0.094.tar', 34 | # './pretrained/final_checkpoint_r2.00_t0.20_e4_0.228.tar', 35 | # './pretrained/final_checkpoint_r10.00_t1.00_e3_0.108.tar', 36 | './pretrained/final_checkpoint_r5.00_t0.50_e-1_0.145.tar', 37 | ] 38 | ``` 39 | 5. Run evaluation. 40 | ```commandline 41 | python evaluate_calib.py 42 | ``` 43 | 44 | ## Train 45 | ```commandline 46 | python train_with_sacred.py 47 | ``` 48 | 49 | 50 | 51 | ### Acknowledgments 52 | We are grateful to Daniele Cattaneo for the CMRNet [github repository](https://github.com/cattaneod/CMRNet) and the LCCNet [github repository](https://github.com/IIPCVLAB/LCCNet). We used them as our initial code base. 53 | 54 | 57 | 58 | --- 59 | -------------------------------------------------------------------------------- /evaluate_calib.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Xudong Lv 10 | # based on github.com/cattaneod/CMRNet/blob/master/evaluate_iterative_single_CALIB.py 11 | 12 | import csv 13 | import random 14 | import open3d as o3 15 | 16 | import cv2 17 | import mathutils 18 | # import matplotlib 19 | # matplotlib.use('Qt5Agg') 20 | import os 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import torch 24 | import torch.nn.functional as F 25 | import torch.nn.parallel 26 | import torch.utils.data 27 | from sacred import Experiment 28 | from sacred.utils import apply_backspaces_and_linefeeds 29 | from skimage import io 30 | from tqdm import tqdm 31 | import time 32 | 33 | from models.LCCNet import LCCNet 34 | from DatasetLidarCamera import DatasetLidarCameraKittiOdometry 35 | 36 | from quaternion_distances import quaternion_distance 37 | from utils import (mat2xyzrpy, merge_inputs, overlay_imgs, quat2mat, 38 | quaternion_from_matrix, rotate_back, rotate_forward, 39 | tvector2mat) 40 | 41 | from torch.utils.data import Dataset 42 | from pykitti import odometry 43 | import pandas as pd 44 | from PIL import Image 45 | from math import radians 46 | from utils import invert_pose 47 | from torchvision import transforms 48 | 49 | 50 | # import matplotlib 51 | # matplotlib.rc("font",family='AR PL UMing CN') 52 | plt.rcParams['axes.unicode_minus'] = False 53 | # plt.rc('font',family='Times New Roman') 54 | font_EN = {'family': 'Times New Roman', 'weight': 'normal', 'size': 16} 55 | font_CN = {'family': 'AR PL UMing CN', 'weight': 'normal', 'size': 16} 56 | plt_size = 10.5 57 | 58 | ex = Experiment("LCCNet-evaluate-iterative") 59 | ex.captured_out_filter = apply_backspaces_and_linefeeds 60 | 61 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 62 | 63 | # noinspection PyUnusedLocal 64 | @ex.config 65 | def config(): 66 | dataset = 'kitti/odom' 67 | # data_folder = '/home/wangshuo/Datasets/KITTI/odometry_color/' 68 | data_folder = '/mnt/petrelfs/weipengjin/file_trans/LCCNet_dataset/' 69 | test_sequence = 0 70 | use_prev_output = False 71 | max_t = 0.2 72 | max_r = 1.0 73 | occlusion_kernel = 5 74 | occlusion_threshold = 3.0 75 | network = 'Res_f1' 76 | norm = 'bn' 77 | show = False 78 | use_reflectance = False 79 | weight = None # List of weights' path, for iterative refinement 80 | save_name = True 81 | # Set to True only if you use two network, the first for rotation and the second for translation 82 | rot_transl_separated = False 83 | random_initial_pose = False 84 | save_log = False 85 | dropout = 0.0 86 | max_depth = 80. 
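# note: depth images built in main() below are divided by max_depth, and when max_depth is set below 80 the point clouds are additionally clipped to pc[0, :] < max_depth before projection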
87 | iterative_method = 'single' # ['multi_range', 'single_range', 'single'] 88 | output = './output' 89 | save_image = False 90 | outlier_filter = False 91 | outlier_filter_th = 10 92 | out_fig_lg = 'EN' # [EN, CN] 93 | 94 | weights = [ 95 | # # './pretrained/final_checkpoint_r20.00_t1.50_e4_0.094.tar', 96 | # # './pretrained/final_checkpoint_r2.00_t0.20_e4_0.228.tar', 97 | # # './pretrained/final_checkpoint_r10.00_t1.00_e3_0.108.tar', 98 | './checkpoints/kitti/odom/val_seq_00/models/checkpoint_r2.00_t0.20_e119_0.108.tar' 99 | ] 100 | 101 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 102 | 103 | EPOCH = 1 104 | 105 | 106 | def _init_fn(worker_id, seed): 107 | seed = seed + worker_id + EPOCH * 100 108 | print(f"Init worker {worker_id} with seed {seed}") 109 | torch.manual_seed(seed) 110 | np.random.seed(seed) 111 | random.seed(seed) 112 | 113 | 114 | def get_2D_lidar_projection(pcl, cam_intrinsic): 115 | pcl_xyz = cam_intrinsic @ pcl.T 116 | pcl_xyz = pcl_xyz.T 117 | pcl_z = pcl_xyz[:, 2] 118 | pcl_xyz = pcl_xyz / (pcl_xyz[:, 2, None] + 1e-10) 119 | pcl_uv = pcl_xyz[:, :2] 120 | 121 | return pcl_uv, pcl_z 122 | 123 | 124 | def lidar_project_depth(pc_rotated, cam_calib, img_shape): 125 | pc_rotated = pc_rotated[:3, :].detach().cpu().numpy() 126 | cam_intrinsic = cam_calib.numpy() 127 | pcl_uv, pcl_z = get_2D_lidar_projection(pc_rotated.T, cam_intrinsic) 128 | mask = (pcl_uv[:, 0] > 0) & (pcl_uv[:, 0] < img_shape[1]) & (pcl_uv[:, 1] > 0) & ( 129 | pcl_uv[:, 1] < img_shape[0]) & (pcl_z > 0) 130 | pcl_uv = pcl_uv[mask] 131 | pcl_z = pcl_z[mask] 132 | pcl_uv = pcl_uv.astype(np.uint32) 133 | pcl_z = pcl_z.reshape(-1, 1) 134 | depth_img = np.zeros((img_shape[0], img_shape[1], 1)) 135 | depth_img[pcl_uv[:, 1], pcl_uv[:, 0]] = pcl_z 136 | depth_img = torch.from_numpy(depth_img.astype(np.float32)) 137 | depth_img = depth_img.cuda() 138 | depth_img = depth_img.permute(2, 0, 1) 139 | pc_valid = pc_rotated.T[mask] 140 | 141 | return depth_img, pcl_uv, pc_valid 142 | 143 | 144 | @ex.automain 145 | def main(_config, seed): 146 | global EPOCH, weights 147 | if _config['weight'] is not None: 148 | weights = _config['weight'] 149 | 150 | # if _config['iterative_method'] == 'single': 151 | # weights = [weights[0]] 152 | 153 | dataset_class = DatasetLidarCameraKittiOdometry 154 | # dataset_class = DatasetTest 155 | img_shape = (384, 1280) 156 | input_size = (256, 512) 157 | 158 | # split = 'test' 159 | if _config['random_initial_pose']: 160 | split = 'test_random' 161 | 162 | 163 | if _config['test_sequence'] is None: 164 | raise TypeError('test_sequences cannot be None') 165 | else: 166 | if isinstance(_config['test_sequence'], int): 167 | _config['test_sequence'] = f"{_config['test_sequence']:02d}" 168 | dataset_val = dataset_class(_config['data_folder'], max_r=_config['max_r'], max_t=_config['max_t'], 169 | split='test', use_reflectance=_config['use_reflectance'], 170 | val_sequence=_config['test_sequence']) 171 | 172 | np.random.seed(seed) 173 | torch.random.manual_seed(seed) 174 | 175 | def init_fn(x): 176 | return _init_fn(x, seed) 177 | 178 | num_worker = 6 179 | batch_size = 1 180 | 181 | 182 | TestImgLoader = torch.utils.data.DataLoader(dataset=dataset_val, 183 | shuffle=False, 184 | batch_size=batch_size, 185 | num_workers=num_worker, 186 | worker_init_fn=init_fn, 187 | collate_fn=merge_inputs, 188 | drop_last=False, 189 | pin_memory=False) 190 | 191 | print(len(TestImgLoader)) 192 | if _config['network'].startswith('Res'): 193 | feat = 1 194 | md = 4 195 | split = 
_config['network'].split('_') 196 | for item in split[1:]: 197 | if item.startswith('f'): 198 | feat = int(item[-1]) 199 | elif item.startswith('md'): 200 | md = int(item[2:]) 201 | assert 0 < feat < 7, "Feature Number from PWC have to be between 1 and 6" 202 | assert 0 < md, "md must be positive" 203 | 204 | 205 | models = [] # iterative model 206 | for i in range(len(weights)): 207 | # network choice and settings 208 | if _config['network'].startswith('Res'): 209 | feat = 1 210 | md = 4 211 | split = _config['network'].split('_') 212 | for item in split[1:]: 213 | if item.startswith('f'): 214 | feat = int(item[-1]) 215 | elif item.startswith('md'): 216 | md = int(item[2:]) 217 | assert 0 < feat < 7, "Feature Number from PWC have to be between 1 and 6" 218 | assert 0 < md, "md must be positive" 219 | model = LCCNet(input_size, use_feat_from=feat, md=md, 220 | use_reflectance=_config['use_reflectance'], dropout=_config['dropout']) 221 | from thop import profile 222 | input1 = torch.randn(1, 3, 256, 512) 223 | input2 = torch.randn(1, 1, 256, 512) 224 | flops, params = profile(model, inputs=(input1, input2)) 225 | print('FLOPs = ' + str(flops/1000**3) + 'G') 226 | print('Params = ' + str(params/1000**2) + 'M') 227 | else: 228 | raise TypeError("Network unknown") 229 | 230 | checkpoint = torch.load(weights[i], map_location='cpu') 231 | saved_state_dict = checkpoint['state_dict'] 232 | model.load_state_dict(saved_state_dict,False) 233 | model = model.to(device) 234 | model.eval() 235 | models.append(model) 236 | 237 | 238 | if _config['save_log']: 239 | log_file = f'./results_for_paper/log_seq{_config["test_sequence"]}.csv' 240 | log_file = open(log_file, 'w') 241 | log_file = csv.writer(log_file) 242 | header = ['frame'] 243 | for i in range(len(weights) + 1): 244 | header += [f'iter{i}_error_t', f'iter{i}_error_r', f'iter{i}_error_x', f'iter{i}_error_y', 245 | f'iter{i}_error_z', f'iter{i}_error_r', f'iter{i}_error_p', f'iter{i}_error_y'] 246 | log_file.writerow(header) 247 | 248 | show = _config['show'] 249 | # save image to the output path 250 | _config['output'] = os.path.join(_config['output'], _config['iterative_method']) 251 | rgb_path = os.path.join(_config['output'], 'rgb') 252 | if not os.path.exists(rgb_path): 253 | os.makedirs(rgb_path) 254 | depth_path = os.path.join(_config['output'], 'depth') 255 | if not os.path.exists(depth_path): 256 | os.makedirs(depth_path) 257 | input_path = os.path.join(_config['output'], 'input') 258 | if not os.path.exists(input_path): 259 | os.makedirs(input_path) 260 | gt_path = os.path.join(_config['output'], 'gt') 261 | if not os.path.exists(gt_path): 262 | os.makedirs(gt_path) 263 | if _config['out_fig_lg'] == 'EN': 264 | results_path = os.path.join(_config['output'], 'results_en') 265 | elif _config['out_fig_lg'] == 'CN': 266 | results_path = os.path.join(_config['output'], 'results_cn') 267 | if not os.path.exists(results_path): 268 | os.makedirs(results_path) 269 | pred_path = os.path.join(_config['output'], 'pred') 270 | for it in range(len(weights)): 271 | if not os.path.exists(os.path.join(pred_path, 'iteration_'+str(it+1))): 272 | os.makedirs(os.path.join(pred_path, 'iteration_'+str(it+1))) 273 | 274 | # save pointcloud to the output path 275 | pc_lidar_path = os.path.join(_config['output'], 'pointcloud', 'lidar') 276 | if not os.path.exists(pc_lidar_path): 277 | os.makedirs(pc_lidar_path) 278 | pc_input_path = os.path.join(_config['output'], 'pointcloud', 'input') 279 | if not os.path.exists(pc_input_path): 280 | os.makedirs(pc_input_path) 
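# note: when save_image is enabled, the evaluation loop below also writes the raw LiDAR cloud, the miscalibrated input cloud and the predicted cloud into these 'pointcloud' sub-folders as .pcd files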
281 | pc_pred_path = os.path.join(_config['output'], 'pointcloud', 'pred') 282 | if not os.path.exists(pc_pred_path): 283 | os.makedirs(pc_pred_path) 284 | 285 | 286 | errors_r = [] 287 | errors_t = [] 288 | errors_t2 = [] 289 | errors_xyz = [] 290 | errors_rpy = [] 291 | all_RTs = [] 292 | mis_calib_list = [] 293 | total_time = 0 294 | 295 | prev_tr_error = None 296 | prev_rot_error = None 297 | 298 | for i in range(len(weights) + 1): 299 | errors_r.append([]) 300 | errors_t.append([]) 301 | errors_t2.append([]) 302 | errors_rpy.append([]) 303 | 304 | correct_samples = 0 305 | wrong_samples = 0 306 | network_results = open("./results"+"_"+str(_config['max_r'])+"_"+str(_config['max_t']), 'a') 307 | network_results = csv.writer(network_results, delimiter=',') 308 | network_results.writerow(['gt', 'predict']) 309 | for batch_idx, sample in enumerate(tqdm(TestImgLoader)): 310 | N = 100 # 500 311 | # if batch_idx > 200: 312 | # break 313 | log_string = [str(batch_idx)] 314 | 315 | lidar_input = [] 316 | rgb_input = [] 317 | lidar_gt = [] 318 | shape_pad_input = [] 319 | real_shape_input = [] 320 | pc_rotated_input = [] 321 | RTs = [] 322 | shape_pad = [0, 0, 0, 0] 323 | outlier_filter = False 324 | 325 | if batch_idx == 0 or not _config['use_prev_output']: 326 | # Qui dare posizione di input del frame corrente rispetto alla GT 327 | sample['tr_error'] = sample['tr_error'].cuda() 328 | sample['rot_error'] = sample['rot_error'].cuda() 329 | else: 330 | sample['tr_error'] = prev_tr_error 331 | sample['rot_error'] = prev_rot_error 332 | sample['label'] = sample['label'].cuda() 333 | for idx in range(len(sample['rgb'])): 334 | # ProjectPointCloud in RT-pose 335 | real_shape = [sample['rgb'][idx].shape[1], sample['rgb'][idx].shape[2], sample['rgb'][idx].shape[0]] 336 | 337 | sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda() # 变换到相机坐标系下的激光雷达点云 338 | pc_lidar = sample['point_cloud'][idx].clone() 339 | 340 | if _config['max_depth'] < 80.: 341 | pc_lidar = pc_lidar[:, pc_lidar[0, :] < _config['max_depth']].clone() 342 | 343 | depth_gt, uv_gt, pc_gt_valid = lidar_project_depth(pc_lidar, sample['calib'][idx], real_shape) # image_shape 344 | depth_gt /= _config['max_depth'] 345 | 346 | if _config['save_image']: 347 | # save the Lidar pointcloud 348 | pcl_lidar = o3.PointCloud() 349 | pc_lidar = pc_lidar.detach().cpu().numpy() 350 | pcl_lidar.points = o3.Vector3dVector(pc_lidar.T[:, :3]) 351 | 352 | # o3.draw_geometries(downpcd) 353 | o3.write_point_cloud(pc_lidar_path + '/{}.pcd'.format(batch_idx), pcl_lidar) 354 | 355 | 356 | R = quat2mat(sample['rot_error'][idx]) 357 | T = tvector2mat(sample['tr_error'][idx]) 358 | RT_inv = torch.mm(T, R) 359 | RT = RT_inv.clone().inverse() 360 | 361 | pc_rotated = rotate_back(sample['point_cloud'][idx], RT_inv) # Pc` = RT * Pc 362 | 363 | if _config['max_depth'] < 80.: 364 | pc_rotated = pc_rotated[:, pc_rotated[0, :] < _config['max_depth']].clone() 365 | 366 | depth_img, uv_input, pc_input_valid = lidar_project_depth(pc_rotated, sample['calib'][idx], real_shape) # image_shape 367 | depth_img /= _config['max_depth'] 368 | 369 | if _config['outlier_filter'] and uv_input.shape[0] <= _config['outlier_filter_th']: 370 | outlier_filter = True 371 | else: 372 | outlier_filter = False 373 | 374 | if _config['save_image']: 375 | # save the RGB input pointcloud 376 | img = cv2.imread(sample['img_path'][0]) 377 | R = img[uv_input[:, 1], uv_input[:, 0], 0] / 255 378 | G = img[uv_input[:, 1], uv_input[:, 0], 1] / 255 379 | B = img[uv_input[:, 1], uv_input[:, 0], 2] / 
255 380 | pcl_input = o3.PointCloud() 381 | pcl_input.points = o3.Vector3dVector(pc_input_valid[:, :3]) 382 | pcl_input.colors = o3.Vector3dVector(np.vstack((R, G, B)).T) 383 | 384 | # o3.draw_geometries(downpcd) 385 | o3.write_point_cloud(pc_input_path + '/{}.pcd'.format(batch_idx), pcl_input) 386 | 387 | # PAD ONLY ON RIGHT AND BOTTOM SIDE 388 | rgb = sample['rgb'][idx].cuda() 389 | shape_pad = [0, 0, 0, 0] 390 | 391 | shape_pad[3] = (img_shape[0] - rgb.shape[1]) # // 2 392 | shape_pad[1] = (img_shape[1] - rgb.shape[2]) # // 2 + 1 393 | 394 | rgb = F.pad(rgb, shape_pad) 395 | depth_img = F.pad(depth_img, shape_pad) 396 | depth_gt = F.pad(depth_gt, shape_pad) 397 | 398 | rgb_input.append(rgb) 399 | lidar_input.append(depth_img) 400 | lidar_gt.append(depth_gt) 401 | real_shape_input.append(real_shape) 402 | shape_pad_input.append(shape_pad) 403 | pc_rotated_input.append(pc_rotated) 404 | RTs.append(RT) 405 | 406 | if outlier_filter: 407 | continue 408 | 409 | lidar_input = torch.stack(lidar_input) 410 | rgb_input = torch.stack(rgb_input) 411 | rgb_resize = F.interpolate(rgb_input, size=[256, 512], mode="bilinear") 412 | lidar_resize = F.interpolate(lidar_input, size=[256, 512], mode="bilinear") 413 | 414 | 415 | if _config['save_image']: 416 | out0 = overlay_imgs(rgb_input[0], lidar_input) 417 | out0 = out0[:376, :1241, :] 418 | cv2.imwrite(os.path.join(input_path, sample['rgb_name'][0]), out0[:, :, [2, 1, 0]]*255) 419 | out1 = overlay_imgs(rgb_input[0], lidar_gt[0].unsqueeze(0)) 420 | out1 = out1[:376, :1241, :] 421 | cv2.imwrite(os.path.join(gt_path, sample['rgb_name'][0]), out1[:, :, [2, 1, 0]]*255) 422 | 423 | depth_img = depth_img.detach().cpu().numpy() 424 | depth_img = (depth_img / np.max(depth_img)) * 255 425 | cv2.imwrite(os.path.join(depth_path, sample['rgb_name'][0]), depth_img[0, :376, :1241]) 426 | 427 | if show: 428 | out0 = overlay_imgs(rgb_input[0], lidar_input) 429 | out1 = overlay_imgs(rgb_input[0], lidar_gt[0].unsqueeze(0)) 430 | cv2.imshow("INPUT", out0[:, :, [2, 1, 0]]) 431 | cv2.imshow("GT", out1[:, :, [2, 1, 0]]) 432 | cv2.waitKey(1) 433 | 434 | rgb = rgb_input.to(device) 435 | lidar = lidar_input.to(device) 436 | rgb_resize = rgb_resize.to(device) 437 | lidar_resize = lidar_resize.to(device) 438 | 439 | target_transl = sample['tr_error'].to(device) 440 | target_rot = sample['rot_error'].to(device) 441 | 442 | # the initial calibration errors before sensor calibration 443 | RT1 = RTs[0] 444 | mis_calib = torch.stack(sample['initial_RT'])[1:] 445 | mis_calib_list.append(mis_calib) 446 | 447 | T_composed = RT1[:3, 3] 448 | R_composed = quaternion_from_matrix(RT1) 449 | errors_t[0].append(T_composed.norm().item()) 450 | errors_t2[0].append(T_composed) 451 | errors_r[0].append(quaternion_distance(R_composed.unsqueeze(0), 452 | torch.tensor([1., 0., 0., 0.], device=R_composed.device).unsqueeze(0), 453 | R_composed.device)) 454 | rpy_error = mat2xyzrpy(RT1)[3:] 455 | 456 | rpy_error *= (180.0 / 3.141592) 457 | errors_rpy[0].append(rpy_error) 458 | log_string += [str(errors_t[0][-1]), str(errors_r[0][-1]), str(errors_t2[0][-1][0].item()), 459 | str(errors_t2[0][-1][1].item()), str(errors_t2[0][-1][2].item()), 460 | str(errors_rpy[0][-1][0].item()), str(errors_rpy[0][-1][1].item()), 461 | str(errors_rpy[0][-1][2].item())] 462 | 463 | start = 0 464 | # t1 = time.time() 465 | 466 | # Run model 467 | with torch.no_grad(): 468 | for iteration in range(start, len(weights)): 469 | # Run the i-th network 470 | t1 = time.time() 471 | classification_results = 
models[iteration](rgb_resize, lidar_resize) 472 | run_time = time.time() - t1 473 | _, classify_reaults = torch.max(classification_results,dim=1) 474 | statistic_results = torch.eq(classify_reaults,sample['label'].long()) 475 | correct_samples += torch.sum(statistic_results, dim=0) 476 | wrong_samples += statistic_results.shape[0] - torch.sum(statistic_results, dim=0) 477 | for i in range(classify_reaults.shape[0]): 478 | network_results.writerow([str(int(sample['label'][i].cpu().numpy())), str(classify_reaults[i].cpu().numpy())]) 479 | 480 | R_predicted = torch.tensor([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]]).float().cuda() 481 | T_predicted = torch.tensor([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]]).float().cuda() 482 | RT_predicted = torch.mm(T_predicted, R_predicted) 483 | RTs.append(torch.mm(RTs[iteration], RT_predicted)) # inv(H_gt)*H_pred_1*H_pred_2*.....H_pred_n 484 | if iteration == 0: 485 | rotated_point_cloud = pc_rotated_input[0] 486 | else: 487 | rotated_point_cloud = rotated_point_cloud 488 | 489 | rotated_point_cloud = rotate_forward(rotated_point_cloud, RT_predicted) # H_pred*X_init 490 | 491 | depth_img_pred, uv_pred, pc_pred_valid = lidar_project_depth(rotated_point_cloud, sample['calib'][0], real_shape_input[0]) # image_shape 492 | depth_img_pred /= _config['max_depth'] 493 | depth_pred = F.pad(depth_img_pred, shape_pad_input[0]) 494 | lidar = depth_pred.unsqueeze(0) 495 | lidar_resize = F.interpolate(lidar, size=[256, 512], mode="bilinear") 496 | 497 | if iteration == len(weights)-1 and _config['save_image']: 498 | # save the RGB pointcloud 499 | img = cv2.imread(sample['img_path'][0]) 500 | R = img[uv_pred[:, 1], uv_pred[:, 0], 0] / 255 501 | G = img[uv_pred[:, 1], uv_pred[:, 0], 1] / 255 502 | B = img[uv_pred[:, 1], uv_pred[:, 0], 2] / 255 503 | pcl_pred = o3.PointCloud() 504 | pcl_pred.points = o3.Vector3dVector(pc_pred_valid[:, :3]) 505 | pcl_pred.colors = o3.Vector3dVector(np.vstack((R, G, B)).T) 506 | 507 | # o3.draw_geometries(downpcd) 508 | o3.write_point_cloud(pc_pred_path + '/{}.pcd'.format(batch_idx), pcl_pred) 509 | 510 | 511 | if _config['save_image']: 512 | out2 = overlay_imgs(rgb_input[0], lidar) 513 | out2 = out2[:376, :1241, :] 514 | cv2.imwrite(os.path.join(os.path.join(pred_path, 'iteration_'+str(iteration+1)), 515 | sample['rgb_name'][0]), out2[:, :, [2, 1, 0]]*255) 516 | if show: 517 | out2 = overlay_imgs(rgb_input[0], lidar) 518 | cv2.imshow(f'Pred_Iter_{iteration}', out2[:, :, [2, 1, 0]]) 519 | cv2.waitKey(1) 520 | 521 | # inv(H_init)*H_pred 522 | T_composed = RTs[iteration + 1][:3, 3] 523 | R_composed = quaternion_from_matrix(RTs[iteration + 1]) 524 | errors_t[iteration + 1].append(T_composed.norm().item()) 525 | errors_t2[iteration + 1].append(T_composed) 526 | errors_r[iteration + 1].append(quaternion_distance(R_composed.unsqueeze(0), 527 | torch.tensor([1., 0., 0., 0.], device=R_composed.device).unsqueeze(0), 528 | R_composed.device)) 529 | 530 | # rpy_error = quaternion_to_tait_bryan(R_composed) 531 | rpy_error = mat2xyzrpy(RTs[iteration + 1])[3:] 532 | rpy_error *= (180.0 / 3.141592) 533 | errors_rpy[iteration + 1].append(rpy_error) 534 | log_string += [str(errors_t[iteration + 1][-1]), str(errors_r[iteration + 1][-1]), 535 | str(errors_t2[iteration + 1][-1][0].item()), str(errors_t2[iteration + 1][-1][1].item()), 536 | str(errors_t2[iteration + 1][-1][2].item()), str(errors_rpy[iteration + 1][-1][0].item()), 537 | str(errors_rpy[iteration + 1][-1][1].item()), str(errors_rpy[iteration + 1][-1][2].item())] 538 | 539 | # run_time = time.time() 
- t1 540 | total_time += run_time 541 | 542 | # final calibration error 543 | all_RTs.append(RTs[-1]) 544 | prev_RT = RTs[-1].inverse() 545 | prev_tr_error = prev_RT[:3, 3].unsqueeze(0) 546 | prev_rot_error = quaternion_from_matrix(prev_RT).unsqueeze(0) 547 | 548 | if _config['save_log']: 549 | log_file.writerow(log_string) 550 | 551 | # Yaw: the y component of the Euler-angle vector 552 | # Pitch: the x component of the Euler-angle vector 553 | # Roll: the z component of the Euler-angle vector 554 | # mis_calib_input[transl_x, transl_y, transl_z, rotx, roty, rotz] Nx6 555 | mis_calib_input = torch.stack(mis_calib_list)[:, :, 0] 556 | 557 | if _config['save_log']: 558 | log_file.close() 559 | print("Iterative refinement: ") 560 | for i in range(len(weights) + 1): 561 | errors_r[i] = torch.tensor(errors_r[i]).abs() * (180.0 / 3.141592) 562 | errors_t[i] = torch.tensor(errors_t[i]).abs() * 100 563 | 564 | for k in range(len(errors_rpy[i])): 565 | # errors_rpy[i][k] = torch.tensor(errors_rpy[i][k]) 566 | # errors_t2[i][k] = torch.tensor(errors_t2[i][k]) * 100 567 | errors_rpy[i][k] = errors_rpy[i][k].clone().detach().abs() 568 | errors_t2[i][k] = errors_t2[i][k].clone().detach().abs() * 100 569 | 570 | print(f"Iteration {i}: \tMean Translation Error: {errors_t[i].mean():.4f} cm " 571 | f" Mean Rotation Error: {errors_r[i].mean():.4f} °") 572 | print(f"Iteration {i}: \tMedian Translation Error: {errors_t[i].median():.4f} cm " 573 | f" Median Rotation Error: {errors_r[i].median():.4f} °") 574 | print(f"Iteration {i}: \tStd. Translation Error: {errors_t[i].std():.4f} cm " 575 | f" Std. Rotation Error: {errors_r[i].std():.4f} °\n") 576 | 577 | # translation xyz 578 | print(f"Iteration {i}: \tMean Translation X Error: {errors_t2[i][0].mean():.4f} cm " 579 | f" Median Translation X Error: {errors_t2[i][0].median():.4f} cm " 580 | f" Std. Translation X Error: {errors_t2[i][0].std():.4f} cm ") 581 | print(f"Iteration {i}: \tMean Translation Y Error: {errors_t2[i][1].mean():.4f} cm " 582 | f" Median Translation Y Error: {errors_t2[i][1].median():.4f} cm " 583 | f" Std. Translation Y Error: {errors_t2[i][1].std():.4f} cm ") 584 | print(f"Iteration {i}: \tMean Translation Z Error: {errors_t2[i][2].mean():.4f} cm " 585 | f" Median Translation Z Error: {errors_t2[i][2].median():.4f} cm " 586 | f" Std. Translation Z Error: {errors_t2[i][2].std():.4f} cm \n") 587 | 588 | # rotation rpy 589 | print(f"Iteration {i}: \tMean Rotation Roll Error: {errors_rpy[i][0].mean(): .4f} °" 590 | f" Median Rotation Roll Error: {errors_rpy[i][0].median():.4f} °" 591 | f" Std. Rotation Roll Error: {errors_rpy[i][0].std():.4f} °") 592 | print(f"Iteration {i}: \tMean Rotation Pitch Error: {errors_rpy[i][1].mean(): .4f} °" 593 | f" Median Rotation Pitch Error: {errors_rpy[i][1].median():.4f} °" 594 | f" Std. Rotation Pitch Error: {errors_rpy[i][1].std():.4f} °") 595 | print(f"Iteration {i}: \tMean Rotation Yaw Error: {errors_rpy[i][2].mean(): .4f} °" 596 | f" Median Rotation Yaw Error: {errors_rpy[i][2].median():.4f} °" 597 | f" Std. 
Rotation Yaw Error: {errors_rpy[i][2].std():.4f} °\n") 598 | 599 | 600 | with open(os.path.join(_config['output'], 'results.txt'), 601 | 'a', encoding='utf-8') as f: 602 | f.write(f"Iteration {i}: \n") 603 | f.write("Translation Error && Rotation Error:\n") 604 | f.write(f"Iteration {i}: \tMean Translation Error: {errors_t[i].mean():.4f} cm " 605 | f" Mean Rotation Error: {errors_r[i].mean():.4f} °\n") 606 | f.write(f"Iteration {i}: \tMedian Translation Error: {errors_t[i].median():.4f} cm " 607 | f" Median Rotation Error: {errors_r[i].median():.4f} °\n") 608 | f.write(f"Iteration {i}: \tStd. Translation Error: {errors_t[i].std():.4f} cm " 609 | f" Std. Rotation Error: {errors_r[i].std():.4f} °\n\n") 610 | 611 | # translation xyz 612 | f.write("Translation Error XYZ:\n") 613 | f.write(f"Iteration {i}: \tMean Translation X Error: {errors_t2[i][0].mean():.4f} cm " 614 | f" Median Translation X Error: {errors_t2[i][0].median():.4f} cm " 615 | f" Std. Translation X Error: {errors_t2[i][0].std():.4f} cm \n") 616 | f.write(f"Iteration {i}: \tMean Translation Y Error: {errors_t2[i][1].mean():.4f} cm " 617 | f" Median Translation Y Error: {errors_t2[i][1].median():.4f} cm " 618 | f" Std. Translation Y Error: {errors_t2[i][1].std():.4f} cm \n") 619 | f.write(f"Iteration {i}: \tMean Translation Z Error: {errors_t2[i][2].mean():.4f} cm " 620 | f" Median Translation Z Error: {errors_t2[i][2].median():.4f} cm " 621 | f" Std. Translation Z Error: {errors_t2[i][2].std():.4f} cm \n\n") 622 | 623 | # rotation rpy 624 | f.write("Rotation Error RPY:\n") 625 | f.write(f"Iteration {i}: \tMean Rotation Roll Error: {errors_rpy[i][0].mean(): .4f} °" 626 | f" Median Rotation Roll Error: {errors_rpy[i][0].median():.4f} °" 627 | f" Std. Rotation Roll Error: {errors_rpy[i][0].std():.4f} °\n") 628 | f.write(f"Iteration {i}: \tMean Rotation Pitch Error: {errors_rpy[i][1].mean(): .4f} °" 629 | f" Median Rotation Pitch Error: {errors_rpy[i][1].median():.4f} °" 630 | f" Std. Rotation Pitch Error: {errors_rpy[i][1].std():.4f} °\n") 631 | f.write(f"Iteration {i}: \tMean Rotation Yaw Error: {errors_rpy[i][2].mean(): .4f} °" 632 | f" Median Rotation Yaw Error: {errors_rpy[i][2].median():.4f} °" 633 | f" Std. 
Rotation Yaw Error: {errors_rpy[i][2].std():.4f} °\n\n\n") 634 | 635 | for i in range(len(errors_t2)): 636 | errors_t2[i] = torch.stack(errors_t2[i]).abs() / 100 637 | errors_rpy[i] = torch.stack(errors_rpy[i]).abs() 638 | 639 | plot_x = np.zeros((mis_calib_input.shape[0], 2)) 640 | plot_x[:, 0] = mis_calib_input[:, 0].cpu().numpy() 641 | plot_x[:, 1] = errors_t2[-1][:, 0].cpu().numpy() 642 | plot_x = plot_x[np.lexsort(plot_x[:, ::-1].T)] 643 | 644 | plot_y = np.zeros((mis_calib_input.shape[0], 2)) 645 | plot_y[:, 0] = mis_calib_input[:, 1].cpu().numpy() 646 | plot_y[:, 1] = errors_t2[-1][:, 1].cpu().numpy() 647 | plot_y = plot_y[np.lexsort(plot_y[:, ::-1].T)] 648 | 649 | plot_z = np.zeros((mis_calib_input.shape[0], 2)) 650 | plot_z[:, 0] = mis_calib_input[:, 2].cpu().numpy() 651 | plot_z[:, 1] = errors_t2[-1][:, 2].cpu().numpy() 652 | plot_z = plot_z[np.lexsort(plot_z[:, ::-1].T)] 653 | 654 | N_interval = plot_x.shape[0] // N 655 | plot_x = plot_x[::N_interval] 656 | plot_y = plot_y[::N_interval] 657 | plot_z = plot_z[::N_interval] 658 | 659 | plt.plot(plot_x[:, 0], plot_x[:, 1], c='red', label='X') 660 | plt.plot(plot_y[:, 0], plot_y[:, 1], c='blue', label='Y') 661 | plt.plot(plot_z[:, 0], plot_z[:, 1], c='green', label='Z') 662 | # plt.legend(loc='best') 663 | 664 | if _config['out_fig_lg'] == 'EN': 665 | plt.xlabel('Miscalibration (m)', font_EN) 666 | plt.ylabel('Absolute Error (m)', font_EN) 667 | plt.legend(loc='best', prop=font_EN) 668 | elif _config['out_fig_lg'] == 'CN': 669 | plt.xlabel('初始标定外参偏差/米', font_CN) 670 | plt.ylabel('绝对误差/米', font_CN) 671 | plt.legend(loc='best', prop=font_CN) 672 | 673 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 674 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 675 | 676 | plt.savefig(os.path.join(results_path, 'xyz_plot.png')) 677 | plt.close('all') 678 | 679 | errors_t = errors_t[-1].numpy() 680 | errors_t = np.sort(errors_t, axis=0)[:-10] # 去掉一些异常值 681 | # plt.title('Calibration Translation Error Distribution') 682 | plt.hist(errors_t / 100, bins=50) 683 | # ax = plt.gca() 684 | # ax.set_xlabel('Absolute Translation Error (m)') 685 | # ax.set_ylabel('Number of instances') 686 | # ax.set_xticks([0.00, 0.25, 0.00, 0.25, 0.50]) 687 | 688 | if _config['out_fig_lg'] == 'EN': 689 | plt.xlabel('Absolute Translation Error (m)', font_EN) 690 | plt.ylabel('Number of instances', font_EN) 691 | elif _config['out_fig_lg'] == 'CN': 692 | plt.xlabel('绝对平移误差/米', font_CN) 693 | plt.ylabel('实验序列数目/个', font_CN) 694 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 695 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 696 | 697 | plt.savefig(os.path.join(results_path, 'translation_error_distribution.png')) 698 | plt.close('all') 699 | 700 | # rotation error 701 | # fig = plt.figure(figsize=(6, 3)) # 设置图大小 figsize=(6,3) 702 | # plt.title('Calibration Rotation Error') 703 | plot_pitch = np.zeros((mis_calib_input.shape[0], 2)) 704 | plot_pitch[:, 0] = mis_calib_input[:, 3].cpu().numpy() * (180.0 / 3.141592) 705 | plot_pitch[:, 1] = errors_rpy[-1][:, 1].cpu().numpy() 706 | plot_pitch = plot_pitch[np.lexsort(plot_pitch[:, ::-1].T)] 707 | 708 | plot_yaw = np.zeros((mis_calib_input.shape[0], 2)) 709 | plot_yaw[:, 0] = mis_calib_input[:, 4].cpu().numpy() * (180.0 / 3.141592) 710 | plot_yaw[:, 1] = errors_rpy[-1][:, 2].cpu().numpy() 711 | plot_yaw = plot_yaw[np.lexsort(plot_yaw[:, ::-1].T)] 712 | 713 | plot_roll = np.zeros((mis_calib_input.shape[0], 2)) 714 | plot_roll[:, 0] = mis_calib_input[:, 5].cpu().numpy() * (180.0 
/ 3.141592) 715 | plot_roll[:, 1] = errors_rpy[-1][:, 0].cpu().numpy() 716 | plot_roll = plot_roll[np.lexsort(plot_roll[:, ::-1].T)] 717 | 718 | N_interval = plot_roll.shape[0] // N 719 | plot_pitch = plot_pitch[::N_interval] 720 | plot_yaw = plot_yaw[::N_interval] 721 | plot_roll = plot_roll[::N_interval] 722 | 723 | # Yaw(偏航):欧拉角向量的y轴 724 | # Pitch(俯仰):欧拉角向量的x轴 725 | # Roll(翻滚): 欧拉角向量的z轴 726 | 727 | if _config['out_fig_lg'] == 'EN': 728 | plt.plot(plot_yaw[:, 0], plot_yaw[:, 1], c='red', label='Yaw(Y)') 729 | plt.plot(plot_pitch[:, 0], plot_pitch[:, 1], c='blue', label='Pitch(X)') 730 | plt.plot(plot_roll[:, 0], plot_roll[:, 1], c='green', label='Roll(Z)') 731 | plt.xlabel('Miscalibration (°)', font_EN) 732 | plt.ylabel('Absolute Error (°)', font_EN) 733 | plt.legend(loc='best', prop=font_EN) 734 | elif _config['out_fig_lg'] == 'CN': 735 | plt.plot(plot_yaw[:, 0], plot_yaw[:, 1], c='red', label='偏航角') 736 | plt.plot(plot_pitch[:, 0], plot_pitch[:, 1], c='blue', label='俯仰角') 737 | plt.plot(plot_roll[:, 0], plot_roll[:, 1], c='green', label='翻滚角') 738 | plt.xlabel('初始标定外参偏差/度', font_CN) 739 | plt.ylabel('绝对误差/度', font_CN) 740 | plt.legend(loc='best', prop=font_CN) 741 | 742 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 743 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 744 | plt.savefig(os.path.join(results_path, 'rpy_plot.png')) 745 | plt.close('all') 746 | 747 | errors_r = errors_r[-1].numpy() 748 | errors_r = np.sort(errors_r, axis=0)[:-10] # 去掉一些异常值 749 | # np.savetxt('rot_error.txt', arr_, fmt='%0.8f') 750 | # print('max rotation_error: {}'.format(max(errors_r))) 751 | # plt.title('Calibration Rotation Error Distribution') 752 | plt.hist(errors_r, bins=50) 753 | #plt.xlim([0, 1.5]) # x轴边界 754 | #plt.xticks([0.0, 0.3, 0.6, 0.9, 1.2, 1.5]) # 设置x刻度 755 | # ax = plt.gca() 756 | 757 | if _config['out_fig_lg'] == 'EN': 758 | plt.xlabel('Absolute Rotation Error (°)', font_EN) 759 | plt.ylabel('Number of instances', font_EN) 760 | elif _config['out_fig_lg'] == 'CN': 761 | plt.xlabel('绝对旋转误差/度', font_CN) 762 | plt.ylabel('实验序列数目/个', font_CN) 763 | plt.xticks(fontproperties='Times New Roman', size=plt_size) 764 | plt.yticks(fontproperties='Times New Roman', size=plt_size) 765 | plt.savefig(os.path.join(results_path, 'rotation_error_distribution.png')) 766 | plt.close('all') 767 | 768 | print("total accuracy:",float(correct_samples)/float(correct_samples + wrong_samples)) 769 | if _config["save_name"] is not None: 770 | torch.save(torch.stack(errors_t).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_t') 771 | torch.save(torch.stack(errors_r).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_r') 772 | torch.save(torch.stack(errors_t2).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_t2') 773 | torch.save(torch.stack(errors_rpy).cpu().numpy(), f'./results_for_paper/{_config["save_name"]}_errors_rpy') 774 | 775 | avg_time = total_time / len(TestImgLoader) 776 | print("average runing time on {} iteration: {} s".format(len(weights), avg_time)) 777 | print("End!") 778 | 779 | 780 | -------------------------------------------------------------------------------- /models/LCCNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Original implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018 3 | Jinwei Gu and Zhile Ren 4 | Modified version (CMRNet) by Daniele Cattaneo 5 | Modified version (LCCNet) by Xudong Lv 6 | Modified version (LCCNet) by 
Pengjin Wei 7 | """ 8 | 9 | import torch 10 | import torchvision 11 | import torchvision.transforms as transforms 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | from torch.autograd import Variable 15 | import torchvision.models as models 16 | import torch.utils.model_zoo as model_zoo 17 | #from models.CMRNet.modules.attention import * 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | import torch.optim as optim 21 | import math 22 | import argparse 23 | import os 24 | import os.path 25 | import matplotlib.pyplot as plt 26 | import matplotlib.image as mpimg 27 | from PIL import Image 28 | 29 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 30 | 31 | # from .networks.submodules import * 32 | # from .networks.correlation_package.correlation import Correlation 33 | from models.correlation_package.correlation import Correlation 34 | 35 | 36 | 37 | # __all__ = [ 38 | # 'calib_net' 39 | # ] 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | identity = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | identity = self.downsample(x) 67 | 68 | out += identity 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = conv1x1(inplanes, planes) 80 | self.bn1 = nn.BatchNorm2d(planes) 81 | self.conv2 = conv3x3(planes, planes, stride) 82 | self.bn2 = nn.BatchNorm2d(planes) 83 | self.conv3 = conv1x1(planes, planes * self.expansion) 84 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 85 | self.elu = nn.ELU() 86 | self.leakyRELU = nn.LeakyReLU(0.1) 87 | self.downsample = downsample 88 | self.stride = stride 89 | 90 | def forward(self, x): 91 | identity = x 92 | 93 | out = self.conv1(x) 94 | out = self.bn1(out) 95 | out = self.leakyRELU(out) 96 | 97 | out = self.conv2(out) 98 | out = self.bn2(out) 99 | out = self.leakyRELU(out) 100 | 101 | out = self.conv3(out) 102 | out = self.bn3(out) 103 | 104 | if self.downsample is not None: 105 | identity = self.downsample(x) 106 | 107 | out += identity 108 | out = self.leakyRELU(out) 109 | 110 | return out 111 | 112 | 113 | class SEBottleneck(nn.Module): 114 | expansion = 4 115 | __constants__ = ['downsample'] 116 | 117 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 118 | base_width=64, dilation=1, norm_layer=None, reduction=16): 119 | super(SEBottleneck, self).__init__() 120 | if norm_layer is None: 121 | norm_layer = nn.BatchNorm2d 122 | width = int(planes * (base_width / 64.)) * groups 123 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 124 | self.conv1 = conv1x1(inplanes, width) 125 | self.bn1 = norm_layer(width) 126 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 127 | self.bn2 = norm_layer(width) 128 | self.conv3 = conv1x1(width, planes * self.expansion) 129 | self.bn3 = norm_layer(planes * self.expansion) 
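# Editor note (hedged sketch): SEBottleneck.forward() below applies self.attention(out),
# but every attention-module assignment in this constructor is commented out, so building
# SEBottleneck with attention=True would fail with an AttributeError. One minimal
# squeeze-and-excitation gate that would satisfy that call is sketched here
# (hypothetical, not part of this repository):
#
#   self.attention = nn.Sequential(
#       nn.AdaptiveAvgPool2d(1),   # squeeze: global average pool to 1x1
#       nn.Conv2d(planes * self.expansion, (planes * self.expansion) // reduction, kernel_size=1),
#       nn.ReLU(inplace=True),
#       nn.Conv2d((planes * self.expansion) // reduction, planes * self.expansion, kernel_size=1),
#       nn.Sigmoid())              # excitation: per-channel gate in (0, 1)
#
#   # forward() would then rescale rather than replace the features:
#   #   out = out * self.attention(out)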
130 | self.relu = nn.ReLU(inplace=True) 131 | self.leakyRELU = nn.LeakyReLU(0.1) 132 | # self.attention = SCSElayer(planes * self.expansion, ratio=reduction) 133 | # self.attention = SElayer(planes * self.expansion, ratio=reduction) 134 | # self.attention = ECAlayer(planes * self.expansion, ratio=reduction) 135 | # self.attention = SElayer_conv(planes * self.expansion, ratio=reduction) 136 | # self.attention = SCSElayer(planes * self.expansion, ratio=reduction) 137 | # self.attention = ModifiedSCSElayer(planes * self.expansion, ratio=reduction) 138 | # self.attention = DPCSAlayer(planes * self.expansion, ratio=reduction) 139 | # self.attention = PAlayer(planes * self.expansion, ratio=reduction) 140 | # self.attention = CAlayer(planes * self.expansion, ratio=reduction) 141 | self.downsample = downsample 142 | self.stride = stride 143 | 144 | def forward(self, x): 145 | identity = x 146 | 147 | out = self.conv1(x) 148 | out = self.bn1(out) 149 | out = self.leakyRELU(out) 150 | 151 | out = self.conv2(out) 152 | out = self.bn2(out) 153 | out = self.leakyRELU(out) 154 | 155 | out = self.conv3(out) 156 | out = self.bn3(out) 157 | out = self.attention(out) 158 | 159 | if self.downsample is not None: 160 | identity = self.downsample(x) 161 | 162 | out += identity 163 | out = self.leakyRELU(out) 164 | 165 | return out 166 | 167 | 168 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 169 | """3x3 convolution with padding""" 170 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 171 | padding=dilation, groups=groups, bias=False, dilation=dilation) 172 | 173 | 174 | def conv1x1(in_planes, out_planes, stride=1): 175 | """1x1 convolution""" 176 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 177 | 178 | 179 | def myconv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1): 180 | return nn.Sequential( 181 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, 182 | groups=1, bias=True), 183 | nn.LeakyReLU(0.1)) 184 | 185 | 186 | def predict_flow(in_planes): 187 | return nn.Conv2d(in_planes, 2, kernel_size=3, stride=1, padding=1, bias=True) 188 | 189 | 190 | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1): 191 | return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True) 192 | 193 | 194 | class ResnetEncoder(nn.Module): 195 | """Pytorch module for a resnet encoder 196 | """ 197 | def __init__(self, num_layers, pretrained, num_input_images=1): 198 | super(ResnetEncoder, self).__init__() 199 | 200 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 201 | 202 | resnets = {18: models.resnet18, 203 | 34: models.resnet34, 204 | 50: models.resnet50, 205 | 101: models.resnet101, 206 | 152: models.resnet152} 207 | 208 | if num_layers not in resnets: 209 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 210 | 211 | self.encoder = resnets[num_layers](pretrained) 212 | 213 | if num_layers > 34: 214 | self.num_ch_enc[1:] *= 4 215 | 216 | def forward(self, input_image): 217 | self.features = [] 218 | x = (input_image - 0.45) / 0.225 219 | x = self.encoder.conv1(x) 220 | x = self.encoder.bn1(x) 221 | self.features.append(self.encoder.relu(x)) 222 | # self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 223 | self.features.append(self.encoder.maxpool(self.features[-1])) 224 | self.features.append(self.encoder.layer1(self.features[-1])) 225 | 
self.features.append(self.encoder.layer2(self.features[-1])) 226 | self.features.append(self.encoder.layer3(self.features[-1])) 227 | self.features.append(self.encoder.layer4(self.features[-1])) 228 | 229 | return self.features 230 | 231 | 232 | class LCCNet(nn.Module): 233 | """ 234 | Based on the PWC-DC net. add resnet encoder, dilation convolution and densenet connections 235 | """ 236 | 237 | def __init__(self, image_size, use_feat_from=1, md=4, use_reflectance=False, dropout=0.0, 238 | Action_Func='leakyrelu', attention=False, res_num=18): 239 | """ 240 | input: md --- maximum displacement (for correlation. default: 4), after warpping 241 | """ 242 | super(LCCNet, self).__init__() 243 | input_lidar = 1 244 | self.res_num = res_num 245 | self.use_feat_from = use_feat_from 246 | if use_reflectance: 247 | input_lidar = 2 248 | 249 | # original resnet 250 | self.pretrained_encoder = False 251 | self.net_encoder = ResnetEncoder(num_layers=self.res_num, pretrained=True, num_input_images=1) 252 | 253 | # resnet with leakyRELU 254 | self.Action_Func = Action_Func 255 | self.attention = attention 256 | self.inplanes = 4 257 | if self.res_num == 50: 258 | layers = [3, 4, 6, 3] 259 | add_list = [1024, 512, 256, 64] 260 | elif self.res_num == 18: 261 | layers = [2, 2, 2, 2] 262 | add_list = [256, 128, 64, 64] 263 | 264 | if self.attention: 265 | block = SEBottleneck 266 | else: 267 | if self.res_num == 50: 268 | block = Bottleneck 269 | elif self.res_num == 18: 270 | block = BasicBlock 271 | 272 | 273 | # rgb_image 274 | self.conv1_rgb = nn.Conv2d(3, 4, kernel_size=7, stride=2, padding=3) 275 | self.elu_rgb = nn.ELU() 276 | self.leakyRELU_rgb = nn.LeakyReLU(0.1) 277 | self.maxpool_rgb = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 278 | self.layer1_rgb = self._make_layer(block, 4, layers[0]) 279 | self.layer2_rgb = self._make_layer(block, 8, layers[1], stride=2) 280 | self.layer3_rgb = self._make_layer(block, 16, layers[2], stride=2) 281 | self.layer4_rgb = self._make_layer(block, 32, layers[3], stride=2) 282 | 283 | # lidar_image 284 | self.inplanes = 4 285 | self.conv1_lidar = nn.Conv2d(input_lidar, 4, kernel_size=7, stride=2, padding=3) 286 | self.elu_lidar = nn.ELU() 287 | self.leakyRELU_lidar = nn.LeakyReLU(0.1) 288 | self.maxpool_lidar = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 289 | self.layer1_lidar = self._make_layer(block, 4, layers[0]) 290 | self.layer2_lidar = self._make_layer(block, 8, layers[1], stride=2) 291 | self.layer3_lidar = self._make_layer(block, 16, layers[2], stride=2) 292 | self.layer4_lidar = self._make_layer(block, 32, layers[3], stride=2) 293 | 294 | self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1) 295 | self.leakyRELU = nn.LeakyReLU(0.1) 296 | 297 | nd = (2 * md + 1) ** 2 298 | dd = np.cumsum([128, 128, 96, 64, 32]) 299 | 300 | od = nd 301 | self.conv6_0 = myconv(od, 128, kernel_size=3, stride=1) 302 | self.conv6_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 303 | self.conv6_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 304 | self.conv6_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 305 | self.conv6_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 306 | 307 | if use_feat_from > 1: 308 | self.predict_flow6 = predict_flow(od + dd[4]) 309 | self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 310 | self.upfeat6 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 311 | 312 | od = nd + add_list[0] + 4 313 | self.conv5_0 = myconv(od, 128, kernel_size=3, 
stride=1) 314 | self.conv5_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 315 | self.conv5_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 316 | self.conv5_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 317 | self.conv5_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 318 | 319 | if use_feat_from > 2: 320 | self.predict_flow5 = predict_flow(od + dd[4]) 321 | self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 322 | self.upfeat5 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 323 | 324 | od = nd + add_list[1] + 4 325 | self.conv4_0 = myconv(od, 128, kernel_size=3, stride=1) 326 | self.conv4_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 327 | self.conv4_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 328 | self.conv4_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 329 | self.conv4_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 330 | 331 | if use_feat_from > 3: 332 | self.predict_flow4 = predict_flow(od + dd[4]) 333 | self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 334 | self.upfeat4 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 335 | 336 | od = nd + add_list[2] + 4 337 | self.conv3_0 = myconv(od, 128, kernel_size=3, stride=1) 338 | self.conv3_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 339 | self.conv3_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 340 | self.conv3_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 341 | self.conv3_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 342 | 343 | if use_feat_from > 4: 344 | self.predict_flow3 = predict_flow(od + dd[4]) 345 | self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 346 | self.upfeat3 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1) 347 | 348 | od = nd + add_list[3] + 4 349 | self.conv2_0 = myconv(od, 128, kernel_size=3, stride=1) 350 | self.conv2_1 = myconv(od + dd[0], 128, kernel_size=3, stride=1) 351 | self.conv2_2 = myconv(od + dd[1], 96, kernel_size=3, stride=1) 352 | self.conv2_3 = myconv(od + dd[2], 64, kernel_size=3, stride=1) 353 | self.conv2_4 = myconv(od + dd[3], 32, kernel_size=3, stride=1) 354 | 355 | if use_feat_from > 5: 356 | self.predict_flow2 = predict_flow(od + dd[4]) 357 | self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 358 | 359 | self.dc_conv1 = myconv(od + dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1) 360 | self.dc_conv2 = myconv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2) 361 | self.dc_conv3 = myconv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4) 362 | self.dc_conv4 = myconv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8) 363 | self.dc_conv5 = myconv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16) 364 | self.dc_conv6 = myconv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1) 365 | self.dc_conv7 = predict_flow(32) 366 | 367 | fc_size = od + dd[4] 368 | downsample = 128 // (2**use_feat_from) 369 | if image_size[0] % downsample == 0: 370 | fc_size *= image_size[0] // downsample 371 | else: 372 | fc_size *= (image_size[0] // downsample)+1 373 | if image_size[1] % downsample == 0: 374 | fc_size *= image_size[1] // downsample 375 | else: 376 | fc_size *= (image_size[1] // downsample)+1 377 | self.fc1 = nn.Linear(fc_size * 4, 512) 378 | 379 | self.fc1_trasl = nn.Linear(512, 256) 380 | self.fc1_rot = nn.Linear(512, 256) 381 | 382 | self.fc2_trasl = nn.Linear(256, 3) 383 | self.fc2_rot = nn.Linear(256, 4) 384 | 385 | self.dropout = nn.Dropout(dropout) 386 | 387 | self.classification1 = nn.Linear(512,256) 388 | 
self.classification2 = nn.Linear(256,2) 389 | 390 | for m in self.modules(): 391 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 392 | nn.init.kaiming_normal_(m.weight.data, mode='fan_in') 393 | if m.bias is not None: 394 | m.bias.data.zero_() 395 | 396 | # def _make_layer(self, block, planes, blocks, stride=1): 397 | # layers = [] 398 | # layers.append(block(self.inplanes, planes, 1)) 399 | # self.inplanes = planes * block.expansion 400 | # for i in range(1, blocks - 1): 401 | # layers.append(block(self.inplanes, planes, 1)) 402 | # layers.append(block(self.inplanes, planes, 2)) 403 | # 404 | # return nn.Sequential(*layers) 405 | 406 | 407 | def _make_layer(self, block, planes, blocks, stride=1): 408 | downsample = None 409 | if stride != 1 or self.inplanes != planes * block.expansion: 410 | downsample = nn.Sequential( 411 | conv1x1(self.inplanes, planes * block.expansion, stride), 412 | nn.BatchNorm2d(planes * block.expansion), 413 | ) 414 | 415 | layers = [] 416 | layers.append(block(self.inplanes, planes, stride, downsample)) 417 | self.inplanes = planes * block.expansion 418 | for _ in range(1, blocks): 419 | layers.append(block(self.inplanes, planes)) 420 | 421 | return nn.Sequential(*layers) 422 | 423 | 424 | def warp(self, x, flo): 425 | """ 426 | warp an image/tensor (im2) back to im1, according to the optical flow 427 | x: [B, C, H, W] (im2) 428 | flo: [B, 2, H, W] flow 429 | """ 430 | B, C, H, W = x.size() 431 | # mesh grid 432 | xx = torch.arange(0, W).view(1, -1).repeat(H, 1) 433 | yy = torch.arange(0, H).view(-1, 1).repeat(1, W) 434 | xx = xx.view(1, 1, H, W).repeat(B, 1, 1, 1) 435 | yy = yy.view(1, 1, H, W).repeat(B, 1, 1, 1) 436 | grid = torch.cat((xx, yy), 1).float() 437 | 438 | if x.is_cuda: 439 | grid = grid.cuda() 440 | vgrid = Variable(grid) + flo 441 | 442 | # scale grid to [-1,1] 443 | vgrid[:, 0, :, :] = 2.0 * vgrid[:, 0, :, :].clone() / max(W - 1, 1) - 1.0 444 | vgrid[:, 1, :, :] = 2.0 * vgrid[:, 1, :, :].clone() / max(H - 1, 1) - 1.0 445 | 446 | vgrid = vgrid.permute(0, 2, 3, 1) 447 | output = nn.functional.grid_sample(x, vgrid) 448 | mask = torch.autograd.Variable(torch.ones(x.size())).cuda() 449 | mask = nn.functional.grid_sample(mask, vgrid) 450 | 451 | # if W==128: 452 | # np.save('mask.npy', mask.cpu().data.numpy()) 453 | # np.save('warp.npy', output.cpu().data.numpy()) 454 | 455 | # mask[mask<0.9999] = 0.0 456 | # mask[mask>0] = 1.0 457 | mask = torch.floor(torch.clamp(mask, 0, 1)) 458 | 459 | return output * mask 460 | 461 | def forward(self, rgb, lidar): 462 | H, W = rgb.shape[2:4] 463 | 464 | #encoder 465 | if self.pretrained_encoder: 466 | # rgb_image 467 | features1 = self.net_encoder(rgb) 468 | c12 = features1[0] # 2 469 | c13 = features1[2] # 4 470 | c14 = features1[3] # 8 471 | c15 = features1[4] # 16 472 | c16 = features1[5] # 32 473 | # lidar_image 474 | x2 = self.conv1_lidar(lidar) 475 | if self.Action_Func == 'leakyrelu': 476 | c22 = self.leakyRELU_lidar(x2) # 2 477 | elif self.Action_Func == 'elu': 478 | c22 = self.elu_lidar(x2) # 2 479 | c23 = self.layer1_lidar(self.maxpool_lidar(c22)) # 4 480 | c24 = self.layer2_lidar(c23) # 8 481 | c25 = self.layer3_lidar(c24) # 16 482 | c26 = self.layer4_lidar(c25) # 32 483 | 484 | else: 485 | x1 = self.conv1_rgb(rgb) 486 | x2 = self.conv1_lidar(lidar) 487 | if self.Action_Func == 'leakyrelu': 488 | c12 = self.leakyRELU_rgb(x1) # 2 489 | c22 = self.leakyRELU_lidar(x2) # 2 490 | elif self.Action_Func == 'elu': 491 | c12 = self.elu_rgb(x1) # 2 492 | c22 = self.elu_lidar(x2) # 2 493 | c13 = 
self.layer1_rgb(self.maxpool_rgb(c12)) # 4 494 | c23 = self.layer1_lidar(self.maxpool_lidar(c22)) # 4 495 | c14 = self.layer2_rgb(c13) # 8 496 | c24 = self.layer2_lidar(c23) # 8 497 | c15 = self.layer3_rgb(c14) # 16 498 | c25 = self.layer3_lidar(c24) # 16 499 | c16 = self.layer4_rgb(c15) # 32 500 | c26 = self.layer4_lidar(c25) # 32 501 | 502 | corr6 = self.corr(c16, c26) 503 | corr6 = self.leakyRELU(corr6) 504 | x = torch.cat((self.conv6_0(corr6), corr6), 1) 505 | x = torch.cat((self.conv6_1(x), x), 1) 506 | x = torch.cat((self.conv6_2(x), x), 1) 507 | x = torch.cat((self.conv6_3(x), x), 1) 508 | x = torch.cat((self.conv6_4(x), x), 1) 509 | 510 | if self.use_feat_from > 1: 511 | flow6 = self.predict_flow6(x) 512 | up_flow6 = self.deconv6(flow6) 513 | up_feat6 = self.upfeat6(x) 514 | 515 | warp5 = self.warp(c25, up_flow6*0.625) 516 | corr5 = self.corr(c15, warp5) 517 | corr5 = self.leakyRELU(corr5) 518 | x = torch.cat((corr5, c15, up_flow6, up_feat6), 1) 519 | x = torch.cat((self.conv5_0(x), x), 1) 520 | x = torch.cat((self.conv5_1(x), x), 1) 521 | x = torch.cat((self.conv5_2(x), x), 1) 522 | x = torch.cat((self.conv5_3(x), x), 1) 523 | x = torch.cat((self.conv5_4(x), x), 1) 524 | 525 | if self.use_feat_from > 2: 526 | flow5 = self.predict_flow5(x) 527 | up_flow5 = self.deconv5(flow5) 528 | up_feat5 = self.upfeat5(x) 529 | 530 | warp4 = self.warp(c24, up_flow5*1.25) 531 | corr4 = self.corr(c14, warp4) 532 | corr4 = self.leakyRELU(corr4) 533 | x = torch.cat((corr4, c14, up_flow5, up_feat5), 1) 534 | x = torch.cat((self.conv4_0(x), x), 1) 535 | x = torch.cat((self.conv4_1(x), x), 1) 536 | x = torch.cat((self.conv4_2(x), x), 1) 537 | x = torch.cat((self.conv4_3(x), x), 1) 538 | x = torch.cat((self.conv4_4(x), x), 1) 539 | 540 | if self.use_feat_from > 3: 541 | flow4 = self.predict_flow4(x) 542 | up_flow4 = self.deconv4(flow4) 543 | up_feat4 = self.upfeat4(x) 544 | 545 | warp3 = self.warp(c23, up_flow4*2.5) 546 | corr3 = self.corr(c13, warp3) 547 | corr3 = self.leakyRELU(corr3) 548 | x = torch.cat((corr3, c13, up_flow4, up_feat4), 1) 549 | x = torch.cat((self.conv3_0(x), x),1) 550 | x = torch.cat((self.conv3_1(x), x),1) 551 | x = torch.cat((self.conv3_2(x), x),1) 552 | x = torch.cat((self.conv3_3(x), x),1) 553 | x = torch.cat((self.conv3_4(x), x),1) 554 | 555 | if self.use_feat_from > 4: 556 | flow3 = self.predict_flow3(x) 557 | up_flow3 = self.deconv3(flow3) 558 | up_feat3 = self.upfeat3(x) 559 | 560 | 561 | warp2 = self.warp(c22, up_flow3*5.0) 562 | corr2 = self.corr(c12, warp2) 563 | corr2 = self.leakyRELU(corr2) 564 | x = torch.cat((corr2, c12, up_flow3, up_feat3), 1) 565 | x = torch.cat((self.conv2_0(x), x), 1) 566 | x = torch.cat((self.conv2_1(x), x), 1) 567 | x = torch.cat((self.conv2_2(x), x), 1) 568 | x = torch.cat((self.conv2_3(x), x), 1) 569 | x = torch.cat((self.conv2_4(x), x), 1) 570 | 571 | if self.use_feat_from > 5: 572 | flow2 = self.predict_flow2(x) 573 | 574 | x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x)))) 575 | #flow2 = flow2 + self.dc_conv7(self.dc_conv6(self.dc_conv5(x))) 576 | 577 | x = x.view(x.shape[0], -1) 578 | x = self.dropout(x) 579 | x = self.leakyRELU(self.fc1(x)) 580 | 581 | classification_results = self.classification2(self.classification1(x)) 582 | return classification_results 583 | 584 | 585 | -------------------------------------------------------------------------------- /models/__pycache__/LCCNet.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/__pycache__/LCCNet.cpython-36.pyc -------------------------------------------------------------------------------- /models/correlation_package/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 NVIDIA CORPORATION 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /models/correlation_package/__pycache__/correlation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/__pycache__/correlation.cpython-36.pyc -------------------------------------------------------------------------------- /models/correlation_package/build/lib.linux-x86_64-3.6/correlation_cuda.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/build/lib.linux-x86_64-3.6/correlation_cuda.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda.o -------------------------------------------------------------------------------- /models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/build/temp.linux-x86_64-3.6/correlation_cuda_kernel.o -------------------------------------------------------------------------------- /models/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class CorrelationFunction(Function): 7 | @staticmethod 8 | def forward(ctx, input1, input2, pad_size, kernel_size, max_displacement,stride1, stride2, corr_multiply): 9 | ctx.save_for_backward(input1, input2) 10 | ctx.pad_size = pad_size 11 | ctx.kernel_size = kernel_size 12 | ctx.max_displacement = max_displacement 13 | ctx.stride1 = stride1 14 | ctx.stride2 = stride2 15 | ctx.corr_multiply = corr_multiply 16 | 17 | with torch.cuda.device_of(input1): 18 | rbot1 = input1.new() 19 | rbot2 = input2.new() 20 | 
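# Editor note: tensor.new() with no arguments (legacy PyTorch API) returns an empty,
# zero-element tensor with the same dtype and device as the source tensor. The buffers
# allocated here (rbot1, rbot2, output) carry no shape information; correlation_cuda.forward
# below resizes and fills them on the C++ side (see the resize_/fill_ calls in
# correlation_cuda.cc).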
output = input1.new() 21 | 22 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 23 | pad_size, kernel_size, max_displacement, stride1, stride2, corr_multiply) 24 | 25 | return output 26 | 27 | @staticmethod 28 | def backward(ctx, grad_output): 29 | input1, input2 = ctx.saved_tensors 30 | 31 | with torch.cuda.device_of(input1): 32 | rbot1 = input1.new() 33 | rbot2 = input2.new() 34 | 35 | grad_input1 = input1.new() 36 | grad_input2 = input2.new() 37 | 38 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 39 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply) 40 | 41 | return grad_input1, grad_input2, None, None, None, None, None, None 42 | 43 | 44 | class Correlation(Module): 45 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 46 | super(Correlation, self).__init__() 47 | self.pad_size = pad_size 48 | self.kernel_size = kernel_size 49 | self.max_displacement = max_displacement 50 | self.stride1 = stride1 51 | self.stride2 = stride2 52 | self.corr_multiply = corr_multiply 53 | 54 | def forward(self, input1, input2): 55 | 56 | input1 = input1.contiguous() 57 | input2 = input2.contiguous() 58 | 59 | result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "correlation_cuda_kernel.cuh" 8 | 9 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 10 | int pad_size, 11 | int kernel_size, 12 | int max_displacement, 13 | int stride1, 14 | int stride2, 15 | int corr_type_multiply) 16 | { 17 | 18 | int batchSize = input1.size(0); 19 | 20 | int nInputChannels = input1.size(1); 21 | int inputHeight = input1.size(2); 22 | int inputWidth = input1.size(3); 23 | 24 | int kernel_radius = (kernel_size - 1) / 2; 25 | int border_radius = kernel_radius + max_displacement; 26 | 27 | int paddedInputHeight = inputHeight + 2 * pad_size; 28 | int paddedInputWidth = inputWidth + 2 * pad_size; 29 | 30 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 31 | 32 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 33 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 34 | 35 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 36 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 38 | 39 | rInput1.fill_(0); 40 | rInput2.fill_(0); 41 | output.fill_(0); 42 | 43 | int success = correlation_forward_cuda_kernel( 44 | output, 45 | output.size(0), 46 | output.size(1), 47 | output.size(2), 48 | output.size(3), 49 | output.stride(0), 50 | output.stride(1), 51 | output.stride(2), 52 | output.stride(3), 53 | input1, 54 | input1.size(1), 55 | input1.size(2), 56 | input1.size(3), 57 | input1.stride(0), 58 | input1.stride(1), 59 | input1.stride(2), 60 | input1.stride(3), 61 | input2, 62 | 
input2.size(1), 63 | input2.stride(0), 64 | input2.stride(1), 65 | input2.stride(2), 66 | input2.stride(3), 67 | rInput1, 68 | rInput2, 69 | pad_size, 70 | kernel_size, 71 | max_displacement, 72 | stride1, 73 | stride2, 74 | corr_type_multiply, 75 | at::cuda::getCurrentCUDAStream() 76 | ); 77 | 78 | //check for errors 79 | if (!success) { 80 | AT_ERROR("CUDA call failed"); 81 | } 82 | 83 | return 1; 84 | 85 | } 86 | 87 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 88 | at::Tensor& gradInput1, at::Tensor& gradInput2, 89 | int pad_size, 90 | int kernel_size, 91 | int max_displacement, 92 | int stride1, 93 | int stride2, 94 | int corr_type_multiply) 95 | { 96 | 97 | int batchSize = input1.size(0); 98 | int nInputChannels = input1.size(1); 99 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 100 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 101 | 102 | int height = input1.size(2); 103 | int width = input1.size(3); 104 | 105 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 106 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 107 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 108 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 109 | 110 | rInput1.fill_(0); 111 | rInput2.fill_(0); 112 | gradInput1.fill_(0); 113 | gradInput2.fill_(0); 114 | 115 | int success = correlation_backward_cuda_kernel(gradOutput, 116 | gradOutput.size(0), 117 | gradOutput.size(1), 118 | gradOutput.size(2), 119 | gradOutput.size(3), 120 | gradOutput.stride(0), 121 | gradOutput.stride(1), 122 | gradOutput.stride(2), 123 | gradOutput.stride(3), 124 | input1, 125 | input1.size(1), 126 | input1.size(2), 127 | input1.size(3), 128 | input1.stride(0), 129 | input1.stride(1), 130 | input1.stride(2), 131 | input1.stride(3), 132 | input2, 133 | input2.stride(0), 134 | input2.stride(1), 135 | input2.stride(2), 136 | input2.stride(3), 137 | gradInput1, 138 | gradInput1.stride(0), 139 | gradInput1.stride(1), 140 | gradInput1.stride(2), 141 | gradInput1.stride(3), 142 | gradInput2, 143 | gradInput2.size(1), 144 | gradInput2.stride(0), 145 | gradInput2.stride(1), 146 | gradInput2.stride(2), 147 | gradInput2.stride(3), 148 | rInput1, 149 | rInput2, 150 | pad_size, 151 | kernel_size, 152 | max_displacement, 153 | stride1, 154 | stride2, 155 | corr_type_multiply, 156 | at::cuda::getCurrentCUDAStream() 157 | ); 158 | 159 | if (!success) { 160 | AT_ERROR("CUDA call failed"); 161 | } 162 | 163 | return 1; 164 | } 165 | 166 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 167 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); 168 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 169 | } 170 | 171 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: correlation-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | License: UNKNOWN 7 | Platform: UNKNOWN 8 | License-File: LICENSE 9 | 10 | UNKNOWN 11 | 12 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | 
correlation_cuda.cc 3 | correlation_cuda_kernel.cu 4 | pyproject.toml 5 | setup.py 6 | correlation_cuda.egg-info/PKG-INFO 7 | correlation_cuda.egg-info/SOURCES.txt 8 | correlation_cuda.egg-info/dependency_links.txt 9 | correlation_cuda.egg-info/top_level.txt -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | correlation_cuda 2 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "correlation_cuda_kernel.cuh" 4 | 5 | #define CUDA_NUM_THREADS 1024 6 | #define THREADS_PER_BLOCK 32 7 | #define FULL_MASK 0xffffffff 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using at::Half; 15 | 16 | template 17 | __forceinline__ __device__ scalar_t warpReduceSum(scalar_t val) { 18 | for (int offset = 16; offset > 0; offset /= 2) 19 | val += __shfl_down_sync(FULL_MASK, val, offset); 20 | return val; 21 | } 22 | 23 | template 24 | __forceinline__ __device__ scalar_t blockReduceSum(scalar_t val) { 25 | 26 | static __shared__ scalar_t shared[32]; 27 | int lane = threadIdx.x % warpSize; 28 | int wid = threadIdx.x / warpSize; 29 | 30 | val = warpReduceSum(val); 31 | 32 | if (lane == 0) 33 | shared[wid] = val; 34 | 35 | __syncthreads(); 36 | 37 | val = (threadIdx.x < blockDim.x / warpSize) ? 
shared[lane] : 0; 38 | 39 | if (wid == 0) 40 | val = warpReduceSum(val); 41 | 42 | return val; 43 | } 44 | 45 | 46 | template 47 | __global__ void channels_first(const scalar_t* __restrict__ input, scalar_t* rinput, int channels, int height, int width, int pad_size) 48 | { 49 | 50 | // n (batch size), c (num of channels), y (height), x (width) 51 | int n = blockIdx.x; 52 | int y = blockIdx.y; 53 | int x = blockIdx.z; 54 | 55 | int ch_off = threadIdx.x; 56 | scalar_t value; 57 | 58 | int dimcyx = channels * height * width; 59 | int dimyx = height * width; 60 | 61 | int p_dimx = (width + 2 * pad_size); 62 | int p_dimy = (height + 2 * pad_size); 63 | int p_dimyxc = channels * p_dimy * p_dimx; 64 | int p_dimxc = p_dimx * channels; 65 | 66 | for (int c = ch_off; c < channels; c += THREADS_PER_BLOCK) { 67 | value = input[n * dimcyx + c * dimyx + y * width + x]; 68 | rinput[n * p_dimyxc + (y + pad_size) * p_dimxc + (x + pad_size) * channels + c] = value; 69 | } 70 | } 71 | 72 | 73 | template 74 | __global__ void correlation_forward(scalar_t* __restrict__ output, const int nOutputChannels, 75 | const int outputHeight, const int outputWidth, const scalar_t* __restrict__ rInput1, 76 | const int nInputChannels, const int inputHeight, const int inputWidth, 77 | const scalar_t* __restrict__ rInput2, const int pad_size, const int kernel_size, 78 | const int max_displacement, const int stride1, const int stride2) { 79 | 80 | int32_t pInputWidth = inputWidth + 2 * pad_size; 81 | int32_t pInputHeight = inputHeight + 2 * pad_size; 82 | 83 | int32_t kernel_rad = (kernel_size - 1) / 2; 84 | 85 | int32_t displacement_rad = max_displacement / stride2; 86 | 87 | int32_t displacement_size = 2 * displacement_rad + 1; 88 | 89 | int32_t n = blockIdx.x; 90 | int32_t y1 = blockIdx.y * stride1 + max_displacement; 91 | int32_t x1 = blockIdx.z * stride1 + max_displacement; 92 | int32_t c = threadIdx.x; 93 | 94 | int32_t pdimyxc = pInputHeight * pInputWidth * nInputChannels; 95 | 96 | int32_t pdimxc = pInputWidth * nInputChannels; 97 | 98 | int32_t pdimc = nInputChannels; 99 | 100 | int32_t tdimcyx = nOutputChannels * outputHeight * outputWidth; 101 | int32_t tdimyx = outputHeight * outputWidth; 102 | int32_t tdimx = outputWidth; 103 | 104 | int32_t nelems = kernel_size * kernel_size * pdimc; 105 | 106 | // element-wise product along channel axis 107 | for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) { 108 | for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) { 109 | int x2 = x1 + ti * stride2; 110 | int y2 = y1 + tj * stride2; 111 | 112 | float acc0 = 0.0f; 113 | 114 | for (int j = -kernel_rad; j <= kernel_rad; ++j) { 115 | for (int i = -kernel_rad; i <= kernel_rad; ++i) { 116 | // THREADS_PER_BLOCK 117 | #pragma unroll 118 | for (int ch = c; ch < pdimc; ch += blockDim.x) { 119 | 120 | int indx1 = n * pdimyxc + (y1 + j) * pdimxc 121 | + (x1 + i) * pdimc + ch; 122 | int indx2 = n * pdimyxc + (y2 + j) * pdimxc 123 | + (x2 + i) * pdimc + ch; 124 | acc0 += static_cast(rInput1[indx1] * rInput2[indx2]); 125 | } 126 | } 127 | } 128 | 129 | if (blockDim.x == warpSize) { 130 | __syncwarp(); 131 | acc0 = warpReduceSum(acc0); 132 | } else { 133 | __syncthreads(); 134 | acc0 = blockReduceSum(acc0); 135 | } 136 | 137 | if (threadIdx.x == 0) { 138 | 139 | int tc = (tj + displacement_rad) * displacement_size 140 | + (ti + displacement_rad); 141 | const int tindx = n * tdimcyx + tc * tdimyx + blockIdx.y * tdimx 142 | + blockIdx.z; 143 | output[tindx] = static_cast(acc0 / nelems); 144 | } 145 | } 146 | } 147 | } 
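// Editor note (worked example): the host wrapper in correlation_cuda.cc sizes the output as
// nOutputChannels = ((max_displacement/stride2)*2 + 1)^2. With the arguments LCCNet passes to
// Correlation (max_displacement = md = 4, stride1 = stride2 = 1), displacement_rad = 4 and
// displacement_size = 9, so this kernel produces 9 * 9 = 81 correlation channels per output
// location -- the same nd = (2*md + 1)**2 = 81 used to size the conv6_* layers in models/LCCNet.py.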
148 | 149 | 150 | template 151 | __global__ void correlation_backward_input1(int item, scalar_t* gradInput1, int nInputChannels, int inputHeight, int inputWidth, 152 | const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth, 153 | const scalar_t* __restrict__ rInput2, 154 | int pad_size, 155 | int kernel_size, 156 | int max_displacement, 157 | int stride1, 158 | int stride2) 159 | { 160 | // n (batch size), c (num of channels), y (height), x (width) 161 | 162 | int n = item; 163 | int y = blockIdx.x * stride1 + pad_size; 164 | int x = blockIdx.y * stride1 + pad_size; 165 | int c = blockIdx.z; 166 | int tch_off = threadIdx.x; 167 | 168 | int kernel_rad = (kernel_size - 1) / 2; 169 | int displacement_rad = max_displacement / stride2; 170 | int displacement_size = 2 * displacement_rad + 1; 171 | 172 | int xmin = (x - kernel_rad - max_displacement) / stride1; 173 | int ymin = (y - kernel_rad - max_displacement) / stride1; 174 | 175 | int xmax = (x + kernel_rad - max_displacement) / stride1; 176 | int ymax = (y + kernel_rad - max_displacement) / stride1; 177 | 178 | if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) { 179 | // assumes gradInput1 is pre-allocated and zero filled 180 | return; 181 | } 182 | 183 | if (xmin > xmax || ymin > ymax) { 184 | // assumes gradInput1 is pre-allocated and zero filled 185 | return; 186 | } 187 | 188 | xmin = max(0,xmin); 189 | xmax = min(outputWidth-1,xmax); 190 | 191 | ymin = max(0,ymin); 192 | ymax = min(outputHeight-1,ymax); 193 | 194 | int pInputWidth = inputWidth + 2 * pad_size; 195 | int pInputHeight = inputHeight + 2 * pad_size; 196 | 197 | int pdimyxc = pInputHeight * pInputWidth * nInputChannels; 198 | int pdimxc = pInputWidth * nInputChannels; 199 | int pdimc = nInputChannels; 200 | 201 | int tdimcyx = nOutputChannels * outputHeight * outputWidth; 202 | int tdimyx = outputHeight * outputWidth; 203 | int tdimx = outputWidth; 204 | 205 | int odimcyx = nInputChannels * inputHeight* inputWidth; 206 | int odimyx = inputHeight * inputWidth; 207 | int odimx = inputWidth; 208 | 209 | scalar_t nelems = kernel_size * kernel_size * nInputChannels; 210 | 211 | __shared__ scalar_t prod_sum[THREADS_PER_BLOCK]; 212 | prod_sum[tch_off] = 0; 213 | 214 | for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) { 215 | 216 | int i2 = (tc % displacement_size - displacement_rad) * stride2; 217 | int j2 = (tc / displacement_size - displacement_rad) * stride2; 218 | 219 | int indx2 = n * pdimyxc + (y + j2)* pdimxc + (x + i2) * pdimc + c; 220 | 221 | scalar_t val2 = rInput2[indx2]; 222 | 223 | for (int j = ymin; j <= ymax; ++j) { 224 | for (int i = xmin; i <= xmax; ++i) { 225 | int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i; 226 | prod_sum[tch_off] += gradOutput[tindx] * val2; 227 | } 228 | } 229 | } 230 | __syncthreads(); 231 | 232 | if(tch_off == 0) { 233 | scalar_t reduce_sum = 0; 234 | for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) { 235 | reduce_sum += prod_sum[idx]; 236 | } 237 | const int indx1 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size); 238 | gradInput1[indx1] = reduce_sum / nelems; 239 | } 240 | 241 | } 242 | 243 | template 244 | __global__ void correlation_backward_input2(int item, scalar_t* gradInput2, int nInputChannels, int inputHeight, int inputWidth, 245 | const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth, 246 | const scalar_t* __restrict__ rInput1, 247 | int pad_size, 248 | int kernel_size, 249 | int 
max_displacement, 250 | int stride1, 251 | int stride2) 252 | { 253 | // n (batch size), c (num of channels), y (height), x (width) 254 | 255 | int n = item; 256 | int y = blockIdx.x * stride1 + pad_size; 257 | int x = blockIdx.y * stride1 + pad_size; 258 | int c = blockIdx.z; 259 | 260 | int tch_off = threadIdx.x; 261 | 262 | int kernel_rad = (kernel_size - 1) / 2; 263 | int displacement_rad = max_displacement / stride2; 264 | int displacement_size = 2 * displacement_rad + 1; 265 | 266 | int pInputWidth = inputWidth + 2 * pad_size; 267 | int pInputHeight = inputHeight + 2 * pad_size; 268 | 269 | int pdimyxc = pInputHeight * pInputWidth * nInputChannels; 270 | int pdimxc = pInputWidth * nInputChannels; 271 | int pdimc = nInputChannels; 272 | 273 | int tdimcyx = nOutputChannels * outputHeight * outputWidth; 274 | int tdimyx = outputHeight * outputWidth; 275 | int tdimx = outputWidth; 276 | 277 | int odimcyx = nInputChannels * inputHeight* inputWidth; 278 | int odimyx = inputHeight * inputWidth; 279 | int odimx = inputWidth; 280 | 281 | scalar_t nelems = kernel_size * kernel_size * nInputChannels; 282 | 283 | __shared__ scalar_t prod_sum[THREADS_PER_BLOCK]; 284 | prod_sum[tch_off] = 0; 285 | 286 | for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) { 287 | int i2 = (tc % displacement_size - displacement_rad) * stride2; 288 | int j2 = (tc / displacement_size - displacement_rad) * stride2; 289 | 290 | int xmin = (x - kernel_rad - max_displacement - i2) / stride1; 291 | int ymin = (y - kernel_rad - max_displacement - j2) / stride1; 292 | 293 | int xmax = (x + kernel_rad - max_displacement - i2) / stride1; 294 | int ymax = (y + kernel_rad - max_displacement - j2) / stride1; 295 | 296 | if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) { 297 | // assumes gradInput2 is pre-allocated and zero filled 298 | continue; 299 | } 300 | 301 | if (xmin > xmax || ymin > ymax) { 302 | // assumes gradInput2 is pre-allocated and zero filled 303 | continue; 304 | } 305 | 306 | xmin = max(0,xmin); 307 | xmax = min(outputWidth-1,xmax); 308 | 309 | ymin = max(0,ymin); 310 | ymax = min(outputHeight-1,ymax); 311 | 312 | int indx1 = n * pdimyxc + (y - j2)* pdimxc + (x - i2) * pdimc + c; 313 | scalar_t val1 = rInput1[indx1]; 314 | 315 | for (int j = ymin; j <= ymax; ++j) { 316 | for (int i = xmin; i <= xmax; ++i) { 317 | int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i; 318 | prod_sum[tch_off] += gradOutput[tindx] * val1; 319 | } 320 | } 321 | } 322 | 323 | __syncthreads(); 324 | 325 | if(tch_off == 0) { 326 | scalar_t reduce_sum = 0; 327 | for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) { 328 | reduce_sum += prod_sum[idx]; 329 | } 330 | const int indx2 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size); 331 | gradInput2[indx2] = reduce_sum / nelems; 332 | } 333 | 334 | } 335 | 336 | int correlation_forward_cuda_kernel(at::Tensor& output, 337 | int ob, 338 | int oc, 339 | int oh, 340 | int ow, 341 | int osb, 342 | int osc, 343 | int osh, 344 | int osw, 345 | 346 | at::Tensor& input1, 347 | int ic, 348 | int ih, 349 | int iw, 350 | int isb, 351 | int isc, 352 | int ish, 353 | int isw, 354 | 355 | at::Tensor& input2, 356 | int gc, 357 | int gsb, 358 | int gsc, 359 | int gsh, 360 | int gsw, 361 | 362 | at::Tensor& rInput1, 363 | at::Tensor& rInput2, 364 | int pad_size, 365 | int kernel_size, 366 | int max_displacement, 367 | int stride1, 368 | int stride2, 369 | int corr_type_multiply, 370 | cudaStream_t stream) 371 | { 372 | 373 | int batchSize = ob; 374 | 
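// Editor note: this host wrapper runs in two stages. channels_first first repacks each padded
// input from NCHW into an NHWC buffer (rInput1/rInput2), with one block per (batch, y, x) and
// THREADS_PER_BLOCK threads striding over the channel dimension; correlation_forward is then
// launched with one block per (batch, output row, output column), whose threads accumulate the
// per-channel products before a warp/block reduction writes each output value.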
375 | int nInputChannels = ic; 376 | int inputWidth = iw; 377 | int inputHeight = ih; 378 | 379 | int nOutputChannels = oc; 380 | int outputWidth = ow; 381 | int outputHeight = oh; 382 | 383 | dim3 blocks_grid(batchSize, inputHeight, inputWidth); 384 | dim3 threads_block(THREADS_PER_BLOCK); 385 | 386 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channels_first_fwd_1", ([&] { 387 | 388 | channels_first<<>>( 389 | input1.data(), rInput1.data(), nInputChannels, inputHeight, inputWidth, pad_size); 390 | 391 | })); 392 | 393 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "channels_first_fwd_2", ([&] { 394 | 395 | channels_first<<>> ( 396 | input2.data(), rInput2.data(), nInputChannels, inputHeight, inputWidth, pad_size); 397 | 398 | })); 399 | 400 | dim3 threadsPerBlock(THREADS_PER_BLOCK); 401 | dim3 totalBlocksCorr(batchSize, outputHeight, outputWidth); 402 | 403 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "correlation_forward", ([&] { 404 | 405 | correlation_forward<<>> 406 | (output.data(), nOutputChannels, outputHeight, outputWidth, 407 | rInput1.data(), nInputChannels, inputHeight, inputWidth, 408 | rInput2.data(), 409 | pad_size, 410 | kernel_size, 411 | max_displacement, 412 | stride1, 413 | stride2); 414 | 415 | })); 416 | 417 | cudaError_t err = cudaGetLastError(); 418 | 419 | 420 | // check for errors 421 | if (err != cudaSuccess) { 422 | printf("error in correlation_forward_cuda_kernel: %s\n", cudaGetErrorString(err)); 423 | return 0; 424 | } 425 | 426 | return 1; 427 | } 428 | 429 | 430 | int correlation_backward_cuda_kernel( 431 | at::Tensor& gradOutput, 432 | int gob, 433 | int goc, 434 | int goh, 435 | int gow, 436 | int gosb, 437 | int gosc, 438 | int gosh, 439 | int gosw, 440 | 441 | at::Tensor& input1, 442 | int ic, 443 | int ih, 444 | int iw, 445 | int isb, 446 | int isc, 447 | int ish, 448 | int isw, 449 | 450 | at::Tensor& input2, 451 | int gsb, 452 | int gsc, 453 | int gsh, 454 | int gsw, 455 | 456 | at::Tensor& gradInput1, 457 | int gisb, 458 | int gisc, 459 | int gish, 460 | int gisw, 461 | 462 | at::Tensor& gradInput2, 463 | int ggc, 464 | int ggsb, 465 | int ggsc, 466 | int ggsh, 467 | int ggsw, 468 | 469 | at::Tensor& rInput1, 470 | at::Tensor& rInput2, 471 | int pad_size, 472 | int kernel_size, 473 | int max_displacement, 474 | int stride1, 475 | int stride2, 476 | int corr_type_multiply, 477 | cudaStream_t stream) 478 | { 479 | 480 | int batchSize = gob; 481 | int num = batchSize; 482 | 483 | int nInputChannels = ic; 484 | int inputWidth = iw; 485 | int inputHeight = ih; 486 | 487 | int nOutputChannels = goc; 488 | int outputWidth = gow; 489 | int outputHeight = goh; 490 | 491 | dim3 blocks_grid(batchSize, inputHeight, inputWidth); 492 | dim3 threads_block(THREADS_PER_BLOCK); 493 | 494 | 495 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "lltm_forward_cuda", ([&] { 496 | 497 | channels_first<<>>( 498 | input1.data(), 499 | rInput1.data(), 500 | nInputChannels, 501 | inputHeight, 502 | inputWidth, 503 | pad_size 504 | ); 505 | })); 506 | 507 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] { 508 | 509 | channels_first<<>>( 510 | input2.data(), 511 | rInput2.data(), 512 | nInputChannels, 513 | inputHeight, 514 | inputWidth, 515 | pad_size 516 | ); 517 | })); 518 | 519 | dim3 threadsPerBlock(THREADS_PER_BLOCK); 520 | dim3 totalBlocksCorr(inputHeight, inputWidth, nInputChannels); 521 | 522 | for (int n = 0; n < num; ++n) { 523 | 524 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] { 
525 | 526 | 527 | correlation_backward_input1<<>> ( 528 | n, gradInput1.data(), nInputChannels, inputHeight, inputWidth, 529 | gradOutput.data(), nOutputChannels, outputHeight, outputWidth, 530 | rInput2.data(), 531 | pad_size, 532 | kernel_size, 533 | max_displacement, 534 | stride1, 535 | stride2); 536 | })); 537 | } 538 | 539 | for(int n = 0; n < batchSize; n++) { 540 | 541 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(rInput1.type(), "lltm_forward_cuda", ([&] { 542 | 543 | correlation_backward_input2<<>>( 544 | n, gradInput2.data(), nInputChannels, inputHeight, inputWidth, 545 | gradOutput.data(), nOutputChannels, outputHeight, outputWidth, 546 | rInput1.data(), 547 | pad_size, 548 | kernel_size, 549 | max_displacement, 550 | stride1, 551 | stride2); 552 | 553 | })); 554 | } 555 | 556 | // check for errors 557 | cudaError_t err = cudaGetLastError(); 558 | if (err != cudaSuccess) { 559 | printf("error in correlation_backward_cuda_kernel: %s\n", cudaGetErrorString(err)); 560 | return 0; 561 | } 562 | 563 | return 1; 564 | } 565 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- /models/correlation_package/dist/correlation_cuda-0.0.0-py3.6-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/models/correlation_package/dist/correlation_cuda-0.0.0-py3.6-linux-x86_64.egg -------------------------------------------------------------------------------- /models/correlation_package/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 
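# Editor note: the torch version below is only a build-time pin for compiling the CUDA extension;
# in general the resulting correlation_cuda module must be rebuilt whenever the runtime
# PyTorch/CUDA toolkit combination changes. The supported GPU architectures are governed by the
# nvcc -gencode flags in setup.py.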
3 | requires = ["setuptools", "wheel", "numpy", "torch==1.0.1.post2"] # PEP 508 specifications. 4 | -------------------------------------------------------------------------------- /models/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | 4 | import torch 5 | from setuptools import find_packages, setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++14'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=sm_70', 17 | '-gencode', 'arch=compute_80,code=sm_80', 18 | ] 19 | 20 | setup( 21 | name='correlation_cuda', 22 | ext_modules=[ 23 | CUDAExtension('correlation_cuda', [ 24 | 'correlation_cuda.cc', 25 | 'correlation_cuda_kernel.cu' 26 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | }) 31 | -------------------------------------------------------------------------------- /pictures/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/pictures/input.pdf -------------------------------------------------------------------------------- /pictures/performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/pictures/performance.pdf -------------------------------------------------------------------------------- /pictures/pipline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCalib/LiDAR2camera_self-check/64fe6335a42947e1b313f3bc0d9d15189890a1e2/pictures/pipline.pdf -------------------------------------------------------------------------------- /quaternion_distances.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/
7 | # -------------------------------------------------------------------
8 | 
9 | # Modified Author: Xudong Lv
10 | # based on github.com/cattaneod/CMRNet/blob/master/quaternion_distances.py
11 | 
12 | import numpy as np
13 | import torch
14 | 
15 | 
16 | def quatmultiply(q, r, device='cpu'):
17 | """
18 | Batch quaternion multiplication
19 | Args:
20 | q (torch.Tensor/np.ndarray): shape=[Nx4]
21 | r (torch.Tensor/np.ndarray): shape=[Nx4]
22 | device (str): 'cuda' or 'cpu'
23 | 
24 | Returns:
25 | torch.Tensor: shape=[Nx4]
26 | """
27 | if isinstance(q, torch.Tensor):
28 | t = torch.zeros(q.shape[0], 4, device=device)
29 | elif isinstance(q, np.ndarray):
30 | t = np.zeros((q.shape[0], 4))
31 | else:
32 | raise TypeError("Type not supported")
33 | t[:, 0] = r[:, 0] * q[:, 0] - r[:, 1] * q[:, 1] - r[:, 2] * q[:, 2] - r[:, 3] * q[:, 3]
34 | t[:, 1] = r[:, 0] * q[:, 1] + r[:, 1] * q[:, 0] - r[:, 2] * q[:, 3] + r[:, 3] * q[:, 2]
35 | t[:, 2] = r[:, 0] * q[:, 2] + r[:, 1] * q[:, 3] + r[:, 2] * q[:, 0] - r[:, 3] * q[:, 1]
36 | t[:, 3] = r[:, 0] * q[:, 3] - r[:, 1] * q[:, 2] + r[:, 2] * q[:, 1] + r[:, 3] * q[:, 0]
37 | return t
38 | 
39 | 
40 | def quatinv(q):
41 | """
42 | Batch quaternion inversion
43 | Args:
44 | q (torch.Tensor/np.ndarray): shape=[Nx4]
45 | 
46 | Returns:
47 | torch.Tensor/np.ndarray: shape=[Nx4]
48 | """
49 | if isinstance(q, torch.Tensor):
50 | t = q.clone()
51 | elif isinstance(q, np.ndarray):
52 | t = q.copy()
53 | else:
54 | raise TypeError("Type not supported")
55 | t *= -1
56 | t[:, 0] *= -1
57 | return t
58 | 
59 | 
60 | def quaternion_distance(q, r, device):
61 | """
62 | Batch quaternion distances, used as loss
63 | Args:
64 | q (torch.Tensor): shape=[Nx4]
65 | r (torch.Tensor): shape=[Nx4]
66 | device (str): 'cuda' or 'cpu'
67 | 
68 | Returns:
69 | torch.Tensor: shape=[N]
70 | """
71 | t = quatmultiply(q, quatinv(r), device)
72 | return 2 * torch.atan2(torch.norm(t[:, 1:], dim=1), torch.abs(t[:, 0]))
73 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit_image
2 | git+https://gitlab.com/m1lhaus/blender-mathutils.git
3 | tqdm==4.19.9
4 | pandas
5 | h5py
6 | matplotlib
7 | scipy
8 | pyquaternion
9 | opencv-python
10 | pykitti
11 | numpy
12 | open3d-python==0.7.0.0
13 | Pillow
14 | scikit-image
15 | sacred==0.7.4
--------------------------------------------------------------------------------
/train_with_sacred.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------
2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab
3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it)
4 | # Released under Creative Commons
5 | # Attribution-NonCommercial-ShareAlike 4.0 International License.
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Pengjin Wei 10 | # based on https://github.com/IIPCVLAB/LCCNet/blob/main/train_with_sacred.py 11 | 12 | import math 13 | import os 14 | import random 15 | import time 16 | 17 | # import apex 18 | import mathutils 19 | import numpy as np 20 | import torch 21 | import torch.nn.functional as F 22 | import torch.nn.parallel 23 | import torch.optim as optim 24 | import torch.utils.data 25 | import torch.nn as nn 26 | 27 | from sacred import Experiment 28 | from sacred.utils import apply_backspaces_and_linefeeds 29 | 30 | from DatasetLidarCamera import DatasetLidarCameraKittiOdometry 31 | from models.LCCNet import LCCNet 32 | 33 | from quaternion_distances import quaternion_distance 34 | 35 | from tensorboardX import SummaryWriter 36 | from utils import (mat2xyzrpy, merge_inputs, overlay_imgs, quat2mat, 37 | quaternion_from_matrix, rotate_back, rotate_forward, 38 | tvector2mat) 39 | from sacred import SETTINGS 40 | SETTINGS.CONFIG.READ_ONLY_CONFIG=False 41 | 42 | 43 | torch.backends.cudnn.enabled = True 44 | torch.backends.cudnn.benchmark = False 45 | 46 | ex = Experiment("LCCNet") 47 | ex.captured_out_filter = apply_backspaces_and_linefeeds 48 | 49 | 50 | # noinspection PyUnusedLocal 51 | @ex.config 52 | def config(): 53 | checkpoints = './checkpoints/' 54 | dataset = 'kitti/odom' # 'kitti/raw' 55 | # data_folder = '/home/wangshuo/Datasets/KITTI/odometry/data_odometry_full/' 56 | data_folder = '/mnt/petrelfs/weipengjin/file_trans/LCCNet_dataset/' 57 | use_reflectance = False 58 | val_sequence = 0 59 | epochs = 120 60 | BASE_LEARNING_RATE = 3e-4 # 1e-4 61 | loss = 'CrossEntropy' #'combined' 62 | max_t = 0.2#1.5 # 1.5, 1.0, 0.5, 0.2, 0.1 63 | max_r = 2.0#20. # 20.0, 10.0, 5.0, 2.0, 1.0 64 | batch_size = 240 # 240 65 | num_worker = 12 66 | network = 'Res_f1' 67 | optimizer = 'adam' 68 | resume = False 69 | weights = None #'./pretrained/kitti_iter5.tar' 70 | rescale_rot = 1.0 71 | rescale_transl = 2.0 72 | precision = "O0" 73 | norm = 'bn' 74 | dropout = 0.0 75 | max_depth = 80. 
76 | weight_point_cloud = 0.5 77 | log_frequency = 10 78 | print_frequency = 50 79 | starting_epoch = -1 80 | 81 | 82 | # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 83 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 84 | 85 | 86 | EPOCH = 1 87 | def _init_fn(worker_id, seed): 88 | seed = seed + worker_id + EPOCH*100 89 | print(f"Init worker {worker_id} with seed {seed}") 90 | torch.manual_seed(seed) 91 | np.random.seed(seed) 92 | random.seed(seed) 93 | 94 | 95 | def get_2D_lidar_projection(pcl, cam_intrinsic): 96 | pcl_xyz = cam_intrinsic @ pcl.T 97 | pcl_xyz = pcl_xyz.T 98 | pcl_z = pcl_xyz[:, 2] 99 | pcl_xyz = pcl_xyz / (pcl_xyz[:, 2, None] + 1e-10) 100 | pcl_uv = pcl_xyz[:, :2] 101 | 102 | return pcl_uv, pcl_z 103 | 104 | 105 | def lidar_project_depth(pc_rotated, cam_calib, img_shape): 106 | pc_rotated = pc_rotated[:3, :].detach().cpu().numpy() 107 | cam_intrinsic = cam_calib.numpy() 108 | pcl_uv, pcl_z = get_2D_lidar_projection(pc_rotated.T, cam_intrinsic) 109 | mask = (pcl_uv[:, 0] > 0) & (pcl_uv[:, 0] < img_shape[1]) & (pcl_uv[:, 1] > 0) & ( 110 | pcl_uv[:, 1] < img_shape[0]) & (pcl_z > 0) 111 | pcl_uv = pcl_uv[mask] 112 | pcl_z = pcl_z[mask] 113 | pcl_uv = pcl_uv.astype(np.uint32) 114 | pcl_z = pcl_z.reshape(-1, 1) 115 | depth_img = np.zeros((img_shape[0], img_shape[1], 1)) 116 | depth_img[pcl_uv[:, 1], pcl_uv[:, 0]] = pcl_z 117 | depth_img = torch.from_numpy(depth_img.astype(np.float32)) 118 | depth_img = depth_img.cuda() 119 | depth_img = depth_img.permute(2, 0, 1) 120 | 121 | return depth_img, pcl_uv 122 | 123 | 124 | # CCN training 125 | @ex.capture 126 | def train(model, optimizer, rgb_img, refl_img, target_transl, target_rot, loss_fn, point_clouds, loss): 127 | model.train() 128 | 129 | optimizer.zero_grad() 130 | 131 | # Run model 132 | transl_err, rot_err = model(rgb_img, refl_img) 133 | 134 | if loss == 'points_distance' or loss == 'combined': 135 | losses = loss_fn(point_clouds, target_transl, target_rot, transl_err, rot_err) 136 | else: 137 | losses = loss_fn(target_transl, target_rot, transl_err, rot_err) 138 | 139 | losses['total_loss'].backward() 140 | optimizer.step() 141 | 142 | return losses, rot_err, transl_err 143 | 144 | # CNN Classification training 145 | @ex.capture 146 | def train1(model, optimizer, rgb_img, refl_img, label, loss_fn): 147 | model.train() 148 | 149 | optimizer.zero_grad() 150 | 151 | # Run model 152 | classification_results = model(rgb_img, refl_img) 153 | 154 | crossentropyloss_output = loss_fn(classification_results,label) 155 | 156 | crossentropyloss_output.backward() 157 | optimizer.step() 158 | 159 | return crossentropyloss_output, classification_results 160 | 161 | 162 | # CNN test 163 | @ex.capture 164 | def val(model, rgb_img, refl_img, target_transl, target_rot, loss_fn, point_clouds, loss): 165 | model.eval() 166 | 167 | # Run model 168 | with torch.no_grad(): 169 | transl_err, rot_err = model(rgb_img, refl_img) 170 | 171 | if loss == 'points_distance' or loss == 'combined': 172 | losses = loss_fn(point_clouds, target_transl, target_rot, transl_err, rot_err) 173 | else: 174 | losses = loss_fn(target_transl, target_rot, transl_err, rot_err) 175 | 176 | 177 | total_trasl_error = torch.tensor(0.0) 178 | total_rot_error = quaternion_distance(target_rot, rot_err, target_rot.device) 179 | total_rot_error = total_rot_error * 180. / math.pi 180 | for j in range(rgb_img.shape[0]): 181 | total_trasl_error += torch.norm(target_transl[j] - transl_err[j]) * 100. 
182 | 183 | 184 | return losses, total_trasl_error.item(), total_rot_error.sum().item(), rot_err, transl_err 185 | 186 | # CNN test 187 | @ex.capture 188 | def val1(model, optimizer, rgb_img, refl_img, label, loss_fn): 189 | model.eval() 190 | 191 | # Run model 192 | with torch.no_grad(): 193 | classification_results = model(rgb_img, refl_img) 194 | 195 | crossentropyloss_output = loss_fn(classification_results,label) 196 | 197 | return crossentropyloss_output, classification_results 198 | 199 | @ex.automain 200 | def main(_config, _run, seed): 201 | global EPOCH 202 | print('Loss Function Choice: {}'.format(_config['loss'])) 203 | 204 | if _config['val_sequence'] is None: 205 | raise TypeError('val_sequences cannot be None') 206 | else: 207 | _config['val_sequence'] = f"{_config['val_sequence']:02d}" 208 | print("Val Sequence: ", _config['val_sequence']) 209 | dataset_class = DatasetLidarCameraKittiOdometry 210 | img_shape = (384, 1280) # 网络的输入尺度 211 | input_size = (256, 512) 212 | _config["checkpoints"] = os.path.join(_config["checkpoints"], _config['dataset']) 213 | 214 | dataset_train = dataset_class(_config['data_folder'], max_r=_config['max_r'], max_t=_config['max_t'], 215 | split='train', use_reflectance=_config['use_reflectance'], 216 | val_sequence=_config['val_sequence']) 217 | dataset_val = dataset_class(_config['data_folder'], max_r=_config['max_r'], max_t=_config['max_t'], 218 | split='val', use_reflectance=_config['use_reflectance'], 219 | val_sequence=_config['val_sequence']) 220 | model_savepath = os.path.join(_config['checkpoints'], 'val_seq_' + _config['val_sequence'], 'models') 221 | if not os.path.exists(model_savepath): 222 | os.makedirs(model_savepath) 223 | log_savepath = os.path.join(_config['checkpoints'], 'val_seq_' + _config['val_sequence'], 'log') 224 | if not os.path.exists(log_savepath): 225 | os.makedirs(log_savepath) 226 | train_writer = SummaryWriter(os.path.join(log_savepath, 'train')) 227 | val_writer = SummaryWriter(os.path.join(log_savepath, 'val')) 228 | 229 | np.random.seed(seed) 230 | torch.random.manual_seed(seed) 231 | 232 | def init_fn(x): return _init_fn(x, seed) 233 | 234 | train_dataset_size = len(dataset_train) 235 | val_dataset_size = len(dataset_val) 236 | print('Number of the train dataset: {}'.format(train_dataset_size)) 237 | print('Number of the val dataset: {}'.format(val_dataset_size)) 238 | 239 | # Training and validation set creation 240 | num_worker = _config['num_worker'] 241 | batch_size = _config['batch_size'] 242 | TrainImgLoader = torch.utils.data.DataLoader(dataset=dataset_train, 243 | shuffle=True, 244 | batch_size=batch_size, 245 | num_workers=num_worker, 246 | worker_init_fn=init_fn, 247 | collate_fn=merge_inputs, 248 | drop_last=False, 249 | pin_memory=True) 250 | 251 | ValImgLoader = torch.utils.data.DataLoader(dataset=dataset_val, 252 | shuffle=False, 253 | batch_size=batch_size, 254 | num_workers=num_worker, 255 | worker_init_fn=init_fn, 256 | collate_fn=merge_inputs, 257 | drop_last=False, 258 | pin_memory=True) 259 | 260 | print(len(TrainImgLoader)) 261 | print(len(ValImgLoader)) 262 | 263 | if _config['loss'] == 'CrossEntropy': 264 | loss_fn = nn.CrossEntropyLoss() 265 | else: 266 | raise ValueError("Unknown Loss Function") 267 | 268 | # network choice and settings 269 | if _config['network'].startswith('Res'): 270 | feat = 1 271 | md = 4 272 | split = _config['network'].split('_') 273 | for item in split[1:]: 274 | if item.startswith('f'): 275 | feat = int(item[-1]) 276 | elif item.startswith('md'): 277 | md = 
int(item[2:]) 278 | assert 0 < feat < 7, "Feature Number from PWC have to be between 1 and 6" 279 | assert 0 < md, "md must be positive" 280 | model = LCCNet(input_size, use_feat_from=feat, md=md, 281 | use_reflectance=_config['use_reflectance'], dropout=_config['dropout'], 282 | Action_Func='leakyrelu', attention=False, res_num=18) 283 | else: 284 | raise TypeError("Network unknown") 285 | if _config['weights'] is not None: 286 | print(f"Loading weights from {_config['weights']}") 287 | checkpoint = torch.load(_config['weights'], map_location='cpu') 288 | saved_state_dict = checkpoint['state_dict'] 289 | model.load_state_dict(saved_state_dict, strict=False) 290 | 291 | # model = model.to(device) 292 | model = nn.DataParallel(model) 293 | model = model.cuda() 294 | 295 | print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) 296 | 297 | parameters = list(filter(lambda p: p.requires_grad, model.parameters())) 298 | if _config['loss'] == 'geometric': 299 | parameters += list(loss_fn.parameters()) 300 | if _config['optimizer'] == 'adam': 301 | optimizer = optim.Adam(parameters, lr=_config['BASE_LEARNING_RATE'], weight_decay=5e-6) 302 | # Probably this scheduler is not used 303 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 50, 70], gamma=0.5) 304 | else: 305 | optimizer = optim.SGD(parameters, lr=_config['BASE_LEARNING_RATE'], momentum=0.9, 306 | weight_decay=5e-6, nesterov=True) 307 | 308 | starting_epoch = _config['starting_epoch'] 309 | if _config['weights'] is not None and _config['resume']: 310 | checkpoint = torch.load(_config['weights'], map_location='cpu') 311 | opt_state_dict = checkpoint['optimizer'] 312 | optimizer.load_state_dict(opt_state_dict) 313 | if starting_epoch != 0: 314 | starting_epoch = checkpoint['epoch'] 315 | 316 | 317 | start_full_time = time.time() 318 | BEST_VAL_LOSS = 10000. 319 | old_save_filename = None 320 | 321 | train_iter = 0 322 | val_iter = 0 323 | for epoch in range(starting_epoch, _config['epochs'] + 1): 324 | EPOCH = epoch 325 | print('This is %d-th epoch' % epoch) 326 | epoch_start_time = time.time() 327 | total_train_loss = 0 328 | local_loss = 0. 
329 | if _config['optimizer'] != 'adam': 330 | _run.log_scalar("LR", _config['BASE_LEARNING_RATE'] * 331 | math.exp((1 - epoch) * 4e-2), epoch) 332 | for param_group in optimizer.param_groups: 333 | param_group['lr'] = _config['BASE_LEARNING_RATE'] * \ 334 | math.exp((1 - epoch) * 4e-2) 335 | else: 336 | #scheduler.step(epoch%100) 337 | _run.log_scalar("LR", scheduler.get_lr()[0]) 338 | 339 | 340 | ## Training ## 341 | time_for_50ep = time.time() 342 | for batch_idx, sample in enumerate(TrainImgLoader): 343 | #print(f'batch {batch_idx+1}/{len(TrainImgLoader)}', end='\r') 344 | start_time = time.time() 345 | lidar_input = [] 346 | rgb_input = [] 347 | lidar_gt = [] 348 | shape_pad_input = [] 349 | real_shape_input = [] 350 | pc_rotated_input = [] 351 | 352 | # gt pose 353 | sample['tr_error'] = sample['tr_error'].cuda() 354 | sample['rot_error'] = sample['rot_error'].cuda() 355 | 356 | sample['label_nag'] = torch.from_numpy(np.array(sample['label_nag'])).cuda() 357 | sample['label_pos'] = torch.from_numpy(np.array(sample['label_pos'])).cuda() 358 | 359 | sample['label'] = sample['label'].cuda() 360 | 361 | start_preprocess = time.time() 362 | for idx in range(len(sample['rgb'])): 363 | # ProjectPointCloud in RT-pose 364 | real_shape = [sample['rgb'][idx].shape[1], sample['rgb'][idx].shape[2], sample['rgb'][idx].shape[0]] 365 | 366 | sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda() # 变换到相机坐标系下的激光雷达点云 367 | pc_lidar = sample['point_cloud'][idx].clone() 368 | 369 | if _config['max_depth'] < 80.: 370 | pc_lidar = pc_lidar[:, pc_lidar[0, :] < _config['max_depth']].clone() 371 | 372 | depth_gt, uv = lidar_project_depth(pc_lidar, sample['calib'][idx], real_shape) # image_shape 373 | depth_gt /= _config['max_depth'] 374 | 375 | R = mathutils.Quaternion(sample['rot_error'][idx]).to_matrix() 376 | R.resize_4x4() 377 | T = mathutils.Matrix.Translation(sample['tr_error'][idx]) 378 | RT = T * R 379 | 380 | pc_rotated = rotate_back(sample['point_cloud'][idx], RT) # Pc` = RT * Pc 381 | 382 | if _config['max_depth'] < 80.: 383 | pc_rotated = pc_rotated[:, pc_rotated[0, :] < _config['max_depth']].clone() 384 | 385 | depth_img, uv = lidar_project_depth(pc_rotated, sample['calib'][idx], real_shape) # image_shape 386 | depth_img /= _config['max_depth'] 387 | 388 | # PAD ONLY ON RIGHT AND BOTTOM SIDE 389 | rgb = sample['rgb'][idx].cuda() 390 | shape_pad = [0, 0, 0, 0] 391 | 392 | shape_pad[3] = (img_shape[0] - rgb.shape[1]) # // 2 393 | shape_pad[1] = (img_shape[1] - rgb.shape[2]) # // 2 + 1 394 | 395 | rgb = F.pad(rgb, shape_pad) 396 | depth_img = F.pad(depth_img, shape_pad) 397 | depth_gt = F.pad(depth_gt, shape_pad) 398 | 399 | rgb_input.append(rgb) 400 | lidar_input.append(depth_img) 401 | lidar_gt.append(depth_gt) 402 | real_shape_input.append(real_shape) 403 | shape_pad_input.append(shape_pad) 404 | pc_rotated_input.append(pc_rotated) 405 | 406 | lidar_input = torch.stack(lidar_input) 407 | rgb_input = torch.stack(rgb_input) 408 | rgb_show = rgb_input.clone() 409 | lidar_show = lidar_input.clone() 410 | rgb_input = F.interpolate(rgb_input, size=[256, 512], mode="bilinear") 411 | lidar_input = F.interpolate(lidar_input, size=[256, 512], mode="bilinear") 412 | end_preprocess = time.time() 413 | 414 | loss, predict_label = train1(model, optimizer, rgb_input, lidar_input, sample['label'], loss_fn) 415 | 416 | _, classify_reaults = torch.max(predict_label,dim=1) 417 | statistic_results = torch.eq(classify_reaults,sample['label']) 418 | 419 | if batch_idx % _config['log_frequency'] == 0: 420 | 
train_writer.add_scalar("loss", loss.item(), train_iter) 421 | local_loss += loss.item() 422 | 423 | if batch_idx % 50 == 0 and batch_idx != 0: 424 | 425 | print(f'Iter {batch_idx}/{len(TrainImgLoader)} training loss = {local_loss/50:.3f}, ' 426 | f'time = {(time.time() - start_time)/lidar_input.shape[0]:.4f}, ' 427 | #f'time_preprocess = {(end_preprocess-start_preprocess)/lidar_input.shape[0]:.4f}, ' 428 | f'time for 50 iter: {time.time()-time_for_50ep:.4f}') 429 | time_for_50ep = time.time() 430 | _run.log_scalar("Loss", local_loss/50, train_iter) 431 | local_loss = 0. 432 | # total_train_loss += loss['total_loss'].item() * len(sample['rgb']) 433 | total_train_loss += loss.item() * len(sample['rgb']) 434 | train_iter += 1 435 | # total_iter += len(sample['rgb']) 436 | 437 | print("------------------------------------") 438 | print('epoch %d total training loss = %.3f' % (epoch, total_train_loss / len(dataset_train))) 439 | print('Total epoch time = %.2f' % (time.time() - epoch_start_time)) 440 | print("------------------------------------") 441 | _run.log_scalar("Total training loss", total_train_loss / len(dataset_train), epoch) 442 | 443 | ## Validation ## 444 | total_val_loss = 0. 445 | total_val_t = 0. 446 | total_val_r = 0. 447 | 448 | local_loss = 0.0 449 | correct_samples = 0 450 | wrong_samples = 0 451 | for batch_idx, sample in enumerate(ValImgLoader): 452 | #print(f'batch {batch_idx+1}/{len(TrainImgLoader)}', end='\r') 453 | start_time = time.time() 454 | lidar_input = [] 455 | rgb_input = [] 456 | lidar_gt = [] 457 | shape_pad_input = [] 458 | real_shape_input = [] 459 | pc_rotated_input = [] 460 | 461 | # gt pose 462 | sample['tr_error'] = sample['tr_error'].cuda() 463 | sample['rot_error'] = sample['rot_error'].cuda() 464 | sample['label'] = sample['label'].cuda() 465 | sample['label_nag'] = torch.from_numpy(np.array(sample['label_nag'])).cuda() 466 | sample['label_pos'] = torch.from_numpy(np.array(sample['label_pos'])).cuda() 467 | 468 | for idx in range(len(sample['rgb'])): 469 | # ProjectPointCloud in RT-pose 470 | real_shape = [sample['rgb'][idx].shape[1], sample['rgb'][idx].shape[2], sample['rgb'][idx].shape[0]] 471 | 472 | sample['point_cloud'][idx] = sample['point_cloud'][idx].cuda() # 变换到相机坐标系下的激光雷达点云 473 | pc_lidar = sample['point_cloud'][idx].clone() 474 | 475 | if _config['max_depth'] < 80.: 476 | pc_lidar = pc_lidar[:, pc_lidar[0, :] < _config['max_depth']].clone() 477 | 478 | depth_gt, uv = lidar_project_depth(pc_lidar, sample['calib'][idx], real_shape) # image_shape 479 | depth_gt /= _config['max_depth'] 480 | 481 | reflectance = None 482 | if _config['use_reflectance']: 483 | reflectance = sample['reflectance'][idx].cuda() 484 | 485 | R = mathutils.Quaternion(sample['rot_error'][idx]).to_matrix() 486 | R.resize_4x4() 487 | T = mathutils.Matrix.Translation(sample['tr_error'][idx]) 488 | RT = T * R 489 | 490 | pc_rotated = rotate_back(sample['point_cloud'][idx], RT) # Pc` = RT * Pc 491 | 492 | if _config['max_depth'] < 80.: 493 | pc_rotated = pc_rotated[:, pc_rotated[0, :] < _config['max_depth']].clone() 494 | 495 | depth_img, uv = lidar_project_depth(pc_rotated, sample['calib'][idx], real_shape) # image_shape 496 | depth_img /= _config['max_depth'] 497 | 498 | if _config['use_reflectance']: 499 | refl_img = None 500 | 501 | # PAD ONLY ON RIGHT AND BOTTOM SIDE 502 | rgb = sample['rgb'][idx].cuda() 503 | shape_pad = [0, 0, 0, 0] 504 | 505 | shape_pad[3] = (img_shape[0] - rgb.shape[1]) # // 2 506 | shape_pad[1] = (img_shape[1] - rgb.shape[2]) # // 2 + 1 507 | 508 
| rgb = F.pad(rgb, shape_pad) 509 | depth_img = F.pad(depth_img, shape_pad) 510 | depth_gt = F.pad(depth_gt, shape_pad) 511 | 512 | rgb_input.append(rgb) 513 | lidar_input.append(depth_img) 514 | lidar_gt.append(depth_gt) 515 | real_shape_input.append(real_shape) 516 | shape_pad_input.append(shape_pad) 517 | pc_rotated_input.append(pc_rotated) 518 | 519 | lidar_input = torch.stack(lidar_input) 520 | rgb_input = torch.stack(rgb_input) 521 | rgb_show = rgb_input.clone() 522 | lidar_show = lidar_input.clone() 523 | rgb_input = F.interpolate(rgb_input, size=[256, 512], mode="bilinear") 524 | lidar_input = F.interpolate(lidar_input, size=[256, 512], mode="bilinear") 525 | 526 | loss, predict_label = val1(model, optimizer, rgb_input, lidar_input, sample['label'].long(), loss_fn) 527 | _, classify_reaults = torch.max(predict_label,dim=1) 528 | statistic_results = torch.eq(classify_reaults,sample['label'].long()) 529 | correct_samples += torch.sum(statistic_results, dim=0) 530 | wrong_samples += statistic_results.shape[0] - torch.sum(statistic_results, dim=0) 531 | 532 | total_val_loss += loss.item() * len(sample['rgb']) 533 | val_iter += 1 534 | 535 | print("------------------------------------") 536 | print('total val loss = %.3f' % (total_val_loss / len(dataset_val))) 537 | print(f'total traslation error: {total_val_t / len(dataset_val)} cm') 538 | print(f'total rotation error: {total_val_r / len(dataset_val)} °') 539 | print("total accuracy:",float(correct_samples)/float(correct_samples + wrong_samples)) 540 | print("------------------------------------") 541 | 542 | _run.log_scalar("Val_Loss", total_val_loss / len(dataset_val), epoch) 543 | _run.log_scalar("Val_t_error", total_val_t / len(dataset_val), epoch) 544 | _run.log_scalar("Val_r_error", total_val_r / len(dataset_val), epoch) 545 | 546 | # SAVE 547 | val_loss = total_val_loss / len(dataset_val) 548 | if val_loss < BEST_VAL_LOSS: 549 | BEST_VAL_LOSS = val_loss 550 | #_run.result = BEST_VAL_LOSS 551 | if _config['rescale_transl'] > 0: 552 | _run.result = total_val_t / len(dataset_val) 553 | else: 554 | _run.result = total_val_r / len(dataset_val) 555 | savefilename = f'{model_savepath}/checkpoint_r{_config["max_r"]:.2f}_t{_config["max_t"]:.2f}_e{epoch}_{val_loss:.3f}.tar' 556 | torch.save({ 557 | 'config': _config, 558 | 'epoch': epoch, 559 | # 'state_dict': model.state_dict(), # single gpu 560 | 'state_dict': model.module.state_dict(), # multi gpu 561 | 'optimizer': optimizer.state_dict(), 562 | 'train_loss': total_train_loss / len(dataset_train), 563 | 'val_loss': total_val_loss / len(dataset_val), 564 | }, savefilename) 565 | print(f'Model saved as {savefilename}') 566 | if old_save_filename is not None: 567 | if os.path.exists(old_save_filename): 568 | os.remove(old_save_filename) 569 | old_save_filename = savefilename 570 | 571 | print('full training time = %.2f HR' % ((time.time() - start_full_time) / 3600)) 572 | return _run.result 573 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------- 2 | # Copyright (C) 2020 Università degli studi di Milano-Bicocca, iralab 3 | # Author: Daniele Cattaneo (d.cattaneo10@campus.unimib.it) 4 | # Released under Creative Commons 5 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 
6 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ 7 | # ------------------------------------------------------------------- 8 | 9 | # Modified Author: Xudong Lv 10 | # based on github.com/cattaneod/CMRNet/blob/master/utils.py 11 | 12 | import math 13 | 14 | import mathutils 15 | import numpy as np 16 | import torch 17 | import torch.nn.functional as F 18 | from matplotlib import cm 19 | from torch.utils.data.dataloader import default_collate 20 | 21 | 22 | def rotate_points(PC, R, T=None, inverse=True): 23 | if T is not None: 24 | R = R.to_matrix() 25 | R.resize_4x4() 26 | T = mathutils.Matrix.Translation(T) 27 | RT = T*R 28 | else: 29 | RT=R.copy() 30 | if inverse: 31 | RT.invert_safe() 32 | RT = torch.tensor(RT, device=PC.device, dtype=torch.float) 33 | 34 | if PC.shape[0] == 4: 35 | PC = torch.mm(RT, PC) 36 | elif PC.shape[1] == 4: 37 | PC = torch.mm(RT, PC.t()) 38 | PC = PC.t() 39 | else: 40 | raise TypeError("Point cloud must have shape [Nx4] or [4xN] (homogeneous coordinates)") 41 | return PC 42 | 43 | 44 | def rotate_points_torch(PC, R, T=None, inverse=True): 45 | if T is not None: 46 | R = quat2mat(R) 47 | T = tvector2mat(T) 48 | RT = torch.mm(T, R) 49 | else: 50 | RT = R.clone() 51 | if inverse: 52 | RT = RT.inverse() 53 | 54 | if PC.shape[0] == 4: 55 | PC = torch.mm(RT, PC) 56 | elif PC.shape[1] == 4: 57 | PC = torch.mm(RT, PC.t()) 58 | PC = PC.t() 59 | else: 60 | raise TypeError("Point cloud must have shape [Nx4] or [4xN] (homogeneous coordinates)") 61 | return PC 62 | 63 | 64 | def rotate_forward(PC, R, T=None): 65 | """ 66 | Transform the point cloud PC, so to have the points 'as seen from' the new 67 | pose T*R 68 | Args: 69 | PC (torch.Tensor): Point Cloud to be transformed, shape [4xN] or [Nx4] 70 | R (torch.Tensor/mathutils.Euler): can be either: 71 | * (mathutils.Euler) euler angles of the rotation part, in this case T cannot be None 72 | * (torch.Tensor shape [4]) quaternion representation of the rotation part, in this case T cannot be None 73 | * (mathutils.Matrix shape [4x4]) Rotation matrix, 74 | in this case it should contains the translation part, and T should be None 75 | * (torch.Tensor shape [4x4]) Rotation matrix, 76 | in this case it should contains the translation part, and T should be None 77 | T (torch.Tensor/mathutils.Vector): Translation of the new pose, shape [3], or None (depending on R) 78 | 79 | Returns: 80 | torch.Tensor: Transformed Point Cloud 'as seen from' pose T*R 81 | """ 82 | if isinstance(R, torch.Tensor): 83 | return rotate_points_torch(PC, R, T, inverse=True) 84 | else: 85 | return rotate_points(PC, R, T, inverse=True) 86 | 87 | 88 | def rotate_back(PC_ROTATED, R, T=None): 89 | """ 90 | Inverse of :func:`~utils.rotate_forward`. 91 | """ 92 | if isinstance(R, torch.Tensor): 93 | return rotate_points_torch(PC_ROTATED, R, T, inverse=False) 94 | else: 95 | return rotate_points(PC_ROTATED, R, T, inverse=False) 96 | 97 | 98 | def invert_pose(R, T): 99 | """ 100 | Given the 'sampled pose' (aka H_init), we want CMRNet to predict inv(H_init). 101 | inv(T*R) will be used as ground truth for the network. 
102 | Args: 103 | R (mathutils.Euler): Rotation of 'sampled pose' 104 | T (mathutils.Vector): Translation of 'sampled pose' 105 | 106 | Returns: 107 | (R_GT, T_GT) = (mathutils.Quaternion, mathutils.Vector) 108 | """ 109 | R = R.to_matrix() 110 | R.resize_4x4() 111 | T = mathutils.Matrix.Translation(T) 112 | RT = T * R 113 | RT.invert_safe() 114 | T_GT, R_GT, _ = RT.decompose() 115 | return R_GT.normalized(), T_GT 116 | 117 | 118 | def merge_inputs(queries): 119 | point_clouds = [] 120 | imgs = [] 121 | reflectances = [] 122 | returns = {key: default_collate([d[key] for d in queries]) for key in queries[0] 123 | if key != 'point_cloud' and key != 'rgb' and key != 'reflectance'} 124 | for input in queries: 125 | point_clouds.append(input['point_cloud']) 126 | imgs.append(input['rgb']) 127 | if 'reflectance' in input: 128 | reflectances.append(input['reflectance']) 129 | returns['point_cloud'] = point_clouds 130 | returns['rgb'] = imgs 131 | if len(reflectances) > 0: 132 | returns['reflectance'] = reflectances 133 | return returns 134 | 135 | 136 | def quaternion_from_matrix(matrix): 137 | """ 138 | Convert a rotation matrix to quaternion. 139 | Args: 140 | matrix (torch.Tensor): [4x4] transformation matrix or [3,3] rotation matrix. 141 | 142 | Returns: 143 | torch.Tensor: shape [4], normalized quaternion 144 | """ 145 | if matrix.shape == (4, 4): 146 | R = matrix[:-1, :-1] 147 | elif matrix.shape == (3, 3): 148 | R = matrix 149 | else: 150 | raise TypeError("Not a valid rotation matrix") 151 | tr = R[0, 0] + R[1, 1] + R[2, 2] 152 | q = torch.zeros(4, device=matrix.device) 153 | if tr > 0.: 154 | S = (tr+1.0).sqrt() * 2 155 | q[0] = 0.25 * S 156 | q[1] = (R[2, 1] - R[1, 2]) / S 157 | q[2] = (R[0, 2] - R[2, 0]) / S 158 | q[3] = (R[1, 0] - R[0, 1]) / S 159 | elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]: 160 | S = (1.0 + R[0, 0] - R[1, 1] - R[2, 2]).sqrt() * 2 161 | q[0] = (R[2, 1] - R[1, 2]) / S 162 | q[1] = 0.25 * S 163 | q[2] = (R[0, 1] + R[1, 0]) / S 164 | q[3] = (R[0, 2] + R[2, 0]) / S 165 | elif R[1, 1] > R[2, 2]: 166 | S = (1.0 + R[1, 1] - R[0, 0] - R[2, 2]).sqrt() * 2 167 | q[0] = (R[0, 2] - R[2, 0]) / S 168 | q[1] = (R[0, 1] + R[1, 0]) / S 169 | q[2] = 0.25 * S 170 | q[3] = (R[1, 2] + R[2, 1]) / S 171 | else: 172 | S = (1.0 + R[2, 2] - R[0, 0] - R[1, 1]).sqrt() * 2 173 | q[0] = (R[1, 0] - R[0, 1]) / S 174 | q[1] = (R[0, 2] + R[2, 0]) / S 175 | q[2] = (R[1, 2] + R[2, 1]) / S 176 | q[3] = 0.25 * S 177 | return q / q.norm() 178 | 179 | 180 | def quatmultiply(q, r): 181 | """ 182 | Multiply two quaternions 183 | Args: 184 | q (torch.Tensor/nd.ndarray): shape=[4], first quaternion 185 | r (torch.Tensor/nd.ndarray): shape=[4], second quaternion 186 | 187 | Returns: 188 | torch.Tensor: shape=[4], normalized quaternion q*r 189 | """ 190 | t = torch.zeros(4, device=q.device) 191 | t[0] = r[0] * q[0] - r[1] * q[1] - r[2] * q[2] - r[3] * q[3] 192 | t[1] = r[0] * q[1] + r[1] * q[0] - r[2] * q[3] + r[3] * q[2] 193 | t[2] = r[0] * q[2] + r[1] * q[3] + r[2] * q[0] - r[3] * q[1] 194 | t[3] = r[0] * q[3] - r[1] * q[2] + r[2] * q[1] + r[3] * q[0] 195 | return t / t.norm() 196 | 197 | 198 | def quat2mat(q): 199 | """ 200 | Convert a quaternion to a rotation matrix 201 | Args: 202 | q (torch.Tensor): shape [4], input quaternion 203 | 204 | Returns: 205 | torch.Tensor: [4x4] homogeneous rotation matrix 206 | """ 207 | assert q.shape == torch.Size([4]), "Not a valid quaternion" 208 | if q.norm() != 1.: 209 | q = q / q.norm() 210 | mat = torch.zeros((4, 4), device=q.device) 211 | mat[0, 0] = 1 - 2*q[2]**2 - 
2*q[3]**2 212 | mat[0, 1] = 2*q[1]*q[2] - 2*q[3]*q[0] 213 | mat[0, 2] = 2*q[1]*q[3] + 2*q[2]*q[0] 214 | mat[1, 0] = 2*q[1]*q[2] + 2*q[3]*q[0] 215 | mat[1, 1] = 1 - 2*q[1]**2 - 2*q[3]**2 216 | mat[1, 2] = 2*q[2]*q[3] - 2*q[1]*q[0] 217 | mat[2, 0] = 2*q[1]*q[3] - 2*q[2]*q[0] 218 | mat[2, 1] = 2*q[2]*q[3] + 2*q[1]*q[0] 219 | mat[2, 2] = 1 - 2*q[1]**2 - 2*q[2]**2 220 | mat[3, 3] = 1. 221 | return mat 222 | 223 | 224 | def tvector2mat(t): 225 | """ 226 | Translation vector to homogeneous transformation matrix with identity rotation 227 | Args: 228 | t (torch.Tensor): shape=[3], translation vector 229 | 230 | Returns: 231 | torch.Tensor: [4x4] homogeneous transformation matrix 232 | 233 | """ 234 | assert t.shape == torch.Size([3]), "Not a valid translation" 235 | mat = torch.eye(4, device=t.device) 236 | mat[0, 3] = t[0] 237 | mat[1, 3] = t[1] 238 | mat[2, 3] = t[2] 239 | return mat 240 | 241 | 242 | def mat2xyzrpy(rotmatrix): 243 | """ 244 | Decompose transformation matrix into components 245 | Args: 246 | rotmatrix (torch.Tensor/np.ndarray): [4x4] transformation matrix 247 | 248 | Returns: 249 | torch.Tensor: shape=[6], contains xyzrpy 250 | """ 251 | roll = math.atan2(-rotmatrix[1, 2], rotmatrix[2, 2]) 252 | pitch = math.asin ( rotmatrix[0, 2]) 253 | yaw = math.atan2(-rotmatrix[0, 1], rotmatrix[0, 0]) 254 | x = rotmatrix[:3, 3][0] 255 | y = rotmatrix[:3, 3][1] 256 | z = rotmatrix[:3, 3][2] 257 | 258 | return torch.tensor([x, y, z, roll, pitch, yaw], device=rotmatrix.device, dtype=rotmatrix.dtype) 259 | 260 | 261 | def to_rotation_matrix(R, T): 262 | R = quat2mat(R) 263 | T = tvector2mat(T) 264 | RT = torch.mm(T, R) 265 | return RT 266 | 267 | 268 | def overlay_imgs(rgb, lidar, idx=0): 269 | std = [0.229, 0.224, 0.225] 270 | mean = [0.485, 0.456, 0.406] 271 | 272 | rgb = rgb.clone().cpu().permute(1,2,0).numpy() 273 | rgb = rgb*std+mean 274 | lidar = lidar.clone() 275 | 276 | lidar[lidar == 0] = 1000. 277 | lidar = -lidar 278 | #lidar = F.max_pool2d(lidar, 3, 1, 1) 279 | lidar = F.max_pool2d(lidar, 3, 1, 1) 280 | lidar = -lidar 281 | lidar[lidar == 1000.] = 0. 282 | 283 | #lidar = lidar.squeeze() 284 | lidar = lidar[0][0] 285 | lidar = (lidar*255).int().cpu().numpy() 286 | lidar_color = cm.jet(lidar) 287 | lidar_color[:, :, 3] = 0.5 288 | lidar_color[lidar == 0] = [0, 0, 0, 0] 289 | blended_img = lidar_color[:, :, :3] * (np.expand_dims(lidar_color[:, :, 3], 2)) + \ 290 | rgb * (1. - np.expand_dims(lidar_color[:, :, 3], 2)) 291 | blended_img = blended_img.clip(min=0., max=1.) 292 | #io.imshow(blended_img) 293 | #io.show() 294 | #plt.figure() 295 | #plt.imshow(blended_img) 296 | #io.imsave(f'./IMGS/{idx:06d}.png', blended_img) 297 | return blended_img 298 | --------------------------------------------------------------------------------
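
A minimal usage sketch (not part of the repository files above) showing how the pose helpers in utils.py compose: quat2mat and tvector2mat build a 4x4 homogeneous transform as T * R, rotate_forward applies its inverse to a homogeneous [4xN] point cloud, and rotate_back undoes it. The quaternion, translation, and point cloud below are illustrative values only, not values used by the training code.

# Illustrative sketch only -- assumes utils.py from this repository is on the import path.
import torch
from utils import quat2mat, tvector2mat, rotate_forward, rotate_back, quaternion_from_matrix

pc = torch.rand(4, 100)                      # homogeneous point cloud, shape [4xN]
pc[3, :] = 1.0

q = torch.tensor([1.0, 0.0, 0.0, 0.0])       # unit quaternion (w, x, y, z): identity rotation
t = torch.tensor([0.5, -0.2, 1.0])           # translation vector
RT = torch.mm(tvector2mat(t), quat2mat(q))   # 4x4 pose, composed as T * R as in train_with_sacred.py

pc_seen = rotate_forward(pc, RT)             # points 'as seen from' the pose, i.e. inv(RT) applied to pc
pc_back = rotate_back(pc_seen, RT)           # RT applied back, recovering the original cloud
assert torch.allclose(pc, pc_back, atol=1e-5)

print(quaternion_from_matrix(RT))            # normalized quaternion recovered from the 4x4 matrix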