├── .gitignore
├── README.md
└── detection_toolbox
    ├── comet
    │   └── send_data.py
    ├── kitti
    │   ├── calibration.py
    │   ├── eval copy.py
    │   ├── eval.py
    │   ├── kitti_label.py
    │   ├── kitti_object.py
    │   └── nms.py
    ├── std
    │   ├── __init__.py
    │   ├── log.py
    │   └── os.py
    ├── utils_3d
    │   └── utils.py
    └── vis
        ├── vis2d.py
        └── vis3d.py

/.gitignore:
--------------------------------------------------------------------------------
1 | run/
2 | *pyc
3 | *ipynb
4 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is a util toolbox that I'm slowly building up.
2 | A lot of it is geared toward 2d/3d detection and the KITTI dataset, but it also includes some comet-interfacing code and random linux utilities I've had to use.
3 | 
4 | The goal of this util repo is to have a store of versatile functions.
5 | As such, I am trying my hardest to keep all the functions/files free of individual use cases.
6 | Individual use cases will be limited to run/, which will be gitignored.
7 | 
--------------------------------------------------------------------------------
/detection_toolbox/comet/send_data.py:
--------------------------------------------------------------------------------
1 | from comet_ml import Experiment
2 | import time
3 | import os
4 | 
5 | '''
6 | This file tails a log file (which some other process keeps appending to)
7 | and mirrors its contents to the comet experiment's output.
8 | Inputs:
9 | project_name: Project name on comet
10 | exp_name: Experiment name on comet
11 | log_file_path: Log file to keep track of/send
12 | refresh_rate: Time between checks of log_file to see if anything changed. (seconds)
13 | '''
14 | def log_file_to_comet_output(
15 |     project_name,
16 |     exp_name,
17 |     log_file_path,
18 |     refresh_rate
19 | ):
20 |     experiment = Experiment(
21 |         api_key = os.environ["COMET_API_KEY"],
22 |         project_name = project_name
23 |     )
24 |     experiment.set_name(exp_name)
25 | 
26 |     with open(log_file_path, "r") as f:
27 |         while True:
28 |             print(f.read(), end='') # comet captures stdout; f.read() returns only bytes appended since the last read
29 |             time.sleep(refresh_rate)
--------------------------------------------------------------------------------
/detection_toolbox/kitti/calibration.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Calibration(object):
4 |     def __init__(self, calib_file_path):
5 |         self._read_calib_from_file(calib_file_path)
6 | 
7 | 
8 |     def _read_calib_from_file(self, calib_file_path):
9 |         lines = open(calib_file_path, "r").readlines()
10 |         for line in lines:
11 |             line = line.strip()
12 |             if line == '':
13 |                 continue
14 |             key, val = line.split(":", 1)
15 |             val = np.array(val.strip().split(" "), dtype=np.float32)
16 | 
17 |             if "P" == key[0]:
18 |                 setattr(self, key, val.reshape(3, 4))
19 |             elif key == "Tr_velo_to_p2" or key == "Tr_velo_to_cam":
20 |                 self.V2C = val.reshape(3, 4)
21 |                 self.C2V = inverse_rigid_trans(self.V2C)
22 |             elif key == "Tr_imu_to_velo":
23 |                 self.I2V = val.reshape(3, 4)
24 |             elif key == "R0_rect":
25 |                 self.R0 = val.reshape(3, 3)
26 |             else:
27 |                 raise Exception("Undefined key in calib file: {}, {}".format(key, calib_file_path))
28 | 
29 |     def cart2hom(self, pts_3d):
30 |         ''' Input: nx3 points in Cartesian
31 |             Output: nx4 points in homogeneous coordinates, by appending a column of 1s
32 |         '''
33 |         n = pts_3d.shape[0]
34 |         pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
35 |         return pts_3d_hom
36 | 
37 |     # ===========================
38 |     # ------- 3d to 3d ----------
39 |     # ===========================
40 |     def project_velo_to_ref(self, pts_3d_velo):
41 |         pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
42 |         return np.dot(pts_3d_velo, np.transpose(self.V2C))
43 | 
44 |     def project_ref_to_velo(self, pts_3d_ref):
45 |         pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
46 |         return np.dot(pts_3d_ref, self.C2V.T)
47 | 
48 |     def project_rect_to_ref(self, pts_3d_rect):
49 |         ''' Input and Output are nx3 points '''
50 |         return np.dot(np.linalg.inv(self.R0), pts_3d_rect.T).T
51 | 
52 |     def project_ref_to_rect(self, pts_3d_ref):
53 |         ''' Input and Output are nx3 points '''
54 |         return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))
55 | 
56 |     def project_rect_to_velo(self, pts_3d_rect):
57 |         ''' Input: nx3 points in rect camera coord.
58 |             Output: nx3 points in velodyne coord.
59 |         '''
60 |         pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
61 |         return self.project_ref_to_velo(pts_3d_ref)
62 | 
63 |     def project_velo_to_rect(self, pts_3d_velo):
64 |         pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
65 |         return self.project_ref_to_rect(pts_3d_ref)
66 | 
67 |     # ===========================
68 |     # ------- 3d to 2d ----------
69 |     # ===========================
70 |     def project_rect_to_image(self, pts_3d_rect, view):
71 |         ''' Input: nx3 points in rect camera coord.
72 |             Output: nx2 points in the image coord. of camera `view` (e.g. 2 for P2)
73 |         '''
74 |         pts_3d_rect = self.cart2hom(pts_3d_rect)
75 |         pts_2d = np.dot(pts_3d_rect, np.transpose(getattr(self, "P" + str(view)))) # nx3
76 |         pts_2d[:,0] /= pts_2d[:,2]
77 |         pts_2d[:,1] /= pts_2d[:,2]
78 |         return pts_2d[:,0:2]
79 | 
80 |     def project_velo_to_image(self, pts_3d_velo, view):
81 |         ''' Input: nx3 points in velodyne coord.
82 |             Output: nx2 points in the image coord. of camera `view`
83 |         '''
84 |         pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
85 |         return self.project_rect_to_image(pts_3d_rect, view)
86 | 
87 |     # # ===========================
88 |     # # ------- 2d to 3d ----------
89 |     # # ===========================
90 |     # def project_image_to_rect(self, uv_depth):
91 |     #     ''' Input: nx3 first two channels are uv, 3rd channel
92 |     #         is depth in rect camera coord.
93 |     #         Output: nx3 points in rect camera coord.
94 |     #     '''
95 |     #     n = uv_depth.shape[0]
96 |     #     x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
97 |     #     y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
98 |     #     pts_3d_rect = np.zeros((n,3))
99 |     #     pts_3d_rect[:,0] = x
100 |     #     pts_3d_rect[:,1] = y
101 |     #     pts_3d_rect[:,2] = uv_depth[:,2]
102 |     #     return pts_3d_rect
103 | 
104 | 
105 |     # #! 
From Xinshuo's file 106 | # def img_to_rect(self, u, v, depth_rect): 107 | # """ 108 | # :param u: (N) 109 | # :param v: (N) 110 | # :param depth_rect: (N) 111 | # :return: 112 | # """ 113 | 114 | # # split the extrinsics from the projection matrix 115 | # proj_matrix = self.P.astype('float64') 116 | # ref_proj = self.P2.astype('float64') 117 | # intrinsics = ref_proj[:, :3] 118 | # if self.view == 5: intrinsics[1, 2] = intrinsics[0, 2] 119 | # extrinsics = np.matmul(np.linalg.inv(intrinsics), proj_matrix) # 3 x 4 120 | 121 | # # invert the extrinsics 122 | # extrin = np.concatenate((extrinsics, np.array([0, 0, 0, 1]).reshape((1, 4))), axis=0) # 4 x 4 123 | # extrin = np.linalg.inv(extrin) 124 | # extrin = extrin[:3, :] 125 | 126 | # # project the points back to the 3D coordinate with respect to P2 127 | # data_cam = self.get_intrisics_extrinsics(ref_proj) 128 | # x = ((u - data_cam['cu']) * depth_rect) / data_cam['fu'] 129 | # y = ((v - data_cam['cv']) * depth_rect) / data_cam['fv'] 130 | # num_pts = x.shape[0] 131 | # pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), \ 132 | # depth_rect.reshape(-1, 1), np.ones((num_pts, 1), dtype='float64')), axis=1) # N x 4 133 | 134 | # # rotate and translate to the 3D coordinate with respect to any camera 135 | # pts_rect = np.matmul(pts_rect, extrin.transpose()) 136 | 137 | # return pts_rect 138 | 139 | # #! From Xinshuo's file 140 | # #! Changed: box specifies coordinates of passed depth_map in original image 141 | # #! used for when we took only a 2dbbox part of the image out 142 | # def depthmap_to_rect(self, depth_map, segmap=None, box=None, depth_limit=120): 143 | # """ 144 | # :param depth_map: (H, W), depth_map 145 | # :return: 146 | # """ 147 | # if box is not None: 148 | # xmin, ymin = box 149 | # else: 150 | # xmin = 0 151 | # ymin = 0 152 | 153 | # x_range = np.arange(0, depth_map.shape[1]) 154 | # y_range = np.arange(0, depth_map.shape[0]) 155 | 156 | # x_idxs, y_idxs = np.meshgrid(x_range, y_range) 157 | # x_idxs, y_idxs = x_idxs.reshape(-1), y_idxs.reshape(-1) 158 | # depth = depth_map[y_idxs, x_idxs] 159 | 160 | # # remove the depth point which does not reflect back and has the maximum depth range 161 | # valid_index = np.where(depth < depth_limit)[0].tolist() 162 | # x_idxs, y_idxs, depth = x_idxs[valid_index], y_idxs[valid_index], depth[valid_index] 163 | 164 | # x_idxs += xmin 165 | # y_idxs += ymin #! 
Scale to proper positio in original image 166 | 167 | # pts_rect = self.img_to_rect(x_idxs, y_idxs, depth) 168 | # return pts_rect, x_idxs, y_idxs 169 | 170 | # def project_image_to_velo(self, uv_depth): 171 | # pts_3d_rect = self.project_image_to_rect(uv_depth) 172 | # return self.project_rect_to_velo(pts_3d_rect) 173 | 174 | 175 | def inverse_rigid_trans(Tr): 176 | ''' Inverse a rigid body transform matrix (3x4 as [R|t]) 177 | [R'|-R't; 0|1] 178 | ''' 179 | inv_Tr = np.zeros_like(Tr) # 3x4 180 | inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3]) 181 | inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3]) 182 | return inv_Tr -------------------------------------------------------------------------------- /detection_toolbox/kitti/eval.py: -------------------------------------------------------------------------------- 1 | import io as sysio 2 | import os 3 | import time 4 | 5 | import numba 6 | import numpy as np 7 | from scipy.interpolate import interp1d 8 | 9 | # from second.core.non_max_suppression.nms_gpu import rotate_iou_gpu_eval 10 | import math 11 | from pathlib import Path 12 | 13 | import numba 14 | import numpy as np 15 | from numba import cuda 16 | from detection_toolbox.std import dprint 17 | import time 18 | 19 | 20 | @cuda.jit(device=True, inline=True) 21 | def iou_device(a, b): 22 | left = max(a[0], b[0]) 23 | right = min(a[2], b[2]) 24 | top = max(a[1], b[1]) 25 | bottom = min(a[3], b[3]) 26 | width = max(right - left + 1, 0.) 27 | height = max(bottom - top + 1, 0.) 28 | interS = width * height 29 | Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1) 30 | Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1) 31 | return interS / (Sa + Sb - interS) 32 | 33 | 34 | @cuda.jit() 35 | def nms_kernel_v2(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): 36 | threadsPerBlock = 8 * 8 37 | row_start = cuda.blockIdx.y 38 | col_start = cuda.blockIdx.x 39 | tx = cuda.threadIdx.x 40 | row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) 41 | col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) 42 | block_boxes = cuda.shared.array( 43 | shape=(threadsPerBlock, 5), dtype=numba.float32) 44 | dev_box_idx = threadsPerBlock * col_start + tx 45 | if (tx < col_size): 46 | block_boxes[tx, 0] = dev_boxes[dev_box_idx, 0] 47 | block_boxes[tx, 1] = dev_boxes[dev_box_idx, 1] 48 | block_boxes[tx, 2] = dev_boxes[dev_box_idx, 2] 49 | block_boxes[tx, 3] = dev_boxes[dev_box_idx, 3] 50 | block_boxes[tx, 4] = dev_boxes[dev_box_idx, 4] 51 | cuda.syncthreads() 52 | if (cuda.threadIdx.x < row_size): 53 | cur_box_idx = threadsPerBlock * row_start + cuda.threadIdx.x 54 | # cur_box = dev_boxes + cur_box_idx * 5; 55 | i = 0 56 | t = 0 57 | start = 0 58 | if (row_start == col_start): 59 | start = tx + 1 60 | for i in range(start, col_size): 61 | if (iou_device(dev_boxes[cur_box_idx], block_boxes[i]) > 62 | nms_overlap_thresh): 63 | t |= 1 << i 64 | col_blocks = ((n_boxes) // (threadsPerBlock) + ( 65 | (n_boxes) % (threadsPerBlock) > 0)) 66 | dev_mask[cur_box_idx * col_blocks + col_start] = t 67 | 68 | 69 | @cuda.jit() 70 | def nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): 71 | threadsPerBlock = 8 * 8 72 | row_start = cuda.blockIdx.y 73 | col_start = cuda.blockIdx.x 74 | tx = cuda.threadIdx.x 75 | row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) 76 | col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) 77 | block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 78 | dev_box_idx = threadsPerBlock * col_start + tx 79 | if (tx < 
col_size): 80 | block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] 81 | block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] 82 | block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] 83 | block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] 84 | block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] 85 | cuda.syncthreads() 86 | if (tx < row_size): 87 | cur_box_idx = threadsPerBlock * row_start + tx 88 | # cur_box = dev_boxes + cur_box_idx * 5; 89 | t = 0 90 | start = 0 91 | if (row_start == col_start): 92 | start = tx + 1 93 | for i in range(start, col_size): 94 | iou = iou_device(dev_boxes[cur_box_idx * 5:cur_box_idx * 5 + 4], 95 | block_boxes[i * 5:i * 5 + 4]) 96 | if (iou > nms_overlap_thresh): 97 | t |= 1 << i 98 | col_blocks = ((n_boxes) // (threadsPerBlock) + ( 99 | (n_boxes) % (threadsPerBlock) > 0)) 100 | dev_mask[cur_box_idx * col_blocks + col_start] = t 101 | 102 | 103 | @numba.jit(nopython=True) 104 | def div_up(m, n): 105 | return m // n + (m % n > 0) 106 | 107 | 108 | @numba.jit(nopython=True) 109 | def nms_postprocess(keep_out, mask_host, boxes_num): 110 | threadsPerBlock = 8 * 8 111 | col_blocks = div_up(boxes_num, threadsPerBlock) 112 | remv = np.zeros((col_blocks), dtype=np.uint64) 113 | num_to_keep = 0 114 | for i in range(boxes_num): 115 | nblock = i // threadsPerBlock 116 | inblock = i % threadsPerBlock 117 | mask = np.array(1 << inblock, dtype=np.uint64) 118 | if not (remv[nblock] & mask): 119 | keep_out[num_to_keep] = i 120 | num_to_keep += 1 121 | # unsigned long long *p = &mask_host[0] + i * col_blocks; 122 | for j in range(nblock, col_blocks): 123 | remv[j] |= mask_host[i * col_blocks + j] 124 | # remv[j] |= p[j]; 125 | return num_to_keep 126 | 127 | 128 | def nms_gpu(dets, nms_overlap_thresh, device_id=0): 129 | """nms in gpu. 130 | 131 | Args: 132 | dets ([type]): [description] 133 | nms_overlap_thresh ([type]): [description] 134 | device_id ([type], optional): Defaults to 0. 
[description] 135 | 136 | Returns: 137 | [type]: [description] 138 | """ 139 | 140 | boxes_num = dets.shape[0] 141 | keep_out = np.zeros([boxes_num], dtype=np.int32) 142 | scores = dets[:, 4] 143 | order = scores.argsort()[::-1].astype(np.int32) 144 | boxes_host = dets[order, :] 145 | 146 | threadsPerBlock = 8 * 8 147 | col_blocks = div_up(boxes_num, threadsPerBlock) 148 | cuda.select_device(device_id) 149 | mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) 150 | blockspergrid = (div_up(boxes_num, threadsPerBlock), 151 | div_up(boxes_num, threadsPerBlock)) 152 | stream = cuda.stream() 153 | with stream.auto_synchronize(): 154 | boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) 155 | mask_dev = cuda.to_device(mask_host, stream) 156 | nms_kernel[blockspergrid, threadsPerBlock, stream]( 157 | boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) 158 | mask_dev.copy_to_host(mask_host, stream=stream) 159 | # stream.synchronize() 160 | num_out = nms_postprocess(keep_out, mask_host, boxes_num) 161 | keep = keep_out[:num_out] 162 | return list(order[keep]) 163 | 164 | 165 | @cuda.jit(device=True, inline=True) 166 | def trangle_area(a, b, c): 167 | return ( 168 | (a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) / 2.0 169 | 170 | 171 | @cuda.jit(device=True, inline=True) 172 | def area(int_pts, num_of_inter): 173 | area_val = 0.0 174 | for i in range(num_of_inter - 2): 175 | area_val += abs( 176 | trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4], 177 | int_pts[2 * i + 4:2 * i + 6])) 178 | return area_val 179 | 180 | 181 | @cuda.jit(device=True, inline=True) 182 | def sort_vertex_in_convex_polygon(int_pts, num_of_inter): 183 | if num_of_inter > 0: 184 | center = cuda.local.array((2, ), dtype=numba.float32) 185 | center[:] = 0.0 186 | for i in range(num_of_inter): 187 | center[0] += int_pts[2 * i] 188 | center[1] += int_pts[2 * i + 1] 189 | center[0] /= num_of_inter 190 | center[1] /= num_of_inter 191 | v = cuda.local.array((2, ), dtype=numba.float32) 192 | vs = cuda.local.array((16, ), dtype=numba.float32) 193 | for i in range(num_of_inter): 194 | v[0] = int_pts[2 * i] - center[0] 195 | v[1] = int_pts[2 * i + 1] - center[1] 196 | d = math.sqrt(v[0] * v[0] + v[1] * v[1]) 197 | v[0] = v[0] / d 198 | v[1] = v[1] / d 199 | if v[1] < 0: 200 | v[0] = -2 - v[0] 201 | vs[i] = v[0] 202 | j = 0 203 | temp = 0 204 | for i in range(1, num_of_inter): 205 | if vs[i - 1] > vs[i]: 206 | temp = vs[i] 207 | tx = int_pts[2 * i] 208 | ty = int_pts[2 * i + 1] 209 | j = i 210 | while j > 0 and vs[j - 1] > temp: 211 | vs[j] = vs[j - 1] 212 | int_pts[j * 2] = int_pts[j * 2 - 2] 213 | int_pts[j * 2 + 1] = int_pts[j * 2 - 1] 214 | j -= 1 215 | 216 | vs[j] = temp 217 | int_pts[j * 2] = tx 218 | int_pts[j * 2 + 1] = ty 219 | 220 | 221 | @cuda.jit( 222 | device=True, 223 | inline=True) 224 | def line_segment_intersection(pts1, pts2, i, j, temp_pts): 225 | A = cuda.local.array((2, ), dtype=numba.float32) 226 | B = cuda.local.array((2, ), dtype=numba.float32) 227 | C = cuda.local.array((2, ), dtype=numba.float32) 228 | D = cuda.local.array((2, ), dtype=numba.float32) 229 | 230 | A[0] = pts1[2 * i] 231 | A[1] = pts1[2 * i + 1] 232 | 233 | B[0] = pts1[2 * ((i + 1) % 4)] 234 | B[1] = pts1[2 * ((i + 1) % 4) + 1] 235 | 236 | C[0] = pts2[2 * j] 237 | C[1] = pts2[2 * j + 1] 238 | 239 | D[0] = pts2[2 * ((j + 1) % 4)] 240 | D[1] = pts2[2 * ((j + 1) % 4) + 1] 241 | BA0 = B[0] - A[0] 242 | BA1 = B[1] - A[1] 243 | DA0 = D[0] - A[0] 244 | CA0 = C[0] - A[0] 245 | DA1 = D[1] - A[1] 246 | CA1 = C[1] - 
A[1] 247 | acd = DA1 * CA0 > CA1 * DA0 248 | bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0]) 249 | if acd != bcd: 250 | abc = CA1 * BA0 > BA1 * CA0 251 | abd = DA1 * BA0 > BA1 * DA0 252 | if abc != abd: 253 | DC0 = D[0] - C[0] 254 | DC1 = D[1] - C[1] 255 | ABBA = A[0] * B[1] - B[0] * A[1] 256 | CDDC = C[0] * D[1] - D[0] * C[1] 257 | DH = BA1 * DC0 - BA0 * DC1 258 | Dx = ABBA * DC0 - BA0 * CDDC 259 | Dy = ABBA * DC1 - BA1 * CDDC 260 | temp_pts[0] = Dx / DH 261 | temp_pts[1] = Dy / DH 262 | return True 263 | return False 264 | 265 | 266 | @cuda.jit( 267 | device=True, 268 | inline=True) 269 | def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts): 270 | a = cuda.local.array((2, ), dtype=numba.float32) 271 | b = cuda.local.array((2, ), dtype=numba.float32) 272 | c = cuda.local.array((2, ), dtype=numba.float32) 273 | d = cuda.local.array((2, ), dtype=numba.float32) 274 | 275 | a[0] = pts1[2 * i] 276 | a[1] = pts1[2 * i + 1] 277 | 278 | b[0] = pts1[2 * ((i + 1) % 4)] 279 | b[1] = pts1[2 * ((i + 1) % 4) + 1] 280 | 281 | c[0] = pts2[2 * j] 282 | c[1] = pts2[2 * j + 1] 283 | 284 | d[0] = pts2[2 * ((j + 1) % 4)] 285 | d[1] = pts2[2 * ((j + 1) % 4) + 1] 286 | 287 | area_abc = trangle_area(a, b, c) 288 | area_abd = trangle_area(a, b, d) 289 | 290 | if area_abc * area_abd >= 0: 291 | return False 292 | 293 | area_cda = trangle_area(c, d, a) 294 | area_cdb = area_cda + area_abc - area_abd 295 | 296 | if area_cda * area_cdb >= 0: 297 | return False 298 | t = area_cda / (area_abd - area_abc) 299 | 300 | dx = t * (b[0] - a[0]) 301 | dy = t * (b[1] - a[1]) 302 | temp_pts[0] = a[0] + dx 303 | temp_pts[1] = a[1] + dy 304 | return True 305 | 306 | 307 | @cuda.jit(device=True, inline=True) 308 | def point_in_quadrilateral(pt_x, pt_y, corners): 309 | ab0 = corners[2] - corners[0] 310 | ab1 = corners[3] - corners[1] 311 | 312 | ad0 = corners[6] - corners[0] 313 | ad1 = corners[7] - corners[1] 314 | 315 | ap0 = pt_x - corners[0] 316 | ap1 = pt_y - corners[1] 317 | 318 | abab = ab0 * ab0 + ab1 * ab1 319 | abap = ab0 * ap0 + ab1 * ap1 320 | adad = ad0 * ad0 + ad1 * ad1 321 | adap = ad0 * ap0 + ad1 * ap1 322 | 323 | eps = -1e-6 324 | return abab - abap >= eps and abap >= eps and adad - adap >= eps and adap >= eps 325 | 326 | 327 | 328 | @cuda.jit(device=True, inline=True) 329 | def quadrilateral_intersection(pts1, pts2, int_pts): 330 | num_of_inter = 0 331 | for i in range(4): 332 | if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2): 333 | int_pts[num_of_inter * 2] = pts1[2 * i] 334 | int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1] 335 | num_of_inter += 1 336 | if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1): 337 | int_pts[num_of_inter * 2] = pts2[2 * i] 338 | int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1] 339 | num_of_inter += 1 340 | temp_pts = cuda.local.array((2, ), dtype=numba.float32) 341 | for i in range(4): 342 | for j in range(4): 343 | has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts) 344 | if has_pts: 345 | int_pts[num_of_inter * 2] = temp_pts[0] 346 | int_pts[num_of_inter * 2 + 1] = temp_pts[1] 347 | num_of_inter += 1 348 | 349 | return num_of_inter 350 | 351 | 352 | @cuda.jit(device=True, inline=True) 353 | def rbbox_to_corners(corners, rbbox): 354 | # generate clockwise corners and rotate it clockwise 355 | angle = rbbox[4] 356 | a_cos = math.cos(angle) 357 | a_sin = math.sin(angle) 358 | center_x = rbbox[0] 359 | center_y = rbbox[1] 360 | x_d = rbbox[2] 361 | y_d = rbbox[3] 362 | corners_x = cuda.local.array((4, ), dtype=numba.float32) 
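    #? Sketch of the layout below: the four corners are generated clockwise in the
    #? box's local frame as (±x_d/2, ±y_d/2), then mapped through
    #? R = [[cos a, sin a], [-sin a, cos a]] (a clockwise rotation for positive a)
    #? and translated by (center_x, center_y).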
363 | corners_y = cuda.local.array((4, ), dtype=numba.float32) 364 | corners_x[0] = -x_d / 2 365 | corners_x[1] = -x_d / 2 366 | corners_x[2] = x_d / 2 367 | corners_x[3] = x_d / 2 368 | corners_y[0] = -y_d / 2 369 | corners_y[1] = y_d / 2 370 | corners_y[2] = y_d / 2 371 | corners_y[3] = -y_d / 2 372 | for i in range(4): 373 | corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x 374 | corners[2 * i + 375 | 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y 376 | 377 | 378 | @cuda.jit(device=True, inline=True) 379 | def inter(rbbox1, rbbox2): 380 | corners1 = cuda.local.array((8, ), dtype=numba.float32) 381 | corners2 = cuda.local.array((8, ), dtype=numba.float32) 382 | intersection_corners = cuda.local.array((16, ), dtype=numba.float32) 383 | 384 | rbbox_to_corners(corners1, rbbox1) 385 | rbbox_to_corners(corners2, rbbox2) 386 | 387 | num_intersection = quadrilateral_intersection(corners1, corners2, 388 | intersection_corners) 389 | sort_vertex_in_convex_polygon(intersection_corners, num_intersection) 390 | # print(intersection_corners.reshape([-1, 2])[:num_intersection]) 391 | 392 | return area(intersection_corners, num_intersection) 393 | 394 | 395 | @cuda.jit(device=True, inline=True) 396 | def devRotateIoU(rbox1, rbox2): 397 | area1 = rbox1[2] * rbox1[3] 398 | area2 = rbox2[2] * rbox2[3] 399 | area_inter = inter(rbox1, rbox2) 400 | return area_inter / (area1 + area2 - area_inter) 401 | 402 | 403 | @cuda.jit() 404 | def rotate_nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): 405 | threadsPerBlock = 8 * 8 406 | row_start = cuda.blockIdx.y 407 | col_start = cuda.blockIdx.x 408 | tx = cuda.threadIdx.x 409 | row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) 410 | col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) 411 | block_boxes = cuda.shared.array(shape=(64 * 6, ), dtype=numba.float32) 412 | dev_box_idx = threadsPerBlock * col_start + tx 413 | if (tx < col_size): 414 | block_boxes[tx * 6 + 0] = dev_boxes[dev_box_idx * 6 + 0] 415 | block_boxes[tx * 6 + 1] = dev_boxes[dev_box_idx * 6 + 1] 416 | block_boxes[tx * 6 + 2] = dev_boxes[dev_box_idx * 6 + 2] 417 | block_boxes[tx * 6 + 3] = dev_boxes[dev_box_idx * 6 + 3] 418 | block_boxes[tx * 6 + 4] = dev_boxes[dev_box_idx * 6 + 4] 419 | block_boxes[tx * 6 + 5] = dev_boxes[dev_box_idx * 6 + 5] 420 | cuda.syncthreads() 421 | if (tx < row_size): 422 | cur_box_idx = threadsPerBlock * row_start + tx 423 | # cur_box = dev_boxes + cur_box_idx * 5; 424 | t = 0 425 | start = 0 426 | if (row_start == col_start): 427 | start = tx + 1 428 | for i in range(start, col_size): 429 | iou = devRotateIoU(dev_boxes[cur_box_idx * 6:cur_box_idx * 6 + 5], 430 | block_boxes[i * 6:i * 6 + 5]) 431 | # print('iou', iou, cur_box_idx, i) 432 | if (iou > nms_overlap_thresh): 433 | t |= 1 << i 434 | col_blocks = ((n_boxes) // (threadsPerBlock) + ( 435 | (n_boxes) % (threadsPerBlock) > 0)) 436 | dev_mask[cur_box_idx * col_blocks + col_start] = t 437 | 438 | 439 | def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=0): 440 | """nms in gpu. WARNING: this function can provide right result 441 | but its performance isn't be tested 442 | 443 | Args: 444 | dets ([type]): [description] 445 | nms_overlap_thresh ([type]): [description] 446 | device_id ([type], optional): Defaults to 0. 
[description] 447 | 448 | Returns: 449 | [type]: [description] 450 | """ 451 | dets = dets.astype(np.float32) 452 | boxes_num = dets.shape[0] 453 | keep_out = np.zeros([boxes_num], dtype=np.int32) 454 | scores = dets[:, 5] 455 | order = scores.argsort()[::-1].astype(np.int32) 456 | boxes_host = dets[order, :] 457 | 458 | threadsPerBlock = 8 * 8 459 | col_blocks = div_up(boxes_num, threadsPerBlock) 460 | cuda.select_device(device_id) 461 | # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64) 462 | mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) 463 | blockspergrid = (div_up(boxes_num, threadsPerBlock), 464 | div_up(boxes_num, threadsPerBlock)) 465 | stream = cuda.stream() 466 | with stream.auto_synchronize(): 467 | boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) 468 | mask_dev = cuda.to_device(mask_host, stream) 469 | rotate_nms_kernel[blockspergrid, threadsPerBlock, stream]( 470 | boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) 471 | mask_dev.copy_to_host(mask_host, stream=stream) 472 | num_out = nms_postprocess(keep_out, mask_host, boxes_num) 473 | keep = keep_out[:num_out] 474 | return list(order[keep]) 475 | 476 | 477 | @cuda.jit('(int64, int64, float32[:], float32[:], float32[:])', fastmath=False) 478 | def rotate_iou_kernel(N, K, dev_boxes, dev_query_boxes, dev_iou): 479 | threadsPerBlock = 8 * 8 480 | row_start = cuda.blockIdx.x 481 | col_start = cuda.blockIdx.y 482 | tx = cuda.threadIdx.x 483 | row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) 484 | col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) 485 | block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 486 | block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 487 | 488 | dev_query_box_idx = threadsPerBlock * col_start + tx 489 | dev_box_idx = threadsPerBlock * row_start + tx 490 | if (tx < col_size): 491 | block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] 492 | block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] 493 | block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] 494 | block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] 495 | block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] 496 | if (tx < row_size): 497 | block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] 498 | block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] 499 | block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] 500 | block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] 501 | block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] 502 | cuda.syncthreads() 503 | if tx < row_size: 504 | for i in range(col_size): 505 | offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i 506 | dev_iou[offset] = devRotateIoU(block_qboxes[i * 5:i * 5 + 5], 507 | block_boxes[tx * 5:tx * 5 + 5]) 508 | 509 | 510 | def rotate_iou_gpu(boxes, query_boxes, device_id=0): 511 | """rotated box iou running in gpu. 500x faster than cpu version 512 | (take 5ms in one example with numba.cuda code). 513 | convert from [this project]( 514 | https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). 515 | 516 | Args: 517 | boxes (float tensor: [N, 5]): rbboxes. format: centers, dims, 518 | angles(clockwise when positive) 519 | query_boxes (float tensor: [K, 5]): [description] 520 | device_id (int, optional): Defaults to 0. 
[description] 521 | 522 | Returns: 523 | [type]: [description] 524 | """ 525 | box_dtype = boxes.dtype 526 | boxes = boxes.astype(np.float32) 527 | query_boxes = query_boxes.astype(np.float32) 528 | N = boxes.shape[0] 529 | K = query_boxes.shape[0] 530 | iou = np.zeros((N, K), dtype=np.float32) 531 | if N == 0 or K == 0: 532 | return iou 533 | threadsPerBlock = 8 * 8 534 | cuda.select_device(device_id) 535 | blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock)) 536 | 537 | stream = cuda.stream() 538 | with stream.auto_synchronize(): 539 | boxes_dev = cuda.to_device(boxes.reshape([-1]), stream) 540 | query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream) 541 | iou_dev = cuda.to_device(iou.reshape([-1]), stream) 542 | rotate_iou_kernel[blockspergrid, threadsPerBlock, stream]( 543 | N, K, boxes_dev, query_boxes_dev, iou_dev) 544 | iou_dev.copy_to_host(iou.reshape([-1]), stream=stream) 545 | return iou.astype(boxes.dtype) 546 | 547 | 548 | @cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True) 549 | def devRotateIoUEval(rbox1, rbox2, criterion=-1): 550 | area1 = rbox1[2] * rbox1[3] 551 | area2 = rbox2[2] * rbox2[3] 552 | area_inter = inter(rbox1, rbox2) 553 | if criterion == -1: 554 | return area_inter / (area1 + area2 - area_inter) 555 | elif criterion == 0: 556 | return area_inter / area1 557 | elif criterion == 1: 558 | return area_inter / area2 559 | else: 560 | return area_inter 561 | 562 | 563 | @cuda.jit( 564 | '(int64, int64, float32[:], float32[:], float32[:], int32)', 565 | fastmath=False) 566 | def rotate_iou_kernel_eval(N, 567 | K, 568 | dev_boxes, 569 | dev_query_boxes, 570 | dev_iou, 571 | criterion=-1): 572 | threadsPerBlock = 8 * 8 573 | row_start = cuda.blockIdx.x 574 | col_start = cuda.blockIdx.y 575 | tx = cuda.threadIdx.x 576 | row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) 577 | col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) 578 | block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 579 | block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 580 | 581 | dev_query_box_idx = threadsPerBlock * col_start + tx 582 | dev_box_idx = threadsPerBlock * row_start + tx 583 | if (tx < col_size): 584 | block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] 585 | block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] 586 | block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] 587 | block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] 588 | block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] 589 | if (tx < row_size): 590 | block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] 591 | block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] 592 | block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] 593 | block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] 594 | block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] 595 | cuda.syncthreads() 596 | if tx < row_size: 597 | for i in range(col_size): 598 | offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i 599 | dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5], 600 | block_boxes[tx * 5:tx * 5 + 5], 601 | criterion) 602 | 603 | 604 | def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0): 605 | """rotated box iou running in gpu. 8x faster than cpu version 606 | (take 5ms in one example with numba.cuda code). 
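    Example (a sketch, assuming a CUDA device is available; per the Args below,
    a box is [cx, cy, w, l, angle]):
        boxes = np.array([[0., 0., 2., 4., 0.]], dtype=np.float32)
        qboxes = np.array([[0., 0., 2., 4., np.pi / 2]], dtype=np.float32)
        rotate_iou_gpu_eval(boxes, qboxes)  # -> [[~0.33]]: 2x2 intersection, union 12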
607 |     convert from [this project](
608 |     https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).
609 | 
610 |     Args:
611 |         boxes (float tensor: [N, 5]): rbboxes. format: centers, dims,
612 |             angles(clockwise when positive)
613 |         query_boxes (float tensor: [K, 5]): [description]
614 |         device_id (int, optional): Defaults to 0. [description]
615 | 
616 |     Returns:
617 |         [type]: [description]
618 |     """
619 |     box_dtype = boxes.dtype
620 |     boxes = boxes.astype(np.float32)
621 |     query_boxes = query_boxes.astype(np.float32)
622 |     N = boxes.shape[0]
623 |     K = query_boxes.shape[0]
624 |     iou = np.zeros((N, K), dtype=np.float32)
625 |     if N == 0 or K == 0:
626 |         return iou
627 |     threadsPerBlock = 8 * 8
628 |     cuda.select_device(device_id)
629 |     blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))
630 | 
631 |     stream = cuda.stream()
632 |     with stream.auto_synchronize():
633 |         boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
634 |         query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
635 |         iou_dev = cuda.to_device(iou.reshape([-1]), stream)
636 |         rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
637 |             N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
638 |         iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
639 |     return iou.astype(boxes.dtype)
640 | 
641 | 
642 | def get_mAP(prec):
643 |     sums = 0
644 |     for i in range(0, len(prec), 4):
645 |         sums += prec[i]
646 |     return sums / 11 * 100
647 | 
648 | #! scores is a 1d array of scores of matched dts.
649 | #! num_gt is the total number of valid gt boxes in the dataset
650 | #? Honestly, I'm not sure. It looks like it divides the space of scores into num_sample_pts parts
651 | #? and returns the scores at each of the parts as the thresholds.
652 | #? The scores I think are in decreasing order.
653 | #? So it's not necessarily that the returned thresholds are 1.0, 0.9, 0.8, ... 0 if num_sample_pts = 11,
654 | #? but it's scores[len(scores) * 0 / 10], scores[len(scores) * 1 / 10], ..., I think.
655 | #? Well, all that really matters is that the thresholds returned at the end have length num_sample_pts, sorted decreasing.
656 | #! Okay, so it's finding the score thresholds that correspond to the 41 recall positions.
657 | '''
658 | Here's a walk-through.
659 | First, note that the big if statement w/ continue triggers if the current recall is closer to the right recall
660 | than the left recall. So, if the current recall is closer to the LEFT recall, we store the left recall's corresponding
661 | threshold. Why? Because it means we found the threshold corresponding best to the current recall position.
662 | 
663 | Better general mental image? Think of a p/r curve, y-axis p, x-axis r. We're trying to find the area under
664 | the curve. Since we can't take the integral directly, we approximate it. We actually use a right Riemann sum
665 | here. We ignore curr_recall = 0, since we don't use it to calculate the final mAP anyway. Now what is the prec.
666 | corr. to 1/40 recall? Well, we need a way to filter the detections by finding a threshold that causes 1/40
667 | recall. So we go through the for loop, and we append a threshold that does so, and so on.
668 | Lastly, to compute the actual mAP, we add up the 1/40 to 40/40 recall positions and divide by 40. Visually,
669 | think of this as adding up the areas of the boxes, with top right corner on the curve at the 1/40 to 40/40 recall
670 | positions, where the boxes have width 1/40.
671 | 
672 | Now what if we fed in ground truth boxes as the predictions, all with score 1? 
673 | Then the thresholds array would be entirely populated with 1. Then, since precision at every threshold is 1,
674 | the overall score would be 1 (100%).
675 | 
676 | Now why is this the metric everyone uses? What is the significance of a p/r curve?
677 | An alternative would be to directly toss out "score" and compute recall. However, then there is no measure
678 | of how many false positives we have. Then we could report precision as well. Then we run into "What is a good
679 | balance of precision & recall scores?" We could ensure precision = 1 but recall might be 0, and vice versa.
680 | Then comes the idea of "well, since p & r are correlated to some extent, we can graph it." What accounts for different
681 | combinations of p & r values? p & r values change based on filtering detections, and the answer ends up being
682 | assigning a "confidence" to each detection. And as such, we have the p/r curve.
683 | 
684 | How to interpret a p/r curve?
685 | An ideal p/r curve would just be p = 1 for all recall values.
686 | If the p/r curve drops early (precision drops even for low recall values), this means we have some high-
687 | confidence false positives. Why? Because low recall <=> high confidence threshold, which should have high
688 | precision. We can then filter the detections at that threshold and see what false positives there are.
689 | Look at the recall value where p = 0. Ideally, it should be 1. Otherwise, it means we're always missing some
690 | ground truth (or it's being assigned super low confidence, so it is lost amongst low-confidence false
691 | positives).
692 | Odd case: the right side of the curve slowly starts going down, but very suddenly drops to p = 0 (as in, if
693 | you were to continue drawing the curve, it feels like it'll go on for a little longer, but it doesn't). Then
694 | there is a chance that a confidence-threshold filtering of the detections was done, with a too-aggressive
695 | threshold clipping out potentially good detections.
696 | 
697 | How is the p/r curve affected by manual truncation of confidence? Aka, filter out all detections < 0.3 confidence.
698 | a) This frankly only affects the right-most end of the curve, because the left side corresponds to higher
699 |    confidence detections only.
700 | b) Will throwing in a bunch of low-confidence detections increase performance? Yup.
701 | Case 1) The detector is missing a bunch of boxes entirely. Let's say 10%.
702 |     Then, the rightmost part flatlines at p = 0. Why? When presented the question "What is the precision
703 |     when recall is 0.95?", the response is "well, we can't achieve recall 0.95, so I guess precision would
704 |     be 0." So, you get penalized.
705 |     Now here, if we toss in a bunch of low-confidence detections, the detector might miss fewer boxes.
706 |     Let's say it misses 3% now. When presented with the same question, the response is "well, we can now
707 |     achieve recall 0.95, so precision isn't automatically 0. But in achieving this, we got a lot of
708 |     false positives that were rightly assigned low confidence, so precision will be quite low here, maybe
709 |     like 0.1." This is still higher than 0.
710 | Case 2) The detector is missing some boxes but not that many. Let's say 3%.
711 |     Truth: the current detections all have confidence >= 0.3.
712 |     Truth: Adding detections < 0.3 confidence WILL NOT change the confidence threshold at recall level
713 |     0 ~ 0.97. 
Looking at the for loop, it won't even look at the low confidence detections until it gets 714 | past 0.97 since scores are sorted in decreasing order, looking like [previous detections, low 715 | confidence detections]. 716 | By Case 1 argument, if the best recall is increased even just to 0.98, score improves. 717 | Case 3) The detector has 100% recall. We have an even simpler argument. This means all the added boxes 718 | won't even make it to the "scores" array since len(scores) <= len(num_gt) always, and here, with 719 | recall 100%, we have equality. At the harshest score threshold in scores array, all the added 720 | detections should be filtered out. 721 | ''' 722 | @numba.jit 723 | def get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41): 724 | scores.sort() 725 | scores = scores[::-1] #! scores are in decreasing order. 726 | current_recall = 0 727 | thresholds = [] 728 | 729 | for i, score in enumerate(scores): 730 | l_recall = (i + 1) / num_gt 731 | if i < (len(scores) - 1): 732 | r_recall = (i + 2) / num_gt 733 | else: 734 | r_recall = l_recall 735 | #! I literally have no clue 736 | #! if the current recall is closer to right (bigger) recall than left, skip. 737 | if (((r_recall - current_recall) < (current_recall - l_recall)) 738 | and (i < (len(scores) - 1))): 739 | continue 740 | # recall = l_recall 741 | thresholds.append(score) 742 | current_recall += 1 / (num_sample_pts - 1.0) 743 | # print(len(thresholds), len(scores), num_gt) 744 | return thresholds 745 | 746 | #! gt_anno is anno for single image 747 | #! current class is an index 748 | #! difficulty is 0, 1, or 2 749 | #? Note that images can be in multiple difficulty groups 750 | #? I think Easy \subset Moderate \subset Hard 751 | def clean_data(gt_anno, dt_anno, current_class, difficulty, extra_info_single): 752 | 753 | gt_extra_info_single, dt_extra_info_single, general_extra_info = extra_info_single 754 | 755 | MIN_HEIGHT = general_extra_info['MIN_HEIGHT'] 756 | MAX_OCCLUSION = general_extra_info['MAX_OCCLUSION'] 757 | MAX_TRUNCATION = general_extra_info['MAX_TRUNCATION'] 758 | MAX_DISTANCE = general_extra_info['MAX_DISTANCE'] 759 | MIN_POINTS_THRESHOLD = general_extra_info['MIN_POINTS_THRESHOLD'] #! int 760 | CLASS_NAMES = list(map(lambda s: s.lower(), general_extra_info['CLASS_NAMES'])) 761 | #! Added later in eval.py, no need for user to specify 762 | curr_metric = general_extra_info['curr_metric'] #! 0 or 1 or 2 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | # CLASS_NAMES = [ 775 | # 'car', 'pedestrian', 'cyclist', 'van', 'person_sitting', 'car', 776 | # 'tractor', 'trailer' 777 | # ] 778 | # if os.environ["KITTI_EVAL_CHANGES"] == "0": 779 | # MIN_HEIGHT = [40, 25, 25] 780 | # MAX_OCCLUSION = [0, 1, 2] 781 | # MAX_TRUNCATION = [0.15, 0.3, 0.5] 782 | 783 | # elif os.environ["KITTI_EVAL_CHANGES"] == "1": 784 | # MAX_TRUNCATION = [0.99, 0.99, 0.99] # filter out stuff bigger than this. 785 | # #! This is mostly to filter out all the truncation = 1s 786 | # #! Note that there are indeed some things that have truncation < 1 and occlusion = 1. So, occlusion threshold 787 | # #! starts at 0.99 to get rid of all fully occluded things (in diagnostics.py) 788 | # #? For sanity, when we had occlusion = 0 and integer occlusions we ended up removing all the things with occlusion = 1 789 | 790 | 791 | # if "," in os.environ["KITTI_EVAL_MIN_HEIGHT"]: #! 
if we passed in something like 40,20,0 792 | # split = os.environ["KITTI_EVAL_MIN_HEIGHT"].split(",") 793 | # MIN_HEIGHT = [int(s) for s in split] 794 | # assert len(MIN_HEIGHT) == 3 795 | # else: #! otherwise, just use a single value 796 | # MIN_HEIGHT = [int(os.environ["KITTI_EVAL_MIN_HEIGHT"])] * 3 797 | 798 | # max_occlusion = float(os.environ["KITTI_EVAL_MAX_OCCLUSION"]) 799 | # MAX_OCCLUSION = [max_occlusion] * 3 800 | 801 | # #! Special: includes max distance 802 | # elif os.environ["KITTI_EVAL_CHANGES"] == "2": 803 | # # MAX_TRUNCATION = [0.99, 0.99, 0.99] # filter out stuff bigger than this. 804 | # split = os.environ["KITTI_EVAL_MAX_TRUNCATION"].split(",") 805 | # MAX_TRUNCATION = [float(s) for s in split] 806 | # assert len(MAX_TRUNCATION) == 3 807 | 808 | # split = os.environ["KITTI_EVAL_MIN_HEIGHT"].split(",") 809 | # MIN_HEIGHT = [int(s) for s in split] 810 | # assert len(MIN_HEIGHT) == 3 811 | 812 | # split = os.environ["KITTI_EVAL_MAX_OCCLUSION"].split(",") 813 | # MAX_OCCLUSION = [float(s) for s in split] 814 | # assert len(MAX_OCCLUSION) == 3 815 | 816 | # split = os.environ["KITTI_EVAL_MAX_DISTANCE"].split(",") 817 | # MAX_DISTANCE = [int(s) for s in split] 818 | # assert len(MAX_DISTANCE) == 3 819 | 820 | # if CLASS_NAMES[current_class] == "cyclist" and os.environ["KITTI_EVAL_CYC_MAX_OCCLUSION"] != "": 821 | # split = os.environ["KITTI_EVAL_CYC_MAX_OCCLUSION"].split(",") #? Separate for cyclists 822 | # MAX_OCCLUSION = [float(s) for s in split] 823 | # assert len(MAX_OCCLUSION) == 3 824 | 825 | # else: 826 | # raise Exception("Unsupported kitti eval changes") 827 | 828 | 829 | dc_bboxes, ignored_gt, ignored_dt = [], [], [] 830 | 831 | 832 | current_cls_name = CLASS_NAMES[current_class].lower() 833 | num_gt = len(gt_anno["name"]) 834 | num_dt = len(dt_anno["name"]) #! number of boxes 835 | 836 | 837 | num_valid_gt = 0 #! Keeps the number of boxes that perfecty match the current class and fit the current difficulty 838 | 839 | for i in range(num_gt): 840 | bbox = gt_anno["bbox"][i] 841 | gt_name = gt_anno["name"][i].lower() 842 | height = bbox[3] - bbox[1] 843 | 844 | 845 | valid_class = -1 846 | if (gt_name == current_cls_name): #! This bbox corresponds with the class we're doing rn 847 | valid_class = 1 848 | elif (current_cls_name == "Pedestrian".lower() 849 | and "Person_sitting".lower() == gt_name): 850 | valid_class = 0 851 | elif (current_cls_name == "Car".lower() and "Van".lower() == gt_name): # 852 | valid_class = 0 853 | elif (current_cls_name == "Car".lower() and "Undefined".lower() == gt_name): #! don't treat undefined as fp for cars 854 | valid_class = 0 855 | # elif (current_cls_name == "Cyclist".lower() and "Motorcycle".lower() == gt_name): 856 | # valid_class = 0 857 | else: #! no relationship with current class 858 | valid_class = -1 859 | 860 | 861 | ignore = False 862 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 863 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 864 | # or (height <= MIN_HEIGHT[difficulty])): 865 | 866 | # ignore = True 867 | 868 | if ((gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 869 | or (gt_extra_info_single["num_points"][i] < MIN_POINTS_THRESHOLD)): 870 | 871 | ignore = True 872 | 873 | # if (curr_metric == 0 or curr_metric == 1 or curr_metric == 2): #! 
2d bbox or bev 874 | # # if (1 == 2): 875 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 876 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 877 | # or (height <= MIN_HEIGHT[difficulty]) 878 | # or (gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty])): 879 | 880 | # ignore = True 881 | 882 | # else: #! 3d 883 | # if ((gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 884 | # or (gt_extra_info_single["num_points"][i] < MIN_POINTS_THRESHOLD)): 885 | # # or gt_anno["occluded"][i] == 1): #? GET RID OF THIS TODO: 886 | 887 | # ignore = True 888 | 889 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 890 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 891 | # or (height <= MIN_HEIGHT[difficulty]) 892 | # or (gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 893 | # or (curr_metric == 2 and gt_extra_info_single["num_points"] < MIN_POINTS_THRESHOLD)): 894 | 895 | # ignore = True 896 | 897 | 898 | # if os.environ["KITTI_EVAL_CHANGES"] == "0" or os.environ["KITTI_EVAL_CHANGES"] == "1": 899 | # # if gt_anno["occluded"][i] > 1.0: 900 | # # print(gt_anno["occluded"][i]) 901 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 902 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 903 | # or (height <= MIN_HEIGHT[difficulty])): 904 | # # if gt_anno["difficulty"][i] > difficulty or gt_anno["difficulty"][i] == -1: 905 | # ignore = True #! out of this difficult, ignore 906 | # #! Includes distance 907 | # elif os.environ["KITTI_EVAL_CHANGES"] == "2": 908 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 909 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 910 | # or (height <= MIN_HEIGHT[difficulty]) 911 | # or (gt_anno["distance"][i] > MAX_DISTANCE[difficulty])): 912 | # ignore = True 913 | # else: 914 | # raise Exception("Unsupported kitti eval changes") 915 | 916 | 917 | 918 | #? Ignored_gt: 0 -> keep, don't ignore. 1 -> Ignore, but don't treat as FP. -1 -> Ignore, treat as FP. 919 | if valid_class == 1 and not ignore: #! all good to go, keep 920 | ignored_gt.append(0) 921 | num_valid_gt += 1 922 | #! Don't treat as false positive. 923 | #! Translation: If we have a detection that detections this, don't treat as part of the denominator for AP 924 | #! Two cases: If valid_class == 0, one of special FP classes 925 | #! ignore and valid_class == 1: If same class but harder. So if the model ends up predicting a harder box 926 | #! it's not penalized for it. 927 | elif (valid_class == 0 or (ignore and (valid_class == 1))): 928 | ignored_gt.append(1) 929 | #! Unrelated 930 | else: 931 | ignored_gt.append(-1) 932 | 933 | 934 | #! store don't care boxes so we can ignore detections in this area 935 | if gt_anno["name"][i] == "DontCare": 936 | dc_bboxes.append(gt_anno["bbox"][i]) 937 | 938 | 939 | for i in range(num_dt): 940 | #! Filter out irrelevant detection classes 941 | if (dt_anno["name"][i].lower() == current_cls_name): 942 | valid_class = 1 943 | else: 944 | valid_class = -1 945 | 946 | height = abs(dt_anno["bbox"][i, 3] - dt_anno["bbox"][i, 1]) 947 | 948 | #! This is a detection that's smaller than min_height 949 | #! This is the "new" change. They say it's because: 950 | ''' 951 | ! suppose we're doing evalulation for easy. Apparently, if we have a bbox of size 39 pixels, we don't want 952 | ! it to be a FP for the 40 pixel easy box. 953 | ? Frankly I have no clue why. 954 | ! Bottom line is all detection boxes smaller than the current GT difficulty height are cut out. 
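? A plausible rationale: in the original KITTI protocol, a gt box below MIN_HEIGHT for the
? current difficulty is marked ignored (ignored_gt == 1) rather than counted, so a small
? detection that correctly hits such an object earns no credit and, if it misses the overlap
? threshold with the ignored gt, would surface as a FP. Cutting sub-threshold detections
? avoids penalizing correct detections of harder-than-current-difficulty objects.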
955 | ! Note that this does still include detections of other classes 956 | ''' 957 | # or (curr_metric == 2 and dt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 958 | if height < MIN_HEIGHT[difficulty] or (curr_metric == 2 and dt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]): 959 | ignored_dt.append(1) 960 | # if height < MIN_HEIGHT[difficulty]: 961 | # ignored_dt.append(1) 962 | #! detection matches class, keep 963 | elif valid_class == 1: 964 | ignored_dt.append(0) 965 | #! mismatch, toss. 966 | else: 967 | ignored_dt.append(-1) 968 | 969 | 970 | ''' 971 | ! num_valid_gt are the number of gt boxes in this image that: 1) are of the current class 2) fit the difficulty req. 972 | ! ignored_gt is a list of -1, 0, 1 length total # of GT boxes in this image. 973 | ! Ignored_gt: 0 -> keep, don't ignore. 1 -> Ignore, but don't treat as FP. -1 -> Ignore, treat as FP. 974 | ! ignored_dt is a list of -1, 0, 1 length total # of DT boxes in the image 975 | ! Ignored_dt: 0 -> keep (matches height and class), 1 -> Doesn't match height (too small), -1 -> class mismatch 976 | ! dc_bboxes: list of bounding boxes that are DontCare. These have value -1 in ignored_gt 977 | ''' 978 | return num_valid_gt, ignored_gt, ignored_dt, dc_bboxes 979 | 980 | 981 | @numba.jit(nopython=True) 982 | #! boxes: gt. query_boxes: detections. 983 | #! returns an N x K matrix of ious. 984 | def image_box_overlap(boxes, query_boxes, criterion=-1): 985 | N = boxes.shape[0] #! total number of gt boxes 986 | K = query_boxes.shape[0] #! total number of detections 987 | overlaps = np.zeros((N, K), dtype=boxes.dtype) #! type np float 988 | for k in range(K): 989 | qbox_area = ((query_boxes[k, 2] - query_boxes[k, 0]) * 990 | (query_boxes[k, 3] - query_boxes[k, 1])) #! area of the k-th dt box 991 | for n in range(N): 992 | iw = (min(boxes[n, 2], query_boxes[k, 2]) - max( 993 | boxes[n, 0], query_boxes[k, 0])) 994 | if iw > 0: 995 | ih = (min(boxes[n, 3], query_boxes[k, 3]) - max( 996 | boxes[n, 1], query_boxes[k, 1])) 997 | if ih > 0: 998 | if criterion == -1: 999 | ua = ( 1000 | (boxes[n, 2] - boxes[n, 0]) * 1001 | (boxes[n, 3] - boxes[n, 1]) + qbox_area - iw * ih) 1002 | elif criterion == 0: 1003 | ua = ((boxes[n, 2] - boxes[n, 0]) * 1004 | (boxes[n, 3] - boxes[n, 1])) 1005 | elif criterion == 1: 1006 | ua = qbox_area 1007 | else: 1008 | ua = 1.0 1009 | overlaps[n, k] = iw * ih / ua #! yada yada i'm pretty sure this is just iou 1010 | #? Why does this calculate iou between boxes from different images too? 1011 | return overlaps 1012 | 1013 | 1014 | def bev_box_overlap(boxes, qboxes, criterion=-1): 1015 | riou = rotate_iou_gpu_eval(boxes, qboxes, criterion) 1016 | return riou 1017 | 1018 | 1019 | # @numba.jit(nopython=True, parallel=True) 1020 | @numba.jit(nopython=True, parallel=False) 1021 | def d3_box_overlap_kernel(boxes, 1022 | qboxes, 1023 | rinc, 1024 | criterion=-1, 1025 | z_axis=1, 1026 | z_center=1.0): 1027 | """ 1028 | z_axis: the z (height) axis. 1029 | z_center: unified z (height) center of box. 
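    Height-overlap sketch (assuming z_center=1.0, i.e. KITTI camera coords where a box's
    origin sits at its bottom): a box then spans [z - h, z] along the height axis, so
        iw = min(z1, z2) - max(z1 - h1, z2 - h2)
    and the 3d intersection volume is iw * rinc (rinc holds the BEV intersection area on entry).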
1030 | """ 1031 | N, K = boxes.shape[0], qboxes.shape[0] 1032 | for i in range(N): 1033 | for j in range(K): 1034 | if rinc[i, j] > 0: 1035 | min_z = min( 1036 | boxes[i, z_axis] + boxes[i, z_axis + 3] * (1 - z_center), 1037 | qboxes[j, z_axis] + qboxes[j, z_axis + 3] * (1 - z_center)) 1038 | max_z = max( 1039 | boxes[i, z_axis] - boxes[i, z_axis + 3] * z_center, 1040 | qboxes[j, z_axis] - qboxes[j, z_axis + 3] * z_center) 1041 | iw = min_z - max_z 1042 | if iw > 0: 1043 | area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5] 1044 | area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5] 1045 | inc = iw * rinc[i, j] 1046 | if criterion == -1: 1047 | ua = (area1 + area2 - inc) 1048 | elif criterion == 0: 1049 | ua = area1 1050 | elif criterion == 1: 1051 | ua = area2 1052 | else: 1053 | ua = 1.0 1054 | rinc[i, j] = inc / ua 1055 | else: 1056 | rinc[i, j] = 0.0 1057 | 1058 | 1059 | def d3_box_overlap(boxes, qboxes, criterion=-1, z_axis=1, z_center=1.0): 1060 | """kitti camera format z_axis=1. 1061 | """ 1062 | bev_axes = list(range(7)) 1063 | bev_axes.pop(z_axis + 3) 1064 | bev_axes.pop(z_axis) 1065 | rinc = rotate_iou_gpu_eval(boxes[:, bev_axes], qboxes[:, bev_axes], 2) 1066 | d3_box_overlap_kernel(boxes, qboxes, rinc, criterion, z_axis, z_center) 1067 | return rinc 1068 | 1069 | 1070 | #? It appears that if we're computing recall thresholds, we set compute_fp to be False. 1071 | @numba.jit(nopython=True) 1072 | def compute_statistics_jit(overlaps, 1073 | gt_datas, 1074 | dt_datas, 1075 | ignored_gt, 1076 | ignored_det, 1077 | dc_bboxes, 1078 | metric, 1079 | min_overlap, 1080 | thresh=0, 1081 | compute_fp=False, 1082 | compute_aos=False): 1083 | 1084 | det_size = dt_datas.shape[0] 1085 | gt_size = gt_datas.shape[0] 1086 | dt_scores = dt_datas[:, -1] 1087 | dt_alphas = dt_datas[:, 4] 1088 | gt_alphas = gt_datas[:, 4] 1089 | dt_bboxes = dt_datas[:, :4] 1090 | # gt_bboxes = gt_datas[:, :4] 1091 | 1092 | assigned_detection = [False] * det_size #! probably storing whether each detection was assigned to a gt. 1093 | ignored_threshold = [False] * det_size #! array storing if detection score was below thresh 1094 | if compute_fp: 1095 | for i in range(det_size): 1096 | if (dt_scores[i] < thresh): 1097 | ignored_threshold[i] = True 1098 | 1099 | 1100 | NO_DETECTION = -10000000 1101 | tp, fp, fn, similarity = 0, 0, 0, 0 1102 | # thresholds = [0.0] 1103 | # delta = [0.0] 1104 | thresholds = np.zeros((gt_size, )) 1105 | thresh_idx = 0 #! Likely used for computing thresholds? 1106 | delta = np.zeros((gt_size, )) 1107 | delta_idx = 0 1108 | 1109 | 1110 | #! My own code ---- 1111 | #! For each gt box, store whether it was -1 (ignored), 0 (false negative (unmatched)), 1 (true positive (matched)) 1112 | gt_box_type = np.full((gt_size, ), -1) 1113 | #! For each dt box, store whether it was -1 (irrelevant), 0 (false positive (unmatched)), 1 (true positive (matched)) 1114 | #! Note that -1 could mean it was in don't care territory, was of a different class, etc 1115 | dt_box_type = np.full((det_size, ), -1) 1116 | #! Stores idx of matched object 1117 | gt_box_matched_idx = np.full((gt_size, ), -1) 1118 | dt_box_matched_idx = np.full((det_size, ), -1) 1119 | 1120 | #! loop over gt boxes 1121 | for i in range(gt_size): 1122 | if ignored_gt[i] == -1: #! Don't match completely irrelevant gt boxes 1123 | continue 1124 | 1125 | det_idx = -1 #! the best detection for this gt stored 1126 | valid_detection = NO_DETECTION #! Stores the max score so far of the detection. 1127 | max_overlap = 0 #! 
The overlap for the best detection; "best" means highest overlap.
1128 |         assigned_ignored_det = False
1129 | 
1130 |         for j in range(det_size):
1131 |             if (ignored_det[j] == -1): #! Don't match with completely irrelevant dt boxes
1132 |                 continue
1133 |             if (assigned_detection[j]): #! if dt was already assigned, skip (assigned to a better gt)
1134 |                 continue
1135 |             if (ignored_threshold[j]): #! if dt score is below threshold, skip
1136 |                 continue
1137 | 
1138 | 
1139 |             overlap = overlaps[j, i] #! Current overlap between this dt and this gt.
1140 |             dt_score = dt_scores[j] #! score of current dt
1141 | 
1142 |             #! If compute_fp is false, this is the only part that matters.
1143 |             #! Just finds the detection with sufficient overlap and highest score.
1144 |             if (not compute_fp and (overlap > min_overlap)
1145 |                     and dt_score > valid_detection):
1146 |                 det_idx = j
1147 |                 valid_detection = dt_score
1148 | 
1149 |             #! compute_fp is true. This means we're actually doing the metric and not making thresholds.
1150 |             #! If overlap is sufficient, (better than previous overlap or previous was a det we don't care about)
1151 |             #! and the current det is something we care about,
1152 |             #! assign. Update overlap, det_idx. Note that we 1-out valid_detection since we don't rank by score;
1153 |             #! we 1-out it to show that we have assigned a det we care about.
1154 |             #! When compute_fp is true, we choose based on overlap.
1155 |             elif (compute_fp and (overlap > min_overlap)
1156 |                     and (overlap > max_overlap or assigned_ignored_det)
1157 |                     and ignored_det[j] == 0):
1158 |                 max_overlap = overlap
1159 |                 det_idx = j
1160 |                 valid_detection = 1
1161 |                 assigned_ignored_det = False
1162 | 
1163 |             #! compute_fp is true.
1164 |             #! If overlap is sufficient, nothing was assigned yet, and it's a detection we don't care about,
1165 |             #! we assign it. Note that we leave max_overlap as default so anything can overwrite this.
1166 |             #? One curious thing is that of the dets we don't care about, if we assign the first one, the next one
1167 |             #? can't overwrite it because valid_detection != NO_DETECTION
1168 |             elif (compute_fp and (overlap > min_overlap)
1169 |                     and (valid_detection == NO_DETECTION)
1170 |                     and ignored_det[j] == 1):
1171 |                 det_idx = j
1172 |                 valid_detection = 1
1173 |                 assigned_ignored_det = True
1174 | 
1175 |         #! If we couldn't match this gt to anything and it's something we care about, it's a false negative.
1176 |         if (valid_detection == NO_DETECTION) and ignored_gt[i] == 0:
1177 |             fn += 1
1178 |             gt_box_type[i] = 0
1179 | 
1180 |         #! If we did match this gt to something and
1181 |         #! (gt is something we don't care about or det is something we don't care about)
1182 |         #! we assign it. Why? Probably because if we don't assign it, it'll be a false positive later (unassigned det).
1183 |         elif ((valid_detection != NO_DETECTION)
1184 |                 and (ignored_gt[i] == 1 or ignored_det[det_idx] == 1)):
1185 |             assigned_detection[det_idx] = True
1186 |             gt_box_type[i] = -1
1187 |             dt_box_type[det_idx] = -1
1188 | 
1189 |         #! If we did match this gt to something and
1190 |         #! the remaining condition is: (gt is something we care about and det is something we care about)
1191 |         #! it's a good match! True positive.
1192 |         #! Here, we also (basically) append the det score to the end of thresholds.
1193 |         #! Then, assign detection True
1194 |         elif valid_detection != NO_DETECTION:
1195 |             # only a TP adds a threshold. 
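            #? Bookkeeping sketch for a TP: the gt and dt get cross-linked through
            #? gt/dt_box_matched_idx, and the matched dt's score is pushed onto
            #? `thresholds`, so thresholds[:thresh_idx] ends up being exactly the
            #? scores of matched detections; that is what get_thresholds consumes.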
1196 |             gt_box_type[i] = 1
1197 |             dt_box_type[det_idx] = 1
1198 |             gt_box_matched_idx[i] = det_idx
1199 |             dt_box_matched_idx[det_idx] = i
1200 |             tp += 1
1201 |             # thresholds.append(dt_scores[det_idx])
1202 |             thresholds[thresh_idx] = dt_scores[det_idx]
1203 |             thresh_idx += 1
1204 |             if compute_aos:
1205 |                 # delta.append(gt_alphas[i] - dt_alphas[det_idx])
1206 |                 delta[delta_idx] = gt_alphas[i] - dt_alphas[det_idx]
1207 |                 delta_idx += 1
1208 | 
1209 |             assigned_detection[det_idx] = True
1210 |         #! This should be when there is no detection and gt is something we don't care about
1211 |         else:
1212 |             gt_box_type[i] = -1
1213 | 
1214 | 
1215 | 
1216 |     #? Note that so far, we have not used dc boxes. This is because they are only used for false positive calculation
1217 |     #? as we haven't looked at unmatched detections yet. If an unmatched detection is inside don't care, we don't count it as FP
1218 |     if compute_fp:
1219 |         #! loop through detections
1220 |         for i in range(det_size):
1221 |             #! When is a detection a false positive? Well, it is a false positive if it is:
1222 |             #! NOT assigned to a gt, and
1223 |             #! NOT of a different class, and
1224 |             #! NOT of the same class but of a different size, and
1225 |             #! NOT below the score threshold.
1226 |             if (not (assigned_detection[i] or ignored_det[i] == -1
1227 |                      or ignored_det[i] == 1 or ignored_threshold[i])):
1228 |                 fp += 1
1229 |                 dt_box_type[i] = 0 #! false positive!
1230 | 
1231 |         #! I believe this is the number of detections we harvest from don't care regions. We'll subtract it from fp.
1232 |         nstuff = 0
1233 |         #! Metric == 0 is 2d bbox
1234 |         if metric == 0:
1235 |             #! ious between dt boxes and dc boxes.
1236 |             overlaps_dt_dc = image_box_overlap(dt_bboxes, dc_bboxes, 0)
1237 |             for i in range(dc_bboxes.shape[0]):
1238 |                 for j in range(det_size):
1239 |                     #! skip stuff that doesn't add to fp right above
1240 |                     if (assigned_detection[j]):
1241 |                         continue
1242 |                     if (ignored_det[j] == -1 or ignored_det[j] == 1):
1243 |                         continue
1244 |                     if (ignored_threshold[j]):
1245 |                         continue
1246 |                     #! if the overlap between the two is bigger than min_overlap
1247 |                     #! assign the detection to dc and add it to the count we take away from fp.
1248 |                     if overlaps_dt_dc[j, i] > min_overlap:
1249 |                         assigned_detection[j] = True
1250 |                         nstuff += 1
1251 |                         dt_box_type[j] = -1 #! nvm, don't care about this one
1252 |         #! take nstuff away from fp.
1253 |         fp -= nstuff
1254 | 
1255 |         #! AOS bookkeeping: each fp contributes 0 similarity, each tp contributes (1 + cos(alpha error)) / 2
1256 |         if compute_aos:
1257 |             tmp = np.zeros((fp + delta_idx, ))
1258 |             # tmp = [0] * fp
1259 |             for i in range(delta_idx):
1260 |                 tmp[i + fp] = (1.0 + np.cos(delta[i])) / 2.0
1261 |                 # tmp.append((1.0 + np.cos(delta[i])) / 2.0)
1262 |             # assert len(tmp) == fp + tp
1263 |             # assert len(delta) == tp
1264 |             if tp > 0 or fp > 0:
1265 |                 similarity = np.sum(tmp)
1266 |             else:
1267 |                 similarity = -1
1268 | 
1269 |     '''
1270 |     ! Let's have a conditional here.
1271 |     ! if not compute_fp: tp and fn are valid; fp and similarity are nonsense.
1272 |     !     thresholds[:thresh_idx] holds the scores of all the matched dts,
1273 |     !     so we have the tools to calculate recall and pick score thresholds.
1274 |     ! if compute_fp: tp, fp, fn (and similarity, if compute_aos) are all valid.
1275 | ''' 1276 | return tp, fp, fn, similarity, thresholds[:thresh_idx], \ 1277 | (gt_box_type, dt_box_type, gt_box_matched_idx, dt_box_matched_idx) 1278 | 1279 | 1280 | def get_split_parts(num, num_part): 1281 | same_part = num // num_part 1282 | remain_num = num % num_part 1283 | if remain_num == 0: 1284 | return [same_part] * num_part 1285 | else: 1286 | return [same_part] * num_part + [remain_num] 1287 | 1288 | 1289 | # @numba.jit(nopython=True) 1290 | def fused_compute_statistics(overlaps, 1291 | pr, 1292 | gt_nums, 1293 | dt_nums, 1294 | dc_nums, 1295 | gt_datas, 1296 | dt_datas, 1297 | dontcares, 1298 | ignored_gts, 1299 | ignored_dets, 1300 | metric, 1301 | min_overlap, 1302 | thresholds, 1303 | extras, 1304 | compute_aos=False): 1305 | gt_num = 0 1306 | dt_num = 0 1307 | dc_num = 0 1308 | 1309 | for i in range(gt_nums.shape[0]): 1310 | 1311 | for t, thresh in enumerate(thresholds): 1312 | overlap = overlaps[dt_num:dt_num + dt_nums[i], gt_num:gt_num + 1313 | gt_nums[i]] 1314 | 1315 | gt_data = gt_datas[gt_num:gt_num + gt_nums[i]] 1316 | dt_data = dt_datas[dt_num:dt_num + dt_nums[i]] 1317 | ignored_gt = ignored_gts[gt_num:gt_num + gt_nums[i]] 1318 | ignored_det = ignored_dets[dt_num:dt_num + dt_nums[i]] 1319 | dontcare = dontcares[dc_num:dc_num + dc_nums[i]] 1320 | tp, fp, fn, similarity, _, extra = compute_statistics_jit( 1321 | overlap, 1322 | gt_data, 1323 | dt_data, 1324 | ignored_gt, 1325 | ignored_det, 1326 | dontcare, 1327 | metric, 1328 | min_overlap=min_overlap, 1329 | thresh=thresh, #! note that we pass in threshold we generated 1330 | compute_fp=True, 1331 | compute_aos=compute_aos) 1332 | pr[t, 0] += tp 1333 | pr[t, 1] += fp 1334 | pr[t, 2] += fn 1335 | if similarity != -1: 1336 | pr[t, 3] += similarity 1337 | 1338 | gt_box_type, dt_box_type, gt_box_matched_idx, dt_box_matched_idx = extra 1339 | 1340 | extras['gt_box_types'][t].append(gt_box_type) 1341 | extras['dt_box_types'][t].append(dt_box_type) 1342 | extras['gt_box_matched_idxs'][t].append(gt_box_matched_idx) 1343 | extras['dt_box_matched_idxs'][t].append(dt_box_matched_idx) 1344 | 1345 | # if t == len(thresholds) - 1: #! just do for last threshold, since last one is smallest 1346 | # gt_box_types.append(gt_box_type) 1347 | # dt_box_types.append(dt_box_type) 1348 | # # print(thresh) 1349 | # # print(gt_box_type) 1350 | # # print(dt_box_type) 1351 | # # print(tp) 1352 | # # print(fp) 1353 | # # print(fn) 1354 | # # print(thresh == 0.05203958600759506) 1355 | # # assert 1 == 2 1356 | # # print(thresh) 1357 | 1358 | gt_num += gt_nums[i] 1359 | dt_num += dt_nums[i] 1360 | dc_num += dc_nums[i] 1361 | 1362 | 1363 | def calculate_iou_partly(gt_annos, 1364 | dt_annos, 1365 | metric, 1366 | num_parts=50, 1367 | z_axis=1, 1368 | z_center=1.0): 1369 | """fast iou algorithm. this function can be used independently to 1370 | do result analysis. 1371 | Args: 1372 | gt_annos: dict, must from get_label_annos() in kitti_common.py #! Actually a list of dicts 1373 | dt_annos: dict, must from get_label_annos() in kitti_common.py 1374 | metric: eval type. 0: bbox, 1: bev, 2: 3d 1375 | num_parts: int. a parameter for fast calculate algorithm 1376 | z_axis: height axis. kitti camera use 1, lidar use 2. 
1377 | 1378 | annos = [ 1379 | { 1380 | 'name': np.array(["Car", "Pedestrian", "Car", ...]), 1381 | 'truncated': np.array([0.1, 0.5, 1.0, ...]), 1382 | 'occluded': np.array([0, 1, 2, 3, 0, ...]), 1383 | 'alpha': np.array([-3.14, 3.14, 0.0, ...]), 1384 | 'bbox': np.array([ 1385 | [x1, y1, x2, y2], 1386 | [left, top, right, bot], 1387 | [0.0, 0.0, 385.0, 1280.0], 1388 | ... 1389 | ]), #! N x 4 1390 | 'dimensions': Don't care for now 1391 | 'location': Don't care for now 1392 | 'rotation_y': Don't care for now 1393 | 'score': np.array([ 1394 | 0.1, 1395 | 0.3, 1396 | ... 1397 | ]) #! or all 0s for gt 1398 | } 1399 | ] 1400 | """ 1401 | assert len(gt_annos) == len(dt_annos) 1402 | total_dt_num = np.stack([len(a["name"]) for a in dt_annos], 0) #! a list of number of annotations in each file 1403 | total_gt_num = np.stack([len(a["name"]) for a in gt_annos], 0) 1404 | 1405 | num_examples = len(gt_annos) 1406 | #! returns a list of numbers, which is num_examples split up into num_parts, with a remainder at the end. 1407 | #! So (13, 2) would return [6, 6, 1] or something 1408 | split_parts = get_split_parts(num_examples, num_parts) 1409 | 1410 | parted_overlaps = [] 1411 | example_idx = 0 1412 | bev_axes = list(range(3)) 1413 | bev_axes.pop(z_axis) 1414 | for num_part in split_parts: 1415 | gt_annos_part = gt_annos[example_idx:example_idx + num_part] #! basically chop up dataset into parts and iterate 1416 | dt_annos_part = dt_annos[example_idx:example_idx + num_part] 1417 | if metric == 0: #! This is the 2D bbox part 1418 | #! appears like it concats ALL the bounding boxes in the entire dataset into a super tall array 1419 | #? Correction: PART of the dataset gt_annos_part and dt_annos_part 1420 | #! shape (total number of bboxes, 4) 1421 | gt_boxes = np.concatenate([a["bbox"] for a in gt_annos_part], 0) 1422 | dt_boxes = np.concatenate([a["bbox"] for a in dt_annos_part], 0) 1423 | #! 
returns np array of shape (total # of gt boxes, total # of dt boxes) 1424 | overlap_part = image_box_overlap(gt_boxes, dt_boxes) 1425 | elif metric == 1: 1426 | loc = np.concatenate( 1427 | [a["location"][:, bev_axes] for a in gt_annos_part], 0) 1428 | dims = np.concatenate( 1429 | [a["dimensions"][:, bev_axes] for a in gt_annos_part], 0) 1430 | rots = np.concatenate([a["rotation_y"] for a in gt_annos_part], 0) 1431 | gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1432 | axis=1) 1433 | loc = np.concatenate( 1434 | [a["location"][:, bev_axes] for a in dt_annos_part], 0) 1435 | dims = np.concatenate( 1436 | [a["dimensions"][:, bev_axes] for a in dt_annos_part], 0) 1437 | rots = np.concatenate([a["rotation_y"] for a in dt_annos_part], 0) 1438 | dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1439 | axis=1) 1440 | overlap_part = bev_box_overlap(gt_boxes, 1441 | dt_boxes).astype(np.float64) 1442 | elif metric == 2: 1443 | loc = np.concatenate([a["location"] for a in gt_annos_part], 0) 1444 | dims = np.concatenate([a["dimensions"] for a in gt_annos_part], 0) 1445 | rots = np.concatenate([a["rotation_y"] for a in gt_annos_part], 0) 1446 | gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1447 | axis=1) 1448 | loc = np.concatenate([a["location"] for a in dt_annos_part], 0) 1449 | dims = np.concatenate([a["dimensions"] for a in dt_annos_part], 0) 1450 | rots = np.concatenate([a["rotation_y"] for a in dt_annos_part], 0) 1451 | dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1452 | axis=1) 1453 | overlap_part = d3_box_overlap( 1454 | gt_boxes, dt_boxes, z_axis=z_axis, 1455 | z_center=z_center).astype(np.float64) 1456 | else: 1457 | raise ValueError("unknown metric") 1458 | 1459 | parted_overlaps.append(overlap_part) #! ends up being a list of iou matrices b/n parts of the dataset 1460 | example_idx += num_part 1461 | overlaps = [] 1462 | example_idx = 0 1463 | for j, num_part in enumerate(split_parts): 1464 | gt_annos_part = gt_annos[example_idx:example_idx + num_part] 1465 | dt_annos_part = dt_annos[example_idx:example_idx + num_part] #! these two aren't used... 1466 | gt_num_idx, dt_num_idx = 0, 0 1467 | for i in range(num_part): 1468 | gt_box_num = total_gt_num[example_idx + i] 1469 | dt_box_num = total_dt_num[example_idx + i] 1470 | overlaps.append( 1471 | parted_overlaps[j][gt_num_idx:gt_num_idx + 1472 | gt_box_num, dt_num_idx:dt_num_idx + 1473 | dt_box_num]) #! slice out the part that corresponds to a single image 1474 | gt_num_idx += gt_box_num 1475 | dt_num_idx += dt_box_num 1476 | example_idx += num_part 1477 | 1478 | #! In the end, overlaps becomes a list of matrices. The list is length len(dt_annos) == len(gt_annos) (number of images) 1479 | #! In each index is a iou matrix shape (number of gt boxes in that image, number of dt boxes in that image) 1480 | #! parted_overlaps is overlap matrices over parts of dataset 1481 | #! total_gt_num is list of number of boxes in each image. 1482 | 1483 | return overlaps, parted_overlaps, total_gt_num, total_dt_num 1484 | 1485 | 1486 | def _prepare_data(gt_annos, dt_annos, current_class, difficulty, extra_info=None): 1487 | gt_datas_list = [] 1488 | dt_datas_list = [] 1489 | total_dc_num = [] 1490 | ignored_gts, ignored_dets, dontcares = [], [], [] 1491 | total_num_valid_gt = 0 1492 | 1493 | gt_extra_info, dt_extra_info, general_extra_info = extra_info 1494 | 1495 | #! 
Loop through each image 1496 | for i in range(len(gt_annos)): 1497 | if 'clean_data_function' not in general_extra_info.keys() or general_extra_info['clean_data_function'] == None: 1498 | rets = clean_data(gt_annos[i], dt_annos[i], current_class, difficulty, \ 1499 | extra_info_single=(gt_extra_info[i], dt_extra_info[i], general_extra_info)) 1500 | else: 1501 | rets = general_extra_info['clean_data_function']( 1502 | gt_annos[i], dt_annos[i], current_class, difficulty, \ 1503 | extra_info_single=(gt_extra_info[i], dt_extra_info[i], general_extra_info) 1504 | ) 1505 | 1506 | num_valid_gt, ignored_gt, ignored_det, dc_bboxes = rets 1507 | ignored_gts.append(np.array(ignored_gt, dtype=np.int64)) 1508 | ignored_dets.append(np.array(ignored_det, dtype=np.int64)) 1509 | #! Ends up being a list of ignored_gts. etc... 1510 | 1511 | if len(dc_bboxes) == 0: 1512 | dc_bboxes = np.zeros((0, 4)).astype(np.float64) 1513 | else: 1514 | dc_bboxes = np.stack(dc_bboxes, 0).astype(np.float64) 1515 | #! dc_boxes is a np array shape (# of don't care boxes IN THIS IMAGE, 4) 1516 | #! Each row is a Don't Care bbox 1517 | total_dc_num.append(dc_bboxes.shape[0]) 1518 | #! Number of don't care boxes. total_dc_num is a list of # of dc_boxes for each iamge 1519 | dontcares.append(dc_bboxes) 1520 | #! list of list of dc_boxes for each image 1521 | 1522 | total_num_valid_gt += num_valid_gt 1523 | #! counter of total number of valid gt boxes 1524 | 1525 | #! bbox index is N x 4 1526 | #! alpha index is N -> with the np.newaxis, it's N x 1 1527 | #! So concat makes it an N x 5 with the "5" dimension being [x1, y1, x2, y2, alpha] 1528 | gt_datas = np.concatenate( 1529 | [gt_annos[i]["bbox"], gt_annos[i]["alpha"][..., np.newaxis]], 1) 1530 | 1531 | #! Similarly, N x 6. "6" dimension is [x1, y1, x2, y2, alpha, score] 1532 | dt_datas = np.concatenate([ 1533 | dt_annos[i]["bbox"], dt_annos[i]["alpha"][..., np.newaxis], 1534 | dt_annos[i]["score"][..., np.newaxis] 1535 | ], 1) 1536 | 1537 | gt_datas_list.append(gt_datas) 1538 | dt_datas_list.append(dt_datas) 1539 | #! list lists of boxes 1540 | #! I don't know why they do this instead of np.array. This just makes a length # of images array of 1541 | #! number of dontcare boxes in each image. 1542 | total_dc_num = np.stack(total_dc_num, axis=0) 1543 | ''' 1544 | ? All the arrays here have length = # of images in dataset 1545 | ! gt_datas_list: list of (N x 5 arrays) 1546 | ! dt_datas_list: list of (N x 6 arrays) 1547 | ! ignored_gts: list of (length N array (vals -1, 0, or 1)) 1548 | ! ignored_dets: list of (length N array (vals -1, 0, or 1)) 1549 | ! dontcares: list of (# of don't care boxes in image x 4 arrays) 1550 | ! total_dc_num: list of (# of don'tcare boxes in image value) 1551 | ! total_num_valid_gt: total number of valid gts (int) 1552 | ''' 1553 | return (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, dontcares, 1554 | total_dc_num, total_num_valid_gt) 1555 | 1556 | 1557 | def eval_class(gt_annos, 1558 | dt_annos, 1559 | current_classes, 1560 | difficultys, # ! Is a tuple (0, 1, 2) 1561 | metric, #! is 0 (bbox), 1 (bev), or 2 (3d) 1562 | min_overlaps, #! I believe this is shape (2, 3, num_classes) where: 1563 | #! 2 is just moderate thresholds, easy thresholds. DIFFERENT FROM BBOX DIFFICULTY 1564 | #! 3 is the different metrics (bbox, bev, 3d), 1565 | #! num_classes is for threshold for each class 1566 | compute_aos=False, 1567 | z_axis=1, 1568 | z_center=1.0, 1569 | num_parts=50, 1570 | extra_info=None): 1571 | """Kitti eval. support 2d/bev/3d/aos eval. 
support 0.5:0.05:0.95 coco AP. 1572 | Args: 1573 | gt_annos: dict, must from get_label_annos() in kitti_common.py 1574 | dt_annos: dict, must from get_label_annos() in kitti_common.py 1575 | current_class: int, 0: car, 1: pedestrian, 2: cyclist 1576 | difficulty: int. eval difficulty, 0: easy, 1: normal, 2: hard # ! No, actually a tuple (0, 1, 2) 1577 | metric: eval type. 0: bbox, 1: bev, 2: 3d 1578 | min_overlap: float, min overlap. official: 1579 | [[0.7, 0.5, 0.5], [0.7, 0.5, 0.5], [0.7, 0.5, 0.5]] 1580 | format: [metric, class]. choose one from matrix above. 1581 | num_parts: int. a parameter for fast calculate algorithm 1582 | extra_info: a tuple (gt_extra_info, dt_extra_info, general_extra_info). Check get_kitti_eval for more details 1583 | 1584 | Returns: 1585 | dict of recall, precision and aos 1586 | """ 1587 | # print(len(gt_annos)) 1588 | # print(len(dt_annos)) 1589 | assert len(gt_annos) == len(dt_annos) 1590 | num_examples = len(gt_annos) 1591 | split_parts = get_split_parts(num_examples, num_parts) 1592 | 1593 | rets = calculate_iou_partly( 1594 | dt_annos, 1595 | gt_annos, 1596 | metric, 1597 | num_parts, 1598 | z_axis=z_axis, 1599 | z_center=z_center) 1600 | overlaps, parted_overlaps, total_dt_num, total_gt_num = rets 1601 | #! In the end, overlaps becomes a list of matrices. The list is length len(dt_annos) == len(gt_annos) (number of images) 1602 | #! In each index is a iou matrix shape (number of gt boxes in that image, number of dt boxes in that image) 1603 | #! parted_overlaps is overlap matrices over parts of dataset 1604 | #! total_gt_num is list of number of boxes in each image. 1605 | 1606 | N_SAMPLE_PTS = 41 1607 | 1608 | num_minoverlap = len(min_overlaps) #! moderate, or easy 1609 | num_class = len(current_classes) 1610 | num_difficulty = len(difficultys) 1611 | 1612 | #! A single point would be the precision for a class, a certain bbox difficulty, the type of threhsolds (mod or easy) 1613 | precision = np.zeros( 1614 | [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1615 | recall = np.zeros( 1616 | [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1617 | 1618 | #! To store gt_box_types, dt_box_types for each class, per difficulty, per num_minoverlap 1619 | extrass = { 1620 | "gt_box_typess": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object), 1621 | "dt_box_typess": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object), 1622 | "gt_box_matched_idxss": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object), 1623 | "dt_box_matched_idxss": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object) 1624 | } 1625 | 1626 | 1627 | aos = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1628 | all_thresholds = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1629 | 1630 | 1631 | #! Per class 1632 | for m, current_class in enumerate(current_classes): 1633 | #! Per difficulty 1634 | for l, difficulty in enumerate(difficultys): 1635 | gt_extra_info, dt_extra_info, general_extra_info = extra_info 1636 | general_extra_info['curr_metric'] = metric #! 
pass on which metric we're doing 1637 | extra_info = (gt_extra_info, dt_extra_info, general_extra_info) 1638 | 1639 | rets = _prepare_data(gt_annos, dt_annos, current_class, difficulty, extra_info=extra_info) 1640 | (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, 1641 | dontcares, total_dc_num, total_num_valid_gt) = rets 1642 | ''' 1643 | ? All the arrays here have length = # of images in dataset 1644 | ! gt_datas_list: list of (N x 5 arrays) 1645 | ! dt_datas_list: list of (N x 6 arrays) 1646 | ! ignored_gts: list of (length N array (vals -1, 0, or 1)) 1647 | ! ignored_dets: list of (length N array (vals -1, 0, or 1)) 1648 | ! dontcares: list of (# of don't care boxes in image x 4 arrays) 1649 | ! total_dc_num: list of (# of don'tcare boxes in image value) 1650 | ! total_num_valid_gt: total number of valid gts (int) 1651 | ''' 1652 | 1653 | #! Runs twice, first for moderate overall difficulty setting, then easy. 1654 | for k, min_overlap in enumerate(min_overlaps[:, metric, m]): 1655 | thresholdss = [] 1656 | 1657 | #! Loop over images in dataset. So single image at a time. 1658 | for i in range(len(gt_annos)): 1659 | 1660 | rets = compute_statistics_jit( 1661 | overlaps[i], #! iou values b/n gt and dt for single image 1662 | gt_datas_list[i], #! N x 5 array 1663 | dt_datas_list[i], #! N x 6 array 1664 | ignored_gts[i], #! Length N array of -1, 0, 1 1665 | ignored_dets[i], #! Length N array of -1, 0, 1 1666 | dontcares[i], #! Length number of don't care boxes x 4 1667 | metric, #! 0, 1, or 2 (bbox, bev, 3d) 1668 | min_overlap=min_overlap, #! float minimum iou threshold for positive 1669 | thresh=0.0, #! ignore dt with scores below this. 1670 | compute_fp=False) 1671 | tp, fp, fn, similarity, thresholds, _ = rets #! Don't carea bout gt_box_type, dt_box_type here 1672 | thresholdss += thresholds.tolist() 1673 | #! A 1d array of scores of matched dts. 1674 | thresholdss = np.array(thresholdss) 1675 | 1676 | thresholds = get_thresholds(thresholdss, total_num_valid_gt) 1677 | thresholds = np.array(thresholds) 1678 | #! N_SAMPLE_PTS length array of scores, decreasing. these are the thresholds 1679 | 1680 | all_thresholds[m, l, k, :len(thresholds)] = thresholds 1681 | #! Threshold for each combo 1682 | #! storing 4 "things" for each threshold. 1683 | #? [tp, fp, fn, similarity] 1684 | pr = np.zeros([len(thresholds), 4]) 1685 | 1686 | #! My addition - stores information about gt/dt boxes (whether ignored, fn, tn, fp) 1687 | #! ends up being a list of np arrays 1688 | #! CHANGED TO SAVE @ EVERY THRESHOLD. Now, should be a 1689 | #! Numpy Array (length N_SAMPLE_PTS), of list (length # of frames), of np arrays (# of objects in each frame) 1690 | extras = { 1691 | "gt_box_types": np.empty(N_SAMPLE_PTS, dtype=object), 1692 | "dt_box_types": np.empty(N_SAMPLE_PTS, dtype=object), 1693 | "gt_box_matched_idxs": np.empty(N_SAMPLE_PTS, dtype=object), 1694 | "dt_box_matched_idxs": np.empty(N_SAMPLE_PTS, dtype=object) 1695 | } 1696 | extras['gt_box_types'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1697 | extras['dt_box_types'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1698 | extras['gt_box_matched_idxs'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1699 | extras['dt_box_matched_idxs'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1700 | 1701 | #! Again, we're splitting up the dataset into parts and running it in. 
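#! An illustration I'm adding (not in the original source) of what this stage produces:
#! thresholds is up to N_SAMPLE_PTS score cutoffs, spaced roughly evenly in recall, and each pass of
#! fused_compute_statistics below accumulates pr[t] = [tp, fp, fn, similarity] at cutoff thresholds[t].
#! For example, if pr[t] = [80, 20, 40, 0] at some threshold t, the loop further down computes:
#!     precision[m, l, k, t] = 80 / (80 + 20) = 0.8
#!     recall[m, l, k, t]    = 80 / (80 + 40) = 0.667 (approximately)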
1702 | idx = 0 1703 | # start_time = time.time() 1704 | for j, num_part in enumerate(split_parts): 1705 | gt_datas_part = np.concatenate( 1706 | gt_datas_list[idx:idx + num_part], 0) 1707 | dt_datas_part = np.concatenate( 1708 | dt_datas_list[idx:idx + num_part], 0) 1709 | dc_datas_part = np.concatenate( 1710 | dontcares[idx:idx + num_part], 0) 1711 | ignored_dets_part = np.concatenate( 1712 | ignored_dets[idx:idx + num_part], 0) 1713 | ignored_gts_part = np.concatenate( 1714 | ignored_gts[idx:idx + num_part], 0) 1715 | fused_compute_statistics( 1716 | parted_overlaps[j], 1717 | pr, 1718 | total_gt_num[idx:idx + num_part], 1719 | total_dt_num[idx:idx + num_part], 1720 | total_dc_num[idx:idx + num_part], 1721 | gt_datas_part, 1722 | dt_datas_part, 1723 | dc_datas_part, 1724 | ignored_gts_part, 1725 | ignored_dets_part, 1726 | metric, 1727 | min_overlap=min_overlap, 1728 | thresholds=thresholds, 1729 | extras=extras, 1730 | compute_aos=compute_aos) 1731 | idx += num_part 1732 | # print(time.time() - start_time) 1733 | 1734 | extrass['gt_box_typess'][m, l, k, :] = extras['gt_box_types'] 1735 | extrass['dt_box_typess'][m, l, k, :] = extras['dt_box_types'] 1736 | extrass['gt_box_matched_idxss'][m, l, k, :] = extras['gt_box_matched_idxs'] 1737 | extrass['dt_box_matched_idxss'][m, l, k, :] = extras['dt_box_matched_idxs'] 1738 | for i in range(len(thresholds)): 1739 | precision[m, l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 1]) #! true pos / (true pos + false pos) 1740 | recall[m, l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 2]) #! true pos / (true pos + false neg) 1741 | if compute_aos: 1742 | aos[m, l, k, i] = pr[i, 3] / (pr[i, 0] + pr[i, 1]) 1743 | for i in range(len(thresholds)): 1744 | precision[m, l, k, i] = np.max( 1745 | precision[m, l, k, i:], axis=-1) #? INTERPOLATES AND FLIPS THE ORDER!!! 1746 | #? NOW ITS IN ORDER OF INCREASING RECALL 1747 | if compute_aos: 1748 | aos[m, l, k, i] = np.max(aos[m, l, k, i:], axis=-1) 1749 | 1750 | 1751 | ret_dict = { 1752 | "recall": recall, # [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS] 1753 | "precision": precision, #? 
Order of INCREASING RECALL, so precision DECREASES (as we would expect in a graph) 1754 | "orientation": aos, 1755 | "thresholds": all_thresholds, 1756 | "min_overlaps": min_overlaps 1757 | } 1758 | ret_dict.update(extrass) 1759 | return ret_dict 1760 | 1761 | 1762 | def get_mAP_R11(prec): 1763 | sums = 0 1764 | for i in range(0, prec.shape[-1], 4): 1765 | sums = sums + prec[..., i] 1766 | return sums / 11 * 100 1767 | 1768 | def get_mAP_R40(prec): 1769 | sums = 0 1770 | for i in range(1, prec.shape[-1]): 1771 | sums = sums + prec[..., i] 1772 | return sums / 40 * 100 1773 | 1774 | 1775 | def do_eval_v2(gt_annos, 1776 | dt_annos, 1777 | current_classes, 1778 | min_overlaps, 1779 | compute_aos=False, 1780 | difficultys=(0, 1, 2), 1781 | z_axis=1, 1782 | z_center=1.0): 1783 | # min_overlaps: [num_minoverlap, metric, num_class] 1784 | ret = eval_class( 1785 | gt_annos, 1786 | dt_annos, 1787 | current_classes, 1788 | difficultys, 1789 | 0, 1790 | min_overlaps, 1791 | compute_aos, 1792 | z_axis=z_axis, 1793 | z_center=z_center) 1794 | # ret: [num_class, num_diff, num_minoverlap, num_sample_points] 1795 | mAP_bbox = get_mAP_v2(ret["precision"]) 1796 | mAP_aos = None 1797 | if compute_aos: 1798 | mAP_aos = get_mAP_v2(ret["orientation"]) 1799 | ret = eval_class( 1800 | gt_annos, 1801 | dt_annos, 1802 | current_classes, 1803 | difficultys, 1804 | 1, 1805 | min_overlaps, 1806 | z_axis=z_axis, 1807 | z_center=z_center) 1808 | mAP_bev = get_mAP_v2(ret["precision"]) 1809 | ret = eval_class( 1810 | gt_annos, 1811 | dt_annos, 1812 | current_classes, 1813 | difficultys, 1814 | 2, 1815 | min_overlaps, 1816 | z_axis=z_axis, 1817 | z_center=z_center) 1818 | mAP_3d = get_mAP_v2(ret["precision"]) 1819 | return mAP_bbox, mAP_bev, mAP_3d, mAP_aos 1820 | 1821 | def do_eval_v3(gt_annos, 1822 | dt_annos, 1823 | current_classes, 1824 | min_overlaps, 1825 | compute_aos=False, 1826 | difficultys=(0, 1, 2), 1827 | z_axis=1, 1828 | z_center=1.0, 1829 | extra_info=None, 1830 | eval_modes=[0, 1, 2]): #! 
Represents idxes of types to evaluate (here, it's 0 for bbox, 1 for bev, 2 for 3d) so all 3 1831 | # min_overlaps: [num_minoverlap, metric, num_class] 1832 | types = ["bbox", "bev", "3d"] 1833 | metrics = {} 1834 | for i in eval_modes: 1835 | dprint("Currently on {}".format(types[i])) 1836 | ret = eval_class( 1837 | gt_annos, 1838 | dt_annos, 1839 | current_classes, 1840 | difficultys, 1841 | metric=i, 1842 | min_overlaps=min_overlaps, 1843 | compute_aos=compute_aos, 1844 | z_axis=z_axis, 1845 | z_center=z_center, 1846 | extra_info=extra_info) 1847 | metrics[types[i]] = ret 1848 | return metrics 1849 | 1850 | 1851 | def do_coco_style_eval(gt_annos, 1852 | dt_annos, 1853 | current_classes, 1854 | overlap_ranges, 1855 | compute_aos, 1856 | z_axis=1, 1857 | z_center=1.0): 1858 | # overlap_ranges: [range, metric, num_class] 1859 | min_overlaps = np.zeros([10, *overlap_ranges.shape[1:]]) 1860 | for i in range(overlap_ranges.shape[1]): 1861 | for j in range(overlap_ranges.shape[2]): 1862 | min_overlaps[:, i, j] = np.linspace(*overlap_ranges[:, i, j]) 1863 | mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval_v2( 1864 | gt_annos, 1865 | dt_annos, 1866 | current_classes, 1867 | min_overlaps, 1868 | compute_aos, 1869 | z_axis=z_axis, 1870 | z_center=z_center) 1871 | # ret: [num_class, num_diff, num_minoverlap] 1872 | mAP_bbox = mAP_bbox.mean(-1) 1873 | mAP_bev = mAP_bev.mean(-1) 1874 | mAP_3d = mAP_3d.mean(-1) 1875 | if mAP_aos is not None: 1876 | mAP_aos = mAP_aos.mean(-1) 1877 | return mAP_bbox, mAP_bev, mAP_3d, mAP_aos 1878 | 1879 | 1880 | def print_str(value, *arg, sstream=None): 1881 | if sstream is None: 1882 | sstream = sysio.StringIO() 1883 | sstream.truncate(0) 1884 | sstream.seek(0) 1885 | print(value, *arg, file=sstream) 1886 | return sstream.getvalue() 1887 | 1888 | ''' 1889 | Args: 1890 | gt_annos: list of annotation dicts. Reference kitti_label.py for format 1891 | dt_annos: list of annotation dicts. Reference kitti_label.py for format 1892 | extra_info: tuple (gt_extra_info, dt_extra_info, general_extra_info). 1893 | gt_extra_info and dt_extra_info must be lists of dicts, either empty or of same length as gt_annos and dt_annos. 1894 | general_extra_info is a dict w/ thresholds, current classes, etc 1895 | current_classes: list of strings denoting classes we're evaluating 1896 | ex: ["Car", "Pedestrian", "Cyclist"] 1897 | IoUs: either 1898 | (3, len(current_classes)) numpy array. IoUs[i, c] denotes the required overlap for a detection, for 1899 | metric type i and class current_classes[c]. 1900 | metric type 0 -> bbox 1901 | 1 -> bev 1902 | 2 -> 3d 1903 | or (# overall evaluation levels, 3, len(current_classes)). Same as above but the first dimension denotes the number 1904 | of overall rounds of evaluation we do. For reference, the first case is puffed up to (1, 3, len(current_classes)). 1905 | recall_positions_40: Whether to use 40 recall positions or 11. Default 40, because KITTI is 40 1906 | eval_modes: List of modes to evaluate. 
0 for bbox, 1 for bev, 2 for 3d
1907 | '''
1908 | def kitti_eval(
1909 |     gt_annos,
1910 |     dt_annos,
1911 |     extra_info,
1912 |     current_classes,
1913 |     IoUs,
1914 |     recall_positions_40=True,
1915 |     eval_modes=[0, 1, 2]
1916 | ):
1917 |     try:
1918 |         assert len(gt_annos) == len(dt_annos)
1919 |         assert len(extra_info[0]) == 0 or len(extra_info[0]) == len(gt_annos)
1920 |         assert len(extra_info[1]) == 0 or len(extra_info[1]) == len(dt_annos)
1921 |         # assert IoUs.shape == (3, len(current_classes))
1922 |     except Exception as e:
1923 |         print("gt_annos: {}, dt_annos: {}, gt_extra_info: {}, dt_extra_info: {}, current_classes: {}, IoUs: {}".format(
1924 |             len(gt_annos),
1925 |             len(dt_annos),
1926 |             len(extra_info[0]),
1927 |             len(extra_info[1]),
1928 |             len(current_classes),
1929 |             IoUs.shape
1930 |         ))
1931 |         raise e
1932 | 
1933 |     print("Doing evaluation over: \nclasses {}, \nIoUs {}".format(current_classes, IoUs))
1934 | 
1935 |     if len(IoUs.shape) == 2:
1936 |         IoUs = np.expand_dims(IoUs, axis=0) #! Now (1, 3, len(current_classes)) to fit original format of min_overlaps
1937 |     class_to_name = {
1938 |         i: c for i, c in enumerate(current_classes)
1939 |     } #! int -> string
1940 |     current_classes = list(range(len(current_classes))) #! Change to numbers
1941 | 
1942 |     compute_aos = False
1943 |     if 0 in eval_modes: # only even consider computing aos if bbox is one of the things computed
1944 |         for anno in dt_annos:
1945 |             if anno['alpha'].shape[0] != 0:
1946 |                 if anno['alpha'][0] != -10:
1947 |                     compute_aos = True
1948 |                 break
1949 | 
1950 |     '''
1951 |     metrics: {
1952 |         'bbox': ...
1953 |         'bev': ...
1954 |         '3d': ...
1955 |     }
1956 | 
1957 |     ... = {
1958 |         "recall": recall, # [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]
1959 |         "precision": precision, #? Order of INCREASING RECALL, so precision DECREASES (as we would expect in a graph)
1960 |         "orientation": aos,
1961 |         "thresholds": all_thresholds,
1962 |         "min_overlaps": min_overlaps,
1963 |         "gt_box_typess": gt_box_typess, # np array shape [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS], each elem is list
1964 |         "dt_box_typess": dt_box_typess,
1965 |         "gt_box_matched_idx": gt_box_matched_idx, # same shape as above, holds either -1 or idx of matched dt box
1966 |         "dt_box_matched_idx": dt_box_matched_idx
1967 |     }
1968 |     '''
1969 |     eval_types = ["bbox", "bev", "3d"]
1970 |     metrics = do_eval_v3(
1971 |         gt_annos,
1972 |         dt_annos,
1973 |         current_classes,
1974 |         min_overlaps=IoUs,
1975 |         compute_aos=compute_aos,
1976 |         extra_info=extra_info,
1977 |         eval_modes=eval_modes
1978 |     )
1979 |     dprint("Done generating metrics.")
1980 | 
1981 |     if recall_positions_40:
1982 |         get_mAP = get_mAP_R40
1983 |     else:
1984 |         get_mAP = get_mAP_R11
1985 | 
1986 |     result = ''
1987 |     result += "Using mAP: {}\n".format(get_mAP.__name__)
1988 |     for j, curcls in enumerate(current_classes):
1989 |         # mAP threshold array: [num_minoverlap, metric, class]
1990 |         # mAP result: [num_class, num_diff, num_minoverlap]
1991 |         for i in range(IoUs.shape[0]):
1992 |             result += print_str(
1993 |                 (f"{class_to_name[curcls]} "
1994 |                  "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*IoUs[i, :, j])))
1995 |             for mode in eval_modes:
1996 |                 eval_type = eval_types[mode]
1997 |                 result += print_str("{:4} AP: {}".format(
1998 |                     eval_type,
1999 |                     ", ".join(f"{v:.2f}" for v in get_mAP(metrics[eval_type]["precision"][j, :, i]))
2000 |                 ))
2001 | 
2002 |             if compute_aos:
2003 |                 mAPaos = get_mAP(metrics["bbox"]["orientation"][j, :, i])
2004 |                 mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos)
2005 |                 result += 
print_str(f"aos AP:{mAPaos}") 2006 | 2007 | return result, metrics 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | def get_official_eval_result(gt_annos, 2014 | dt_annos, 2015 | current_classes, 2016 | difficultys=[0, 1, 2], 2017 | z_axis=1, 2018 | z_center=1.0): 2019 | """ 2020 | gt_annos and dt_annos must contains following keys: 2021 | [bbox, location, dimensions, rotation_y, score] 2022 | """ 2023 | if os.environ["KITTI_EVAL_CHANGES"] == "0": 2024 | print("Using Kitti Eval {}".format(os.environ["KITTI_EVAL_CHANGES"])) 2025 | overlap_mod = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7], 2026 | [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7], 2027 | [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7]]) 2028 | # ! All that matters here is that Car required over lap is 0.7, etc 2029 | elif os.environ["KITTI_EVAL_CHANGES"] == "1" or os.environ["KITTI_EVAL_CHANGES"] == "2": 2030 | print("Using Kitti Eval {}".format(os.environ["KITTI_EVAL_CHANGES"])) 2031 | CAR_IOU = float(os.environ["KITTI_EVAL_CAR_IOU"]) 2032 | PED_IOU = float(os.environ["KITTI_EVAL_PED_IOU"]) 2033 | CYC_IOU = float(os.environ["KITTI_EVAL_CYC_IOU"]) 2034 | overlap_mod = np.array( 2035 | [[CAR_IOU, PED_IOU, CYC_IOU, CAR_IOU, PED_IOU, CAR_IOU, CAR_IOU, CAR_IOU]] * 3 2036 | ) 2037 | 2038 | 2039 | 2040 | overlap_easy = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5], 2041 | [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5], 2042 | [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]]) 2043 | min_overlaps = np.stack([overlap_mod, overlap_easy], axis=0) # [2, 3, 5] 2044 | class_to_name = { 2045 | 0: 'Car', 2046 | 1: 'Pedestrian', 2047 | 2: 'Cyclist', 2048 | 3: 'Van', 2049 | 4: 'Person_sitting', 2050 | 5: 'car', 2051 | 6: 'tractor', 2052 | 7: 'trailer', 2053 | } 2054 | name_to_class = {v: n for n, v in class_to_name.items()} 2055 | if not isinstance(current_classes, (list, tuple)): 2056 | current_classes = [current_classes] 2057 | current_classes_int = [] 2058 | for curcls in current_classes: 2059 | if isinstance(curcls, str): 2060 | current_classes_int.append(name_to_class[curcls]) 2061 | else: 2062 | current_classes_int.append(curcls) 2063 | current_classes = current_classes_int 2064 | min_overlaps = min_overlaps[:, :, current_classes] 2065 | result = '' 2066 | # check whether alpha is valid 2067 | compute_aos = False 2068 | for anno in dt_annos: 2069 | if anno['alpha'].shape[0] != 0: 2070 | if anno['alpha'][0] != -10: 2071 | compute_aos = True 2072 | break 2073 | 2074 | metrics = do_eval_v3( # ! Now go to here 2075 | gt_annos, 2076 | dt_annos, 2077 | current_classes, 2078 | min_overlaps, 2079 | compute_aos, 2080 | difficultys, 2081 | z_axis=z_axis, 2082 | z_center=z_center) 2083 | mAPbbox_store = None 2084 | res_precision = None 2085 | res_recall = None 2086 | gt_box_types = None 2087 | dt_box_types = None #TODO: probably should add some stuff to make these work for 3d boxes in the future 2088 | #TODO: Does not work for multiple classes, nor does precision or recall. 2089 | #? Note that the return format is different from precision or recall. 
for p/r, we don't index into difficulty 2090 | for j, curcls in enumerate(current_classes): 2091 | # mAP threshold array: [num_minoverlap, metric, class] 2092 | # mAP result: [num_class, num_diff, num_minoverlap] 2093 | # precision is shape [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS] 2094 | for i in range(min_overlaps.shape[0]): 2095 | mAPbbox = get_mAP_v2(metrics["bbox"]["precision"][j, :, i]) 2096 | if mAPbbox_store is None: 2097 | mAPbbox_store = [ 2098 | get_mAP_v2(metrics["bbox"]["precision"][c, :, i]) 2099 | for c in range(len(current_classes)) 2100 | ] #! Just stores the first overlap_mod metrics, for all classes 2101 | 2102 | res_precision = metrics["bbox"]["precision"][:, :, i] #! # classes x 3 x 41 (difficulty x points) 2103 | res_recall = metrics["bbox"]["recall"][:, :, i] 2104 | gt_box_types = metrics["bbox"]["gt_box_typess"][:, :, i] #! #classes x difficulty, with values being (list of numpy arrays) 2105 | dt_box_types = metrics["bbox"]["dt_box_typess"][:, :, i] 2106 | mAPbbox = ", ".join(f"{v:.2f}" for v in mAPbbox) # ! This is what we care about 2107 | mAPbev = get_mAP_v2(metrics["bev"]["precision"][j, :, i]) 2108 | mAPbev = ", ".join(f"{v:.2f}" for v in mAPbev) 2109 | mAP3d = get_mAP_v2(metrics["3d"]["precision"][j, :, i]) 2110 | mAP3d = ", ".join(f"{v:.2f}" for v in mAP3d) 2111 | result += print_str( 2112 | (f"{class_to_name[curcls]} " 2113 | "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j]))) 2114 | result += print_str(f"bbox AP:{mAPbbox}") 2115 | result += print_str(f"bev AP:{mAPbev}") 2116 | result += print_str(f"3d AP:{mAP3d}") 2117 | if compute_aos: 2118 | mAPaos = get_mAP_v2(metrics["bbox"]["orientation"][j, :, i]) 2119 | mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos) 2120 | result += print_str(f"aos AP:{mAPaos}") 2121 | 2122 | 2123 | return result, mAPbbox_store, (res_precision, res_recall), (gt_box_types, dt_box_types) 2124 | 2125 | 2126 | def get_coco_eval_result(gt_annos, 2127 | dt_annos, 2128 | current_classes, 2129 | z_axis=1, 2130 | z_center=1.0): 2131 | class_to_name = { 2132 | 0: 'Car', 2133 | 1: 'Pedestrian', 2134 | 2: 'Cyclist', 2135 | 3: 'Van', 2136 | 4: 'Person_sitting', 2137 | 5: 'car', 2138 | 6: 'tractor', 2139 | 7: 'trailer', 2140 | } 2141 | class_to_range = { 2142 | 0: [0.5, 1.0, 0.05], 2143 | 1: [0.25, 0.75, 0.05], 2144 | 2: [0.25, 0.75, 0.05], 2145 | 3: [0.5, 1.0, 0.05], 2146 | 4: [0.25, 0.75, 0.05], 2147 | 5: [0.5, 1.0, 0.05], 2148 | 6: [0.5, 1.0, 0.05], 2149 | 7: [0.5, 1.0, 0.05], 2150 | } 2151 | class_to_range = { 2152 | 0: [0.5, 0.95, 10], 2153 | 1: [0.25, 0.7, 10], 2154 | 2: [0.25, 0.7, 10], 2155 | 3: [0.5, 0.95, 10], 2156 | 4: [0.25, 0.7, 10], 2157 | 5: [0.5, 0.95, 10], 2158 | 6: [0.5, 0.95, 10], 2159 | 7: [0.5, 0.95, 10], 2160 | } 2161 | 2162 | name_to_class = {v: n for n, v in class_to_name.items()} 2163 | if not isinstance(current_classes, (list, tuple)): 2164 | current_classes = [current_classes] 2165 | current_classes_int = [] 2166 | for curcls in current_classes: 2167 | if isinstance(curcls, str): 2168 | current_classes_int.append(name_to_class[curcls]) 2169 | else: 2170 | current_classes_int.append(curcls) 2171 | current_classes = current_classes_int 2172 | overlap_ranges = np.zeros([3, 3, len(current_classes)]) 2173 | for i, curcls in enumerate(current_classes): 2174 | overlap_ranges[:, :, i] = np.array( 2175 | class_to_range[curcls])[:, np.newaxis] 2176 | result = '' 2177 | # check whether alpha is valid 2178 | compute_aos = False 2179 | for anno in dt_annos: 2180 | if anno['alpha'].shape[0] 
!= 0: 2181 | if anno['alpha'][0] != -10: 2182 | compute_aos = True 2183 | break 2184 | mAPbbox, mAPbev, mAP3d, mAPaos = do_coco_style_eval( 2185 | gt_annos, 2186 | dt_annos, 2187 | current_classes, 2188 | overlap_ranges, 2189 | compute_aos, 2190 | z_axis=z_axis, 2191 | z_center=z_center) 2192 | for j, curcls in enumerate(current_classes): 2193 | # mAP threshold array: [num_minoverlap, metric, class] 2194 | # mAP result: [num_class, num_diff, num_minoverlap] 2195 | o_range = np.array(class_to_range[curcls])[[0, 2, 1]] 2196 | o_range[1] = (o_range[2] - o_range[0]) / (o_range[1] - 1) 2197 | result += print_str((f"{class_to_name[curcls]} " 2198 | "coco AP@{:.2f}:{:.2f}:{:.2f}:".format(*o_range))) 2199 | result += print_str((f"bbox AP:{mAPbbox[j, 0]:.2f}, " 2200 | f"{mAPbbox[j, 1]:.2f}, " 2201 | f"{mAPbbox[j, 2]:.2f}")) 2202 | result += print_str((f"bev AP:{mAPbev[j, 0]:.2f}, " 2203 | f"{mAPbev[j, 1]:.2f}, " 2204 | f"{mAPbev[j, 2]:.2f}")) 2205 | result += print_str((f"3d AP:{mAP3d[j, 0]:.2f}, " 2206 | f"{mAP3d[j, 1]:.2f}, " 2207 | f"{mAP3d[j, 2]:.2f}")) 2208 | if compute_aos: 2209 | result += print_str((f"aos AP:{mAPaos[j, 0]:.2f}, " 2210 | f"{mAPaos[j, 1]:.2f}, " 2211 | f"{mAPaos[j, 2]:.2f}")) 2212 | return result -------------------------------------------------------------------------------- /detection_toolbox/kitti/kitti_label.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import imgaug 3 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage 4 | import math 5 | 6 | class SingleLabel(object): 7 | # __slots__ = ['gt', 'garbage', 'type', 'truncation', 'occlusion', 'alpha', 8 | # 'xmin', 'ymin', 'xmax', 'ymax', 'box2d', 9 | # 'h', 'w', 'l', 't', 'ry', 'score', 10 | # 'distance', 'num_points'] 11 | def __init__(self, label_line, gt): 12 | self.gt = gt 13 | self._process_label_line(label_line) 14 | 15 | def _process_label_line(self, label_line): 16 | self.garbage = False 17 | split = label_line.split(" ") 18 | if len(split) == 15: 19 | assert self.gt 20 | elif len(split) == 16: 21 | assert not self.gt 22 | else: 23 | self.garbage = True #! Garbage 24 | return 25 | 26 | 27 | split[1:] = [float(x) for x in split[1:]] 28 | 29 | self.type = split[0] # 'Car', 'Pedestrian', ... 30 | self.truncation = split[1] # truncated pixel ratio [0..1] 31 | self.occlusion = split[2] # For KITTI dataset, int 0, 1, 2, 3. For other, just float 32 | self.alpha = split[3] # object observation angle [-pi..pi] 33 | 34 | # extract 2d bounding box in 0-based coordinates 35 | self.xmin = split[4] # left 36 | self.ymin = split[5] # top 37 | self.xmax = split[6] # right 38 | self.ymax = split[7] # bottom 39 | self.height2d = self.ymax - self.ymin 40 | self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax]) 41 | 42 | # extract 3d bounding box information 43 | self.h = split[8] # box height 44 | self.w = split[9] # box width 45 | self.l = split[10] # box length (in meters) 46 | self.t = (split[11],split[12],split[13]) # location (x,y,z) in rect. camera coord. 47 | self.tx = self.t[0] 48 | self.ty = self.t[1] 49 | self.tz = self.t[2] 50 | 51 | self.ry = split[14] # yaw angle (around Y-axis in rect. camera coordinates) [-pi..pi] 52 | 53 | if len(split) == 16: 54 | self.score = split[15] #! If DT, include score as well. 55 | 56 | self.distance = np.sqrt(self.t[0] ** 2 + self.t[1] ** 2 + self.t[2] ** 2) 57 | self.num_points = None #! has to be calculated later 58 | 59 | ''' 60 | Returns corners of 3d bbox in rect camera coords. 
(8, 3) np array.
61 |     '''
62 |     def compute_box_3d(self):
63 |         # compute rotational matrix around yaw axis
64 |         R = roty(self.ry)
65 | 
66 |         # 3d bounding box dimensions
67 |         l = self.l
68 |         w = self.w
69 |         h = self.h
70 | 
71 |         # 3d bounding box corners
72 |         x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2]
73 |         y_corners = [0,0,0,0,-h,-h,-h,-h]
74 |         z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]
75 | 
76 |         # rotate and translate 3d bounding box
77 |         corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))
78 | 
79 |         corners_3d[0,:] = corners_3d[0,:] + self.t[0]
80 |         corners_3d[1,:] = corners_3d[1,:] + self.t[1]
81 |         corners_3d[2,:] = corners_3d[2,:] + self.t[2]
82 | 
83 |         return corners_3d.T
84 | 
85 |     '''
86 |     Converts to line (original label) form, without newline character
87 |     '''
88 |     def to_line(self):
89 |         line = "{} {} {} {} {} {} {} {} {} {} {} {} {} {} {}".format(
90 |             self.type, self.truncation, self.occlusion, self.alpha,
91 |             self.xmin, self.ymin, self.xmax, self.ymax,
92 |             self.h, self.w, self.l,
93 |             self.t[0], self.t[1], self.t[2],
94 |             self.ry
95 |         )
96 |         if hasattr(self, "score"):
97 |             line += " {}".format(self.score)
98 |         return line
99 | 
100 |     '''
101 |     Returns 2d bbox in imgaug BoundingBox format
102 |     '''
103 |     def get_imgaug_bbox(self):
104 |         return BoundingBox(
105 |             x1=self.xmin, y1=self.ymin, x2=self.xmax, y2=self.ymax
106 |         )
107 | 
108 | 
109 | class KittiLabel(object):
110 |     '''
111 |     view is an int (could also be None if gt is False)
112 |     gt is boolean
113 |     idx is the {idx}.txt this label came from
114 | 
115 |     num_points_file_path is optional - it has # of points of the non-garbage labels, one on each line
116 |     #! Careful: the order/number of this is dependent on which labels were actually inside KittiLabel when this file
117 |     #! was generated. And which labels were inside KittiLabel depends on _read_label_from_file.
118 |     #! NMS might remove some objects, but num_points only matters for gt, and we don't run NMS on gt.
119 |     '''
120 |     def __init__(self, label_file_path, view, gt, idx, filter_truncation_1=True, num_points_file_path=None):
121 |         self.view = view
122 |         self.gt = gt
123 |         self.idx = idx
124 |         self._read_label_from_file(label_file_path, filter_truncation_1)
125 | 
126 |         if num_points_file_path is not None:
127 |             self._read_num_points_file_path(num_points_file_path)
128 | 
129 |     #! if filter_truncation_1 is True, gets rid of labels with truncation = 1 (100%)
130 |     def _read_label_from_file(self, label_file_path, filter_truncation_1=True):
131 |         labels = [SingleLabel(line.strip(), self.gt) for line in open(label_file_path, "r").readlines()]
132 | 
133 |         labels = list(filter(lambda l: (not l.garbage) and (not filter_truncation_1 or l.truncation != 1), labels))
134 | 
135 |         self.labels = labels
136 | 
137 |     def _read_num_points_file_path(self, num_points_file_path):
138 |         num_points = [int(line.strip()) for line in open(num_points_file_path, "r").readlines()]
139 | 
140 |         self.add_label_attribute("num_points", num_points)
141 | 
142 |         # assert len(num_points) == len(self.labels)
143 | 
144 |         # for label_ind, label in enumerate(self.labels):
145 |         #     label.num_points = num_points[label_ind]
146 | 
147 |     #! remove labels with score < score_thresh
148 |     #! 
Returns num removed
149 |     def filter_score(self, score_thresh):
150 |         assert not self.gt
151 |         prev_num = len(self.labels)
152 |         self.labels = list(filter(lambda l: l.score >= score_thresh, self.labels))
153 |         return prev_num - len(self.labels)
154 | 
155 |     '''
156 |     Writes contents of label to file
157 |     Assumes that relevant folders are created
158 |     returns passed in write_file_path
159 |     '''
160 |     def write_to_file(self, write_file_path):
161 |         with open(write_file_path, "w+") as f:
162 |             for label in self.labels:
163 |                 f.write(label.to_line() + "\n")
164 |         return write_file_path
165 | 
166 |     '''
167 |     Writes contents of label.num_points to file, one on each line.
168 |     '''
169 |     def write_num_points_to_file(self, write_file_path):
170 |         with open(write_file_path, "w+") as f:
171 |             for label in self.labels:
172 |                 f.write(str(label.num_points) + "\n")
173 |         return write_file_path
174 | 
175 |     '''
176 |     Computes # of points in each 3d box and saves them into self.labels' num_points attribute
177 |     pc_rect is n x (3 or 4) numpy array of point cloud in rect. coords.
178 |     '''
179 |     def compute_num_points_inside_3d_box(self, pc_rect):
180 |         from detection_toolbox.utils_3d.utils import extract_pc_in_box3d
181 |         if pc_rect.shape[-1] != 3:
182 |             pc_rect = pc_rect[:, :3]
183 | 
184 |         for label in self.labels:
185 |             corners_3d_rect = label.compute_box_3d()
186 |             pc_in_box, _ = extract_pc_in_box3d(pc_rect, corners_3d_rect)
187 |             label.num_points = pc_in_box.shape[0]
188 | 
189 | 
190 |     '''
191 |     Returns a dict in format
192 |     {
193 |         'name': [],
194 |         'truncated': [],
195 |         'occluded': [],
196 |         'alpha': [],
197 |         'bbox': [],
198 |         'dimensions': [],
199 |         'location': [],
200 |         'rotation_y': [],
201 |     }
202 |     '''
203 |     def get_annotation_dict(self):
204 |         res = dict()
205 | 
206 |         res['name'] = np.array([label.type for label in self.labels])
207 |         res['truncated'] = np.array([label.truncation for label in self.labels])
208 |         res['occluded'] = np.array([label.occlusion for label in self.labels])
209 |         res['alpha'] = np.array([label.alpha for label in self.labels])
210 |         res['bbox'] = np.array([label.box2d for label in self.labels]).reshape(-1, 4)
211 |         #? l, h, w is not the read-in order (labels store h, w, l)
212 |         res['dimensions'] = np.array([[label.l, label.h, label.w] for label in self.labels]).reshape(-1, 3)
213 |         res['location'] = np.array([label.t for label in self.labels]).reshape(-1, 3)
214 |         res['rotation_y'] = np.array([label.ry for label in self.labels])
215 | 
216 |         if not self.gt:
217 |             res['score'] = np.array([label.score for label in self.labels])
218 | 
219 |         return res
220 | 
221 |     '''
222 |     if gt:
223 |         {
224 |             'distance': [],
225 |             'num_points': [], # fills in with 100k if they haven't been calculated yet
226 |         }
227 |     '''
228 |     def get_extra_info(self):
229 |         res = dict()
230 |         if self.gt:
231 |             res['distance'] = np.array([label.distance for label in self.labels])
232 |             res['num_points'] = np.array([
233 |                 (label.num_points if label.num_points is not None else 100000)
234 |                 for label in self.labels
235 |             ])
236 |             return res
237 |         else:
238 |             res['distance'] = np.array([label.distance for label in self.labels])
239 |             return res
240 | 
241 |     '''
242 |     Returns bboxes in imgaug BoundingBoxesOnImage format
243 |     TODO: have some mechanisms for gt vs not gt, filtering, labels, etc
244 |     '''
245 |     def get_imgaug_bboxes(self, img_shape=(720, 1920)):
246 |         bboxes = [label.get_imgaug_bbox() for label in self.labels]
247 |         return BoundingBoxesOnImage(bboxes, shape=img_shape)
248 | 
249 |     #! 
Adds attribute of attribute name to each label 250 | #! Attribute vals should be a list 251 | def add_label_attribute(self, attribute_name, attribute_vals): 252 | assert len(self.labels) == len(attribute_vals) 253 | 254 | for label, attribute_val in zip(self.labels, attribute_vals): 255 | setattr(label, attribute_name, attribute_val) 256 | 257 | 258 | def __len__(self): 259 | return len(self.labels) 260 | 261 | def __iter__(self): 262 | return iter(self.labels) 263 | 264 | 265 | def rotx(t): 266 | ''' 3D Rotation about the x-axis. ''' 267 | c = np.cos(t) 268 | s = np.sin(t) 269 | return np.array([[1, 0, 0], 270 | [0, c, -s], 271 | [0, s, c]]) 272 | 273 | 274 | def roty(t): 275 | ''' Rotation about the y-axis. ''' 276 | c = np.cos(t) 277 | s = np.sin(t) 278 | return np.array([[c, 0, s], 279 | [0, 1, 0], 280 | [-s, 0, c]]) 281 | 282 | 283 | def rotz(t): 284 | ''' Rotation about the z-axis. ''' 285 | c = np.cos(t) 286 | s = np.sin(t) 287 | return np.array([[c, -s, 0], 288 | [s, c, 0], 289 | [0, 0, 1]]) 290 | -------------------------------------------------------------------------------- /detection_toolbox/kitti/kitti_object.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | import functools 6 | 7 | from .calibration import Calibration 8 | from .kitti_label import KittiLabel 9 | 10 | 11 | class Kitti(object): 12 | ''' 13 | calib_dir: directory with all the calib files. 14 | image_dir: directory with folders image_0, image_1, ..., each with images 15 | gt_label_dir: directory with folders label_0, label_1, ..., each with gt labels 16 | dt_label_dir: Either: 17 | 1) directory with folders label_0, label_1, ..., each with dt labels 18 | 2) directory with dt labels 000000.txt, ..., 19 | lidar_dir: directory with lidar files 20 | depthmap_dir: directory with folders depth_0, depth_1, ..., each with depthmaps 21 | ''' 22 | def __init__( 23 | self, 24 | calib_dir=None, 25 | image_dir=None, 26 | gt_label_dir=None, 27 | dt_label_dir=None, 28 | lidar_dir=None, 29 | depthmap_dir=None 30 | ): 31 | self.calib_dir = calib_dir 32 | self.image_dir = image_dir 33 | self.gt_label_dir = gt_label_dir 34 | self.dt_label_dir = dt_label_dir 35 | self.lidar_dir = lidar_dir 36 | self.depthmap_dir = depthmap_dir 37 | 38 | #! 
Returns Calibration Object 39 | def get_calib(self, idx): 40 | if self.calib_dir is None: 41 | raise Exception("calib_dir not provided") 42 | else: 43 | return Calibration(os.path.join(self.calib_dir, str(idx).zfill(6) + ".txt")) 44 | 45 | def get_gt_label(self, view, idx, filter_truncation_1=True): 46 | if self.gt_label_dir is None: 47 | raise Exception("gt_label_dir not provided") 48 | else: 49 | return KittiLabel( 50 | os.path.join( 51 | os.path.join(self.gt_label_dir, "label_{}".format(view)), 52 | str(idx).zfill(6) + ".txt" 53 | ), 54 | view=view, 55 | gt=True, 56 | idx=idx, 57 | filter_truncation_1=filter_truncation_1 58 | ) 59 | 60 | ''' 61 | Either pass in a view = 0, 1, 2, 3, 4, then it goes to label_{view} inside self.dt_label_dir 62 | or pass in view = None, then it directly looks for {idx}.txt inside self.dt_label_dir 63 | ''' 64 | def get_dt_label(self, view, idx): 65 | if self.dt_label_dir is None: 66 | raise Exception("dt_label_dir not provided") 67 | elif view is None: 68 | return KittiLabel(os.path.join(self.dt_label_dir, str(idx).zfill(6) + ".txt"), view=None, gt=False, idx=idx) 69 | else: 70 | return KittiLabel( 71 | os.path.join( 72 | os.path.join(self.dt_label_dir, "label_{}".format(view)), 73 | str(idx).zfill(6) + ".txt" 74 | ), 75 | view=view, 76 | gt=False, 77 | idx=idx 78 | ) 79 | 80 | #! BGR Format 81 | def get_image(self, view, idx): 82 | if self.image_dir is None: 83 | raise Exception("image_dir not provided") 84 | else: 85 | return cv2.imread( 86 | os.path.join( 87 | os.path.join(self.image_dir, "image_{}".format(view)), 88 | str(idx).zfill(6) + ".png" 89 | ) 90 | ) 91 | 92 | #! Returns lidar in velodyne format, n x 4 93 | def get_lidar(self, idx): 94 | if self.lidar_dir is None: 95 | raise Exception("lidar_dir not provided") 96 | else: 97 | return np.fromfile(os.path.join(self.lidar_dir, str(idx).zfill(6) + ".bin"), dtype=np.float32).reshape((-1, 4)) 98 | 99 | #! Returns in BGR Format. Likely not ideal 100 | def get_depthmap(self, view, idx): 101 | if self.depthmap_dir is None: 102 | raise Exception("depthmap_dir not provided") 103 | else: 104 | return cv2.imread( 105 | os.path.join( 106 | os.path.join(self.depthmap_dir, "image_{}".format(view)), 107 | str(idx).zfill(6) + ".png" 108 | ) 109 | ) 110 | 111 | #! Returns gt inds 112 | def get_gt_inds(self): 113 | if self.gt_label_dir is None: 114 | raise Exception("gt_label_dir not provided") 115 | else: 116 | gt_label_sub_dirs = os.listdir(self.gt_label_dir) #! label_0, label_1, ... 117 | gt_label_sub_dirs = list(filter(lambda s: "label_" in s, gt_label_sub_dirs)) 118 | each_ind_set = [] 119 | for gt_label_sub_dir in gt_label_sub_dirs: 120 | inds = set(list(map(lambda s: s[:-4], os.listdir(os.path.join(self.gt_label_dir, gt_label_sub_dir))))) 121 | each_ind_set.append(inds) 122 | for i in range(len(each_ind_set) - 1): #! Check that each sub dir has the same inds 123 | assert (each_ind_set[i] == each_ind_set[i + 1]) 124 | 125 | return sorted(each_ind_set[0]) 126 | 127 | #! Returns dt inds 128 | #! if direct_dir is true, doesn't need directory to have label_0, label_1, etc. 129 | def get_dt_inds(self, direct_dir=False): 130 | if self.dt_label_dir is None: 131 | raise Exception("dt_label_dir not provided") 132 | elif direct_dir: 133 | inds = list(map(lambda s: s[:-4], os.listdir(self.dt_label_dir))) 134 | return sorted(inds) 135 | else: 136 | dt_label_sub_dirs = os.listdir(self.dt_label_dir) #! label_0, label_1, ... 
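#! e.g. a hypothetical layout: dt_label_dir/label_0/000000.txt, dt_label_dir/label_1/000000.txt, ...
#! each label_{view} sub dir is expected to hold the same set of indices (asserted below)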
137 | dt_label_sub_dirs = list(filter(lambda s: "label_" in s, dt_label_sub_dirs)) 138 | each_ind_set = [] 139 | for dt_label_sub_dir in dt_label_sub_dirs: 140 | inds = set(list(map(lambda s: s[:-4], os.listdir(os.path.join(self.dt_label_dir, dt_label_sub_dir))))) 141 | each_ind_set.append(inds) 142 | for i in range(len(each_ind_set) - 1): #! Check that each sub dir has the same inds 143 | assert (each_ind_set[i] == each_ind_set[i + 1]) 144 | 145 | return sorted(each_ind_set[0]) 146 | 147 | def get_gt_annotated_image(self, view, idx): 148 | img = self.get_image(view, idx) 149 | gt = self.get_gt_label(view, idx) 150 | return gt.get_imgaug_bboxes().draw_on_image(img, size=3) 151 | 152 | def get_dt_annotated_image(self, view, idx): 153 | img = self.get_image(view, idx) 154 | dt = self.get_dt_label(view, idx) 155 | return dt.get_imgaug_bboxes().draw_on_image(img, size=3) 156 | 157 | ''' 158 | Calculates num_points for each non-garbage label in GT view 159 | Saves in save_dir/label_{view}/000000.txt, ... 160 | 161 | Only does so for inds in inds argument. If inds argument is None, gets them from get_dt_inds. 162 | If self.dt_label_dir was not provided, goes through everything in gt. 163 | 164 | nonempty_ok should generally be false unless the views are being done in parallel. 165 | filter_truncation_1 should be True for regular kitti data. 166 | filter_truncation_1 should be False for simulation data, where each label file contains annotations for all objects 167 | with correct 3d coords but potentially wrong 2d bbox 168 | ''' 169 | def generate_and_save_gt_num_points(self, view, save_dir, inds=None, tqdm=False, nonempty_ok=False, filter_truncation_1=True): 170 | if tqdm: from tqdm import tqdm 171 | from detection_toolbox.std import makedirs 172 | 173 | save_view_dir = os.path.join(save_dir, "label_{}".format(view)) 174 | makedirs(save_view_dir, exist_ok=True, nonempty_ok=nonempty_ok) 175 | 176 | if inds is None: 177 | if self.dt_label_dir is None: 178 | inds = self.get_gt_inds() 179 | else: 180 | inds = self.get_dt_inds() 181 | 182 | if tqdm: inds = tqdm(inds) 183 | import time 184 | 185 | for ind in inds: 186 | gt = self.get_gt_label(view, ind, filter_truncation_1=filter_truncation_1) 187 | lidar_rect = self.get_calib(ind).project_velo_to_rect(self.get_lidar(ind)[:, :3]) 188 | gt.compute_num_points_inside_3d_box(lidar_rect) 189 | gt.write_num_points_to_file(os.path.join(save_view_dir, str(ind).zfill(6) + ".txt")) 190 | 191 | ''' 192 | Returns a tuple of gt_annos, dt_annos that can be put into "get_official_eval_result" 193 | Only considers/returns labels corresponding to view argument & inds in dt_label_dir/label_{view} 194 | ''' 195 | def get_eval_annos(self, view, gt_filter_truncation_1=True, tqdm=False): 196 | if tqdm: from tqdm import tqdm 197 | dt_inds = self.get_dt_inds() 198 | 199 | gt_annos = [] 200 | dt_annos = [] 201 | 202 | if tqdm: dt_inds = tqdm(dt_inds) 203 | for dt_ind in dt_inds: 204 | gt_annos.append(self.get_gt_label(view, dt_ind, filter_truncation_1=gt_filter_truncation_1).get_annotation_dict()) 205 | dt_annos.append(self.get_dt_label(view, dt_ind).get_annotation_dict()) 206 | 207 | return gt_annos, dt_annos 208 | 209 | def get_eval_extra_info(self, view, gt_filter_truncation_1=True, num_points_dir=None, tqdm=False): 210 | if tqdm: from tqdm import tqdm 211 | dt_inds = self.get_dt_inds() 212 | 213 | gt_extra_info = [] 214 | dt_extra_info = [] 215 | 216 | if tqdm: dt_inds = tqdm(dt_inds) 217 | for dt_ind in dt_inds: 218 | gt = self.get_gt_label(view, dt_ind, 
--------------------------------------------------------------------------------
/detection_toolbox/kitti/nms.py:
--------------------------------------------------------------------------------
1 | '''
2 | REQUIRES DETECTRON2 TO BE INSTALLED
3 | https://github.com/facebookresearch/detectron2/blob/de098423c675dad38c23110407926ccf2919474d/detectron2/layers/nms.py#L101
4 | '''
5 | 
6 | '''
7 | Takes in a list of KittiLabel objects, and returns a list of KittiLabel objects after doing NMS with iou_threshold in bev
8 | over all the KittiLabel objects together.
9 | '''
10 | def bev_nms(kitti_labels, iou_threshold):
11 |     from detectron2.layers import batched_nms_rotated
12 |     import numpy as np
13 |     import torch
14 | 
15 |     #? NOTE: This might be different from elsewhere. However, it does not matter, because this CATEGORY_TO_IDX will never
16 |     #? have any influence outside this function.
17 |     CATEGORY_TO_IDX = {
18 |         "Car": 0,
19 |         "Pedestrian": 1,
20 |         "Cyclist": 2,
21 |         "Motorcycle": 3,
22 |         "Undefined": 4
23 |     }
24 |     boxes = []
25 |     scores = []
26 |     idxs = []
27 | 
28 |     #! For each label, maps its index in boxes (and scores, idxs) to
29 |     #! -> a tuple (index of its parent (view) in kitti_labels, its index inside its kitti_label)
30 |     overall_idx_to_label_idx = dict()
31 |     curr_overall_idx = 0
32 |     for kitti_label_idx, kitti_label in enumerate(kitti_labels):
33 |         for label_idx, label in enumerate(kitti_label):
34 |             # should be (x_ctr, y_ctr, width, height, angle_degrees)
35 |             boxes.append([
36 |                 label.t[0], label.t[2], label.l, label.w, label.ry * (180.0 / np.pi)
37 |             ])
38 |             scores.append(label.score)
39 |             idxs.append(CATEGORY_TO_IDX[label.type])
40 | 
41 |             overall_idx_to_label_idx[curr_overall_idx] = (kitti_label_idx, label_idx)
42 |             curr_overall_idx += 1
43 | 
44 |     if len(boxes) == 0: #! No detections
45 |         return kitti_labels
46 | 
47 |     boxes = torch.FloatTensor(boxes).to("cuda")
48 |     scores = torch.FloatTensor(scores).to("cuda")
49 |     idxs = torch.LongTensor(idxs).to("cuda")
50 | 
51 |     #! Performs per-class nms
52 |     resulting_box_inds = batched_nms_rotated(
53 |         boxes,
54 |         scores,
55 |         idxs,
56 |         iou_threshold
57 |     )
58 |     keep_inds = [[] for i in range(len(kitti_labels))]
59 |     for overall_idx in resulting_box_inds.cpu().tolist():
60 |         kitti_label_idx, label_idx = overall_idx_to_label_idx[overall_idx]
61 |         keep_inds[kitti_label_idx].append(label_idx)
62 | 
63 |     for kitti_label_idx, kitti_label in enumerate(kitti_labels):
64 |         kitti_label.labels = [kitti_label.labels[i] for i in keep_inds[kitti_label_idx]]
65 | 
66 |     del boxes, scores, idxs, resulting_box_inds
67 | 
68 |     return kitti_labels
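
A sketch of merging per-view detections with bev_nms, assuming `ko` from the earlier sketch and detection label dirs label_0 through label_4:

    #! Gather one KittiLabel per view for frame 0, then drop boxes that overlap
    #! another view's box by more than 0.5 IoU in bird's-eye view.
    labels_per_view = [ko.get_dt_label(view, idx=0) for view in range(5)]
    labels_per_view = bev_nms(labels_per_view, iou_threshold=0.5)
    for view, kitti_label in enumerate(labels_per_view):
        print("view {}: kept {} boxes".format(view, len(kitti_label.labels)))
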
--------------------------------------------------------------------------------
/detection_toolbox/std/__init__.py:
--------------------------------------------------------------------------------
1 | from .log import dprint
2 | from .os import makedirs
--------------------------------------------------------------------------------
/detection_toolbox/std/log.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | 
3 | __all__ = ["dprint"]
4 | 
5 | '''
6 | Like print(), but prefixes the output with an [HH:MM:SS] timestamp
7 | '''
8 | def dprint(*args):
9 |     now = datetime.now()
10 |     print('[{:02d}:{:02d}:{:02d}]: {}'.format(now.hour, now.minute, now.second, " ".join(map(lambda s: str(s), args))))
--------------------------------------------------------------------------------
/detection_toolbox/std/os.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | __all__ = ["makedirs"]
4 | 
5 | '''
6 | Creates dir_path (and all intermediate directories)
7 | Note that when exist_ok=False, nonempty_ok has no effect
8 | If: exist_ok, nonempty_ok
9 |     False _     : same as os.makedirs(dir_path, exist_ok=False)
10 |     True  False : dir_path can exist, but it must be empty
11 |     True  True  : dir_path can exist, and it can be non-empty
12 | '''
13 | def makedirs(dir_path, exist_ok=False, nonempty_ok=False):
14 |     if os.path.isdir(dir_path): #! exists already
15 |         if not exist_ok:
16 |             raise Exception("{} already exists".format(dir_path))
17 |         else:
18 |             if len(os.listdir(dir_path)) != 0: #! nonempty
19 |                 if not nonempty_ok:
20 |                     raise Exception("{} is not empty".format(dir_path))
21 |                 else:
22 |                     return dir_path
23 |             else: #! exists, but is empty
24 |                 return dir_path
25 |     else: #! does not exist
26 |         os.makedirs(dir_path, exist_ok=exist_ok)
27 |         return dir_path
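
The three modes from the docstring, spelled out as calls:

    from detection_toolbox.std import makedirs

    makedirs("run/exp1")                                    # raises if run/exp1 already exists
    makedirs("run/exp1", exist_ok=True)                     # ok if it exists, as long as it is empty
    makedirs("run/exp1", exist_ok=True, nonempty_ok=True)   # ok even if it already has files in it
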
--------------------------------------------------------------------------------
/detection_toolbox/utils_3d/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import numpy as np
4 | import cv2
5 | 
6 | def in_hull(p, hull):
7 |     from scipy.spatial import Delaunay
8 |     if not isinstance(hull, Delaunay):
9 |         hull = Delaunay(hull)
10 |     return hull.find_simplex(p) >= 0
11 | 
12 | '''
13 | pc is a point cloud, box3d is its 8 corners
14 | '''
15 | def extract_pc_in_box3d(pc, box3d):
16 |     ''' pc: (N,3), box3d: (8,3) '''
17 |     box3d_roi_inds = in_hull(pc[:,0:3], box3d)
18 |     return pc[box3d_roi_inds,:], box3d_roi_inds
19 | 
20 | 
21 | def get_lidar_in_image_fov(pc_rect, calib, view, xmin, ymin, xmax, ymax,
22 |                            return_more=False, clip_distance=.1):
23 |     ''' Filter lidar points, keep those in image FOV '''
24 |     pts_2d = calib.project_rect_to_image(pc_rect, view) #! project_rect_to_image requires the view argument
25 |     fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \
26 |                (pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)
27 | 
28 |     clip_filter = None
29 |     if view == 0:
30 |         clip_filter = pc_rect[:, 2] < -clip_distance
31 |     elif view == 1:
32 |         clip_filter = pc_rect[:, 0] < -clip_distance
33 |     elif view == 2:
34 |         clip_filter = pc_rect[:, 2] > clip_distance
35 |     elif view == 4:
36 |         clip_filter = pc_rect[:, 0] > clip_distance
37 |     if clip_filter is not None: #! no clip rule is defined for view == 3
38 |         fov_inds = fov_inds & clip_filter
39 | 
40 |     imgfov_pc_rect = pc_rect[fov_inds,:]
41 |     if return_more:
42 |         return imgfov_pc_rect, pts_2d, fov_inds
43 |     else:
44 |         return imgfov_pc_rect
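
A sketch of cropping a cloud to one camera's field of view, reusing `calib`, `img`, and `lidar` from the earlier kitti_object sketch:

    h, w = img.shape[:2]
    pc_rect = calib.project_velo_to_rect(lidar[:, :3])  # filtering happens in rect camera coords
    imgfov_pc_rect, pts_2d, fov_inds = get_lidar_in_image_fov(
        pc_rect, calib, view=2, xmin=0, ymin=0, xmax=w, ymax=h, return_more=True
    )
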
--------------------------------------------------------------------------------
/detection_toolbox/vis/vis2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tqdm import tqdm
3 | import time
4 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
5 | 
6 | '''
7 | For details, reference vis3d's draw_3d_boxes_from_objects_advanced
8 | '''
9 | def draw_2d_boxes_from_objects_advanced(
10 |     objects,
11 |     calib,
12 |     img,
13 |     default_color=(0, 1, 0), #! default color is green
14 |     color_func=None,
15 |     text_func=None,
16 |     size=3
17 | ):
18 |     color_dict = dict()
19 | 
20 |     for label_ind, label in enumerate(objects):
21 |         if color_func is not None:
22 |             color = color_func(label_ind, label)
23 |             if color is None:
24 |                 continue
25 |         else:
26 |             color = default_color
27 | 
28 |         if text_func is not None:
29 |             text = text_func(label_ind, label)
30 |             if text == "":
31 |                 text = None
32 |         else:
33 |             text = None
34 | 
35 |         bbox = label.get_imgaug_bbox()
36 |         bbox.label = text
37 | 
38 |         if color not in color_dict.keys():
39 |             color_dict[color] = {
40 |                 "boxes": []
41 |             }
42 | 
43 |         color_dict[color]['boxes'].append(bbox)
44 | 
45 |     for color, val in color_dict.items():
46 |         bboxes = BoundingBoxesOnImage(val['boxes'], shape=img.shape[:2])
47 |         #! flip color tuple b/c img is bgr and provided color is rgb
48 |         img = bboxes.draw_on_image(img, color=tuple(int(i * 255) for i in color)[::-1], size=size)
49 | 
50 |     return img
--------------------------------------------------------------------------------
/detection_toolbox/vis/vis3d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tqdm import tqdm
3 | import time
4 | 
5 | '''
6 | pc is n x (at least 3)
7 | '''
8 | def draw_lidar(
9 |     pc,
10 |     ptcolor=(1, 1, 1),
11 |     fig=None,
12 |     bgcolor=(0, 0, 0),
13 |     fig_size=(8000, 4000),
14 |     draw_range_squares=False #! if you want to also see square boxes at 40, 80, 120 meters
15 | ):
16 |     from mayavi import mlab
17 |     mlab.options.offscreen = True
18 | 
19 |     if fig is None:
20 |         fig = mlab.figure(bgcolor=bgcolor, size=fig_size)
21 | 
22 |     #! Draw origin & axes
23 |     mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=1)
24 | 
25 |     axis_len = 2.0
26 |     #! Each row is the "ending point" of each axis: x, y, z, starting from 0
27 |     axes = np.array([
28 |         [axis_len, 0., 0.],
29 |         [0., axis_len, 0.],
30 |         [0., 0., axis_len],
31 |     ], dtype=np.float32)
32 | 
33 |     for axis_ind, axis in enumerate(["x", "y", "z"]):
34 |         mlab.plot3d(
35 |             [0, axes[axis_ind, 0]],
36 |             [0, axes[axis_ind, 1]],
37 |             [0, axes[axis_ind, 2]],
38 |             color=tuple([int(c) for c in axes[axis_ind] / axis_len]), #! (1,0,0), (0,1,0), (0,0,1)
39 |             line_width=4,
40 |             tube_radius=None,
41 |             figure=fig
42 |         )
43 |         mlab.text3d(
44 |             axes[axis_ind, 0],
45 |             axes[axis_ind, 1],
46 |             axes[axis_ind, 2],
47 |             text=axis,
48 |             color=tuple([int(c) for c in axes[axis_ind] / axis_len]),
49 |             figure=fig,
50 |             scale=(0.5, 0.5, 0.5)
51 |         )
52 | 
53 |     if draw_range_squares:
54 |         for i in [40, 80, 120]:
55 |             mlab.plot3d(
56 |                 [i, i, -i, -i, i],
57 |                 [i, -i, -i, i, i],
58 |                 [0, 0, 0, 0, 0],
59 |                 color=(0.2, 0.2, 0.2),
60 |                 line_width=6,
61 |                 tube_radius=None,
62 |                 figure=fig
63 |             )
64 | 
65 |     #! Draw point cloud
66 |     mlab.points3d(
67 |         pc[:,0],
68 |         pc[:,1],
69 |         pc[:,2],
70 |         color=ptcolor,
71 |         mode='point',
72 |         colormap='gnuplot',
73 |         scale_factor=1,
74 |         figure=fig
75 |     )
76 | 
77 |     return fig
78 | 
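
Since mlab.options.offscreen is forced on throughout this file, figures are meant to be rendered to disk rather than shown interactively; a minimal sketch:

    from mayavi import mlab

    fig = draw_lidar(lidar, draw_range_squares=True)  # lidar from the earlier sketch, (n, >=3)
    fig = draw_boxes_from_objects(gt, calib, fig)     # defined further down in this file
    mlab.savefig("run/frame_000000.png", figure=fig)
    mlab.close(fig)
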
79 | '''
80 | Input:
81 |     objects: KittiLabel object
82 |     calib: Calibration object
83 |     fig: mayavi figure object
84 | 
85 | The goal of this function is to display only specific boxes in objects and be able to control what color each box is.
86 | If color_func is None, all boxes are displayed & default_color is used for all.
87 | If color_func is not None, it should be a function that takes in (index in objects.labels, label: SingleLabel) and
88 | returns either None or a tuple denoting the color the object should be displayed with.
89 | If it returns None for a label, the box is not displayed for that object/label.
90 | text_func has the same inputs, but should output a string or None.
91 | '''
92 | def draw_3d_boxes_from_objects_advanced(
93 |     objects,
94 |     calib,
95 |     fig,
96 |     default_color=(0, 1, 0), #! default color is green
97 |     color_func=None,
98 |     text_func=None
99 | ):
100 |     from mayavi import mlab
101 |     mlab.options.offscreen = True
102 | 
103 |     color_dict = dict()
104 | 
105 |     for label_ind, label in enumerate(objects):
106 |         if color_func is not None:
107 |             color = color_func(label_ind, label)
108 |             if color is None:
109 |                 continue
110 |         else:
111 |             color = default_color
112 | 
113 |         if text_func is not None:
114 |             text = text_func(label_ind, label)
115 |             if text is None:
116 |                 text = ""
117 |         else:
118 |             text = ""
119 | 
120 |         corners_3d_rect = label.compute_box_3d() #! Gets corners of 3d box
121 |         corners_3d_velo = calib.project_rect_to_velo(corners_3d_rect)
122 | 
123 |         if color not in color_dict.keys():
124 |             color_dict[color] = {
125 |                 "boxes": [],
126 |                 "texts": [],
127 |                 "tmp": []
128 |             }
129 | 
130 |         color_dict[color]['boxes'].append(corners_3d_velo)
131 |         color_dict[color]['texts'].append(text)
132 |         color_dict[color]['tmp'].append(label)
133 | 
134 |     for color, val in color_dict.items():
135 |         draw_boxes_3d(val['boxes'], fig, box_color=color, text_color=color, text_list=val['texts'])
136 | 
137 |     return fig
138 | 
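
The color_func/text_func contract from the docstring above, as a concrete sketch: keep only Cars, color them by detection score, and label each kept box with that score. The 0.7 threshold is arbitrary:

    def car_color_func(label_ind, label):
        if label.type != "Car":
            return None                                       # None hides the box entirely
        return (0, 1, 0) if label.score > 0.7 else (1, 1, 0)  # green if confident, else yellow

    def score_text_func(label_ind, label):
        return "{:.2f}".format(label.score)

    dt = ko.get_dt_label(view=2, idx=0)  # ko / calib / fig as in the earlier sketches
    fig = draw_3d_boxes_from_objects_advanced(dt, calib, fig,
                                              color_func=car_color_func,
                                              text_func=score_text_func)
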
139 | 
140 | '''
141 | #! objects is type KittiLabel
142 | #! calib is type Calibration
143 | #! whether boxes are treated as gt is read off objects.gt (boolean)
144 | gt is drawn in green, dt is drawn in red; dt also writes the score
145 | '''
146 | def draw_boxes_from_objects(
147 |     objects,
148 |     calib,
149 |     fig,
150 |     occ_thresh=0.7,
151 |     categories=["Car", "Pedestrian", "Cyclist", "Motorcycle", "Undefined"],
152 |     text_func=None
153 | ):
154 |     from mayavi import mlab
155 |     mlab.options.offscreen = True
156 | 
157 |     default_connections = [
158 |         (0, 1), (4, 5), (0, 4),
159 |         (1, 2), (5, 6), (1, 5),
160 |         (2, 3), (6, 7), (2, 6),
161 |         (3, 0), (7, 4), (3, 7)
162 |     ] #! If the 8 corners were 0 indexed, these are the connections between them (unused here; draw_boxes_3d keeps its own copy)
163 | 
164 |     all_boxes = []
165 |     all_text = []
166 |     real_index = -1 #! keep track of & later display index of object in label file, so we can zoom in later
167 | 
168 |     for label in objects:
169 |         real_index += 1
170 |         if label.occlusion > occ_thresh or label.type not in categories:
171 |             continue
172 |         corners_3d_rect = label.compute_box_3d() #! Gets corners of 3d box
173 |         corners_3d_velo = calib.project_rect_to_velo(corners_3d_rect)
174 | 
175 |         all_boxes.append(corners_3d_velo)
176 | 
177 |         if objects.gt:
178 |             if text_func is None:
179 |                 all_text.append("v{}_{}".format(objects.view, real_index))
180 |             else:
181 |                 all_text.append(str(text_func(label)))
182 |         else:
183 |             if text_func is None:
184 |                 all_text.append("{:.2f}".format(label.score))
185 |             else:
186 |                 all_text.append(str(text_func(label)))
187 | 
188 |     if objects.gt:
189 |         box_color = text_color = (0, 1, 0) #! green gt boxes
190 |         # all_text = None
191 |     else:
192 |         box_color = text_color = (1, 0, 0) #! red dt boxes
193 | 
194 |     draw_boxes_3d(all_boxes, fig, box_color=box_color, text_color=text_color, text_list=all_text)
195 | 
196 |     return fig
197 | 
198 | def draw_boxes_3d(box3d, fig, box_color=(1,1,1), text_color=(1,0,0), text_scale=(.5,.5,.5), text_list=None):
199 |     from mayavi import mlab
200 |     mlab.options.offscreen = True
201 | 
202 |     default_connections = [
203 |         (0, 1), (4, 5), (0, 4),
204 |         (1, 2), (5, 6), (1, 5),
205 |         (2, 3), (6, 7), (2, 6),
206 |         (3, 0), (7, 4), (3, 7)
207 |     ] #! If the 8 corners were 0 indexed, these are the connections between them
208 | 
209 |     all_connections = []
210 | 
211 |     for box_index in range(len(box3d)):
212 |         b = box3d[box_index]
213 |         if text_list is not None:
214 |             text_tmp = text_list[box_index]
215 |             mlab.text3d(b[4,0], b[4,1], b[4,2], str(text_tmp), scale=text_scale, color=text_color, figure=fig)
216 | 
217 |         all_connections += default_connections #! Put in connections
218 |         default_connections = [(a + 8, b + 8) for (a, b) in default_connections] #! Increment default by 8 for the next box's corners
219 | 
220 | 
221 |     all_corners_3d_velo = np.array(box3d).reshape(-1, 3) # just make it a list of points
222 |     pts = mlab.points3d(
223 |         all_corners_3d_velo[:, 0],
224 |         all_corners_3d_velo[:, 1],
225 |         all_corners_3d_velo[:, 2],
226 |         color=box_color,
227 |         mode="point",
228 |         scale_factor=1
229 |     )
230 |     pts.mlab_source.dataset.lines = np.array(all_connections)
231 |     tube = mlab.pipeline.tube(pts, tube_radius=0.05)
232 |     tube.filter.radius_factor = 1.
233 |     mlab.pipeline.surface(tube, color=box_color)
234 | 
235 |     return fig
236 | 
237 | 
238 | def set_view(fig, azimuth, elevation, distance, focalpoint=[0, 0, 0]):
239 |     from mayavi import mlab
240 |     mlab.options.offscreen = True
241 |     #! Set the camera view
242 |     mlab.view(
243 |         azimuth=azimuth,
244 |         elevation=elevation,
245 |         distance=distance,
246 |         focalpoint=focalpoint,
247 |         figure=fig
248 |     )
249 |     return fig
250 | 
251 | #! Zooms the view in on the zoom_idx-th object
252 | def zoom_view(objects, calib, fig, zoom_idx):
253 |     from mayavi import mlab
254 |     mlab.options.offscreen = True
255 | 
256 |     zoom_object = objects.labels[zoom_idx] #! Object to focus on
257 |     x, y, z = zoom_object.t #! Center of object in rect camera coord.
258 |     x_velo, y_velo, z_velo = calib.project_rect_to_velo(np.array([[x, y, z]]))[0]
259 | 
260 |     curr_azimuth, curr_elevation, curr_distance, _ = mlab.view()
261 |     curr_x, curr_y, curr_z = spherical_to_cartesian(curr_azimuth, curr_elevation, curr_distance)
262 | 
263 |     ratio = 10 #! move the camera (ratio - 1) / ratio of the way toward the object
264 |     new_x = curr_x / ratio + x_velo * (ratio - 1) / ratio
265 |     new_y = curr_y / ratio + y_velo * (ratio - 1) / ratio
266 |     new_z = curr_z / ratio + z_velo * (ratio - 1) / ratio
267 | 
268 |     new_azimuth, new_elevation, new_distance = cartesian_to_spherical(new_x, new_y, new_z)
269 | 
270 |     mlab.view(
271 |         azimuth=new_azimuth,
272 |         elevation=new_elevation,
273 |         distance=new_distance,
274 |         focalpoint=[x_velo, y_velo, z_velo],
275 |         figure=fig
276 |     )
277 | 
278 |     return fig
279 | 
280 | def spherical_to_cartesian(azimuth, elevation, distance):
281 |     pi_over_180 = np.pi / 180.0
282 |     x = distance * np.sin(elevation * pi_over_180) * np.cos(azimuth * pi_over_180)
283 |     y = distance * np.sin(elevation * pi_over_180) * np.sin(azimuth * pi_over_180)
284 |     z = distance * np.cos(elevation * pi_over_180)
285 | 
286 |     return x, y, z
287 | 
288 | def cartesian_to_spherical(x, y, z):
289 |     pi_below_180 = 180.0 / np.pi
290 |     distance = np.sqrt(x ** 2 + y ** 2 + z ** 2)
291 |     azimuth = pi_below_180 * np.arctan2(y, x) #! arctan2 instead of arctan(y / x): survives x == 0 and picks the correct quadrant
292 |     elevation = pi_below_180 * np.arctan2(np.sqrt(x ** 2 + y ** 2), z) #! likewise survives z == 0
293 | 
294 |     return azimuth, elevation, distance
--------------------------------------------------------------------------------
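
A quick consistency check for the two conversion helpers at the end of vis3d.py. mayavi reports azimuth in [0, 360), so the round trip is compared modulo a full turn:

    import numpy as np

    azimuth, elevation, distance = 45.0, 60.0, 30.0
    x, y, z = spherical_to_cartesian(azimuth, elevation, distance)
    back_az, back_el, back_dist = cartesian_to_spherical(x, y, z)
    assert np.allclose((back_az % 360.0, back_el, back_dist), (azimuth, elevation, distance))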