├── .gitignore
├── README.md
└── detection_toolbox
    ├── comet
    │   └── send_data.py
    ├── kitti
    │   ├── calibration.py
    │   ├── eval copy.py
    │   ├── eval.py
    │   ├── kitti_label.py
    │   ├── kitti_object.py
    │   └── nms.py
    ├── std
    │   ├── __init__.py
    │   ├── log.py
    │   └── os.py
    ├── utils_3d
    │   └── utils.py
    └── vis
        ├── vis2d.py
        └── vis3d.py

/.gitignore:
--------------------------------------------------------------------------------
1 | run/
2 | *pyc
3 | *ipynb
4 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is a util toolbox that I'm slowly building up.
2 | A lot of it is geared toward 2d/3d detection and the KITTI dataset, but it also includes some comet-interfacing code and random linux utilities I've had to use.
3 | 
4 | The goal of this util repo is to have a store of versatile functions.
5 | As such, I am trying my hardest to keep all the functions/files free of individual use cases.
6 | Individual use cases will be limited to run/, which will be gitignored.
7 | 
--------------------------------------------------------------------------------
/detection_toolbox/comet/send_data.py:
--------------------------------------------------------------------------------
1 | from comet_ml import Experiment
2 | import time
3 | import os
4 | 
5 | '''
6 | This file tails a log file (which some other process keeps appending to)
7 | and mirrors its contents to the comet experiment's output.
8 | Inputs:
9 | project_name: Project name on comet
10 | exp_name: Experiment name on comet
11 | log_file_path: Log file to keep track of/send
12 | refresh_rate: Time between checks of log_file to see if anything changed. (seconds)
13 | '''
14 | def log_file_to_comet_output(
15 |     project_name,
16 |     exp_name,
17 |     log_file_path,
18 |     refresh_rate
19 | ):
20 |     experiment = Experiment(
21 |         api_key = os.environ["COMET_API_KEY"],
22 |         project_name = project_name
23 |     )
24 |     experiment.set_name(exp_name)
25 | 
26 |     with open(log_file_path, "r") as f:
27 |         while True:
28 |             print(f.read(), end='') # comet captures stdout; f.read() returns only bytes appended since the last read
29 |             time.sleep(refresh_rate)
--------------------------------------------------------------------------------
/detection_toolbox/kitti/calibration.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Calibration(object):
4 |     def __init__(self, calib_file_path):
5 |         self._read_calib_from_file(calib_file_path)
6 | 
7 | 
8 |     def _read_calib_from_file(self, calib_file_path):
9 |         lines = open(calib_file_path, "r").readlines()
10 |         for line in lines:
11 |             line = line.strip()
12 |             if line == '':
13 |                 continue
14 |             key, val = line.split(":", 1)
15 |             val = np.array(val.strip().split(" "), dtype=np.float32)
16 | 
17 |             if "P" == key[0]:
18 |                 setattr(self, key, val.reshape(3, 4))
19 |             elif key == "Tr_velo_to_p2" or key == "Tr_velo_to_cam":
20 |                 self.V2C = val.reshape(3, 4)
21 |                 self.C2V = inverse_rigid_trans(self.V2C)
22 |             elif key == "Tr_imu_to_velo":
23 |                 self.I2V = val.reshape(3, 4)
24 |             elif key == "R0_rect":
25 |                 self.R0 = val.reshape(3, 3)
26 |             else:
27 |                 raise Exception("Undefined key in calib file: {}, {}".format(key, calib_file_path))
28 | 
29 |     def cart2hom(self, pts_3d):
30 |         ''' Input: nx3 points in Cartesian
31 |             Output: nx4 points in homogeneous coordinates, by appending a column of 1s
32 |         '''
33 |         n = pts_3d.shape[0]
34 |         pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
35 |         return pts_3d_hom
36 | 
37 |     # ===========================
38 |     # ------- 3d to 3d ----------
39 |     # ===========================
40 |     def project_velo_to_ref(self, pts_3d_velo):
41 |         pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
42 |         return np.dot(pts_3d_velo, np.transpose(self.V2C))
43 | 
44 |     def project_ref_to_velo(self, pts_3d_ref):
45 |         pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
46 |         return np.dot(pts_3d_ref, self.C2V.T)
47 | 
48 |     def project_rect_to_ref(self, pts_3d_rect):
49 |         ''' Input and Output are nx3 points '''
50 |         return np.dot(np.linalg.inv(self.R0), pts_3d_rect.T).T
51 | 
52 |     def project_ref_to_rect(self, pts_3d_ref):
53 |         ''' Input and Output are nx3 points '''
54 |         return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))
55 | 
56 |     def project_rect_to_velo(self, pts_3d_rect):
57 |         ''' Input: nx3 points in rect camera coord.
58 |             Output: nx3 points in velodyne coord.
59 |         '''
60 |         pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
61 |         return self.project_ref_to_velo(pts_3d_ref)
62 | 
63 |     def project_velo_to_rect(self, pts_3d_velo):
64 |         pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
65 |         return self.project_ref_to_rect(pts_3d_ref)
66 | 
67 |     # ===========================
68 |     # ------- 3d to 2d ----------
69 |     # ===========================
70 |     def project_rect_to_image(self, pts_3d_rect, view):
71 |         ''' Input: nx3 points in rect camera coord.
72 |             Output: nx2 points in the image coord. of camera `view` (e.g. 2 for P2)
73 |         '''
74 |         pts_3d_rect = self.cart2hom(pts_3d_rect)
75 |         pts_2d = np.dot(pts_3d_rect, np.transpose(getattr(self, "P" + str(view)))) # nx3
76 |         pts_2d[:,0] /= pts_2d[:,2]
77 |         pts_2d[:,1] /= pts_2d[:,2]
78 |         return pts_2d[:,0:2]
79 | 
80 |     def project_velo_to_image(self, pts_3d_velo, view):
81 |         ''' Input: nx3 points in velodyne coord.
82 |             Output: nx2 points in the image coord. of camera `view`
83 |         '''
84 |         pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
85 |         return self.project_rect_to_image(pts_3d_rect, view)
86 | 
87 |     # # ===========================
88 |     # # ------- 2d to 3d ----------
89 |     # # ===========================
90 |     # def project_image_to_rect(self, uv_depth):
91 |     #     ''' Input: nx3 first two channels are uv, 3rd channel
92 |     #         is depth in rect camera coord.
93 |     #         Output: nx3 points in rect camera coord.
94 |     #     '''
95 |     #     n = uv_depth.shape[0]
96 |     #     x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
97 |     #     y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
98 |     #     pts_3d_rect = np.zeros((n,3))
99 |     #     pts_3d_rect[:,0] = x
100 |     #     pts_3d_rect[:,1] = y
101 |     #     pts_3d_rect[:,2] = uv_depth[:,2]
102 |     #     return pts_3d_rect
103 | 
104 | 
105 |     # #! 
From Xinshuo's file 106 | # def img_to_rect(self, u, v, depth_rect): 107 | # """ 108 | # :param u: (N) 109 | # :param v: (N) 110 | # :param depth_rect: (N) 111 | # :return: 112 | # """ 113 | 114 | # # split the extrinsics from the projection matrix 115 | # proj_matrix = self.P.astype('float64') 116 | # ref_proj = self.P2.astype('float64') 117 | # intrinsics = ref_proj[:, :3] 118 | # if self.view == 5: intrinsics[1, 2] = intrinsics[0, 2] 119 | # extrinsics = np.matmul(np.linalg.inv(intrinsics), proj_matrix) # 3 x 4 120 | 121 | # # invert the extrinsics 122 | # extrin = np.concatenate((extrinsics, np.array([0, 0, 0, 1]).reshape((1, 4))), axis=0) # 4 x 4 123 | # extrin = np.linalg.inv(extrin) 124 | # extrin = extrin[:3, :] 125 | 126 | # # project the points back to the 3D coordinate with respect to P2 127 | # data_cam = self.get_intrisics_extrinsics(ref_proj) 128 | # x = ((u - data_cam['cu']) * depth_rect) / data_cam['fu'] 129 | # y = ((v - data_cam['cv']) * depth_rect) / data_cam['fv'] 130 | # num_pts = x.shape[0] 131 | # pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), \ 132 | # depth_rect.reshape(-1, 1), np.ones((num_pts, 1), dtype='float64')), axis=1) # N x 4 133 | 134 | # # rotate and translate to the 3D coordinate with respect to any camera 135 | # pts_rect = np.matmul(pts_rect, extrin.transpose()) 136 | 137 | # return pts_rect 138 | 139 | # #! From Xinshuo's file 140 | # #! Changed: box specifies coordinates of passed depth_map in original image 141 | # #! used for when we took only a 2dbbox part of the image out 142 | # def depthmap_to_rect(self, depth_map, segmap=None, box=None, depth_limit=120): 143 | # """ 144 | # :param depth_map: (H, W), depth_map 145 | # :return: 146 | # """ 147 | # if box is not None: 148 | # xmin, ymin = box 149 | # else: 150 | # xmin = 0 151 | # ymin = 0 152 | 153 | # x_range = np.arange(0, depth_map.shape[1]) 154 | # y_range = np.arange(0, depth_map.shape[0]) 155 | 156 | # x_idxs, y_idxs = np.meshgrid(x_range, y_range) 157 | # x_idxs, y_idxs = x_idxs.reshape(-1), y_idxs.reshape(-1) 158 | # depth = depth_map[y_idxs, x_idxs] 159 | 160 | # # remove the depth point which does not reflect back and has the maximum depth range 161 | # valid_index = np.where(depth < depth_limit)[0].tolist() 162 | # x_idxs, y_idxs, depth = x_idxs[valid_index], y_idxs[valid_index], depth[valid_index] 163 | 164 | # x_idxs += xmin 165 | # y_idxs += ymin #! 
Scale to proper positio in original image 166 | 167 | # pts_rect = self.img_to_rect(x_idxs, y_idxs, depth) 168 | # return pts_rect, x_idxs, y_idxs 169 | 170 | # def project_image_to_velo(self, uv_depth): 171 | # pts_3d_rect = self.project_image_to_rect(uv_depth) 172 | # return self.project_rect_to_velo(pts_3d_rect) 173 | 174 | 175 | def inverse_rigid_trans(Tr): 176 | ''' Inverse a rigid body transform matrix (3x4 as [R|t]) 177 | [R'|-R't; 0|1] 178 | ''' 179 | inv_Tr = np.zeros_like(Tr) # 3x4 180 | inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3]) 181 | inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3]) 182 | return inv_Tr -------------------------------------------------------------------------------- /detection_toolbox/kitti/eval.py: -------------------------------------------------------------------------------- 1 | import io as sysio 2 | import os 3 | import time 4 | 5 | import numba 6 | import numpy as np 7 | from scipy.interpolate import interp1d 8 | 9 | # from second.core.non_max_suppression.nms_gpu import rotate_iou_gpu_eval 10 | import math 11 | from pathlib import Path 12 | 13 | import numba 14 | import numpy as np 15 | from numba import cuda 16 | from detection_toolbox.std import dprint 17 | import time 18 | 19 | 20 | @cuda.jit(device=True, inline=True) 21 | def iou_device(a, b): 22 | left = max(a[0], b[0]) 23 | right = min(a[2], b[2]) 24 | top = max(a[1], b[1]) 25 | bottom = min(a[3], b[3]) 26 | width = max(right - left + 1, 0.) 27 | height = max(bottom - top + 1, 0.) 28 | interS = width * height 29 | Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1) 30 | Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1) 31 | return interS / (Sa + Sb - interS) 32 | 33 | 34 | @cuda.jit() 35 | def nms_kernel_v2(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): 36 | threadsPerBlock = 8 * 8 37 | row_start = cuda.blockIdx.y 38 | col_start = cuda.blockIdx.x 39 | tx = cuda.threadIdx.x 40 | row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) 41 | col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) 42 | block_boxes = cuda.shared.array( 43 | shape=(threadsPerBlock, 5), dtype=numba.float32) 44 | dev_box_idx = threadsPerBlock * col_start + tx 45 | if (tx < col_size): 46 | block_boxes[tx, 0] = dev_boxes[dev_box_idx, 0] 47 | block_boxes[tx, 1] = dev_boxes[dev_box_idx, 1] 48 | block_boxes[tx, 2] = dev_boxes[dev_box_idx, 2] 49 | block_boxes[tx, 3] = dev_boxes[dev_box_idx, 3] 50 | block_boxes[tx, 4] = dev_boxes[dev_box_idx, 4] 51 | cuda.syncthreads() 52 | if (cuda.threadIdx.x < row_size): 53 | cur_box_idx = threadsPerBlock * row_start + cuda.threadIdx.x 54 | # cur_box = dev_boxes + cur_box_idx * 5; 55 | i = 0 56 | t = 0 57 | start = 0 58 | if (row_start == col_start): 59 | start = tx + 1 60 | for i in range(start, col_size): 61 | if (iou_device(dev_boxes[cur_box_idx], block_boxes[i]) > 62 | nms_overlap_thresh): 63 | t |= 1 << i 64 | col_blocks = ((n_boxes) // (threadsPerBlock) + ( 65 | (n_boxes) % (threadsPerBlock) > 0)) 66 | dev_mask[cur_box_idx * col_blocks + col_start] = t 67 | 68 | 69 | @cuda.jit() 70 | def nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): 71 | threadsPerBlock = 8 * 8 72 | row_start = cuda.blockIdx.y 73 | col_start = cuda.blockIdx.x 74 | tx = cuda.threadIdx.x 75 | row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) 76 | col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) 77 | block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 78 | dev_box_idx = threadsPerBlock * col_start + tx 79 | if (tx < 
col_size): 80 | block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] 81 | block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] 82 | block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] 83 | block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] 84 | block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] 85 | cuda.syncthreads() 86 | if (tx < row_size): 87 | cur_box_idx = threadsPerBlock * row_start + tx 88 | # cur_box = dev_boxes + cur_box_idx * 5; 89 | t = 0 90 | start = 0 91 | if (row_start == col_start): 92 | start = tx + 1 93 | for i in range(start, col_size): 94 | iou = iou_device(dev_boxes[cur_box_idx * 5:cur_box_idx * 5 + 4], 95 | block_boxes[i * 5:i * 5 + 4]) 96 | if (iou > nms_overlap_thresh): 97 | t |= 1 << i 98 | col_blocks = ((n_boxes) // (threadsPerBlock) + ( 99 | (n_boxes) % (threadsPerBlock) > 0)) 100 | dev_mask[cur_box_idx * col_blocks + col_start] = t 101 | 102 | 103 | @numba.jit(nopython=True) 104 | def div_up(m, n): 105 | return m // n + (m % n > 0) 106 | 107 | 108 | @numba.jit(nopython=True) 109 | def nms_postprocess(keep_out, mask_host, boxes_num): 110 | threadsPerBlock = 8 * 8 111 | col_blocks = div_up(boxes_num, threadsPerBlock) 112 | remv = np.zeros((col_blocks), dtype=np.uint64) 113 | num_to_keep = 0 114 | for i in range(boxes_num): 115 | nblock = i // threadsPerBlock 116 | inblock = i % threadsPerBlock 117 | mask = np.array(1 << inblock, dtype=np.uint64) 118 | if not (remv[nblock] & mask): 119 | keep_out[num_to_keep] = i 120 | num_to_keep += 1 121 | # unsigned long long *p = &mask_host[0] + i * col_blocks; 122 | for j in range(nblock, col_blocks): 123 | remv[j] |= mask_host[i * col_blocks + j] 124 | # remv[j] |= p[j]; 125 | return num_to_keep 126 | 127 | 128 | def nms_gpu(dets, nms_overlap_thresh, device_id=0): 129 | """nms in gpu. 130 | 131 | Args: 132 | dets ([type]): [description] 133 | nms_overlap_thresh ([type]): [description] 134 | device_id ([type], optional): Defaults to 0. 
[description] 135 | 136 | Returns: 137 | [type]: [description] 138 | """ 139 | 140 | boxes_num = dets.shape[0] 141 | keep_out = np.zeros([boxes_num], dtype=np.int32) 142 | scores = dets[:, 4] 143 | order = scores.argsort()[::-1].astype(np.int32) 144 | boxes_host = dets[order, :] 145 | 146 | threadsPerBlock = 8 * 8 147 | col_blocks = div_up(boxes_num, threadsPerBlock) 148 | cuda.select_device(device_id) 149 | mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) 150 | blockspergrid = (div_up(boxes_num, threadsPerBlock), 151 | div_up(boxes_num, threadsPerBlock)) 152 | stream = cuda.stream() 153 | with stream.auto_synchronize(): 154 | boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) 155 | mask_dev = cuda.to_device(mask_host, stream) 156 | nms_kernel[blockspergrid, threadsPerBlock, stream]( 157 | boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) 158 | mask_dev.copy_to_host(mask_host, stream=stream) 159 | # stream.synchronize() 160 | num_out = nms_postprocess(keep_out, mask_host, boxes_num) 161 | keep = keep_out[:num_out] 162 | return list(order[keep]) 163 | 164 | 165 | @cuda.jit(device=True, inline=True) 166 | def trangle_area(a, b, c): 167 | return ( 168 | (a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) / 2.0 169 | 170 | 171 | @cuda.jit(device=True, inline=True) 172 | def area(int_pts, num_of_inter): 173 | area_val = 0.0 174 | for i in range(num_of_inter - 2): 175 | area_val += abs( 176 | trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4], 177 | int_pts[2 * i + 4:2 * i + 6])) 178 | return area_val 179 | 180 | 181 | @cuda.jit(device=True, inline=True) 182 | def sort_vertex_in_convex_polygon(int_pts, num_of_inter): 183 | if num_of_inter > 0: 184 | center = cuda.local.array((2, ), dtype=numba.float32) 185 | center[:] = 0.0 186 | for i in range(num_of_inter): 187 | center[0] += int_pts[2 * i] 188 | center[1] += int_pts[2 * i + 1] 189 | center[0] /= num_of_inter 190 | center[1] /= num_of_inter 191 | v = cuda.local.array((2, ), dtype=numba.float32) 192 | vs = cuda.local.array((16, ), dtype=numba.float32) 193 | for i in range(num_of_inter): 194 | v[0] = int_pts[2 * i] - center[0] 195 | v[1] = int_pts[2 * i + 1] - center[1] 196 | d = math.sqrt(v[0] * v[0] + v[1] * v[1]) 197 | v[0] = v[0] / d 198 | v[1] = v[1] / d 199 | if v[1] < 0: 200 | v[0] = -2 - v[0] 201 | vs[i] = v[0] 202 | j = 0 203 | temp = 0 204 | for i in range(1, num_of_inter): 205 | if vs[i - 1] > vs[i]: 206 | temp = vs[i] 207 | tx = int_pts[2 * i] 208 | ty = int_pts[2 * i + 1] 209 | j = i 210 | while j > 0 and vs[j - 1] > temp: 211 | vs[j] = vs[j - 1] 212 | int_pts[j * 2] = int_pts[j * 2 - 2] 213 | int_pts[j * 2 + 1] = int_pts[j * 2 - 1] 214 | j -= 1 215 | 216 | vs[j] = temp 217 | int_pts[j * 2] = tx 218 | int_pts[j * 2 + 1] = ty 219 | 220 | 221 | @cuda.jit( 222 | device=True, 223 | inline=True) 224 | def line_segment_intersection(pts1, pts2, i, j, temp_pts): 225 | A = cuda.local.array((2, ), dtype=numba.float32) 226 | B = cuda.local.array((2, ), dtype=numba.float32) 227 | C = cuda.local.array((2, ), dtype=numba.float32) 228 | D = cuda.local.array((2, ), dtype=numba.float32) 229 | 230 | A[0] = pts1[2 * i] 231 | A[1] = pts1[2 * i + 1] 232 | 233 | B[0] = pts1[2 * ((i + 1) % 4)] 234 | B[1] = pts1[2 * ((i + 1) % 4) + 1] 235 | 236 | C[0] = pts2[2 * j] 237 | C[1] = pts2[2 * j + 1] 238 | 239 | D[0] = pts2[2 * ((j + 1) % 4)] 240 | D[1] = pts2[2 * ((j + 1) % 4) + 1] 241 | BA0 = B[0] - A[0] 242 | BA1 = B[1] - A[1] 243 | DA0 = D[0] - A[0] 244 | CA0 = C[0] - A[0] 245 | DA1 = D[1] - A[1] 246 | CA1 = C[1] - 
A[1] 247 | acd = DA1 * CA0 > CA1 * DA0 248 | bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0]) 249 | if acd != bcd: 250 | abc = CA1 * BA0 > BA1 * CA0 251 | abd = DA1 * BA0 > BA1 * DA0 252 | if abc != abd: 253 | DC0 = D[0] - C[0] 254 | DC1 = D[1] - C[1] 255 | ABBA = A[0] * B[1] - B[0] * A[1] 256 | CDDC = C[0] * D[1] - D[0] * C[1] 257 | DH = BA1 * DC0 - BA0 * DC1 258 | Dx = ABBA * DC0 - BA0 * CDDC 259 | Dy = ABBA * DC1 - BA1 * CDDC 260 | temp_pts[0] = Dx / DH 261 | temp_pts[1] = Dy / DH 262 | return True 263 | return False 264 | 265 | 266 | @cuda.jit( 267 | device=True, 268 | inline=True) 269 | def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts): 270 | a = cuda.local.array((2, ), dtype=numba.float32) 271 | b = cuda.local.array((2, ), dtype=numba.float32) 272 | c = cuda.local.array((2, ), dtype=numba.float32) 273 | d = cuda.local.array((2, ), dtype=numba.float32) 274 | 275 | a[0] = pts1[2 * i] 276 | a[1] = pts1[2 * i + 1] 277 | 278 | b[0] = pts1[2 * ((i + 1) % 4)] 279 | b[1] = pts1[2 * ((i + 1) % 4) + 1] 280 | 281 | c[0] = pts2[2 * j] 282 | c[1] = pts2[2 * j + 1] 283 | 284 | d[0] = pts2[2 * ((j + 1) % 4)] 285 | d[1] = pts2[2 * ((j + 1) % 4) + 1] 286 | 287 | area_abc = trangle_area(a, b, c) 288 | area_abd = trangle_area(a, b, d) 289 | 290 | if area_abc * area_abd >= 0: 291 | return False 292 | 293 | area_cda = trangle_area(c, d, a) 294 | area_cdb = area_cda + area_abc - area_abd 295 | 296 | if area_cda * area_cdb >= 0: 297 | return False 298 | t = area_cda / (area_abd - area_abc) 299 | 300 | dx = t * (b[0] - a[0]) 301 | dy = t * (b[1] - a[1]) 302 | temp_pts[0] = a[0] + dx 303 | temp_pts[1] = a[1] + dy 304 | return True 305 | 306 | 307 | @cuda.jit(device=True, inline=True) 308 | def point_in_quadrilateral(pt_x, pt_y, corners): 309 | ab0 = corners[2] - corners[0] 310 | ab1 = corners[3] - corners[1] 311 | 312 | ad0 = corners[6] - corners[0] 313 | ad1 = corners[7] - corners[1] 314 | 315 | ap0 = pt_x - corners[0] 316 | ap1 = pt_y - corners[1] 317 | 318 | abab = ab0 * ab0 + ab1 * ab1 319 | abap = ab0 * ap0 + ab1 * ap1 320 | adad = ad0 * ad0 + ad1 * ad1 321 | adap = ad0 * ap0 + ad1 * ap1 322 | 323 | eps = -1e-6 324 | return abab - abap >= eps and abap >= eps and adad - adap >= eps and adap >= eps 325 | 326 | 327 | 328 | @cuda.jit(device=True, inline=True) 329 | def quadrilateral_intersection(pts1, pts2, int_pts): 330 | num_of_inter = 0 331 | for i in range(4): 332 | if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2): 333 | int_pts[num_of_inter * 2] = pts1[2 * i] 334 | int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1] 335 | num_of_inter += 1 336 | if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1): 337 | int_pts[num_of_inter * 2] = pts2[2 * i] 338 | int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1] 339 | num_of_inter += 1 340 | temp_pts = cuda.local.array((2, ), dtype=numba.float32) 341 | for i in range(4): 342 | for j in range(4): 343 | has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts) 344 | if has_pts: 345 | int_pts[num_of_inter * 2] = temp_pts[0] 346 | int_pts[num_of_inter * 2 + 1] = temp_pts[1] 347 | num_of_inter += 1 348 | 349 | return num_of_inter 350 | 351 | 352 | @cuda.jit(device=True, inline=True) 353 | def rbbox_to_corners(corners, rbbox): 354 | # generate clockwise corners and rotate it clockwise 355 | angle = rbbox[4] 356 | a_cos = math.cos(angle) 357 | a_sin = math.sin(angle) 358 | center_x = rbbox[0] 359 | center_y = rbbox[1] 360 | x_d = rbbox[2] 361 | y_d = rbbox[3] 362 | corners_x = cuda.local.array((4, ), dtype=numba.float32) 
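    #? Sketch of the layout below: the four corners are generated clockwise in the
    #? box's local frame as (±x_d/2, ±y_d/2), then mapped through
    #? R = [[cos a, sin a], [-sin a, cos a]] (a clockwise rotation for positive a)
    #? and translated by (center_x, center_y).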
363 | corners_y = cuda.local.array((4, ), dtype=numba.float32) 364 | corners_x[0] = -x_d / 2 365 | corners_x[1] = -x_d / 2 366 | corners_x[2] = x_d / 2 367 | corners_x[3] = x_d / 2 368 | corners_y[0] = -y_d / 2 369 | corners_y[1] = y_d / 2 370 | corners_y[2] = y_d / 2 371 | corners_y[3] = -y_d / 2 372 | for i in range(4): 373 | corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x 374 | corners[2 * i + 375 | 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y 376 | 377 | 378 | @cuda.jit(device=True, inline=True) 379 | def inter(rbbox1, rbbox2): 380 | corners1 = cuda.local.array((8, ), dtype=numba.float32) 381 | corners2 = cuda.local.array((8, ), dtype=numba.float32) 382 | intersection_corners = cuda.local.array((16, ), dtype=numba.float32) 383 | 384 | rbbox_to_corners(corners1, rbbox1) 385 | rbbox_to_corners(corners2, rbbox2) 386 | 387 | num_intersection = quadrilateral_intersection(corners1, corners2, 388 | intersection_corners) 389 | sort_vertex_in_convex_polygon(intersection_corners, num_intersection) 390 | # print(intersection_corners.reshape([-1, 2])[:num_intersection]) 391 | 392 | return area(intersection_corners, num_intersection) 393 | 394 | 395 | @cuda.jit(device=True, inline=True) 396 | def devRotateIoU(rbox1, rbox2): 397 | area1 = rbox1[2] * rbox1[3] 398 | area2 = rbox2[2] * rbox2[3] 399 | area_inter = inter(rbox1, rbox2) 400 | return area_inter / (area1 + area2 - area_inter) 401 | 402 | 403 | @cuda.jit() 404 | def rotate_nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): 405 | threadsPerBlock = 8 * 8 406 | row_start = cuda.blockIdx.y 407 | col_start = cuda.blockIdx.x 408 | tx = cuda.threadIdx.x 409 | row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) 410 | col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) 411 | block_boxes = cuda.shared.array(shape=(64 * 6, ), dtype=numba.float32) 412 | dev_box_idx = threadsPerBlock * col_start + tx 413 | if (tx < col_size): 414 | block_boxes[tx * 6 + 0] = dev_boxes[dev_box_idx * 6 + 0] 415 | block_boxes[tx * 6 + 1] = dev_boxes[dev_box_idx * 6 + 1] 416 | block_boxes[tx * 6 + 2] = dev_boxes[dev_box_idx * 6 + 2] 417 | block_boxes[tx * 6 + 3] = dev_boxes[dev_box_idx * 6 + 3] 418 | block_boxes[tx * 6 + 4] = dev_boxes[dev_box_idx * 6 + 4] 419 | block_boxes[tx * 6 + 5] = dev_boxes[dev_box_idx * 6 + 5] 420 | cuda.syncthreads() 421 | if (tx < row_size): 422 | cur_box_idx = threadsPerBlock * row_start + tx 423 | # cur_box = dev_boxes + cur_box_idx * 5; 424 | t = 0 425 | start = 0 426 | if (row_start == col_start): 427 | start = tx + 1 428 | for i in range(start, col_size): 429 | iou = devRotateIoU(dev_boxes[cur_box_idx * 6:cur_box_idx * 6 + 5], 430 | block_boxes[i * 6:i * 6 + 5]) 431 | # print('iou', iou, cur_box_idx, i) 432 | if (iou > nms_overlap_thresh): 433 | t |= 1 << i 434 | col_blocks = ((n_boxes) // (threadsPerBlock) + ( 435 | (n_boxes) % (threadsPerBlock) > 0)) 436 | dev_mask[cur_box_idx * col_blocks + col_start] = t 437 | 438 | 439 | def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=0): 440 | """nms in gpu. WARNING: this function can provide right result 441 | but its performance isn't be tested 442 | 443 | Args: 444 | dets ([type]): [description] 445 | nms_overlap_thresh ([type]): [description] 446 | device_id ([type], optional): Defaults to 0. 
[description] 447 | 448 | Returns: 449 | [type]: [description] 450 | """ 451 | dets = dets.astype(np.float32) 452 | boxes_num = dets.shape[0] 453 | keep_out = np.zeros([boxes_num], dtype=np.int32) 454 | scores = dets[:, 5] 455 | order = scores.argsort()[::-1].astype(np.int32) 456 | boxes_host = dets[order, :] 457 | 458 | threadsPerBlock = 8 * 8 459 | col_blocks = div_up(boxes_num, threadsPerBlock) 460 | cuda.select_device(device_id) 461 | # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64) 462 | mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) 463 | blockspergrid = (div_up(boxes_num, threadsPerBlock), 464 | div_up(boxes_num, threadsPerBlock)) 465 | stream = cuda.stream() 466 | with stream.auto_synchronize(): 467 | boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) 468 | mask_dev = cuda.to_device(mask_host, stream) 469 | rotate_nms_kernel[blockspergrid, threadsPerBlock, stream]( 470 | boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) 471 | mask_dev.copy_to_host(mask_host, stream=stream) 472 | num_out = nms_postprocess(keep_out, mask_host, boxes_num) 473 | keep = keep_out[:num_out] 474 | return list(order[keep]) 475 | 476 | 477 | @cuda.jit('(int64, int64, float32[:], float32[:], float32[:])', fastmath=False) 478 | def rotate_iou_kernel(N, K, dev_boxes, dev_query_boxes, dev_iou): 479 | threadsPerBlock = 8 * 8 480 | row_start = cuda.blockIdx.x 481 | col_start = cuda.blockIdx.y 482 | tx = cuda.threadIdx.x 483 | row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) 484 | col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) 485 | block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 486 | block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 487 | 488 | dev_query_box_idx = threadsPerBlock * col_start + tx 489 | dev_box_idx = threadsPerBlock * row_start + tx 490 | if (tx < col_size): 491 | block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] 492 | block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] 493 | block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] 494 | block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] 495 | block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] 496 | if (tx < row_size): 497 | block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] 498 | block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] 499 | block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] 500 | block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] 501 | block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] 502 | cuda.syncthreads() 503 | if tx < row_size: 504 | for i in range(col_size): 505 | offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i 506 | dev_iou[offset] = devRotateIoU(block_qboxes[i * 5:i * 5 + 5], 507 | block_boxes[tx * 5:tx * 5 + 5]) 508 | 509 | 510 | def rotate_iou_gpu(boxes, query_boxes, device_id=0): 511 | """rotated box iou running in gpu. 500x faster than cpu version 512 | (take 5ms in one example with numba.cuda code). 513 | convert from [this project]( 514 | https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). 515 | 516 | Args: 517 | boxes (float tensor: [N, 5]): rbboxes. format: centers, dims, 518 | angles(clockwise when positive) 519 | query_boxes (float tensor: [K, 5]): [description] 520 | device_id (int, optional): Defaults to 0. 
[description] 521 | 522 | Returns: 523 | [type]: [description] 524 | """ 525 | box_dtype = boxes.dtype 526 | boxes = boxes.astype(np.float32) 527 | query_boxes = query_boxes.astype(np.float32) 528 | N = boxes.shape[0] 529 | K = query_boxes.shape[0] 530 | iou = np.zeros((N, K), dtype=np.float32) 531 | if N == 0 or K == 0: 532 | return iou 533 | threadsPerBlock = 8 * 8 534 | cuda.select_device(device_id) 535 | blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock)) 536 | 537 | stream = cuda.stream() 538 | with stream.auto_synchronize(): 539 | boxes_dev = cuda.to_device(boxes.reshape([-1]), stream) 540 | query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream) 541 | iou_dev = cuda.to_device(iou.reshape([-1]), stream) 542 | rotate_iou_kernel[blockspergrid, threadsPerBlock, stream]( 543 | N, K, boxes_dev, query_boxes_dev, iou_dev) 544 | iou_dev.copy_to_host(iou.reshape([-1]), stream=stream) 545 | return iou.astype(boxes.dtype) 546 | 547 | 548 | @cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True) 549 | def devRotateIoUEval(rbox1, rbox2, criterion=-1): 550 | area1 = rbox1[2] * rbox1[3] 551 | area2 = rbox2[2] * rbox2[3] 552 | area_inter = inter(rbox1, rbox2) 553 | if criterion == -1: 554 | return area_inter / (area1 + area2 - area_inter) 555 | elif criterion == 0: 556 | return area_inter / area1 557 | elif criterion == 1: 558 | return area_inter / area2 559 | else: 560 | return area_inter 561 | 562 | 563 | @cuda.jit( 564 | '(int64, int64, float32[:], float32[:], float32[:], int32)', 565 | fastmath=False) 566 | def rotate_iou_kernel_eval(N, 567 | K, 568 | dev_boxes, 569 | dev_query_boxes, 570 | dev_iou, 571 | criterion=-1): 572 | threadsPerBlock = 8 * 8 573 | row_start = cuda.blockIdx.x 574 | col_start = cuda.blockIdx.y 575 | tx = cuda.threadIdx.x 576 | row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) 577 | col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) 578 | block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 579 | block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) 580 | 581 | dev_query_box_idx = threadsPerBlock * col_start + tx 582 | dev_box_idx = threadsPerBlock * row_start + tx 583 | if (tx < col_size): 584 | block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] 585 | block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] 586 | block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] 587 | block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] 588 | block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] 589 | if (tx < row_size): 590 | block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] 591 | block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] 592 | block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] 593 | block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] 594 | block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] 595 | cuda.syncthreads() 596 | if tx < row_size: 597 | for i in range(col_size): 598 | offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i 599 | dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5], 600 | block_boxes[tx * 5:tx * 5 + 5], 601 | criterion) 602 | 603 | 604 | def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0): 605 | """rotated box iou running in gpu. 8x faster than cpu version 606 | (take 5ms in one example with numba.cuda code). 
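    Example (a sketch, assuming a CUDA device is available; per the Args below,
    a box is [cx, cy, w, l, angle]):
        boxes = np.array([[0., 0., 2., 4., 0.]], dtype=np.float32)
        qboxes = np.array([[0., 0., 2., 4., np.pi / 2]], dtype=np.float32)
        rotate_iou_gpu_eval(boxes, qboxes)  # -> [[~0.33]]: 2x2 intersection, union 12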
607 |     convert from [this project](
608 |     https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).
609 | 
610 |     Args:
611 |         boxes (float tensor: [N, 5]): rbboxes. format: centers, dims,
612 |             angles(clockwise when positive)
613 |         query_boxes (float tensor: [K, 5]): [description]
614 |         device_id (int, optional): Defaults to 0. [description]
615 | 
616 |     Returns:
617 |         [type]: [description]
618 |     """
619 |     box_dtype = boxes.dtype
620 |     boxes = boxes.astype(np.float32)
621 |     query_boxes = query_boxes.astype(np.float32)
622 |     N = boxes.shape[0]
623 |     K = query_boxes.shape[0]
624 |     iou = np.zeros((N, K), dtype=np.float32)
625 |     if N == 0 or K == 0:
626 |         return iou
627 |     threadsPerBlock = 8 * 8
628 |     cuda.select_device(device_id)
629 |     blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))
630 | 
631 |     stream = cuda.stream()
632 |     with stream.auto_synchronize():
633 |         boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
634 |         query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
635 |         iou_dev = cuda.to_device(iou.reshape([-1]), stream)
636 |         rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
637 |             N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
638 |         iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
639 |     return iou.astype(boxes.dtype)
640 | 
641 | 
642 | def get_mAP(prec):
643 |     sums = 0
644 |     for i in range(0, len(prec), 4):
645 |         sums += prec[i]
646 |     return sums / 11 * 100
647 | 
648 | #! scores is a 1d array of scores of matched dts.
649 | #! num_gt is the total number of valid gt boxes in the dataset
650 | #? Honestly, I'm not sure. It looks like it divides the space of scores into num_sample_pts parts
651 | #? and returns the scores at each of the parts as the thresholds.
652 | #? The scores I think are in decreasing order.
653 | #? So it's not necessarily that the returned thresholds are 1.0, 0.9, 0.8, ... 0 if num_sample_pts = 11,
654 | #? but it's scores[len(scores) * 0 / 10], scores[len(scores) * 1 / 10], ..., I think.
655 | #? Well, all that really matters is that the thresholds returned at the end have length num_sample_pts, sorted decreasing.
656 | #! Okay, so it's finding the score thresholds that correspond to the 41 recall positions.
657 | '''
658 | Here's a walk-through.
659 | First, note that the big if statement w/ continue triggers if the current recall is closer to the right recall
660 | than the left recall. So, if the current recall is closer to the LEFT recall, we store the left recall's corresponding
661 | threshold. Why? Because it means we found the threshold corresponding best to the current recall position.
662 | 
663 | Better general mental image? Think of a p/r curve, y-axis p, x-axis r. We're trying to find the area under
664 | the curve. Since we can't take the integral directly, we approximate it. We actually use a right Riemann sum
665 | here. We ignore curr_recall = 0, since we don't use it to calculate the final mAP anyway. Now what is the prec.
666 | corr. to 1/40 recall? Well, we need a way to filter the detections by finding a threshold that causes 1/40
667 | recall. So we go through the for loop, and we append a threshold that does so, and so on.
668 | Lastly, to compute the actual mAP, we add up the 1/40 to 40/40 recall positions and divide by 40. Visually,
669 | think of this as adding up the areas of the boxes, with top right corner on the curve at the 1/40 to 40/40 recall
670 | positions, where the boxes have width 1/40.
671 | 
672 | Now what if we fed in ground truth boxes as the predictions, all with score 1? 
673 | Then the thresholds array would be entirely populated with 1. Then, since precision at every threshold is 1,
674 | the overall score would be 1 (100%).
675 | 
676 | Now why is this the metric everyone uses? What is the significance of a p/r curve?
677 | An alternative would be to directly toss out "score" and compute recall. However, then there is no measure
678 | of how many false positives we have. Then we could report precision as well. Then we run into "What is a good
679 | balance of precision & recall scores?" We could ensure precision = 1 but recall might be 0, and vice versa.
680 | Then comes the idea of "well, since p & r are correlated to some extent, we can graph it." What accounts for different
681 | combinations of p & r values? p & r values change based on filtering detections, and the answer ends up being
682 | assigning a "confidence" to each detection. And as such, we have the p/r curve.
683 | 
684 | How to interpret a p/r curve?
685 | An ideal p/r curve would just be p = 1 for all recall values.
686 | If the p/r curve drops early (precision drops even for low recall values), this means we have some high-
687 | confidence false positives. Why? Because low recall <=> high confidence threshold, which should have high
688 | precision. We can then filter the detections at that threshold and see what false positives there are.
689 | Look at the recall value where p = 0. Ideally, it should be 1. Otherwise, it means we're always missing some
690 | ground truth (or it's being assigned super low confidence, so it is lost amongst low-confidence false
691 | positives).
692 | Odd case: the right side of the curve slowly starts going down, but very suddenly drops to p = 0 (as in, if
693 | you were to continue drawing the curve, it feels like it'll go on for a little longer, but it doesn't). Then
694 | there is a chance that a confidence-threshold filtering of the detections was done, with a too-aggressive
695 | threshold clipping out potentially good detections.
696 | 
697 | How is the p/r curve affected by manual truncation of confidence? Aka, filter out all detections < 0.3 confidence.
698 | a) This frankly only affects the right-most end of the curve, because the left side corresponds to higher
699 |    confidence detections only.
700 | b) Will throwing in a bunch of low-confidence detections increase performance? Yup.
701 | Case 1) The detector is missing a bunch of boxes entirely. Let's say 10%.
702 |     Then, the rightmost part flatlines at p = 0. Why? When presented the question "What is the precision
703 |     when recall is 0.95?", the response is "well, we can't achieve recall 0.95, so I guess precision would
704 |     be 0." So, you get penalized.
705 |     Now here, if we toss in a bunch of low-confidence detections, the detector might miss fewer boxes.
706 |     Let's say it misses 3% now. When presented with the same question, the response is "well, we can now
707 |     achieve recall 0.95, so precision isn't automatically 0. But in achieving this, we got a lot of
708 |     false positives that were rightly assigned low confidence, so precision will be quite low here, maybe
709 |     like 0.1." This is still higher than 0.
710 | Case 2) The detector is missing some boxes but not that many. Let's say 3%.
711 |     Truth: the current detections all have confidence >= 0.3.
712 |     Truth: Adding detections < 0.3 confidence WILL NOT change the confidence threshold at recall level
713 |     0 ~ 0.97. 
Looking at the for loop, it won't even look at the low confidence detections until it gets 714 | past 0.97 since scores are sorted in decreasing order, looking like [previous detections, low 715 | confidence detections]. 716 | By Case 1 argument, if the best recall is increased even just to 0.98, score improves. 717 | Case 3) The detector has 100% recall. We have an even simpler argument. This means all the added boxes 718 | won't even make it to the "scores" array since len(scores) <= len(num_gt) always, and here, with 719 | recall 100%, we have equality. At the harshest score threshold in scores array, all the added 720 | detections should be filtered out. 721 | ''' 722 | @numba.jit 723 | def get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41): 724 | scores.sort() 725 | scores = scores[::-1] #! scores are in decreasing order. 726 | current_recall = 0 727 | thresholds = [] 728 | 729 | for i, score in enumerate(scores): 730 | l_recall = (i + 1) / num_gt 731 | if i < (len(scores) - 1): 732 | r_recall = (i + 2) / num_gt 733 | else: 734 | r_recall = l_recall 735 | #! I literally have no clue 736 | #! if the current recall is closer to right (bigger) recall than left, skip. 737 | if (((r_recall - current_recall) < (current_recall - l_recall)) 738 | and (i < (len(scores) - 1))): 739 | continue 740 | # recall = l_recall 741 | thresholds.append(score) 742 | current_recall += 1 / (num_sample_pts - 1.0) 743 | # print(len(thresholds), len(scores), num_gt) 744 | return thresholds 745 | 746 | #! gt_anno is anno for single image 747 | #! current class is an index 748 | #! difficulty is 0, 1, or 2 749 | #? Note that images can be in multiple difficulty groups 750 | #? I think Easy \subset Moderate \subset Hard 751 | def clean_data(gt_anno, dt_anno, current_class, difficulty, extra_info_single): 752 | 753 | gt_extra_info_single, dt_extra_info_single, general_extra_info = extra_info_single 754 | 755 | MIN_HEIGHT = general_extra_info['MIN_HEIGHT'] 756 | MAX_OCCLUSION = general_extra_info['MAX_OCCLUSION'] 757 | MAX_TRUNCATION = general_extra_info['MAX_TRUNCATION'] 758 | MAX_DISTANCE = general_extra_info['MAX_DISTANCE'] 759 | MIN_POINTS_THRESHOLD = general_extra_info['MIN_POINTS_THRESHOLD'] #! int 760 | CLASS_NAMES = list(map(lambda s: s.lower(), general_extra_info['CLASS_NAMES'])) 761 | #! Added later in eval.py, no need for user to specify 762 | curr_metric = general_extra_info['curr_metric'] #! 0 or 1 or 2 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | # CLASS_NAMES = [ 775 | # 'car', 'pedestrian', 'cyclist', 'van', 'person_sitting', 'car', 776 | # 'tractor', 'trailer' 777 | # ] 778 | # if os.environ["KITTI_EVAL_CHANGES"] == "0": 779 | # MIN_HEIGHT = [40, 25, 25] 780 | # MAX_OCCLUSION = [0, 1, 2] 781 | # MAX_TRUNCATION = [0.15, 0.3, 0.5] 782 | 783 | # elif os.environ["KITTI_EVAL_CHANGES"] == "1": 784 | # MAX_TRUNCATION = [0.99, 0.99, 0.99] # filter out stuff bigger than this. 785 | # #! This is mostly to filter out all the truncation = 1s 786 | # #! Note that there are indeed some things that have truncation < 1 and occlusion = 1. So, occlusion threshold 787 | # #! starts at 0.99 to get rid of all fully occluded things (in diagnostics.py) 788 | # #? For sanity, when we had occlusion = 0 and integer occlusions we ended up removing all the things with occlusion = 1 789 | 790 | 791 | # if "," in os.environ["KITTI_EVAL_MIN_HEIGHT"]: #! 
if we passed in something like 40,20,0 792 | # split = os.environ["KITTI_EVAL_MIN_HEIGHT"].split(",") 793 | # MIN_HEIGHT = [int(s) for s in split] 794 | # assert len(MIN_HEIGHT) == 3 795 | # else: #! otherwise, just use a single value 796 | # MIN_HEIGHT = [int(os.environ["KITTI_EVAL_MIN_HEIGHT"])] * 3 797 | 798 | # max_occlusion = float(os.environ["KITTI_EVAL_MAX_OCCLUSION"]) 799 | # MAX_OCCLUSION = [max_occlusion] * 3 800 | 801 | # #! Special: includes max distance 802 | # elif os.environ["KITTI_EVAL_CHANGES"] == "2": 803 | # # MAX_TRUNCATION = [0.99, 0.99, 0.99] # filter out stuff bigger than this. 804 | # split = os.environ["KITTI_EVAL_MAX_TRUNCATION"].split(",") 805 | # MAX_TRUNCATION = [float(s) for s in split] 806 | # assert len(MAX_TRUNCATION) == 3 807 | 808 | # split = os.environ["KITTI_EVAL_MIN_HEIGHT"].split(",") 809 | # MIN_HEIGHT = [int(s) for s in split] 810 | # assert len(MIN_HEIGHT) == 3 811 | 812 | # split = os.environ["KITTI_EVAL_MAX_OCCLUSION"].split(",") 813 | # MAX_OCCLUSION = [float(s) for s in split] 814 | # assert len(MAX_OCCLUSION) == 3 815 | 816 | # split = os.environ["KITTI_EVAL_MAX_DISTANCE"].split(",") 817 | # MAX_DISTANCE = [int(s) for s in split] 818 | # assert len(MAX_DISTANCE) == 3 819 | 820 | # if CLASS_NAMES[current_class] == "cyclist" and os.environ["KITTI_EVAL_CYC_MAX_OCCLUSION"] != "": 821 | # split = os.environ["KITTI_EVAL_CYC_MAX_OCCLUSION"].split(",") #? Separate for cyclists 822 | # MAX_OCCLUSION = [float(s) for s in split] 823 | # assert len(MAX_OCCLUSION) == 3 824 | 825 | # else: 826 | # raise Exception("Unsupported kitti eval changes") 827 | 828 | 829 | dc_bboxes, ignored_gt, ignored_dt = [], [], [] 830 | 831 | 832 | current_cls_name = CLASS_NAMES[current_class].lower() 833 | num_gt = len(gt_anno["name"]) 834 | num_dt = len(dt_anno["name"]) #! number of boxes 835 | 836 | 837 | num_valid_gt = 0 #! Keeps the number of boxes that perfecty match the current class and fit the current difficulty 838 | 839 | for i in range(num_gt): 840 | bbox = gt_anno["bbox"][i] 841 | gt_name = gt_anno["name"][i].lower() 842 | height = bbox[3] - bbox[1] 843 | 844 | 845 | valid_class = -1 846 | if (gt_name == current_cls_name): #! This bbox corresponds with the class we're doing rn 847 | valid_class = 1 848 | elif (current_cls_name == "Pedestrian".lower() 849 | and "Person_sitting".lower() == gt_name): 850 | valid_class = 0 851 | elif (current_cls_name == "Car".lower() and "Van".lower() == gt_name): # 852 | valid_class = 0 853 | elif (current_cls_name == "Car".lower() and "Undefined".lower() == gt_name): #! don't treat undefined as fp for cars 854 | valid_class = 0 855 | # elif (current_cls_name == "Cyclist".lower() and "Motorcycle".lower() == gt_name): 856 | # valid_class = 0 857 | else: #! no relationship with current class 858 | valid_class = -1 859 | 860 | 861 | ignore = False 862 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 863 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 864 | # or (height <= MIN_HEIGHT[difficulty])): 865 | 866 | # ignore = True 867 | 868 | if ((gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 869 | or (gt_extra_info_single["num_points"][i] < MIN_POINTS_THRESHOLD)): 870 | 871 | ignore = True 872 | 873 | # if (curr_metric == 0 or curr_metric == 1 or curr_metric == 2): #! 
2d bbox or bev 874 | # # if (1 == 2): 875 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 876 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 877 | # or (height <= MIN_HEIGHT[difficulty]) 878 | # or (gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty])): 879 | 880 | # ignore = True 881 | 882 | # else: #! 3d 883 | # if ((gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 884 | # or (gt_extra_info_single["num_points"][i] < MIN_POINTS_THRESHOLD)): 885 | # # or gt_anno["occluded"][i] == 1): #? GET RID OF THIS TODO: 886 | 887 | # ignore = True 888 | 889 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 890 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 891 | # or (height <= MIN_HEIGHT[difficulty]) 892 | # or (gt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 893 | # or (curr_metric == 2 and gt_extra_info_single["num_points"] < MIN_POINTS_THRESHOLD)): 894 | 895 | # ignore = True 896 | 897 | 898 | # if os.environ["KITTI_EVAL_CHANGES"] == "0" or os.environ["KITTI_EVAL_CHANGES"] == "1": 899 | # # if gt_anno["occluded"][i] > 1.0: 900 | # # print(gt_anno["occluded"][i]) 901 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 902 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 903 | # or (height <= MIN_HEIGHT[difficulty])): 904 | # # if gt_anno["difficulty"][i] > difficulty or gt_anno["difficulty"][i] == -1: 905 | # ignore = True #! out of this difficult, ignore 906 | # #! Includes distance 907 | # elif os.environ["KITTI_EVAL_CHANGES"] == "2": 908 | # if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) 909 | # or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) 910 | # or (height <= MIN_HEIGHT[difficulty]) 911 | # or (gt_anno["distance"][i] > MAX_DISTANCE[difficulty])): 912 | # ignore = True 913 | # else: 914 | # raise Exception("Unsupported kitti eval changes") 915 | 916 | 917 | 918 | #? Ignored_gt: 0 -> keep, don't ignore. 1 -> Ignore, but don't treat as FP. -1 -> Ignore, treat as FP. 919 | if valid_class == 1 and not ignore: #! all good to go, keep 920 | ignored_gt.append(0) 921 | num_valid_gt += 1 922 | #! Don't treat as false positive. 923 | #! Translation: If we have a detection that detections this, don't treat as part of the denominator for AP 924 | #! Two cases: If valid_class == 0, one of special FP classes 925 | #! ignore and valid_class == 1: If same class but harder. So if the model ends up predicting a harder box 926 | #! it's not penalized for it. 927 | elif (valid_class == 0 or (ignore and (valid_class == 1))): 928 | ignored_gt.append(1) 929 | #! Unrelated 930 | else: 931 | ignored_gt.append(-1) 932 | 933 | 934 | #! store don't care boxes so we can ignore detections in this area 935 | if gt_anno["name"][i] == "DontCare": 936 | dc_bboxes.append(gt_anno["bbox"][i]) 937 | 938 | 939 | for i in range(num_dt): 940 | #! Filter out irrelevant detection classes 941 | if (dt_anno["name"][i].lower() == current_cls_name): 942 | valid_class = 1 943 | else: 944 | valid_class = -1 945 | 946 | height = abs(dt_anno["bbox"][i, 3] - dt_anno["bbox"][i, 1]) 947 | 948 | #! This is a detection that's smaller than min_height 949 | #! This is the "new" change. They say it's because: 950 | ''' 951 | ! suppose we're doing evalulation for easy. Apparently, if we have a bbox of size 39 pixels, we don't want 952 | ! it to be a FP for the 40 pixel easy box. 953 | ? Frankly I have no clue why. 954 | ! Bottom line is all detection boxes smaller than the current GT difficulty height are cut out. 
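? A plausible rationale: in the original KITTI protocol, a gt box below MIN_HEIGHT for the
? current difficulty is marked ignored (ignored_gt == 1) rather than counted, so a small
? detection that correctly hits such an object earns no credit and, if it misses the overlap
? threshold with the ignored gt, would surface as a FP. Cutting sub-threshold detections
? avoids penalizing correct detections of harder-than-current-difficulty objects.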
955 | ! Note that this does still include detections of other classes 956 | ''' 957 | # or (curr_metric == 2 and dt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]) 958 | if height < MIN_HEIGHT[difficulty] or (curr_metric == 2 and dt_extra_info_single["distance"][i] > MAX_DISTANCE[difficulty]): 959 | ignored_dt.append(1) 960 | # if height < MIN_HEIGHT[difficulty]: 961 | # ignored_dt.append(1) 962 | #! detection matches class, keep 963 | elif valid_class == 1: 964 | ignored_dt.append(0) 965 | #! mismatch, toss. 966 | else: 967 | ignored_dt.append(-1) 968 | 969 | 970 | ''' 971 | ! num_valid_gt are the number of gt boxes in this image that: 1) are of the current class 2) fit the difficulty req. 972 | ! ignored_gt is a list of -1, 0, 1 length total # of GT boxes in this image. 973 | ! Ignored_gt: 0 -> keep, don't ignore. 1 -> Ignore, but don't treat as FP. -1 -> Ignore, treat as FP. 974 | ! ignored_dt is a list of -1, 0, 1 length total # of DT boxes in the image 975 | ! Ignored_dt: 0 -> keep (matches height and class), 1 -> Doesn't match height (too small), -1 -> class mismatch 976 | ! dc_bboxes: list of bounding boxes that are DontCare. These have value -1 in ignored_gt 977 | ''' 978 | return num_valid_gt, ignored_gt, ignored_dt, dc_bboxes 979 | 980 | 981 | @numba.jit(nopython=True) 982 | #! boxes: gt. query_boxes: detections. 983 | #! returns an N x K matrix of ious. 984 | def image_box_overlap(boxes, query_boxes, criterion=-1): 985 | N = boxes.shape[0] #! total number of gt boxes 986 | K = query_boxes.shape[0] #! total number of detections 987 | overlaps = np.zeros((N, K), dtype=boxes.dtype) #! type np float 988 | for k in range(K): 989 | qbox_area = ((query_boxes[k, 2] - query_boxes[k, 0]) * 990 | (query_boxes[k, 3] - query_boxes[k, 1])) #! area of the k-th dt box 991 | for n in range(N): 992 | iw = (min(boxes[n, 2], query_boxes[k, 2]) - max( 993 | boxes[n, 0], query_boxes[k, 0])) 994 | if iw > 0: 995 | ih = (min(boxes[n, 3], query_boxes[k, 3]) - max( 996 | boxes[n, 1], query_boxes[k, 1])) 997 | if ih > 0: 998 | if criterion == -1: 999 | ua = ( 1000 | (boxes[n, 2] - boxes[n, 0]) * 1001 | (boxes[n, 3] - boxes[n, 1]) + qbox_area - iw * ih) 1002 | elif criterion == 0: 1003 | ua = ((boxes[n, 2] - boxes[n, 0]) * 1004 | (boxes[n, 3] - boxes[n, 1])) 1005 | elif criterion == 1: 1006 | ua = qbox_area 1007 | else: 1008 | ua = 1.0 1009 | overlaps[n, k] = iw * ih / ua #! yada yada i'm pretty sure this is just iou 1010 | #? Why does this calculate iou between boxes from different images too? 1011 | return overlaps 1012 | 1013 | 1014 | def bev_box_overlap(boxes, qboxes, criterion=-1): 1015 | riou = rotate_iou_gpu_eval(boxes, qboxes, criterion) 1016 | return riou 1017 | 1018 | 1019 | # @numba.jit(nopython=True, parallel=True) 1020 | @numba.jit(nopython=True, parallel=False) 1021 | def d3_box_overlap_kernel(boxes, 1022 | qboxes, 1023 | rinc, 1024 | criterion=-1, 1025 | z_axis=1, 1026 | z_center=1.0): 1027 | """ 1028 | z_axis: the z (height) axis. 1029 | z_center: unified z (height) center of box. 
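    Height-overlap sketch (assuming z_center=1.0, i.e. KITTI camera coords where a box's
    origin sits at its bottom): a box then spans [z - h, z] along the height axis, so
        iw = min(z1, z2) - max(z1 - h1, z2 - h2)
    and the 3d intersection volume is iw * rinc (rinc holds the BEV intersection area on entry).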
1030 | """ 1031 | N, K = boxes.shape[0], qboxes.shape[0] 1032 | for i in range(N): 1033 | for j in range(K): 1034 | if rinc[i, j] > 0: 1035 | min_z = min( 1036 | boxes[i, z_axis] + boxes[i, z_axis + 3] * (1 - z_center), 1037 | qboxes[j, z_axis] + qboxes[j, z_axis + 3] * (1 - z_center)) 1038 | max_z = max( 1039 | boxes[i, z_axis] - boxes[i, z_axis + 3] * z_center, 1040 | qboxes[j, z_axis] - qboxes[j, z_axis + 3] * z_center) 1041 | iw = min_z - max_z 1042 | if iw > 0: 1043 | area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5] 1044 | area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5] 1045 | inc = iw * rinc[i, j] 1046 | if criterion == -1: 1047 | ua = (area1 + area2 - inc) 1048 | elif criterion == 0: 1049 | ua = area1 1050 | elif criterion == 1: 1051 | ua = area2 1052 | else: 1053 | ua = 1.0 1054 | rinc[i, j] = inc / ua 1055 | else: 1056 | rinc[i, j] = 0.0 1057 | 1058 | 1059 | def d3_box_overlap(boxes, qboxes, criterion=-1, z_axis=1, z_center=1.0): 1060 | """kitti camera format z_axis=1. 1061 | """ 1062 | bev_axes = list(range(7)) 1063 | bev_axes.pop(z_axis + 3) 1064 | bev_axes.pop(z_axis) 1065 | rinc = rotate_iou_gpu_eval(boxes[:, bev_axes], qboxes[:, bev_axes], 2) 1066 | d3_box_overlap_kernel(boxes, qboxes, rinc, criterion, z_axis, z_center) 1067 | return rinc 1068 | 1069 | 1070 | #? It appears that if we're computing recall thresholds, we set compute_fp to be False. 1071 | @numba.jit(nopython=True) 1072 | def compute_statistics_jit(overlaps, 1073 | gt_datas, 1074 | dt_datas, 1075 | ignored_gt, 1076 | ignored_det, 1077 | dc_bboxes, 1078 | metric, 1079 | min_overlap, 1080 | thresh=0, 1081 | compute_fp=False, 1082 | compute_aos=False): 1083 | 1084 | det_size = dt_datas.shape[0] 1085 | gt_size = gt_datas.shape[0] 1086 | dt_scores = dt_datas[:, -1] 1087 | dt_alphas = dt_datas[:, 4] 1088 | gt_alphas = gt_datas[:, 4] 1089 | dt_bboxes = dt_datas[:, :4] 1090 | # gt_bboxes = gt_datas[:, :4] 1091 | 1092 | assigned_detection = [False] * det_size #! probably storing whether each detection was assigned to a gt. 1093 | ignored_threshold = [False] * det_size #! array storing if detection score was below thresh 1094 | if compute_fp: 1095 | for i in range(det_size): 1096 | if (dt_scores[i] < thresh): 1097 | ignored_threshold[i] = True 1098 | 1099 | 1100 | NO_DETECTION = -10000000 1101 | tp, fp, fn, similarity = 0, 0, 0, 0 1102 | # thresholds = [0.0] 1103 | # delta = [0.0] 1104 | thresholds = np.zeros((gt_size, )) 1105 | thresh_idx = 0 #! Likely used for computing thresholds? 1106 | delta = np.zeros((gt_size, )) 1107 | delta_idx = 0 1108 | 1109 | 1110 | #! My own code ---- 1111 | #! For each gt box, store whether it was -1 (ignored), 0 (false negative (unmatched)), 1 (true positive (matched)) 1112 | gt_box_type = np.full((gt_size, ), -1) 1113 | #! For each dt box, store whether it was -1 (irrelevant), 0 (false positive (unmatched)), 1 (true positive (matched)) 1114 | #! Note that -1 could mean it was in don't care territory, was of a different class, etc 1115 | dt_box_type = np.full((det_size, ), -1) 1116 | #! Stores idx of matched object 1117 | gt_box_matched_idx = np.full((gt_size, ), -1) 1118 | dt_box_matched_idx = np.full((det_size, ), -1) 1119 | 1120 | #! loop over gt boxes 1121 | for i in range(gt_size): 1122 | if ignored_gt[i] == -1: #! Don't match completely irrelevant gt boxes 1123 | continue 1124 | 1125 | det_idx = -1 #! the best detection for this gt stored 1126 | valid_detection = NO_DETECTION #! Stores the max score so far of the detection. 1127 | max_overlap = 0 #! 
The overlap for the best detection; "best" means highest overlap.
1128 |         assigned_ignored_det = False
1129 | 
1130 |         for j in range(det_size):
1131 |             if (ignored_det[j] == -1): #! Don't match with completely irrelevant dt boxes
1132 |                 continue
1133 |             if (assigned_detection[j]): #! if dt was already assigned, skip (assigned to a better gt)
1134 |                 continue
1135 |             if (ignored_threshold[j]): #! if dt score is below threshold, skip
1136 |                 continue
1137 | 
1138 | 
1139 |             overlap = overlaps[j, i] #! Current overlap between this dt and this gt.
1140 |             dt_score = dt_scores[j] #! score of current dt
1141 | 
1142 |             #! If compute_fp is false, this is the only part that matters.
1143 |             #! Just finds the detection with sufficient overlap and highest score.
1144 |             if (not compute_fp and (overlap > min_overlap)
1145 |                     and dt_score > valid_detection):
1146 |                 det_idx = j
1147 |                 valid_detection = dt_score
1148 | 
1149 |             #! compute_fp is true. This means we're actually doing the metric and not making thresholds.
1150 |             #! If overlap is sufficient, (better than previous overlap or previous was a det we don't care about)
1151 |             #! and the current det is something we care about,
1152 |             #! assign. Update overlap, det_idx. Note that we 1-out valid_detection since we don't rank by score;
1153 |             #! we 1-out it to show that we have assigned a det we care about.
1154 |             #! When compute_fp is true, we choose based on overlap.
1155 |             elif (compute_fp and (overlap > min_overlap)
1156 |                     and (overlap > max_overlap or assigned_ignored_det)
1157 |                     and ignored_det[j] == 0):
1158 |                 max_overlap = overlap
1159 |                 det_idx = j
1160 |                 valid_detection = 1
1161 |                 assigned_ignored_det = False
1162 | 
1163 |             #! compute_fp is true.
1164 |             #! If overlap is sufficient, nothing was assigned yet, and it's a detection we don't care about,
1165 |             #! we assign it. Note that we leave max_overlap as default so anything can overwrite this.
1166 |             #? One curious thing is that of the dets we don't care about, if we assign the first one, the next one
1167 |             #? can't overwrite it because valid_detection != NO_DETECTION
1168 |             elif (compute_fp and (overlap > min_overlap)
1169 |                     and (valid_detection == NO_DETECTION)
1170 |                     and ignored_det[j] == 1):
1171 |                 det_idx = j
1172 |                 valid_detection = 1
1173 |                 assigned_ignored_det = True
1174 | 
1175 |         #! If we couldn't match this gt to anything and it's something we care about, it's a false negative.
1176 |         if (valid_detection == NO_DETECTION) and ignored_gt[i] == 0:
1177 |             fn += 1
1178 |             gt_box_type[i] = 0
1179 | 
1180 |         #! If we did match this gt to something and
1181 |         #! (gt is something we don't care about or det is something we don't care about)
1182 |         #! we assign it. Why? Probably because if we don't assign it, it'll be a false positive later (unassigned det).
1183 |         elif ((valid_detection != NO_DETECTION)
1184 |                 and (ignored_gt[i] == 1 or ignored_det[det_idx] == 1)):
1185 |             assigned_detection[det_idx] = True
1186 |             gt_box_type[i] = -1
1187 |             dt_box_type[det_idx] = -1
1188 | 
1189 |         #! If we did match this gt to something and
1190 |         #! the remaining condition is: (gt is something we care about and det is something we care about)
1191 |         #! it's a good match! True positive.
1192 |         #! Here, we also (basically) append the det score to the end of thresholds.
1193 |         #! Then, assign detection True
1194 |         elif valid_detection != NO_DETECTION:
1195 |             # only a TP adds a threshold. 
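            #? Bookkeeping sketch for a TP: the gt and dt get cross-linked through
            #? gt/dt_box_matched_idx, and the matched dt's score is pushed onto
            #? `thresholds`, so thresholds[:thresh_idx] ends up being exactly the
            #? scores of matched detections; that is what get_thresholds consumes.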
1196 |             gt_box_type[i] = 1
1197 |             dt_box_type[det_idx] = 1
1198 |             gt_box_matched_idx[i] = det_idx
1199 |             dt_box_matched_idx[det_idx] = i
1200 |             tp += 1
1201 |             # thresholds.append(dt_scores[det_idx])
1202 |             thresholds[thresh_idx] = dt_scores[det_idx]
1203 |             thresh_idx += 1
1204 |             if compute_aos:
1205 |                 # delta.append(gt_alphas[i] - dt_alphas[det_idx])
1206 |                 delta[delta_idx] = gt_alphas[i] - dt_alphas[det_idx]
1207 |                 delta_idx += 1
1208 | 
1209 |             assigned_detection[det_idx] = True
1210 |         #! This should be when there is no detection and gt is something we don't care about
1211 |         else:
1212 |             gt_box_type[i] = -1
1213 | 
1214 | 
1215 | 
1216 |     #? Note that so far, we have not used dc boxes. This is because they are only used for false positive calculation
1217 |     #? as we haven't looked at unmatched detections yet. If an unmatched detection is inside don't care, we don't count it as FP
1218 |     if compute_fp:
1219 |         #! loop through detections
1220 |         for i in range(det_size):
1221 |             #! When is a detection a false positive? Well, it is a false positive if it is:
1222 |             #! NOT assigned to a gt, and
1223 |             #! NOT of a different class, and
1224 |             #! NOT of the same class but of a different size, and
1225 |             #! NOT below the score threshold.
1226 |             if (not (assigned_detection[i] or ignored_det[i] == -1
1227 |                      or ignored_det[i] == 1 or ignored_threshold[i])):
1228 |                 fp += 1
1229 |                 dt_box_type[i] = 0 #! false positive!
1230 | 
1231 |         #! I believe this is the number of detections we harvest from don't care regions. We'll subtract it from fp.
1232 |         nstuff = 0
1233 |         #! Metric == 0 is 2d bbox
1234 |         if metric == 0:
1235 |             #! ious between dt boxes and dc boxes.
1236 |             overlaps_dt_dc = image_box_overlap(dt_bboxes, dc_bboxes, 0)
1237 |             for i in range(dc_bboxes.shape[0]):
1238 |                 for j in range(det_size):
1239 |                     #! skip stuff that doesn't add to fp right above
1240 |                     if (assigned_detection[j]):
1241 |                         continue
1242 |                     if (ignored_det[j] == -1 or ignored_det[j] == 1):
1243 |                         continue
1244 |                     if (ignored_threshold[j]):
1245 |                         continue
1246 |                     #! if the overlap between the two is bigger than min_overlap
1247 |                     #! assign the detection to dc and add it to the count we take away from fp.
1248 |                     if overlaps_dt_dc[j, i] > min_overlap:
1249 |                         assigned_detection[j] = True
1250 |                         nstuff += 1
1251 |                         dt_box_type[j] = -1 #! nvm, don't care about this one
1252 |         #! take nstuff away from fp.
1253 |         fp -= nstuff
1254 | 
1255 |         #! AOS bookkeeping: each fp contributes 0 similarity, each tp contributes (1 + cos(alpha error)) / 2
1256 |         if compute_aos:
1257 |             tmp = np.zeros((fp + delta_idx, ))
1258 |             # tmp = [0] * fp
1259 |             for i in range(delta_idx):
1260 |                 tmp[i + fp] = (1.0 + np.cos(delta[i])) / 2.0
1261 |                 # tmp.append((1.0 + np.cos(delta[i])) / 2.0)
1262 |             # assert len(tmp) == fp + tp
1263 |             # assert len(delta) == tp
1264 |             if tp > 0 or fp > 0:
1265 |                 similarity = np.sum(tmp)
1266 |             else:
1267 |                 similarity = -1
1268 | 
1269 |     '''
1270 |     ! Let's have a conditional here.
1271 |     ! if not compute_fp: tp and fn are valid; fp and similarity are nonsense.
1272 |     !     thresholds[:thresh_idx] holds the scores of all the matched dts,
1273 |     !     so we have the tools to calculate recall and pick score thresholds.
1274 |     ! if compute_fp: tp, fp, fn (and similarity, if compute_aos) are all valid.
1275 | ''' 1276 | return tp, fp, fn, similarity, thresholds[:thresh_idx], \ 1277 | (gt_box_type, dt_box_type, gt_box_matched_idx, dt_box_matched_idx) 1278 | 1279 | 1280 | def get_split_parts(num, num_part): 1281 | same_part = num // num_part 1282 | remain_num = num % num_part 1283 | if remain_num == 0: 1284 | return [same_part] * num_part 1285 | else: 1286 | return [same_part] * num_part + [remain_num] 1287 | 1288 | 1289 | # @numba.jit(nopython=True) 1290 | def fused_compute_statistics(overlaps, 1291 | pr, 1292 | gt_nums, 1293 | dt_nums, 1294 | dc_nums, 1295 | gt_datas, 1296 | dt_datas, 1297 | dontcares, 1298 | ignored_gts, 1299 | ignored_dets, 1300 | metric, 1301 | min_overlap, 1302 | thresholds, 1303 | extras, 1304 | compute_aos=False): 1305 | gt_num = 0 1306 | dt_num = 0 1307 | dc_num = 0 1308 | 1309 | for i in range(gt_nums.shape[0]): 1310 | 1311 | for t, thresh in enumerate(thresholds): 1312 | overlap = overlaps[dt_num:dt_num + dt_nums[i], gt_num:gt_num + 1313 | gt_nums[i]] 1314 | 1315 | gt_data = gt_datas[gt_num:gt_num + gt_nums[i]] 1316 | dt_data = dt_datas[dt_num:dt_num + dt_nums[i]] 1317 | ignored_gt = ignored_gts[gt_num:gt_num + gt_nums[i]] 1318 | ignored_det = ignored_dets[dt_num:dt_num + dt_nums[i]] 1319 | dontcare = dontcares[dc_num:dc_num + dc_nums[i]] 1320 | tp, fp, fn, similarity, _, extra = compute_statistics_jit( 1321 | overlap, 1322 | gt_data, 1323 | dt_data, 1324 | ignored_gt, 1325 | ignored_det, 1326 | dontcare, 1327 | metric, 1328 | min_overlap=min_overlap, 1329 | thresh=thresh, #! note that we pass in threshold we generated 1330 | compute_fp=True, 1331 | compute_aos=compute_aos) 1332 | pr[t, 0] += tp 1333 | pr[t, 1] += fp 1334 | pr[t, 2] += fn 1335 | if similarity != -1: 1336 | pr[t, 3] += similarity 1337 | 1338 | gt_box_type, dt_box_type, gt_box_matched_idx, dt_box_matched_idx = extra 1339 | 1340 | extras['gt_box_types'][t].append(gt_box_type) 1341 | extras['dt_box_types'][t].append(dt_box_type) 1342 | extras['gt_box_matched_idxs'][t].append(gt_box_matched_idx) 1343 | extras['dt_box_matched_idxs'][t].append(dt_box_matched_idx) 1344 | 1345 | # if t == len(thresholds) - 1: #! just do for last threshold, since last one is smallest 1346 | # gt_box_types.append(gt_box_type) 1347 | # dt_box_types.append(dt_box_type) 1348 | # # print(thresh) 1349 | # # print(gt_box_type) 1350 | # # print(dt_box_type) 1351 | # # print(tp) 1352 | # # print(fp) 1353 | # # print(fn) 1354 | # # print(thresh == 0.05203958600759506) 1355 | # # assert 1 == 2 1356 | # # print(thresh) 1357 | 1358 | gt_num += gt_nums[i] 1359 | dt_num += dt_nums[i] 1360 | dc_num += dc_nums[i] 1361 | 1362 | 1363 | def calculate_iou_partly(gt_annos, 1364 | dt_annos, 1365 | metric, 1366 | num_parts=50, 1367 | z_axis=1, 1368 | z_center=1.0): 1369 | """fast iou algorithm. this function can be used independently to 1370 | do result analysis. 1371 | Args: 1372 | gt_annos: dict, must from get_label_annos() in kitti_common.py #! Actually a list of dicts 1373 | dt_annos: dict, must from get_label_annos() in kitti_common.py 1374 | metric: eval type. 0: bbox, 1: bev, 2: 3d 1375 | num_parts: int. a parameter for fast calculate algorithm 1376 | z_axis: height axis. kitti camera use 1, lidar use 2. 
1377 | 1378 | annos = [ 1379 | { 1380 | 'name': np.array(["Car", "Pedestrian", "Car", ...]), 1381 | 'truncated': np.array([0.1, 0.5, 1.0, ...]), 1382 | 'occluded': np.array([0, 1, 2, 3, 0, ...]), 1383 | 'alpha': np.array([-3.14, 3.14, 0.0, ...]), 1384 | 'bbox': np.array([ 1385 | [x1, y1, x2, y2], 1386 | [left, top, right, bot], 1387 | [0.0, 0.0, 385.0, 1280.0], 1388 | ... 1389 | ]), #! N x 4 1390 | 'dimensions': Don't care for now 1391 | 'location': Don't care for now 1392 | 'rotation_y': Don't care for now 1393 | 'score': np.array([ 1394 | 0.1, 1395 | 0.3, 1396 | ... 1397 | ]) #! or all 0s for gt 1398 | } 1399 | ] 1400 | """ 1401 | assert len(gt_annos) == len(dt_annos) 1402 | total_dt_num = np.stack([len(a["name"]) for a in dt_annos], 0) #! a list of number of annotations in each file 1403 | total_gt_num = np.stack([len(a["name"]) for a in gt_annos], 0) 1404 | 1405 | num_examples = len(gt_annos) 1406 | #! returns a list of numbers, which is num_examples split up into num_parts, with a remainder at the end. 1407 | #! So (13, 2) would return [6, 6, 1] or something 1408 | split_parts = get_split_parts(num_examples, num_parts) 1409 | 1410 | parted_overlaps = [] 1411 | example_idx = 0 1412 | bev_axes = list(range(3)) 1413 | bev_axes.pop(z_axis) 1414 | for num_part in split_parts: 1415 | gt_annos_part = gt_annos[example_idx:example_idx + num_part] #! basically chop up dataset into parts and iterate 1416 | dt_annos_part = dt_annos[example_idx:example_idx + num_part] 1417 | if metric == 0: #! This is the 2D bbox part 1418 | #! appears like it concats ALL the bounding boxes in the entire dataset into a super tall array 1419 | #? Correction: PART of the dataset gt_annos_part and dt_annos_part 1420 | #! shape (total number of bboxes, 4) 1421 | gt_boxes = np.concatenate([a["bbox"] for a in gt_annos_part], 0) 1422 | dt_boxes = np.concatenate([a["bbox"] for a in dt_annos_part], 0) 1423 | #! 
returns np array of shape (total # of gt boxes, total # of dt boxes) 1424 | overlap_part = image_box_overlap(gt_boxes, dt_boxes) 1425 | elif metric == 1: 1426 | loc = np.concatenate( 1427 | [a["location"][:, bev_axes] for a in gt_annos_part], 0) 1428 | dims = np.concatenate( 1429 | [a["dimensions"][:, bev_axes] for a in gt_annos_part], 0) 1430 | rots = np.concatenate([a["rotation_y"] for a in gt_annos_part], 0) 1431 | gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1432 | axis=1) 1433 | loc = np.concatenate( 1434 | [a["location"][:, bev_axes] for a in dt_annos_part], 0) 1435 | dims = np.concatenate( 1436 | [a["dimensions"][:, bev_axes] for a in dt_annos_part], 0) 1437 | rots = np.concatenate([a["rotation_y"] for a in dt_annos_part], 0) 1438 | dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1439 | axis=1) 1440 | overlap_part = bev_box_overlap(gt_boxes, 1441 | dt_boxes).astype(np.float64) 1442 | elif metric == 2: 1443 | loc = np.concatenate([a["location"] for a in gt_annos_part], 0) 1444 | dims = np.concatenate([a["dimensions"] for a in gt_annos_part], 0) 1445 | rots = np.concatenate([a["rotation_y"] for a in gt_annos_part], 0) 1446 | gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1447 | axis=1) 1448 | loc = np.concatenate([a["location"] for a in dt_annos_part], 0) 1449 | dims = np.concatenate([a["dimensions"] for a in dt_annos_part], 0) 1450 | rots = np.concatenate([a["rotation_y"] for a in dt_annos_part], 0) 1451 | dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]], 1452 | axis=1) 1453 | overlap_part = d3_box_overlap( 1454 | gt_boxes, dt_boxes, z_axis=z_axis, 1455 | z_center=z_center).astype(np.float64) 1456 | else: 1457 | raise ValueError("unknown metric") 1458 | 1459 | parted_overlaps.append(overlap_part) #! ends up being a list of iou matrices b/n parts of the dataset 1460 | example_idx += num_part 1461 | overlaps = [] 1462 | example_idx = 0 1463 | for j, num_part in enumerate(split_parts): 1464 | gt_annos_part = gt_annos[example_idx:example_idx + num_part] 1465 | dt_annos_part = dt_annos[example_idx:example_idx + num_part] #! these two aren't used... 1466 | gt_num_idx, dt_num_idx = 0, 0 1467 | for i in range(num_part): 1468 | gt_box_num = total_gt_num[example_idx + i] 1469 | dt_box_num = total_dt_num[example_idx + i] 1470 | overlaps.append( 1471 | parted_overlaps[j][gt_num_idx:gt_num_idx + 1472 | gt_box_num, dt_num_idx:dt_num_idx + 1473 | dt_box_num]) #! slice out the part that corresponds to a single image 1474 | gt_num_idx += gt_box_num 1475 | dt_num_idx += dt_box_num 1476 | example_idx += num_part 1477 | 1478 | #! In the end, overlaps becomes a list of matrices. The list is length len(dt_annos) == len(gt_annos) (number of images) 1479 | #! In each index is a iou matrix shape (number of gt boxes in that image, number of dt boxes in that image) 1480 | #! parted_overlaps is overlap matrices over parts of dataset 1481 | #! total_gt_num is list of number of boxes in each image. 1482 | 1483 | return overlaps, parted_overlaps, total_gt_num, total_dt_num 1484 | 1485 | 1486 | def _prepare_data(gt_annos, dt_annos, current_class, difficulty, extra_info=None): 1487 | gt_datas_list = [] 1488 | dt_datas_list = [] 1489 | total_dc_num = [] 1490 | ignored_gts, ignored_dets, dontcares = [], [], [] 1491 | total_num_valid_gt = 0 1492 | 1493 | gt_extra_info, dt_extra_info, general_extra_info = extra_info 1494 | 1495 | #! 
Loop through each image 1496 | for i in range(len(gt_annos)): 1497 | if 'clean_data_function' not in general_extra_info.keys() or general_extra_info['clean_data_function'] == None: 1498 | rets = clean_data(gt_annos[i], dt_annos[i], current_class, difficulty, \ 1499 | extra_info_single=(gt_extra_info[i], dt_extra_info[i], general_extra_info)) 1500 | else: 1501 | rets = general_extra_info['clean_data_function']( 1502 | gt_annos[i], dt_annos[i], current_class, difficulty, \ 1503 | extra_info_single=(gt_extra_info[i], dt_extra_info[i], general_extra_info) 1504 | ) 1505 | 1506 | num_valid_gt, ignored_gt, ignored_det, dc_bboxes = rets 1507 | ignored_gts.append(np.array(ignored_gt, dtype=np.int64)) 1508 | ignored_dets.append(np.array(ignored_det, dtype=np.int64)) 1509 | #! Ends up being a list of ignored_gts. etc... 1510 | 1511 | if len(dc_bboxes) == 0: 1512 | dc_bboxes = np.zeros((0, 4)).astype(np.float64) 1513 | else: 1514 | dc_bboxes = np.stack(dc_bboxes, 0).astype(np.float64) 1515 | #! dc_boxes is a np array shape (# of don't care boxes IN THIS IMAGE, 4) 1516 | #! Each row is a Don't Care bbox 1517 | total_dc_num.append(dc_bboxes.shape[0]) 1518 | #! Number of don't care boxes. total_dc_num is a list of # of dc_boxes for each iamge 1519 | dontcares.append(dc_bboxes) 1520 | #! list of list of dc_boxes for each image 1521 | 1522 | total_num_valid_gt += num_valid_gt 1523 | #! counter of total number of valid gt boxes 1524 | 1525 | #! bbox index is N x 4 1526 | #! alpha index is N -> with the np.newaxis, it's N x 1 1527 | #! So concat makes it an N x 5 with the "5" dimension being [x1, y1, x2, y2, alpha] 1528 | gt_datas = np.concatenate( 1529 | [gt_annos[i]["bbox"], gt_annos[i]["alpha"][..., np.newaxis]], 1) 1530 | 1531 | #! Similarly, N x 6. "6" dimension is [x1, y1, x2, y2, alpha, score] 1532 | dt_datas = np.concatenate([ 1533 | dt_annos[i]["bbox"], dt_annos[i]["alpha"][..., np.newaxis], 1534 | dt_annos[i]["score"][..., np.newaxis] 1535 | ], 1) 1536 | 1537 | gt_datas_list.append(gt_datas) 1538 | dt_datas_list.append(dt_datas) 1539 | #! list lists of boxes 1540 | #! I don't know why they do this instead of np.array. This just makes a length # of images array of 1541 | #! number of dontcare boxes in each image. 1542 | total_dc_num = np.stack(total_dc_num, axis=0) 1543 | ''' 1544 | ? All the arrays here have length = # of images in dataset 1545 | ! gt_datas_list: list of (N x 5 arrays) 1546 | ! dt_datas_list: list of (N x 6 arrays) 1547 | ! ignored_gts: list of (length N array (vals -1, 0, or 1)) 1548 | ! ignored_dets: list of (length N array (vals -1, 0, or 1)) 1549 | ! dontcares: list of (# of don't care boxes in image x 4 arrays) 1550 | ! total_dc_num: list of (# of don'tcare boxes in image value) 1551 | ! total_num_valid_gt: total number of valid gts (int) 1552 | ''' 1553 | return (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, dontcares, 1554 | total_dc_num, total_num_valid_gt) 1555 | 1556 | 1557 | def eval_class(gt_annos, 1558 | dt_annos, 1559 | current_classes, 1560 | difficultys, # ! Is a tuple (0, 1, 2) 1561 | metric, #! is 0 (bbox), 1 (bev), or 2 (3d) 1562 | min_overlaps, #! I believe this is shape (2, 3, num_classes) where: 1563 | #! 2 is just moderate thresholds, easy thresholds. DIFFERENT FROM BBOX DIFFICULTY 1564 | #! 3 is the different metrics (bbox, bev, 3d), 1565 | #! num_classes is for threshold for each class 1566 | compute_aos=False, 1567 | z_axis=1, 1568 | z_center=1.0, 1569 | num_parts=50, 1570 | extra_info=None): 1571 | """Kitti eval. support 2d/bev/3d/aos eval. 
support 0.5:0.05:0.95 coco AP. 1572 | Args: 1573 | gt_annos: dict, must from get_label_annos() in kitti_common.py 1574 | dt_annos: dict, must from get_label_annos() in kitti_common.py 1575 | current_class: int, 0: car, 1: pedestrian, 2: cyclist 1576 | difficulty: int. eval difficulty, 0: easy, 1: normal, 2: hard # ! No, actually a tuple (0, 1, 2) 1577 | metric: eval type. 0: bbox, 1: bev, 2: 3d 1578 | min_overlap: float, min overlap. official: 1579 | [[0.7, 0.5, 0.5], [0.7, 0.5, 0.5], [0.7, 0.5, 0.5]] 1580 | format: [metric, class]. choose one from matrix above. 1581 | num_parts: int. a parameter for fast calculate algorithm 1582 | extra_info: a tuple (gt_extra_info, dt_extra_info, general_extra_info). Check get_kitti_eval for more details 1583 | 1584 | Returns: 1585 | dict of recall, precision and aos 1586 | """ 1587 | # print(len(gt_annos)) 1588 | # print(len(dt_annos)) 1589 | assert len(gt_annos) == len(dt_annos) 1590 | num_examples = len(gt_annos) 1591 | split_parts = get_split_parts(num_examples, num_parts) 1592 | 1593 | rets = calculate_iou_partly( 1594 | dt_annos, 1595 | gt_annos, 1596 | metric, 1597 | num_parts, 1598 | z_axis=z_axis, 1599 | z_center=z_center) 1600 | overlaps, parted_overlaps, total_dt_num, total_gt_num = rets 1601 | #! In the end, overlaps becomes a list of matrices. The list is length len(dt_annos) == len(gt_annos) (number of images) 1602 | #! In each index is a iou matrix shape (number of gt boxes in that image, number of dt boxes in that image) 1603 | #! parted_overlaps is overlap matrices over parts of dataset 1604 | #! total_gt_num is list of number of boxes in each image. 1605 | 1606 | N_SAMPLE_PTS = 41 1607 | 1608 | num_minoverlap = len(min_overlaps) #! moderate, or easy 1609 | num_class = len(current_classes) 1610 | num_difficulty = len(difficultys) 1611 | 1612 | #! A single point would be the precision for a class, a certain bbox difficulty, the type of threhsolds (mod or easy) 1613 | precision = np.zeros( 1614 | [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1615 | recall = np.zeros( 1616 | [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1617 | 1618 | #! To store gt_box_types, dt_box_types for each class, per difficulty, per num_minoverlap 1619 | extrass = { 1620 | "gt_box_typess": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object), 1621 | "dt_box_typess": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object), 1622 | "gt_box_matched_idxss": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object), 1623 | "dt_box_matched_idxss": np.full((num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS), None, dtype=object) 1624 | } 1625 | 1626 | 1627 | aos = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1628 | all_thresholds = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) 1629 | 1630 | 1631 | #! Per class 1632 | for m, current_class in enumerate(current_classes): 1633 | #! Per difficulty 1634 | for l, difficulty in enumerate(difficultys): 1635 | gt_extra_info, dt_extra_info, general_extra_info = extra_info 1636 | general_extra_info['curr_metric'] = metric #! 
pass on which metric we're doing 1637 | extra_info = (gt_extra_info, dt_extra_info, general_extra_info) 1638 | 1639 | rets = _prepare_data(gt_annos, dt_annos, current_class, difficulty, extra_info=extra_info) 1640 | (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, 1641 | dontcares, total_dc_num, total_num_valid_gt) = rets 1642 | ''' 1643 | ? All the arrays here have length = # of images in dataset 1644 | ! gt_datas_list: list of (N x 5 arrays) 1645 | ! dt_datas_list: list of (N x 6 arrays) 1646 | ! ignored_gts: list of (length N array (vals -1, 0, or 1)) 1647 | ! ignored_dets: list of (length N array (vals -1, 0, or 1)) 1648 | ! dontcares: list of (# of don't care boxes in image x 4 arrays) 1649 | ! total_dc_num: list of (# of don'tcare boxes in image value) 1650 | ! total_num_valid_gt: total number of valid gts (int) 1651 | ''' 1652 | 1653 | #! Runs twice, first for moderate overall difficulty setting, then easy. 1654 | for k, min_overlap in enumerate(min_overlaps[:, metric, m]): 1655 | thresholdss = [] 1656 | 1657 | #! Loop over images in dataset. So single image at a time. 1658 | for i in range(len(gt_annos)): 1659 | 1660 | rets = compute_statistics_jit( 1661 | overlaps[i], #! iou values b/n gt and dt for single image 1662 | gt_datas_list[i], #! N x 5 array 1663 | dt_datas_list[i], #! N x 6 array 1664 | ignored_gts[i], #! Length N array of -1, 0, 1 1665 | ignored_dets[i], #! Length N array of -1, 0, 1 1666 | dontcares[i], #! Length number of don't care boxes x 4 1667 | metric, #! 0, 1, or 2 (bbox, bev, 3d) 1668 | min_overlap=min_overlap, #! float minimum iou threshold for positive 1669 | thresh=0.0, #! ignore dt with scores below this. 1670 | compute_fp=False) 1671 | tp, fp, fn, similarity, thresholds, _ = rets #! Don't carea bout gt_box_type, dt_box_type here 1672 | thresholdss += thresholds.tolist() 1673 | #! A 1d array of scores of matched dts. 1674 | thresholdss = np.array(thresholdss) 1675 | 1676 | thresholds = get_thresholds(thresholdss, total_num_valid_gt) 1677 | thresholds = np.array(thresholds) 1678 | #! N_SAMPLE_PTS length array of scores, decreasing. these are the thresholds 1679 | 1680 | all_thresholds[m, l, k, :len(thresholds)] = thresholds 1681 | #! Threshold for each combo 1682 | #! storing 4 "things" for each threshold. 1683 | #? [tp, fp, fn, similarity] 1684 | pr = np.zeros([len(thresholds), 4]) 1685 | 1686 | #! My addition - stores information about gt/dt boxes (whether ignored, fn, tn, fp) 1687 | #! ends up being a list of np arrays 1688 | #! CHANGED TO SAVE @ EVERY THRESHOLD. Now, should be a 1689 | #! Numpy Array (length N_SAMPLE_PTS), of list (length # of frames), of np arrays (# of objects in each frame) 1690 | extras = { 1691 | "gt_box_types": np.empty(N_SAMPLE_PTS, dtype=object), 1692 | "dt_box_types": np.empty(N_SAMPLE_PTS, dtype=object), 1693 | "gt_box_matched_idxs": np.empty(N_SAMPLE_PTS, dtype=object), 1694 | "dt_box_matched_idxs": np.empty(N_SAMPLE_PTS, dtype=object) 1695 | } 1696 | extras['gt_box_types'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1697 | extras['dt_box_types'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1698 | extras['gt_box_matched_idxs'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1699 | extras['dt_box_matched_idxs'][...] = [[] for _ in range(N_SAMPLE_PTS)] 1700 | 1701 | #! Again, we're splitting up the dataset into parts and running it in. 
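#! An illustration I'm adding (not in the original source) of what this stage produces:
#! thresholds is up to N_SAMPLE_PTS score cutoffs, spaced roughly evenly in recall, and each pass of
#! fused_compute_statistics below accumulates pr[t] = [tp, fp, fn, similarity] at cutoff thresholds[t].
#! For example, if pr[t] = [80, 20, 40, 0] at some threshold t, the loop further down computes:
#!     precision[m, l, k, t] = 80 / (80 + 20) = 0.8
#!     recall[m, l, k, t]    = 80 / (80 + 40) = 0.667 (approximately)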
1702 | idx = 0 1703 | # start_time = time.time() 1704 | for j, num_part in enumerate(split_parts): 1705 | gt_datas_part = np.concatenate( 1706 | gt_datas_list[idx:idx + num_part], 0) 1707 | dt_datas_part = np.concatenate( 1708 | dt_datas_list[idx:idx + num_part], 0) 1709 | dc_datas_part = np.concatenate( 1710 | dontcares[idx:idx + num_part], 0) 1711 | ignored_dets_part = np.concatenate( 1712 | ignored_dets[idx:idx + num_part], 0) 1713 | ignored_gts_part = np.concatenate( 1714 | ignored_gts[idx:idx + num_part], 0) 1715 | fused_compute_statistics( 1716 | parted_overlaps[j], 1717 | pr, 1718 | total_gt_num[idx:idx + num_part], 1719 | total_dt_num[idx:idx + num_part], 1720 | total_dc_num[idx:idx + num_part], 1721 | gt_datas_part, 1722 | dt_datas_part, 1723 | dc_datas_part, 1724 | ignored_gts_part, 1725 | ignored_dets_part, 1726 | metric, 1727 | min_overlap=min_overlap, 1728 | thresholds=thresholds, 1729 | extras=extras, 1730 | compute_aos=compute_aos) 1731 | idx += num_part 1732 | # print(time.time() - start_time) 1733 | 1734 | extrass['gt_box_typess'][m, l, k, :] = extras['gt_box_types'] 1735 | extrass['dt_box_typess'][m, l, k, :] = extras['dt_box_types'] 1736 | extrass['gt_box_matched_idxss'][m, l, k, :] = extras['gt_box_matched_idxs'] 1737 | extrass['dt_box_matched_idxss'][m, l, k, :] = extras['dt_box_matched_idxs'] 1738 | for i in range(len(thresholds)): 1739 | precision[m, l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 1]) #! true pos / (true pos + false pos) 1740 | recall[m, l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 2]) #! true pos / (true pos + false neg) 1741 | if compute_aos: 1742 | aos[m, l, k, i] = pr[i, 3] / (pr[i, 0] + pr[i, 1]) 1743 | for i in range(len(thresholds)): 1744 | precision[m, l, k, i] = np.max( 1745 | precision[m, l, k, i:], axis=-1) #? INTERPOLATES AND FLIPS THE ORDER!!! 1746 | #? NOW ITS IN ORDER OF INCREASING RECALL 1747 | if compute_aos: 1748 | aos[m, l, k, i] = np.max(aos[m, l, k, i:], axis=-1) 1749 | 1750 | 1751 | ret_dict = { 1752 | "recall": recall, # [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS] 1753 | "precision": precision, #? 
Order of INCREASING RECALL, so precision DECREASES (as we would expect in a graph) 1754 | "orientation": aos, 1755 | "thresholds": all_thresholds, 1756 | "min_overlaps": min_overlaps 1757 | } 1758 | ret_dict.update(extrass) 1759 | return ret_dict 1760 | 1761 | 1762 | def get_mAP_R11(prec): 1763 | sums = 0 1764 | for i in range(0, prec.shape[-1], 4): 1765 | sums = sums + prec[..., i] 1766 | return sums / 11 * 100 1767 | 1768 | def get_mAP_R40(prec): 1769 | sums = 0 1770 | for i in range(1, prec.shape[-1]): 1771 | sums = sums + prec[..., i] 1772 | return sums / 40 * 100 1773 | 1774 | 1775 | def do_eval_v2(gt_annos, 1776 | dt_annos, 1777 | current_classes, 1778 | min_overlaps, 1779 | compute_aos=False, 1780 | difficultys=(0, 1, 2), 1781 | z_axis=1, 1782 | z_center=1.0): 1783 | # min_overlaps: [num_minoverlap, metric, num_class] 1784 | ret = eval_class( 1785 | gt_annos, 1786 | dt_annos, 1787 | current_classes, 1788 | difficultys, 1789 | 0, 1790 | min_overlaps, 1791 | compute_aos, 1792 | z_axis=z_axis, 1793 | z_center=z_center) 1794 | # ret: [num_class, num_diff, num_minoverlap, num_sample_points] 1795 | mAP_bbox = get_mAP_v2(ret["precision"]) 1796 | mAP_aos = None 1797 | if compute_aos: 1798 | mAP_aos = get_mAP_v2(ret["orientation"]) 1799 | ret = eval_class( 1800 | gt_annos, 1801 | dt_annos, 1802 | current_classes, 1803 | difficultys, 1804 | 1, 1805 | min_overlaps, 1806 | z_axis=z_axis, 1807 | z_center=z_center) 1808 | mAP_bev = get_mAP_v2(ret["precision"]) 1809 | ret = eval_class( 1810 | gt_annos, 1811 | dt_annos, 1812 | current_classes, 1813 | difficultys, 1814 | 2, 1815 | min_overlaps, 1816 | z_axis=z_axis, 1817 | z_center=z_center) 1818 | mAP_3d = get_mAP_v2(ret["precision"]) 1819 | return mAP_bbox, mAP_bev, mAP_3d, mAP_aos 1820 | 1821 | def do_eval_v3(gt_annos, 1822 | dt_annos, 1823 | current_classes, 1824 | min_overlaps, 1825 | compute_aos=False, 1826 | difficultys=(0, 1, 2), 1827 | z_axis=1, 1828 | z_center=1.0, 1829 | extra_info=None, 1830 | eval_modes=[0, 1, 2]): #! 
Represents idxes of types to evaluate (here, it's 0 for bbox, 1 for bev, 2 for 3d) so all 3 1831 | # min_overlaps: [num_minoverlap, metric, num_class] 1832 | types = ["bbox", "bev", "3d"] 1833 | metrics = {} 1834 | for i in eval_modes: 1835 | dprint("Currently on {}".format(types[i])) 1836 | ret = eval_class( 1837 | gt_annos, 1838 | dt_annos, 1839 | current_classes, 1840 | difficultys, 1841 | metric=i, 1842 | min_overlaps=min_overlaps, 1843 | compute_aos=compute_aos, 1844 | z_axis=z_axis, 1845 | z_center=z_center, 1846 | extra_info=extra_info) 1847 | metrics[types[i]] = ret 1848 | return metrics 1849 | 1850 | 1851 | def do_coco_style_eval(gt_annos, 1852 | dt_annos, 1853 | current_classes, 1854 | overlap_ranges, 1855 | compute_aos, 1856 | z_axis=1, 1857 | z_center=1.0): 1858 | # overlap_ranges: [range, metric, num_class] 1859 | min_overlaps = np.zeros([10, *overlap_ranges.shape[1:]]) 1860 | for i in range(overlap_ranges.shape[1]): 1861 | for j in range(overlap_ranges.shape[2]): 1862 | min_overlaps[:, i, j] = np.linspace(*overlap_ranges[:, i, j]) 1863 | mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval_v2( 1864 | gt_annos, 1865 | dt_annos, 1866 | current_classes, 1867 | min_overlaps, 1868 | compute_aos, 1869 | z_axis=z_axis, 1870 | z_center=z_center) 1871 | # ret: [num_class, num_diff, num_minoverlap] 1872 | mAP_bbox = mAP_bbox.mean(-1) 1873 | mAP_bev = mAP_bev.mean(-1) 1874 | mAP_3d = mAP_3d.mean(-1) 1875 | if mAP_aos is not None: 1876 | mAP_aos = mAP_aos.mean(-1) 1877 | return mAP_bbox, mAP_bev, mAP_3d, mAP_aos 1878 | 1879 | 1880 | def print_str(value, *arg, sstream=None): 1881 | if sstream is None: 1882 | sstream = sysio.StringIO() 1883 | sstream.truncate(0) 1884 | sstream.seek(0) 1885 | print(value, *arg, file=sstream) 1886 | return sstream.getvalue() 1887 | 1888 | ''' 1889 | Args: 1890 | gt_annos: list of annotation dicts. Reference kitti_label.py for format 1891 | dt_annos: list of annotation dicts. Reference kitti_label.py for format 1892 | extra_info: tuple (gt_extra_info, dt_extra_info, general_extra_info). 1893 | gt_extra_info and dt_extra_info must be lists of dicts, either empty or of same length as gt_annos and dt_annos. 1894 | general_extra_info is a dict w/ thresholds, current classes, etc 1895 | current_classes: list of strings denoting classes we're evaluating 1896 | ex: ["Car", "Pedestrian", "Cyclist"] 1897 | IoUs: either 1898 | (3, len(current_classes)) numpy array. IoUs[i, c] denotes the required overlap for a detection, for 1899 | metric type i and class current_classes[c]. 1900 | metric type 0 -> bbox 1901 | 1 -> bev 1902 | 2 -> 3d 1903 | or (# overall evaluation levels, 3, len(current_classes)). Same as above but the first dimension denotes the number 1904 | of overall rounds of evaluation we do. For reference, the first case is puffed up to (1, 3, len(current_classes)). 1905 | recall_positions_40: Whether to use 40 recall positions or 11. Default 40, because KITTI is 40 1906 | eval_modes: List of modes to evaluate. 
0 for bbox, 1 for bev, 2 for 3d
1907 | '''
1908 | def kitti_eval(
1909 |     gt_annos,
1910 |     dt_annos,
1911 |     extra_info,
1912 |     current_classes,
1913 |     IoUs,
1914 |     recall_positions_40=True,
1915 |     eval_modes=[0, 1, 2]
1916 | ):
1917 |     try:
1918 |         assert len(gt_annos) == len(dt_annos)
1919 |         assert len(extra_info[0]) == 0 or len(extra_info[0]) == len(gt_annos)
1920 |         assert len(extra_info[1]) == 0 or len(extra_info[1]) == len(dt_annos)
1921 |         # assert IoUs.shape == (3, len(current_classes))
1922 |     except Exception as e:
1923 |         print("gt_annos: {}, dt_annos: {}, gt_extra_info: {}, dt_extra_info: {}, current_classes: {}, IoUs: {}".format(
1924 |             len(gt_annos),
1925 |             len(dt_annos),
1926 |             len(extra_info[0]),
1927 |             len(extra_info[1]),
1928 |             len(current_classes),
1929 |             IoUs.shape
1930 |         ))
1931 |         raise e
1932 | 
1933 |     print("Doing evaluation over: \nclasses {}, \nIoUs {}".format(current_classes, IoUs))
1934 | 
1935 |     if len(IoUs.shape) == 2:
1936 |         IoUs = np.expand_dims(IoUs, axis=0) #! Now (1, 3, len(current_classes)) to fit original format of min_overlaps
1937 |     class_to_name = {
1938 |         i: c for i, c in enumerate(current_classes)
1939 |     } #! int -> string
1940 |     current_classes = list(range(len(current_classes))) #! Change to numbers
1941 | 
1942 |     compute_aos = False
1943 |     if 0 in eval_modes: # only even consider computing aos if bbox is one of the things computed
1944 |         for anno in dt_annos:
1945 |             if anno['alpha'].shape[0] != 0:
1946 |                 if anno['alpha'][0] != -10:
1947 |                     compute_aos = True
1948 |                 break
1949 | 
1950 |     '''
1951 |     metrics: {
1952 |         'bbox': ...
1953 |         'bev': ...
1954 |         '3d': ...
1955 |     }
1956 | 
1957 |     ... = {
1958 |         "recall": recall, # [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]
1959 |         "precision": precision, #? Order of INCREASING RECALL, so precision DECREASES (as we would expect in a graph)
1960 |         "orientation": aos,
1961 |         "thresholds": all_thresholds,
1962 |         "min_overlaps": min_overlaps,
1963 |         "gt_box_typess": gt_box_typess, # np array shape [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS], each elem is list
1964 |         "dt_box_typess": dt_box_typess,
1965 |         "gt_box_matched_idx": gt_box_matched_idx, # same shape as above, holds either -1 or idx of matched dt box
1966 |         "dt_box_matched_idx": dt_box_matched_idx
1967 |     }
1968 |     '''
1969 |     eval_types = ["bbox", "bev", "3d"]
1970 |     metrics = do_eval_v3(
1971 |         gt_annos,
1972 |         dt_annos,
1973 |         current_classes,
1974 |         min_overlaps=IoUs,
1975 |         compute_aos=compute_aos,
1976 |         extra_info=extra_info,
1977 |         eval_modes=eval_modes
1978 |     )
1979 |     dprint("Done generating metrics.")
1980 | 
1981 |     if recall_positions_40:
1982 |         get_mAP = get_mAP_R40
1983 |     else:
1984 |         get_mAP = get_mAP_R11
1985 | 
1986 |     result = ''
1987 |     result += "Using mAP: {}\n".format(get_mAP.__name__)
1988 |     for j, curcls in enumerate(current_classes):
1989 |         # mAP threshold array: [num_minoverlap, metric, class]
1990 |         # mAP result: [num_class, num_diff, num_minoverlap]
1991 |         for i in range(IoUs.shape[0]):
1992 |             result += print_str(
1993 |                 (f"{class_to_name[curcls]} "
1994 |                  "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*IoUs[i, :, j])))
1995 |             for mode in eval_modes:
1996 |                 eval_type = eval_types[mode]
1997 |                 result += print_str("{:4} AP: {}".format(
1998 |                     eval_type,
1999 |                     ", ".join(f"{v:.2f}" for v in get_mAP(metrics[eval_type]["precision"][j, :, i]))
2000 |                 ))
2001 | 
2002 |             if compute_aos:
2003 |                 mAPaos = get_mAP(metrics["bbox"]["orientation"][j, :, i])
2004 |                 mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos)
2005 |                 result += 
print_str(f"aos AP:{mAPaos}") 2006 | 2007 | return result, metrics 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | def get_official_eval_result(gt_annos, 2014 | dt_annos, 2015 | current_classes, 2016 | difficultys=[0, 1, 2], 2017 | z_axis=1, 2018 | z_center=1.0): 2019 | """ 2020 | gt_annos and dt_annos must contains following keys: 2021 | [bbox, location, dimensions, rotation_y, score] 2022 | """ 2023 | if os.environ["KITTI_EVAL_CHANGES"] == "0": 2024 | print("Using Kitti Eval {}".format(os.environ["KITTI_EVAL_CHANGES"])) 2025 | overlap_mod = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7], 2026 | [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7], 2027 | [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7]]) 2028 | # ! All that matters here is that Car required over lap is 0.7, etc 2029 | elif os.environ["KITTI_EVAL_CHANGES"] == "1" or os.environ["KITTI_EVAL_CHANGES"] == "2": 2030 | print("Using Kitti Eval {}".format(os.environ["KITTI_EVAL_CHANGES"])) 2031 | CAR_IOU = float(os.environ["KITTI_EVAL_CAR_IOU"]) 2032 | PED_IOU = float(os.environ["KITTI_EVAL_PED_IOU"]) 2033 | CYC_IOU = float(os.environ["KITTI_EVAL_CYC_IOU"]) 2034 | overlap_mod = np.array( 2035 | [[CAR_IOU, PED_IOU, CYC_IOU, CAR_IOU, PED_IOU, CAR_IOU, CAR_IOU, CAR_IOU]] * 3 2036 | ) 2037 | 2038 | 2039 | 2040 | overlap_easy = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5], 2041 | [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5], 2042 | [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]]) 2043 | min_overlaps = np.stack([overlap_mod, overlap_easy], axis=0) # [2, 3, 5] 2044 | class_to_name = { 2045 | 0: 'Car', 2046 | 1: 'Pedestrian', 2047 | 2: 'Cyclist', 2048 | 3: 'Van', 2049 | 4: 'Person_sitting', 2050 | 5: 'car', 2051 | 6: 'tractor', 2052 | 7: 'trailer', 2053 | } 2054 | name_to_class = {v: n for n, v in class_to_name.items()} 2055 | if not isinstance(current_classes, (list, tuple)): 2056 | current_classes = [current_classes] 2057 | current_classes_int = [] 2058 | for curcls in current_classes: 2059 | if isinstance(curcls, str): 2060 | current_classes_int.append(name_to_class[curcls]) 2061 | else: 2062 | current_classes_int.append(curcls) 2063 | current_classes = current_classes_int 2064 | min_overlaps = min_overlaps[:, :, current_classes] 2065 | result = '' 2066 | # check whether alpha is valid 2067 | compute_aos = False 2068 | for anno in dt_annos: 2069 | if anno['alpha'].shape[0] != 0: 2070 | if anno['alpha'][0] != -10: 2071 | compute_aos = True 2072 | break 2073 | 2074 | metrics = do_eval_v3( # ! Now go to here 2075 | gt_annos, 2076 | dt_annos, 2077 | current_classes, 2078 | min_overlaps, 2079 | compute_aos, 2080 | difficultys, 2081 | z_axis=z_axis, 2082 | z_center=z_center) 2083 | mAPbbox_store = None 2084 | res_precision = None 2085 | res_recall = None 2086 | gt_box_types = None 2087 | dt_box_types = None #TODO: probably should add some stuff to make these work for 3d boxes in the future 2088 | #TODO: Does not work for multiple classes, nor does precision or recall. 2089 | #? Note that the return format is different from precision or recall. 
for p/r, we don't index into difficulty 2090 | for j, curcls in enumerate(current_classes): 2091 | # mAP threshold array: [num_minoverlap, metric, class] 2092 | # mAP result: [num_class, num_diff, num_minoverlap] 2093 | # precision is shape [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS] 2094 | for i in range(min_overlaps.shape[0]): 2095 | mAPbbox = get_mAP_v2(metrics["bbox"]["precision"][j, :, i]) 2096 | if mAPbbox_store is None: 2097 | mAPbbox_store = [ 2098 | get_mAP_v2(metrics["bbox"]["precision"][c, :, i]) 2099 | for c in range(len(current_classes)) 2100 | ] #! Just stores the first overlap_mod metrics, for all classes 2101 | 2102 | res_precision = metrics["bbox"]["precision"][:, :, i] #! # classes x 3 x 41 (difficulty x points) 2103 | res_recall = metrics["bbox"]["recall"][:, :, i] 2104 | gt_box_types = metrics["bbox"]["gt_box_typess"][:, :, i] #! #classes x difficulty, with values being (list of numpy arrays) 2105 | dt_box_types = metrics["bbox"]["dt_box_typess"][:, :, i] 2106 | mAPbbox = ", ".join(f"{v:.2f}" for v in mAPbbox) # ! This is what we care about 2107 | mAPbev = get_mAP_v2(metrics["bev"]["precision"][j, :, i]) 2108 | mAPbev = ", ".join(f"{v:.2f}" for v in mAPbev) 2109 | mAP3d = get_mAP_v2(metrics["3d"]["precision"][j, :, i]) 2110 | mAP3d = ", ".join(f"{v:.2f}" for v in mAP3d) 2111 | result += print_str( 2112 | (f"{class_to_name[curcls]} " 2113 | "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j]))) 2114 | result += print_str(f"bbox AP:{mAPbbox}") 2115 | result += print_str(f"bev AP:{mAPbev}") 2116 | result += print_str(f"3d AP:{mAP3d}") 2117 | if compute_aos: 2118 | mAPaos = get_mAP_v2(metrics["bbox"]["orientation"][j, :, i]) 2119 | mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos) 2120 | result += print_str(f"aos AP:{mAPaos}") 2121 | 2122 | 2123 | return result, mAPbbox_store, (res_precision, res_recall), (gt_box_types, dt_box_types) 2124 | 2125 | 2126 | def get_coco_eval_result(gt_annos, 2127 | dt_annos, 2128 | current_classes, 2129 | z_axis=1, 2130 | z_center=1.0): 2131 | class_to_name = { 2132 | 0: 'Car', 2133 | 1: 'Pedestrian', 2134 | 2: 'Cyclist', 2135 | 3: 'Van', 2136 | 4: 'Person_sitting', 2137 | 5: 'car', 2138 | 6: 'tractor', 2139 | 7: 'trailer', 2140 | } 2141 | class_to_range = { 2142 | 0: [0.5, 1.0, 0.05], 2143 | 1: [0.25, 0.75, 0.05], 2144 | 2: [0.25, 0.75, 0.05], 2145 | 3: [0.5, 1.0, 0.05], 2146 | 4: [0.25, 0.75, 0.05], 2147 | 5: [0.5, 1.0, 0.05], 2148 | 6: [0.5, 1.0, 0.05], 2149 | 7: [0.5, 1.0, 0.05], 2150 | } 2151 | class_to_range = { 2152 | 0: [0.5, 0.95, 10], 2153 | 1: [0.25, 0.7, 10], 2154 | 2: [0.25, 0.7, 10], 2155 | 3: [0.5, 0.95, 10], 2156 | 4: [0.25, 0.7, 10], 2157 | 5: [0.5, 0.95, 10], 2158 | 6: [0.5, 0.95, 10], 2159 | 7: [0.5, 0.95, 10], 2160 | } 2161 | 2162 | name_to_class = {v: n for n, v in class_to_name.items()} 2163 | if not isinstance(current_classes, (list, tuple)): 2164 | current_classes = [current_classes] 2165 | current_classes_int = [] 2166 | for curcls in current_classes: 2167 | if isinstance(curcls, str): 2168 | current_classes_int.append(name_to_class[curcls]) 2169 | else: 2170 | current_classes_int.append(curcls) 2171 | current_classes = current_classes_int 2172 | overlap_ranges = np.zeros([3, 3, len(current_classes)]) 2173 | for i, curcls in enumerate(current_classes): 2174 | overlap_ranges[:, :, i] = np.array( 2175 | class_to_range[curcls])[:, np.newaxis] 2176 | result = '' 2177 | # check whether alpha is valid 2178 | compute_aos = False 2179 | for anno in dt_annos: 2180 | if anno['alpha'].shape[0] 
!= 0: 2181 | if anno['alpha'][0] != -10: 2182 | compute_aos = True 2183 | break 2184 | mAPbbox, mAPbev, mAP3d, mAPaos = do_coco_style_eval( 2185 | gt_annos, 2186 | dt_annos, 2187 | current_classes, 2188 | overlap_ranges, 2189 | compute_aos, 2190 | z_axis=z_axis, 2191 | z_center=z_center) 2192 | for j, curcls in enumerate(current_classes): 2193 | # mAP threshold array: [num_minoverlap, metric, class] 2194 | # mAP result: [num_class, num_diff, num_minoverlap] 2195 | o_range = np.array(class_to_range[curcls])[[0, 2, 1]] 2196 | o_range[1] = (o_range[2] - o_range[0]) / (o_range[1] - 1) 2197 | result += print_str((f"{class_to_name[curcls]} " 2198 | "coco AP@{:.2f}:{:.2f}:{:.2f}:".format(*o_range))) 2199 | result += print_str((f"bbox AP:{mAPbbox[j, 0]:.2f}, " 2200 | f"{mAPbbox[j, 1]:.2f}, " 2201 | f"{mAPbbox[j, 2]:.2f}")) 2202 | result += print_str((f"bev AP:{mAPbev[j, 0]:.2f}, " 2203 | f"{mAPbev[j, 1]:.2f}, " 2204 | f"{mAPbev[j, 2]:.2f}")) 2205 | result += print_str((f"3d AP:{mAP3d[j, 0]:.2f}, " 2206 | f"{mAP3d[j, 1]:.2f}, " 2207 | f"{mAP3d[j, 2]:.2f}")) 2208 | if compute_aos: 2209 | result += print_str((f"aos AP:{mAPaos[j, 0]:.2f}, " 2210 | f"{mAPaos[j, 1]:.2f}, " 2211 | f"{mAPaos[j, 2]:.2f}")) 2212 | return result -------------------------------------------------------------------------------- /detection_toolbox/kitti/kitti_label.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import imgaug 3 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage 4 | import math 5 | 6 | class SingleLabel(object): 7 | # __slots__ = ['gt', 'garbage', 'type', 'truncation', 'occlusion', 'alpha', 8 | # 'xmin', 'ymin', 'xmax', 'ymax', 'box2d', 9 | # 'h', 'w', 'l', 't', 'ry', 'score', 10 | # 'distance', 'num_points'] 11 | def __init__(self, label_line, gt): 12 | self.gt = gt 13 | self._process_label_line(label_line) 14 | 15 | def _process_label_line(self, label_line): 16 | self.garbage = False 17 | split = label_line.split(" ") 18 | if len(split) == 15: 19 | assert self.gt 20 | elif len(split) == 16: 21 | assert not self.gt 22 | else: 23 | self.garbage = True #! Garbage 24 | return 25 | 26 | 27 | split[1:] = [float(x) for x in split[1:]] 28 | 29 | self.type = split[0] # 'Car', 'Pedestrian', ... 30 | self.truncation = split[1] # truncated pixel ratio [0..1] 31 | self.occlusion = split[2] # For KITTI dataset, int 0, 1, 2, 3. For other, just float 32 | self.alpha = split[3] # object observation angle [-pi..pi] 33 | 34 | # extract 2d bounding box in 0-based coordinates 35 | self.xmin = split[4] # left 36 | self.ymin = split[5] # top 37 | self.xmax = split[6] # right 38 | self.ymax = split[7] # bottom 39 | self.height2d = self.ymax - self.ymin 40 | self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax]) 41 | 42 | # extract 3d bounding box information 43 | self.h = split[8] # box height 44 | self.w = split[9] # box width 45 | self.l = split[10] # box length (in meters) 46 | self.t = (split[11],split[12],split[13]) # location (x,y,z) in rect. camera coord. 47 | self.tx = self.t[0] 48 | self.ty = self.t[1] 49 | self.tz = self.t[2] 50 | 51 | self.ry = split[14] # yaw angle (around Y-axis in rect. camera coordinates) [-pi..pi] 52 | 53 | if len(split) == 16: 54 | self.score = split[15] #! If DT, include score as well. 55 | 56 | self.distance = np.sqrt(self.t[0] ** 2 + self.t[1] ** 2 + self.t[2] ** 2) 57 | self.num_points = None #! has to be calculated later 58 | 59 | ''' 60 | Returns corners of 3d bbox in rect camera coords. 
(8, 3) np array.
61 |     '''
62 |     def compute_box_3d(self):
63 |         # compute rotational matrix around yaw axis
64 |         R = roty(self.ry)
65 | 
66 |         # 3d bounding box dimensions
67 |         l = self.l
68 |         w = self.w
69 |         h = self.h
70 | 
71 |         # 3d bounding box corners
72 |         x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2]
73 |         y_corners = [0,0,0,0,-h,-h,-h,-h]
74 |         z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]
75 | 
76 |         # rotate and translate 3d bounding box
77 |         corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))
78 | 
79 |         corners_3d[0,:] = corners_3d[0,:] + self.t[0]
80 |         corners_3d[1,:] = corners_3d[1,:] + self.t[1]
81 |         corners_3d[2,:] = corners_3d[2,:] + self.t[2]
82 | 
83 |         return corners_3d.T
84 | 
85 |     '''
86 |     Converts to line (original label) form, without newline character
87 |     '''
88 |     def to_line(self):
89 |         line = "{} {} {} {} {} {} {} {} {} {} {} {} {} {} {}".format(
90 |             self.type, self.truncation, self.occlusion, self.alpha,
91 |             self.xmin, self.ymin, self.xmax, self.ymax,
92 |             self.h, self.w, self.l,
93 |             self.t[0], self.t[1], self.t[2],
94 |             self.ry
95 |         )
96 |         if hasattr(self, "score"):
97 |             line += " {}".format(self.score)
98 |         return line
99 | 
100 |     '''
101 |     Returns 2d bbox in imgaug BoundingBox format
102 |     '''
103 |     def get_imgaug_bbox(self):
104 |         return BoundingBox(
105 |             x1=self.xmin, y1=self.ymin, x2=self.xmax, y2=self.ymax
106 |         )
107 | 
108 | 
109 | class KittiLabel(object):
110 |     '''
111 |     view is an int (could also be None if gt is False)
112 |     gt is boolean
113 |     idx is the {idx}.txt this label came from
114 | 
115 |     num_points_file_path is optional - it has # of points of the non-garbage labels, one on each line
116 |     #! Careful: the order/number of this is dependent on which labels were actually inside KittiLabel when this file
117 |     #! was generated. And which labels were inside KittiLabel depends on _read_label_from_file.
118 |     #! NMS might remove some objects, but num_points only matters for gt, and we don't run NMS on gt.
119 |     '''
120 |     def __init__(self, label_file_path, view, gt, idx, filter_truncation_1=True, num_points_file_path=None):
121 |         self.view = view
122 |         self.gt = gt
123 |         self.idx = idx
124 |         self._read_label_from_file(label_file_path, filter_truncation_1)
125 | 
126 |         if num_points_file_path is not None:
127 |             self._read_num_points_file_path(num_points_file_path)
128 | 
129 |     #! if filter_truncation_1 is True, gets rid of labels with truncation = 1 (100%)
130 |     def _read_label_from_file(self, label_file_path, filter_truncation_1=True):
131 |         labels = [SingleLabel(line.strip(), self.gt) for line in open(label_file_path, "r").readlines()]
132 | 
133 |         labels = list(filter(lambda l: (not l.garbage) and (not filter_truncation_1 or l.truncation != 1), labels))
134 | 
135 |         self.labels = labels
136 | 
137 |     def _read_num_points_file_path(self, num_points_file_path):
138 |         num_points = [int(line.strip()) for line in open(num_points_file_path, "r").readlines()]
139 | 
140 |         self.add_label_attribute("num_points", num_points)
141 | 
142 |         # assert len(num_points) == len(self.labels)
143 | 
144 |         # for label_ind, label in enumerate(self.labels):
145 |         #     label.num_points = num_points[label_ind]
146 | 
147 |     #! remove labels with score < score_thresh
148 |     #! 
Returns num removed
149 |     def filter_score(self, score_thresh):
150 |         assert not self.gt
151 |         prev_num = len(self.labels)
152 |         self.labels = list(filter(lambda l: l.score >= score_thresh, self.labels))
153 |         return prev_num - len(self.labels)
154 | 
155 |     '''
156 |     Writes contents of label to file
157 |     Assumes that relevant folders are created
158 |     returns passed in write_file_path
159 |     '''
160 |     def write_to_file(self, write_file_path):
161 |         with open(write_file_path, "w+") as f:
162 |             for label in self.labels:
163 |                 f.write(label.to_line() + "\n")
164 |         return write_file_path
165 | 
166 |     '''
167 |     Writes contents of label.num_points to file, one on each line.
168 |     '''
169 |     def write_num_points_to_file(self, write_file_path):
170 |         with open(write_file_path, "w+") as f:
171 |             for label in self.labels:
172 |                 f.write(str(label.num_points) + "\n")
173 |         return write_file_path
174 | 
175 |     '''
176 |     Computes # of points in each 3d box and saves them into self.labels' num_points attribute
177 |     pc_rect is n x (3 or 4) numpy array of point cloud in rect. coords.
178 |     '''
179 |     def compute_num_points_inside_3d_box(self, pc_rect):
180 |         from detection_toolbox.utils_3d.utils import extract_pc_in_box3d
181 |         if pc_rect.shape[-1] != 3:
182 |             pc_rect = pc_rect[:, :3]
183 | 
184 |         for label in self.labels:
185 |             corners_3d_rect = label.compute_box_3d()
186 |             pc_in_box, _ = extract_pc_in_box3d(pc_rect, corners_3d_rect)
187 |             label.num_points = pc_in_box.shape[0]
188 | 
189 | 
190 |     '''
191 |     Returns a dict in format
192 |     {
193 |         'name': [],
194 |         'truncated': [],
195 |         'occluded': [],
196 |         'alpha': [],
197 |         'bbox': [],
198 |         'dimensions': [],
199 |         'location': [],
200 |         'rotation_y': [],
201 |     }
202 |     '''
203 |     def get_annotation_dict(self):
204 |         res = dict()
205 | 
206 |         res['name'] = np.array([label.type for label in self.labels])
207 |         res['truncated'] = np.array([label.truncation for label in self.labels])
208 |         res['occluded'] = np.array([label.occlusion for label in self.labels])
209 |         res['alpha'] = np.array([label.alpha for label in self.labels])
210 |         res['bbox'] = np.array([label.box2d for label in self.labels]).reshape(-1, 4)
211 |         #? l, h, w is not the read-in order (labels store h, w, l)
212 |         res['dimensions'] = np.array([[label.l, label.h, label.w] for label in self.labels]).reshape(-1, 3)
213 |         res['location'] = np.array([label.t for label in self.labels]).reshape(-1, 3)
214 |         res['rotation_y'] = np.array([label.ry for label in self.labels])
215 | 
216 |         if not self.gt:
217 |             res['score'] = np.array([label.score for label in self.labels])
218 | 
219 |         return res
220 | 
221 |     '''
222 |     if gt:
223 |         {
224 |             'distance': [],
225 |             'num_points': [], # fills in with 100k if they haven't been calculated yet
226 |         }
227 |     '''
228 |     def get_extra_info(self):
229 |         res = dict()
230 |         if self.gt:
231 |             res['distance'] = np.array([label.distance for label in self.labels])
232 |             res['num_points'] = np.array([
233 |                 (label.num_points if label.num_points is not None else 100000)
234 |                 for label in self.labels
235 |             ])
236 |             return res
237 |         else:
238 |             res['distance'] = np.array([label.distance for label in self.labels])
239 |             return res
240 | 
241 |     '''
242 |     Returns bboxes in imgaug BoundingBoxesOnImage format
243 |     TODO: have some mechanisms for gt vs not gt, filtering, labels, etc
244 |     '''
245 |     def get_imgaug_bboxes(self, img_shape=(720, 1920)):
246 |         bboxes = [label.get_imgaug_bbox() for label in self.labels]
247 |         return BoundingBoxesOnImage(bboxes, shape=img_shape)
248 | 
249 |     #! 
Adds attribute of attribute name to each label 250 | #! Attribute vals should be a list 251 | def add_label_attribute(self, attribute_name, attribute_vals): 252 | assert len(self.labels) == len(attribute_vals) 253 | 254 | for label, attribute_val in zip(self.labels, attribute_vals): 255 | setattr(label, attribute_name, attribute_val) 256 | 257 | 258 | def __len__(self): 259 | return len(self.labels) 260 | 261 | def __iter__(self): 262 | return iter(self.labels) 263 | 264 | 265 | def rotx(t): 266 | ''' 3D Rotation about the x-axis. ''' 267 | c = np.cos(t) 268 | s = np.sin(t) 269 | return np.array([[1, 0, 0], 270 | [0, c, -s], 271 | [0, s, c]]) 272 | 273 | 274 | def roty(t): 275 | ''' Rotation about the y-axis. ''' 276 | c = np.cos(t) 277 | s = np.sin(t) 278 | return np.array([[c, 0, s], 279 | [0, 1, 0], 280 | [-s, 0, c]]) 281 | 282 | 283 | def rotz(t): 284 | ''' Rotation about the z-axis. ''' 285 | c = np.cos(t) 286 | s = np.sin(t) 287 | return np.array([[c, -s, 0], 288 | [s, c, 0], 289 | [0, 0, 1]]) 290 | -------------------------------------------------------------------------------- /detection_toolbox/kitti/kitti_object.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | import functools 6 | 7 | from .calibration import Calibration 8 | from .kitti_label import KittiLabel 9 | 10 | 11 | class Kitti(object): 12 | ''' 13 | calib_dir: directory with all the calib files. 14 | image_dir: directory with folders image_0, image_1, ..., each with images 15 | gt_label_dir: directory with folders label_0, label_1, ..., each with gt labels 16 | dt_label_dir: Either: 17 | 1) directory with folders label_0, label_1, ..., each with dt labels 18 | 2) directory with dt labels 000000.txt, ..., 19 | lidar_dir: directory with lidar files 20 | depthmap_dir: directory with folders depth_0, depth_1, ..., each with depthmaps 21 | ''' 22 | def __init__( 23 | self, 24 | calib_dir=None, 25 | image_dir=None, 26 | gt_label_dir=None, 27 | dt_label_dir=None, 28 | lidar_dir=None, 29 | depthmap_dir=None 30 | ): 31 | self.calib_dir = calib_dir 32 | self.image_dir = image_dir 33 | self.gt_label_dir = gt_label_dir 34 | self.dt_label_dir = dt_label_dir 35 | self.lidar_dir = lidar_dir 36 | self.depthmap_dir = depthmap_dir 37 | 38 | #! 
Returns Calibration Object 39 | def get_calib(self, idx): 40 | if self.calib_dir is None: 41 | raise Exception("calib_dir not provided") 42 | else: 43 | return Calibration(os.path.join(self.calib_dir, str(idx).zfill(6) + ".txt")) 44 | 45 | def get_gt_label(self, view, idx, filter_truncation_1=True): 46 | if self.gt_label_dir is None: 47 | raise Exception("gt_label_dir not provided") 48 | else: 49 | return KittiLabel( 50 | os.path.join( 51 | os.path.join(self.gt_label_dir, "label_{}".format(view)), 52 | str(idx).zfill(6) + ".txt" 53 | ), 54 | view=view, 55 | gt=True, 56 | idx=idx, 57 | filter_truncation_1=filter_truncation_1 58 | ) 59 | 60 | ''' 61 | Either pass in a view = 0, 1, 2, 3, 4, then it goes to label_{view} inside self.dt_label_dir 62 | or pass in view = None, then it directly looks for {idx}.txt inside self.dt_label_dir 63 | ''' 64 | def get_dt_label(self, view, idx): 65 | if self.dt_label_dir is None: 66 | raise Exception("dt_label_dir not provided") 67 | elif view is None: 68 | return KittiLabel(os.path.join(self.dt_label_dir, str(idx).zfill(6) + ".txt"), view=None, gt=False, idx=idx) 69 | else: 70 | return KittiLabel( 71 | os.path.join( 72 | os.path.join(self.dt_label_dir, "label_{}".format(view)), 73 | str(idx).zfill(6) + ".txt" 74 | ), 75 | view=view, 76 | gt=False, 77 | idx=idx 78 | ) 79 | 80 | #! BGR Format 81 | def get_image(self, view, idx): 82 | if self.image_dir is None: 83 | raise Exception("image_dir not provided") 84 | else: 85 | return cv2.imread( 86 | os.path.join( 87 | os.path.join(self.image_dir, "image_{}".format(view)), 88 | str(idx).zfill(6) + ".png" 89 | ) 90 | ) 91 | 92 | #! Returns lidar in velodyne format, n x 4 93 | def get_lidar(self, idx): 94 | if self.lidar_dir is None: 95 | raise Exception("lidar_dir not provided") 96 | else: 97 | return np.fromfile(os.path.join(self.lidar_dir, str(idx).zfill(6) + ".bin"), dtype=np.float32).reshape((-1, 4)) 98 | 99 | #! Returns in BGR Format. Likely not ideal 100 | def get_depthmap(self, view, idx): 101 | if self.depthmap_dir is None: 102 | raise Exception("depthmap_dir not provided") 103 | else: 104 | return cv2.imread( 105 | os.path.join( 106 | os.path.join(self.depthmap_dir, "image_{}".format(view)), 107 | str(idx).zfill(6) + ".png" 108 | ) 109 | ) 110 | 111 | #! Returns gt inds 112 | def get_gt_inds(self): 113 | if self.gt_label_dir is None: 114 | raise Exception("gt_label_dir not provided") 115 | else: 116 | gt_label_sub_dirs = os.listdir(self.gt_label_dir) #! label_0, label_1, ... 117 | gt_label_sub_dirs = list(filter(lambda s: "label_" in s, gt_label_sub_dirs)) 118 | each_ind_set = [] 119 | for gt_label_sub_dir in gt_label_sub_dirs: 120 | inds = set(list(map(lambda s: s[:-4], os.listdir(os.path.join(self.gt_label_dir, gt_label_sub_dir))))) 121 | each_ind_set.append(inds) 122 | for i in range(len(each_ind_set) - 1): #! Check that each sub dir has the same inds 123 | assert (each_ind_set[i] == each_ind_set[i + 1]) 124 | 125 | return sorted(each_ind_set[0]) 126 | 127 | #! Returns dt inds 128 | #! if direct_dir is true, doesn't need directory to have label_0, label_1, etc. 129 | def get_dt_inds(self, direct_dir=False): 130 | if self.dt_label_dir is None: 131 | raise Exception("dt_label_dir not provided") 132 | elif direct_dir: 133 | inds = list(map(lambda s: s[:-4], os.listdir(self.dt_label_dir))) 134 | return sorted(inds) 135 | else: 136 | dt_label_sub_dirs = os.listdir(self.dt_label_dir) #! label_0, label_1, ... 
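#! e.g. a hypothetical layout: dt_label_dir/label_0/000000.txt, dt_label_dir/label_1/000000.txt, ...
#! each label_{view} sub dir is expected to hold the same set of indices (asserted below)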
137 | dt_label_sub_dirs = list(filter(lambda s: "label_" in s, dt_label_sub_dirs)) 138 | each_ind_set = [] 139 | for dt_label_sub_dir in dt_label_sub_dirs: 140 | inds = set(list(map(lambda s: s[:-4], os.listdir(os.path.join(self.dt_label_dir, dt_label_sub_dir))))) 141 | each_ind_set.append(inds) 142 | for i in range(len(each_ind_set) - 1): #! Check that each sub dir has the same inds 143 | assert (each_ind_set[i] == each_ind_set[i + 1]) 144 | 145 | return sorted(each_ind_set[0]) 146 | 147 | def get_gt_annotated_image(self, view, idx): 148 | img = self.get_image(view, idx) 149 | gt = self.get_gt_label(view, idx) 150 | return gt.get_imgaug_bboxes().draw_on_image(img, size=3) 151 | 152 | def get_dt_annotated_image(self, view, idx): 153 | img = self.get_image(view, idx) 154 | dt = self.get_dt_label(view, idx) 155 | return dt.get_imgaug_bboxes().draw_on_image(img, size=3) 156 | 157 | ''' 158 | Calculates num_points for each non-garbage label in GT view 159 | Saves in save_dir/label_{view}/000000.txt, ... 160 | 161 | Only does so for inds in inds argument. If inds argument is None, gets them from get_dt_inds. 162 | If self.dt_label_dir was not provided, goes through everything in gt. 163 | 164 | nonempty_ok should generally be false unless the views are being done in parallel. 165 | filter_truncation_1 should be True for regular kitti data. 166 | filter_truncation_1 should be False for simulation data, where each label file contains annotations for all objects 167 | with correct 3d coords but potentially wrong 2d bbox 168 | ''' 169 | def generate_and_save_gt_num_points(self, view, save_dir, inds=None, tqdm=False, nonempty_ok=False, filter_truncation_1=True): 170 | if tqdm: from tqdm import tqdm 171 | from detection_toolbox.std import makedirs 172 | 173 | save_view_dir = os.path.join(save_dir, "label_{}".format(view)) 174 | makedirs(save_view_dir, exist_ok=True, nonempty_ok=nonempty_ok) 175 | 176 | if inds is None: 177 | if self.dt_label_dir is None: 178 | inds = self.get_gt_inds() 179 | else: 180 | inds = self.get_dt_inds() 181 | 182 | if tqdm: inds = tqdm(inds) 183 | import time 184 | 185 | for ind in inds: 186 | gt = self.get_gt_label(view, ind, filter_truncation_1=filter_truncation_1) 187 | lidar_rect = self.get_calib(ind).project_velo_to_rect(self.get_lidar(ind)[:, :3]) 188 | gt.compute_num_points_inside_3d_box(lidar_rect) 189 | gt.write_num_points_to_file(os.path.join(save_view_dir, str(ind).zfill(6) + ".txt")) 190 | 191 | ''' 192 | Returns a tuple of gt_annos, dt_annos that can be put into "get_official_eval_result" 193 | Only considers/returns labels corresponding to view argument & inds in dt_label_dir/label_{view} 194 | ''' 195 | def get_eval_annos(self, view, gt_filter_truncation_1=True, tqdm=False): 196 | if tqdm: from tqdm import tqdm 197 | dt_inds = self.get_dt_inds() 198 | 199 | gt_annos = [] 200 | dt_annos = [] 201 | 202 | if tqdm: dt_inds = tqdm(dt_inds) 203 | for dt_ind in dt_inds: 204 | gt_annos.append(self.get_gt_label(view, dt_ind, filter_truncation_1=gt_filter_truncation_1).get_annotation_dict()) 205 | dt_annos.append(self.get_dt_label(view, dt_ind).get_annotation_dict()) 206 | 207 | return gt_annos, dt_annos 208 | 209 | def get_eval_extra_info(self, view, gt_filter_truncation_1=True, num_points_dir=None, tqdm=False): 210 | if tqdm: from tqdm import tqdm 211 | dt_inds = self.get_dt_inds() 212 | 213 | gt_extra_info = [] 214 | dt_extra_info = [] 215 | 216 | if tqdm: dt_inds = tqdm(dt_inds) 217 | for dt_ind in dt_inds: 218 | gt = self.get_gt_label(view, dt_ind, 
--------------------------------------------------------------------------------
/detection_toolbox/kitti/nms.py:
--------------------------------------------------------------------------------
1 | '''
2 | REQUIRES DETECTRON2 TO BE INSTALLED
3 | https://github.com/facebookresearch/detectron2/blob/de098423c675dad38c23110407926ccf2919474d/detectron2/layers/nms.py#L101
4 | '''
5 | 
6 | '''
7 | Takes in a list of KittiLabel objects, and returns a list of KittiLabel objects after doing NMS with iou_threshold in bev
8 | over all the KittiLabel objects together.
9 | '''
10 | def bev_nms(kitti_labels, iou_threshold):
11 |     from detectron2.layers import batched_nms_rotated
12 |     import numpy as np
13 |     import torch
14 | 
15 |     #? NOTE: This might be different from elsewhere. However, it does not matter, because this CATEGORY_TO_IDX will never
16 |     #? have any influence outside this function.
17 |     CATEGORY_TO_IDX = {
18 |         "Car": 0,
19 |         "Pedestrian": 1,
20 |         "Cyclist": 2,
21 |         "Motorcycle": 3,
22 |         "Undefined": 4
23 |     }
24 |     boxes = []
25 |     scores = []
26 |     idxs = []
27 | 
28 |     #! For each label, maps its index in boxes (and scores, idxs) to
29 |     #! -> a tuple (index of its parent (view) in kitti_labels, its index inside its kitti_label)
30 |     overall_idx_to_label_idx = dict()
31 |     curr_overall_idx = 0
32 |     for kitti_label_idx, kitti_label in enumerate(kitti_labels):
33 |         for label_idx, label in enumerate(kitti_label):
34 |             # should be (x_ctr, y_ctr, width, height, angle_degrees)
35 |             boxes.append([
36 |                 label.t[0], label.t[2], label.l, label.w, label.ry * (180.0 / np.pi)
37 |             ])
38 |             scores.append(label.score)
39 |             idxs.append(CATEGORY_TO_IDX[label.type])
40 | 
41 |             overall_idx_to_label_idx[curr_overall_idx] = (kitti_label_idx, label_idx)
42 |             curr_overall_idx += 1
43 | 
44 |     if len(boxes) == 0: #! No detections
45 |         return kitti_labels
46 | 
47 |     boxes = torch.FloatTensor(boxes).to("cuda")
48 |     scores = torch.FloatTensor(scores).to("cuda")
49 |     idxs = torch.LongTensor(idxs).to("cuda")
50 | 
51 |     #! Performs per-class nms
52 |     resulting_box_inds = batched_nms_rotated(
53 |         boxes,
54 |         scores,
55 |         idxs,
56 |         iou_threshold
57 |     )
58 |     keep_inds = [[] for i in range(len(kitti_labels))]
59 |     for overall_idx in resulting_box_inds.cpu().tolist():
60 |         kitti_label_idx, label_idx = overall_idx_to_label_idx[overall_idx]
61 |         keep_inds[kitti_label_idx].append(label_idx)
62 | 
63 |     for kitti_label_idx, kitti_label in enumerate(kitti_labels):
64 |         kitti_label.labels = [kitti_label.labels[i] for i in keep_inds[kitti_label_idx]]
65 | 
66 |     del boxes, scores, idxs, resulting_box_inds
67 | 
68 |     return kitti_labels
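
A sketch of merging per-view detections with bev_nms, assuming `ko` from the earlier sketch and detection label dirs label_0 through label_4:

    #! Gather one KittiLabel per view for frame 0, then drop boxes that overlap
    #! another view's box by more than 0.5 IoU in bird's-eye view.
    labels_per_view = [ko.get_dt_label(view, idx=0) for view in range(5)]
    labels_per_view = bev_nms(labels_per_view, iou_threshold=0.5)
    for view, kitti_label in enumerate(labels_per_view):
        print("view {}: kept {} boxes".format(view, len(kitti_label.labels)))
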
--------------------------------------------------------------------------------
/detection_toolbox/std/__init__.py:
--------------------------------------------------------------------------------
1 | from .log import dprint
2 | from .os import makedirs
--------------------------------------------------------------------------------
/detection_toolbox/std/log.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | 
3 | __all__ = ["dprint"]
4 | 
5 | '''
6 | Like print(), but prefixes the output with an [HH:MM:SS] timestamp
7 | '''
8 | def dprint(*args):
9 |     now = datetime.now()
10 |     print('[{:02d}:{:02d}:{:02d}]: {}'.format(now.hour, now.minute, now.second, " ".join(map(lambda s: str(s), args))))
--------------------------------------------------------------------------------
/detection_toolbox/std/os.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | __all__ = ["makedirs"]
4 | 
5 | '''
6 | Creates dir_path (and all intermediate directories)
7 | Note that when exist_ok=False, nonempty_ok has no effect
8 | If: exist_ok, nonempty_ok
9 |     False _     : same as os.makedirs(dir_path, exist_ok=False)
10 |     True  False : dir_path can exist, but it must be empty
11 |     True  True  : dir_path can exist, and it can be non-empty
12 | '''
13 | def makedirs(dir_path, exist_ok=False, nonempty_ok=False):
14 |     if os.path.isdir(dir_path): #! exists already
15 |         if not exist_ok:
16 |             raise Exception("{} already exists".format(dir_path))
17 |         else:
18 |             if len(os.listdir(dir_path)) != 0: #! nonempty
19 |                 if not nonempty_ok:
20 |                     raise Exception("{} is not empty".format(dir_path))
21 |                 else:
22 |                     return dir_path
23 |             else: #! exists, but is empty
24 |                 return dir_path
25 |     else: #! does not exist
26 |         os.makedirs(dir_path, exist_ok=exist_ok)
27 |         return dir_path
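
The three modes from the docstring, spelled out as calls:

    from detection_toolbox.std import makedirs

    makedirs("run/exp1")                                    # raises if run/exp1 already exists
    makedirs("run/exp1", exist_ok=True)                     # ok if it exists, as long as it is empty
    makedirs("run/exp1", exist_ok=True, nonempty_ok=True)   # ok even if it already has files in it
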
--------------------------------------------------------------------------------
/detection_toolbox/utils_3d/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import numpy as np
4 | import cv2
5 | 
6 | def in_hull(p, hull):
7 |     from scipy.spatial import Delaunay
8 |     if not isinstance(hull, Delaunay):
9 |         hull = Delaunay(hull)
10 |     return hull.find_simplex(p) >= 0
11 | 
12 | '''
13 | pc is a point cloud, box3d is its 8 corners
14 | '''
15 | def extract_pc_in_box3d(pc, box3d):
16 |     ''' pc: (N,3), box3d: (8,3) '''
17 |     box3d_roi_inds = in_hull(pc[:,0:3], box3d)
18 |     return pc[box3d_roi_inds,:], box3d_roi_inds
19 | 
20 | 
21 | def get_lidar_in_image_fov(pc_rect, calib, view, xmin, ymin, xmax, ymax,
22 |                            return_more=False, clip_distance=.1):
23 |     ''' Filter lidar points, keep those in image FOV '''
24 |     pts_2d = calib.project_rect_to_image(pc_rect, view) #! project_rect_to_image requires the view argument
25 |     fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \
26 |                (pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)
27 | 
28 |     clip_filter = None
29 |     if view == 0:
30 |         clip_filter = pc_rect[:, 2] < -clip_distance
31 |     elif view == 1:
32 |         clip_filter = pc_rect[:, 0] < -clip_distance
33 |     elif view == 2:
34 |         clip_filter = pc_rect[:, 2] > clip_distance
35 |     elif view == 4:
36 |         clip_filter = pc_rect[:, 0] > clip_distance
37 |     if clip_filter is not None: #! no clip rule is defined for view == 3
38 |         fov_inds = fov_inds & clip_filter
39 | 
40 |     imgfov_pc_rect = pc_rect[fov_inds,:]
41 |     if return_more:
42 |         return imgfov_pc_rect, pts_2d, fov_inds
43 |     else:
44 |         return imgfov_pc_rect
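
A sketch of cropping a cloud to one camera's field of view, reusing `calib`, `img`, and `lidar` from the earlier kitti_object sketch:

    h, w = img.shape[:2]
    pc_rect = calib.project_velo_to_rect(lidar[:, :3])  # filtering happens in rect camera coords
    imgfov_pc_rect, pts_2d, fov_inds = get_lidar_in_image_fov(
        pc_rect, calib, view=2, xmin=0, ymin=0, xmax=w, ymax=h, return_more=True
    )
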
--------------------------------------------------------------------------------
/detection_toolbox/vis/vis2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tqdm import tqdm
3 | import time
4 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
5 | 
6 | '''
7 | For details, reference vis3d's draw_3d_boxes_from_objects_advanced
8 | '''
9 | def draw_2d_boxes_from_objects_advanced(
10 |     objects,
11 |     calib,
12 |     img,
13 |     default_color=(0, 1, 0), #! default color is green
14 |     color_func=None,
15 |     text_func=None,
16 |     size=3
17 | ):
18 |     color_dict = dict()
19 | 
20 |     for label_ind, label in enumerate(objects):
21 |         if color_func is not None:
22 |             color = color_func(label_ind, label)
23 |             if color is None:
24 |                 continue
25 |         else:
26 |             color = default_color
27 | 
28 |         if text_func is not None:
29 |             text = text_func(label_ind, label)
30 |             if text == "":
31 |                 text = None
32 |         else:
33 |             text = None
34 | 
35 |         bbox = label.get_imgaug_bbox()
36 |         bbox.label = text
37 | 
38 |         if color not in color_dict.keys():
39 |             color_dict[color] = {
40 |                 "boxes": []
41 |             }
42 | 
43 |         color_dict[color]['boxes'].append(bbox)
44 | 
45 |     for color, val in color_dict.items():
46 |         bboxes = BoundingBoxesOnImage(val['boxes'], shape=img.shape[:2])
47 |         #! flip color tuple b/c img is bgr and provided color is rgb
48 |         img = bboxes.draw_on_image(img, color=tuple(int(i * 255) for i in color)[::-1], size=size)
49 | 
50 |     return img
--------------------------------------------------------------------------------
/detection_toolbox/vis/vis3d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tqdm import tqdm
3 | import time
4 | 
5 | '''
6 | pc is n x (at least 3)
7 | '''
8 | def draw_lidar(
9 |     pc,
10 |     ptcolor=(1, 1, 1),
11 |     fig=None,
12 |     bgcolor=(0, 0, 0),
13 |     fig_size=(8000, 4000),
14 |     draw_range_squares=False #! if you want to also see square boxes at 40, 80, 120 meters
15 | ):
16 |     from mayavi import mlab
17 |     mlab.options.offscreen = True
18 | 
19 |     if fig is None:
20 |         fig = mlab.figure(bgcolor=bgcolor, size=fig_size)
21 | 
22 |     #! Draw origin & axes
23 |     mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=1)
24 | 
25 |     axis_len = 2.0
26 |     #! Each row is the "ending point" of each axis: x, y, z, starting from 0
27 |     axes = np.array([
28 |         [axis_len, 0., 0.],
29 |         [0., axis_len, 0.],
30 |         [0., 0., axis_len],
31 |     ], dtype=np.float32)
32 | 
33 |     for axis_ind, axis in enumerate(["x", "y", "z"]):
34 |         mlab.plot3d(
35 |             [0, axes[axis_ind, 0]],
36 |             [0, axes[axis_ind, 1]],
37 |             [0, axes[axis_ind, 2]],
38 |             color=tuple([int(c) for c in axes[axis_ind] / axis_len]), #! (1,0,0), (0,1,0), (0,0,1)
39 |             line_width=4,
40 |             tube_radius=None,
41 |             figure=fig
42 |         )
43 |         mlab.text3d(
44 |             axes[axis_ind, 0],
45 |             axes[axis_ind, 1],
46 |             axes[axis_ind, 2],
47 |             text=axis,
48 |             color=tuple([int(c) for c in axes[axis_ind] / axis_len]),
49 |             figure=fig,
50 |             scale=(0.5, 0.5, 0.5)
51 |         )
52 | 
53 |     if draw_range_squares:
54 |         for i in [40, 80, 120]:
55 |             mlab.plot3d(
56 |                 [i, i, -i, -i, i],
57 |                 [i, -i, -i, i, i],
58 |                 [0, 0, 0, 0, 0],
59 |                 color=(0.2, 0.2, 0.2),
60 |                 line_width=6,
61 |                 tube_radius=None,
62 |                 figure=fig
63 |             )
64 | 
65 |     #! Draw point cloud
66 |     mlab.points3d(
67 |         pc[:,0],
68 |         pc[:,1],
69 |         pc[:,2],
70 |         color=ptcolor,
71 |         mode='point',
72 |         colormap='gnuplot',
73 |         scale_factor=1,
74 |         figure=fig
75 |     )
76 | 
77 |     return fig
78 | 
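
Since mlab.options.offscreen is forced on throughout this file, figures are meant to be rendered to disk rather than shown interactively; a minimal sketch:

    from mayavi import mlab

    fig = draw_lidar(lidar, draw_range_squares=True)  # lidar from the earlier sketch, (n, >=3)
    fig = draw_boxes_from_objects(gt, calib, fig)     # defined further down in this file
    mlab.savefig("run/frame_000000.png", figure=fig)
    mlab.close(fig)
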
79 | '''
80 | Input:
81 |     objects: KittiLabel object
82 |     calib: Calibration object
83 |     fig: mayavi figure object
84 | 
85 | The goal of this function is to display only specific boxes in objects and be able to control what color each box is.
86 | If color_func is None, all boxes are displayed & default_color is used for all.
87 | If color_func is not None, it should be a function that takes in (index in objects.labels, label: SingleLabel) and
88 | returns either None or a tuple denoting the color the object should be displayed with.
89 | If it returns None for a label, the box is not displayed for that object/label.
90 | text_func has the same inputs, but should output a string or None.
91 | '''
92 | def draw_3d_boxes_from_objects_advanced(
93 |     objects,
94 |     calib,
95 |     fig,
96 |     default_color=(0, 1, 0), #! default color is green
97 |     color_func=None,
98 |     text_func=None
99 | ):
100 |     from mayavi import mlab
101 |     mlab.options.offscreen = True
102 | 
103 |     color_dict = dict()
104 | 
105 |     for label_ind, label in enumerate(objects):
106 |         if color_func is not None:
107 |             color = color_func(label_ind, label)
108 |             if color is None:
109 |                 continue
110 |         else:
111 |             color = default_color
112 | 
113 |         if text_func is not None:
114 |             text = text_func(label_ind, label)
115 |             if text is None:
116 |                 text = ""
117 |         else:
118 |             text = ""
119 | 
120 |         corners_3d_rect = label.compute_box_3d() #! Gets corners of 3d box
121 |         corners_3d_velo = calib.project_rect_to_velo(corners_3d_rect)
122 | 
123 |         if color not in color_dict.keys():
124 |             color_dict[color] = {
125 |                 "boxes": [],
126 |                 "texts": [],
127 |                 "tmp": []
128 |             }
129 | 
130 |         color_dict[color]['boxes'].append(corners_3d_velo)
131 |         color_dict[color]['texts'].append(text)
132 |         color_dict[color]['tmp'].append(label)
133 | 
134 |     for color, val in color_dict.items():
135 |         draw_boxes_3d(val['boxes'], fig, box_color=color, text_color=color, text_list=val['texts'])
136 | 
137 |     return fig
138 | 
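
The color_func/text_func contract from the docstring above, as a concrete sketch: keep only Cars, color them by detection score, and label each kept box with that score. The 0.7 threshold is arbitrary:

    def car_color_func(label_ind, label):
        if label.type != "Car":
            return None                                       # None hides the box entirely
        return (0, 1, 0) if label.score > 0.7 else (1, 1, 0)  # green if confident, else yellow

    def score_text_func(label_ind, label):
        return "{:.2f}".format(label.score)

    dt = ko.get_dt_label(view=2, idx=0)  # ko / calib / fig as in the earlier sketches
    fig = draw_3d_boxes_from_objects_advanced(dt, calib, fig,
                                              color_func=car_color_func,
                                              text_func=score_text_func)
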
139 | 
140 | '''
141 | #! objects is type KittiLabel
142 | #! calib is type Calibration
143 | #! whether boxes are treated as gt is read off objects.gt (boolean)
144 | gt is drawn in green, dt is drawn in red; dt also writes the score
145 | '''
146 | def draw_boxes_from_objects(
147 |     objects,
148 |     calib,
149 |     fig,
150 |     occ_thresh=0.7,
151 |     categories=["Car", "Pedestrian", "Cyclist", "Motorcycle", "Undefined"],
152 |     text_func=None
153 | ):
154 |     from mayavi import mlab
155 |     mlab.options.offscreen = True
156 | 
157 |     default_connections = [
158 |         (0, 1), (4, 5), (0, 4),
159 |         (1, 2), (5, 6), (1, 5),
160 |         (2, 3), (6, 7), (2, 6),
161 |         (3, 0), (7, 4), (3, 7)
162 |     ] #! If the 8 corners were 0 indexed, these are the connections between them (unused here; draw_boxes_3d keeps its own copy)
163 | 
164 |     all_boxes = []
165 |     all_text = []
166 |     real_index = -1 #! keep track of & later display index of object in label file, so we can zoom in later
167 | 
168 |     for label in objects:
169 |         real_index += 1
170 |         if label.occlusion > occ_thresh or label.type not in categories:
171 |             continue
172 |         corners_3d_rect = label.compute_box_3d() #! Gets corners of 3d box
173 |         corners_3d_velo = calib.project_rect_to_velo(corners_3d_rect)
174 | 
175 |         all_boxes.append(corners_3d_velo)
176 | 
177 |         if objects.gt:
178 |             if text_func is None:
179 |                 all_text.append("v{}_{}".format(objects.view, real_index))
180 |             else:
181 |                 all_text.append(str(text_func(label)))
182 |         else:
183 |             if text_func is None:
184 |                 all_text.append("{:.2f}".format(label.score))
185 |             else:
186 |                 all_text.append(str(text_func(label)))
187 | 
188 |     if objects.gt:
189 |         box_color = text_color = (0, 1, 0) #! green gt boxes
190 |         # all_text = None
191 |     else:
192 |         box_color = text_color = (1, 0, 0) #! red dt boxes
193 | 
194 |     draw_boxes_3d(all_boxes, fig, box_color=box_color, text_color=text_color, text_list=all_text)
195 | 
196 |     return fig
197 | 
198 | def draw_boxes_3d(box3d, fig, box_color=(1,1,1), text_color=(1,0,0), text_scale=(.5,.5,.5), text_list=None):
199 |     from mayavi import mlab
200 |     mlab.options.offscreen = True
201 | 
202 |     default_connections = [
203 |         (0, 1), (4, 5), (0, 4),
204 |         (1, 2), (5, 6), (1, 5),
205 |         (2, 3), (6, 7), (2, 6),
206 |         (3, 0), (7, 4), (3, 7)
207 |     ] #! If the 8 corners were 0 indexed, these are the connections between them
208 | 
209 |     all_connections = []
210 | 
211 |     for box_index in range(len(box3d)):
212 |         b = box3d[box_index]
213 |         if text_list is not None:
214 |             text_tmp = text_list[box_index]
215 |             mlab.text3d(b[4,0], b[4,1], b[4,2], str(text_tmp), scale=text_scale, color=text_color, figure=fig)
216 | 
217 |         all_connections += default_connections #! Put in connections
218 |         default_connections = [(a + 8, b + 8) for (a, b) in default_connections] #! Increment default by 8 for the next box's corners
219 | 
220 | 
221 |     all_corners_3d_velo = np.array(box3d).reshape(-1, 3) # just make it a list of points
222 |     pts = mlab.points3d(
223 |         all_corners_3d_velo[:, 0],
224 |         all_corners_3d_velo[:, 1],
225 |         all_corners_3d_velo[:, 2],
226 |         color=box_color,
227 |         mode="point",
228 |         scale_factor=1
229 |     )
230 |     pts.mlab_source.dataset.lines = np.array(all_connections)
231 |     tube = mlab.pipeline.tube(pts, tube_radius=0.05)
232 |     tube.filter.radius_factor = 1.
233 |     mlab.pipeline.surface(tube, color=box_color)
234 | 
235 |     return fig
236 | 
237 | 
238 | def set_view(fig, azimuth, elevation, distance, focalpoint=[0, 0, 0]):
239 |     from mayavi import mlab
240 |     mlab.options.offscreen = True
241 |     #! Set the camera view
242 |     mlab.view(
243 |         azimuth=azimuth,
244 |         elevation=elevation,
245 |         distance=distance,
246 |         focalpoint=focalpoint,
247 |         figure=fig
248 |     )
249 |     return fig
250 | 
251 | #! Zooms the view in on the zoom_idx-th object
252 | def zoom_view(objects, calib, fig, zoom_idx):
253 |     from mayavi import mlab
254 |     mlab.options.offscreen = True
255 | 
256 |     zoom_object = objects.labels[zoom_idx] #! Object to focus on
257 |     x, y, z = zoom_object.t #! Center of object in rect camera coord.
258 |     x_velo, y_velo, z_velo = calib.project_rect_to_velo(np.array([[x, y, z]]))[0]
259 | 
260 |     curr_azimuth, curr_elevation, curr_distance, _ = mlab.view()
261 |     curr_x, curr_y, curr_z = spherical_to_cartesian(curr_azimuth, curr_elevation, curr_distance)
262 | 
263 |     ratio = 10 #! move the camera (ratio - 1) / ratio of the way toward the object
264 |     new_x = curr_x / ratio + x_velo * (ratio - 1) / ratio
265 |     new_y = curr_y / ratio + y_velo * (ratio - 1) / ratio
266 |     new_z = curr_z / ratio + z_velo * (ratio - 1) / ratio
267 | 
268 |     new_azimuth, new_elevation, new_distance = cartesian_to_spherical(new_x, new_y, new_z)
269 | 
270 |     mlab.view(
271 |         azimuth=new_azimuth,
272 |         elevation=new_elevation,
273 |         distance=new_distance,
274 |         focalpoint=[x_velo, y_velo, z_velo],
275 |         figure=fig
276 |     )
277 | 
278 |     return fig
279 | 
280 | def spherical_to_cartesian(azimuth, elevation, distance):
281 |     pi_over_180 = np.pi / 180.0
282 |     x = distance * np.sin(elevation * pi_over_180) * np.cos(azimuth * pi_over_180)
283 |     y = distance * np.sin(elevation * pi_over_180) * np.sin(azimuth * pi_over_180)
284 |     z = distance * np.cos(elevation * pi_over_180)
285 | 
286 |     return x, y, z
287 | 
288 | def cartesian_to_spherical(x, y, z):
289 |     pi_below_180 = 180.0 / np.pi
290 |     distance = np.sqrt(x ** 2 + y ** 2 + z ** 2)
291 |     azimuth = pi_below_180 * np.arctan2(y, x) #! arctan2 instead of arctan(y / x): survives x == 0 and picks the correct quadrant
292 |     elevation = pi_below_180 * np.arctan2(np.sqrt(x ** 2 + y ** 2), z) #! likewise survives z == 0
293 | 
294 |     return azimuth, elevation, distance
--------------------------------------------------------------------------------
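
A quick consistency check for the two conversion helpers at the end of vis3d.py. mayavi reports azimuth in [0, 360), so the round trip is compared modulo a full turn:

    import numpy as np

    azimuth, elevation, distance = 45.0, 60.0, 30.0
    x, y, z = spherical_to_cartesian(azimuth, elevation, distance)
    back_az, back_el, back_dist = cartesian_to_spherical(x, y, z)
    assert np.allclose((back_az % 360.0, back_el, back_dist), (azimuth, elevation, distance))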