├── 3DIOU_custom.py ├── IOU.py ├── README.md ├── config └── config_carla.yaml ├── data_import.py ├── data_import_carla.py ├── explain_figure └── overall.png ├── image ├── lidar_image_301_in_0.png └── lidar_image_366_in_0.png ├── loss.py ├── model.py ├── separation_axis_theorem.py ├── test.py └── train.py /3DIOU_custom.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ''' 5 | IOU calculation is not perfection, its too simple 6 | YOU NEED TO MODIFY CONVEX 7 | ''' 8 | import torch 9 | 10 | EPSILON = 1e-5 11 | 12 | def getCornerPoint(bbox): 13 | x1 = bbox[4]/2 * torch.cos(bbox[-1]) - bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 14 | x2 =-bbox[4]/2 * torch.cos(bbox[-1]) - bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 15 | x3 =-bbox[4]/2 * torch.cos(bbox[-1]) + bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 16 | x4 = bbox[4]/2 * torch.cos(bbox[-1]) + bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 17 | y1 = bbox[4]/2 * torch.sin(bbox[-1]) + bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 18 | y2 =-bbox[4]/2 * torch.sin(bbox[-1]) + bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 19 | y3 =-bbox[4]/2 * torch.sin(bbox[-1]) - bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 20 | y4 = bbox[4]/2 * torch.sin(bbox[-1]) - bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 21 | return torch.tensor([[x1,y1], [x2,y2], [x3,y3], [x4,y4]]), torch.tensor([[x2,y2], [x3,y3], [x4,y4],[x1,y1]]) 22 | 23 | # def CornerInRectangle(c1,bbox): 24 | # for corner in c1: 25 | 26 | def getLineEq(c1, c1_s): 27 | slope = (c1_s[:,1] - c1[:,1])/(c1_s[:,0] - c1[:,0]) 28 | return slope 29 | 30 | 31 | def get3DIOU(bbox_1, bbox_2): 32 | #bbox_1, bbox_2 : size(7), x,y,z,width,length,height,orientation 33 | c1, c1_s = getCornerPoint(bbox_1) 34 | c2, c2_s = getCornerPoint(bbox_2) 35 | slope1 = getLineEq(c1, c1_s) 36 | slope2 = getLineEq(c2, c2_s) 37 | c_in_set = [] 38 | for i in range(4): 39 | x_min = c1[i][0] if c1[i][0] < c1_s[i][0] else c1_s[i][0] 40 | x_max = c1[i][0] if c1[i][0] > c1_s[i][0] else c1_s[i][0] 41 | y_min = c1[i][1] if c1[i][1] < c1_s[i][1] else c1_s[i][1] 42 | y_max = c1[i][1] if c1[i][1] > c1_s[i][1] else c1_s[i][1] 43 | for j in range(4): 44 | c_in = 1/(slope2[j] - slope1[i]) * torch.matmul(torch.tensor([[-1.0, 1.0], 45 | [-slope2[j], slope1[i]]]), 46 | torch.tensor([[slope1[i]*c1[i][0] - c1[i][1]], 47 | [slope2[i]*c2[i][0] - c2[i][1]]])) 48 | if x_max - x_min < EPSILON: 49 | if c_in[1] > y_min and c_in[1] < y_max: 50 | c_in_set.append(c_in) 51 | elif y_max - y_min < EPSILON: 52 | if c_in[0] > x_min and c_in[0] < x_max: 53 | c_in_set.append(c_in) 54 | else: 55 | if c_in[0] > x_min and c_in[0] < x_max and c_in[1] > y_min and c_in[1] < y_max: 56 | c_in_set.append(c_in) 57 | 58 | IOU = 1 59 | return IOU 60 | 61 | -------------------------------------------------------------------------------- /IOU.py: -------------------------------------------------------------------------------- 1 | # 3D IoU caculate code for 3D object detection 2 | # Kent 2018/12 3 | 4 | import numpy as np 5 | from scipy.spatial import ConvexHull 6 | from numpy import * 7 | 8 | 9 | def polygon_clip(subjectPolygon, clipPolygon): 10 | """ Clip a polygon with another polygon. 11 | 12 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 13 | 14 | Args: 15 | subjectPolygon: a list of (x,y) 2d points, any polygon. 16 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 17 | Note: 18 | **points have to be counter-clockwise ordered** 19 | 20 | Return: 21 | a list of (x,y) vertex point for the intersection polygon. 
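        Example (hypothetical inputs, both squares counter-clockwise):
            polygon_clip([(0, 0), (2, 0), (2, 2), (0, 2)], [(1, 1), (3, 1), (3, 3), (1, 3)])
            returns the four corners of the overlapping unit square spanning (1, 1) to (2, 2).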
22 | """ 23 | 24 | def inside(p): 25 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0]) 26 | 27 | def computeIntersection(): 28 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]] 29 | dp = [s[0] - e[0], s[1] - e[1]] 30 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 31 | n2 = s[0] * e[1] - s[1] * e[0] 32 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 33 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3] 34 | 35 | outputList = subjectPolygon 36 | cp1 = clipPolygon[-1] 37 | 38 | for clipVertex in clipPolygon: 39 | cp2 = clipVertex 40 | inputList = outputList 41 | outputList = [] 42 | s = inputList[-1] 43 | 44 | for subjectVertex in inputList: 45 | e = subjectVertex 46 | if inside(e): 47 | if not inside(s): 48 | outputList.append(computeIntersection()) 49 | outputList.append(e) 50 | elif inside(s): 51 | outputList.append(computeIntersection()) 52 | s = e 53 | cp1 = cp2 54 | if len(outputList) == 0: 55 | return None 56 | return (outputList) 57 | 58 | 59 | def poly_area(x, y): 60 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 61 | return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) 62 | 63 | 64 | def convex_hull_intersection(p1, p2): 65 | """ Compute area of two convex hull's intersection area. 66 | p1,p2 are a list of (x,y) tuples of hull vertices. 67 | return a list of (x,y) for the intersection and its volume 68 | """ 69 | inter_p = polygon_clip(p1, p2) 70 | if inter_p is not None: 71 | hull_inter = ConvexHull(inter_p) 72 | return inter_p, hull_inter.volume 73 | else: 74 | return None, 0.0 75 | 76 | 77 | def box3d_vol(corners): 78 | ''' corners: (8,3) no assumption on axis direction ''' 79 | a = np.sqrt(np.sum((corners[0, :] - corners[1, :]) ** 2)) 80 | b = np.sqrt(np.sum((corners[1, :] - corners[2, :]) ** 2)) 81 | c = np.sqrt(np.sum((corners[0, :] - corners[4, :]) ** 2)) 82 | return a * b * c 83 | 84 | 85 | def is_clockwise(p): 86 | x = p[:, 0] 87 | y = p[:, 1] 88 | return np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)) > 0 89 | 90 | 91 | def box3d_iou(corners1, corners2): 92 | ''' Compute 3D bounding box IoU. 93 | 94 | Input: 95 | corners1: numpy array (8,3), assume up direction is negative Y 96 | corners2: numpy array (8,3), assume up direction is negative Y 97 | Output: 98 | iou: 3D bounding box IoU 99 | iou_2d: bird's eye view 2D bounding box IoU 100 | 101 | todo (kent): add more description on corner points' orders. 102 | ''' 103 | # corner points are in counter clockwise order 104 | rect1 = [(corners1[i, 0], corners1[i, 2]) for i in range(3, -1, -1)] 105 | rect2 = [(corners2[i, 0], corners2[i, 2]) for i in range(3, -1, -1)] 106 | 107 | area1 = poly_area(np.array(rect1)[:, 0], np.array(rect1)[:, 1]) 108 | area2 = poly_area(np.array(rect2)[:, 0], np.array(rect2)[:, 1]) 109 | 110 | inter, inter_area = convex_hull_intersection(rect1, rect2) 111 | iou_2d = inter_area / (area1 + area2 - inter_area) 112 | ymax = min(corners1[0, 1], corners2[0, 1]) 113 | ymin = max(corners1[4, 1], corners2[4, 1]) 114 | 115 | inter_vol = inter_area * max(0.0, ymax - ymin) 116 | 117 | vol1 = box3d_vol(corners1) 118 | vol2 = box3d_vol(corners2) 119 | iou = inter_vol / (vol1 + vol2 - inter_vol) 120 | return iou, iou_2d 121 | 122 | 123 | # ---------------------------------- 124 | # Helper functions for evaluation 125 | # ---------------------------------- 126 | 127 | def get_3d_box(center, box_size, heading_angle): 128 | ''' Calculate 3D bounding box corners from its parameterization. 
129 | 130 | Input: 131 | box_size: tuple of (length,wide,height) 132 | heading_angle: rad scalar, clockwise from pos x axis 133 | center: tuple of (x,y,z) 134 | Output: 135 | corners_3d: numpy array of shape (8,3) for 3D box cornders 136 | ''' 137 | 138 | def roty(t): 139 | c = np.cos(t) 140 | s = np.sin(t) 141 | return np.array([[c, 0, s], 142 | [0, 1, 0], 143 | [-s, 0, c]]) 144 | 145 | R = roty(heading_angle) 146 | l, w, h = box_size 147 | x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]; 148 | y_corners = [h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2]; 149 | z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]; 150 | corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners])) 151 | corners_3d[0, :] = corners_3d[0, :] + center[0]; 152 | corners_3d[1, :] = corners_3d[1, :] + center[1]; 153 | corners_3d[2, :] = corners_3d[2, :] + center[2]; 154 | corners_3d = np.transpose(corners_3d) 155 | return corners_3d 156 | 157 | 158 | if __name__ == '__main__': 159 | print('------------------') 160 | # get_3d_box(center, box_size, heading_angle) 161 | corners_3d_ground = get_3d_box((2.882992, 1.698800, 20.785644), (1.497255, 1.644981, 3.628938), -1.531692) 162 | corners_3d_predict = get_3d_box((2.756923, 1.661275, 20.943280), (1.458242, 1.604773, 3.707947), -1.549553) 163 | print("predict corner is") 164 | print(corners_3d_predict) 165 | print("ground corner is") 166 | print(corners_3d_ground) 167 | (IOU_3d, IOU_2d) = box3d_iou(corners_3d_predict, corners_3d_ground) 168 | print(IOU_3d, IOU_2d) # 3d IoU/ 2d IoU of BEV(bird eye's view) 169 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection 2 | 3 | *Caution: Its unofficial algorithm and under construction. we will make total architecture soon. 4 | 5 | reference: Deep Continuous Fusion for Multi-Sensor 3D Object Detection (Ming Liang et.al. , ECCV 2018) 6 | 7 | ## Overall 8 | 9 | ![](./explain_figure/overall.png) 10 | 11 | 12 | 13 | 14 | 15 | ## How to use it 16 | 17 | ### Data directory 18 | 19 | Data parse is in data_import_carla.py. You can change other dataset by changing Dataset class! 
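For a quick sanity check of the data pipeline, the dataset class can also be used on its own. The sketch below is an assumption-heavy example, not an official entry point: it presumes the YAML config is read with PyYAML and that `train_data_dir` in `config/config_carla.yaml` points to your own CARLA hdf5 files.

```
import yaml
import torch
from data_import_carla import CarlaDataset

# load the same config that train.py / test.py use
with open("config/config_carla.yaml") as f:
    config = yaml.safe_load(f)

dataset = CarlaDataset(config, mode="train")
loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

sample = next(iter(loader))
# dict keys: 'image', 'bboxes', 'num_bboxes', 'pointcloud', 'pointcloud_raw',
#            'projected_loc_uv', 'num_points_raw'
print(sample["pointcloud"].shape)  # (B, voxel_channel, voxel_length, voxel_width)
```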
20 | 21 | ### Training 22 | 23 | ``` 24 | $ python3.5 train.py --cuda 0,1 --port 12321 --data carla 25 | ``` 26 | 27 | ### Testing 28 | 29 | ``` 30 | $ python3.5 test.py --cuda 0,1 --port 12321 --data carla 31 | ``` 32 | -------------------------------------------------------------------------------- /config/config_carla.yaml: -------------------------------------------------------------------------------- 1 | #train.py configuration 2 | batch_size: 8 3 | dataset_name: carla 4 | cuda_visible_id: "1,2,3,4" 5 | port_number: "12233" 6 | saved_model_name: model_ 7 | num_epoch: 60 8 | learning_rate: 0.0001 9 | beta1: 0.9 10 | plot_AP_graph: False 11 | 12 | 13 | # data_import_carla.py configuration 14 | train_data_dir: /media/mmc-server1/Server1/chanuk/ready_for_journal/dataset/carla_object 15 | test_data_dir: /media/mmc-server1/Server1/chanuk/ready_for_journal/dataset/carla_object/test 16 | max_num_pc: 20000 17 | max_num_bbox: 20 18 | # lidar&voxel configuration 19 | lidar_x_min: 0.0 20 | lidar_x_max: 70.0 21 | lidar_y_min: -30.0 22 | lidar_y_max: 30.0 23 | lidar_z_min: -2.4 24 | lidar_z_max: 0.8 25 | delta: 0.2 26 | voxel_length: 384 # X 27 | voxel_width: 256 # Y 28 | voxel_channel: 32 # Z 29 | # image configuration 30 | image_height: 480 31 | image_width: 640 32 | 33 | #loss.py configuration 34 | regress_type: 0 35 | regress_loss_gain: 3 36 | positive_range: 5 37 | pos_sample_threshold: 128 38 | neg_sample_threshold: 128 39 | 40 | # model.py 41 | anchor_bbox_feature: # also used in loss.py 42 | width: 2.0 43 | length: 4.0 44 | height: 1.5 45 | reduced_scale: 4 # MUST MATCH WITH OUT_FEATURE3 OF LIDAR_MODULE 46 | lidar_module: 47 | out_feature1: 32 48 | out_feature2: 64 49 | out_feature3: 128 50 | out_feature4: 192 51 | out_feature5: 256 52 | 53 | num_res_block1: 1 54 | num_res_block2: 2 55 | num_res_block3: 4 56 | num_res_block4: 6 57 | num_res_block5: 6 58 | 59 | # test.py 60 | nms_iou_score_theshold: 0.01 61 | score_threshold: 0.8 -------------------------------------------------------------------------------- /data_import.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image, ImageDraw 4 | 5 | def getRect(x, y, width, height, angle): 6 | rect = np.array([(-width/2, -height/2), (width/2, -height/2), 7 | (width/2, height/2), (-width/2, height/2), 8 | (-width/2, -height/2)]) 9 | theta = angle 10 | R = np.array([[np.cos(theta), -np.sin(theta)], 11 | [np.sin(theta), np.cos(theta)]]) 12 | offset = np.array([x, y]) 13 | transformed_rect = np.dot(rect, R) + offset 14 | return transformed_rect 15 | 16 | def putBoundingBox(lidar_image, reference_bboxes, config, color=1): 17 | lidar_image_with_bbox = lidar_image.cpu().clone().numpy() 18 | img = Image.fromarray((255*lidar_image_with_bbox).astype(np.uint8)) 19 | draw = ImageDraw.Draw(img) 20 | x_scale = int(config["voxel_length"] / (config["lidar_x_max"] - config["lidar_x_min"])) 21 | y_scale = int(config["voxel_width"] / (config["lidar_y_max"] - config["lidar_y_min"])) 22 | x_offset = int(-config["lidar_x_min"] * x_scale) 23 | y_offset = int(-config["lidar_y_min"] * y_scale) 24 | for bbox in reference_bboxes: 25 | x = int(bbox[1]*y_scale + y_offset) 26 | y = int(bbox[0]*x_scale) 27 | width = bbox[3]*y_scale # WARNING! IT SHOULD BE SAME SCALE IN X & Y 28 | height = bbox[4]*x_scale # WARNING! 
IT SHOULD BE SAME SCALE IN X & Y 29 | angle = bbox[6] - 1.57 30 | rect = getRect(x=x, y=y, width=width, height=height, angle=angle) 31 | draw.polygon([tuple(p) for p in rect], fill=color) 32 | lidar_image_with_bbox = np.asarray(img) 33 | return torch.tensor(lidar_image_with_bbox) 34 | -------------------------------------------------------------------------------- /data_import_carla.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import h5py 4 | from torch.utils.data import Dataset 5 | from torchvision.utils import save_image 6 | from data_import import putBoundingBox 7 | import time 8 | import numpy as np 9 | import quaternion 10 | 11 | class CarlaDataset(Dataset): 12 | def __init__(self, config, mode="train",want_bev_image=False): 13 | super(CarlaDataset, self).__init__() 14 | self.config = config 15 | self.hdf5_files = self.load_dataset(mode = mode) 16 | self.hdf5_id_dict = self.getIdDict(self.hdf5_files) 17 | self.length = 0 18 | self.scenario_length = [] 19 | self.scenario_name = [] 20 | if (want_bev_image): 21 | self.want_bev_image = True 22 | else: 23 | self.want_bev_image = False 24 | 25 | for hdf5_file in self.hdf5_files: 26 | single_data_scenario = self.hdf5_files[hdf5_file] 27 | self.length += len(single_data_scenario) 28 | self.scenario_name.append(hdf5_file) 29 | self.scenario_length.append(len(single_data_scenario)) 30 | 31 | RT = self.get_extrinsic_parameter() 32 | C = self.get_intrinsic_parameter() 33 | CRT = np.matmul(C, RT) 34 | self.CRT_tensor = torch.tensor(CRT).permute(1,0).type(torch.float) 35 | x_scale = int(self.config["voxel_length"] / (self.config["lidar_x_max"] - self.config["lidar_x_min"])) 36 | y_scale = int(self.config["voxel_width"] / (self.config["lidar_y_max"] - self.config["lidar_y_min"])) 37 | z_scale = int(self.config["voxel_channel"] / (self.config["lidar_z_max"] - self.config["lidar_z_min"])) 38 | x_offset = int(-self.config["lidar_x_min"] * x_scale) 39 | y_offset = int(-self.config["lidar_y_min"] * y_scale) 40 | z_offset = int(-self.config["lidar_z_min"] * z_scale) 41 | self.pc_to_voxel_indice = torch.tensor([[x_scale,0,0,x_offset], 42 | [0,y_scale,0,y_offset], 43 | [0,0,z_scale,z_offset]], dtype=torch.float).permute(1,0) 44 | 45 | def __len__(self): 46 | return self.length 47 | 48 | def __getitem__(self, idx): 49 | idx_for_scenario = idx 50 | if idx > self.length or idx < 0: 51 | RuntimeError("idx is not in data file") 52 | return -1 53 | for scenario_file_index in range(len(self.scenario_length)): 54 | length = self.scenario_length[scenario_file_index] 55 | if (idx_for_scenario - length >= 0): 56 | idx_for_scenario = idx_for_scenario - length 57 | else: 58 | file_name = self.scenario_name[scenario_file_index] 59 | data = self.hdf5_files[file_name] 60 | id = self.hdf5_id_dict[file_name][idx_for_scenario].strip() 61 | object_datas, lidar_data, image_data = self.getOneStepData(data, id) 62 | image_data = torch.tensor(image_data).permute(2, 0, 1) 63 | reference_bboxes, num_reference_bboxes = self.arangeLabelData(object_datas) 64 | voxelized_lidar, point_cloud_raw, uv, num_points_raw, indices_for_bev = self.Voxelization_Projection(lidar_data) 65 | if (self.want_bev_image): 66 | bev_image = self.getLidarImage(indices_for_bev) 67 | return {'image': image_data, 68 | 'bboxes': reference_bboxes, 69 | "num_bboxes": num_reference_bboxes, 70 | "pointcloud": voxelized_lidar, 71 | "pointcloud_raw": point_cloud_raw, 72 | "projected_loc_uv": uv, 73 | "num_points_raw": num_points_raw, 74 | 
"lidar_bev_2Dimage": bev_image} 75 | else: 76 | return {'image': image_data, 77 | 'bboxes': reference_bboxes, 78 | "num_bboxes": num_reference_bboxes, 79 | "pointcloud_raw":point_cloud_raw, 80 | "projected_loc_uv": uv, 81 | "num_points_raw": num_points_raw, 82 | "pointcloud" : voxelized_lidar} 83 | 84 | def load_dataset(self, mode = "train"): 85 | if mode == "train": 86 | label_path = self.config["train_data_dir"] 87 | elif mode == "test": 88 | label_path = self.config["test_data_dir"] 89 | else: 90 | print ("ERROR IN MODE TYPE, PRESS [train] OR [test] !!") 91 | return -1 92 | hdf5_files = {} 93 | print("reading hdf5 file...") 94 | file_list = os.listdir(label_path) 95 | for file in file_list: 96 | if file.split('.')[-1] == 'hdf5': 97 | file_dir = os.path.join(label_path, file) 98 | try: 99 | hdf5_files[file] = h5py.File(file_dir, 'r') 100 | print(file) 101 | except: 102 | print(file + ' doesnt work. we except this folder') 103 | print("reading hdf5 end") 104 | return hdf5_files 105 | 106 | def valid_bbox(self, object_data): 107 | loc_x = object_data[0] 108 | loc_y = object_data[1] 109 | if loc_x >= self.config["lidar_x_min"] and loc_x < self.config["lidar_x_max"] and loc_y >= self.config["lidar_y_min"] and loc_y < self.config["lidar_y_max"]: 110 | return True 111 | return False 112 | 113 | def orientation_inner_bound(self, ori): 114 | if ori > 3.141592: 115 | while(1): 116 | ori = ori - 3.141592 117 | if ori > 0 and ori < 3.141592: 118 | break 119 | elif ori < 0: 120 | while(1): 121 | ori = ori + 3.141592 122 | if ori > 0 and ori < 3.141592: 123 | break 124 | else: 125 | ori = ori # 3 and 4 should be carefully look whether is pitch or roll 126 | return ori 127 | 128 | def arangeLabelData(self, object_datas): 129 | """ 130 | uint8 CLASSIFICATION_UNKNOWN=0 131 | uint8 CLASSIFICATION_UNKNOWN_SMALL=1 132 | uint8 CLASSIFICATION_UNKNOWN_MEDIUM=2 133 | uint8 CLASSIFICATION_UNKNOWN_BIG=3 134 | uint8 CLASSIFICATION_PEDESTRIAN=4 135 | uint8 CLASSIFICATION_BIKE=5 136 | uint8 CLASSIFICATION_CAR=6 137 | uint8 CLASSIFICATION_TRUCK=7 138 | uint8 CLASSIFICATION_MOTORCYCLE=8 139 | uint8 CLASSIFICATION_OTHER_VEHICLE=9 140 | uint8 CLASSIFICATION_BARRIER=10 141 | uint8 CLASSIFICATION_SIGN=11 142 | """ 143 | ref_bboxes = torch.zeros(self.config["max_num_bbox"],9) 144 | i = 0 145 | for object_data in object_datas: 146 | if i>self.config["max_num_bbox"]: 147 | break 148 | if not self.valid_bbox(object_data): 149 | continue 150 | object_class = object_data[9] 151 | if object_class == 6: 152 | rel_x = object_data[0] 153 | rel_y = object_data[1] 154 | rel_z = object_data[2] 155 | ori = self.orientation_inner_bound(object_data[5]) 156 | width = object_data[6] 157 | length = object_data[7] 158 | height = object_data[8] 159 | ref_bboxes[i,:] = torch.tensor([rel_x, rel_y, rel_z, length, width, height, ori, object_class, 1]) 160 | i+=1 161 | return ref_bboxes, i 162 | 163 | def getOneStepData(self, data, id): 164 | image_name = 'center_image_data' 165 | lidar_name = 'lidar_data' 166 | object_data_name = 'object_data' # relative position and rotation data 167 | object_data = torch.tensor(np.array(data[id][object_data_name])) # N * 10 (x, y, z, roll, pitch, yaw, wid, len, hei, class) 168 | lidar_data = torch.tensor(np.array(data[id][lidar_name])).type(torch.float)[:,3:6] # N * 3 (x, y, z) 169 | image_data = torch.tensor(np.array(data[id][image_name])) # 480*640*3 170 | 171 | return object_data, lidar_data, image_data 172 | 173 | def getIdDict(self, hdf5_files): 174 | hdf5_id_dict = {} 175 | for hdf5_file in hdf5_files: 
176 | data_list = list(hdf5_files[hdf5_file].keys()) 177 | hdf5_id_dict[hdf5_file] = data_list 178 | return hdf5_id_dict 179 | 180 | def get_extrinsic_parameter(self): 181 | trans = np.zeros((3,1)) # translation is 0, 0, 0 182 | v_lidar = np.array([ -1.57079633, 3.12042851, -1.57079633 ]) 183 | v_cam = np.array([ -3.13498819, 1.59196951, 1.56942932 ]) 184 | v_diff = v_cam - v_lidar 185 | q = quaternion.from_euler_angles(v_diff) 186 | R_ = quaternion.as_rotation_matrix(q) 187 | RT = np.concatenate((R_,trans), axis=-1) 188 | return RT 189 | 190 | def get_intrinsic_parameter(self): 191 | cameraMatrix = np.array([[268.51188197672957, 0.0, 320.0], 192 | [0.0, 268.51188197672957, 240.0], 193 | [0.0, 0.0, 1.0]]) 194 | return cameraMatrix 195 | 196 | def Projection(self, point_cloud_raw): 197 | ones = torch.ones((point_cloud_raw.shape[0],1)) 198 | xyz_one = torch.cat((point_cloud_raw, ones), dim=-1) # input 199 | uv_z = torch.matmul(xyz_one, self.CRT_tensor).permute(1,0) 200 | uv = uv_z/uv_z[-1] 201 | uv = uv[:2] 202 | uv = torch.where(uv[0] > 0, uv, torch.tensor(0).type(torch.float)) 203 | uv = torch.where(uv[0] < self.config["image_height"], uv, torch.tensor(0).type(torch.float)) 204 | uv = torch.where(uv[1] > 0, uv, torch.tensor(0).type(torch.float)) 205 | uv = torch.where(uv[1] < self.config["image_width"], uv, torch.tensor(0).type(torch.float)) 206 | indices = torch.nonzero(uv) 207 | indices = indices[:int(indices.shape[0]/2),1] 208 | filtered_points_raw = point_cloud_raw[indices] 209 | 210 | return uv.permute(1,0)[indices], filtered_points_raw 211 | 212 | def Voxelization_Projection(self, lidar_data, interpolate=True): 213 | # Voxelization 214 | lidar_data = lidar_data.permute(1,0) # 3 * N 215 | lidar_data = torch.where(lidar_data[0] > self.config["lidar_x_min"], 216 | lidar_data, torch.tensor(0).type(torch.float)) 217 | lidar_data = torch.where(lidar_data[0] < self.config["lidar_x_max"] - self.config["delta"], 218 | lidar_data, torch.tensor(0).type(torch.float)) 219 | lidar_data = torch.where(lidar_data[1] > self.config["lidar_y_min"], 220 | lidar_data, torch.tensor(0).type(torch.float)) 221 | lidar_data = torch.where(lidar_data[1] < self.config["lidar_y_max"] - self.config["delta"], 222 | lidar_data, torch.tensor(0).type(torch.float)) 223 | lidar_data = torch.where(lidar_data[2] > self.config["lidar_z_min"], 224 | lidar_data, torch.tensor(0).type(torch.float)) 225 | lidar_data = torch.where(lidar_data[2] < self.config["lidar_z_max"] - self.config["delta"], 226 | lidar_data, torch.tensor(0).type(torch.float)) 227 | valid_indices = torch.nonzero(lidar_data) 228 | valid_indices = valid_indices[:int(valid_indices.shape[0]/3),1] 229 | lidar_data = lidar_data.permute(1,0)[valid_indices] 230 | lidar_data_ = torch.cat((lidar_data, torch.ones(lidar_data.shape[0],1).type(torch.float)), dim=-1) 231 | if not interpolate: 232 | indices = torch.matmul(lidar_data_, self.pc_to_voxel_indice).type(torch.long).permute(1,0) 233 | lidar_voxel = torch.zeros(self.config["voxel_channel"], self.config["voxel_length"], self.config["voxel_width"]) 234 | lidar_voxel[indices[2],indices[0],indices[1]]=1 235 | else: 236 | indices_float = torch.matmul(lidar_data_, self.pc_to_voxel_indice).permute(1,0) 237 | x = indices_float[0] 238 | y = indices_float[1] 239 | z = indices_float[2] 240 | x_lower = indices_float[0].type(torch.long) 241 | x_upper = indices_float[0].type(torch.long) + 1 242 | y_lower = indices_float[1].type(torch.long) 243 | y_upper = indices_float[1].type(torch.long) + 1 244 | z_lower = 
indices_float[2].type(torch.long) 245 | z_upper = indices_float[2].type(torch.long) + 1 246 | dx = x - x_lower.type(torch.float) 247 | dy = y - y_lower.type(torch.float) 248 | dz = z - z_lower.type(torch.float) 249 | lidar_voxel = torch.zeros(self.config["voxel_channel"], self.config["voxel_length"], self.config["voxel_width"]) 250 | lidar_voxel[z_lower, x_lower, y_lower] += (1-dx)*(1-dy)*(1-dz) 251 | lidar_voxel[z_upper, x_lower, y_lower] += (1-dx)*(1-dy)*dz 252 | lidar_voxel[z_lower, x_upper, y_lower] += dx*(1-dy)*(1-dz) 253 | lidar_voxel[z_upper, x_upper, y_lower] += dx*(1-dy)*dz 254 | lidar_voxel[z_lower, x_lower, y_upper] += (1-dx)*dy*(1-dz) 255 | lidar_voxel[z_upper, x_lower, y_upper] += (1-dx)*dy*dz 256 | lidar_voxel[z_lower, x_upper, y_upper] += dx*dy*(1-dz) 257 | lidar_voxel[z_upper, x_upper, y_upper] += dx*dy*dz 258 | indices = indices_float.type(torch.long) 259 | 260 | # Projection 261 | uv, filtered_points_raw = self.Projection(lidar_data) 262 | num_point_cloud_raw = filtered_points_raw.shape[0] 263 | point_cloud_raw_tensor = torch.zeros(self.config["max_num_pc"], 3) 264 | point_cloud_raw_tensor[:num_point_cloud_raw,:] = filtered_points_raw 265 | uv_tensor = torch.zeros(self.config["max_num_pc"], 2) 266 | uv_tensor[:num_point_cloud_raw,:] = uv 267 | return lidar_voxel, point_cloud_raw_tensor, uv_tensor, num_point_cloud_raw, indices 268 | 269 | def getLidarImage(self, indices_for_bev): 270 | lidar_image = torch.zeros(3, self.config["voxel_length"], self.config["voxel_width"]) 271 | lidar_image[:, indices_for_bev[0], indices_for_bev[1]] = 1 272 | return lidar_image 273 | 274 | if __name__ == "__main__": 275 | os.environ['CUDA_VISIBLE_DEVICES'] = '4' 276 | dataset = CarlaDataset(mode="test", want_bev_image=True) 277 | data_loader = torch.utils.data.DataLoader(dataset, 278 | batch_size=2, 279 | shuffle=True) 280 | start = time.time() 281 | for batch_ndx, sample in enumerate(data_loader): 282 | print("total time: ", time.time() - start) 283 | start = time.time() 284 | print("batch_ndx is ", batch_ndx) 285 | print("sample keys are ", sample.keys()) 286 | print("bbox shape is ", sample["bboxes"].shape) 287 | print("num bboxes is ", sample["num_bboxes"]) 288 | print("image shape is ", sample["image"].shape) 289 | print("pointcloud shape is ", sample["pointcloud"].shape) 290 | print("voxel type is ", sample["pointcloud"].type()) 291 | print("bev image shape: ", sample["lidar_bev_2Dimage"].shape) 292 | bev_image_ = 0.5*sample["lidar_bev_2Dimage"][-1].permute(1,2,0) 293 | bev_image_with_bbox = putBoundingBox(bev_image_, sample["bboxes"][-1,:sample["num_bboxes"][-1]], color="red").permute(2,0,1).type(torch.float) 294 | save_image(bev_image_with_bbox, 'image/lidar_image_{}.png'.format(batch_ndx)) 295 | # print(sample["image"][-1].dtype) 296 | save_image(torch.cat((sample["image"][-1][2:3],sample["image"][-1][1:2],sample["image"][-1][0:1]) ,dim=0).type(torch.float)/256,'image/RGB_image_{}.png'.format(batch_ndx) ) 297 | print("image max, min value is ",torch.max(sample["image"][-1]), torch.min(sample["image"][-1])) 298 | print("pointcloud_raw shape is ", sample["pointcloud_raw"].shape) 299 | print("num points is ", sample["num_points_raw"]) 300 | print("projected_loc_uv shape is ", sample["projected_loc_uv"].shape) 301 | 302 | print("="*50) 303 | if batch_ndx >10: 304 | break 305 | # print(dataset[len(dataset)-1]) -------------------------------------------------------------------------------- /explain_figure/overall.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chanuk-Yang/Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection/d85e3618351af1060864d24de6a4af59017da99c/explain_figure/overall.png -------------------------------------------------------------------------------- /image/lidar_image_301_in_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chanuk-Yang/Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection/d85e3618351af1060864d24de6a4af59017da99c/image/lidar_image_301_in_0.png -------------------------------------------------------------------------------- /image/lidar_image_366_in_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chanuk-Yang/Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection/d85e3618351af1060864d24de6a4af59017da99c/image/lidar_image_366_in_0.png -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.utils import save_image 4 | 5 | import numpy as np 6 | import random 7 | import torch.nn.functional as F 8 | import os 9 | 10 | from IOU import get_3d_box, box3d_iou 11 | from model import AnchorBoundingBoxFeature 12 | 13 | 14 | class LossClass(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | self.loss = torch.nn.CrossEntropyLoss() 18 | 19 | def forward(self, predicted_feature, binary_label): 20 | loss = self.loss(predicted_feature, binary_label) 21 | return loss 22 | 23 | class LossReg(nn.Module): 24 | def __init__(self): 25 | super().__init__() 26 | self.loss = torch.nn.SmoothL1Loss(reduction="none") 27 | 28 | def forward(self, predicted_feature, binary_label): 29 | loss = self.loss(predicted_feature, binary_label) 30 | return loss 31 | 32 | 33 | class LossTotal(nn.Module): 34 | def __init__(self, config): 35 | super().__init__() 36 | self.config = config 37 | self.loss_class = LossClass() 38 | self.loss_regress = LossReg() 39 | self.anchor_bbox_feature = AnchorBoundingBoxFeature(config) 40 | anchor_set = self.anchor_bbox_feature().cuda() 41 | anchor_set_shape = anchor_set.shape 42 | self.anchor_set = anchor_set.reshape(2,int(anchor_set_shape[0]/2), 43 | anchor_set_shape[1],anchor_set_shape[2]) 44 | self.regress_type = self.config["regress_type"] 45 | 46 | def forward(self, reference_bboxes_batch, num_ref_bbox_batch, 47 | predicted_class_feature_batch, predicted_regress_feature_batch): 48 | # reference_bboxes : B, max_n(20), 8 49 | B, max_num, _ = reference_bboxes_batch.shape 50 | total_loss = torch.zeros(1).cuda() 51 | self.anchor_set_ = self.anchor_set.unsqueeze(0).repeat(B,1,1,1,1) 52 | for b in range(B): 53 | reference_bboxes = reference_bboxes_batch[b,:num_ref_bbox_batch[b]] 54 | predicted_class_feature = predicted_class_feature_batch[b] 55 | predicted_regress_feature = predicted_regress_feature_batch[b] 56 | anchor = self.anchor_set_[b] 57 | 58 | IDX, positive_position_list_all, positive_position_list = self.getPositionOfPositive(anchor,reference_bboxes, 59 | sample_threshold=self.config["pos_sample_threshold"]) 60 | negative_position_list = self.getPositionOfNegative(anchor, positive_position_list, 61 | sample_threshold=self.config["neg_sample_threshold"]) 62 | # positive_position_list = [[1,2], [3,4], [11,12], [13,14]] 63 | # negative_position_list = 
[[5,6], [7,8], [9,10]] 64 | total_loss_class = self.getClassSum(positive_position_list, negative_position_list, predicted_class_feature[:2,:,:], self.loss_class) # per anchor 65 | total_loss_class += self.getClassSum(positive_position_list, negative_position_list, predicted_class_feature[2:4,:,:], self.loss_class) # per anchor 66 | 67 | ## anchor의 좌표를 절대 좌표로 바꿔야 한다. 68 | ## position_list_all은 positive_position이 다 들어간것, true 하나당 한 픽셀이려면 수정 필요 69 | 70 | Reg_loss = self.getRegSum(IDX, positive_position_list_all, reference_bboxes, predicted_regress_feature, anchor) 71 | total_loss = total_loss_class + self.config["regress_loss_gain"] * Reg_loss 72 | return total_loss 73 | 74 | def getPositionOfPositive(self, anchor_bbox_feature, ref_bboxes, sample_threshold = 128): 75 | _, C, H, W = anchor_bbox_feature.shape 76 | positive_position_list = [] 77 | positive_position_regress = [] 78 | positive_position_idx = {} 79 | temp_cnt = 0 80 | for i, ref_bbox in enumerate(ref_bboxes): 81 | positive_position_idx[i] = [] 82 | x_scale = int(self.config["voxel_length"] / (self.config["lidar_x_max"] - self.config["lidar_x_min"])) 83 | y_scale = int(self.config["voxel_width"] / (self.config["lidar_y_max"] - self.config["lidar_y_min"])) 84 | x_offset = int(-self.config["lidar_x_min"] * x_scale) 85 | y_offset = int(-self.config["lidar_y_min"] * y_scale) 86 | reduced_scale = self.config["anchor_bbox_feature"]["reduced_scale"] 87 | point_x = int((ref_bbox[0]*x_scale + x_offset)/reduced_scale) # (0~ 700/4) 88 | point_y = int((ref_bbox[1]*y_scale + y_offset)/reduced_scale) #(0 ~ 700/4) 89 | if point_x < 0 or point_x > H - 1 or point_y < 0 or point_y > W - 1: 90 | continue 91 | for x_int in range(self.config["positive_range"]): 92 | pos_x = point_x - int(self.config["positive_range"]/2) + x_int 93 | for y_int in range(self.config["positive_range"]): 94 | pos_y = point_y - int(self.config["positive_range"]/2) + y_int 95 | if pos_x < 0 or pos_x > H - 1 or pos_y < 0 or pos_y > W - 1: 96 | continue 97 | positive_position_list.append([pos_x, pos_y]) 98 | if self.regress_type==0: 99 | positive_position_regress.append([pos_x, pos_y]) 100 | positive_position_idx[i].append(temp_cnt) 101 | temp_cnt+=1 102 | else: 103 | if pos_x == point_x and pos_y == point_y: 104 | positive_position_regress.append([pos_x, pos_y]) #중심만 추가하기 105 | positive_position_idx[i].append(temp_cnt) 106 | temp_cnt+=1 107 | # sample_idx = np.random.choice(len(positive_position_list), np.max(sample_threshold, len(positive_position_list)), replace=False) 108 | np.random.shuffle(positive_position_list) 109 | if len(positive_position_list) > sample_threshold: 110 | positive_position_list = positive_position_list[:sample_threshold] 111 | return positive_position_idx, np.array(positive_position_regress), positive_position_list 112 | 113 | def getPositionOfNegative(self, anchor_bbox_feature, positive_position_list, sample_threshold = 128): 114 | _, C, H, W = anchor_bbox_feature.shape 115 | negative_position_list = [] 116 | sample = 0 117 | while(1): 118 | x = np.random.randint(H) 119 | y = np.random.randint(W) 120 | if [x, y] in positive_position_list: 121 | continue 122 | else: 123 | negative_position_list.append([x, y]) 124 | sample += 1 125 | if sample > sample_threshold: 126 | break 127 | return negative_position_list 128 | 129 | def getClassSum(self, positive_position_list, negative_position_list, predicted_class, loss_class): 130 | positive_size = len(positive_position_list) 131 | negative_size = len(negative_position_list) 132 | positive_label = 
torch.ones((positive_size), dtype=torch.long).cuda() 133 | negative_label = torch.zeros((negative_size), dtype=torch.long).cuda() 134 | negative_position = torch.tensor(negative_position_list, dtype=torch.long).cuda() 135 | positive_position = torch.tensor(positive_position_list, dtype=torch.long).cuda() 136 | c = predicted_class[:, negative_position[:,0], negative_position[:,1]].permute(1,0) 137 | if positive_size > 0: 138 | a = predicted_class[:, positive_position[:,0], positive_position[:,1]].permute(1,0) 139 | loss_sum = loss_class(a, positive_label) + loss_class(c, negative_label) 140 | else: 141 | loss_sum = loss_class(c, negative_label) 142 | return loss_sum 143 | 144 | def LossReg(self, ref_box, pred_box, a_box): 145 | 146 | ### Rel coordinate 기준 147 | 148 | # ref_box : [7,] 149 | # pred_box : [N, 14, ] 150 | # anchor_box : [N, 2, 7] 151 | 152 | N, num_anchor, char = a_box.shape 153 | ref_box = ref_box.unsqueeze(0).unsqueeze(0).repeat(N,num_anchor,1) 154 | pred_box = pred_box.reshape(N,num_anchor,char) 155 | xy_ref_offset = (ref_box[:,:,:2]-a_box[:,:,:2])/torch.sqrt(torch.pow(a_box[:,:,3:4],2) + torch.pow(a_box[:,:,4:5],2)) 156 | z_ref_offset = (ref_box[:,:,2:3]-a_box[:,:,2:3])/(a_box[:,:,5:6]) 157 | whd_ref_offset = torch.log(ref_box[:,:,3:6]/(a_box[:,:,3:6])) 158 | ori_ref_offset = torch.atan2(torch.sin(ref_box[:,:,6] - a_box[:,:,-1]), torch.cos(ref_box[:,:,6] - a_box[:,:,-1])) 159 | ref_offset = torch.cat([xy_ref_offset, z_ref_offset, whd_ref_offset, ori_ref_offset.unsqueeze(-1)], dim=-1) #[N,2,7] 160 | 161 | l1_loss = self.loss_regress(pred_box[:,:,:char], ref_offset) 162 | 163 | loss = 1.0/(N*num_anchor*char) * torch.sum(l1_loss) 164 | 165 | return loss 166 | 167 | def getRegSum(self, index, positive_position_list, reference_bboxes, predicted_regress_feature, anchor): 168 | 169 | # reference_bboxes = x : 0~70.0, y : -35.0 ~ 35.0 170 | # positive_position_list = x : 700/4 y: 700/4 , size : [700/4, 700/4] 171 | # predicted_regress_feature = x : ?, y: ?, size : [700/4, 700/4] 172 | # anchor = x : 0~70.0, y : -35.0~35.0, size : [700/4, 700/4] 173 | 174 | # back propagation is error. Batch sum and loss device type is error i think 175 | reg_loss = torch.zeros(1).cuda() 176 | for idx, reference_box in enumerate(reference_bboxes): 177 | positive_position = torch.tensor(positive_position_list[index[idx]], dtype=torch.long).cuda() 178 | predicted_box = predicted_regress_feature[:,positive_position[:,0], positive_position[:,1]].permute(1,0) 179 | anchor_box = anchor[:,:, positive_position[:,0], positive_position[:,1]].permute(2,0,1) #[2,7] 180 | if predicted_box.shape[0] == 0: 181 | continue 182 | # predicted_box = torch.stack(box_list, dim=0) #[N,14], predicted bbox set 183 | # anchor_box = torch.stack(abox_list, dim=0) #[N,2,7], anchor bbox set 184 | 185 | loss = self.LossReg(reference_box, predicted_box, anchor_box) 186 | reg_loss +=loss 187 | 188 | 189 | return reg_loss 190 | 191 | if __name__ == '__main__': 192 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 193 | # print(anchor_set_1[0, 0, :, :]) 194 | # print(anchor_set_1[0, 1, :, :]) 195 | # save_image(anchor_set_1[0, 0, :, :]/70.0, 'anchor/x.png') 196 | # save_image(anchor_set_1[0, 1, :, :]/(70.0)+0.5, 'anchor/y.png') 197 | #### Not use this function but will be use next time... 198 | # def getIOUfeature(anchor_bbox_feature, ref_bboxes): 199 | # ''' 200 | # caution! it will be extremely slow i guess... 
need to make faster algorithm 201 | # :param anchor_bbox_feature: reference bbox feature (7,H,W) 202 | # :param ref_bboxes: predicted bbox feature (7,H,W) 203 | # :return:IOU_feature 204 | # ''' 205 | # anchor_bbox_feature = anchor_bbox_feature.cpu() 206 | # C, H, W = anchor_bbox_feature.shape 207 | # IOU_feature = torch.zeros((1, H, W)) 208 | # for h in range(H): 209 | # for w in range(W): 210 | # for ref_bbox in ref_bboxes: 211 | # anchor_bbox = anchor_bbox_feature[:, h, w] 212 | # distance = torch.sqrt(torch.sum(torch.pow((anchor_bbox[:3] - ref_bbox[:3]),2))) 213 | # if distance < 4.0: 214 | # ref_bbox_corners = get_3d_box(ref_bbox[:3], ref_bbox[3:6], ref_bbox[-1]) 215 | # anchor_bbox_corners = get_3d_box(anchor_bbox[:3], anchor_bbox[3:6], anchor_bbox[-1]) 216 | # (IOU_3d, IOU_2d) = box3d_iou(ref_bbox_corners, anchor_bbox_corners) 217 | # IOU_feature[:, h, w] = IOU_3d 218 | # else: 219 | # IOU_feature[:, h, w] = 0 220 | # return IOU_feature 221 | 222 | # def getClassRefFromDist(anchor_bbox_feature, ref_bboxes): 223 | # ''' 224 | # its extremely slow. how could we change this? 225 | # :param anchor_bbox_feature: reference bbox. list(bboxes) 226 | # :param ref_bboxes: predicted bbox feature. (7,H,W) 227 | # :return:class_feature. (2, H, W) 228 | # ''' 229 | # C, H, W = anchor_bbox_feature.shape 230 | # class_feature = torch.zeros((2, H, W)).cuda() 231 | # anchor_bbox_feature = anchor_bbox_feature.cuda() 232 | # ref_bboxes = ref_bboxes.cuda() 233 | # for h in range(H): 234 | # print("doing at ",h, "in class ref.") 235 | # for w in range(W): 236 | # for ref_bbox in ref_bboxes: 237 | # anchor_bbox = anchor_bbox_feature[:, h, w] 238 | # # distance = torch.sqrt(torch.sum(torch.pow((anchor_bbox[:3] - ref_bbox[:3]),2))) 239 | # # if class_feature[0, h, w] == 0 and class_feature[1, h, w] == 0: 240 | # # if distance < 1: 241 | # # class_feature[0, h, w] = 1 242 | # # elif distance > 4: 243 | # # class_feature[1, h, w] = 1 244 | # return class_feature 245 | 246 | # def getClassRefFromDist_fast(anchor_bbox_feature, ref_bboxes): 247 | # ''' 248 | # :param anchor_bbox_feature: reference bbox. list(bboxes) 249 | # :param ref_bboxes: predicted bbox feature. (7,H,W) 250 | # :return:class_feature. (2, H, W) 251 | # ''' 252 | # C, H, W = anchor_bbox_feature.shape 253 | # class_feature_pos = torch.zeros((1, H, W)).cuda() 254 | # class_feature_neg = torch.ones((1, H, W)).cuda() 255 | # class_feature = torch.cat((class_feature_pos,class_feature_neg)) 256 | # # anchor_bbox_feature = anchor_bbox_feature.cuda() 257 | # for ref_bbox in ref_bboxes: 258 | # point_x = int(ref_bbox[0]*10/4) 259 | # point_y = int((ref_bbox[1]*10 + 350)/4) 260 | # if point_x < 0 or point_x > int(700/4) - 1 or point_y < 0 or point_y > int(700/4) - 1: 261 | # continue 262 | # for x_int in range(5): 263 | # for y_int in range(5): 264 | # class_feature[0, point_x - 2 + x_int, point_y - 2 + y_int] = 1 265 | # class_feature[1, point_x - 2 + x_int, point_y - 2 + y_int] = 0 266 | # # if class_feature[0, h, w] == 0 and class_feature[1, h, w] == 0: 267 | # # if distance < 1: 268 | # # class_feature[0, h, w] = 1 269 | # # elif distance > 4: 270 | # # class_feature[1, h, w] = 1 271 | # return class_feature -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python script. 2 | 3 | # Press Shift+F10 to execute it or replace it with your code. 
4 | # Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import models 8 | 9 | 10 | class ResidualBlock(nn.Module): 11 | def __init__(self, in_channels, out_channels): 12 | super(ResidualBlock,self).__init__() 13 | self.in_channels, self.out_channels = in_channels, out_channels 14 | if self.should_apply_shortcut: 15 | self.conv1 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=(3, 3), stride=(2, 2), 16 | padding=(1, 1), bias=False) 17 | else: 18 | self.conv1 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=(3, 3), stride=(1, 1), 19 | padding=(1, 1), bias=False) 20 | self.bn1 = nn.BatchNorm2d(self.out_channels, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 21 | self.relu1 = nn.ReLU(inplace=True) 22 | self.conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 23 | bias=False) 24 | self.bn2 = nn.BatchNorm2d(self.out_channels, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 25 | self.relu2 = nn.ReLU(inplace=True) 26 | if self.should_apply_shortcut: 27 | self.down_conv = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=(1, 1), stride=(2, 2), 28 | bias=False) 29 | self.down_bn = nn.BatchNorm2d(self.out_channels, eps=1e-05, momentum=0.1, affine=True, 30 | track_running_stats=True) 31 | 32 | def forward(self, x): 33 | if self.should_apply_shortcut: 34 | residual = self.down_bn(self.down_conv(x)) 35 | else: 36 | residual = x 37 | x = self.relu1(self.bn1(self.conv1(x))) 38 | x = self.bn2(self.conv2(x)) 39 | x += residual 40 | x = self.relu2(x) 41 | return x 42 | 43 | @property 44 | def should_apply_shortcut(self): 45 | return self.in_channels != self.out_channels 46 | 47 | 48 | class ResidualBlockModule(nn.Module): 49 | def __init__(self, first_in_channel, last_out_channel, num_resblock): 50 | super(ResidualBlockModule, self).__init__() 51 | self.sequential = nn.Sequential() 52 | for i in range(num_resblock): 53 | if i == 0: 54 | self.sequential.add_module("resblock_" + str(i), 55 | ResidualBlock(first_in_channel, last_out_channel)) 56 | else: 57 | self.sequential.add_module("resblock_" + str(i), 58 | ResidualBlock(last_out_channel, last_out_channel)) 59 | def forward(self, x): 60 | x = self.sequential(x) 61 | return(x) 62 | 63 | 64 | class ResnetCustomed(nn.Module): 65 | def __init__(self, out_feature=(32, 64, 128, 192, 256), num_res_block=(1,2,4,6,6)): 66 | super(ResnetCustomed, self).__init__() 67 | self.layer1 = ResidualBlockModule(out_feature[0], out_feature[0], num_res_block[0]) 68 | self.layer2 = ResidualBlockModule(out_feature[0], out_feature[1], num_res_block[1]) 69 | self.layer3 = ResidualBlockModule(out_feature[1], out_feature[2], num_res_block[2]) 70 | self.layer4 = ResidualBlockModule(out_feature[2], out_feature[3], num_res_block[3]) 71 | self.layer5 = ResidualBlockModule(out_feature[3], out_feature[4], num_res_block[4]) 72 | 73 | def forward(self, x): 74 | x = self.layer1(x) 75 | x1 = self.layer2(x) 76 | x2 = self.layer3(x1) 77 | x3 = self.layer4(x2) 78 | x4 = self.layer5(x3) 79 | return x4, x3, x2 80 | 81 | 82 | class AnchorBoundingBoxFeature(nn.Module): 83 | def __init__(self, config): 84 | super(AnchorBoundingBoxFeature,self).__init__() 85 | self.config = config 86 | 87 | self.f_height = int(self.config["voxel_length"]/self.config["anchor_bbox_feature"]["reduced_scale"]) 88 | self.f_width = 
int(self.config["voxel_width"]/self.config["anchor_bbox_feature"]["reduced_scale"]) 89 | self.width = self.config["anchor_bbox_feature"]["width"] 90 | self.length = self.config["anchor_bbox_feature"]["length"] 91 | self.height = self.config["anchor_bbox_feature"]["height"] 92 | 93 | def forward(self): 94 | anc_x = torch.matmul( 95 | torch.linspace(self.config["lidar_x_min"], 96 | self.config["lidar_x_max"], 97 | self.f_height).view(self.f_height, 1), 98 | torch.ones(1, self.f_width)).view(1, self.f_height, self.f_width) 99 | anc_y = torch.matmul( 100 | torch.ones(self.f_height, 1), 101 | torch.linspace(self.config["lidar_y_min"], 102 | self.config["lidar_y_max"], 103 | self.f_width).view(1, self.f_width)).view(1, self.f_height, self.f_width) 104 | anc_z = torch.ones(1, self.f_height, self.f_width) * (-4.5) 105 | anc_w = torch.ones(1, self.f_height, self.f_width) * self.width 106 | anc_l = torch.ones(1, self.f_height, self.f_width) * self.length 107 | anc_h = torch.ones(1, self.f_height, self.f_width) * self.height 108 | anc_ori = torch.ones(1, self.f_height, self.f_width) * 0 109 | anc_ori_90 = torch.ones(1, self.f_height, self.f_width) * 3.1415926/2 110 | anc_set_1 = torch.cat((anc_x, anc_y, anc_z, anc_l, anc_w, anc_h, anc_ori), 0) 111 | anc_set_2 = torch.cat((anc_x, anc_y, anc_z, anc_l, anc_w, anc_h, anc_ori_90), 0) 112 | anc_set = torch.cat((anc_set_1,anc_set_2), dim=0) # dim = [2*7, self.f_height, self.f_width] 113 | return anc_set 114 | 115 | 116 | class OffsettoBbox(nn.Module): 117 | def __init__(self, config): 118 | super(OffsettoBbox, self).__init__() 119 | self.anchor_bbox_feature = AnchorBoundingBoxFeature(config) 120 | 121 | def forward(self, x): 122 | """ 123 | x: x_reg [b,num_anc*7,wid,hei] 124 | """ 125 | anc_set = self.anchor_bbox_feature().cuda().unsqueeze(0) 126 | pred_xy_1 = x[:,:2,:,:] * torch.sqrt(torch.pow(anc_set[:,3:4,:,:],2) + torch.pow(anc_set[:,4:5,:,:],2)) + anc_set[:,:2,:,:] 127 | pred_z_1 = x[:,2:3,:,:] * (anc_set[:,5:6,:,:]) + anc_set[:,2:3,:,:] 128 | 129 | pred_whl_1 = torch.exp(x[:,3:6,:,:]) * anc_set[:,3:6,:,:] 130 | pred_ori_1 = torch.atan2(torch.sin(x[:,6:7,:,:] + anc_set[:,6:7,:,:]), torch.cos(x[:,6:7,:,:] + anc_set[:,6:7,:,:])) 131 | pred_xy_2 = x[:,7:9,:,:] * torch.sqrt(torch.pow(anc_set[:,10:11,:,:],2) + torch.pow(anc_set[:,11:12,:,:],2)) + anc_set[:,7:9,:,:] 132 | pred_z_2 = x[:,9:10,:,:] * (anc_set[:,12:13,:,:]) + anc_set[:,9:10,:,:] 133 | pred_whl_2 = torch.exp(x[:,10:13,:,:]) * anc_set[:,10:13,:,:] 134 | pred_ori_2 = torch.atan2(torch.sin(x[:,13:14,:,:] + anc_set[:,13:14,:,:]), torch.cos(x[:,13:14,:,:] + anc_set[:,13:14,:,:])) 135 | pred_bbox_feature = torch.cat((pred_xy_1, pred_z_1, pred_whl_1, pred_ori_1, 136 | pred_xy_2, pred_z_2, pred_whl_2, pred_ori_2), dim=1) 137 | return pred_bbox_feature 138 | 139 | 140 | class LidarBackboneNetwork(nn.Module): 141 | def __init__(self, out_feature=(32, 64, 128, 192, 256), num_res_block=(1,2,4,6,6), Num_anchor = 2): 142 | super(LidarBackboneNetwork, self).__init__() 143 | self.backbone = ResnetCustomed(out_feature, num_res_block) 144 | self.num_anchor = Num_anchor 145 | 146 | # FPN 147 | self.latconv1 = nn.Conv2d(out_feature[-2], out_feature[-2], kernel_size=(1, 1), stride=(1, 1), bias=False) 148 | self.downconv1 = nn.Conv2d(out_feature[-1], out_feature[-2], kernel_size=(1, 1), stride=(1, 1), bias=False) 149 | self.upscale1 = nn.UpsamplingBilinear2d(scale_factor=2) 150 | self.latconv2 = nn.Conv2d(out_feature[-3], out_feature[-2], kernel_size=(1, 1), stride=(1, 1), bias=False) 151 | self.upscale2 = 
nn.UpsamplingBilinear2d(scale_factor=2) # NEED TO GENERALIZE IN BEV SIZE 152 | self.conv3 = nn.Conv2d(out_feature[-2], out_feature[-2], kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) 153 | 154 | self.classconv = nn.Conv2d(out_feature[-2], Num_anchor*2, kernel_size=(1, 1), stride=(1, 1), bias=False) 155 | self.softmax1 = nn.Softmax(dim=1) 156 | self.softmax2 = nn.Softmax(dim=1) 157 | self.bbox3dconv = nn.Conv2d(out_feature[-2], Num_anchor*7, kernel_size=(1, 1), stride=(1, 1), bias=False) 158 | 159 | def forward(self, x): 160 | x4, x3, x2 = self.backbone(x) 161 | x3 = self.latconv1(x3) 162 | x3_ = self.upscale1(self.downconv1(x4)) 163 | x3 += x3_ 164 | x2 = self.latconv2(x2) 165 | x2_ = self.upscale2(x3) 166 | x2 += x2_ 167 | x_pred = self.conv3(x2) 168 | x_cls = self.classconv(x_pred) 169 | x_cls_1 = self.softmax1(x_cls[:,:2]) 170 | x_cls_2 = self.softmax2(x_cls[:,2:4]) 171 | x_cls = torch.cat((x_cls_1,x_cls_2), dim=1) 172 | x_reg = self.bbox3dconv(x_pred) 173 | return x_cls, x_reg 174 | 175 | 176 | class ObjectDetection_DCF(nn.Module): 177 | def __init__(self, config): 178 | super(ObjectDetection_DCF, self).__init__() 179 | self.offset_to_bbox = OffsettoBbox(config) 180 | lm_config = config["lidar_module"] 181 | out_feature = (lm_config["out_feature1"], 182 | lm_config["out_feature2"], 183 | lm_config["out_feature3"], 184 | lm_config["out_feature4"], 185 | lm_config["out_feature5"]) 186 | num_resblock = (lm_config["num_res_block1"], 187 | lm_config["num_res_block2"], 188 | lm_config["num_res_block3"], 189 | lm_config["num_res_block4"], 190 | lm_config["num_res_block5"]) 191 | self.lidar_backbone = LidarBackboneNetwork(out_feature, num_resblock) 192 | # self.image_backbone = models.resnet18(pretrained=True) 193 | 194 | def forward(self, x_lidar, x_image): 195 | lidar_pred_cls, lidar_pred_reg = self.lidar_backbone(x_lidar) 196 | # lidar_pred_cls = self.lidar_backbone(x_lidar) 197 | # image_ = self.image_backbone(x_image) 198 | lidar_pred_bbox = self.offset_to_bbox(lidar_pred_reg) 199 | """ 200 | TODO 201 | 1. make continuous fusion layer from image 202 | 2. add with lidar feature 203 | """ 204 | return torch.cat((lidar_pred_cls, lidar_pred_reg, lidar_pred_bbox), dim = 1) #, lidar_pred_bbox 205 | 206 | 207 | # Press the green button in the gutter to run the script. 208 | if __name__ == '__main__': 209 | image_backbone = models.resnet18(pretrained=True) 210 | model = LidarBackboneNetwork() 211 | pred = model(torch.ones(4, 32, 700, 700)) 212 | print(pred[0].shape) 213 | pred2 = image_backbone(torch.ones(4, 3, 480, 640)) 214 | a = 1 215 | print("model inference is good") 216 | # See PyCharm help at https://www.jetbrains.com/help/pycharm/ 217 | -------------------------------------------------------------------------------- /separation_axis_theorem.py: -------------------------------------------------------------------------------- 1 | 2 | # This code performs collision testing of convex 2D polyedra by means 3 | # of the Hyperplane separation theorem, also known as Separating axis theorem (SAT). 4 | # 5 | # For more information visit: 6 | # https://en.wikipedia.org/wiki/Hyperplane_separation_theorem 7 | # 8 | # Copyright (C) 2016, Juan Antonio Aldea Armenteros 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 
13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see . 21 | import math 22 | 23 | 24 | # -*- coding: utf8 -*- 25 | 26 | def normalize(v): 27 | norm = math.sqrt(v[0] ** 2 + v[1] ** 2) 28 | return (v[0] / norm, v[1] / norm) 29 | 30 | def dot(a, b): 31 | return a[0] * b[0] + a[1] * b[1]; 32 | 33 | def edge_direction(p0, p1): 34 | return (p1[0] - p0[0], p1[1] - p0[1]); 35 | 36 | def orthogonal(v): 37 | return (v[1], -v[0]) 38 | 39 | def vertices_to_edges(vertices): 40 | return [edge_direction(vertices[i], vertices[(i + 1) % len(vertices)]) \ 41 | for i in range(len(vertices))] 42 | 43 | def project(vertices, axis): 44 | dots = [dot(vertex, axis) for vertex in vertices] 45 | return [min(dots), max(dots)] 46 | 47 | def contains(n, range_): 48 | a = range_[0] 49 | b = range_[1] 50 | if b < a: 51 | a = range_[1] 52 | b = range_[0] 53 | return (n >= a) and (n <= b); 54 | 55 | def overlap(a, b): 56 | if contains(a[0], b): 57 | return True; 58 | if contains(a[1], b): 59 | return True; 60 | if contains(b[0], a): 61 | return True; 62 | if contains(b[1], a): 63 | return True; 64 | return False; 65 | 66 | def separating_axis_theorem(vertices_a, vertices_b): 67 | edges_a = vertices_to_edges(vertices_a); 68 | edges_b = vertices_to_edges(vertices_b); 69 | 70 | edges = edges_a + edges_b 71 | 72 | axes = [normalize(orthogonal(edge)) for edge in edges] 73 | 74 | for i in range(len(axes)): 75 | projection_a = project(vertices_a, axes[i]) 76 | projection_b = project(vertices_b, axes[i]) 77 | overlapping = overlap(projection_a, projection_b) 78 | if not overlapping: 79 | return False; 80 | return True 81 | 82 | def get_vertice_rect(center, box_size, heading_angle): 83 | 84 | center_x = center[0] 85 | center_y = center[1] 86 | yaw = heading_angle 87 | W = box_size[1] 88 | L = box_size[0] # z rotation is difference i think (90 deg) 89 | vertex_3 = (center_x + (L/2*math.cos(yaw) - W/2*math.sin(yaw)), center_y + (L/2*math.sin(yaw) + W/2*math.cos(yaw))) 90 | vertex_4 = (center_x + (-L/2*math.cos(yaw) - W/2*math.sin(yaw)), center_y + (-L/2*math.sin(yaw) + W/2*math.cos(yaw))) 91 | vertex_1 = (center_x + (-L/2*math.cos(yaw) + W/2*math.sin(yaw)), center_y + (-L/2*math.sin(yaw) - W/2*math.cos(yaw))) 92 | vertex_2 = (center_x + (L/2*math.cos(yaw) + W/2*math.sin(yaw)), center_y + (L/2*math.sin(yaw) - W/2*math.cos(yaw))) 93 | vertices = [vertex_1, vertex_2, vertex_3, vertex_4] 94 | return vertices 95 | 96 | 97 | 98 | def main(): 99 | a_vertices = [(0, 0), (70, 70), (70, 0), (0, 70)] 100 | b_vertices = [(70, 70), (150, 70), (150, 150),(70, 150)] 101 | c_vertices = [(30, 30), (150, 70), (70, 150)] 102 | 103 | print (separating_axis_theorem(a_vertices, b_vertices)) 104 | print (separating_axis_theorem(a_vertices, c_vertices)) 105 | print (separating_axis_theorem(b_vertices, c_vertices)) 106 | 107 | 108 | if __name__ == "__main__": 109 | main() -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import random 3 | from numpy.core.fromnumeric import argmax 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from 
torchvision.utils import save_image 9 | from torch.nn.parallel import DistributedDataParallel as DDP 10 | 11 | import os 12 | import argparse 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | import matplotlib.pyplot as plt 16 | import time 17 | 18 | from data_import_carla import CarlaDataset 19 | from loss import LossTotal 20 | from model import LidarBackboneNetwork, ObjectDetection_DCF 21 | from data_import import putBoundingBox 22 | from IOU import get_3d_box, box3d_iou 23 | from separation_axis_theorem import get_vertice_rect, separating_axis_theorem 24 | 25 | 26 | 27 | 28 | class Test: 29 | def __init__(self, pre_trained_net, config): 30 | """ 31 | configuration 32 | nms_iou_score_theshold (0.01) 33 | plot_AP_graph (False) 34 | """ 35 | self.net = pre_trained_net 36 | self.config = config 37 | self.net.eval() 38 | self.num_TP_set = {} 39 | self.num_TP_set_per_predbox = [] 40 | self.num_T = 0 41 | self.num_P = 0 42 | self.IOU_threshold = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] 43 | self.loss_total = LossTotal(config) 44 | for iou_threshold in self.IOU_threshold: 45 | self.num_TP_set[iou_threshold] = 0 46 | 47 | def get_num_T(self): 48 | return self.num_T 49 | 50 | def get_num_P(self): 51 | return self.num_P 52 | 53 | def get_num_TP_set(self): 54 | return self.num_TP_set 55 | 56 | def save_feature_result(self,bev_image, ref_bboxes, num_ref_bboxes, i, epoch, dir="./result"): 57 | B = ref_bboxes.shape[0] 58 | file_list = os.listdir("./") 59 | if not "result" in file_list: 60 | os.mkdir(dir) 61 | file_list = os.listdir(dir) 62 | if not "epoch_{}".format(epoch) in file_list: 63 | os.mkdir(dir+"/epoch_{}".format(epoch)) 64 | ref_bboxes = ref_bboxes.cpu().clone().numpy() 65 | num_ref_bboxes = num_ref_bboxes.cpu().clone().numpy() 66 | for b in range(B): 67 | save_image(self.pred_cls[b, 1, :, :], dir+"/epoch_{}/{}_in_{}_positive_image.png".format(epoch,i,b )) 68 | save_image(self.pred_cls[b, 0, :, :], dir+"/epoch_{}/{}_in_{}_negative_image.png".format(epoch,i,b)) 69 | bev_image_ = 0.5*bev_image[b].permute(1,2,0) 70 | bev_image_with_bbox = putBoundingBox(bev_image_, self.refined_bbox[b], self.config, color="green").permute(2,0,1).type(torch.float) 71 | save_image(bev_image_with_bbox, dir+"/epoch_{}/{}_in_{}_bev_image_with_predbbox.png".format(epoch,i,b)) 72 | 73 | bev_image_with_bbox = putBoundingBox(bev_image_, ref_bboxes[b,:num_ref_bboxes[b]], self.config, color="red").permute(2,0,1).type(torch.float) 74 | save_image(bev_image_with_bbox, dir+"/epoch_{}/{}_in_{}_bev_image_with_refbbox.png".format(epoch,i,b)) 75 | 76 | def get_eval_value_onestep(self, lidar_voxel, camera_image, ref_bboxes, num_ref_bboxes): 77 | 78 | pred = self.net(lidar_voxel, camera_image) 79 | pred_cls, pred_reg, pred_bbox_f = torch.split(pred,[4, 14, 14], dim=1) 80 | self.pred_cls = pred_cls.cpu().clone().detach() 81 | pred_bbox_f = pred_bbox_f.cpu().clone().detach() 82 | self.loss_value = self.loss_total(ref_bboxes.cuda(), num_ref_bboxes, pred_cls, pred_reg) 83 | pred_bboxes = self.get_bboxes(self.pred_cls, pred_bbox_f, score_threshold=self.config["score_threshold"]) # shape: b * list[tensor(N * 7)] 84 | # self.refined_bbox = self.NMS_IOU(pred_bboxes, nms_iou_score_theshold=self.config["nms_iou_threshold"]) # shape: b * list[N *list[tensor(7)]] 85 | self.refined_bbox = self.NMS_SAT(pred_bboxes) # shape: b * list[N *list[tensor(7)]] 86 | self.precision_recall_singleshot(self.refined_bbox, ref_bboxes) # single batch 87 | 88 | def get_bboxes(self, pred_cls, pred_reg, score_threshold=0.8): 89 | """ 90 | get 
91 |         """
92 |         B, C_cls, W, H = pred_cls.shape
93 |         B, C_reg, W, H = pred_reg.shape
94 |         anchor_numb = int(C_cls/2)
95 |         reg_channel_per_anc = int(C_reg/anchor_numb)
96 |         selected_bboxes_batch = []
97 |         for b in range(B):
98 |             selected_bboxes = []
99 |             for a in range(anchor_numb):
100 |                 cls_pos = anchor_numb * a + 1  # channel of the positive-class score for anchor a
101 |                 reg_cha = reg_channel_per_anc * a
102 |                 pred_cls_ = pred_cls[b,cls_pos].view(-1) > score_threshold
103 |                 indices = torch.nonzero(pred_cls_).view(-1)
104 |                 pred_reg_ = pred_reg[b, reg_cha:reg_cha+reg_channel_per_anc, :, :].view((reg_channel_per_anc,-1))
105 |                 selected_bboxes_ = pred_reg_[:,indices].permute(1,0)
106 |                 selected_bboxes += [selected_bboxes_]
107 |             selected_bboxes_batch.append(torch.cat(selected_bboxes, dim=0))
108 |         return selected_bboxes_batch
109 | 
110 |     def NMS_IOU(self, pred_bboxes, nms_iou_score_theshold=0.01):  # greedy NMS: drop a candidate if its 3D IoU with any kept box exceeds the threshold
111 |         filtered_bboxes_batch = []
112 |         B = len(pred_bboxes)
113 |         for b in range(B):
114 |             filtered_bboxes = []
115 |             filtered_bboxes_index = []
116 |             print("pred bbox: ", pred_bboxes[b].shape[0])
117 |             for i in range(pred_bboxes[b].shape[0]):
118 |                 bbox = pred_bboxes[b][i]
119 |                 if len(filtered_bboxes) == 0:
120 |                     filtered_bboxes.append(bbox)
121 |                     continue
122 |                 center = bbox[:3].numpy()
123 |                 box_size = bbox[3:6].numpy()
124 |                 heading_angle = bbox[6].numpy()
125 |                 cand_bbox_corners = get_3d_box(center, box_size, heading_angle)
126 |                 j = 0
127 |                 for selected_bbox in filtered_bboxes:
128 |                     j += 1
129 |                     center_ = selected_bbox[:3].numpy()+0.0001  # small offset so identical boxes do not give a degenerate intersection polygon
130 |                     box_size_ = selected_bbox[3:6].numpy()
131 |                     heading_angle_ = selected_bbox[6].numpy()
132 |                     selected_bbox_corners = get_3d_box(center_, box_size_, heading_angle_)
133 |                     (IOU_3d, IOU_2d) = box3d_iou(cand_bbox_corners, selected_bbox_corners)
134 |                     if IOU_3d > nms_iou_score_theshold:
135 |                         break
136 |                     else:
137 |                         if j == len(filtered_bboxes):  # no overlap with any kept box, so keep the candidate
138 |                             filtered_bboxes.append(bbox)
139 |             filtered_bboxes_batch.append(filtered_bboxes)
140 |         return filtered_bboxes_batch
141 | 
142 |     def NMS_SAT(self, pred_bboxes):
143 |         # NMS variant that replaces the IoU test with a separating axis theorem (SAT) overlap check
144 |         filtered_bboxes_batch = []
145 |         B = len(pred_bboxes)
146 |         for b in range(B):
147 |             filtered_bboxes = []
148 |             filtered_bboxes_index = []
149 |             # if pred_bboxes[b].shape[0] == 0:
150 |             #     filtered_bboxes_batch.append(None)
151 |             #     continue
152 |             for i in range(pred_bboxes[b].shape[0]):
153 |                 bbox = pred_bboxes[b][i]
154 |                 if len(filtered_bboxes) == 0:
155 |                     filtered_bboxes.append(bbox)
156 |                     continue
157 |                 center = bbox[:3].numpy()
158 |                 box_size = bbox[3:6].numpy()
159 |                 heading_angle = bbox[6].numpy()
160 |                 cand_bbox_corners = get_vertice_rect(center, box_size, heading_angle)
161 |                 j = 0
162 |                 for selected_bbox in filtered_bboxes:
163 |                     j += 1
164 |                     center_ = selected_bbox[:3].numpy()
165 |                     box_size_ = selected_bbox[3:6].numpy()
166 |                     heading_angle_ = selected_bbox[6].numpy()
167 |                     selected_bbox_corners = get_vertice_rect(center_, box_size_, heading_angle_)
168 |                     is_overlapped = separating_axis_theorem(cand_bbox_corners, selected_bbox_corners)
169 |                     if is_overlapped:
170 |                         break
171 |                     else:
172 |                         if j == len(filtered_bboxes):  # no overlap with any kept box, so keep the candidate
173 |                             filtered_bboxes.append(bbox)
174 |             filtered_bboxes_batch.append(filtered_bboxes)
175 |         return filtered_bboxes_batch
176 | 
177 |     def precision_recall_singleshot(self, pred_bboxes, ref_bboxes):
178 |         B,_,_ = ref_bboxes.shape
179 |         for b in range(B):
180 |             pred_bboxes_sb = pred_bboxes[b]
181 |             ref_bboxes_sb = ref_bboxes[b]
182 |             if pred_bboxes_sb is not None:
183 |                 for pred_bbox in pred_bboxes_sb:
184 |                     self.num_P += 1
185 |                     center = pred_bbox[:3].numpy()
186 |                     box_size = pred_bbox[3:6].numpy()
187 |                     heading_angle = pred_bbox[6].numpy()
188 |                     pred_bbox_corners = get_3d_box(center, box_size, heading_angle)
189 |                     true_positive_cand_score = {}
190 |                     for ref_bbox in ref_bboxes_sb:
191 |                         if ref_bbox[-1] == 1:  # only evaluate reference boxes whose flag (last element) is set
192 |                             center_ = ref_bbox[:3].numpy()
193 |                             box_size_ = ref_bbox[3:6].numpy()
194 |                             heading_angle_ = ref_bbox[6].numpy()
195 |                             ref_bbox_corners = get_3d_box(center_, box_size_, heading_angle_)
196 |                             (IOU_3d, IOU_2d) = box3d_iou(pred_bbox_corners, ref_bbox_corners)
197 |                             for iou_threshold in self.IOU_threshold:
198 |                                 if IOU_2d > iou_threshold:  # matching is done on the BEV (2D) IoU
199 |                                     true_positive_cand_score[iou_threshold] = IOU_2d
200 |                     for iou_threshold in self.IOU_threshold:
201 |                         if iou_threshold in true_positive_cand_score:
202 |                             self.num_TP_set[iou_threshold] += 1
203 |                     self.num_TP_set_per_predbox.append(self.num_TP_set.copy())  # snapshot of the running TP counts (a copy, so later updates do not alter stored entries)
204 |             for ref_bbox_ in ref_bboxes_sb:
205 |                 if ref_bbox_[-1] == 1:
206 |                     self.num_T += 1
207 | 
208 |     def display_average_precision(self, plot_AP_graph=False):
209 |         """
210 |         Report precision and recall for every IoU threshold and optionally plot the precision-recall curves.
211 |         """
212 |         total_precision = {}
213 |         total_recall = {}
214 |         for iou_threshold in self.IOU_threshold:
215 |             total_precision[iou_threshold] = self.num_TP_set[iou_threshold] / (self.num_P + 0.01)  # +0.01 guards against division by zero
216 |             total_recall[iou_threshold] = self.num_TP_set[iou_threshold] / (self.num_T + 0.01)
217 |         # print("Total Precision: ", total_precision)
218 |         # print("Total Recall: ", total_recall)
219 |         precisions = {}
220 |         recalls = {}
221 |         num_P = 0
222 |         for iou_threshold in self.IOU_threshold:
223 |             precisions[iou_threshold] = [1]
224 |             recalls[iou_threshold] = [0]
225 |         for num_tp_set in self.num_TP_set_per_predbox:
226 |             num_P += 1
227 |             for iou_threshold in self.IOU_threshold:
228 |                 precisions[iou_threshold].append(num_tp_set[iou_threshold] / num_P)
229 |                 recalls[iou_threshold].append(num_tp_set[iou_threshold] / self.num_T)
230 |         if plot_AP_graph:
231 |             fig = plt.figure()
232 |             ax = fig.add_subplot(111)
233 |             lines = []
234 |             for iou_threshold in self.IOU_threshold:
235 |                 line = 0
236 |                 if len(recalls[iou_threshold]) > 1:
237 |                     line = ax.plot(recalls[iou_threshold], precisions[iou_threshold])
238 |                 else:
239 |                     line = ax.plot([0,0])
240 |                 lines.append(line)
241 |             fig.legend(lines, labels=self.IOU_threshold, title="IOU threshold value")
242 |             os.makedirs('ap_result', exist_ok=True)  # make sure the output directory exists
243 |             fig.savefig('ap_result/test.png')
244 |     def initialize_ap(self):
245 |         self.num_TP_set = {}
246 |         self.num_T = 0
247 |         self.num_P = 0
248 |         self.num_TP_set_per_predbox = []
249 |         for iou_threshold in self.IOU_threshold:
250 |             self.num_TP_set[iou_threshold] = 0
251 | 
252 | 
253 | if __name__ == '__main__':
254 |     parser = argparse.ArgumentParser(description='deep continuous fusion evaluation')
255 |     parser.add_argument('--data', type=str, default="carla", help='Data type, choose "carla" or "kitti"')
256 |     parser.add_argument('--cuda', type=str, default="0", help="list of cuda visible device number. you can choose 0~7 in list. [EX] --cuda 0,3,4")
257 |     parser.add_argument('--port', type=str, default='12233', help="master port number. default is 12233")
defaut is 12233") 258 | args = parser.parse_args() 259 | dataset_category = args.data 260 | cuda_vis_dev_str = args.cuda 261 | master_port = args.port 262 | print(cuda_vis_dev_str) 263 | device_id_source = cuda_vis_dev_str.split(",") 264 | device_id = [i for i in range(len(device_id_source))] 265 | os.environ['CUDA_VISIBLE_DEVICES'] = cuda_vis_dev_str 266 | os.environ['MASTER_ADDR'] = 'localhost' 267 | os.environ['MASTER_PORT'] = master_port 268 | 269 | 270 | torch.distributed.init_process_group(backend='nccl', world_size=1, rank=0) 271 | # Focus on test dataset 272 | if dataset_category == "carla": 273 | dataset = CarlaDataset(mode="test",want_bev_image=True) 274 | print("carla dataset is used for training") 275 | elif dataset_category =="kitti": 276 | dataset = KittiDataset(mode="test") 277 | print("kitti dataset is used for training") 278 | print("dataset is ready") 279 | data_loader = torch.utils.data.DataLoader(dataset, 280 | batch_size=2, 281 | shuffle=True) 282 | # Load pre-trained model. you can use the model during training instead of test_model 283 | test_model = ObjectDetection_DCF().cuda() 284 | test_model = DDP(test_model,device_ids=device_id, output_device=0, find_unused_parameters=True) 285 | test_model.load_state_dict(torch.load("./saved_model/model")) 286 | test = Test(test_model) 287 | data_length = len(dataset) 288 | loss_value = None 289 | 290 | for batch_ndx, sample in enumerate(data_loader): 291 | print("batch_ndx is ", batch_ndx) 292 | print("sample keys are ", sample.keys()) 293 | print("bbox shape is ", sample["bboxes"].shape) 294 | print("image shape is ", sample["image"].shape) 295 | print("pointcloud shape is ", sample["pointcloud"].shape) 296 | test_index = np.random.randint(data_length) 297 | image_data = sample['image'].cuda() 298 | point_voxel = sample['pointcloud'].cuda() 299 | reference_bboxes = sample['bboxes'].cpu().clone().detach() 300 | num_ref_bboxes = sample['num_bboxes'] 301 | bev_image = sample['lidar_bev_2Dimage'] 302 | 303 | # evaluate AP in one image and voxel lidar 304 | test.get_eval_value_onestep(point_voxel, image_data, reference_bboxes, num_ref_bboxes) 305 | test.save_feature_result(bev_image, reference_bboxes, num_ref_bboxes, batch_ndx, 99) 306 | print("accumulated number of true data is ", test.get_num_T()) 307 | print("accumulated number of positive data is ", test.get_num_P()) 308 | print("accumulated number of true positive data is ", test.get_num_TP_set()) 309 | print("="*50) 310 | if batch_ndx > 10: 311 | break 312 | 313 | # display average-precision plot and mAP 314 | test.display_average_precision(plot_AP_graph=True) 315 | # MUST DO WHEN U DISPLAY ALL OF RESULTS 316 | test.initialize_ap() 317 | 318 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torchvision.utils import save_image 5 | from torch.nn.parallel import DistributedDataParallel as DDP 6 | 7 | import os 8 | import numpy as np 9 | import yaml 10 | 11 | from data_import_carla import CarlaDataset 12 | # from kitti import KittiDataset 13 | from loss import LossTotal 14 | from model import ObjectDetection_DCF 15 | from test import Test 16 | 17 | class Train(nn.Module): 18 | def __init__(self, config): 19 | super(Train, self).__init__() 20 | device_id_source = config["cuda_visible_id"].split(",") 21 | device_id = [i for i in range(len(device_id_source))] 22 | 
23 |         self.model = ObjectDetection_DCF(config).cuda()
24 |         self.model = DDP(self.model, device_ids=device_id, output_device=0, find_unused_parameters=True)
25 |         self.loss_value = None
26 |         lr = config["learning_rate"]
27 |         beta1 = config["beta1"]
28 |         self.optimizer = optim.Adam(self.model.parameters(), lr=lr, betas=(beta1, 0.999))
29 | 
30 |     def one_step(self, lidar_voxel, camera_image, object_data, num_ref_box):  # one optimizer update on a single batch
31 |         pred = self.model(lidar_voxel, camera_image)
32 |         pred_cls, pred_reg, pred_bbox_f = torch.split(pred,[4, 14, 14], dim=1)
33 |         self.loss_value = self.loss_total(object_data, num_ref_box, pred_cls, pred_reg)
34 |         self.optimizer.zero_grad()
35 |         self.loss_value.backward()
36 |         self.optimizer.step()
37 | 
38 |     def get_loss_value(self, lidar_voxel, camera_image, object_data, num_ref_box):  # forward pass only, used for logging
39 |         pred = self.model(lidar_voxel, camera_image)
40 |         pred_cls, pred_reg, pred_bbox_f = torch.split(pred,[4, 14, 14], dim=1)
41 |         self.loss_value = self.loss_total(object_data, num_ref_box, pred_cls, pred_reg)
42 |         return self.loss_value.item(), pred_cls, pred_reg
43 | 
44 | 
45 | if __name__ == '__main__':
46 |     CONFIG_PATH = "./config/"
47 |     config_name = "config_carla.yaml"
48 |     with open(os.path.join(CONFIG_PATH, config_name)) as file:
49 |         config = yaml.safe_load(file)
50 | 
51 |     device_id_source = config["cuda_visible_id"].split(",")
52 |     device_id = [i for i in range(len(device_id_source))]
53 |     os.environ['CUDA_VISIBLE_DEVICES'] = config["cuda_visible_id"]
54 |     os.environ['MASTER_ADDR'] = 'localhost'
55 |     os.environ['MASTER_PORT'] = config["port_number"]
56 |     torch.distributed.init_process_group(backend='nccl', world_size=1, rank=0)
57 |     if config["dataset_name"] == "carla":
58 |         dataset = CarlaDataset(config)
59 |         dataset_test = CarlaDataset(config, mode="test", want_bev_image=True)
60 |         print("carla dataset is used for training")
61 |     elif config["dataset_name"] == "kitti":
62 |         dataset = KittiDataset()  # requires a KittiDataset implementation (import is commented out above)
63 |         dataset_test = KittiDataset(mode="test")
64 |         print("kitti dataset is used for training")
65 |     os.makedirs("./saved_model", exist_ok=True)  # checkpoint directory used at the start of every epoch
66 |     train_sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=True)
67 |     train_sampler_test = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=True)
68 |     data_loader = torch.utils.data.DataLoader(dataset,
69 |                                                batch_size=config["batch_size"],
70 |                                                sampler=train_sampler)
71 |     data_loader_test = torch.utils.data.DataLoader(dataset_test,
72 |                                                    batch_size=config["batch_size"],
73 |                                                    sampler=train_sampler_test)
74 |     num_epochs = config["num_epoch"]
75 |     training = Train(config)
76 |     test = Test(training.model, config)
77 |     data_length = len(data_loader)
78 |     for epoch in range(num_epochs):
79 |         torch.save(training.model.state_dict(), "./saved_model/" + config["saved_model_name"])
80 |         for batch_ndx, sample in enumerate(data_loader):
81 |             image_data = sample['image'].cuda()
82 |             point_voxel = sample['pointcloud'].cuda()
83 |             reference_bboxes = sample["bboxes"].cuda()
84 |             num_ref_bboxes = sample["num_bboxes"]
85 |             training.one_step(point_voxel, image_data, reference_bboxes, num_ref_bboxes)
86 |             if batch_ndx % 100 == 0:
87 |                 print("training batch", batch_ndx, "is processed")
88 |             if batch_ndx % 500 == 0:
89 |                 test_index = np.random.randint(len(dataset))
90 |                 loss_value, _, _ = training.get_loss_value(point_voxel, image_data, reference_bboxes, num_ref_bboxes)
91 |                 print("="*50)
92 |                 print('[%d/%d][%d/%d]\tLoss: %.4f in training dataset'
93 |                       % (epoch, num_epochs, batch_ndx, data_length, loss_value))
94 |                 for batch_ndx_, sample_ 
in enumerate(data_loader_test): 95 | image_data_ = sample_['image'].cuda() 96 | point_voxel_ = sample_['pointcloud'].cuda() 97 | reference_bboxes_ = sample_['bboxes'].cpu().clone().detach() 98 | num_ref_bboxes_ = sample_["num_bboxes"] 99 | bev_image_ = sample_["lidar_bev_2Dimage"] 100 | test.get_eval_value_onestep(point_voxel_, image_data_, reference_bboxes_, num_ref_bboxes_) 101 | test.save_feature_result(bev_image_, reference_bboxes_, num_ref_bboxes_, batch_ndx_, epoch) 102 | if batch_ndx_ > 5: 103 | print("accumulated number of true data is ", test.get_num_T()) 104 | print("accumulated number of positive data is ", test.get_num_P()) 105 | print("accumulated number of true positive data is ", test.get_num_TP_set()) 106 | break 107 | test.display_average_precision(plot_AP_graph=config["plot_AP_graph"]) 108 | print("="*50) 109 | test.initialize_ap() 110 | for batch_ndx, sample in enumerate(data_loader_test): 111 | image_data = sample['image'].cuda() 112 | point_voxel = sample['pointcloud'].cuda() 113 | reference_bboxes = sample['bboxes'].cpu().clone().detach() 114 | num_ref_bboxes = sample["num_bboxes"] 115 | bev_image = sample["lidar_bev_2Dimage"] 116 | test.get_eval_value_onestep(point_voxel, image_data, reference_bboxes, num_ref_bboxes) 117 | test.save_feature_result(bev_image, reference_bboxes, num_ref_bboxes, batch_ndx, epoch) 118 | if batch_ndx > 10: 119 | print("accumulated number of true data is ", test.get_num_T()) 120 | print("accumulated number of positive data is ", test.get_num_P()) 121 | print("accumulated number of true positive data is ", test.get_num_TP_set()) 122 | print("="*50) 123 | break 124 | test.display_average_precision(plot_AP_graph=config["plot_AP_graph"]) 125 | print("="*50) 126 | test.initialize_ap() 127 | --------------------------------------------------------------------------------
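Note: both train.py and test.py above are driven by config/config_carla.yaml. The sketch below only lists the keys those two scripts read directly; the values are illustrative assumptions rather than the repository's actual defaults, and the data, model, and loss modules read additional keys from the same file.

# Sketch (assumption): configuration keys accessed by train.py / test.py above,
# with placeholder values. Check config/config_carla.yaml for the real settings.
example_config = {
    "dataset_name": "carla",        # "carla" or "kitti"
    "cuda_visible_id": "0",         # value written to CUDA_VISIBLE_DEVICES
    "port_number": "12233",         # MASTER_PORT for torch.distributed
    "learning_rate": 1e-4,          # Adam learning rate
    "beta1": 0.9,                   # first Adam momentum coefficient
    "batch_size": 2,
    "num_epoch": 50,
    "saved_model_name": "model",    # checkpoint name under ./saved_model/
    "score_threshold": 0.8,         # box score cut-off in Test.get_bboxes
    "nms_iou_threshold": 0.01,      # IoU threshold for Test.NMS_IOU
    "plot_AP_graph": False,         # whether Test saves the precision-recall plot
}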