├── 3DIOU_custom.py ├── IOU.py ├── README.md ├── config └── config_carla.yaml ├── data_import.py ├── data_import_carla.py ├── explain_figure └── overall.png ├── image ├── lidar_image_301_in_0.png └── lidar_image_366_in_0.png ├── loss.py ├── model.py ├── separation_axis_theorem.py ├── test.py └── train.py /3DIOU_custom.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ''' 5 | IOU calculation is not perfection, its too simple 6 | YOU NEED TO MODIFY CONVEX 7 | ''' 8 | import torch 9 | 10 | EPSILON = 1e-5 11 | 12 | def getCornerPoint(bbox): 13 | x1 = bbox[4]/2 * torch.cos(bbox[-1]) - bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 14 | x2 =-bbox[4]/2 * torch.cos(bbox[-1]) - bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 15 | x3 =-bbox[4]/2 * torch.cos(bbox[-1]) + bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 16 | x4 = bbox[4]/2 * torch.cos(bbox[-1]) + bbox[3]/2 * torch.sin(bbox[-1]) + bbox[0] 17 | y1 = bbox[4]/2 * torch.sin(bbox[-1]) + bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 18 | y2 =-bbox[4]/2 * torch.sin(bbox[-1]) + bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 19 | y3 =-bbox[4]/2 * torch.sin(bbox[-1]) - bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 20 | y4 = bbox[4]/2 * torch.sin(bbox[-1]) - bbox[3]/2 * torch.cos(bbox[-1]) + bbox[1] 21 | return torch.tensor([[x1,y1], [x2,y2], [x3,y3], [x4,y4]]), torch.tensor([[x2,y2], [x3,y3], [x4,y4],[x1,y1]]) 22 | 23 | # def CornerInRectangle(c1,bbox): 24 | # for corner in c1: 25 | 26 | def getLineEq(c1, c1_s): 27 | slope = (c1_s[:,1] - c1[:,1])/(c1_s[:,0] - c1[:,0]) 28 | return slope 29 | 30 | 31 | def get3DIOU(bbox_1, bbox_2): 32 | #bbox_1, bbox_2 : size(7), x,y,z,width,length,height,orientation 33 | c1, c1_s = getCornerPoint(bbox_1) 34 | c2, c2_s = getCornerPoint(bbox_2) 35 | slope1 = getLineEq(c1, c1_s) 36 | slope2 = getLineEq(c2, c2_s) 37 | c_in_set = [] 38 | for i in range(4): 39 | x_min = c1[i][0] if c1[i][0] < c1_s[i][0] else c1_s[i][0] 40 | x_max = c1[i][0] if c1[i][0] > c1_s[i][0] else c1_s[i][0] 41 | y_min = c1[i][1] if c1[i][1] < c1_s[i][1] else c1_s[i][1] 42 | y_max = c1[i][1] if c1[i][1] > c1_s[i][1] else c1_s[i][1] 43 | for j in range(4): 44 | c_in = 1/(slope2[j] - slope1[i]) * torch.matmul(torch.tensor([[-1.0, 1.0], 45 | [-slope2[j], slope1[i]]]), 46 | torch.tensor([[slope1[i]*c1[i][0] - c1[i][1]], 47 | [slope2[i]*c2[i][0] - c2[i][1]]])) 48 | if x_max - x_min < EPSILON: 49 | if c_in[1] > y_min and c_in[1] < y_max: 50 | c_in_set.append(c_in) 51 | elif y_max - y_min < EPSILON: 52 | if c_in[0] > x_min and c_in[0] < x_max: 53 | c_in_set.append(c_in) 54 | else: 55 | if c_in[0] > x_min and c_in[0] < x_max and c_in[1] > y_min and c_in[1] < y_max: 56 | c_in_set.append(c_in) 57 | 58 | IOU = 1 59 | return IOU 60 | 61 | -------------------------------------------------------------------------------- /IOU.py: -------------------------------------------------------------------------------- 1 | # 3D IoU caculate code for 3D object detection 2 | # Kent 2018/12 3 | 4 | import numpy as np 5 | from scipy.spatial import ConvexHull 6 | from numpy import * 7 | 8 | 9 | def polygon_clip(subjectPolygon, clipPolygon): 10 | """ Clip a polygon with another polygon. 11 | 12 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 13 | 14 | Args: 15 | subjectPolygon: a list of (x,y) 2d points, any polygon. 16 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 17 | Note: 18 | **points have to be counter-clockwise ordered** 19 | 20 | Return: 21 | a list of (x,y) vertex point for the intersection polygon. 
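        Example (hypothetical inputs, both squares counter-clockwise):
            polygon_clip([(0, 0), (2, 0), (2, 2), (0, 2)], [(1, 1), (3, 1), (3, 3), (1, 3)])
            returns the four corners of the overlapping unit square spanning (1, 1) to (2, 2).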
22 | """ 23 | 24 | def inside(p): 25 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0]) 26 | 27 | def computeIntersection(): 28 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]] 29 | dp = [s[0] - e[0], s[1] - e[1]] 30 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 31 | n2 = s[0] * e[1] - s[1] * e[0] 32 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 33 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3] 34 | 35 | outputList = subjectPolygon 36 | cp1 = clipPolygon[-1] 37 | 38 | for clipVertex in clipPolygon: 39 | cp2 = clipVertex 40 | inputList = outputList 41 | outputList = [] 42 | s = inputList[-1] 43 | 44 | for subjectVertex in inputList: 45 | e = subjectVertex 46 | if inside(e): 47 | if not inside(s): 48 | outputList.append(computeIntersection()) 49 | outputList.append(e) 50 | elif inside(s): 51 | outputList.append(computeIntersection()) 52 | s = e 53 | cp1 = cp2 54 | if len(outputList) == 0: 55 | return None 56 | return (outputList) 57 | 58 | 59 | def poly_area(x, y): 60 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 61 | return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) 62 | 63 | 64 | def convex_hull_intersection(p1, p2): 65 | """ Compute area of two convex hull's intersection area. 66 | p1,p2 are a list of (x,y) tuples of hull vertices. 67 | return a list of (x,y) for the intersection and its volume 68 | """ 69 | inter_p = polygon_clip(p1, p2) 70 | if inter_p is not None: 71 | hull_inter = ConvexHull(inter_p) 72 | return inter_p, hull_inter.volume 73 | else: 74 | return None, 0.0 75 | 76 | 77 | def box3d_vol(corners): 78 | ''' corners: (8,3) no assumption on axis direction ''' 79 | a = np.sqrt(np.sum((corners[0, :] - corners[1, :]) ** 2)) 80 | b = np.sqrt(np.sum((corners[1, :] - corners[2, :]) ** 2)) 81 | c = np.sqrt(np.sum((corners[0, :] - corners[4, :]) ** 2)) 82 | return a * b * c 83 | 84 | 85 | def is_clockwise(p): 86 | x = p[:, 0] 87 | y = p[:, 1] 88 | return np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)) > 0 89 | 90 | 91 | def box3d_iou(corners1, corners2): 92 | ''' Compute 3D bounding box IoU. 93 | 94 | Input: 95 | corners1: numpy array (8,3), assume up direction is negative Y 96 | corners2: numpy array (8,3), assume up direction is negative Y 97 | Output: 98 | iou: 3D bounding box IoU 99 | iou_2d: bird's eye view 2D bounding box IoU 100 | 101 | todo (kent): add more description on corner points' orders. 102 | ''' 103 | # corner points are in counter clockwise order 104 | rect1 = [(corners1[i, 0], corners1[i, 2]) for i in range(3, -1, -1)] 105 | rect2 = [(corners2[i, 0], corners2[i, 2]) for i in range(3, -1, -1)] 106 | 107 | area1 = poly_area(np.array(rect1)[:, 0], np.array(rect1)[:, 1]) 108 | area2 = poly_area(np.array(rect2)[:, 0], np.array(rect2)[:, 1]) 109 | 110 | inter, inter_area = convex_hull_intersection(rect1, rect2) 111 | iou_2d = inter_area / (area1 + area2 - inter_area) 112 | ymax = min(corners1[0, 1], corners2[0, 1]) 113 | ymin = max(corners1[4, 1], corners2[4, 1]) 114 | 115 | inter_vol = inter_area * max(0.0, ymax - ymin) 116 | 117 | vol1 = box3d_vol(corners1) 118 | vol2 = box3d_vol(corners2) 119 | iou = inter_vol / (vol1 + vol2 - inter_vol) 120 | return iou, iou_2d 121 | 122 | 123 | # ---------------------------------- 124 | # Helper functions for evaluation 125 | # ---------------------------------- 126 | 127 | def get_3d_box(center, box_size, heading_angle): 128 | ''' Calculate 3D bounding box corners from its parameterization. 
129 | 130 | Input: 131 | box_size: tuple of (length,wide,height) 132 | heading_angle: rad scalar, clockwise from pos x axis 133 | center: tuple of (x,y,z) 134 | Output: 135 | corners_3d: numpy array of shape (8,3) for 3D box cornders 136 | ''' 137 | 138 | def roty(t): 139 | c = np.cos(t) 140 | s = np.sin(t) 141 | return np.array([[c, 0, s], 142 | [0, 1, 0], 143 | [-s, 0, c]]) 144 | 145 | R = roty(heading_angle) 146 | l, w, h = box_size 147 | x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]; 148 | y_corners = [h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2]; 149 | z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]; 150 | corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners])) 151 | corners_3d[0, :] = corners_3d[0, :] + center[0]; 152 | corners_3d[1, :] = corners_3d[1, :] + center[1]; 153 | corners_3d[2, :] = corners_3d[2, :] + center[2]; 154 | corners_3d = np.transpose(corners_3d) 155 | return corners_3d 156 | 157 | 158 | if __name__ == '__main__': 159 | print('------------------') 160 | # get_3d_box(center, box_size, heading_angle) 161 | corners_3d_ground = get_3d_box((2.882992, 1.698800, 20.785644), (1.497255, 1.644981, 3.628938), -1.531692) 162 | corners_3d_predict = get_3d_box((2.756923, 1.661275, 20.943280), (1.458242, 1.604773, 3.707947), -1.549553) 163 | print("predict corner is") 164 | print(corners_3d_predict) 165 | print("ground corner is") 166 | print(corners_3d_ground) 167 | (IOU_3d, IOU_2d) = box3d_iou(corners_3d_predict, corners_3d_ground) 168 | print(IOU_3d, IOU_2d) # 3d IoU/ 2d IoU of BEV(bird eye's view) 169 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection 2 | 3 | *Caution: Its unofficial algorithm and under construction. we will make total architecture soon. 4 | 5 | reference: Deep Continuous Fusion for Multi-Sensor 3D Object Detection (Ming Liang et.al. , ECCV 2018) 6 | 7 | ## Overall 8 | 9 | ![](./explain_figure/overall.png) 10 | 11 | 12 | 13 | 14 | 15 | ## How to use it 16 | 17 | ### Data directory 18 | 19 | Data parse is in data_import_carla.py. You can change other dataset by changing Dataset class! 
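For a quick sanity check of the data pipeline, the dataset class can also be used on its own. The sketch below is an assumption-heavy example, not an official entry point: it presumes the YAML config is read with PyYAML and that `train_data_dir` in `config/config_carla.yaml` points to your own CARLA hdf5 files.

```
import yaml
import torch
from data_import_carla import CarlaDataset

# load the same config that train.py / test.py use
with open("config/config_carla.yaml") as f:
    config = yaml.safe_load(f)

dataset = CarlaDataset(config, mode="train")
loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

sample = next(iter(loader))
# dict keys: 'image', 'bboxes', 'num_bboxes', 'pointcloud', 'pointcloud_raw',
#            'projected_loc_uv', 'num_points_raw'
print(sample["pointcloud"].shape)  # (B, voxel_channel, voxel_length, voxel_width)
```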
20 | 21 | ### Training 22 | 23 | ``` 24 | $ python3.5 train.py --cuda 0,1 --port 12321 --data carla 25 | ``` 26 | 27 | ### Testing 28 | 29 | ``` 30 | $ python3.5 test.py --cuda 0,1 --port 12321 --data carla 31 | ``` 32 | -------------------------------------------------------------------------------- /config/config_carla.yaml: -------------------------------------------------------------------------------- 1 | #train.py configuration 2 | batch_size: 8 3 | dataset_name: carla 4 | cuda_visible_id: "1,2,3,4" 5 | port_number: "12233" 6 | saved_model_name: model_ 7 | num_epoch: 60 8 | learning_rate: 0.0001 9 | beta1: 0.9 10 | plot_AP_graph: False 11 | 12 | 13 | # data_import_carla.py configuration 14 | train_data_dir: /media/mmc-server1/Server1/chanuk/ready_for_journal/dataset/carla_object 15 | test_data_dir: /media/mmc-server1/Server1/chanuk/ready_for_journal/dataset/carla_object/test 16 | max_num_pc: 20000 17 | max_num_bbox: 20 18 | # lidar&voxel configuration 19 | lidar_x_min: 0.0 20 | lidar_x_max: 70.0 21 | lidar_y_min: -30.0 22 | lidar_y_max: 30.0 23 | lidar_z_min: -2.4 24 | lidar_z_max: 0.8 25 | delta: 0.2 26 | voxel_length: 384 # X 27 | voxel_width: 256 # Y 28 | voxel_channel: 32 # Z 29 | # image configuration 30 | image_height: 480 31 | image_width: 640 32 | 33 | #loss.py configuration 34 | regress_type: 0 35 | regress_loss_gain: 3 36 | positive_range: 5 37 | pos_sample_threshold: 128 38 | neg_sample_threshold: 128 39 | 40 | # model.py 41 | anchor_bbox_feature: # also used in loss.py 42 | width: 2.0 43 | length: 4.0 44 | height: 1.5 45 | reduced_scale: 4 # MUST MATCH WITH OUT_FEATURE3 OF LIDAR_MODULE 46 | lidar_module: 47 | out_feature1: 32 48 | out_feature2: 64 49 | out_feature3: 128 50 | out_feature4: 192 51 | out_feature5: 256 52 | 53 | num_res_block1: 1 54 | num_res_block2: 2 55 | num_res_block3: 4 56 | num_res_block4: 6 57 | num_res_block5: 6 58 | 59 | # test.py 60 | nms_iou_score_theshold: 0.01 61 | score_threshold: 0.8 -------------------------------------------------------------------------------- /data_import.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image, ImageDraw 4 | 5 | def getRect(x, y, width, height, angle): 6 | rect = np.array([(-width/2, -height/2), (width/2, -height/2), 7 | (width/2, height/2), (-width/2, height/2), 8 | (-width/2, -height/2)]) 9 | theta = angle 10 | R = np.array([[np.cos(theta), -np.sin(theta)], 11 | [np.sin(theta), np.cos(theta)]]) 12 | offset = np.array([x, y]) 13 | transformed_rect = np.dot(rect, R) + offset 14 | return transformed_rect 15 | 16 | def putBoundingBox(lidar_image, reference_bboxes, config, color=1): 17 | lidar_image_with_bbox = lidar_image.cpu().clone().numpy() 18 | img = Image.fromarray((255*lidar_image_with_bbox).astype(np.uint8)) 19 | draw = ImageDraw.Draw(img) 20 | x_scale = int(config["voxel_length"] / (config["lidar_x_max"] - config["lidar_x_min"])) 21 | y_scale = int(config["voxel_width"] / (config["lidar_y_max"] - config["lidar_y_min"])) 22 | x_offset = int(-config["lidar_x_min"] * x_scale) 23 | y_offset = int(-config["lidar_y_min"] * y_scale) 24 | for bbox in reference_bboxes: 25 | x = int(bbox[1]*y_scale + y_offset) 26 | y = int(bbox[0]*x_scale) 27 | width = bbox[3]*y_scale # WARNING! IT SHOULD BE SAME SCALE IN X & Y 28 | height = bbox[4]*x_scale # WARNING! 
IT SHOULD BE SAME SCALE IN X & Y 29 | angle = bbox[6] - 1.57 30 | rect = getRect(x=x, y=y, width=width, height=height, angle=angle) 31 | draw.polygon([tuple(p) for p in rect], fill=color) 32 | lidar_image_with_bbox = np.asarray(img) 33 | return torch.tensor(lidar_image_with_bbox) 34 | -------------------------------------------------------------------------------- /data_import_carla.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import h5py 4 | from torch.utils.data import Dataset 5 | from torchvision.utils import save_image 6 | from data_import import putBoundingBox 7 | import time 8 | import numpy as np 9 | import quaternion 10 | 11 | class CarlaDataset(Dataset): 12 | def __init__(self, config, mode="train",want_bev_image=False): 13 | super(CarlaDataset, self).__init__() 14 | self.config = config 15 | self.hdf5_files = self.load_dataset(mode = mode) 16 | self.hdf5_id_dict = self.getIdDict(self.hdf5_files) 17 | self.length = 0 18 | self.scenario_length = [] 19 | self.scenario_name = [] 20 | if (want_bev_image): 21 | self.want_bev_image = True 22 | else: 23 | self.want_bev_image = False 24 | 25 | for hdf5_file in self.hdf5_files: 26 | single_data_scenario = self.hdf5_files[hdf5_file] 27 | self.length += len(single_data_scenario) 28 | self.scenario_name.append(hdf5_file) 29 | self.scenario_length.append(len(single_data_scenario)) 30 | 31 | RT = self.get_extrinsic_parameter() 32 | C = self.get_intrinsic_parameter() 33 | CRT = np.matmul(C, RT) 34 | self.CRT_tensor = torch.tensor(CRT).permute(1,0).type(torch.float) 35 | x_scale = int(self.config["voxel_length"] / (self.config["lidar_x_max"] - self.config["lidar_x_min"])) 36 | y_scale = int(self.config["voxel_width"] / (self.config["lidar_y_max"] - self.config["lidar_y_min"])) 37 | z_scale = int(self.config["voxel_channel"] / (self.config["lidar_z_max"] - self.config["lidar_z_min"])) 38 | x_offset = int(-self.config["lidar_x_min"] * x_scale) 39 | y_offset = int(-self.config["lidar_y_min"] * y_scale) 40 | z_offset = int(-self.config["lidar_z_min"] * z_scale) 41 | self.pc_to_voxel_indice = torch.tensor([[x_scale,0,0,x_offset], 42 | [0,y_scale,0,y_offset], 43 | [0,0,z_scale,z_offset]], dtype=torch.float).permute(1,0) 44 | 45 | def __len__(self): 46 | return self.length 47 | 48 | def __getitem__(self, idx): 49 | idx_for_scenario = idx 50 | if idx > self.length or idx < 0: 51 | RuntimeError("idx is not in data file") 52 | return -1 53 | for scenario_file_index in range(len(self.scenario_length)): 54 | length = self.scenario_length[scenario_file_index] 55 | if (idx_for_scenario - length >= 0): 56 | idx_for_scenario = idx_for_scenario - length 57 | else: 58 | file_name = self.scenario_name[scenario_file_index] 59 | data = self.hdf5_files[file_name] 60 | id = self.hdf5_id_dict[file_name][idx_for_scenario].strip() 61 | object_datas, lidar_data, image_data = self.getOneStepData(data, id) 62 | image_data = torch.tensor(image_data).permute(2, 0, 1) 63 | reference_bboxes, num_reference_bboxes = self.arangeLabelData(object_datas) 64 | voxelized_lidar, point_cloud_raw, uv, num_points_raw, indices_for_bev = self.Voxelization_Projection(lidar_data) 65 | if (self.want_bev_image): 66 | bev_image = self.getLidarImage(indices_for_bev) 67 | return {'image': image_data, 68 | 'bboxes': reference_bboxes, 69 | "num_bboxes": num_reference_bboxes, 70 | "pointcloud": voxelized_lidar, 71 | "pointcloud_raw": point_cloud_raw, 72 | "projected_loc_uv": uv, 73 | "num_points_raw": num_points_raw, 74 | 
"lidar_bev_2Dimage": bev_image} 75 | else: 76 | return {'image': image_data, 77 | 'bboxes': reference_bboxes, 78 | "num_bboxes": num_reference_bboxes, 79 | "pointcloud_raw":point_cloud_raw, 80 | "projected_loc_uv": uv, 81 | "num_points_raw": num_points_raw, 82 | "pointcloud" : voxelized_lidar} 83 | 84 | def load_dataset(self, mode = "train"): 85 | if mode == "train": 86 | label_path = self.config["train_data_dir"] 87 | elif mode == "test": 88 | label_path = self.config["test_data_dir"] 89 | else: 90 | print ("ERROR IN MODE TYPE, PRESS [train] OR [test] !!") 91 | return -1 92 | hdf5_files = {} 93 | print("reading hdf5 file...") 94 | file_list = os.listdir(label_path) 95 | for file in file_list: 96 | if file.split('.')[-1] == 'hdf5': 97 | file_dir = os.path.join(label_path, file) 98 | try: 99 | hdf5_files[file] = h5py.File(file_dir, 'r') 100 | print(file) 101 | except: 102 | print(file + ' doesnt work. we except this folder') 103 | print("reading hdf5 end") 104 | return hdf5_files 105 | 106 | def valid_bbox(self, object_data): 107 | loc_x = object_data[0] 108 | loc_y = object_data[1] 109 | if loc_x >= self.config["lidar_x_min"] and loc_x < self.config["lidar_x_max"] and loc_y >= self.config["lidar_y_min"] and loc_y < self.config["lidar_y_max"]: 110 | return True 111 | return False 112 | 113 | def orientation_inner_bound(self, ori): 114 | if ori > 3.141592: 115 | while(1): 116 | ori = ori - 3.141592 117 | if ori > 0 and ori < 3.141592: 118 | break 119 | elif ori < 0: 120 | while(1): 121 | ori = ori + 3.141592 122 | if ori > 0 and ori < 3.141592: 123 | break 124 | else: 125 | ori = ori # 3 and 4 should be carefully look whether is pitch or roll 126 | return ori 127 | 128 | def arangeLabelData(self, object_datas): 129 | """ 130 | uint8 CLASSIFICATION_UNKNOWN=0 131 | uint8 CLASSIFICATION_UNKNOWN_SMALL=1 132 | uint8 CLASSIFICATION_UNKNOWN_MEDIUM=2 133 | uint8 CLASSIFICATION_UNKNOWN_BIG=3 134 | uint8 CLASSIFICATION_PEDESTRIAN=4 135 | uint8 CLASSIFICATION_BIKE=5 136 | uint8 CLASSIFICATION_CAR=6 137 | uint8 CLASSIFICATION_TRUCK=7 138 | uint8 CLASSIFICATION_MOTORCYCLE=8 139 | uint8 CLASSIFICATION_OTHER_VEHICLE=9 140 | uint8 CLASSIFICATION_BARRIER=10 141 | uint8 CLASSIFICATION_SIGN=11 142 | """ 143 | ref_bboxes = torch.zeros(self.config["max_num_bbox"],9) 144 | i = 0 145 | for object_data in object_datas: 146 | if i>self.config["max_num_bbox"]: 147 | break 148 | if not self.valid_bbox(object_data): 149 | continue 150 | object_class = object_data[9] 151 | if object_class == 6: 152 | rel_x = object_data[0] 153 | rel_y = object_data[1] 154 | rel_z = object_data[2] 155 | ori = self.orientation_inner_bound(object_data[5]) 156 | width = object_data[6] 157 | length = object_data[7] 158 | height = object_data[8] 159 | ref_bboxes[i,:] = torch.tensor([rel_x, rel_y, rel_z, length, width, height, ori, object_class, 1]) 160 | i+=1 161 | return ref_bboxes, i 162 | 163 | def getOneStepData(self, data, id): 164 | image_name = 'center_image_data' 165 | lidar_name = 'lidar_data' 166 | object_data_name = 'object_data' # relative position and rotation data 167 | object_data = torch.tensor(np.array(data[id][object_data_name])) # N * 10 (x, y, z, roll, pitch, yaw, wid, len, hei, class) 168 | lidar_data = torch.tensor(np.array(data[id][lidar_name])).type(torch.float)[:,3:6] # N * 3 (x, y, z) 169 | image_data = torch.tensor(np.array(data[id][image_name])) # 480*640*3 170 | 171 | return object_data, lidar_data, image_data 172 | 173 | def getIdDict(self, hdf5_files): 174 | hdf5_id_dict = {} 175 | for hdf5_file in hdf5_files: 
176 | data_list = list(hdf5_files[hdf5_file].keys()) 177 | hdf5_id_dict[hdf5_file] = data_list 178 | return hdf5_id_dict 179 | 180 | def get_extrinsic_parameter(self): 181 | trans = np.zeros((3,1)) # translation is 0, 0, 0 182 | v_lidar = np.array([ -1.57079633, 3.12042851, -1.57079633 ]) 183 | v_cam = np.array([ -3.13498819, 1.59196951, 1.56942932 ]) 184 | v_diff = v_cam - v_lidar 185 | q = quaternion.from_euler_angles(v_diff) 186 | R_ = quaternion.as_rotation_matrix(q) 187 | RT = np.concatenate((R_,trans), axis=-1) 188 | return RT 189 | 190 | def get_intrinsic_parameter(self): 191 | cameraMatrix = np.array([[268.51188197672957, 0.0, 320.0], 192 | [0.0, 268.51188197672957, 240.0], 193 | [0.0, 0.0, 1.0]]) 194 | return cameraMatrix 195 | 196 | def Projection(self, point_cloud_raw): 197 | ones = torch.ones((point_cloud_raw.shape[0],1)) 198 | xyz_one = torch.cat((point_cloud_raw, ones), dim=-1) # input 199 | uv_z = torch.matmul(xyz_one, self.CRT_tensor).permute(1,0) 200 | uv = uv_z/uv_z[-1] 201 | uv = uv[:2] 202 | uv = torch.where(uv[0] > 0, uv, torch.tensor(0).type(torch.float)) 203 | uv = torch.where(uv[0] < self.config["image_height"], uv, torch.tensor(0).type(torch.float)) 204 | uv = torch.where(uv[1] > 0, uv, torch.tensor(0).type(torch.float)) 205 | uv = torch.where(uv[1] < self.config["image_width"], uv, torch.tensor(0).type(torch.float)) 206 | indices = torch.nonzero(uv) 207 | indices = indices[:int(indices.shape[0]/2),1] 208 | filtered_points_raw = point_cloud_raw[indices] 209 | 210 | return uv.permute(1,0)[indices], filtered_points_raw 211 | 212 | def Voxelization_Projection(self, lidar_data, interpolate=True): 213 | # Voxelization 214 | lidar_data = lidar_data.permute(1,0) # 3 * N 215 | lidar_data = torch.where(lidar_data[0] > self.config["lidar_x_min"], 216 | lidar_data, torch.tensor(0).type(torch.float)) 217 | lidar_data = torch.where(lidar_data[0] < self.config["lidar_x_max"] - self.config["delta"], 218 | lidar_data, torch.tensor(0).type(torch.float)) 219 | lidar_data = torch.where(lidar_data[1] > self.config["lidar_y_min"], 220 | lidar_data, torch.tensor(0).type(torch.float)) 221 | lidar_data = torch.where(lidar_data[1] < self.config["lidar_y_max"] - self.config["delta"], 222 | lidar_data, torch.tensor(0).type(torch.float)) 223 | lidar_data = torch.where(lidar_data[2] > self.config["lidar_z_min"], 224 | lidar_data, torch.tensor(0).type(torch.float)) 225 | lidar_data = torch.where(lidar_data[2] < self.config["lidar_z_max"] - self.config["delta"], 226 | lidar_data, torch.tensor(0).type(torch.float)) 227 | valid_indices = torch.nonzero(lidar_data) 228 | valid_indices = valid_indices[:int(valid_indices.shape[0]/3),1] 229 | lidar_data = lidar_data.permute(1,0)[valid_indices] 230 | lidar_data_ = torch.cat((lidar_data, torch.ones(lidar_data.shape[0],1).type(torch.float)), dim=-1) 231 | if not interpolate: 232 | indices = torch.matmul(lidar_data_, self.pc_to_voxel_indice).type(torch.long).permute(1,0) 233 | lidar_voxel = torch.zeros(self.config["voxel_channel"], self.config["voxel_length"], self.config["voxel_width"]) 234 | lidar_voxel[indices[2],indices[0],indices[1]]=1 235 | else: 236 | indices_float = torch.matmul(lidar_data_, self.pc_to_voxel_indice).permute(1,0) 237 | x = indices_float[0] 238 | y = indices_float[1] 239 | z = indices_float[2] 240 | x_lower = indices_float[0].type(torch.long) 241 | x_upper = indices_float[0].type(torch.long) + 1 242 | y_lower = indices_float[1].type(torch.long) 243 | y_upper = indices_float[1].type(torch.long) + 1 244 | z_lower = 
indices_float[2].type(torch.long) 245 | z_upper = indices_float[2].type(torch.long) + 1 246 | dx = x - x_lower.type(torch.float) 247 | dy = y - y_lower.type(torch.float) 248 | dz = z - z_lower.type(torch.float) 249 | lidar_voxel = torch.zeros(self.config["voxel_channel"], self.config["voxel_length"], self.config["voxel_width"]) 250 | lidar_voxel[z_lower, x_lower, y_lower] += (1-dx)*(1-dy)*(1-dz) 251 | lidar_voxel[z_upper, x_lower, y_lower] += (1-dx)*(1-dy)*dz 252 | lidar_voxel[z_lower, x_upper, y_lower] += dx*(1-dy)*(1-dz) 253 | lidar_voxel[z_upper, x_upper, y_lower] += dx*(1-dy)*dz 254 | lidar_voxel[z_lower, x_lower, y_upper] += (1-dx)*dy*(1-dz) 255 | lidar_voxel[z_upper, x_lower, y_upper] += (1-dx)*dy*dz 256 | lidar_voxel[z_lower, x_upper, y_upper] += dx*dy*(1-dz) 257 | lidar_voxel[z_upper, x_upper, y_upper] += dx*dy*dz 258 | indices = indices_float.type(torch.long) 259 | 260 | # Projection 261 | uv, filtered_points_raw = self.Projection(lidar_data) 262 | num_point_cloud_raw = filtered_points_raw.shape[0] 263 | point_cloud_raw_tensor = torch.zeros(self.config["max_num_pc"], 3) 264 | point_cloud_raw_tensor[:num_point_cloud_raw,:] = filtered_points_raw 265 | uv_tensor = torch.zeros(self.config["max_num_pc"], 2) 266 | uv_tensor[:num_point_cloud_raw,:] = uv 267 | return lidar_voxel, point_cloud_raw_tensor, uv_tensor, num_point_cloud_raw, indices 268 | 269 | def getLidarImage(self, indices_for_bev): 270 | lidar_image = torch.zeros(3, self.config["voxel_length"], self.config["voxel_width"]) 271 | lidar_image[:, indices_for_bev[0], indices_for_bev[1]] = 1 272 | return lidar_image 273 | 274 | if __name__ == "__main__": 275 | os.environ['CUDA_VISIBLE_DEVICES'] = '4' 276 | dataset = CarlaDataset(mode="test", want_bev_image=True) 277 | data_loader = torch.utils.data.DataLoader(dataset, 278 | batch_size=2, 279 | shuffle=True) 280 | start = time.time() 281 | for batch_ndx, sample in enumerate(data_loader): 282 | print("total time: ", time.time() - start) 283 | start = time.time() 284 | print("batch_ndx is ", batch_ndx) 285 | print("sample keys are ", sample.keys()) 286 | print("bbox shape is ", sample["bboxes"].shape) 287 | print("num bboxes is ", sample["num_bboxes"]) 288 | print("image shape is ", sample["image"].shape) 289 | print("pointcloud shape is ", sample["pointcloud"].shape) 290 | print("voxel type is ", sample["pointcloud"].type()) 291 | print("bev image shape: ", sample["lidar_bev_2Dimage"].shape) 292 | bev_image_ = 0.5*sample["lidar_bev_2Dimage"][-1].permute(1,2,0) 293 | bev_image_with_bbox = putBoundingBox(bev_image_, sample["bboxes"][-1,:sample["num_bboxes"][-1]], color="red").permute(2,0,1).type(torch.float) 294 | save_image(bev_image_with_bbox, 'image/lidar_image_{}.png'.format(batch_ndx)) 295 | # print(sample["image"][-1].dtype) 296 | save_image(torch.cat((sample["image"][-1][2:3],sample["image"][-1][1:2],sample["image"][-1][0:1]) ,dim=0).type(torch.float)/256,'image/RGB_image_{}.png'.format(batch_ndx) ) 297 | print("image max, min value is ",torch.max(sample["image"][-1]), torch.min(sample["image"][-1])) 298 | print("pointcloud_raw shape is ", sample["pointcloud_raw"].shape) 299 | print("num points is ", sample["num_points_raw"]) 300 | print("projected_loc_uv shape is ", sample["projected_loc_uv"].shape) 301 | 302 | print("="*50) 303 | if batch_ndx >10: 304 | break 305 | # print(dataset[len(dataset)-1]) -------------------------------------------------------------------------------- /explain_figure/overall.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chanuk-Yang/Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection/d85e3618351af1060864d24de6a4af59017da99c/explain_figure/overall.png -------------------------------------------------------------------------------- /image/lidar_image_301_in_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chanuk-Yang/Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection/d85e3618351af1060864d24de6a4af59017da99c/image/lidar_image_301_in_0.png -------------------------------------------------------------------------------- /image/lidar_image_366_in_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chanuk-Yang/Deep_Continuous_Fusion_for_Multi-Sensor_3D_Object_Detection/d85e3618351af1060864d24de6a4af59017da99c/image/lidar_image_366_in_0.png -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.utils import save_image 4 | 5 | import numpy as np 6 | import random 7 | import torch.nn.functional as F 8 | import os 9 | 10 | from IOU import get_3d_box, box3d_iou 11 | from model import AnchorBoundingBoxFeature 12 | 13 | 14 | class LossClass(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | self.loss = torch.nn.CrossEntropyLoss() 18 | 19 | def forward(self, predicted_feature, binary_label): 20 | loss = self.loss(predicted_feature, binary_label) 21 | return loss 22 | 23 | class LossReg(nn.Module): 24 | def __init__(self): 25 | super().__init__() 26 | self.loss = torch.nn.SmoothL1Loss(reduction="none") 27 | 28 | def forward(self, predicted_feature, binary_label): 29 | loss = self.loss(predicted_feature, binary_label) 30 | return loss 31 | 32 | 33 | class LossTotal(nn.Module): 34 | def __init__(self, config): 35 | super().__init__() 36 | self.config = config 37 | self.loss_class = LossClass() 38 | self.loss_regress = LossReg() 39 | self.anchor_bbox_feature = AnchorBoundingBoxFeature(config) 40 | anchor_set = self.anchor_bbox_feature().cuda() 41 | anchor_set_shape = anchor_set.shape 42 | self.anchor_set = anchor_set.reshape(2,int(anchor_set_shape[0]/2), 43 | anchor_set_shape[1],anchor_set_shape[2]) 44 | self.regress_type = self.config["regress_type"] 45 | 46 | def forward(self, reference_bboxes_batch, num_ref_bbox_batch, 47 | predicted_class_feature_batch, predicted_regress_feature_batch): 48 | # reference_bboxes : B, max_n(20), 8 49 | B, max_num, _ = reference_bboxes_batch.shape 50 | total_loss = torch.zeros(1).cuda() 51 | self.anchor_set_ = self.anchor_set.unsqueeze(0).repeat(B,1,1,1,1) 52 | for b in range(B): 53 | reference_bboxes = reference_bboxes_batch[b,:num_ref_bbox_batch[b]] 54 | predicted_class_feature = predicted_class_feature_batch[b] 55 | predicted_regress_feature = predicted_regress_feature_batch[b] 56 | anchor = self.anchor_set_[b] 57 | 58 | IDX, positive_position_list_all, positive_position_list = self.getPositionOfPositive(anchor,reference_bboxes, 59 | sample_threshold=self.config["pos_sample_threshold"]) 60 | negative_position_list = self.getPositionOfNegative(anchor, positive_position_list, 61 | sample_threshold=self.config["neg_sample_threshold"]) 62 | # positive_position_list = [[1,2], [3,4], [11,12], [13,14]] 63 | # negative_position_list = 
[[5,6], [7,8], [9,10]] 64 | total_loss_class = self.getClassSum(positive_position_list, negative_position_list, predicted_class_feature[:2,:,:], self.loss_class) # per anchor 65 | total_loss_class += self.getClassSum(positive_position_list, negative_position_list, predicted_class_feature[2:4,:,:], self.loss_class) # per anchor 66 | 67 | ## anchor의 좌표를 절대 좌표로 바꿔야 한다. 68 | ## position_list_all은 positive_position이 다 들어간것, true 하나당 한 픽셀이려면 수정 필요 69 | 70 | Reg_loss = self.getRegSum(IDX, positive_position_list_all, reference_bboxes, predicted_regress_feature, anchor) 71 | total_loss = total_loss_class + self.config["regress_loss_gain"] * Reg_loss 72 | return total_loss 73 | 74 | def getPositionOfPositive(self, anchor_bbox_feature, ref_bboxes, sample_threshold = 128): 75 | _, C, H, W = anchor_bbox_feature.shape 76 | positive_position_list = [] 77 | positive_position_regress = [] 78 | positive_position_idx = {} 79 | temp_cnt = 0 80 | for i, ref_bbox in enumerate(ref_bboxes): 81 | positive_position_idx[i] = [] 82 | x_scale = int(self.config["voxel_length"] / (self.config["lidar_x_max"] - self.config["lidar_x_min"])) 83 | y_scale = int(self.config["voxel_width"] / (self.config["lidar_y_max"] - self.config["lidar_y_min"])) 84 | x_offset = int(-self.config["lidar_x_min"] * x_scale) 85 | y_offset = int(-self.config["lidar_y_min"] * y_scale) 86 | reduced_scale = self.config["anchor_bbox_feature"]["reduced_scale"] 87 | point_x = int((ref_bbox[0]*x_scale + x_offset)/reduced_scale) # (0~ 700/4) 88 | point_y = int((ref_bbox[1]*y_scale + y_offset)/reduced_scale) #(0 ~ 700/4) 89 | if point_x < 0 or point_x > H - 1 or point_y < 0 or point_y > W - 1: 90 | continue 91 | for x_int in range(self.config["positive_range"]): 92 | pos_x = point_x - int(self.config["positive_range"]/2) + x_int 93 | for y_int in range(self.config["positive_range"]): 94 | pos_y = point_y - int(self.config["positive_range"]/2) + y_int 95 | if pos_x < 0 or pos_x > H - 1 or pos_y < 0 or pos_y > W - 1: 96 | continue 97 | positive_position_list.append([pos_x, pos_y]) 98 | if self.regress_type==0: 99 | positive_position_regress.append([pos_x, pos_y]) 100 | positive_position_idx[i].append(temp_cnt) 101 | temp_cnt+=1 102 | else: 103 | if pos_x == point_x and pos_y == point_y: 104 | positive_position_regress.append([pos_x, pos_y]) #중심만 추가하기 105 | positive_position_idx[i].append(temp_cnt) 106 | temp_cnt+=1 107 | # sample_idx = np.random.choice(len(positive_position_list), np.max(sample_threshold, len(positive_position_list)), replace=False) 108 | np.random.shuffle(positive_position_list) 109 | if len(positive_position_list) > sample_threshold: 110 | positive_position_list = positive_position_list[:sample_threshold] 111 | return positive_position_idx, np.array(positive_position_regress), positive_position_list 112 | 113 | def getPositionOfNegative(self, anchor_bbox_feature, positive_position_list, sample_threshold = 128): 114 | _, C, H, W = anchor_bbox_feature.shape 115 | negative_position_list = [] 116 | sample = 0 117 | while(1): 118 | x = np.random.randint(H) 119 | y = np.random.randint(W) 120 | if [x, y] in positive_position_list: 121 | continue 122 | else: 123 | negative_position_list.append([x, y]) 124 | sample += 1 125 | if sample > sample_threshold: 126 | break 127 | return negative_position_list 128 | 129 | def getClassSum(self, positive_position_list, negative_position_list, predicted_class, loss_class): 130 | positive_size = len(positive_position_list) 131 | negative_size = len(negative_position_list) 132 | positive_label = 
torch.ones((positive_size), dtype=torch.long).cuda() 133 | negative_label = torch.zeros((negative_size), dtype=torch.long).cuda() 134 | negative_position = torch.tensor(negative_position_list, dtype=torch.long).cuda() 135 | positive_position = torch.tensor(positive_position_list, dtype=torch.long).cuda() 136 | c = predicted_class[:, negative_position[:,0], negative_position[:,1]].permute(1,0) 137 | if positive_size > 0: 138 | a = predicted_class[:, positive_position[:,0], positive_position[:,1]].permute(1,0) 139 | loss_sum = loss_class(a, positive_label) + loss_class(c, negative_label) 140 | else: 141 | loss_sum = loss_class(c, negative_label) 142 | return loss_sum 143 | 144 | def LossReg(self, ref_box, pred_box, a_box): 145 | 146 | ### Rel coordinate 기준 147 | 148 | # ref_box : [7,] 149 | # pred_box : [N, 14, ] 150 | # anchor_box : [N, 2, 7] 151 | 152 | N, num_anchor, char = a_box.shape 153 | ref_box = ref_box.unsqueeze(0).unsqueeze(0).repeat(N,num_anchor,1) 154 | pred_box = pred_box.reshape(N,num_anchor,char) 155 | xy_ref_offset = (ref_box[:,:,:2]-a_box[:,:,:2])/torch.sqrt(torch.pow(a_box[:,:,3:4],2) + torch.pow(a_box[:,:,4:5],2)) 156 | z_ref_offset = (ref_box[:,:,2:3]-a_box[:,:,2:3])/(a_box[:,:,5:6]) 157 | whd_ref_offset = torch.log(ref_box[:,:,3:6]/(a_box[:,:,3:6])) 158 | ori_ref_offset = torch.atan2(torch.sin(ref_box[:,:,6] - a_box[:,:,-1]), torch.cos(ref_box[:,:,6] - a_box[:,:,-1])) 159 | ref_offset = torch.cat([xy_ref_offset, z_ref_offset, whd_ref_offset, ori_ref_offset.unsqueeze(-1)], dim=-1) #[N,2,7] 160 | 161 | l1_loss = self.loss_regress(pred_box[:,:,:char], ref_offset) 162 | 163 | loss = 1.0/(N*num_anchor*char) * torch.sum(l1_loss) 164 | 165 | return loss 166 | 167 | def getRegSum(self, index, positive_position_list, reference_bboxes, predicted_regress_feature, anchor): 168 | 169 | # reference_bboxes = x : 0~70.0, y : -35.0 ~ 35.0 170 | # positive_position_list = x : 700/4 y: 700/4 , size : [700/4, 700/4] 171 | # predicted_regress_feature = x : ?, y: ?, size : [700/4, 700/4] 172 | # anchor = x : 0~70.0, y : -35.0~35.0, size : [700/4, 700/4] 173 | 174 | # back propagation is error. Batch sum and loss device type is error i think 175 | reg_loss = torch.zeros(1).cuda() 176 | for idx, reference_box in enumerate(reference_bboxes): 177 | positive_position = torch.tensor(positive_position_list[index[idx]], dtype=torch.long).cuda() 178 | predicted_box = predicted_regress_feature[:,positive_position[:,0], positive_position[:,1]].permute(1,0) 179 | anchor_box = anchor[:,:, positive_position[:,0], positive_position[:,1]].permute(2,0,1) #[2,7] 180 | if predicted_box.shape[0] == 0: 181 | continue 182 | # predicted_box = torch.stack(box_list, dim=0) #[N,14], predicted bbox set 183 | # anchor_box = torch.stack(abox_list, dim=0) #[N,2,7], anchor bbox set 184 | 185 | loss = self.LossReg(reference_box, predicted_box, anchor_box) 186 | reg_loss +=loss 187 | 188 | 189 | return reg_loss 190 | 191 | if __name__ == '__main__': 192 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 193 | # print(anchor_set_1[0, 0, :, :]) 194 | # print(anchor_set_1[0, 1, :, :]) 195 | # save_image(anchor_set_1[0, 0, :, :]/70.0, 'anchor/x.png') 196 | # save_image(anchor_set_1[0, 1, :, :]/(70.0)+0.5, 'anchor/y.png') 197 | #### Not use this function but will be use next time... 198 | # def getIOUfeature(anchor_bbox_feature, ref_bboxes): 199 | # ''' 200 | # caution! it will be extremely slow i guess... 
need to make faster algorithm 201 | # :param anchor_bbox_feature: reference bbox feature (7,H,W) 202 | # :param ref_bboxes: predicted bbox feature (7,H,W) 203 | # :return:IOU_feature 204 | # ''' 205 | # anchor_bbox_feature = anchor_bbox_feature.cpu() 206 | # C, H, W = anchor_bbox_feature.shape 207 | # IOU_feature = torch.zeros((1, H, W)) 208 | # for h in range(H): 209 | # for w in range(W): 210 | # for ref_bbox in ref_bboxes: 211 | # anchor_bbox = anchor_bbox_feature[:, h, w] 212 | # distance = torch.sqrt(torch.sum(torch.pow((anchor_bbox[:3] - ref_bbox[:3]),2))) 213 | # if distance < 4.0: 214 | # ref_bbox_corners = get_3d_box(ref_bbox[:3], ref_bbox[3:6], ref_bbox[-1]) 215 | # anchor_bbox_corners = get_3d_box(anchor_bbox[:3], anchor_bbox[3:6], anchor_bbox[-1]) 216 | # (IOU_3d, IOU_2d) = box3d_iou(ref_bbox_corners, anchor_bbox_corners) 217 | # IOU_feature[:, h, w] = IOU_3d 218 | # else: 219 | # IOU_feature[:, h, w] = 0 220 | # return IOU_feature 221 | 222 | # def getClassRefFromDist(anchor_bbox_feature, ref_bboxes): 223 | # ''' 224 | # its extremely slow. how could we change this? 225 | # :param anchor_bbox_feature: reference bbox. list(bboxes) 226 | # :param ref_bboxes: predicted bbox feature. (7,H,W) 227 | # :return:class_feature. (2, H, W) 228 | # ''' 229 | # C, H, W = anchor_bbox_feature.shape 230 | # class_feature = torch.zeros((2, H, W)).cuda() 231 | # anchor_bbox_feature = anchor_bbox_feature.cuda() 232 | # ref_bboxes = ref_bboxes.cuda() 233 | # for h in range(H): 234 | # print("doing at ",h, "in class ref.") 235 | # for w in range(W): 236 | # for ref_bbox in ref_bboxes: 237 | # anchor_bbox = anchor_bbox_feature[:, h, w] 238 | # # distance = torch.sqrt(torch.sum(torch.pow((anchor_bbox[:3] - ref_bbox[:3]),2))) 239 | # # if class_feature[0, h, w] == 0 and class_feature[1, h, w] == 0: 240 | # # if distance < 1: 241 | # # class_feature[0, h, w] = 1 242 | # # elif distance > 4: 243 | # # class_feature[1, h, w] = 1 244 | # return class_feature 245 | 246 | # def getClassRefFromDist_fast(anchor_bbox_feature, ref_bboxes): 247 | # ''' 248 | # :param anchor_bbox_feature: reference bbox. list(bboxes) 249 | # :param ref_bboxes: predicted bbox feature. (7,H,W) 250 | # :return:class_feature. (2, H, W) 251 | # ''' 252 | # C, H, W = anchor_bbox_feature.shape 253 | # class_feature_pos = torch.zeros((1, H, W)).cuda() 254 | # class_feature_neg = torch.ones((1, H, W)).cuda() 255 | # class_feature = torch.cat((class_feature_pos,class_feature_neg)) 256 | # # anchor_bbox_feature = anchor_bbox_feature.cuda() 257 | # for ref_bbox in ref_bboxes: 258 | # point_x = int(ref_bbox[0]*10/4) 259 | # point_y = int((ref_bbox[1]*10 + 350)/4) 260 | # if point_x < 0 or point_x > int(700/4) - 1 or point_y < 0 or point_y > int(700/4) - 1: 261 | # continue 262 | # for x_int in range(5): 263 | # for y_int in range(5): 264 | # class_feature[0, point_x - 2 + x_int, point_y - 2 + y_int] = 1 265 | # class_feature[1, point_x - 2 + x_int, point_y - 2 + y_int] = 0 266 | # # if class_feature[0, h, w] == 0 and class_feature[1, h, w] == 0: 267 | # # if distance < 1: 268 | # # class_feature[0, h, w] = 1 269 | # # elif distance > 4: 270 | # # class_feature[1, h, w] = 1 271 | # return class_feature -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python script. 2 | 3 | # Press Shift+F10 to execute it or replace it with your code. 
4 | # Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import models 8 | 9 | 10 | class ResidualBlock(nn.Module): 11 | def __init__(self, in_channels, out_channels): 12 | super(ResidualBlock,self).__init__() 13 | self.in_channels, self.out_channels = in_channels, out_channels 14 | if self.should_apply_shortcut: 15 | self.conv1 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=(3, 3), stride=(2, 2), 16 | padding=(1, 1), bias=False) 17 | else: 18 | self.conv1 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=(3, 3), stride=(1, 1), 19 | padding=(1, 1), bias=False) 20 | self.bn1 = nn.BatchNorm2d(self.out_channels, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 21 | self.relu1 = nn.ReLU(inplace=True) 22 | self.conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 23 | bias=False) 24 | self.bn2 = nn.BatchNorm2d(self.out_channels, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 25 | self.relu2 = nn.ReLU(inplace=True) 26 | if self.should_apply_shortcut: 27 | self.down_conv = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=(1, 1), stride=(2, 2), 28 | bias=False) 29 | self.down_bn = nn.BatchNorm2d(self.out_channels, eps=1e-05, momentum=0.1, affine=True, 30 | track_running_stats=True) 31 | 32 | def forward(self, x): 33 | if self.should_apply_shortcut: 34 | residual = self.down_bn(self.down_conv(x)) 35 | else: 36 | residual = x 37 | x = self.relu1(self.bn1(self.conv1(x))) 38 | x = self.bn2(self.conv2(x)) 39 | x += residual 40 | x = self.relu2(x) 41 | return x 42 | 43 | @property 44 | def should_apply_shortcut(self): 45 | return self.in_channels != self.out_channels 46 | 47 | 48 | class ResidualBlockModule(nn.Module): 49 | def __init__(self, first_in_channel, last_out_channel, num_resblock): 50 | super(ResidualBlockModule, self).__init__() 51 | self.sequential = nn.Sequential() 52 | for i in range(num_resblock): 53 | if i == 0: 54 | self.sequential.add_module("resblock_" + str(i), 55 | ResidualBlock(first_in_channel, last_out_channel)) 56 | else: 57 | self.sequential.add_module("resblock_" + str(i), 58 | ResidualBlock(last_out_channel, last_out_channel)) 59 | def forward(self, x): 60 | x = self.sequential(x) 61 | return(x) 62 | 63 | 64 | class ResnetCustomed(nn.Module): 65 | def __init__(self, out_feature=(32, 64, 128, 192, 256), num_res_block=(1,2,4,6,6)): 66 | super(ResnetCustomed, self).__init__() 67 | self.layer1 = ResidualBlockModule(out_feature[0], out_feature[0], num_res_block[0]) 68 | self.layer2 = ResidualBlockModule(out_feature[0], out_feature[1], num_res_block[1]) 69 | self.layer3 = ResidualBlockModule(out_feature[1], out_feature[2], num_res_block[2]) 70 | self.layer4 = ResidualBlockModule(out_feature[2], out_feature[3], num_res_block[3]) 71 | self.layer5 = ResidualBlockModule(out_feature[3], out_feature[4], num_res_block[4]) 72 | 73 | def forward(self, x): 74 | x = self.layer1(x) 75 | x1 = self.layer2(x) 76 | x2 = self.layer3(x1) 77 | x3 = self.layer4(x2) 78 | x4 = self.layer5(x3) 79 | return x4, x3, x2 80 | 81 | 82 | class AnchorBoundingBoxFeature(nn.Module): 83 | def __init__(self, config): 84 | super(AnchorBoundingBoxFeature,self).__init__() 85 | self.config = config 86 | 87 | self.f_height = int(self.config["voxel_length"]/self.config["anchor_bbox_feature"]["reduced_scale"]) 88 | self.f_width = 
int(self.config["voxel_width"]/self.config["anchor_bbox_feature"]["reduced_scale"]) 89 | self.width = self.config["anchor_bbox_feature"]["width"] 90 | self.length = self.config["anchor_bbox_feature"]["length"] 91 | self.height = self.config["anchor_bbox_feature"]["height"] 92 | 93 | def forward(self): 94 | anc_x = torch.matmul( 95 | torch.linspace(self.config["lidar_x_min"], 96 | self.config["lidar_x_max"], 97 | self.f_height).view(self.f_height, 1), 98 | torch.ones(1, self.f_width)).view(1, self.f_height, self.f_width) 99 | anc_y = torch.matmul( 100 | torch.ones(self.f_height, 1), 101 | torch.linspace(self.config["lidar_y_min"], 102 | self.config["lidar_y_max"], 103 | self.f_width).view(1, self.f_width)).view(1, self.f_height, self.f_width) 104 | anc_z = torch.ones(1, self.f_height, self.f_width) * (-4.5) 105 | anc_w = torch.ones(1, self.f_height, self.f_width) * self.width 106 | anc_l = torch.ones(1, self.f_height, self.f_width) * self.length 107 | anc_h = torch.ones(1, self.f_height, self.f_width) * self.height 108 | anc_ori = torch.ones(1, self.f_height, self.f_width) * 0 109 | anc_ori_90 = torch.ones(1, self.f_height, self.f_width) * 3.1415926/2 110 | anc_set_1 = torch.cat((anc_x, anc_y, anc_z, anc_l, anc_w, anc_h, anc_ori), 0) 111 | anc_set_2 = torch.cat((anc_x, anc_y, anc_z, anc_l, anc_w, anc_h, anc_ori_90), 0) 112 | anc_set = torch.cat((anc_set_1,anc_set_2), dim=0) # dim = [2*7, self.f_height, self.f_width] 113 | return anc_set 114 | 115 | 116 | class OffsettoBbox(nn.Module): 117 | def __init__(self, config): 118 | super(OffsettoBbox, self).__init__() 119 | self.anchor_bbox_feature = AnchorBoundingBoxFeature(config) 120 | 121 | def forward(self, x): 122 | """ 123 | x: x_reg [b,num_anc*7,wid,hei] 124 | """ 125 | anc_set = self.anchor_bbox_feature().cuda().unsqueeze(0) 126 | pred_xy_1 = x[:,:2,:,:] * torch.sqrt(torch.pow(anc_set[:,3:4,:,:],2) + torch.pow(anc_set[:,4:5,:,:],2)) + anc_set[:,:2,:,:] 127 | pred_z_1 = x[:,2:3,:,:] * (anc_set[:,5:6,:,:]) + anc_set[:,2:3,:,:] 128 | 129 | pred_whl_1 = torch.exp(x[:,3:6,:,:]) * anc_set[:,3:6,:,:] 130 | pred_ori_1 = torch.atan2(torch.sin(x[:,6:7,:,:] + anc_set[:,6:7,:,:]), torch.cos(x[:,6:7,:,:] + anc_set[:,6:7,:,:])) 131 | pred_xy_2 = x[:,7:9,:,:] * torch.sqrt(torch.pow(anc_set[:,10:11,:,:],2) + torch.pow(anc_set[:,11:12,:,:],2)) + anc_set[:,7:9,:,:] 132 | pred_z_2 = x[:,9:10,:,:] * (anc_set[:,12:13,:,:]) + anc_set[:,9:10,:,:] 133 | pred_whl_2 = torch.exp(x[:,10:13,:,:]) * anc_set[:,10:13,:,:] 134 | pred_ori_2 = torch.atan2(torch.sin(x[:,13:14,:,:] + anc_set[:,13:14,:,:]), torch.cos(x[:,13:14,:,:] + anc_set[:,13:14,:,:])) 135 | pred_bbox_feature = torch.cat((pred_xy_1, pred_z_1, pred_whl_1, pred_ori_1, 136 | pred_xy_2, pred_z_2, pred_whl_2, pred_ori_2), dim=1) 137 | return pred_bbox_feature 138 | 139 | 140 | class LidarBackboneNetwork(nn.Module): 141 | def __init__(self, out_feature=(32, 64, 128, 192, 256), num_res_block=(1,2,4,6,6), Num_anchor = 2): 142 | super(LidarBackboneNetwork, self).__init__() 143 | self.backbone = ResnetCustomed(out_feature, num_res_block) 144 | self.num_anchor = Num_anchor 145 | 146 | # FPN 147 | self.latconv1 = nn.Conv2d(out_feature[-2], out_feature[-2], kernel_size=(1, 1), stride=(1, 1), bias=False) 148 | self.downconv1 = nn.Conv2d(out_feature[-1], out_feature[-2], kernel_size=(1, 1), stride=(1, 1), bias=False) 149 | self.upscale1 = nn.UpsamplingBilinear2d(scale_factor=2) 150 | self.latconv2 = nn.Conv2d(out_feature[-3], out_feature[-2], kernel_size=(1, 1), stride=(1, 1), bias=False) 151 | self.upscale2 = 
nn.UpsamplingBilinear2d(scale_factor=2) # NEED TO GENERALIZE IN BEV SIZE 152 | self.conv3 = nn.Conv2d(out_feature[-2], out_feature[-2], kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) 153 | 154 | self.classconv = nn.Conv2d(out_feature[-2], Num_anchor*2, kernel_size=(1, 1), stride=(1, 1), bias=False) 155 | self.softmax1 = nn.Softmax(dim=1) 156 | self.softmax2 = nn.Softmax(dim=1) 157 | self.bbox3dconv = nn.Conv2d(out_feature[-2], Num_anchor*7, kernel_size=(1, 1), stride=(1, 1), bias=False) 158 | 159 | def forward(self, x): 160 | x4, x3, x2 = self.backbone(x) 161 | x3 = self.latconv1(x3) 162 | x3_ = self.upscale1(self.downconv1(x4)) 163 | x3 += x3_ 164 | x2 = self.latconv2(x2) 165 | x2_ = self.upscale2(x3) 166 | x2 += x2_ 167 | x_pred = self.conv3(x2) 168 | x_cls = self.classconv(x_pred) 169 | x_cls_1 = self.softmax1(x_cls[:,:2]) 170 | x_cls_2 = self.softmax2(x_cls[:,2:4]) 171 | x_cls = torch.cat((x_cls_1,x_cls_2), dim=1) 172 | x_reg = self.bbox3dconv(x_pred) 173 | return x_cls, x_reg 174 | 175 | 176 | class ObjectDetection_DCF(nn.Module): 177 | def __init__(self, config): 178 | super(ObjectDetection_DCF, self).__init__() 179 | self.offset_to_bbox = OffsettoBbox(config) 180 | lm_config = config["lidar_module"] 181 | out_feature = (lm_config["out_feature1"], 182 | lm_config["out_feature2"], 183 | lm_config["out_feature3"], 184 | lm_config["out_feature4"], 185 | lm_config["out_feature5"]) 186 | num_resblock = (lm_config["num_res_block1"], 187 | lm_config["num_res_block2"], 188 | lm_config["num_res_block3"], 189 | lm_config["num_res_block4"], 190 | lm_config["num_res_block5"]) 191 | self.lidar_backbone = LidarBackboneNetwork(out_feature, num_resblock) 192 | # self.image_backbone = models.resnet18(pretrained=True) 193 | 194 | def forward(self, x_lidar, x_image): 195 | lidar_pred_cls, lidar_pred_reg = self.lidar_backbone(x_lidar) 196 | # lidar_pred_cls = self.lidar_backbone(x_lidar) 197 | # image_ = self.image_backbone(x_image) 198 | lidar_pred_bbox = self.offset_to_bbox(lidar_pred_reg) 199 | """ 200 | TODO 201 | 1. make continuous fusion layer from image 202 | 2. add with lidar feature 203 | """ 204 | return torch.cat((lidar_pred_cls, lidar_pred_reg, lidar_pred_bbox), dim = 1) #, lidar_pred_bbox 205 | 206 | 207 | # Press the green button in the gutter to run the script. 208 | if __name__ == '__main__': 209 | image_backbone = models.resnet18(pretrained=True) 210 | model = LidarBackboneNetwork() 211 | pred = model(torch.ones(4, 32, 700, 700)) 212 | print(pred[0].shape) 213 | pred2 = image_backbone(torch.ones(4, 3, 480, 640)) 214 | a = 1 215 | print("model inference is good") 216 | # See PyCharm help at https://www.jetbrains.com/help/pycharm/ 217 | -------------------------------------------------------------------------------- /separation_axis_theorem.py: -------------------------------------------------------------------------------- 1 | 2 | # This code performs collision testing of convex 2D polyedra by means 3 | # of the Hyperplane separation theorem, also known as Separating axis theorem (SAT). 4 | # 5 | # For more information visit: 6 | # https://en.wikipedia.org/wiki/Hyperplane_separation_theorem 7 | # 8 | # Copyright (C) 2016, Juan Antonio Aldea Armenteros 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 
13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see . 21 | import math 22 | 23 | 24 | # -*- coding: utf8 -*- 25 | 26 | def normalize(v): 27 | norm = math.sqrt(v[0] ** 2 + v[1] ** 2) 28 | return (v[0] / norm, v[1] / norm) 29 | 30 | def dot(a, b): 31 | return a[0] * b[0] + a[1] * b[1]; 32 | 33 | def edge_direction(p0, p1): 34 | return (p1[0] - p0[0], p1[1] - p0[1]); 35 | 36 | def orthogonal(v): 37 | return (v[1], -v[0]) 38 | 39 | def vertices_to_edges(vertices): 40 | return [edge_direction(vertices[i], vertices[(i + 1) % len(vertices)]) \ 41 | for i in range(len(vertices))] 42 | 43 | def project(vertices, axis): 44 | dots = [dot(vertex, axis) for vertex in vertices] 45 | return [min(dots), max(dots)] 46 | 47 | def contains(n, range_): 48 | a = range_[0] 49 | b = range_[1] 50 | if b < a: 51 | a = range_[1] 52 | b = range_[0] 53 | return (n >= a) and (n <= b); 54 | 55 | def overlap(a, b): 56 | if contains(a[0], b): 57 | return True; 58 | if contains(a[1], b): 59 | return True; 60 | if contains(b[0], a): 61 | return True; 62 | if contains(b[1], a): 63 | return True; 64 | return False; 65 | 66 | def separating_axis_theorem(vertices_a, vertices_b): 67 | edges_a = vertices_to_edges(vertices_a); 68 | edges_b = vertices_to_edges(vertices_b); 69 | 70 | edges = edges_a + edges_b 71 | 72 | axes = [normalize(orthogonal(edge)) for edge in edges] 73 | 74 | for i in range(len(axes)): 75 | projection_a = project(vertices_a, axes[i]) 76 | projection_b = project(vertices_b, axes[i]) 77 | overlapping = overlap(projection_a, projection_b) 78 | if not overlapping: 79 | return False; 80 | return True 81 | 82 | def get_vertice_rect(center, box_size, heading_angle): 83 | 84 | center_x = center[0] 85 | center_y = center[1] 86 | yaw = heading_angle 87 | W = box_size[1] 88 | L = box_size[0] # z rotation is difference i think (90 deg) 89 | vertex_3 = (center_x + (L/2*math.cos(yaw) - W/2*math.sin(yaw)), center_y + (L/2*math.sin(yaw) + W/2*math.cos(yaw))) 90 | vertex_4 = (center_x + (-L/2*math.cos(yaw) - W/2*math.sin(yaw)), center_y + (-L/2*math.sin(yaw) + W/2*math.cos(yaw))) 91 | vertex_1 = (center_x + (-L/2*math.cos(yaw) + W/2*math.sin(yaw)), center_y + (-L/2*math.sin(yaw) - W/2*math.cos(yaw))) 92 | vertex_2 = (center_x + (L/2*math.cos(yaw) + W/2*math.sin(yaw)), center_y + (L/2*math.sin(yaw) - W/2*math.cos(yaw))) 93 | vertices = [vertex_1, vertex_2, vertex_3, vertex_4] 94 | return vertices 95 | 96 | 97 | 98 | def main(): 99 | a_vertices = [(0, 0), (70, 70), (70, 0), (0, 70)] 100 | b_vertices = [(70, 70), (150, 70), (150, 150),(70, 150)] 101 | c_vertices = [(30, 30), (150, 70), (70, 150)] 102 | 103 | print (separating_axis_theorem(a_vertices, b_vertices)) 104 | print (separating_axis_theorem(a_vertices, c_vertices)) 105 | print (separating_axis_theorem(b_vertices, c_vertices)) 106 | 107 | 108 | if __name__ == "__main__": 109 | main() -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import random 3 | from numpy.core.fromnumeric import argmax 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from 
torchvision.utils import save_image 9 | from torch.nn.parallel import DistributedDataParallel as DDP 10 | 11 | import os 12 | import argparse 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | import matplotlib.pyplot as plt 16 | import time 17 | 18 | from data_import_carla import CarlaDataset 19 | from loss import LossTotal 20 | from model import LidarBackboneNetwork, ObjectDetection_DCF 21 | from data_import import putBoundingBox 22 | from IOU import get_3d_box, box3d_iou 23 | from separation_axis_theorem import get_vertice_rect, separating_axis_theorem 24 | 25 | 26 | 27 | 28 | class Test: 29 | def __init__(self, pre_trained_net, config): 30 | """ 31 | configuration 32 | nms_iou_score_theshold (0.01) 33 | plot_AP_graph (False) 34 | """ 35 | self.net = pre_trained_net 36 | self.config = config 37 | self.net.eval() 38 | self.num_TP_set = {} 39 | self.num_TP_set_per_predbox = [] 40 | self.num_T = 0 41 | self.num_P = 0 42 | self.IOU_threshold = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] 43 | self.loss_total = LossTotal(config) 44 | for iou_threshold in self.IOU_threshold: 45 | self.num_TP_set[iou_threshold] = 0 46 | 47 | def get_num_T(self): 48 | return self.num_T 49 | 50 | def get_num_P(self): 51 | return self.num_P 52 | 53 | def get_num_TP_set(self): 54 | return self.num_TP_set 55 | 56 | def save_feature_result(self,bev_image, ref_bboxes, num_ref_bboxes, i, epoch, dir="./result"): 57 | B = ref_bboxes.shape[0] 58 | file_list = os.listdir("./") 59 | if not "result" in file_list: 60 | os.mkdir(dir) 61 | file_list = os.listdir(dir) 62 | if not "epoch_{}".format(epoch) in file_list: 63 | os.mkdir(dir+"/epoch_{}".format(epoch)) 64 | ref_bboxes = ref_bboxes.cpu().clone().numpy() 65 | num_ref_bboxes = num_ref_bboxes.cpu().clone().numpy() 66 | for b in range(B): 67 | save_image(self.pred_cls[b, 1, :, :], dir+"/epoch_{}/{}_in_{}_positive_image.png".format(epoch,i,b )) 68 | save_image(self.pred_cls[b, 0, :, :], dir+"/epoch_{}/{}_in_{}_negative_image.png".format(epoch,i,b)) 69 | bev_image_ = 0.5*bev_image[b].permute(1,2,0) 70 | bev_image_with_bbox = putBoundingBox(bev_image_, self.refined_bbox[b], self.config, color="green").permute(2,0,1).type(torch.float) 71 | save_image(bev_image_with_bbox, dir+"/epoch_{}/{}_in_{}_bev_image_with_predbbox.png".format(epoch,i,b)) 72 | 73 | bev_image_with_bbox = putBoundingBox(bev_image_, ref_bboxes[b,:num_ref_bboxes[b]], self.config, color="red").permute(2,0,1).type(torch.float) 74 | save_image(bev_image_with_bbox, dir+"/epoch_{}/{}_in_{}_bev_image_with_refbbox.png".format(epoch,i,b)) 75 | 76 | def get_eval_value_onestep(self, lidar_voxel, camera_image, ref_bboxes, num_ref_bboxes): 77 | 78 | pred = self.net(lidar_voxel, camera_image) 79 | pred_cls, pred_reg, pred_bbox_f = torch.split(pred,[4, 14, 14], dim=1) 80 | self.pred_cls = pred_cls.cpu().clone().detach() 81 | pred_bbox_f = pred_bbox_f.cpu().clone().detach() 82 | self.loss_value = self.loss_total(ref_bboxes.cuda(), num_ref_bboxes, pred_cls, pred_reg) 83 | pred_bboxes = self.get_bboxes(self.pred_cls, pred_bbox_f, score_threshold=self.config["score_threshold"]) # shape: b * list[tensor(N * 7)] 84 | # self.refined_bbox = self.NMS_IOU(pred_bboxes, nms_iou_score_theshold=self.config["nms_iou_threshold"]) # shape: b * list[N *list[tensor(7)]] 85 | self.refined_bbox = self.NMS_SAT(pred_bboxes) # shape: b * list[N *list[tensor(7)]] 86 | self.precision_recall_singleshot(self.refined_bbox, ref_bboxes) # single batch 87 | 88 | def get_bboxes(self, pred_cls, pred_reg, score_threshold=0.8): 89 | """ 90 | get 
91 |         """
92 |         B, C_cls, W, H = pred_cls.shape
93 |         B, C_reg, W, H = pred_reg.shape
94 |         anchor_numb = int(C_cls/2)
95 |         reg_channel_per_anc = int(C_reg/anchor_numb)
96 |         selected_bboxes_batch = []
97 |         for b in range(B):
98 |             selected_bboxes = []
99 |             for a in range(anchor_numb):
100 |                 cls_pos = anchor_numb * a + 1  # channel of the positive-class score for anchor a
101 |                 reg_cha = reg_channel_per_anc * a
102 |                 pred_cls_ = pred_cls[b,cls_pos].view(-1) > score_threshold
103 |                 indices = torch.nonzero(pred_cls_).view(-1)
104 |                 pred_reg_ = pred_reg[b, reg_cha:reg_cha+reg_channel_per_anc, :, :].view((reg_channel_per_anc,-1))
105 |                 selected_bboxes_ = pred_reg_[:,indices].permute(1,0)
106 |                 selected_bboxes += [selected_bboxes_]
107 |             selected_bboxes_batch.append(torch.cat(selected_bboxes, dim=0))
108 |         return selected_bboxes_batch
109 | 
110 |     def NMS_IOU(self, pred_bboxes, nms_iou_score_theshold=0.01):  # greedy NMS: drop a candidate if its 3D IoU with any kept box exceeds the threshold
111 |         filtered_bboxes_batch = []
112 |         B = len(pred_bboxes)
113 |         for b in range(B):
114 |             filtered_bboxes = []
115 |             filtered_bboxes_index = []
116 |             print("pred bbox: ", pred_bboxes[b].shape[0])
117 |             for i in range(pred_bboxes[b].shape[0]):
118 |                 bbox = pred_bboxes[b][i]
119 |                 if len(filtered_bboxes) == 0:
120 |                     filtered_bboxes.append(bbox)
121 |                     continue
122 |                 center = bbox[:3].numpy()
123 |                 box_size = bbox[3:6].numpy()
124 |                 heading_angle = bbox[6].numpy()
125 |                 cand_bbox_corners = get_3d_box(center, box_size, heading_angle)
126 |                 j = 0
127 |                 for selected_bbox in filtered_bboxes:
128 |                     j += 1
129 |                     center_ = selected_bbox[:3].numpy()+0.0001  # small offset so identical boxes do not give a degenerate intersection polygon
130 |                     box_size_ = selected_bbox[3:6].numpy()
131 |                     heading_angle_ = selected_bbox[6].numpy()
132 |                     selected_bbox_corners = get_3d_box(center_, box_size_, heading_angle_)
133 |                     (IOU_3d, IOU_2d) = box3d_iou(cand_bbox_corners, selected_bbox_corners)
134 |                     if IOU_3d > nms_iou_score_theshold:
135 |                         break
136 |                     else:
137 |                         if j == len(filtered_bboxes):  # no overlap with any kept box, so keep the candidate
138 |                             filtered_bboxes.append(bbox)
139 |             filtered_bboxes_batch.append(filtered_bboxes)
140 |         return filtered_bboxes_batch
141 | 
142 |     def NMS_SAT(self, pred_bboxes):
143 |         # NMS variant that replaces the IoU test with a separating axis theorem (SAT) overlap check
144 |         filtered_bboxes_batch = []
145 |         B = len(pred_bboxes)
146 |         for b in range(B):
147 |             filtered_bboxes = []
148 |             filtered_bboxes_index = []
149 |             # if pred_bboxes[b].shape[0] == 0:
150 |             #     filtered_bboxes_batch.append(None)
151 |             #     continue
152 |             for i in range(pred_bboxes[b].shape[0]):
153 |                 bbox = pred_bboxes[b][i]
154 |                 if len(filtered_bboxes) == 0:
155 |                     filtered_bboxes.append(bbox)
156 |                     continue
157 |                 center = bbox[:3].numpy()
158 |                 box_size = bbox[3:6].numpy()
159 |                 heading_angle = bbox[6].numpy()
160 |                 cand_bbox_corners = get_vertice_rect(center, box_size, heading_angle)
161 |                 j = 0
162 |                 for selected_bbox in filtered_bboxes:
163 |                     j += 1
164 |                     center_ = selected_bbox[:3].numpy()
165 |                     box_size_ = selected_bbox[3:6].numpy()
166 |                     heading_angle_ = selected_bbox[6].numpy()
167 |                     selected_bbox_corners = get_vertice_rect(center_, box_size_, heading_angle_)
168 |                     is_overlapped = separating_axis_theorem(cand_bbox_corners, selected_bbox_corners)
169 |                     if is_overlapped:
170 |                         break
171 |                     else:
172 |                         if j == len(filtered_bboxes):  # no overlap with any kept box, so keep the candidate
173 |                             filtered_bboxes.append(bbox)
174 |             filtered_bboxes_batch.append(filtered_bboxes)
175 |         return filtered_bboxes_batch
176 | 
177 |     def precision_recall_singleshot(self, pred_bboxes, ref_bboxes):
178 |         B,_,_ = ref_bboxes.shape
179 |         for b in range(B):
180 |             pred_bboxes_sb = pred_bboxes[b]
181 |             ref_bboxes_sb = ref_bboxes[b]
182 |             if pred_bboxes_sb is not None:
183 |                 for pred_bbox in pred_bboxes_sb:
184 |                     self.num_P += 1
185 |                     center = pred_bbox[:3].numpy()
186 |                     box_size = pred_bbox[3:6].numpy()
187 |                     heading_angle = pred_bbox[6].numpy()
188 |                     pred_bbox_corners = get_3d_box(center, box_size, heading_angle)
189 |                     true_positive_cand_score = {}
190 |                     for ref_bbox in ref_bboxes_sb:
191 |                         if ref_bbox[-1] == 1:  # only evaluate reference boxes whose flag (last element) is set
192 |                             center_ = ref_bbox[:3].numpy()
193 |                             box_size_ = ref_bbox[3:6].numpy()
194 |                             heading_angle_ = ref_bbox[6].numpy()
195 |                             ref_bbox_corners = get_3d_box(center_, box_size_, heading_angle_)
196 |                             (IOU_3d, IOU_2d) = box3d_iou(pred_bbox_corners, ref_bbox_corners)
197 |                             for iou_threshold in self.IOU_threshold:
198 |                                 if IOU_2d > iou_threshold:  # matching is done on the BEV (2D) IoU
199 |                                     true_positive_cand_score[iou_threshold] = IOU_2d
200 |                     for iou_threshold in self.IOU_threshold:
201 |                         if iou_threshold in true_positive_cand_score:
202 |                             self.num_TP_set[iou_threshold] += 1
203 |                     self.num_TP_set_per_predbox.append(self.num_TP_set.copy())  # snapshot of the running TP counts (a copy, so later updates do not alter stored entries)
204 |             for ref_bbox_ in ref_bboxes_sb:
205 |                 if ref_bbox_[-1] == 1:
206 |                     self.num_T += 1
207 | 
208 |     def display_average_precision(self, plot_AP_graph=False):
209 |         """
210 |         Report precision and recall for every IoU threshold and optionally plot the precision-recall curves.
211 |         """
212 |         total_precision = {}
213 |         total_recall = {}
214 |         for iou_threshold in self.IOU_threshold:
215 |             total_precision[iou_threshold] = self.num_TP_set[iou_threshold] / (self.num_P + 0.01)  # +0.01 guards against division by zero
216 |             total_recall[iou_threshold] = self.num_TP_set[iou_threshold] / (self.num_T + 0.01)
217 |         # print("Total Precision: ", total_precision)
218 |         # print("Total Recall: ", total_recall)
219 |         precisions = {}
220 |         recalls = {}
221 |         num_P = 0
222 |         for iou_threshold in self.IOU_threshold:
223 |             precisions[iou_threshold] = [1]
224 |             recalls[iou_threshold] = [0]
225 |         for num_tp_set in self.num_TP_set_per_predbox:
226 |             num_P += 1
227 |             for iou_threshold in self.IOU_threshold:
228 |                 precisions[iou_threshold].append(num_tp_set[iou_threshold] / num_P)
229 |                 recalls[iou_threshold].append(num_tp_set[iou_threshold] / self.num_T)
230 |         if plot_AP_graph:
231 |             fig = plt.figure()
232 |             ax = fig.add_subplot(111)
233 |             lines = []
234 |             for iou_threshold in self.IOU_threshold:
235 |                 line = 0
236 |                 if len(recalls[iou_threshold]) > 1:
237 |                     line = ax.plot(recalls[iou_threshold], precisions[iou_threshold])
238 |                 else:
239 |                     line = ax.plot([0,0])
240 |                 lines.append(line)
241 |             fig.legend(lines, labels=self.IOU_threshold, title="IOU threshold value")
242 |             os.makedirs('ap_result', exist_ok=True)  # make sure the output directory exists
243 |             fig.savefig('ap_result/test.png')
244 |     def initialize_ap(self):
245 |         self.num_TP_set = {}
246 |         self.num_T = 0
247 |         self.num_P = 0
248 |         self.num_TP_set_per_predbox = []
249 |         for iou_threshold in self.IOU_threshold:
250 |             self.num_TP_set[iou_threshold] = 0
251 | 
252 | 
253 | if __name__ == '__main__':
254 |     parser = argparse.ArgumentParser(description='deep continuous fusion evaluation')
255 |     parser.add_argument('--data', type=str, default="carla", help='Data type, choose "carla" or "kitti"')
256 |     parser.add_argument('--cuda', type=str, default="0", help="list of cuda visible device number. you can choose 0~7 in list. [EX] --cuda 0,3,4")
257 |     parser.add_argument('--port', type=str, default='12233', help="master port number. default is 12233")
defaut is 12233") 258 | args = parser.parse_args() 259 | dataset_category = args.data 260 | cuda_vis_dev_str = args.cuda 261 | master_port = args.port 262 | print(cuda_vis_dev_str) 263 | device_id_source = cuda_vis_dev_str.split(",") 264 | device_id = [i for i in range(len(device_id_source))] 265 | os.environ['CUDA_VISIBLE_DEVICES'] = cuda_vis_dev_str 266 | os.environ['MASTER_ADDR'] = 'localhost' 267 | os.environ['MASTER_PORT'] = master_port 268 | 269 | 270 | torch.distributed.init_process_group(backend='nccl', world_size=1, rank=0) 271 | # Focus on test dataset 272 | if dataset_category == "carla": 273 | dataset = CarlaDataset(mode="test",want_bev_image=True) 274 | print("carla dataset is used for training") 275 | elif dataset_category =="kitti": 276 | dataset = KittiDataset(mode="test") 277 | print("kitti dataset is used for training") 278 | print("dataset is ready") 279 | data_loader = torch.utils.data.DataLoader(dataset, 280 | batch_size=2, 281 | shuffle=True) 282 | # Load pre-trained model. you can use the model during training instead of test_model 283 | test_model = ObjectDetection_DCF().cuda() 284 | test_model = DDP(test_model,device_ids=device_id, output_device=0, find_unused_parameters=True) 285 | test_model.load_state_dict(torch.load("./saved_model/model")) 286 | test = Test(test_model) 287 | data_length = len(dataset) 288 | loss_value = None 289 | 290 | for batch_ndx, sample in enumerate(data_loader): 291 | print("batch_ndx is ", batch_ndx) 292 | print("sample keys are ", sample.keys()) 293 | print("bbox shape is ", sample["bboxes"].shape) 294 | print("image shape is ", sample["image"].shape) 295 | print("pointcloud shape is ", sample["pointcloud"].shape) 296 | test_index = np.random.randint(data_length) 297 | image_data = sample['image'].cuda() 298 | point_voxel = sample['pointcloud'].cuda() 299 | reference_bboxes = sample['bboxes'].cpu().clone().detach() 300 | num_ref_bboxes = sample['num_bboxes'] 301 | bev_image = sample['lidar_bev_2Dimage'] 302 | 303 | # evaluate AP in one image and voxel lidar 304 | test.get_eval_value_onestep(point_voxel, image_data, reference_bboxes, num_ref_bboxes) 305 | test.save_feature_result(bev_image, reference_bboxes, num_ref_bboxes, batch_ndx, 99) 306 | print("accumulated number of true data is ", test.get_num_T()) 307 | print("accumulated number of positive data is ", test.get_num_P()) 308 | print("accumulated number of true positive data is ", test.get_num_TP_set()) 309 | print("="*50) 310 | if batch_ndx > 10: 311 | break 312 | 313 | # display average-precision plot and mAP 314 | test.display_average_precision(plot_AP_graph=True) 315 | # MUST DO WHEN U DISPLAY ALL OF RESULTS 316 | test.initialize_ap() 317 | 318 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torchvision.utils import save_image 5 | from torch.nn.parallel import DistributedDataParallel as DDP 6 | 7 | import os 8 | import numpy as np 9 | import yaml 10 | 11 | from data_import_carla import CarlaDataset 12 | # from kitti import KittiDataset 13 | from loss import LossTotal 14 | from model import ObjectDetection_DCF 15 | from test import Test 16 | 17 | class Train(nn.Module): 18 | def __init__(self, config): 19 | super(Train, self).__init__() 20 | device_id_source = config["cuda_visible_id"].split(",") 21 | device_id = [i for i in range(len(device_id_source))] 22 | 
23 |         self.model = ObjectDetection_DCF(config).cuda()
24 |         self.model = DDP(self.model, device_ids=device_id, output_device=0, find_unused_parameters=True)
25 |         self.loss_value = None
26 |         lr = config["learning_rate"]
27 |         beta1 = config["beta1"]
28 |         self.optimizer = optim.Adam(self.model.parameters(), lr=lr, betas=(beta1, 0.999))
29 | 
30 |     def one_step(self, lidar_voxel, camera_image, object_data, num_ref_box):  # one optimizer update on a single batch
31 |         pred = self.model(lidar_voxel, camera_image)
32 |         pred_cls, pred_reg, pred_bbox_f = torch.split(pred,[4, 14, 14], dim=1)
33 |         self.loss_value = self.loss_total(object_data, num_ref_box, pred_cls, pred_reg)
34 |         self.optimizer.zero_grad()
35 |         self.loss_value.backward()
36 |         self.optimizer.step()
37 | 
38 |     def get_loss_value(self, lidar_voxel, camera_image, object_data, num_ref_box):  # forward pass only, used for logging
39 |         pred = self.model(lidar_voxel, camera_image)
40 |         pred_cls, pred_reg, pred_bbox_f = torch.split(pred,[4, 14, 14], dim=1)
41 |         self.loss_value = self.loss_total(object_data, num_ref_box, pred_cls, pred_reg)
42 |         return self.loss_value.item(), pred_cls, pred_reg
43 | 
44 | 
45 | if __name__ == '__main__':
46 |     CONFIG_PATH = "./config/"
47 |     config_name = "config_carla.yaml"
48 |     with open(os.path.join(CONFIG_PATH, config_name)) as file:
49 |         config = yaml.safe_load(file)
50 | 
51 |     device_id_source = config["cuda_visible_id"].split(",")
52 |     device_id = [i for i in range(len(device_id_source))]
53 |     os.environ['CUDA_VISIBLE_DEVICES'] = config["cuda_visible_id"]
54 |     os.environ['MASTER_ADDR'] = 'localhost'
55 |     os.environ['MASTER_PORT'] = config["port_number"]
56 |     torch.distributed.init_process_group(backend='nccl', world_size=1, rank=0)
57 |     if config["dataset_name"] == "carla":
58 |         dataset = CarlaDataset(config)
59 |         dataset_test = CarlaDataset(config, mode="test", want_bev_image=True)
60 |         print("carla dataset is used for training")
61 |     elif config["dataset_name"] == "kitti":
62 |         dataset = KittiDataset()  # requires a KittiDataset implementation (import is commented out above)
63 |         dataset_test = KittiDataset(mode="test")
64 |         print("kitti dataset is used for training")
65 |     os.makedirs("./saved_model", exist_ok=True)  # checkpoint directory used at the start of every epoch
66 |     train_sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=True)
67 |     train_sampler_test = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=True)
68 |     data_loader = torch.utils.data.DataLoader(dataset,
69 |                                                batch_size=config["batch_size"],
70 |                                                sampler=train_sampler)
71 |     data_loader_test = torch.utils.data.DataLoader(dataset_test,
72 |                                                    batch_size=config["batch_size"],
73 |                                                    sampler=train_sampler_test)
74 |     num_epochs = config["num_epoch"]
75 |     training = Train(config)
76 |     test = Test(training.model, config)
77 |     data_length = len(data_loader)
78 |     for epoch in range(num_epochs):
79 |         torch.save(training.model.state_dict(), "./saved_model/" + config["saved_model_name"])
80 |         for batch_ndx, sample in enumerate(data_loader):
81 |             image_data = sample['image'].cuda()
82 |             point_voxel = sample['pointcloud'].cuda()
83 |             reference_bboxes = sample["bboxes"].cuda()
84 |             num_ref_bboxes = sample["num_bboxes"]
85 |             training.one_step(point_voxel, image_data, reference_bboxes, num_ref_bboxes)
86 |             if batch_ndx % 100 == 0:
87 |                 print("training batch", batch_ndx, "is processed")
88 |             if batch_ndx % 500 == 0:
89 |                 test_index = np.random.randint(len(dataset))
90 |                 loss_value, _, _ = training.get_loss_value(point_voxel, image_data, reference_bboxes, num_ref_bboxes)
91 |                 print("="*50)
92 |                 print('[%d/%d][%d/%d]\tLoss: %.4f in training dataset'
93 |                       % (epoch, num_epochs, batch_ndx, data_length, loss_value))
94 |                 for batch_ndx_, sample_ 
in enumerate(data_loader_test): 95 | image_data_ = sample_['image'].cuda() 96 | point_voxel_ = sample_['pointcloud'].cuda() 97 | reference_bboxes_ = sample_['bboxes'].cpu().clone().detach() 98 | num_ref_bboxes_ = sample_["num_bboxes"] 99 | bev_image_ = sample_["lidar_bev_2Dimage"] 100 | test.get_eval_value_onestep(point_voxel_, image_data_, reference_bboxes_, num_ref_bboxes_) 101 | test.save_feature_result(bev_image_, reference_bboxes_, num_ref_bboxes_, batch_ndx_, epoch) 102 | if batch_ndx_ > 5: 103 | print("accumulated number of true data is ", test.get_num_T()) 104 | print("accumulated number of positive data is ", test.get_num_P()) 105 | print("accumulated number of true positive data is ", test.get_num_TP_set()) 106 | break 107 | test.display_average_precision(plot_AP_graph=config["plot_AP_graph"]) 108 | print("="*50) 109 | test.initialize_ap() 110 | for batch_ndx, sample in enumerate(data_loader_test): 111 | image_data = sample['image'].cuda() 112 | point_voxel = sample['pointcloud'].cuda() 113 | reference_bboxes = sample['bboxes'].cpu().clone().detach() 114 | num_ref_bboxes = sample["num_bboxes"] 115 | bev_image = sample["lidar_bev_2Dimage"] 116 | test.get_eval_value_onestep(point_voxel, image_data, reference_bboxes, num_ref_bboxes) 117 | test.save_feature_result(bev_image, reference_bboxes, num_ref_bboxes, batch_ndx, epoch) 118 | if batch_ndx > 10: 119 | print("accumulated number of true data is ", test.get_num_T()) 120 | print("accumulated number of positive data is ", test.get_num_P()) 121 | print("accumulated number of true positive data is ", test.get_num_TP_set()) 122 | print("="*50) 123 | break 124 | test.display_average_precision(plot_AP_graph=config["plot_AP_graph"]) 125 | print("="*50) 126 | test.initialize_ap() 127 | --------------------------------------------------------------------------------
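Note: both train.py and test.py above are driven by config/config_carla.yaml. The sketch below only lists the keys those two scripts read directly; the values are illustrative assumptions rather than the repository's actual defaults, and the data, model, and loss modules read additional keys from the same file.

# Sketch (assumption): configuration keys accessed by train.py / test.py above,
# with placeholder values. Check config/config_carla.yaml for the real settings.
example_config = {
    "dataset_name": "carla",        # "carla" or "kitti"
    "cuda_visible_id": "0",         # value written to CUDA_VISIBLE_DEVICES
    "port_number": "12233",         # MASTER_PORT for torch.distributed
    "learning_rate": 1e-4,          # Adam learning rate
    "beta1": 0.9,                   # first Adam momentum coefficient
    "batch_size": 2,
    "num_epoch": 50,
    "saved_model_name": "model",    # checkpoint name under ./saved_model/
    "score_threshold": 0.8,         # box score cut-off in Test.get_bboxes
    "nms_iou_threshold": 0.01,      # IoU threshold for Test.NMS_IOU
    "plot_AP_graph": False,         # whether Test saves the precision-recall plot
}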