├── .gitignore
├── data
│   ├── gen_train_val.py
│   ├── crop.py
│   └── kitti.py
├── config.py
├── README.md
├── loss.py
├── test.py
├── data_aug.py
├── voxelnet.py
├── train.py
├── test_utils.py
└── utils.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/KITTI/
2 | results/
3 | nms/_ext/
4 | build/

--------------------------------------------------------------------------------
/data/gen_train_val.py:
--------------------------------------------------------------------------------
1 | import os
2 | from sklearn.model_selection import train_test_split
3 | 
4 | root = '/data/cxg1/VoxelNet_pro/Data/training'
5 | train_file = open(os.path.join(root, 'train.txt'), 'w')
6 | val_file = open(os.path.join(root, 'val.txt'), 'w')
7 | file_ids = ["%06d\n" % i for i in range(0, 7481)]
8 | train_ids, val_ids = train_test_split(file_ids, test_size=0.3)
9 | print(len(train_ids))
10 | train_file.writelines(train_ids)
11 | val_file.writelines(val_ids)
12 | train_file.close()
13 | val_file.close()

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | 
4 | class config:
5 | 
6 |     # classes
7 |     class_list = ['Car', 'Van']
8 | 
9 |     # batch size
10 |     N = 2
11 | 
12 |     # maximum number of points per voxel
13 |     T = 35
14 | 
15 |     # voxel size
16 |     vd = 0.4
17 |     vh = 0.2
18 |     vw = 0.2
19 | 
20 |     # point cloud range
21 |     xrange = (0.0, 70.4)
22 |     yrange = (-40, 40)
23 |     zrange = (-3, 1)
24 | 
25 |     # voxel grid
26 |     W = math.ceil((xrange[1] - xrange[0]) / vw)
27 |     H = math.ceil((yrange[1] - yrange[0]) / vh)
28 |     D = math.ceil((zrange[1] - zrange[0]) / vd)
29 | 
30 |     # # iou threshold
31 |     # pos_threshold = 0.9
32 |     # neg_threshold = 0.45
33 | 
34 |     # anchors: (200, 176, 2, 7)  x y z h w l r
35 |     x = np.linspace(xrange[0]+vw, xrange[1]-vw, W//2)
36 |     y = np.linspace(yrange[0]+vh, yrange[1]-vh, H//2)
37 |     cx, cy = np.meshgrid(x, y)
38 |     # each of cx, cy is (H//2, W//2, 2)
39 |     cx = np.tile(cx[..., np.newaxis], 2)
40 |     cy = np.tile(cy[..., np.newaxis], 2)
41 |     cz = np.ones_like(cx) * -1.0
42 |     w = np.ones_like(cx) * 1.6
43 |     l = np.ones_like(cx) * 3.9
44 |     h = np.ones_like(cx) * 1.56
45 |     r = np.ones_like(cx)
46 |     r[..., 0] = 0
47 |     r[..., 1] = np.pi/2
48 |     anchors = np.stack([cx, cy, cz, h, w, l, r], axis=-1)
49 | 
50 |     anchors_per_position = 2
51 | 
52 |     # non-maximum suppression
53 |     nms_threshold = 1e-3
54 |     score_threshold = 0.9
55 | 
56 |     device = "cuda:2"
57 |     num_dim = 51
58 | 
59 |     last_epoch = 0
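A quick sanity check of the grid and anchor shapes defined above (a standalone sketch, not part of the repo; it only assumes `config.py` imports cleanly):

```python
from config import config as cfg

# voxel grid implied by the ranges and voxel sizes above
print(cfg.W, cfg.H, cfg.D)        # 352 400 10
# anchors live on the half-resolution BEV map, two headings per cell
print(cfg.anchors.shape)          # (200, 176, 2, 7) = (H//2, W//2, anchors, [x y z h w l r])
```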
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 | 
3 | This is an unofficial PyTorch implementation of [VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection](https://arxiv.org/abs/1711.06396). A large part of this project is based on the work [here](https://github.com/jeasinema/VoxelNet-tensorflow).
4 | # Dependencies
5 | - `python3.5+`
6 | - `pytorch` (tested on 1.4)
7 | - `opencv`
8 | - `shapely`
9 | - `mayavi`
10 | 
11 | # Installation
12 | 1. Clone this repository.
13 | 2. The NMS and box-overlap operations are based on [detectron2](https://github.com/facebookresearch/detectron2); please install that library.
14 | 
15 | 
16 | # Data Preparation
17 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Data to download include:
18 |     * Velodyne point clouds (29 GB): input data to VoxelNet
19 |     * Training labels of object data set (5 MB): input label to VoxelNet
20 |     * Camera calibration matrices of object data set (16 MB): for visualization of predictions
21 |     * Left color images of object data set (12 GB): for visualization of predictions
22 | 
23 | 2. This project trains and validates on cropped point cloud data: points that fall outside the image field of view are removed.
24 | ```bash
25 | $ python3 data/crop.py
26 | ```
27 | 3. Split the training set into training and validation set according to the protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz).
28 | ```plain
29 | └── DATA_DIR
30 |     ├── training   <-- training data
31 |     |   ├── image_2
32 |     |   ├── label_2
33 |     |   ├── velodyne
34 |     |   └── crop
35 |     └── testing    <-- testing data
36 |     |   ├── image_2
37 |     |   ├── label_2
38 |     |   ├── velodyne
39 |     |   └── crop
40 | ```
41 | 
42 | # Train
43 | 
44 | Training is driven by `train.py`; it takes `--index` (tag used in the checkpoint name), `--epoch` (default 160), and an optional `--ckpt` to resume from a saved model (see `train.py` below).
45 | 
46 | 
47 | # TODO
48 | - [x] training code
49 | - [x] data augmentation
50 | - [ ] validation code
51 | - [ ] reproduce results for `Car`, `Pedestrian` and `Cyclist`
52 | - [ ] multi-gpu support
53 | - [ ] improve performance
54 | 

--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | 
8 | class VoxelLoss(nn.Module):
9 |     def __init__(self, alpha, beta, gamma):
10 |         super(VoxelLoss, self).__init__()
11 |         self.smoothl1loss = nn.SmoothL1Loss(reduction='sum')
12 |         self.alpha = alpha
13 |         self.beta = beta
14 |         self.gamma = gamma
15 | 
16 |     def forward(self, reg, p_pos, pos_equal_one, neg_equal_one, targets, tag='train'):
17 |         # reg: (N, A*7, H, W), p_pos: (N, H, W, A)
18 |         reg = reg.permute(0, 2, 3, 1).contiguous()
19 |         reg = reg.view(reg.size(0), reg.size(1), reg.size(2), -1, 7)  # (N, H, W, A, 7)
20 |         targets = targets.view(targets.size(0), targets.size(1), targets.size(2), -1, 7)  # (N, H, W, A, 7)
21 |         pos_equal_one_for_reg = pos_equal_one.unsqueeze(pos_equal_one.dim()).expand(-1, -1, -1, -1, 7)
22 | 
23 |         rm_pos = reg * pos_equal_one_for_reg
24 |         targets_pos = targets * pos_equal_one_for_reg
25 | 
26 |         cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6)
27 |         cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6)
28 | 
29 |         cls_neg_loss = -neg_equal_one * torch.log(1 - p_pos + 1e-6)
30 |         cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6)
31 | 
32 |         reg_loss = self.smoothl1loss(rm_pos, targets_pos)
33 |         reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6)
34 |         conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss
35 | 
36 |         if tag == 'val':
37 |             xyz_loss = self.smoothl1loss(rm_pos[..., [0, 1, 2]], targets_pos[..., [0, 1, 2]]) / (pos_equal_one.sum() + 1e-6)
38 |             whl_loss = self.smoothl1loss(rm_pos[..., [3, 4, 5]], targets_pos[..., [3, 4, 5]]) / (pos_equal_one.sum() + 1e-6)
39 |             r_loss = self.smoothl1loss(rm_pos[..., [6]], targets_pos[..., [6]]) / (pos_equal_one.sum() + 1e-6)
40 |             return conf_loss, reg_loss, xyz_loss, whl_loss, r_loss
41 | 
42 |         return conf_loss, reg_loss, None, None, None
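A minimal smoke test for this loss with dummy tensors helps pin down the expected layouts (shapes inferred from how `voxelnet.py` and `train.py` call it; the snippet is illustrative, not from the repo):

```python
import torch
from loss import VoxelLoss

N, H, W, A = 2, 200, 176, 2                      # feature-map size: (cfg.H//2, cfg.W//2)
criterion = VoxelLoss(alpha=1.0, beta=10.0, gamma=2)
reg = torch.randn(N, A * 7, H, W)                # raw RPN regression map
score = torch.rand(N, H, W, A)                   # sigmoid scores, already permuted
pos = (torch.rand(N, H, W, A) > 0.99).float()    # fake positive-anchor mask
neg = 1.0 - pos
targets = torch.randn(N, H, W, A * 7)
conf_loss, reg_loss, *_ = criterion(reg, score, pos, neg, targets)
print(conf_loss.item(), reg_loss.item())
```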
np.array(lines[4][:9]).reshape(3,3)
24 |     #
25 |     P = P.astype('float32')
26 |     Tr_velo_to_cam = Tr_velo_to_cam.astype('float32')
27 |     R_cam_to_rect = R_cam_to_rect.astype('float32')
28 |     return P, Tr_velo_to_cam, R_cam_to_rect
29 | 
30 | def prepare_velo_points(pts3d_raw):
31 |     '''Replaces the reflectance value by 1, and transposes the array, so
32 |        points can be directly multiplied by the camera projection matrix'''
33 |     pts3d = pts3d_raw
34 |     # Reflectance > 0
35 |     indices = pts3d[:, 3] > 0
36 |     pts3d = pts3d[indices, :]
37 |     pts3d[:, 3] = 1
38 |     return pts3d.transpose(), indices
39 | 
40 | def project_velo_points_in_img(pts3d, T_cam_velo, Rrect, Prect):
41 |     '''Project 3D points into the 2D image. Expects pts3d as a 4xN
42 |        numpy array. Returns only the points in front of the camera,
43 |        their 2D projections, and the corresponding index mask.'''
44 |     # 3D points in camera reference frame.
45 |     pts3d_cam = Rrect.dot(T_cam_velo.dot(pts3d))
46 |     # Before projecting, keep only points with z >= 0
47 |     # (points that are in front of the camera).
48 |     idx = (pts3d_cam[2, :] >= 0)
49 |     pts2d_cam = Prect.dot(pts3d_cam[:, idx])
50 |     return pts3d[:, idx], pts2d_cam / pts2d_cam[2, :], idx
51 | 
52 | 
53 | def align_img_and_pc(img_dir, pc_dir, calib_dir):
54 | 
55 |     img = cv2.imread(img_dir)
56 |     pts = load_velodyne_points(pc_dir)
57 |     P, Tr_velo_to_cam, R_cam_to_rect = load_calib(calib_dir)
58 | 
59 |     pts3d, indices = prepare_velo_points(pts)
60 |     pts3d_ori = pts3d.copy()
61 |     reflectances = pts[indices, 3]
62 |     pts3d, pts2d_normed, idx = project_velo_points_in_img(pts3d, Tr_velo_to_cam, R_cam_to_rect, P)
63 |     #print(reflectances.shape, idx.shape)
64 |     reflectances = reflectances[idx]
65 |     #print(reflectances.shape, pts3d.shape, pts2d_normed.shape)
66 |     assert reflectances.shape[0] == pts3d.shape[1] == pts2d_normed.shape[1]
67 | 
68 |     rows, cols = img.shape[:2]
69 | 
70 |     points = []
71 |     for i in range(pts2d_normed.shape[1]):
72 |         c = int(np.round(pts2d_normed[0, i]))
73 |         r = int(np.round(pts2d_normed[1, i]))
74 |         if c < cols and r < rows and r > 0 and c > 0:
75 |             color = img[r, c, :]
76 |             point = [pts3d[0, i], pts3d[1, i], pts3d[2, i], reflectances[i], color[0], color[1], color[2], pts2d_normed[0, i], pts2d_normed[1, i]]
77 |             points.append(point)
78 | 
79 |     points = np.array(points)
80 |     return points
81 | 
82 | # update the following directories
83 | IMG_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/image_2/'
84 | PC_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/velodyne/'
85 | CALIB_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/calib/'
86 | PC_CROP_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/crop/'
87 | 
88 | 
89 | for frame in range(0, 7481):
90 |     img_dir = IMG_ROOT + '%06d.png' % frame
91 |     pc_dir = PC_ROOT + '%06d.bin' % frame
92 |     calib_dir = CALIB_ROOT + '%06d.txt' % frame
93 | 
94 |     points = align_img_and_pc(img_dir, pc_dir, calib_dir)
95 | 
96 |     output_name = PC_CROP_ROOT + '%06d.bin' % frame
97 |     sys.stdout.write('Save to %s \n' % output_name)
98 |     points[:, :4].astype('float32').tofile(output_name)
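The `P2 * R0_rect * Tr_velo_to_cam` chain above is easiest to see on a single homogeneous point. The calibration values below are made up for illustration (a camera looking down the LiDAR x-axis); real numbers come from the KITTI calib files:

```python
import numpy as np

# Hypothetical calibration, not from KITTI.
P = np.array([[700., 0., 600., 0.],
              [0., 700., 200., 0.],
              [0., 0., 1., 0.]], dtype=np.float32)   # 3x4 projection
Tr = np.array([[0., -1., 0., 0.],                    # velo -> cam: x_cam = -y_velo,
               [0., 0., -1., 0.],                    # y_cam = -z_velo, z_cam = x_velo
               [1., 0., 0., 0.],
               [0., 0., 0., 1.]], dtype=np.float32)
R = np.eye(4, dtype=np.float32)                      # rectification ~ identity here

pt_velo = np.array([10., 1., -0.5, 1.])              # 10 m ahead, homogeneous
uvw = P @ R @ Tr @ pt_velo
u, v = uvw[:2] / uvw[2]
print(round(u), round(v))                            # 530 235: inside a 1242x375 image
```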
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | from torch.autograd import Variable
4 | from config import config as cfg
5 | from data.kitti import KittiDataset
6 | import torch.utils.data as data
7 | import time
8 | from loss import VoxelLoss
9 | from voxelnet import VoxelNet
10 | import torch.optim as optim
11 | import torch.optim.lr_scheduler as lr_scheduler
12 | import torch.nn.init as init
13 | # from nms.pth_nms import pth_nms
14 | import numpy as np
15 | import torch.backends.cudnn
16 | from test_utils import draw_boxes
17 | from torch.utils.tensorboard import SummaryWriter
18 | import torchvision
19 | import os
20 | from utils import plot_grad, print_prob
21 | import cv2
22 | 
23 | def weights_init(m):
24 |     if isinstance(m, nn.Conv2d):
25 |         init.xavier_normal_(m.weight.data)
26 |         m.bias.data.zero_()
27 | 
28 | def detection_collate(batch):
29 |     voxel_features = []
30 |     voxel_coords = []
31 |     gt_box3d_corner = []
32 |     gt_box3d = []
33 | 
34 |     images = []
35 |     calibs = []
36 |     ids = []
37 |     for i, sample in enumerate(batch):
38 |         voxel_features.append(sample[0])
39 | 
40 |         voxel_coords.append(
41 |             np.pad(sample[1], ((0, 0), (1, 0)),
42 |                    mode='constant', constant_values=i))
43 | 
44 |         gt_box3d_corner.append(sample[2])
45 |         gt_box3d.append(sample[3])
46 | 
47 |         images.append(sample[4])
48 |         calibs.append(sample[5])
49 |         ids.append(sample[6])
50 |     return np.concatenate(voxel_features), \
51 |            np.concatenate(voxel_coords), \
52 |            gt_box3d_corner, \
53 |            gt_box3d, \
54 |            images, \
55 |            calibs, ids
56 | 
57 | torch.backends.cudnn.enabled = True
58 | 
59 | import argparse
60 | parser = argparse.ArgumentParser(description='arg parser')
61 | parser.add_argument('--ckpt', type=str, default=None, help='pre_load_ckpt')
62 | args = parser.parse_args()
63 | 
64 | def train(net, model_name, hyper, cfg, writer):
65 |     # despite the name, this is a forward-only pass over the validation split
66 |     dataset = KittiDataset(cfg=cfg, root='/data/cxg1/VoxelNet_pro/Data', set='val')
67 |     data_loader = data.DataLoader(dataset, batch_size=cfg.N, num_workers=4, collate_fn=detection_collate, shuffle=True, \
68 |                                   pin_memory=False)
69 | 
70 |     # define loss function
71 |     criterion = VoxelLoss(alpha=hyper['alpha'], beta=hyper['beta'], gamma=hyper['gamma'])
72 | 
73 |     running_loss = 0.0
74 |     running_reg_loss = 0.0
75 |     running_conf_loss = 0.0
76 | 
77 |     # evaluation loop
78 |     # batch_iterator = None
79 |     epoch_size = len(dataset) // cfg.N
80 |     print('Epoch size', epoch_size)
81 |     iteration = 0
82 |     for voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, images, calibs, ids in data_loader:
83 | 
84 |         # wrap inputs into tensors on the target device
85 |         voxel_features = torch.tensor(voxel_features).to(cfg.device)
86 | 
87 |         pos_equal_one = []
88 |         neg_equal_one = []
89 |         targets = []
90 | 
91 |         for i in range(len(gt_box3d)):
92 |             pos_equal_one_, neg_equal_one_, targets_ = dataset.cal_target(gt_box3d_corner[i], gt_box3d[i], cfg)
93 |             pos_equal_one.append(pos_equal_one_)
94 |             neg_equal_one.append(neg_equal_one_)
95 |             targets.append(targets_)
96 | 
97 |         pos_equal_one = torch.stack(pos_equal_one, dim=0)
98 |         neg_equal_one = torch.stack(neg_equal_one, dim=0)
99 |         targets = torch.stack(targets, dim=0)
100 | 
101 |         # no optimizer here; gradients are disabled by the caller
102 |         # forward
103 |         score, reg = net(voxel_features, voxel_coords)
104 | 
105 |         if iteration == 0:  # visualize the first image
106 |             print_prob(score, "pred.png")
107 |             print_prob(pos_equal_one, "gt.png")
108 | 
109 |         # calculate loss ('val' tag also returns the per-component losses)
110 |         conf_loss, reg_loss, xyz_loss, whl_loss, r_loss = criterion(reg, score, pos_equal_one, neg_equal_one, targets, tag='val')
111 |         loss = hyper['lambda'] * conf_loss + reg_loss
112 | 
113 |         running_conf_loss += conf_loss.item()
114 |         running_reg_loss += reg_loss.item()
115 |         running_loss += (reg_loss.item() + conf_loss.item())
116 | 
117 |         pre_image = draw_boxes(reg, score, images, calibs, ids, 'pred')
118 |         gt_image = draw_boxes(targets.float(), pos_equal_one.float(), images, calibs, ids, 'true')
119 |         try:
120 |             writer.add_image("gt_image_box {}".format(iteration), gt_image, global_step=iteration, dataformats='NHWC')
121 |             writer.add_image("predict_image_box {}".format(iteration), pre_image, global_step=iteration, dataformats='NHWC')
122 |         except:
123 |             pass
124 |         iteration += 1
125 | 
126 | hyper = {'alpha': 1.0,
127 |          'beta': 10.0,
128 |          'pos': 0.6,
129 |          'neg': 0.4,
130 |          'lr': 0.01,
131 |          'momentum': 0.9,
132 |          'lambda': 2.0,
133 |          'gamma': 2,
134 |          'weight_decay': 0.00002}
135 | 
136 | if __name__ == '__main__':
137 |     cfg.pos_threshold = hyper['pos']
138 |     cfg.neg_threshold = hyper['neg']
139 |     model_name = args.ckpt
140 | 
141 |     writer = SummaryWriter('runs/test_run')
142 | 
143 |     net = VoxelNet()
144 |     net.to(cfg.device)
145 |     net.load_state_dict(torch.load(os.path.join('./model', model_name), map_location=cfg.device)['model_state_dict'])
146 |     with torch.no_grad():
147 |         try:
148 |             train(net, model_name, hyper, cfg, writer)
149 |         except KeyboardInterrupt:
150 |             pass
151 |     writer.close()
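One detail of `detection_collate` worth spelling out: the `np.pad` call prepends the sample's batch index to every voxel coordinate row, which is how `VoxelNet.voxel_indexing` later scatters voxels into the right batch slot. A toy illustration with made-up coordinates:

```python
import numpy as np

coords_sample0 = np.array([[2, 10, 11], [3, 12, 13]])   # (D, H, W) indices of two voxels
coords_sample1 = np.array([[1, 5, 6]])
batch = []
for i, c in enumerate([coords_sample0, coords_sample1]):
    batch.append(np.pad(c, ((0, 0), (1, 0)), mode='constant', constant_values=i))
print(np.concatenate(batch))
# [[ 0  2 10 11]
#  [ 0  3 12 13]
#  [ 1  1  5  6]]   -> first column is the batch index used by voxel_indexing()
```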
--------------------------------------------------------------------------------
/data_aug.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from config import config as cfg
3 | import cv2
4 | import matplotlib.pyplot as plt
5 | 
6 | def draw_polygon(img, box_corner, color=(255, 255, 255), thickness=1):
7 | 
8 |     tup0 = (int(box_corner[0, 1]), int(box_corner[0, 0]))
9 |     tup1 = (int(box_corner[1, 1]), int(box_corner[1, 0]))
10 |     tup2 = (int(box_corner[2, 1]), int(box_corner[2, 0]))
11 |     tup3 = (int(box_corner[3, 1]), int(box_corner[3, 0]))
12 |     cv2.line(img, tup0, tup1, color, thickness, cv2.LINE_AA)
13 |     cv2.line(img, tup1, tup2, color, thickness, cv2.LINE_AA)
14 |     cv2.line(img, tup2, tup3, color, thickness, cv2.LINE_AA)
15 |     cv2.line(img, tup3, tup0, color, thickness, cv2.LINE_AA)
16 |     return img
17 | 
18 | def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0):
19 |     # Input:
20 |     #   points: (N, 3)
21 |     #   rx/y/z: in radians
22 |     # Output:
23 |     #   points: (N, 3)
24 |     N = points.shape[0]
25 |     points = np.hstack([points, np.ones((N, 1))])
26 |     mat1 = np.eye(4)
27 |     mat1[3, 0:3] = tx, ty, tz
28 |     points = np.matmul(points, mat1)
29 |     if rx != 0:
30 |         mat = np.zeros((4, 4))
31 |         mat[0, 0] = 1
32 |         mat[3, 3] = 1
33 |         mat[1, 1] = np.cos(rx)
34 |         mat[1, 2] = -np.sin(rx)
35 |         mat[2, 1] = np.sin(rx)
36 |         mat[2, 2] = np.cos(rx)
37 |         points = np.matmul(points, mat)
38 |     if ry != 0:
39 |         mat = np.zeros((4, 4))
40 |         mat[1, 1] = 1
41 |         mat[3, 3] = 1
42 |         mat[0, 0] = np.cos(ry)
43 |         mat[0, 2] = np.sin(ry)
44 |         mat[2, 0] = -np.sin(ry)
45 |         mat[2, 2] = np.cos(ry)
46 |         points = np.matmul(points, mat)
47 |     if rz != 0:
48 |         mat = np.zeros((4, 4))
49 |         mat[2, 2] = 1
50 |         mat[3, 3] = 1
51 |         mat[0, 0] = np.cos(rz)
52 |         mat[0, 1] = -np.sin(rz)
53 |         mat[1, 0] = np.sin(rz)
54 |         mat[1, 1] = np.cos(rz)
55 |         points = np.matmul(points, mat)
56 |     return points[:, 0:3]
57 | 
58 | def box_transform(boxes_corner, tx, ty, tz, r=0):
59 |     # boxes_corner (N, 8, 3)
60 |     for idx in range(len(boxes_corner)):
61 |         boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, rz=r)
62 |     return boxes_corner
63 | 
64 | def cal_iou2d(box1_corner, box2_corner):
65 |     box1_corner = np.reshape(box1_corner, [4, 2])
66 |     box2_corner = np.reshape(box2_corner, [4, 2])
67 |     box1_corner = ((cfg.W, cfg.H) - (box1_corner - (cfg.xrange[0], cfg.yrange[0])) / (cfg.vw, cfg.vh)).astype(np.int32)
68 |     box2_corner = ((cfg.W, cfg.H) - (box2_corner - (cfg.xrange[0], cfg.yrange[0])) / (cfg.vw, cfg.vh)).astype(np.int32)
69 | 
70 |     buf1 = np.zeros((cfg.H, cfg.W, 3))
71 |     buf2 = np.zeros((cfg.H, cfg.W, 3))
72 |     buf1 = cv2.fillConvexPoly(buf1, box1_corner, color=(1, 1, 1))[..., 0]
73 |     buf2 = cv2.fillConvexPoly(buf2, box2_corner, color=(1, 1, 1))[..., 0]
74 | 
75 |     indiv = np.sum(np.absolute(buf1 - buf2))
76 |     share = np.sum((buf1 + buf2) == 2)
77 |     if indiv == 0:
78 |         return 0.0  # when the target is out of bounds
79 |     return share / (indiv + share)
80 | 
81 | def aug_data(lidar, gt_box3d_corner):
82 |     np.random.seed()
83 | 
84 |     choice = np.random.randint(1, 10)
85 | 
86 |     if choice >= 7:
87 |         for idx in range(len(gt_box3d_corner)):
88 |             # TODO: precisely gather the points inside each box
89 |             is_collision = True
90 |             _count = 0
91 |             while is_collision and _count < 100:
92 |                 t_rz = np.random.uniform(-np.pi / 10, np.pi / 10)
93 |                 t_x = np.random.normal()
94 |                 t_y = np.random.normal()
95 |                 t_z = np.random.normal()
96 | 
97 |                 # check collision
98 |                 tmp = box_transform(
99 |                     gt_box3d_corner[[idx]], t_x, t_y, t_z, t_rz)
100 |                 is_collision = False
101 |                 for idy in range(idx):
102 |                     iou = cal_iou2d(tmp[0, :4, :2], gt_box3d_corner[idy, :4, :2])
103 |                     if iou > 0:
104 |                         is_collision = True
105 |                         _count += 1
106 |                         break
107 |             if not is_collision:
108 |                 box_corner = gt_box3d_corner[idx]
109 |                 minx = np.min(box_corner[:, 0])
110 |                 miny = np.min(box_corner[:, 1])
111 |                 minz = np.min(box_corner[:, 2])
112 |                 maxx = np.max(box_corner[:, 0])
113 |                 maxy = np.max(box_corner[:, 1])
114 |                 maxz = np.max(box_corner[:, 2])
115 |                 bound_x = np.logical_and(
116 |                     lidar[:, 0] >= minx, lidar[:, 0] <= maxx)
117 |                 bound_y = np.logical_and(
118 |                     lidar[:, 1] >= miny, lidar[:, 1] <= maxy)
119 |                 bound_z = np.logical_and(
120 |                     lidar[:, 2] >= minz, lidar[:, 2] <= maxz)
121 |                 bound_box = np.logical_and(
122 |                     np.logical_and(bound_x, bound_y), bound_z)
123 |                 lidar[bound_box, 0:3] = point_transform(
124 |                     lidar[bound_box, 0:3], t_x, t_y, t_z, rz=t_rz)
125 |                 gt_box3d_corner[idx] = box_transform(
126 |                     gt_box3d_corner[[idx]], t_x, t_y, t_z, t_rz)
127 | 
128 |         gt_box3d = gt_box3d_corner
129 | 
130 |     elif 4 <= choice < 7:
131 |         # global rotation
132 |         angle = np.random.uniform(-np.pi / 4, np.pi / 4)
133 |         lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle)
134 |         gt_box3d = box_transform(gt_box3d_corner, 0, 0, 0, r=angle)
135 | 
136 |     else:
137 |         # global scaling
138 |         factor = np.random.uniform(0.95, 1.05)
139 |         lidar[:, 0:3] = lidar[:, 0:3] * factor
140 |         gt_box3d = gt_box3d_corner * factor
141 | 
142 |     return lidar, gt_box3d
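A subtlety in `point_transform` above: points are row vectors multiplied on the right, so a positive `rz` turns points clockwise in the usual x-y orientation (the transpose of the textbook counter-clockwise rotation). A standalone check:

```python
import numpy as np
from data_aug import point_transform

p = np.array([[1.0, 0.0, 0.0]])
print(point_transform(p, 0, 0, 0, rz=np.pi / 2))
# -> approx [[0., -1., 0.]]: right-multiplying by the matrix laid out above
#    applies the inverse of the usual column-vector rotation.
```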
--------------------------------------------------------------------------------
/voxelnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch
4 | from torch.autograd import Variable
5 | from config import config as cfg
6 | 
7 | # conv2d + bn + relu
8 | class Conv2d(nn.Module):
9 | 
10 |     def __init__(self, in_channels, out_channels, k, s, p, activation=True, batch_norm=True):
11 |         super(Conv2d, self).__init__()
12 |         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
13 |         if batch_norm:
14 |             self.bn = nn.BatchNorm2d(out_channels)
15 |         else:
16 |             self.bn = None
17 |         self.activation = activation
18 |     def forward(self, x):
19 |         x = self.conv(x)
20 |         if self.bn is not None:
21 |             x = self.bn(x)
22 |         if self.activation:
23 |             return F.relu(x, inplace=True)
24 |         else:
25 |             return x
26 | 
27 | # conv3d + bn + relu
28 | class Conv3d(nn.Module):
29 | 
30 |     def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True):
31 |         super(Conv3d, self).__init__()
32 |         self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
33 |         if batch_norm:
34 |             self.bn = nn.BatchNorm3d(out_channels)
35 |         else:
36 |             self.bn = None
37 | 
38 |     def forward(self, x):
39 |         x = self.conv(x)
40 |         if self.bn is not None:
41 |             x = self.bn(x)
42 | 
43 |         return F.relu(x, inplace=True)
44 | 
45 | # Fully Connected Network
46 | class FCN(nn.Module):
47 | 
48 |     def __init__(self, cin, cout):
49 |         super(FCN, self).__init__()
50 |         self.cout = cout
51 |         self.linear = nn.Linear(cin, cout)
52 |         self.bn = nn.BatchNorm1d(cout)
53 | 
54 |     def forward(self, x):
55 |         # kk is the number of voxels stacked across the batch
56 |         kk, t, _ = x.shape
57 |         x = self.linear(x.view(kk*t, -1))
58 |         x = F.relu(self.bn(x))
59 |         return x.view(kk, t, -1)
60 | 
61 | # Voxel Feature Encoding layer
62 | class VFE(nn.Module):
63 | 
64 |     def __init__(self, cin, cout):
65 |         super(VFE, self).__init__()
66 |         assert cout % 2 == 0
67 |         self.units = cout // 2
68 |         self.fcn = FCN(cin, self.units)
69 | 
70 |     def forward(self, x, mask):
71 |         # point-wise feature
72 |         pwf = self.fcn(x)
73 |         # locally aggregated feature
74 |         laf = torch.max(pwf, 1)[0].unsqueeze(1).repeat(1, cfg.T, 1)
75 |         # point-wise concatenated feature
76 |         pwcf = torch.cat((pwf, laf), dim=2)
77 |         # apply mask
78 |         mask = mask.unsqueeze(2).repeat(1, 1, self.units * 2)
79 |         pwcf = pwcf * mask.float()
80 | 
81 |         return pwcf
82 | 
83 | # Stacked Voxel Feature Encoding
84 | class SVFE(nn.Module):
85 | 
86 |     def __init__(self):
87 |         super(SVFE, self).__init__()
88 |         self.vfe_1 = VFE(7, 32)
89 |         self.vfe_2 = VFE(32, 128)
90 |         self.fcn = FCN(128, 128)
91 |     def forward(self, x):
92 |         mask = torch.ne(torch.max(x, 2)[0], 0)  # filter out all-zero (padding) points
93 |         x = self.vfe_1(x, mask)
94 |         x = self.vfe_2(x, mask)
95 |         x = self.fcn(x)
96 |         # element-wise max pooling
97 |         x = torch.max(x, 1)[0]
98 |         return x
99 | 
100 | # Convolutional Middle Layer
101 | class CML(nn.Module):
102 |     def __init__(self):
103 |         super(CML, self).__init__()
104 |         self.conv3d_1 = Conv3d(128, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
105 |         self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1))
106 |         self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
107 | 
108 |     def forward(self, x):
109 |         x = self.conv3d_1(x)
110 |         x = self.conv3d_2(x)
111 |         x = self.conv3d_3(x)
112 |         return x
113 | 
114 | # Region Proposal Network
115 | class RPN(nn.Module):
116 |     def __init__(self):
117 |         super(RPN, self).__init__()
118 |         self.block_1 = [Conv2d(128, 128, 3, 2, 1)]
119 |         self.block_1 += [Conv2d(128, 128, 3, 1, 1) for _ in range(3)]
120 |         self.block_1 = nn.Sequential(*self.block_1)
121 | 
122 |         self.block_2 = [Conv2d(128, 128, 3, 2, 1)]
123 |         self.block_2 += [Conv2d(128, 128, 3, 1, 1) for _ in range(5)]
124 |         self.block_2 = nn.Sequential(*self.block_2)
125 | 
126 |         self.block_3 = [Conv2d(128, 256, 3, 2, 1)]
127 |         self.block_3 += [Conv2d(256, 256, 3, 1, 1) for _ in range(5)]
128 |         self.block_3 = nn.Sequential(*self.block_3)
129 | 
130 |         self.deconv_1 = nn.Sequential(nn.ConvTranspose2d(256, 256, 4, 4, 0), nn.BatchNorm2d(256))
131 |         self.deconv_2 = nn.Sequential(nn.ConvTranspose2d(128, 256, 2, 2, 0), nn.BatchNorm2d(256))
132 |         self.deconv_3 = nn.Sequential(nn.ConvTranspose2d(128, 256, 1, 1, 0), nn.BatchNorm2d(256))
133 | 
134 |         self.score_head = Conv2d(768, cfg.anchors_per_position, 1, 1, 0, activation=False, batch_norm=False)
135 |         self.reg_head = Conv2d(768, 7 * cfg.anchors_per_position, 1, 1, 0, activation=False, batch_norm=False)
136 | 
137 |     def forward(self, x):
138 |         x = self.block_1(x)
139 |         x_skip_1 = x
140 |         x = self.block_2(x)
141 |         x_skip_2 = x
142 |         x = self.block_3(x)
143 |         x_0 = self.deconv_1(x)
144 |         x_1 = self.deconv_2(x_skip_2)
145 |         x_2 = self.deconv_3(x_skip_1)
146 |         x = torch.cat((x_0, x_1, x_2), dim=1)
147 |         return self.score_head(x), self.reg_head(x)
148 | 
149 | class VoxelNet(nn.Module):
150 | 
151 |     def __init__(self):
152 |         super(VoxelNet, self).__init__()
153 |         self.svfe = SVFE()
154 |         self.cml = CML()
155 |         self.rpn = RPN()
156 | 
157 |     def voxel_indexing(self, sparse_features, coords):
158 |         dim = sparse_features.shape[-1]
159 | 
160 |         dense_feature = torch.zeros(cfg.N, cfg.D, cfg.H, cfg.W, dim).to(cfg.device)
161 | 
162 |         dense_feature[coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3], :] = sparse_features
163 | 
164 |         return dense_feature.permute(0, 4, 1, 2, 3)
165 | 
166 |     def forward(self, voxel_features, voxel_coords):
167 | 
168 |         # feature learning network
169 |         vwfs = self.svfe(voxel_features)
170 |         vwfs = self.voxel_indexing(vwfs, voxel_coords)
171 | 
172 |         # convolutional middle network
173 |         cml_out = self.cml(vwfs)
174 | 
175 |         # region proposal network
176 | 
177 |         # merge the depth and feature dims into one; output the probability score map and regression map
178 |         score, reg = self.rpn(cml_out.view(cfg.N, -1, cfg.H, cfg.W))
179 |         score = torch.sigmoid(score)
180 |         score = score.permute((0, 2, 3, 1))
181 | 
182 |         return score, reg
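To see why `cml_out.view(cfg.N, -1, cfg.H, cfg.W)` in `VoxelNet.forward` yields exactly the 128 channels that `RPN.block_1` expects, trace the depth axis through the middle layers (a standalone check; the small H and W are only to keep it cheap):

```python
import torch
from voxelnet import CML

# depth: 10 -(k3,s2,p1)-> 5 -(k3,s1,p0)-> 3 -(k3,s2,p1)-> 2, so 64 channels * D=2 -> 128
cml = CML().eval()                       # eval(): BatchNorm3d uses running stats
out = cml(torch.zeros(1, 128, 10, 40, 36))
print(out.shape)                         # torch.Size([1, 64, 2, 40, 36])
```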
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | from torch.autograd import Variable
4 | from config import config as cfg
5 | from data.kitti import KittiDataset
6 | import torch.utils.data as data
7 | import time
8 | from loss import VoxelLoss
9 | from voxelnet import VoxelNet
10 | import torch.optim as optim
11 | import torch.optim.lr_scheduler as lr_scheduler
12 | import torch.nn.init as init
13 | # from nms.pth_nms import pth_nms
14 | import numpy as np
15 | import torch.backends.cudnn
16 | from test_utils import draw_boxes
17 | from torch.utils.tensorboard import SummaryWriter
18 | import torchvision
19 | import os
20 | from utils import plot_grad
21 | import cv2
22 | 
23 | 
24 | import argparse
25 | 
26 | parser = argparse.ArgumentParser(description='arg parser')
27 | parser.add_argument('--ckpt', type=str, default=None, help='pre_load_ckpt')
28 | parser.add_argument('--index', type=int, default=None, help='hyper_tag')
29 | parser.add_argument('--epoch', type=int, default=160, help="training epoch")
30 | args = parser.parse_args()
31 | 
32 | def weights_init(m):
33 |     if isinstance(m, nn.Conv2d):
34 |         init.xavier_normal_(m.weight.data)
35 |         m.bias.data.zero_()
36 | 
37 | def detection_collate(batch):
38 |     voxel_features = []
39 |     voxel_coords = []
40 |     gt_box3d_corner = []
41 |     gt_box3d = []
42 | 
43 |     images = []
44 |     calibs = []
45 |     ids = []
46 |     for i, sample in enumerate(batch):
47 |         voxel_features.append(sample[0])
48 | 
49 |         voxel_coords.append(
50 |             np.pad(sample[1], ((0, 0), (1, 0)),
51 |                    mode='constant', constant_values=i))
52 | 
53 |         gt_box3d_corner.append(sample[2])
54 |         gt_box3d.append(sample[3])
55 | 
56 |         images.append(sample[4])
57 |         calibs.append(sample[5])
58 |         ids.append(sample[6])
59 |     return np.concatenate(voxel_features), \
60 |            np.concatenate(voxel_coords), \
61 |            gt_box3d_corner, \
62 |            gt_box3d, \
63 |            images, \
64 |            calibs, ids
65 | 
66 | torch.backends.cudnn.enabled = True
67 | 
68 | def train(net, model_name, hyper, cfg, writer, optimizer):
69 | 
70 |
dataset=KittiDataset(cfg=cfg,root='/data/cxg1/VoxelNet_pro/Data',set='train') 71 | data_loader = data.DataLoader(dataset, batch_size=cfg.N, num_workers=4, collate_fn=detection_collate, shuffle=True, \ 72 | pin_memory=False) 73 | 74 | net.train() 75 | 76 | # define optimizer 77 | 78 | # define loss function 79 | criterion = VoxelLoss(alpha=hyper['alpha'], beta=hyper['beta'], gamma=hyper['gamma']) 80 | 81 | running_loss = 0.0 82 | running_reg_loss = 0.0 83 | running_conf_loss = 0.0 84 | 85 | # training process 86 | # batch_iterator = None 87 | epoch_size = len(dataset) // cfg.N 88 | print('Epoch size', epoch_size) 89 | 90 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(args.epoch*x) for x in (0.7, 0.9)], gamma=0.1) 91 | scheduler.last_epoch = cfg.last_epoch + 1 92 | optimizer.zero_grad() 93 | 94 | epoch = cfg.last_epoch 95 | while epoch < args.epoch : 96 | iteration = 0 97 | for voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, images, calibs, ids in data_loader: 98 | 99 | # wrapper to variable 100 | voxel_features = torch.tensor(voxel_features).to(cfg.device) 101 | 102 | pos_equal_one = [] 103 | neg_equal_one = [] 104 | targets = [] 105 | 106 | with torch.no_grad(): 107 | for i in range(len(gt_box3d)): 108 | pos_equal_one_, neg_equal_one_, targets_ = dataset.cal_target(gt_box3d_corner[i], gt_box3d[i], cfg) 109 | pos_equal_one.append(pos_equal_one_) 110 | neg_equal_one.append(neg_equal_one_) 111 | targets.append(targets_) 112 | 113 | pos_equal_one = torch.stack(pos_equal_one, dim=0) 114 | neg_equal_one = torch.stack(neg_equal_one, dim=0) 115 | targets = torch.stack(targets, dim=0) 116 | 117 | # zero the parameter gradients 118 | # forward 119 | score, reg = net(voxel_features, voxel_coords) 120 | 121 | # calculate loss 122 | conf_loss, reg_loss, _, _, _ = criterion(reg, score, pos_equal_one, neg_equal_one, targets) 123 | loss = hyper['lambda'] * conf_loss + reg_loss 124 | 125 | running_conf_loss += conf_loss.item() 126 | running_reg_loss += reg_loss.item() 127 | running_loss += (reg_loss.item() + conf_loss.item()) 128 | 129 | # backward 130 | loss.backward() 131 | 132 | # visualize gradient 133 | if iteration == 0 and epoch % 30 == 0: 134 | plot_grad(net.svfe.vfe_1.fcn.linear.weight.grad.view(-1), epoch, "vfe_1_grad_%d"%(epoch)) 135 | plot_grad(net.svfe.vfe_2.fcn.linear.weight.grad.view(-1), epoch,"vfe_2_grad_%d"%(epoch)) 136 | plot_grad(net.cml.conv3d_1.conv.weight.grad.view(-1), epoch,"conv3d_1_grad_%d"%(epoch)) 137 | plot_grad(net.rpn.reg_head.conv.weight.grad.view(-1), epoch,"reghead_grad_%d"%(epoch)) 138 | plot_grad(net.rpn.score_head.conv.weight.grad.view(-1), epoch,"scorehead_grad_%d"%(epoch)) 139 | 140 | # update 141 | if iteration%10 == 9: 142 | for param in net.parameters(): 143 | param.grad /= 10 144 | optimizer.step() 145 | optimizer.zero_grad() 146 | 147 | if iteration % 50 == 49: 148 | writer.add_scalar('total_loss', running_loss/50.0, epoch * epoch_size + iteration) 149 | writer.add_scalar('reg_loss', running_reg_loss/50.0, epoch * epoch_size + iteration) 150 | writer.add_scalar('conf_loss',running_conf_loss/50.0, epoch * epoch_size + iteration) 151 | 152 | print("epoch : " + repr(epoch) + ' || iter ' + repr(iteration) + ' || Loss: %.4f || Loc Loss: %.4f || Conf Loss: %.4f' % \ 153 | ( running_loss/50.0, running_reg_loss/50.0, running_conf_loss/50.0)) 154 | 155 | running_conf_loss = 0.0 156 | running_loss = 0.0 157 | running_reg_loss = 0.0 158 | 159 | # visualization 160 | if iteration == 2000: 161 | reg_de = reg.detach() 162 | score_de = 
score.detach()
163 |                 with torch.no_grad():
164 |                     pre_image = draw_boxes(reg_de, score_de, images, calibs, ids, 'pred')
165 |                     gt_image = draw_boxes(targets.float(), pos_equal_one.float(), images, calibs, ids, 'true')
166 |                     try:
167 |                         writer.add_image("gt_image_box {}".format(epoch), gt_image, global_step=epoch * epoch_size + iteration, dataformats='NHWC')
168 |                         writer.add_image("predict_image_box {}".format(epoch), pre_image, global_step=epoch * epoch_size + iteration, dataformats='NHWC')
169 |                     except:
170 |                         pass
171 |             iteration += 1
172 |         scheduler.step()
173 |         epoch += 1
174 |         if epoch % 30 == 0:
175 |             torch.save({
176 |                 "epoch": epoch,
177 |                 'model_state_dict': net.state_dict(),
178 |                 'optimizer_state_dict': optimizer.state_dict(),
179 |             }, os.path.join('./model', model_name + str(epoch) + '.pt'))
180 | 
181 | hyper = {'alpha': 1.0,
182 |          'beta': 10.0,
183 |          'pos': 0.75,
184 |          'neg': 0.5,
185 |          'lr': 0.005,
186 |          'momentum': 0.9,
187 |          'lambda': 2.0,
188 |          'gamma': 2,
189 |          'weight_decay': 0.00001}
190 | 
191 | if __name__ == '__main__':
192 |     pre_model = args.ckpt
193 | 
194 |     cfg.pos_threshold = hyper['pos']
195 |     cfg.neg_threshold = hyper['neg']
196 |     model_name = "model_%d" % (args.index + 1)
197 | 
198 |     writer = SummaryWriter('runs/%s' % (model_name[:-4]))
199 | 
200 |     net = VoxelNet()
201 |     net.to(cfg.device)
202 |     optimizer = optim.SGD(net.parameters(), lr=hyper['lr'], momentum=hyper['momentum'], weight_decay=hyper['weight_decay'])
203 | 
204 |     if pre_model is not None and os.path.exists(os.path.join('./model', pre_model)):
205 |         ckpt = torch.load(os.path.join('./model', pre_model), map_location=cfg.device)
206 |         net.load_state_dict(ckpt['model_state_dict'])
207 |         cfg.last_epoch = ckpt['epoch']
208 |         optimizer.load_state_dict(ckpt['optimizer_state_dict'])
209 |     else:
210 |         net.apply(weights_init)
211 |     train(net, model_name, hyper, cfg, writer, optimizer)
212 |     writer.close()
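Before the test utilities, one property worth stating: `delta_to_boxes3d` in `test_utils.py` below is the inverse of the target encoding in `data/kitti.py`, so all-zero deltas must decode back to the anchors themselves. A quick standalone check (assuming the repo's dependencies, detectron2 and the compiled `box_overlaps` module, are importable):

```python
import torch
from config import config as cfg
from test_utils import delta_to_boxes3d

deltas = torch.zeros(1, 200, 176, 14)            # (N, H/2, W/2, 2*7), all-zero offsets
boxes = delta_to_boxes3d(deltas, cfg.anchors)    # -> (1, 200*176*2, 7)
print(torch.allclose(boxes, torch.FloatTensor(cfg.anchors).view(1, -1, 7)))   # True
```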
--------------------------------------------------------------------------------
/test_utils.py:
--------------------------------------------------------------------------------
1 | from utils import get_filtered_lidar, project_velo2rgb, draw_rgb_projections
2 | from config import config as cfg
3 | from data.kitti import KittiDataset
4 | import torch.utils.data as data
5 | # from nms.pth_nms import pth_nms
6 | import torch.nn.functional as F
7 | import numpy as np
8 | import torch.backends.cudnn
9 | import cv2
10 | import matplotlib.pyplot as plt
11 | from detectron2.layers import nms_rotated
12 | torch.backends.cudnn.benchmark = True
13 | torch.backends.cudnn.enabled = True
14 | 
15 | def delta_to_boxes3d(deltas, anchors):
16 |     # Input:
17 |     #   deltas: (N, w, l, 14)
18 |     #   feature_map_shape: (w, l)
19 |     #   anchors: (w, l, 2, 7)
20 | 
21 |     # Output:
22 |     #   boxes3d: (N, w*l*2, 7)
23 |     N = deltas.shape[0]
24 |     deltas = deltas.view(N, -1, 7)
25 |     anchors = torch.FloatTensor(anchors)
26 |     boxes3d = torch.zeros_like(deltas)
27 | 
28 |     if deltas.is_cuda:
29 |         anchors = anchors.to(cfg.device)
30 |         boxes3d = boxes3d.to(cfg.device)
31 | 
32 |     anchors_reshaped = anchors.view(-1, 7)
33 | 
34 |     anchors_d = torch.sqrt(anchors_reshaped[:, 4]**2 + anchors_reshaped[:, 5]**2)
35 | 
36 |     anchors_d = anchors_d.repeat(N, 2, 1).transpose(1, 2)
37 |     anchors_reshaped = anchors_reshaped.repeat(N, 1, 1)
38 | 
39 |     boxes3d[..., [0, 1]] = torch.mul(deltas[..., [0, 1]], anchors_d) + anchors_reshaped[..., [0, 1]]
40 |     boxes3d[..., [2]] = torch.mul(deltas[..., [2]], anchors_reshaped[..., [3]]) + anchors_reshaped[..., [2]]
41 | 
42 |     boxes3d[..., [3, 4, 5]] = torch.exp(
43 |         deltas[..., [3, 4, 5]]) * anchors_reshaped[..., [3, 4, 5]]
44 | 
45 |     boxes3d[..., 6] = deltas[..., 6] + anchors_reshaped[..., 6]
46 | 
47 |     return boxes3d
48 | 
49 | def detection_collate(batch):
50 |     lidars = []
51 |     images = []
52 |     calibs = []
53 | 
54 |     targets = []
55 |     pos_equal_ones = []
56 |     ids = []
57 |     for i, sample in enumerate(batch):
58 |         lidars.append(sample[0])
59 |         images.append(sample[1])
60 |         calibs.append(sample[2])
61 |         targets.append(sample[3])
62 |         pos_equal_ones.append(sample[4])
63 |         ids.append(sample[5])
64 |     return lidars, images, calibs, \
65 |            torch.cuda.FloatTensor(np.array(targets)), \
66 |            torch.cuda.FloatTensor(np.array(pos_equal_ones)), \
67 |            ids
68 | 
69 | 
70 | def box3d_center_to_corner_batch(boxes_center):
71 |     # (N, 7) -> corners (N, 3, 8), plus the bottom areas (N,)
72 |     N = boxes_center.shape[0]
73 | 
74 |     x, y, z, h, w, l, theta = boxes_center.chunk(7, dim=1)
75 |     sizes = (w * l).squeeze()
76 |     zero_pad = torch.zeros((boxes_center.shape[0], 1)).to(cfg.device)
77 |     one_pad = torch.ones((boxes_center.shape[0], 1)).to(cfg.device)
78 |     box_shape = torch.cat([
79 |         -0.5*l, -0.5*l, 0.5*l, 0.5*l, -0.5*l, -0.5*l, 0.5*l, 0.5*l, \
80 |         0.5*w, -0.5*w, -0.5*w, 0.5*w, 0.5*w, -0.5*w, -0.5*w, 0.5*w, \
81 |         zero_pad, zero_pad, zero_pad, zero_pad, h, h, h, h \
82 |     ], dim=1).unsqueeze(2).reshape((-1, 3, 8))
83 |     rotMat = torch.cat([
84 |         torch.cos(theta), -torch.sin(theta), zero_pad, \
85 |         torch.sin(theta), torch.cos(theta), zero_pad, \
86 |         zero_pad, zero_pad, one_pad \
87 |     ], dim=1).unsqueeze(2).reshape((-1, 3, 3))
88 |     trans = torch.cat((x, y, z), dim=1).unsqueeze(2)  # N * 3 * 1
89 |     corner = torch.bmm(rotMat, box_shape) + trans
90 | 
91 |     return corner, sizes
92 | 
93 | def box3d_corner_to_top_batch(boxes3d):
94 |     # corners (N, 3, 8) -> x, y of the 4 bottom corners, flattened to (N, 8)
95 |     box3d_top = boxes3d[:, :2, :4]
96 |     return box3d_top.reshape((-1, 8))
97 | 
98 | def nms(boxes_bottom, nms_threshold):
99 |     # boxes_bottom (N, 10): 8 bottom-corner coords + score + index
100 |     x = torch.linspace(0, 1, cfg.num_dim)
101 |     y = torch.linspace(0, 1, cfg.num_dim)
102 |     X, Y = torch.meshgrid(x, y)
103 |     coords = torch.cat([X.reshape(1, -1), Y.reshape(1, -1)], dim=0).to(cfg.device)
104 |     query = []
105 | 
106 |     # filter out (near-)singular boxes
107 |     x1 = boxes_bottom[:, 0] - boxes_bottom[:, 1]
108 |     y1 = boxes_bottom[:, 4] - boxes_bottom[:, 5]
109 |     x2 = boxes_bottom[:, 2] - boxes_bottom[:, 1]
110 |     y2 = boxes_bottom[:, 6] - boxes_bottom[:, 5]
111 | 
112 |     sizes = torch.sqrt(x1**2 + y1**2) * torch.sqrt(x2**2 + y2**2)  # areas of the rectangles
113 | 
114 |     deter = torch.abs(x1*y2 - x2*y1)
115 |     non_singular = torch.nonzero(deter > 1e-1)
116 |     boxes_bottom = boxes_bottom[non_singular].reshape(-1, 10)
117 |     sizes = sizes[non_singular].reshape(-1)
118 |     if boxes_bottom.shape[0] == 0:
119 |         return None
120 | 
121 |     alpha_1 = torch.stack([boxes_bottom[:, 0] - boxes_bottom[:, 1], boxes_bottom[:, 4] - boxes_bottom[:, 5]], dim=1)  # (x1, y1)
122 |     alpha_2 = torch.stack([boxes_bottom[:, 2] - boxes_bottom[:, 1], boxes_bottom[:, 6] - boxes_bottom[:, 5]], dim=1)  # (x2, y2)
123 |     trans = torch.stack([boxes_bottom[:, 1], boxes_bottom[:, 5]], dim=1)
124 | 
125 |     while trans.shape[0] > 1:
126 |         Rot1 = torch.stack([alpha_1[0, :].t(), alpha_2[0, :].t()], dim=1)
127 |         train_trans = torch.mm(Rot1, coords).unsqueeze(0) + (trans[0, :] - trans[1:, :]).unsqueeze(2).repeat(1, 1, cfg.num_dim**2)  # N * 2 * 2601
128 |         Rot2 = torch.cat([alpha_1[1:, :].unsqueeze(2), alpha_2[1:, :].unsqueeze(2)], dim=2)
129 |         translated = torch.bmm(torch.inverse(Rot2), train_trans)  # N * 2 * 2601
130 | 
131 |         x_fit = (0 <= translated[:, 0, :]) * (translated[:, 0, :] <= 1)
132 |         y_fit = (0 <= translated[:, 1, :]) * (translated[:, 1, :] <= 1)
133 |         Intersection = (x_fit * y_fit).float()
134 | 
135 |         Intersection = torch.sum(Intersection, dim=1) / cfg.num_dim**2
136 |         IoU = sizes[0] * Intersection / (sizes[0] + sizes[1:] - sizes[0]*Intersection)
137 |         index = torch.nonzero(IoU < nms_threshold) + 1
138 | 
139 |         trans = trans[index].reshape(-1, 2)
140 |         alpha_1 = alpha_1[index].reshape(-1, 2)
141 |         alpha_2 = alpha_2[index].reshape(-1, 2)
142 |         query.append(boxes_bottom[0])
143 |         boxes_bottom = boxes_bottom[index].reshape(-1, 10)
144 |         sizes = sizes[index].reshape(-1)
145 | 
146 |     if boxes_bottom.shape[0] == 1:
147 |         query.append(boxes_bottom[0])
148 |     return torch.stack(query, dim=0)
149 | 
150 | def draw_boxes(reg, prob, images, calibs, ids, tag):
151 |     prob = prob.reshape(cfg.N, -1)
152 |     batch_boxes3d = delta_to_boxes3d(reg, cfg.anchors)
153 |     mask = torch.gt(prob, cfg.score_threshold)
154 |     mask_reg = mask.unsqueeze(2).repeat(1, 1, 7)
155 | 
156 |     out_images = []
157 |     for batch_id in range(cfg.N):
158 |         boxes3d = torch.masked_select(batch_boxes3d[batch_id], mask_reg[batch_id]).view(-1, 7)
159 |         scores = torch.masked_select(prob[batch_id], mask[batch_id])
160 | 
161 |         image = images[batch_id]
162 |         calib = calibs[batch_id]
163 |         id = ids[batch_id]
164 | 
165 |         if len(boxes3d) != 0:
166 |             # boxes3d_corner, sizes = box3d_center_to_corner_batch(boxes3d)
167 |             # boxes2d_bottom = box3d_corner_to_top_batch(boxes3d_corner)
168 |             # boxes2d_score = torch.cat((boxes2d_bottom, scores.unsqueeze(1), torch.arange(0, len(boxes2d_bottom)).float().unsqueeze(1).to(cfg.device)), dim=1)
169 | 
170 |             # args = torch.argsort(boxes2d_score[:, 8], descending=True)
171 |             # boxes2d_score = boxes2d_score[args]
172 | 
173 |             # vac = torch.nonzero(sizes > 1e-2)
174 |             # boxes2d_score = boxes2d_score[vac].squeeze()
175 |             # if boxes2d_score.shape[0] == 0:
176 |             #     out_images.append(image)
177 |             #     continue
178 | 
179 |             # legacy custom NMS, replaced by detectron2's rotated NMS below
180 |             # boxes2d_score = nms(boxes2d_score, cfg.nms_threshold)
181 | 
182 |             index = nms_rotated(boxes3d[..., [0, 1, 5, 4, 6]], scores, 0.01)
183 |             if len(index) == 0:
184 |                 out_images.append(image)
185 |                 continue
186 |             # boxes3d_corner_keep = boxes3d_corner[boxes2d_score[:, 9].long()]
187 |             boxes3d = boxes3d[index]
188 |             print("%d objects detected" % len(boxes3d))
189 |             boxes3d_corner_keep, _ = box3d_center_to_corner_batch(boxes3d)
190 |             boxes3d_corner_keep = boxes3d_corner_keep.cpu().numpy()
191 |             boxes3d_corner_keep = np.transpose(boxes3d_corner_keep, (0, 2, 1))
192 |             rgb_2D = project_velo2rgb(boxes3d_corner_keep, calib)
193 |             img_with_box = draw_rgb_projections(image, rgb_2D, color=(0, 0, 255), thickness=1)
194 |             out_images.append(img_with_box)
195 |     return np.array(out_images)
196 | 
197 | if __name__ == '__main__':
198 |     center = torch.tensor([[1, 1, 1, 2, 3, 4, 0.3]]).to(cfg.device)
199 |     box3d_center_to_corner_batch(center)
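Both `nms_rotated` above and `pairwise_iou_rotated` in `data/kitti.py` below take boxes as `(x_center, y_center, width, height, angle)`, with detectron2 documenting the angle in degrees; the yaw values passed here come from KITTI labels in radians, so the units are worth double-checking. A minimal standalone example of the IoU primitive:

```python
import torch
from detectron2.layers.rotated_boxes import pairwise_iou_rotated

a = torch.tensor([[0.0, 0.0, 4.0, 2.0, 0.0]])    # x, y, w, h, angle (degrees)
b = torch.tensor([[0.0, 0.0, 4.0, 2.0, 90.0]])   # the same box rotated by 90 degrees
print(pairwise_iou_rotated(a, a))                # tensor([[1.]])
print(pairwise_iou_rotated(b, a))                # ~0.33: 2x2 overlap over a union of 12
```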
--------------------------------------------------------------------------------
/data/kitti.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import os.path
4 | import torch.utils.data as data
5 | import utils
6 | from utils import box3d_corner_to_center_batch, anchors_center_to_corner, corner_to_standup_box2d_batch
7 | from data_aug import aug_data
8 | from box_overlaps import bbox_overlaps
9 | import numpy as np
10 | import cv2
11 | import torch
12 | from detectron2.layers.rotated_boxes import pairwise_iou_rotated
13 | 
14 | class KittiDataset(data.Dataset):
15 |     def __init__(self, cfg, root='./KITTI', set='train', type='velodyne_train'):
16 |         self.type = type
17 |         self.root = root
18 |         self.data_path = os.path.join(root, 'training')
19 |         self.lidar_path = os.path.join(self.data_path, "crop")
20 |         self.image_path = os.path.join(self.data_path, "image_2/")
21 |         self.calib_path = os.path.join(self.data_path, "calib")
22 |         self.label_path = os.path.join(self.data_path, "label_2")
23 | 
24 |         with open(os.path.join(self.data_path, '%s.txt' % set)) as f:
25 |             self.file_list = f.read().splitlines()
26 | 
27 |         self.T = cfg.T
28 |         self.vd = cfg.vd
29 |         self.vh = cfg.vh
30 |         self.vw = cfg.vw
31 |         self.xrange = cfg.xrange
32 |         self.yrange = cfg.yrange
33 |         self.zrange = cfg.zrange
34 |         self.anchors = torch.tensor(cfg.anchors.reshape(-1, 7)).float().to(cfg.device)
35 |         self.anchors_xylwr = self.anchors[..., [0, 1, 5, 4, 6]].contiguous()
36 |         self.feature_map_shape = (int(cfg.H / 2), int(cfg.W / 2))
37 |         self.anchors_per_position = cfg.anchors_per_position
38 |         self.pos_threshold = cfg.pos_threshold
39 |         self.neg_threshold = cfg.neg_threshold
40 | 
41 |     def cal_target(self, gt_box3d, gt_xyzhwlr, cfg):
42 |         # Input:
43 |         #   labels: (N,)
44 |         #   feature_map_shape: (w, l)
45 |         #   anchors: (w, l, 2, 7)
46 |         # Output:
47 |         #   pos_equal_one (w, l, 2)
48 |         #   neg_equal_one (w, l, 2)
49 |         #   targets (w, l, 14)
50 |         # attention: IoU is computed in the bird's-eye view
51 | 
52 |         anchors_d = torch.sqrt(self.anchors[:, 4] ** 2 + self.anchors[:, 5] ** 2).to(cfg.device)
53 |         # denote whether each anchor box is pos or neg
54 |         pos_equal_one = torch.zeros((*self.feature_map_shape, 2)).to(cfg.device)
55 |         neg_equal_one = torch.zeros((*self.feature_map_shape, 2)).to(cfg.device)
56 | 
57 |         targets = torch.zeros((*self.feature_map_shape, 14)).to(cfg.device)
58 | 
59 |         gt_xyzhwlr = torch.tensor(gt_xyzhwlr, requires_grad=False).float().to(cfg.device)
60 |         gt_xylwr = gt_xyzhwlr[..., [0, 1, 5, 4, 6]]
61 | 
62 |         # BOTTLENECK
63 | 
64 |         iou = pairwise_iou_rotated(
65 |             self.anchors_xylwr,
66 |             gt_xylwr.contiguous()
67 |         ).cpu().numpy()  # (anchor, gt)
68 | 
69 |         id_highest = np.argmax(iou.T, axis=1)  # for each gt, the anchor with the maximum IoU
70 |         id_highest_gt = np.arange(iou.T.shape[0])
71 |         mask = iou.T[id_highest_gt, id_highest] > 0  # make sure all the IoUs are positive
72 |         id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]
73 | 
74 |         # find anchors with iou > cfg.XXX_POS_IOU
75 |         id_pos, id_pos_gt = np.where(iou > self.pos_threshold)
76 |         # find anchors with iou < cfg.XXX_NEG_IOU
77 |         id_neg = np.where(np.sum(iou < self.neg_threshold,
78 |                                  axis=1) == iou.shape[1])[0]  # anchor doesn't match any ground truth
79 | 
80 |         for gt in range(iou.shape[1]):
81 |             if gt not in id_pos_gt and iou[id_highest[gt], gt] > self.neg_threshold:
82 |                 id_pos = np.append(id_pos, id_highest[gt])
83 |                 id_pos_gt = np.append(id_pos_gt, gt)
84 | 
85 |         # sample the negatives to keep the ratio at 1:10, with a minimum of 500
86 |         num_neg = 10 * id_pos.shape[0]
87 |         if num_neg < 500:
88 |             num_neg = 500
89 |         if id_neg.shape[0] > num_neg:
90 |             np.random.shuffle(id_neg)
91 |             id_neg = id_neg[:num_neg]
92 |         # cal the target and set the equal one
93 |         index_x, index_y, index_z = np.unravel_index(
94 |             id_pos, (*self.feature_map_shape, self.anchors_per_position))
95 |         pos_equal_one[index_x, index_y, index_z] = 1
96 |         # ATTENTION: index_z should be np.array
97 | 
98 |         # parameterize the ground truth box relative to its anchor box
99 |         targets[index_x, index_y, np.array(index_z) * 7] = \
100 |             (gt_xyzhwlr[id_pos_gt, 0] - self.anchors[id_pos, 0]) / anchors_d[id_pos]
101 |         targets[index_x, index_y, np.array(index_z) * 7 + 1] = \
102 |             (gt_xyzhwlr[id_pos_gt, 1] - self.anchors[id_pos, 1]) / anchors_d[id_pos]
103 |         targets[index_x, index_y, np.array(index_z) * 7 + 2] = \
104 |             (gt_xyzhwlr[id_pos_gt, 2] - self.anchors[id_pos, 2]) / self.anchors[id_pos, 3]
105 |         targets[index_x, index_y, np.array(index_z) * 7 + 3] = torch.log(
106 |             gt_xyzhwlr[id_pos_gt, 3] / self.anchors[id_pos, 3])
107 |         targets[index_x, index_y, np.array(index_z) * 7 + 4] = torch.log(
108 |             gt_xyzhwlr[id_pos_gt, 4] / self.anchors[id_pos, 4])
109 |         targets[index_x, index_y, np.array(index_z) * 7 + 5] = torch.log(
110 |             gt_xyzhwlr[id_pos_gt, 5] / self.anchors[id_pos, 5])
111 |         targets[index_x, index_y, np.array(index_z) * 7 + 6] = (
112 |             gt_xyzhwlr[id_pos_gt, 6] - self.anchors[id_pos, 6])
113 |         index_x, index_y, index_z = np.unravel_index(
114 |             id_neg, (*self.feature_map_shape, self.anchors_per_position))
115 |         neg_equal_one[index_x, index_y, index_z] = 1
116 | 
117 |         return pos_equal_one, neg_equal_one, targets
118 | 
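In equation form, the residuals written into `targets` above follow the VoxelNet box parameterization (superscripts $g$ and $a$ denote ground truth and anchor):

$$
\Delta x = \frac{x^g - x^a}{d^a},\quad
\Delta y = \frac{y^g - y^a}{d^a},\quad
\Delta z = \frac{z^g - z^a}{h^a},\qquad
d^a = \sqrt{(w^a)^2 + (l^a)^2},
$$

$$
\Delta h = \log\frac{h^g}{h^a},\quad
\Delta w = \log\frac{w^g}{w^a},\quad
\Delta l = \log\frac{l^g}{l^a},\quad
\Delta\theta = \theta^g - \theta^a .
$$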
119 |     def preprocess(self, lidar):
120 |         # This function clusters the points that fall into the same voxel.
121 |         # shuffle the points
122 |         np.random.shuffle(lidar)
123 | 
124 |         voxel_coords = ((lidar[:, :3] - np.array([self.xrange[0], self.yrange[0], self.zrange[0]])) / (
125 |                         self.vw, self.vh, self.vd)).astype(np.int32)
126 | 
127 |         # convert to (D, H, W)
128 |         voxel_coords = voxel_coords[:, [2, 1, 0]]
129 |         voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, \
130 |                                                         return_inverse=True, return_counts=True)
131 | 
132 |         voxel_features = []
133 | 
134 |         for i in range(len(voxel_coords)):
135 |             voxel = np.zeros((self.T, 7), dtype=np.float32)
136 |             pts = lidar[inv_ind == i]
137 |             if voxel_counts[i] > self.T:
138 |                 pts = pts[:self.T, :]
139 |                 voxel_counts[i] = self.T
140 |             # augment each point with its offset from the voxel centroid
141 |             voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)
142 |             voxel_features.append(voxel)
143 |         return np.array(voxel_features), voxel_coords
144 | 
145 |     def __getitem__(self, i):
146 | 
147 |         lidar_file = self.lidar_path + '/' + self.file_list[i] + '.bin'
148 |         calib_file = self.calib_path + '/' + self.file_list[i] + '.txt'
149 |         label_file = self.label_path + '/' + self.file_list[i] + '.txt'
150 |         image_file = self.image_path + '/' + self.file_list[i] + '.png'
151 | 
152 |         calib = utils.load_kitti_calib(calib_file)
153 |         Tr = calib['Tr_velo2cam']
154 |         gt_box3d_corner, gt_box3d = utils.load_kitti_label(label_file, Tr)
155 |         lidar = np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
156 | 
157 | 
158 |         if self.type == 'velodyne_train':
159 |             image = cv2.imread(image_file)
160 | 
161 |             # data augmentation
162 |             # lidar, gt_box3d = aug_data(lidar, gt_box3d)
163 | 
164 |             # specify a range
165 |             lidar, gt_box3d_corner, gt_box3d = utils.get_filtered_lidar(lidar, gt_box3d_corner, gt_box3d)
166 | 
167 |             # voxelize
168 |             voxel_features, voxel_coords = self.preprocess(lidar)
169 | 
170 |             # bounding-box encoding
171 |             # pos_equal_one, neg_equal_one, targets = self.cal_target(gt_box3d_corner, gt_box3d)
172 | 
173 |             return voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, image, calib, self.file_list[i]
174 | 
175 |         elif self.type == 'velodyne_test':
176 |             image = cv2.imread(image_file)
177 | 
178 |             lidar, gt_box3d = utils.get_filtered_lidar(lidar, gt_box3d)
179 | 
180 |             voxel_features, voxel_coords = self.preprocess(lidar)
181 | 
182 |             return voxel_features, voxel_coords, image, calib, self.file_list[i]
183 | 
184 |         else:
185 |             raise ValueError('invalid dataset type')
186 | 
187 | 
188 |     def __len__(self):
189 |         return len(self.file_list)
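Running the coordinate arithmetic from `preprocess` on three made-up points shows how points collapse into shared voxels (values chosen to sit safely inside the ranges from `config.py`; not part of the repo):

```python
import numpy as np
from config import config as cfg

lidar = np.array([[10.10, 0.05, -0.90, 0.5],
                  [10.15, 0.07, -0.90, 0.3],   # falls in the same 0.2 m x/y voxel
                  [30.10, -5.05, 0.10, 0.9]], dtype=np.float32)
coords = ((lidar[:, :3] - np.array([cfg.xrange[0], cfg.yrange[0], cfg.zrange[0]]))
          / (cfg.vw, cfg.vh, cfg.vd)).astype(np.int32)[:, [2, 1, 0]]   # reorder to (D, H, W)
uniq, inv, counts = np.unique(coords, axis=0, return_inverse=True, return_counts=True)
print(uniq)     # [[  5 200  50] [  7 174 150]] -> two occupied voxels
print(counts)   # [2 1] points per voxel; each voxel is then padded/truncated to T=35 rows
```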
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | from config import config as cfg
4 | import math
5 | # import mayavi.mlab as mlab
6 | import cv2
7 | from box_overlaps import *
8 | from data_aug import aug_data
9 | import matplotlib.pyplot as plt
10 | import matplotlib
11 | import os
12 | import torch
13 | 
14 | def get_filtered_lidar(lidar, boxes3d=None, gt_box3d=None):
15 | 
16 |     pxs = lidar[:, 0]
17 |     pys = lidar[:, 1]
18 |     pzs = lidar[:, 2]
19 | 
20 |     filter_x = np.where((pxs >= cfg.xrange[0]) & (pxs < cfg.xrange[1]))[0]
21 |     filter_y = np.where((pys >= cfg.yrange[0]) & (pys < cfg.yrange[1]))[0]
22 |     filter_z = np.where((pzs >= cfg.zrange[0]) & (pzs < cfg.zrange[1]))[0]
23 |     filter_xy = np.intersect1d(filter_x, filter_y)
24 |     filter_xyz = np.intersect1d(filter_xy, filter_z)
25 | 
26 |     if boxes3d is not None:
27 |         box_x = (boxes3d[:, :, 0] >= cfg.xrange[0]) & (boxes3d[:, :, 0] < cfg.xrange[1])
28 |         box_y = (boxes3d[:, :, 1] >= cfg.yrange[0]) & (boxes3d[:, :, 1] < cfg.yrange[1])
29 |         box_z = (boxes3d[:, :, 2] >= cfg.zrange[0]) & (boxes3d[:, :, 2] < cfg.zrange[1])
30 |         box_xyz = np.sum(box_x & box_y & box_z, axis=1)
31 | 
32 |         return lidar[filter_xyz], boxes3d[box_xyz > 0], gt_box3d[box_xyz > 0]
33 | 
34 |     return lidar[filter_xyz]
35 | 
36 | def lidar_to_bev(lidar):
37 | 
38 |     X0, Xn = 0, cfg.W
39 |     Y0, Yn = 0, cfg.H
40 |     Z0, Zn = 0, cfg.D
41 | 
42 |     width = Yn - Y0
43 |     height = Xn - X0
44 |     channel = Zn - Z0 + 2
45 | 
46 |     pxs = lidar[:, 0]
47 |     pys = lidar[:, 1]
48 |     pzs = lidar[:, 2]
49 |     prs = lidar[:, 3]
50 | 
51 |     qxs = ((pxs - cfg.xrange[0]) / cfg.vw).astype(np.int32)
52 |     qys = ((pys - cfg.yrange[0]) / cfg.vh).astype(np.int32)
53 |     qzs = ((pzs - cfg.zrange[0]) / cfg.vd).astype(np.int32)
54 | 
55 |     print('height,width,channel=%d,%d,%d' % (height, width, channel))
56 |     top = np.zeros(shape=(height, width, channel), dtype=np.float32)
57 |     mask = np.ones(shape=(height, width, channel - 1), dtype=np.float32) * -5
58 | 
59 |     for i in range(len(pxs)):
60 |         top[-qxs[i], -qys[i], -1] = 1 + top[-qxs[i], -qys[i], -1]
61 |         if pzs[i] > mask[-qxs[i], -qys[i], qzs[i]]:
62 |             top[-qxs[i], -qys[i], qzs[i]] = max(0, pzs[i] - cfg.zrange[0])
63 |             mask[-qxs[i], -qys[i], qzs[i]] = pzs[i]
64 |         if pzs[i] > mask[-qxs[i], -qys[i], -1]:
65 |             mask[-qxs[i], -qys[i], -1] = pzs[i]
66 |             top[-qxs[i], -qys[i], -2] = prs[i]
67 | 
68 | 
69 |     top[:, :, -1] = np.log(top[:, :, -1] + 1) / math.log(64)
70 | 
71 |     if 1:
72 |         # top_image = np.sum(top[:,:,:-1],axis=2)
73 |         density_image = top[:, :, -1]
74 |         density_image = density_image - np.min(density_image)
75 |         density_image = (density_image / np.max(density_image) * 255).astype(np.uint8)
76 |         # top_image = np.dstack((top_image, top_image, top_image)).astype(np.uint8)
77 | 
78 | 
79 |     return top, density_image
80 | 
81 | 
82 | # def draw_lidar(lidar, is_grid=False, is_axis = True, is_top_region=True, fig=None):
83 | 
84 | #     pxs=lidar[:,0]
85 | #     pys=lidar[:,1]
86 | #     pzs=lidar[:,2]
87 | #     prs=lidar[:,3]
88 | 
89 | #     if fig is None: fig = mlab.figure(figure=None, bgcolor=(0,0,0), fgcolor=None, engine=None, size=(1000, 500))
90 | 
91 | #     mlab.points3d(
92 | #         pxs, pys, pzs, prs,
93 | #         mode='point',  # 'point' 'sphere'
94 | #         colormap='gnuplot',  #'bone', #'spectral', #'copper',
95 | #         scale_factor=1,
96 | #         figure=fig)
97 | 
98 | #     # draw grid
99 | #     if is_grid:
100 | #         mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=0.2)
101 | 
102 | #         for y in np.arange(-50,50,1):
103 | #             x1,y1,z1 = -50, y, 0
104 | #             x2,y2,z2 = 50, y, 0
105 | #             mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
106 | 
107 | #         for x in np.arange(-50,50,1):
108 | #             x1,y1,z1 = x,-50, 0
109 | #             x2,y2,z2 = x, 50, 0
110 | #             mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
111 | 
112 | #     # draw axis
113 | #     if is_grid:
114 | #         mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=0.2)
115 | 
116 | #         axes=np.array([
117 | #             [2.,0.,0.,0.],
118 | #             [0.,2.,0.,0.],
119 | #             [0.,0.,2.,0.],
120 | #         ],dtype=np.float64)
121 | #         fov=np.array([  ## : now is 45 deg. use actual setting later ...
122 | # [20., 20., 0.,0.], 123 | # [20.,-20., 0.,0.], 124 | # ],dtype=np.float64) 125 | 126 | 127 | # mlab.plot3d([0, axes[0,0]], [0, axes[0,1]], [0, axes[0,2]], color=(1,0,0), tube_radius=None, figure=fig) 128 | # mlab.plot3d([0, axes[1,0]], [0, axes[1,1]], [0, axes[1,2]], color=(0,1,0), tube_radius=None, figure=fig) 129 | # mlab.plot3d([0, axes[2,0]], [0, axes[2,1]], [0, axes[2,2]], color=(0,0,1), tube_radius=None, figure=fig) 130 | # mlab.plot3d([0, fov[0,0]], [0, fov[0,1]], [0, fov[0,2]], color=(1,1,1), tube_radius=None, line_width=1, figure=fig) 131 | # mlab.plot3d([0, fov[1,0]], [0, fov[1,1]], [0, fov[1,2]], color=(1,1,1), tube_radius=None, line_width=1, figure=fig) 132 | 133 | # #draw top_image feature area 134 | # if is_top_region: 135 | # x1 = cfg.xrange[0] 136 | # x2 = cfg.xrange[1] 137 | # y1 = cfg.yrange[0] 138 | # y2 = cfg.yrange[1] 139 | # mlab.plot3d([x1, x1], [y1, y2], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig) 140 | # mlab.plot3d([x2, x2], [y1, y2], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig) 141 | # mlab.plot3d([x1, x2], [y1, y1], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig) 142 | # mlab.plot3d([x1, x2], [y2, y2], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig) 143 | 144 | # mlab.orientation_axes() 145 | # mlab.view(azimuth=180,elevation=None,distance=50,focalpoint=[ 12.0909996 , -1.04700089, -2.03249991])#2.0909996 , -1.04700089, -2.03249991 146 | 147 | # return fig 148 | 149 | # def draw_gt_boxes3d(gt_boxes3d, fig, color=(1,0,0), line_width=2): 150 | 151 | # num = len(gt_boxes3d) 152 | # for n in range(num): 153 | # b = gt_boxes3d[n] 154 | 155 | # for k in range(0,4): 156 | 157 | # i,j=k,(k+1)%4 158 | # mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]], color=color, tube_radius=None, line_width=line_width, figure=fig) 159 | 160 | # i,j=k+4,(k+3)%4 + 4 161 | # mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]], color=color, tube_radius=None, line_width=line_width, figure=fig) 162 | 163 | # i,j=k,k+4 164 | # mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]], color=color, tube_radius=None, line_width=line_width, figure=fig) 165 | 166 | # mlab.view(azimuth=180,elevation=None,distance=50,focalpoint=[ 12.0909996 , -1.04700089, -2.03249991])#2.0909996 , -1.04700089, -2.03249991 167 | 168 | def project_velo2rgb(velo,calib): 169 | T=np.zeros([4,4],dtype=np.float32) 170 | T[:3,:]=calib['Tr_velo2cam'] 171 | T[3,3]=1 172 | R=np.zeros([4,4],dtype=np.float32) 173 | R[:3,:3]=calib['R0'] 174 | R[3,3]=1 175 | num=len(velo) 176 | projections = np.zeros((num,8,2), dtype=np.int32) 177 | for i in range(len(velo)): 178 | box3d=np.ones([8,4],dtype=np.float32) 179 | box3d[:,:3]=velo[i] 180 | M=np.dot(calib['P2'],R) 181 | M=np.dot(M,T) 182 | box2d=np.dot(M,box3d.T) 183 | box2d=box2d[:2,:].T/box2d[2,:].reshape(8,1) 184 | projections[i] = box2d 185 | return projections 186 | 187 | def draw_rgb_projections(image, projections, color=(255,255,255), thickness=2, darker=1): 188 | 189 | img = image.copy()*darker 190 | num=len(projections) 191 | forward_color=(255,255,0) 192 | for n in range(num): 193 | qs = projections[n] 194 | for k in range(0,4): 195 | i,j=k,(k+1)%4 196 | 197 | cv2.line(img, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.LINE_AA) 198 | 199 | i,j=k+4,(k+1)%4 + 4 200 | cv2.line(img, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.LINE_AA) 201 | 202 | i,j=k,k+4 203 | cv2.line(img, (qs[i,0],qs[i,1]), 
def draw_rgb_projections(image, projections, color=(255, 255, 255), thickness=2, darker=1):

    img = image.copy() * darker
    num = len(projections)
    forward_color = (255, 255, 0)
    for n in range(num):
        qs = projections[n]
        for k in range(0, 4):
            # top face edges
            i, j = k, (k + 1) % 4
            cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.LINE_AA)
            # bottom face edges
            i, j = k + 4, (k + 1) % 4 + 4
            cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.LINE_AA)
            # vertical edges
            i, j = k, k + 4
            cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.LINE_AA)

        # highlight the front face with a crossed rectangle
        cv2.line(img, (qs[3, 0], qs[3, 1]), (qs[7, 0], qs[7, 1]), forward_color, thickness, cv2.LINE_AA)
        cv2.line(img, (qs[7, 0], qs[7, 1]), (qs[6, 0], qs[6, 1]), forward_color, thickness, cv2.LINE_AA)
        cv2.line(img, (qs[6, 0], qs[6, 1]), (qs[2, 0], qs[2, 1]), forward_color, thickness, cv2.LINE_AA)
        cv2.line(img, (qs[2, 0], qs[2, 1]), (qs[3, 0], qs[3, 1]), forward_color, thickness, cv2.LINE_AA)
        cv2.line(img, (qs[3, 0], qs[3, 1]), (qs[6, 0], qs[6, 1]), forward_color, thickness, cv2.LINE_AA)
        cv2.line(img, (qs[2, 0], qs[2, 1]), (qs[7, 0], qs[7, 1]), forward_color, thickness, cv2.LINE_AA)

    return img

def _quantize_coords(x, y):
    # velodyne (x, y) -> BEV image (row, col), flipped to match lidar_to_bev
    xx = cfg.H - int((y - cfg.yrange[0]) / cfg.vh)
    yy = cfg.W - int((x - cfg.xrange[0]) / cfg.vw)
    return xx, yy

def draw_polygons(image, polygons, color=(255, 255, 255), thickness=1, darken=1):

    img = image.copy() * darken
    for polygon in polygons:
        tup0, tup1, tup2, tup3 = [_quantize_coords(*tup) for tup in polygon]
        cv2.line(img, tup0, tup1, color, thickness, cv2.LINE_AA)
        cv2.line(img, tup1, tup2, color, thickness, cv2.LINE_AA)
        cv2.line(img, tup2, tup3, color, thickness, cv2.LINE_AA)
        cv2.line(img, tup3, tup0, color, thickness, cv2.LINE_AA)
    return img

def draw_rects(image, rects, color=(255, 255, 255), thickness=1, darken=1):

    img = image.copy() * darken
    for rect in rects:
        tup0, tup1 = [_quantize_coords(*tup) for tup in list(zip(rect[0::2], rect[1::2]))]
        cv2.rectangle(img, tup0, tup1, color, thickness, cv2.LINE_AA)
    return img

def load_kitti_calib(calib_file):
    """
    Load the KITTI projection matrices from a calib file.
    """
    with open(calib_file) as fi:
        lines = fi.readlines()
        assert (len(lines) == 8)

    obj = lines[0].strip().split(' ')[1:]
    P0 = np.array(obj, dtype=np.float32)
    obj = lines[1].strip().split(' ')[1:]
    P1 = np.array(obj, dtype=np.float32)
    obj = lines[2].strip().split(' ')[1:]
    P2 = np.array(obj, dtype=np.float32)
    obj = lines[3].strip().split(' ')[1:]
    P3 = np.array(obj, dtype=np.float32)
    obj = lines[4].strip().split(' ')[1:]
    R0 = np.array(obj, dtype=np.float32)
    obj = lines[5].strip().split(' ')[1:]
    Tr_velo_to_cam = np.array(obj, dtype=np.float32)
    obj = lines[6].strip().split(' ')[1:]
    Tr_imu_to_velo = np.array(obj, dtype=np.float32)

    # only P2, R0 and Tr_velo2cam are used downstream
    return {'P2': P2.reshape(3, 4),
            'R0': R0.reshape(3, 3),
            'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)}

def angle_in_limit(angle):
    # wrap the angle into [-pi/2, pi/2); angles within 5 degrees of -pi/2
    # are snapped to pi/2 so near-equivalent headings share one value
    limit_degree = 5
    while angle >= np.pi / 2:
        angle -= np.pi
    while angle < -np.pi / 2:
        angle += np.pi
    if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi:
        angle = np.pi / 2
    return angle
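# Worked examples (illustrative): angle_in_limit(np.pi) returns 0.0 after one
# subtraction of pi, and angle_in_limit(-np.pi / 2) returns np.pi / 2 because
# -pi/2 falls inside the 5-degree snapping tolerance.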
def box3d_cam_to_velo(box3d, Tr):

    def project_cam2velo(cam, Tr):
        T = np.zeros([4, 4], dtype=np.float32)
        T[:3, :] = Tr
        T[3, 3] = 1
        T_inv = np.linalg.inv(T)
        lidar_loc_ = np.dot(T_inv, cam)
        lidar_loc = lidar_loc_[:3]
        return lidar_loc.reshape(1, 3)

    h, w, l, tx, ty, tz, ry = [float(i) for i in box3d]
    cam = np.ones([4, 1])
    cam[0] = tx
    cam[1] = ty
    cam[2] = tz
    t_lidar = project_cam2velo(cam, Tr)

    # eight corners around the KITTI bottom-center origin, bottom face first
    Box = np.array([[-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2],
                    [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2],
                    [0, 0, 0, 0, h, h, h, h]])

    # camera-frame rotation_y -> velodyne-frame rotation_z
    rz = angle_in_limit(-ry - np.pi / 2)

    rotMat = np.array([
        [np.cos(rz), -np.sin(rz), 0.0],
        [np.sin(rz), np.cos(rz), 0.0],
        [0.0, 0.0, 1.0]])

    velo_box = np.dot(rotMat, Box)

    cornerPosInVelo = velo_box + np.tile(t_lidar, (8, 1)).T

    box3d_corner = cornerPosInVelo.transpose()

    return box3d_corner.astype(np.float32), np.array([t_lidar[0, 0], t_lidar[0, 1], t_lidar[0, 2], h, w, l, rz])

def anchors_center_to_corner(anchors):
    N = anchors.shape[0]
    anchor_corner = np.zeros((N, 4, 2))
    for i in range(N):
        anchor = anchors[i]
        translation = anchor[0:3]
        h, w, l = anchor[3:6]
        rz = anchor[-1]
        # re-create the 2D footprint of the box in the velodyne frame
        Box = np.array([
            [-l / 2, -l / 2, l / 2, l / 2],
            [w / 2, -w / 2, -w / 2, w / 2]])
        rotMat = np.array([
            [np.cos(rz), -np.sin(rz)],
            [np.sin(rz), np.cos(rz)]])
        velo_box = np.dot(rotMat, Box)
        cornerPosInVelo = velo_box + np.tile(translation[:2], (4, 1)).T
        box2d = cornerPosInVelo.transpose()
        anchor_corner[i] = box2d
    return anchor_corner


def corner_to_standup_box2d_batch(boxes_corner):
    # (N, 4, 2) -> (N, 4) as x1, y1, x2, y2 (axis-aligned bounding rectangle)
    N = boxes_corner.shape[0]
    standup_boxes2d = np.zeros((N, 4))
    standup_boxes2d[:, 0] = np.min(boxes_corner[:, :, 0], axis=1)
    standup_boxes2d[:, 1] = np.min(boxes_corner[:, :, 1], axis=1)
    standup_boxes2d[:, 2] = np.max(boxes_corner[:, :, 0], axis=1)
    standup_boxes2d[:, 3] = np.max(boxes_corner[:, :, 1], axis=1)
    return standup_boxes2d

def box3d_corner_to_center_batch(box3d_corner):
    # (N, 8, 3) -> (N, 7); w, l and theta are averaged over the four parallel
    # edge estimates to be robust to slightly deformed corner sets
    assert box3d_corner.ndim == 3
    batch_size = box3d_corner.shape[0]

    # center of the bottom face
    xyz = np.mean(box3d_corner[:, :4, :], axis=1)

    h = abs(np.mean(box3d_corner[:, 4:, 2] - box3d_corner[:, :4, 2], axis=1, keepdims=True))

    w = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 1, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 2, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 5, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 6, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4

    l = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 1, [0, 1]] - box3d_corner[:, 2, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 5, [0, 1]] - box3d_corner[:, 6, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4

    theta = (np.arctan2(box3d_corner[:, 2, 1] - box3d_corner[:, 1, 1],
                        box3d_corner[:, 2, 0] - box3d_corner[:, 1, 0]) +
             np.arctan2(box3d_corner[:, 3, 1] - box3d_corner[:, 0, 1],
                        box3d_corner[:, 3, 0] - box3d_corner[:, 0, 0]) +
             np.arctan2(box3d_corner[:, 2, 0] - box3d_corner[:, 3, 0],
                        box3d_corner[:, 3, 1] - box3d_corner[:, 2, 1]) +
             np.arctan2(box3d_corner[:, 1, 0] - box3d_corner[:, 0, 0],
                        box3d_corner[:, 0, 1] - box3d_corner[:, 1, 1]))[:, np.newaxis] / 4

    return np.concatenate([xyz, h, w, l, theta], axis=1).reshape(batch_size, 7)
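# Round-trip sketch (illustrative): box3d_corner_to_center_batch approximately
# inverts the corner construction in box3d_cam_to_velo, e.g. for one label line
#
#   corners, center = box3d_cam_to_velo(obj[8:], Tr)
#   recovered = box3d_corner_to_center_batch(corners[np.newaxis, ...])[0]
#
# `recovered` should match `center` (x, y, z, h, w, l, rz) up to numerical
# precision and angle wrapping; z here is the bottom of the box.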
def get_anchor3d(anchors):
    # NOTE: relies on cfg.z_a (anchor bottom height) and cfg.h_a (anchor box
    # height), which are not defined in the config shown here -- add them to
    # config.py before using this helper
    num = anchors.shape[0]
    anchors3d = np.zeros((num, 8, 3))
    anchors3d[:, :4, :2] = anchors
    anchors3d[:, :, 2] = cfg.z_a
    anchors3d[:, 4:, :2] = anchors
    anchors3d[:, 4:, 2] = cfg.z_a + cfg.h_a
    return anchors3d

def load_kitti_label(label_file, Tr):

    with open(label_file, 'r') as f:
        lines = f.readlines()

    gt_boxes3d_corner = []
    gt_boxes3d = []

    num_obj = len(lines)

    for j in range(num_obj):
        obj = lines[j].strip().split(' ')

        obj_class = obj[0].strip()
        if obj_class not in cfg.class_list:
            continue

        # fields 8..14 are h, w, l, x, y, z, ry in the camera frame
        box3d_corner, box3d_center = box3d_cam_to_velo(obj[8:], Tr)

        gt_boxes3d.append(box3d_center)
        gt_boxes3d_corner.append(box3d_corner)

    gt_boxes3d_corner = np.array(gt_boxes3d_corner).reshape(-1, 8, 3)
    gt_boxes3d = np.array(gt_boxes3d).reshape(-1, 7)

    return gt_boxes3d_corner, gt_boxes3d


def test():
    import os
    import matplotlib.pyplot as plt

    lidar_path = os.path.join('./data/KITTI/training', "crop/")
    image_path = os.path.join('./data/KITTI/training', "image_2/")
    calib_path = os.path.join('./data/KITTI/training', "calib/")
    label_path = os.path.join('./data/KITTI/training', "label_2/")

    files = [i.strip().split('/')[-1][:-4] for i in sorted(os.listdir(label_path))]

    i = 2600

    lidar_file = lidar_path + '/' + files[i] + '.bin'
    calib_file = calib_path + '/' + files[i] + '.txt'
    label_file = label_path + '/' + files[i] + '.txt'
    image_file = image_path + '/' + files[i] + '.png'

    image = cv2.imread(image_file)
    print("Processing: ", lidar_file)
    lidar = np.fromfile(lidar_file, dtype=np.float32)
    lidar = lidar.reshape((-1, 4))

    calib = load_kitti_calib(calib_file)
    # load_kitti_label returns (corner-form boxes, center-form boxes)
    gt_box3d_corner, gt_box3d = load_kitti_label(label_file, calib['Tr_velo2cam'])

    # augmentation
    # lidar, gt_box3d = aug_data(lidar, gt_box3d)

    # filtering
    lidar, gt_box3d_corner, gt_box3d = get_filtered_lidar(lidar, gt_box3d_corner, gt_box3d)

    # view in point cloud
    # fig = draw_lidar(lidar, is_grid=False, is_top_region=True)
    # draw_gt_boxes3d(gt_boxes3d=gt_box3d_corner, fig=fig)
    # mlab.show()

    # view in image
    # gt_3dTo2D = project_velo2rgb(gt_box3d_corner, calib)
    # img_with_box = draw_rgb_projections(image, gt_3dTo2D, color=(0, 0, 255), thickness=1)
    # plt.imshow(img_with_box[:, :, [2, 1, 0]])
    # plt.show()

    # view in bird's-eye view
    top_new, density_image = lidar_to_bev(lidar)
    # gt_box3d_top = corner_to_standup_box2d_batch(gt_box3d_corner)
    # density_with_box = draw_rects(density_image, gt_box3d_top)
    density_with_box = draw_polygons(density_image, gt_box3d_corner[:, :4, :2])
    plt.imshow(density_with_box, cmap='gray')
    plt.show()

def plot_grad(grad, epoch, name):
    os.makedirs('./vis/%d' % epoch, exist_ok=True)
    plt.figure()
    grad = grad.detach().cpu().numpy()
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # font that can render CJK labels
    matplotlib.rcParams['axes.unicode_minus'] = False    # render minus signs correctly with this font
    plt.hist(grad, bins=40, facecolor="blue", edgecolor="black", alpha=0.7)
    plt.xlabel("value")
    plt.ylabel("frequency")
    mean = np.mean(grad)
    var = np.var(grad)
    plt.title("mean: %.4f, var: %.4f" % (mean, var))
    plt.savefig('./vis/%d/%s.png' % (epoch, name))
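# Hypothetical call site for plot_grad -- a sketch only; `model` is an assumed
# name, not defined in this file. After loss.backward() one might dump
# per-parameter gradient histograms with:
#
#   for name, p in model.named_parameters():
#       if p.grad is not None:
#           plot_grad(p.grad.view(-1), epoch, name.replace('.', '_'))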
%.4f"%(mean, var)) 484 | plt.savefig('./vis/%d/%s.png'%(epoch, name)) 485 | 486 | def print_prob(score, name): 487 | score = score.permute(0, 3, 1, 2) 488 | score = score.detach().cpu().numpy() 489 | score_1 = score[0, ...] 490 | up = score_1[0, ...] 491 | down = score_1[1, ...] 492 | score = np.bitwise_or(up > 0.8 , down > 0.8) 493 | 494 | plt.figure() 495 | plt.imshow(score.astype(np.uint8), cmap='hot') 496 | plt.colorbar() 497 | plt.savefig(name) 498 | 499 | if __name__ == '__main__': 500 | test() --------------------------------------------------------------------------------