├── .gitignore
├── .idea
│   ├── vcs.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── deployment.xml
│   ├── LidarDet.iml
│   └── webServers.xml
├── data
│   ├── gen_train_val.py
│   ├── crop.py
│   └── kitti.py
├── config.py
├── README.md
├── loss.py
├── test.py
├── data_aug.py
├── voxelnet.py
├── train.py
├── test_utils.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/KITTI/
2 | results/
3 | nms/_ext/
4 | build/
5 |
--------------------------------------------------------------------------------
/data/gen_train_val.py:
--------------------------------------------------------------------------------
1 | import os
2 | from sklearn.model_selection import train_test_split
3 |
4 | # update this path to your KITTI training split
5 | root = '/data/cxg1/VoxelNet_pro/Data/training'
6 | train_file = open(os.path.join(root, 'train.txt'), 'w')
7 | val_file = open(os.path.join(root, 'val.txt'), 'w')
8 | # 7481 KITTI training frames, randomly split 70/30 into train/val
9 | file_ids = ["%06d\n" % i for i in range(0, 7481)]
10 | train_ids, val_ids = train_test_split(file_ids, test_size=0.3)
11 | print(len(train_ids))
12 | train_file.writelines(train_ids)
13 | val_file.writelines(val_ids)
14 | train_file.close()
15 | val_file.close()
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 |
4 | class config:
5 |
6 | # classes
7 | class_list = ['Car', 'Van']
8 |
9 | # batch size
10 | N=2
11 |
12 | # maximum number of points per voxel
13 | T=35
14 |
15 | # voxel size
16 | vd = 0.4
17 | vh = 0.2
18 | vw = 0.2
19 |
20 | # point cloud range
21 | xrange = (0.0, 70.4)
22 | yrange = (-40, 40)
23 | zrange = (-3, 1)
24 |
25 | # voxel grid
26 | W = math.ceil((xrange[1] - xrange[0]) / vw)
27 | H = math.ceil((yrange[1] - yrange[0]) / vh)
28 | D = math.ceil((zrange[1] - zrange[0]) / vd)
29 |
30 | # # iou threshold
31 | # pos_threshold = 0.9
32 | # neg_threshold = 0.45
33 |
34 | # anchors: (200, 176, 2, 7) x y z h w l r
35 | x = np.linspace(xrange[0]+vw, xrange[1]-vw, W//2)
36 | y = np.linspace(yrange[0]+vh, yrange[1]-vh, H//2)
37 | cx, cy = np.meshgrid(x, y)
38 | # each of the following is (H/2, W/2, 2)
39 | cx = np.tile(cx[..., np.newaxis], 2)
40 | cy = np.tile(cy[..., np.newaxis], 2)
41 | cz = np.ones_like(cx) * -1.0
42 | w = np.ones_like(cx) * 1.6
43 | l = np.ones_like(cx) * 3.9
44 | h = np.ones_like(cx) * 1.56
45 | r = np.ones_like(cx)
46 | r[..., 0] = 0
47 | r[..., 1] = np.pi/2
48 | anchors = np.stack([cx, cy, cz, h, w, l, r], axis=-1)
49 |
50 | anchors_per_position = 2
51 |
52 | # non-maximum suppression
53 | nms_threshold = 1e-3
54 | score_threshold = 0.9
55 |
56 | device = "cuda:2"  # change to an available device, e.g. "cuda:0" or "cpu"
57 | num_dim = 51  # sampling-grid resolution for the custom nms in test_utils.py
58 |
59 | last_epoch=0
60 |
--------------------------------------------------------------------------------
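A quick sanity check of the constants derived above (a sketch; the expected numbers follow directly from the ranges and voxel sizes in config.py):

```python
from config import config as cfg

# W = ceil(70.4 / 0.2) = 352, H = ceil(80 / 0.2) = 400, D = ceil(4 / 0.4) = 10
assert (cfg.W, cfg.H, cfg.D) == (352, 400, 10)
# matches the "(200, 176, 2, 7)" comment above: (H/2, W/2, anchors_per_position, 7)
assert cfg.anchors.shape == (200, 176, 2, 7)
```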
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | This is an unofficial implementation of [VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection](https://arxiv.org/abs/1711.06396) in PyTorch. A large part of this project is based on the work [here](https://github.com/jeasinema/VoxelNet-tensorflow).
4 | # Dependencies
5 | - `python3.5+`
6 | - `pytorch` (tested on 1.4)
7 | - `opencv`
8 | - `shapely`
9 | - `mayavi`
10 |
11 | # Installation
12 | 1. Clone this repository.
13 | 2. The NMS and box_overlap operations are based on [detectron2](https://github.com/facebookresearch/detectron2); please install that library.
14 |
15 |
16 | # Data Preparation
17 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Data to download include:
18 | * Velodyne point clouds (29 GB): input data to VoxelNet
19 | * Training labels of object data set (5 MB): input label to VoxelNet
20 | * Camera calibration matrices of object data set (16 MB): for visualization of predictions
21 | * Left color images of object data set (12 GB): for visualization of predictions
22 |
23 | 2. This project trains and validates on cropped point cloud data: points that fall outside the image field of view are removed.
24 | ```bash
25 | $ python3 data/crop.py
26 | ```
27 | 3. Split the training set into training and validation set according to the protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz).
28 | ```plain
29 | └── DATA_DIR
30 |     ├── training   <-- training data
31 |     |   ├── image_2
32 |     |   ├── label_2
33 |     |   ├── calib
34 |     |   ├── velodyne
35 |     |   └── crop
36 |     └── testing    <-- testing data
37 |         ├── image_2
38 |         ├── label_2
39 |         ├── calib
40 |         ├── velodyne
41 |         └── crop
42 | ```
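Alternatively, `data/gen_train_val.py` writes a random 70/30 split to `train.txt`/`val.txt` under the dataset root (edit its hard-coded `root` path first):

```bash
$ python3 data/gen_train_val.py
```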
41 |
42 | # Train
43 |
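Update the hard-coded dataset root in `train.py`, then launch training. The flags follow the argparse definitions in `train.py`/`test.py`; the checkpoint filename below is an example of the `model_<index+1><epoch>.pt` pattern produced by the saving code.

```bash
$ python3 train.py --index 0 --epoch 160                      # train from scratch
$ python3 train.py --index 0 --epoch 160 --ckpt model_130.pt  # resume from ./model
$ python3 test.py --ckpt model_130.pt                         # evaluate on the val split
```

Checkpoints are written to `./model` every 30 epochs; losses and box visualizations are logged to TensorBoard under `runs/`.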
44 |
45 |
46 |
47 | # TODO
48 | - [x] training code
49 | - [x] data augmentation
50 | - [ ] validation code
51 | - [ ] reproduce results for `Car`, `Pedestrian` and `Cyclist`
52 | - [ ] multi-gpu support
53 | - [ ] improve performance
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 |
8 | class VoxelLoss(nn.Module):
9 | def __init__(self, alpha, beta, gamma):
10 | super(VoxelLoss, self).__init__()
11 | self.smoothl1loss = nn.SmoothL1Loss(reduction='sum')
12 | self.alpha = alpha
13 | self.beta = beta
14 | self.gamma = gamma # currently unused
15 |
16 | def forward(self, reg, p_pos, pos_equal_one, neg_equal_one, targets, tag='train'):
17 | # reg (N, A*7, H, W); p_pos (N, H, W, A) sigmoid score map
18 | reg = reg.permute(0,2,3,1).contiguous()
19 | reg = reg.view(reg.size(0),reg.size(1),reg.size(2),-1,7) # (N * H * W * A * 7)
20 | targets = targets.view(targets.size(0),targets.size(1),targets.size(2),-1,7) # (N * H * W * A * 7)
21 | pos_equal_one_for_reg = pos_equal_one.unsqueeze(pos_equal_one.dim()).expand(-1,-1,-1,-1,7)
22 |
23 | rm_pos = reg * pos_equal_one_for_reg
24 | targets_pos = targets * pos_equal_one_for_reg
25 |
26 | cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6)
27 | cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6)
28 |
29 | cls_neg_loss = -neg_equal_one * torch.log(1 - p_pos + 1e-6)
30 | cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6)
31 |
32 | reg_loss = self.smoothl1loss(rm_pos, targets_pos)
33 | reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6)
34 | conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss
35 |
36 | if tag == 'val':
37 | xyz_loss = self.smoothl1loss(rm_pos[..., [0,1,2]], targets_pos[..., [0,1,2]]) / (pos_equal_one.sum() + 1e-6)
38 | whl_loss = self.smoothl1loss(rm_pos[..., [3,4,5]], targets_pos[..., [3,4,5]]) / (pos_equal_one.sum() + 1e-6)
39 | r_loss = self.smoothl1loss(rm_pos[..., [6]], targets_pos[..., [6]]) / (pos_equal_one.sum() + 1e-6)
40 | return conf_loss, reg_loss, xyz_loss, whl_loss, r_loss
41 |
42 | return conf_loss, reg_loss, None, None, None
43 |
--------------------------------------------------------------------------------
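A minimal usage sketch of `VoxelLoss` with random stand-in tensors (shapes follow the comments in `forward` and the feature-map sizes from config.py; alpha/beta/gamma as in the `hyper` dict in train.py):

```python
import torch
from loss import VoxelLoss

criterion = VoxelLoss(alpha=1.0, beta=10.0, gamma=2)
reg     = torch.randn(2, 14, 200, 176)  # (N, A*7, H/2, W/2) regression map from the RPN
p_pos   = torch.rand(2, 200, 176, 2)    # sigmoid scores, already permuted to (N, H/2, W/2, A)
pos     = torch.zeros(2, 200, 176, 2)   # pos_equal_one
neg     = torch.ones(2, 200, 176, 2)    # neg_equal_one
targets = torch.zeros(2, 200, 176, 14)  # encoded gt deltas
conf_loss, reg_loss, *_ = criterion(reg, p_pos, pos, neg, targets)
```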
/data/crop.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import sys
4 |
5 | CAM = 2
6 |
7 | def load_velodyne_points(filename):
8 | points = np.fromfile(filename, dtype=np.float32).reshape(-1, 4)
9 | #points = points[:, :3] # exclude luminance
10 | return points
11 |
12 | def load_calib(calib_dir):
13 | # P2 * R0_rect * Tr_velo_to_cam * y
14 | lines = open(calib_dir).readlines()
15 | lines = [ line.split()[1:] for line in lines ][:-1]
16 | #
17 | P = np.array(lines[CAM]).reshape(3,4)
18 | #
19 | Tr_velo_to_cam = np.array(lines[5]).reshape(3,4)
20 | Tr_velo_to_cam = np.concatenate( [ Tr_velo_to_cam, np.array([0,0,0,1]).reshape(1,4) ] , 0 )
21 | #
22 | R_cam_to_rect = np.eye(4)
23 | R_cam_to_rect[:3,:3] = np.array(lines[4][:9]).reshape(3,3)
24 | #
25 | P = P.astype('float32')
26 | Tr_velo_to_cam = Tr_velo_to_cam.astype('float32')
27 | R_cam_to_rect = R_cam_to_rect.astype('float32')
28 | return P, Tr_velo_to_cam, R_cam_to_rect
29 |
30 | def prepare_velo_points(pts3d_raw):
31 | '''Replaces the reflectance value by 1 and transposes the array, so
32 | points can be directly multiplied by the camera projection matrix'''
33 | pts3d = pts3d_raw
34 | # Reflectance > 0
35 | indices = pts3d[:, 3] > 0
36 | pts3d = pts3d[indices ,:]
37 | pts3d[:,3] = 1
38 | return pts3d.transpose(), indices
39 |
40 | def project_velo_points_in_img(pts3d, T_cam_velo, Rrect, Prect):
41 | '''Project 3D points into the 2D image. Expects pts3d as a 4xN
42 | numpy array. Returns the 2D projections of the points that
43 | are in front of the camera and the corresponding 3D points.'''
44 | # 3D points in camera reference frame.
45 | pts3d_cam = Rrect.dot(T_cam_velo.dot(pts3d))
46 | # Before projecting, keep only points with z>0
47 | # (points that are in front of the camera).
48 | idx = (pts3d_cam[2,:]>=0)
49 | pts2d_cam = Prect.dot(pts3d_cam[:,idx])
50 | return pts3d[:, idx], pts2d_cam/pts2d_cam[2,:], idx
51 |
52 |
53 | def align_img_and_pc(img_dir, pc_dir, calib_dir):
54 |
55 | img = cv2.imread(img_dir)
56 | pts = load_velodyne_points( pc_dir )
57 | P, Tr_velo_to_cam, R_cam_to_rect = load_calib(calib_dir)
58 |
59 | pts3d, indices = prepare_velo_points(pts)
60 | pts3d_ori = pts3d.copy()
61 | reflectances = pts[indices, 3]
62 | pts3d, pts2d_normed, idx = project_velo_points_in_img( pts3d, Tr_velo_to_cam, R_cam_to_rect, P )
63 | #print reflectances.shape, idx.shape
64 | reflectances = reflectances[idx]
65 | #print reflectances.shape, pts3d.shape, pts2d_normed.shape
66 | assert reflectances.shape[0] == pts3d.shape[1] == pts2d_normed.shape[1]
67 |
68 | rows, cols = img.shape[:2]
69 |
70 | points = []
71 | for i in range(pts2d_normed.shape[1]):
72 | c = int(np.round(pts2d_normed[0,i]))
73 | r = int(np.round(pts2d_normed[1,i]))
74 | if c < cols and r < rows and r > 0 and c > 0:
75 | color = img[r, c, :]
76 | point = [ pts3d[0,i], pts3d[1,i], pts3d[2,i], reflectances[i], color[0], color[1], color[2], pts2d_normed[0,i], pts2d_normed[1,i] ]
77 | points.append(point)
78 |
79 | points = np.array(points)
80 | return points
81 |
82 | # update the following directories
83 | IMG_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/image_2/'
84 | PC_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/velodyne/'
85 | CALIB_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/calib/'
86 | PC_CROP_ROOT = '/data/cxg1/VoxelNet_pro/Data/training/crop/'
87 |
88 |
89 | for frame in range(0, 7481):
90 | img_dir = IMG_ROOT + '%06d.png' % frame
91 | pc_dir = PC_ROOT + '%06d.bin' % frame
92 | calib_dir = CALIB_ROOT + '%06d.txt' % frame
93 |
94 | points = align_img_and_pc(img_dir, pc_dir, calib_dir)
95 |
96 | output_name = PC_CROP_ROOT + '%06d.bin' % frame
97 | sys.stdout.write('Save to %s \n' % output_name)
98 | points[:,:4].astype('float32').tofile(output_name)
--------------------------------------------------------------------------------
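The cropped scans keep the raw KITTI record layout, so after running crop.py they load exactly like the original `.bin` files (the path reuses the example root above):

```python
import numpy as np

# each record is float32 (x, y, z, reflectance), as written by crop.py
pts = np.fromfile('/data/cxg1/VoxelNet_pro/Data/training/crop/000000.bin',
                  dtype=np.float32).reshape(-1, 4)
```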
/test.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | from torch.autograd import Variable
4 | from config import config as cfg
5 | from data.kitti import KittiDataset
6 | import torch.utils.data as data
7 | import time
8 | from loss import VoxelLoss
9 | from voxelnet import VoxelNet
10 | import torch.optim as optim
11 | import torch.optim.lr_scheduler as lr_scheduler
12 | import torch.nn.init as init
13 | # from nms.pth_nms import pth_nms
14 | import numpy as np
15 | import torch.backends.cudnn
16 | from test_utils import draw_boxes
17 | from torch.utils.tensorboard import SummaryWriter
18 | import torchvision
19 | import os
20 | from utils import plot_grad, print_prob
21 | import cv2
22 |
23 | def weights_init(m):
24 | if isinstance(m, nn.Conv2d):
25 | init.xavier_normal_(m.weight.data)
26 | m.bias.data.zero_()
27 |
28 | def detection_collate(batch):
29 | voxel_features = []
30 | voxel_coords = []
31 | gt_box3d_corner = []
32 | gt_box3d = []
33 |
34 | images = []
35 | calibs = []
36 | ids = []
37 | for i, sample in enumerate(batch):
38 | voxel_features.append(sample[0])
39 |
40 | voxel_coords.append(
41 | np.pad(sample[1], ((0, 0), (1, 0)),
42 | mode='constant', constant_values=i))
43 |
44 | gt_box3d_corner.append(sample[2])
45 | gt_box3d.append(sample[3])
46 |
47 | images.append(sample[4])
48 | calibs.append(sample[5])
49 | ids.append(sample[6])
50 | return np.concatenate(voxel_features), \
51 | np.concatenate(voxel_coords), \
52 | gt_box3d_corner,\
53 | gt_box3d,\
54 | images,\
55 | calibs, ids
56 |
57 | torch.backends.cudnn.enabled=True
58 |
59 | import argparse
60 | parser = argparse.ArgumentParser(description='arg parser')
61 | parser.add_argument('--ckpt', type=str, default=None, help='pre_load_ckpt')
62 | args = parser.parse_args()
63 |
64 | def train(net, model_name, hyper, cfg, writer): # despite the name, this runs evaluation on the val split
65 |
66 | dataset=KittiDataset(cfg=cfg,root='/data/cxg1/VoxelNet_pro/Data',set='val')
67 | data_loader = data.DataLoader(dataset, batch_size=cfg.N, num_workers=4, collate_fn=detection_collate, shuffle=True, \
68 | pin_memory=False)
69 |
70 | # define loss function
71 | criterion = VoxelLoss(alpha=hyper['alpha'], beta=hyper['beta'], gamma=hyper['gamma'])
72 |
73 | running_loss = 0.0
74 | running_reg_loss = 0.0
75 | running_conf_loss = 0.0
76 |
77 | # evaluation pass (no parameter updates)
78 | # batch_iterator = None
79 | epoch_size = len(dataset) // cfg.N
80 | print('Epoch size', epoch_size)
81 | iteration = 0
82 | for voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, images, calibs, ids in data_loader:
83 |
84 | # move voxel features to the target device
85 | voxel_features = torch.tensor(voxel_features).to(cfg.device)
86 |
87 | pos_equal_one = []
88 | neg_equal_one = []
89 | targets = []
90 |
91 | for i in range(len(gt_box3d)):
92 | pos_equal_one_, neg_equal_one_, targets_ = dataset.cal_target(gt_box3d_corner[i], gt_box3d[i], cfg)
93 | pos_equal_one.append(pos_equal_one_)
94 | neg_equal_one.append(neg_equal_one_)
95 | targets.append(targets_)
96 |
97 | pos_equal_one = torch.stack(pos_equal_one, dim=0)
98 | neg_equal_one = torch.stack(neg_equal_one, dim=0)
99 | targets = torch.stack(targets, dim=0)
100 |
101 | # zero the parameter gradients
102 | # forward
103 | score, reg = net(voxel_features, voxel_coords)
104 |
105 | if iteration == 0 : # visualize the first image
106 | print_prob(score, "pred.png")
107 | print_prob(pos_equal_one, "gt.png")
108 |
109 | # calculate loss
110 | conf_loss, reg_loss, xyz_loss, whl_loss, r_loss = criterion(reg, score, pos_equal_one, neg_equal_one, targets)
111 | loss = hyper['lambda'] * conf_loss + reg_loss
112 |
113 | running_conf_loss += conf_loss.item()
114 | running_reg_loss += reg_loss.item()
115 | running_loss += (reg_loss.item() + conf_loss.item())
116 |
117 | pre_image = draw_boxes(reg.permute(0, 2, 3, 1).contiguous(), score, images, calibs, ids, 'pred') # channel-last, matching the anchor layout expected by delta_to_boxes3d
118 | gt_image = draw_boxes(targets.float(), pos_equal_one.float(), images, calibs, ids, 'true')
119 | try :
120 | writer.add_image("gt_image_box {}".format(iteration), gt_image, global_step=iteration, dataformats='NHWC')
121 | writer.add_image("predict_image_box {}".format(iteration), pre_image, global_step=iteration, dataformats='NHWC')
122 | except :
123 | pass
124 | iteration += 1
125 |
126 | hyper = {'alpha': 1.0,
127 | 'beta': 10.0,
128 | 'pos': 0.6,
129 | 'neg': 0.4,
130 | 'lr':0.01,
131 | 'momentum': 0.9,
132 | 'lambda': 2.0,
133 | 'gamma':2,
134 | 'weight_decay':0.00002}
135 |
136 | if __name__ == '__main__':
137 | cfg.pos_threshold = hyper['pos']
138 | cfg.neg_threshold = hyper['neg']
139 | model_name = args.ckpt
140 |
141 | writer = SummaryWriter('runs/test_run')
142 |
143 | net = VoxelNet()
144 | net.to(cfg.device)
145 | net.load_state_dict(torch.load(os.path.join('./model',model_name), map_location=cfg.device)['model_state_dict'])
146 | with torch.no_grad():
147 | try:
148 | train(net, model_name, hyper, cfg, writer)
149 | except KeyboardInterrupt:
150 | pass
151 | writer.close()
152 |
--------------------------------------------------------------------------------
/data_aug.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from config import config as cfg
3 | import cv2
4 | import matplotlib.pyplot as plt
5 |
6 | def draw_polygon(img, box_corner, color = (255, 255, 255),thickness = 1):
7 |
8 | tup0 = (box_corner[0, 1],box_corner[0, 0])
9 | tup1 = (box_corner[1, 1],box_corner[1, 0])
10 | tup2 = (box_corner[2, 1],box_corner[2, 0])
11 | tup3 = (box_corner[3, 1],box_corner[3, 0])
12 | cv2.line(img, tup0, tup1, color, thickness, cv2.LINE_AA)
13 | cv2.line(img, tup1, tup2, color, thickness, cv2.LINE_AA)
14 | cv2.line(img, tup2, tup3, color, thickness, cv2.LINE_AA)
15 | cv2.line(img, tup3, tup0, color, thickness, cv2.LINE_AA)
16 | return img
17 |
18 | def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0):
19 | # Input:
20 | # points: (N, 3)
21 | # rx/y/z: in radians
22 | # Output:
23 | # points: (N, 3)
24 | N = points.shape[0]
25 | points = np.hstack([points, np.ones((N, 1))])
26 | mat1 = np.eye(4)
27 | mat1[3, 0:3] = tx, ty, tz
28 | points = np.matmul(points, mat1)
29 | if rx != 0:
30 | mat = np.zeros((4, 4))
31 | mat[0, 0] = 1
32 | mat[3, 3] = 1
33 | mat[1, 1] = np.cos(rx)
34 | mat[1, 2] = -np.sin(rx)
35 | mat[2, 1] = np.sin(rx)
36 | mat[2, 2] = np.cos(rx)
37 | points = np.matmul(points, mat)
38 | if ry != 0:
39 | mat = np.zeros((4, 4))
40 | mat[1, 1] = 1
41 | mat[3, 3] = 1
42 | mat[0, 0] = np.cos(ry)
43 | mat[0, 2] = np.sin(ry)
44 | mat[2, 0] = -np.sin(ry)
45 | mat[2, 2] = np.cos(ry)
46 | points = np.matmul(points, mat)
47 | if rz != 0:
48 | mat = np.zeros((4, 4))
49 | mat[2, 2] = 1
50 | mat[3, 3] = 1
51 | mat[0, 0] = np.cos(rz)
52 | mat[0, 1] = -np.sin(rz)
53 | mat[1, 0] = np.sin(rz)
54 | mat[1, 1] = np.cos(rz)
55 | points = np.matmul(points, mat)
56 | return points[:, 0:3]
57 |
58 | def box_transform(boxes_corner, tx, ty, tz, r=0):
59 | # boxes_corner (N, 8, 3)
60 | for idx in range(len(boxes_corner)):
61 | boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, rz=r)
62 | return boxes_corner
63 |
64 | def cal_iou2d(box1_corner, box2_corner):
65 | box1_corner = np.reshape(box1_corner, [4, 2])
66 | box2_corner = np.reshape(box2_corner, [4, 2])
67 | box1_corner = ((cfg.W, cfg.H)-(box1_corner - (cfg.xrange[0], cfg.yrange[0])) / (cfg.vw, cfg.vh)).astype(np.int32)
68 | box2_corner = ((cfg.W, cfg.H)-(box2_corner - (cfg.xrange[0], cfg.yrange[0])) / (cfg.vw, cfg.vh)).astype(np.int32)
69 |
70 | buf1 = np.zeros((cfg.H, cfg.W, 3))
71 | buf2 = np.zeros((cfg.H, cfg.W, 3))
72 | buf1 = cv2.fillConvexPoly(buf1, box1_corner, color=(1,1,1))[..., 0]
73 | buf2 = cv2.fillConvexPoly(buf2, box2_corner, color=(1,1,1))[..., 0]
74 |
75 | indiv = np.sum(np.absolute(buf1-buf2))
76 | share = np.sum((buf1 + buf2) == 2)
77 | if indiv == 0:
78 | return 0.0 # when target is out of bound
79 | return share / (indiv + share)
80 |
81 | def aug_data(lidar, gt_box3d_corner):
82 | np.random.seed()
83 |
84 | choice = np.random.randint(1, 10)
85 |
86 | if choice >= 7:
87 | for idx in range(len(gt_box3d_corner)):
88 | # TODO: precisely gather the point
89 | is_collision = True
90 | _count = 0
91 | while is_collision and _count < 100:
92 | t_rz = np.random.uniform(-np.pi / 10, np.pi / 10)
93 | t_x = np.random.normal()
94 | t_y = np.random.normal()
95 | t_z = np.random.normal()
96 |
97 | # check collision
98 | tmp = box_transform(
99 | gt_box3d_corner[[idx]], t_x, t_y, t_z, t_rz)
100 | is_collision = False
101 | for idy in range(idx):
102 | iou = cal_iou2d(tmp[0,:4,:2],gt_box3d_corner[idy,:4,:2])
103 | if iou > 0:
104 | is_collision = True
105 | _count += 1
106 | break
107 | if not is_collision:
108 | box_corner = gt_box3d_corner[idx]
109 | minx = np.min(box_corner[:, 0])
110 | miny = np.min(box_corner[:, 1])
111 | minz = np.min(box_corner[:, 2])
112 | maxx = np.max(box_corner[:, 0])
113 | maxy = np.max(box_corner[:, 1])
114 | maxz = np.max(box_corner[:, 2])
115 | bound_x = np.logical_and(
116 | lidar[:, 0] >= minx, lidar[:, 0] <= maxx)
117 | bound_y = np.logical_and(
118 | lidar[:, 1] >= miny, lidar[:, 1] <= maxy)
119 | bound_z = np.logical_and(
120 | lidar[:, 2] >= minz, lidar[:, 2] <= maxz)
121 | bound_box = np.logical_and(
122 | np.logical_and(bound_x, bound_y), bound_z)
123 | lidar[bound_box, 0:3] = point_transform(
124 | lidar[bound_box, 0:3], t_x, t_y, t_z, rz=t_rz)
125 | gt_box3d_corner[idx] = box_transform(
126 | gt_box3d_corner[[idx]], t_x, t_y, t_z, t_rz)
127 |
128 | gt_box3d = gt_box3d_corner
129 |
130 | elif choice < 7 and choice >= 4:
131 | # global rotation
132 | angle = np.random.uniform(-np.pi / 4, np.pi / 4)
133 | lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle)
134 | gt_box3d = box_transform(gt_box3d_corner, 0, 0, 0, r=angle)
135 |
136 | else:
137 | # global scaling
138 | factor = np.random.uniform(0.95, 1.05)
139 | lidar[:, 0:3] = lidar[:, 0:3] * factor
140 | gt_box3d = gt_box3d_corner * factor
141 |
142 | return lidar, gt_box3d
--------------------------------------------------------------------------------
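`point_transform` works on row vectors (`points @ M`), so the translation occupies the bottom row of the 4x4 matrix and a positive `rz` is equivalent to a column-vector rotation by `-rz`. A quick check of that convention (hedged sketch):

```python
import numpy as np
from data_aug import point_transform

pts = np.array([[1.0, 0.0, 0.0]])
print(point_transform(pts, tx=1, ty=2, tz=3))       # [[2. 2. 3.]]
print(point_transform(pts, 0, 0, 0, rz=np.pi / 2))  # ~[[0. -1. 0.]]
```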
/voxelnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch
4 | from torch.autograd import Variable
5 | from config import config as cfg
6 |
7 | # conv2d + bn + relu
8 | class Conv2d(nn.Module):
9 |
10 | def __init__(self,in_channels,out_channels,k,s,p, activation=True, batch_norm=True):
11 | super(Conv2d, self).__init__()
12 | self.conv = nn.Conv2d(in_channels,out_channels,kernel_size=k,stride=s,padding=p)
13 | if batch_norm:
14 | self.bn = nn.BatchNorm2d(out_channels)
15 | else:
16 | self.bn = None
17 | self.activation = activation
18 | def forward(self,x):
19 | x = self.conv(x)
20 | if self.bn is not None:
21 | x=self.bn(x)
22 | if self.activation:
23 | return F.relu(x,inplace=True)
24 | else:
25 | return x
26 |
27 | # conv3d + bn + relu
28 | class Conv3d(nn.Module):
29 |
30 | def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True):
31 | super(Conv3d, self).__init__()
32 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
33 | if batch_norm:
34 | self.bn = nn.BatchNorm3d(out_channels)
35 | else:
36 | self.bn = None
37 |
38 | def forward(self, x):
39 | x = self.conv(x)
40 | if self.bn is not None:
41 | x = self.bn(x)
42 |
43 | return F.relu(x, inplace=True)
44 |
45 | # Fully Connected Network
46 | class FCN(nn.Module):
47 |
48 | def __init__(self,cin,cout):
49 | super(FCN, self).__init__()
50 | self.cout = cout
51 | self.linear = nn.Linear(cin, cout)
52 | self.bn = nn.BatchNorm1d(cout)
53 |
54 | def forward(self,x):
55 | # KK is the stacked k across batch
56 | kk, t, _ = x.shape
57 | x = self.linear(x.view(kk*t,-1))
58 | x = F.relu(self.bn(x))
59 | return x.view(kk,t,-1)
60 |
61 | # Voxel Feature Encoding layer
62 | class VFE(nn.Module):
63 |
64 | def __init__(self,cin,cout):
65 | super(VFE, self).__init__()
66 | assert cout % 2 == 0
67 | self.units = cout // 2
68 | self.fcn = FCN(cin,self.units)
69 |
70 | def forward(self, x, mask):
71 | # point-wise feature
72 | pwf = self.fcn(x)
73 | #locally aggregated feature
74 | laf = torch.max(pwf,1)[0].unsqueeze(1).repeat(1,cfg.T,1)
75 | # point-wise concat feature
76 | pwcf = torch.cat((pwf,laf),dim=2)
77 | # apply mask
78 | mask = mask.unsqueeze(2).repeat(1, 1, self.units * 2)
79 | pwcf = pwcf * mask.float()
80 |
81 | return pwcf
82 |
83 | # Stacked Voxel Feature Encoding
84 | class SVFE(nn.Module):
85 |
86 | def __init__(self):
87 | super(SVFE, self).__init__()
88 | self.vfe_1 = VFE(7,32)
89 | self.vfe_2 = VFE(32,128)
90 | self.fcn = FCN(128,128)
91 | def forward(self, x):
92 | mask = torch.ne(torch.max(x,2)[0], 0) # mask out all-zero (padding) points
93 | x = self.vfe_1(x, mask)
94 | x = self.vfe_2(x, mask)
95 | x = self.fcn(x)
96 | # element-wise max pooling
97 | x = torch.max(x,1)[0]
98 | return x
99 |
100 | # Convolutional Middle Layer
101 | class CML(nn.Module):
102 | def __init__(self):
103 | super(CML, self).__init__()
104 | self.conv3d_1 = Conv3d(128, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
105 | self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1))
106 | self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
107 |
108 | def forward(self, x):
109 | x = self.conv3d_1(x)
110 | x = self.conv3d_2(x)
111 | x = self.conv3d_3(x)
112 | return x
113 |
114 | # Region Proposal Network
115 | class RPN(nn.Module):
116 | def __init__(self):
117 | super(RPN, self).__init__()
118 | self.block_1 = [Conv2d(128, 128, 3, 2, 1)]
119 | self.block_1 += [Conv2d(128, 128, 3, 1, 1) for _ in range(3)]
120 | self.block_1 = nn.Sequential(*self.block_1)
121 |
122 | self.block_2 = [Conv2d(128, 128, 3, 2, 1)]
123 | self.block_2 += [Conv2d(128, 128, 3, 1, 1) for _ in range(5)]
124 | self.block_2 = nn.Sequential(*self.block_2)
125 |
126 | self.block_3 = [Conv2d(128, 256, 3, 2, 1)]
127 | self.block_3 += [Conv2d(256, 256, 3, 1, 1) for _ in range(5)] # BN+ReLU conv like blocks 1-2; a bare nn.Conv2d here would skip BN/ReLU
128 | self.block_3 = nn.Sequential(*self.block_3)
129 |
130 | self.deconv_1 = nn.Sequential(nn.ConvTranspose2d(256, 256, 4, 4, 0),nn.BatchNorm2d(256))
131 | self.deconv_2 = nn.Sequential(nn.ConvTranspose2d(128, 256, 2, 2, 0),nn.BatchNorm2d(256))
132 | self.deconv_3 = nn.Sequential(nn.ConvTranspose2d(128, 256, 1, 1, 0),nn.BatchNorm2d(256))
133 |
134 | self.score_head = Conv2d(768, cfg.anchors_per_position, 1, 1, 0, activation=False, batch_norm=False)
135 | self.reg_head = Conv2d(768, 7 * cfg.anchors_per_position, 1, 1, 0, activation=False, batch_norm=False)
136 |
137 | def forward(self,x):
138 | x = self.block_1(x)
139 | x_skip_1 = x
140 | x = self.block_2(x)
141 | x_skip_2 = x
142 | x = self.block_3(x)
143 | x_0 = self.deconv_1(x)
144 | x_1 = self.deconv_2(x_skip_2)
145 | x_2 = self.deconv_3(x_skip_1)
146 | x = torch.cat((x_0,x_1,x_2), dim = 1)
147 | return self.score_head(x), self.reg_head(x)
148 |
149 | class VoxelNet(nn.Module):
150 |
151 | def __init__(self):
152 | super(VoxelNet, self).__init__()
153 | self.svfe = SVFE()
154 | self.cml = CML()
155 | self.rpn = RPN()
156 |
157 | def voxel_indexing(self, sparse_features, coords):
158 | dim = sparse_features.shape[-1]
159 |
160 | dense_feature = torch.zeros(cfg.N, cfg.D, cfg.H, cfg.W, dim).to(cfg.device)
161 |
162 | dense_feature[coords[:,0], coords[:,1], coords[:,2], coords[:,3], :]= sparse_features
163 |
164 | return dense_feature.permute(0, 4, 1, 2, 3)
165 |
166 | def forward(self, voxel_features, voxel_coords):
167 |
168 | # feature learning network
169 | vwfs = self.svfe(voxel_features)
170 | vwfs = self.voxel_indexing(vwfs,voxel_coords)
171 |
172 | # convolutional middle network
173 | cml_out = self.cml(vwfs)
174 |
175 | # region proposal network
176 |
177 | # merge the depth and feature dim into one, output probability score map and regression map
178 | score, reg = self.rpn(cml_out.view(cfg.N,-1,cfg.H, cfg.W))
179 | score = torch.sigmoid(score)
180 | score = score.permute((0, 2, 3, 1))
181 |
182 | return score, reg
183 |
184 |
--------------------------------------------------------------------------------
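An end-to-end shape check with random voxels (a hedged sketch; it assumes `cfg.device` in config.py points at an available device, so set it to `"cpu"` to try it without a GPU; note that `voxel_indexing` allocates a dense `(N, D, H, W, 128)` grid, roughly 1.4 GB as float32):

```python
import numpy as np
import torch
from config import config as cfg
from voxelnet import VoxelNet

net = VoxelNet().to(cfg.device)
K = 100  # total non-empty voxels across the batch of cfg.N samples
feats = torch.randn(K, cfg.T, 7, device=cfg.device)
coords = np.stack([np.random.randint(0, n, K)
                   for n in (cfg.N, cfg.D, cfg.H, cfg.W)], axis=1)  # (K, 4) = (batch, d, h, w)
score, reg = net(feats, coords)
print(score.shape)  # torch.Size([2, 200, 176, 2])
print(reg.shape)    # torch.Size([2, 14, 200, 176])
```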
/train.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | from torch.autograd import Variable
4 | from config import config as cfg
5 | from data.kitti import KittiDataset
6 | import torch.utils.data as data
7 | import time
8 | from loss import VoxelLoss
9 | from voxelnet import VoxelNet
10 | import torch.optim as optim
11 | import torch.optim.lr_scheduler as lr_scheduler
12 | import torch.nn.init as init
13 | # from nms.pth_nms import pth_nms
14 | import numpy as np
15 | import torch.backends.cudnn
16 | from test_utils import draw_boxes
17 | from torch.utils.tensorboard import SummaryWriter
18 | import torchvision
19 | import os
20 | from utils import plot_grad
21 | import cv2
22 |
23 |
24 | import argparse
25 |
26 | parser = argparse.ArgumentParser(description='arg parser')
27 | parser.add_argument('--ckpt', type=str, default=None, help='pre_load_ckpt')
28 | parser.add_argument('--index', type=int, default=0, help='hyper_tag')
29 | parser.add_argument('--epoch', type=int , default=160, help="training epoch")
30 | args = parser.parse_args()
31 |
32 | def weights_init(m):
33 | if isinstance(m, nn.Conv2d):
34 | init.xavier_normal_(m.weight.data)
35 | m.bias.data.zero_()
36 |
37 | def detection_collate(batch):
38 | voxel_features = []
39 | voxel_coords = []
40 | gt_box3d_corner = []
41 | gt_box3d = []
42 |
43 | images = []
44 | calibs = []
45 | ids = []
46 | for i, sample in enumerate(batch):
47 | voxel_features.append(sample[0])
48 |
49 | voxel_coords.append(
50 | np.pad(sample[1], ((0, 0), (1, 0)),
51 | mode='constant', constant_values=i))
52 |
53 | gt_box3d_corner.append(sample[2])
54 | gt_box3d.append(sample[3])
55 |
56 | images.append(sample[4])
57 | calibs.append(sample[5])
58 | ids.append(sample[6])
59 | return np.concatenate(voxel_features), \
60 | np.concatenate(voxel_coords), \
61 | gt_box3d_corner,\
62 | gt_box3d,\
63 | images,\
64 | calibs, ids
65 |
66 | torch.backends.cudnn.enabled=True
67 |
68 | def train(net, model_name, hyper, cfg, writer, optimizer):
69 |
70 | dataset=KittiDataset(cfg=cfg,root='/data/cxg1/VoxelNet_pro/Data',set='train')
71 | data_loader = data.DataLoader(dataset, batch_size=cfg.N, num_workers=4, collate_fn=detection_collate, shuffle=True, \
72 | pin_memory=False)
73 |
74 | net.train()
75 |
76 | # define optimizer
77 |
78 | # define loss function
79 | criterion = VoxelLoss(alpha=hyper['alpha'], beta=hyper['beta'], gamma=hyper['gamma'])
80 |
81 | running_loss = 0.0
82 | running_reg_loss = 0.0
83 | running_conf_loss = 0.0
84 |
85 | # training process
86 | # batch_iterator = None
87 | epoch_size = len(dataset) // cfg.N
88 | print('Epoch size', epoch_size)
89 |
90 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(args.epoch*x) for x in (0.7, 0.9)], gamma=0.1)
91 | scheduler.last_epoch = cfg.last_epoch + 1
92 | optimizer.zero_grad()
93 |
94 | epoch = cfg.last_epoch
95 | while epoch < args.epoch :
96 | iteration = 0
97 | for voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, images, calibs, ids in data_loader:
98 |
99 | # move voxel features to the target device
100 | voxel_features = torch.tensor(voxel_features).to(cfg.device)
101 |
102 | pos_equal_one = []
103 | neg_equal_one = []
104 | targets = []
105 |
106 | with torch.no_grad():
107 | for i in range(len(gt_box3d)):
108 | pos_equal_one_, neg_equal_one_, targets_ = dataset.cal_target(gt_box3d_corner[i], gt_box3d[i], cfg)
109 | pos_equal_one.append(pos_equal_one_)
110 | neg_equal_one.append(neg_equal_one_)
111 | targets.append(targets_)
112 |
113 | pos_equal_one = torch.stack(pos_equal_one, dim=0)
114 | neg_equal_one = torch.stack(neg_equal_one, dim=0)
115 | targets = torch.stack(targets, dim=0)
116 |
117 | # zero the parameter gradients
118 | # forward
119 | score, reg = net(voxel_features, voxel_coords)
120 |
121 | # calculate loss
122 | conf_loss, reg_loss, _, _, _ = criterion(reg, score, pos_equal_one, neg_equal_one, targets)
123 | loss = hyper['lambda'] * conf_loss + reg_loss
124 |
125 | running_conf_loss += conf_loss.item()
126 | running_reg_loss += reg_loss.item()
127 | running_loss += (reg_loss.item() + conf_loss.item())
128 |
129 | # backward
130 | loss.backward()
131 |
132 | # visualize gradient
133 | if iteration == 0 and epoch % 30 == 0:
134 | plot_grad(net.svfe.vfe_1.fcn.linear.weight.grad.view(-1), epoch, "vfe_1_grad_%d"%(epoch))
135 | plot_grad(net.svfe.vfe_2.fcn.linear.weight.grad.view(-1), epoch,"vfe_2_grad_%d"%(epoch))
136 | plot_grad(net.cml.conv3d_1.conv.weight.grad.view(-1), epoch,"conv3d_1_grad_%d"%(epoch))
137 | plot_grad(net.rpn.reg_head.conv.weight.grad.view(-1), epoch,"reghead_grad_%d"%(epoch))
138 | plot_grad(net.rpn.score_head.conv.weight.grad.view(-1), epoch,"scorehead_grad_%d"%(epoch))
139 |
140 | # gradient accumulation: step every 10 iterations (effective batch = 10 * cfg.N)
141 | if iteration%10 == 9:
142 | for param in net.parameters():
143 | param.grad /= 10
144 | optimizer.step()
145 | optimizer.zero_grad()
146 |
147 | if iteration % 50 == 49:
148 | writer.add_scalar('total_loss', running_loss/50.0, epoch * epoch_size + iteration)
149 | writer.add_scalar('reg_loss', running_reg_loss/50.0, epoch * epoch_size + iteration)
150 | writer.add_scalar('conf_loss',running_conf_loss/50.0, epoch * epoch_size + iteration)
151 |
152 | print("epoch : " + repr(epoch) + ' || iter ' + repr(iteration) + ' || Loss: %.4f || Loc Loss: %.4f || Conf Loss: %.4f' % \
153 | ( running_loss/50.0, running_reg_loss/50.0, running_conf_loss/50.0))
154 |
155 | running_conf_loss = 0.0
156 | running_loss = 0.0
157 | running_reg_loss = 0.0
158 |
159 | # visualization
160 | if iteration == 2000:
161 | reg_de = reg.detach().permute(0, 2, 3, 1).contiguous() # to (N, H, W, A*7), matching the anchor layout expected by delta_to_boxes3d
162 | score_de = score.detach()
163 | with torch.no_grad():
164 | pre_image = draw_boxes(reg_de, score_de, images, calibs, ids, 'pred')
165 | gt_image = draw_boxes(targets.float(), pos_equal_one.float(), images, calibs, ids, 'true')
166 | try :
167 | writer.add_image("gt_image_box {}".format(epoch), gt_image, global_step=epoch * epoch_size + iteration, dataformats='NHWC')
168 | writer.add_image("predict_image_box {}".format(epoch), pre_image, global_step=epoch * epoch_size + iteration, dataformats='NHWC')
169 | except :
170 | pass
171 | iteration += 1
172 | scheduler.step()
173 | epoch += 1
174 | if epoch % 30 == 0:
175 | torch.save({
176 | "epoch": epoch,
177 | 'model_state_dict': net.state_dict(),
178 | 'optimizer_state_dict': optimizer.state_dict(),
179 | }, os.path.join('./model', model_name+str(epoch)+'.pt'))
180 |
181 | hyper = {'alpha': 1.0,
182 | 'beta': 10.0,
183 | 'pos': 0.75,
184 | 'neg': 0.5,
185 | 'lr':0.005,
186 | 'momentum': 0.9,
187 | 'lambda': 2.0,
188 | 'gamma':2,
189 | 'weight_decay':0.00001}
190 |
191 | if __name__ == '__main__':
192 | pre_model = args.ckpt
193 |
194 | cfg.pos_threshold = hyper['pos']
195 | cfg.neg_threshold = hyper['neg']
196 | model_name = "model_%d"%(args.index+1)
197 |
198 | writer = SummaryWriter('runs/%s' % model_name)
199 |
200 | net = VoxelNet()
201 | net.to(cfg.device)
202 | optimizer = optim.SGD(net.parameters(), lr=hyper['lr'], momentum = hyper['momentum'], weight_decay=hyper['weight_decay'])
203 |
204 | if pre_model is not None and os.path.exists(os.path.join('./model',pre_model)) :
205 | ckpt = torch.load(os.path.join('./model',pre_model), map_location=cfg.device)
206 | net.load_state_dict(ckpt['model_state_dict'])
207 | cfg.last_epoch = ckpt['epoch']
208 | optimizer.load_state_dict(ckpt['optimizer_state_dict'])
209 | else :
210 | net.apply(weights_init)
211 | train(net, model_name, hyper, cfg, writer, optimizer)
212 | writer.close()
213 |
--------------------------------------------------------------------------------
/test_utils.py:
--------------------------------------------------------------------------------
1 | from utils import get_filtered_lidar, project_velo2rgb, draw_rgb_projections
2 | from config import config as cfg
3 | from data.kitti import KittiDataset
4 | import torch.utils.data as data
5 | # from nms.pth_nms import pth_nms
6 | import torch.nn.functional as F
7 | import numpy as np
8 | import torch.backends.cudnn
9 | import cv2
10 | import matplotlib.pyplot as plt
11 | from detectron2.layers import nms_rotated
12 | torch.backends.cudnn.benchmark=True
13 | torch.backends.cudnn.enabled=True
14 |
15 | def delta_to_boxes3d(deltas, anchors):
16 | # Input:
17 | # deltas: (N, H/2, W/2, A*7), channel-last (same layout as the training targets)
18 | # anchors: (H/2, W/2, 2, 7)
19 |
20 |
21 | # Output:
22 | # boxes3d: (N, w*l*2, 7)
23 | N = deltas.shape[0]
24 | deltas = deltas.view(N, -1, 7)
25 | anchors = torch.FloatTensor(anchors)
26 | boxes3d = torch.zeros_like(deltas)
27 |
28 | if deltas.is_cuda:
29 | anchors = anchors.to(cfg.device)
30 | boxes3d = boxes3d.to(cfg.device)
31 |
32 | anchors_reshaped = anchors.view(-1, 7)
33 |
34 | anchors_d = torch.sqrt(anchors_reshaped[:, 4]**2 + anchors_reshaped[:, 5]**2)
35 |
36 | anchors_d = anchors_d.repeat(N, 2, 1).transpose(1,2)
37 | anchors_reshaped = anchors_reshaped.repeat(N, 1, 1)
38 |
39 | boxes3d[..., [0, 1]] = torch.mul(deltas[..., [0, 1]], anchors_d) + anchors_reshaped[..., [0, 1]]
40 | boxes3d[..., [2]] = torch.mul(deltas[..., [2]], anchors_reshaped[...,[3]]) + anchors_reshaped[..., [2]]
41 |
42 | boxes3d[..., [3, 4, 5]] = torch.exp(
43 | deltas[..., [3, 4, 5]]) * anchors_reshaped[..., [3, 4, 5]]
44 |
45 | boxes3d[..., 6] = deltas[..., 6] + anchors_reshaped[..., 6]
46 |
47 | return boxes3d
48 |
49 | def detection_collate(batch):
50 | lidars = []
51 | images = []
52 | calibs = []
53 |
54 | targets = []
55 | pos_equal_ones=[]
56 | ids = []
57 | for i, sample in enumerate(batch):
58 | lidars.append(sample[0])
59 | images.append(sample[1])
60 | calibs.append(sample[2])
61 | targets.append(sample[3])
62 | pos_equal_ones.append(sample[4])
63 | ids.append(sample[5])
64 | return lidars,images,calibs,\
65 | torch.cuda.FloatTensor(np.array(targets)), \
66 | torch.cuda.FloatTensor(np.array(pos_equal_ones)),\
67 | ids
68 |
69 |
70 | def box3d_center_to_corner_batch(boxes_center):
71 | # centers (N, 7) -> corners (N, 3, 8) and bird's-eye areas (N,)
72 | N = boxes_center.shape[0]
73 |
74 | x, y, z, h, w, l,theta = boxes_center.chunk(7, dim=1)
75 | sizes = (w * l).squeeze()
76 | zero_pad = torch.zeros((boxes_center.shape[0], 1)).to(cfg.device)
77 | one_pad = torch.ones((boxes_center.shape[0], 1)).to(cfg.device)
78 | box_shape = torch.cat([
79 | -0.5*l , -0.5*l, 0.5*l, 0.5*l, -0.5*l, -0.5*l, 0.5*l, 0.5*l,\
80 | 0.5*w , -0.5*w, -0.5*w, 0.5*w, 0.5*w, -0.5*w, -0.5*w, 0.5*w,\
81 | zero_pad, zero_pad, zero_pad, zero_pad, h, h, h, h \
82 | ], dim = 1).unsqueeze(2).reshape((-1, 3, 8))
83 | rotMat = torch.cat([
84 | torch.cos(theta), -torch.sin(theta), zero_pad,\
85 | torch.sin(theta), torch.cos(theta), zero_pad, \
86 | zero_pad, zero_pad, one_pad \
87 | ], dim=1).unsqueeze(2).reshape((-1, 3, 3))
88 | trans = torch.cat((x,y,z), dim=1).unsqueeze(2) # N * 3 * 1
89 | corner = torch.bmm(rotMat, box_shape) + trans
90 |
91 | return corner, sizes
92 |
93 | def box3d_corner_to_top_batch(boxes3d):
94 | # corners (N, 3, 8) -> bottom face (N, 2, 4) -> (N, 8)
95 | box3d_top=boxes3d[:, :2, :4]
96 | return box3d_top.reshape((-1, 8))
97 |
98 | def nms(boxes_bottom, nms_threshold):
99 | # boxes_bottom (N, 10): 8 bottom-corner coords + score + original index
100 | x = torch.linspace(0,1,cfg.num_dim)
101 | y = torch.linspace(0,1,cfg.num_dim)
102 | X, Y = torch.meshgrid(x,y)
103 | coords = torch.cat([X.reshape(1, -1), Y.reshape(1, -1)], dim=0).to(cfg.device)
104 | query = []
105 |
106 | # filter out singular (degenerate) boxes
107 | x1 = boxes_bottom[:, 0] - boxes_bottom[:, 1]
108 | y1 = boxes_bottom[:, 4] - boxes_bottom[:, 5]
109 | x2 = boxes_bottom[:, 2] - boxes_bottom[:, 1]
110 | y2 = boxes_bottom[:, 6] - boxes_bottom[:, 5]
111 |
112 | sizes = torch.sqrt(x1**2 + y1**2) * torch.sqrt(x2**2 + y2**2) # areas of the rectangles
113 |
114 | deter = torch.abs(x1*y2 - x2*y1)
115 | non_singular = torch.nonzero(deter > 1e-1)
116 | boxes_bottom = boxes_bottom[non_singular].reshape(-1, 10)
117 | sizes = sizes[non_singular].reshape(-1)
118 | if boxes_bottom.shape[0] == 0:
119 | return None
120 |
121 | alpha_1 = torch.stack([boxes_bottom[:, 0] - boxes_bottom[:, 1], boxes_bottom[:, 4] - boxes_bottom[:, 5]], dim = 1) # (x1, y1)
122 | alpha_2 = torch.stack([boxes_bottom[:, 2] - boxes_bottom[:, 1], boxes_bottom[:, 6] - boxes_bottom[:, 5]], dim = 1) # (x2, y2)
123 | trans = torch.stack([boxes_bottom[:, 1], boxes_bottom[:, 5]], dim = 1)
124 |
125 | while trans.shape[0] > 1:
126 | Rot1 = torch.stack([alpha_1[0, :].t(), alpha_2[0, :].t()], dim=1)
127 | train_trans = torch.mm(Rot1, coords).unsqueeze(0) + (trans[0, :] - trans[1:, :]).unsqueeze(2).repeat(1, 1, cfg.num_dim**2) # N * 2 * 2601
128 | Rot2 = torch.cat([alpha_1[1:, :].unsqueeze(2), alpha_2[1:, :].unsqueeze(2)], dim=2)
129 | translated = torch.bmm(torch.inverse(Rot2), train_trans) # N * 2 * 2601
130 |
131 | x_fit = (0<=translated[:, 0, :])*(translated[:, 0, :]<=1)
132 | y_fit = (0<=translated[:, 1, :])*(translated[:, 1, :]<=1)
133 | Intersection = (x_fit * y_fit).float()
134 |
135 | Intersection = torch.sum(Intersection, dim = 1) / cfg.num_dim**2
136 | IoU = sizes[0] * Intersection / (sizes[0] + sizes[1:] - sizes[0]*Intersection)
137 | index = torch.nonzero(IoU < nms_threshold) + 1
138 |
139 | trans = trans[index].reshape(-1, 2)
140 | alpha_1 = alpha_1[index].reshape(-1, 2)
141 | alpha_2 = alpha_2[index].reshape(-1, 2)
142 | query.append(boxes_bottom[0])
143 | boxes_bottom = boxes_bottom[index].reshape(-1, 10)
144 | sizes = sizes[index].reshape(-1)
145 |
146 | if boxes_bottom.shape[0] == 1:
147 | query.append(boxes_bottom[0])
148 | return torch.stack(query, dim=0)
149 |
150 | def draw_boxes(reg, prob, images, calibs, ids, tag):
151 | prob = prob.reshape(cfg.N, -1)
152 | batch_boxes3d = delta_to_boxes3d(reg, cfg.anchors)
153 | mask = torch.gt(prob, cfg.score_threshold)
154 | mask_reg = mask.unsqueeze(2).repeat(1, 1, 7)
155 |
156 | out_images = []
157 | for batch_id in range(cfg.N):
158 | boxes3d = torch.masked_select(batch_boxes3d[batch_id], mask_reg[batch_id]).view(-1, 7)
159 | scores = torch.masked_select(prob[batch_id], mask[batch_id])
160 |
161 | image = images[batch_id]
162 | calib = calibs[batch_id]
163 | id = ids[batch_id]
164 |
165 | if len(boxes3d) != 0:
166 | # boxes3d_corner, sizes = box3d_center_to_corner_batch(boxes3d)
167 | # boxes2d_bottom = box3d_corner_to_top_batch(boxes3d_corner)
168 | # boxes2d_score = torch.cat((boxes2d_bottom, scores.unsqueeze(1), torch.arange(0, len(boxes2d_bottom)).float().unsqueeze(1).to(cfg.device)), dim=1)
169 |
170 | # args = torch.argsort(boxes2d_score[:, 8], descending=True)
171 | # boxes2d_score = boxes2d_score[args]
172 |
173 | # vac = torch.nonzero(sizes > 1e-2)
174 | # boxes2d_score = boxes2d_score[vac].squeeze()
175 | # if boxes2d_score.shape[0] == 0:
176 | # out_images.append(image)
177 | # continue
178 |
179 | # NMS
180 | # boxes2d_score = nms(boxes2d_score, cfg.nms_threshold)
181 |
182 | index = nms_rotated(boxes3d[..., [0, 1, 5, 4, 6]], scores, 0.01)
183 | if len(index) == 0:
184 | out_images.append(image)
185 | continue
186 | # boxes3d_corner_keep = boxes3d_corner[boxes2d_score[:, 9].long()]
187 | boxes3d = boxes3d[index]
188 | print("No. %d objects detected" % len(boxes3d))
189 | boxes3d_corner_keep, _ = box3d_center_to_corner_batch(boxes3d)
190 | boxes3d_corner_keep = boxes3d_corner_keep.cpu().numpy()
191 | boxes3d_corner_keep = np.transpose(boxes3d_corner_keep, (0, 2, 1))
192 | rgb_2D = project_velo2rgb(boxes3d_corner_keep, calib)
193 | img_with_box = draw_rgb_projections(image, rgb_2D, color=(0, 0, 255), thickness=1)
194 | out_images.append(img_with_box)
195 | return np.array(out_images)
196 |
197 | if __name__=='__main__':
198 | center = torch.tensor([[1, 1, 1, 2, 3, 4, 0.3]]).to(cfg.device)
199 | box3d_center_to_corner_batch(center)
--------------------------------------------------------------------------------
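For reference, the decoding applied in `delta_to_boxes3d`, written out for a single anchor (a NumPy sketch, not part of the repo):

```python
import numpy as np

def decode_box(delta, anchor):
    """Inverse of the training-time encoding; mirrors delta_to_boxes3d per anchor."""
    xa, ya, za, ha, wa, la, ra = anchor
    da = np.sqrt(wa**2 + la**2)  # bird's-eye diagonal of the anchor
    x = delta[0] * da + xa
    y = delta[1] * da + ya
    z = delta[2] * ha + za
    h, w, l = np.exp(delta[3:6]) * np.array([ha, wa, la])
    return np.array([x, y, z, h, w, l, delta[6] + ra])
```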
/data/kitti.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import os.path
4 | import torch.utils.data as data
5 | import utils
6 | from utils import box3d_corner_to_center_batch, anchors_center_to_corner, corner_to_standup_box2d_batch
7 | from data_aug import aug_data
8 | from box_overlaps import bbox_overlaps
9 | import numpy as np
10 | import cv2
11 | import torch
12 | from detectron2.layers.rotated_boxes import pairwise_iou_rotated
13 |
14 | class KittiDataset(data.Dataset):
15 | def __init__(self, cfg, root='./KITTI',set='train',type='velodyne_train'):
16 | self.type = type
17 | self.root = root
18 | self.data_path = os.path.join(root, 'training')
19 | self.lidar_path = os.path.join(self.data_path, "crop")
20 | self.image_path = os.path.join(self.data_path, "image_2/")
21 | self.calib_path = os.path.join(self.data_path, "calib")
22 | self.label_path = os.path.join(self.data_path, "label_2")
23 |
24 | with open(os.path.join(self.data_path, '%s.txt' % set)) as f:
25 | self.file_list = f.read().splitlines()
26 |
27 | self.T = cfg.T
28 | self.vd = cfg.vd
29 | self.vh = cfg.vh
30 | self.vw = cfg.vw
31 | self.xrange = cfg.xrange
32 | self.yrange = cfg.yrange
33 | self.zrange = cfg.zrange
34 | self.anchors = torch.tensor(cfg.anchors.reshape(-1,7)).float().to(cfg.device)
35 | self.anchors_xylwr = self.anchors[..., [0, 1, 5, 4, 6]].contiguous()
36 | self.feature_map_shape = (int(cfg.H / 2), int(cfg.W / 2))
37 | self.anchors_per_position = cfg.anchors_per_position
38 | self.pos_threshold = cfg.pos_threshold
39 | self.neg_threshold = cfg.neg_threshold
40 |
41 | def cal_target(self, gt_box3d, gt_xyzhwlr, cfg):
42 | # Input:
43 | # gt_box3d: (M, 8, 3) gt box corners (unused here)
44 | # gt_xyzhwlr: (M, 7) gt boxes as (x, y, z, h, w, l, r)
45 | # anchors: (H/2, W/2, 2, 7)
46 | # Output:
47 | # pos_equal_one (H/2, W/2, 2)
48 | # neg_equal_one (H/2, W/2, 2)
49 | # targets (H/2, W/2, 14)
50 | # attention: IoU is computed in bird's-eye view
51 |
52 | anchors_d = torch.sqrt(self.anchors[:, 4] ** 2 + self.anchors[:, 5] ** 2).to(cfg.device)
53 | # denote whether the anchor box is pos or neg
54 | pos_equal_one = torch.zeros((*self.feature_map_shape, 2)).to(cfg.device)
55 | neg_equal_one = torch.zeros((*self.feature_map_shape, 2)).to(cfg.device)
56 |
57 | targets = torch.zeros((*self.feature_map_shape, 14)).to(cfg.device)
58 |
59 | gt_xyzhwlr = torch.tensor(gt_xyzhwlr, requires_grad=False).float().to(cfg.device)
60 | gt_xylwr = gt_xyzhwlr[..., [0, 1, 5, 4, 6]]
61 |
62 | # BOTTLENECK
63 |
64 | iou = pairwise_iou_rotated(
65 | self.anchors_xylwr,
66 | gt_xylwr.contiguous()
67 | ).cpu().numpy() # (gt - anchor)
68 |
69 | id_highest = np.argmax(iou.T, axis=1) # best-matching anchor for each gt box
70 | id_highest_gt = np.arange(iou.T.shape[0])
71 | mask = iou.T[id_highest_gt, id_highest] > 0 # keep only pairs with positive IoU
72 | id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]
73 |
74 | # find anchor iou > cfg.XXX_POS_IOU
75 | id_pos, id_pos_gt = np.where(iou > self.pos_threshold)
76 | # find anchor iou < cfg.XXX_NEG_IOU
77 | id_neg = np.where(np.sum(iou < self.neg_threshold,
78 | axis=1) == iou.shape[1])[0] # anchor matches no ground truth at all
79 |
80 | # id_highest was filtered by the mask above, so iterate the surviving pairs
81 | for anchor_id, gt in zip(id_highest, id_highest_gt):
82 | if gt not in id_pos_gt and iou[anchor_id, gt] > self.neg_threshold:
83 | id_pos = np.append(id_pos, anchor_id)
84 | id_pos_gt = np.append(id_pos_gt, gt)
85 | # sample negatives to keep a pos:neg ratio of 1:10, with a minimum of 500
86 | num_neg = 10 * id_pos.shape[0]
87 | if num_neg < 500:
88 | num_neg = 500
89 | if id_neg.shape[0] > num_neg:
90 | np.random.shuffle(id_neg)
91 | id_neg = id_neg[:num_neg]
92 | # cal the target and set the equal one
93 | index_x, index_y, index_z = np.unravel_index(
94 | id_pos, (*self.feature_map_shape, self.anchors_per_position))
95 | pos_equal_one[index_x, index_y, index_z] = 1
96 | # ATTENTION: index_z should be np.array
97 |
98 | # parameterize the ground truth box relative to anchor boxes
99 | targets[index_x, index_y, np.array(index_z) * 7] = \
100 | (gt_xyzhwlr[id_pos_gt, 0] - self.anchors[id_pos, 0]) / anchors_d[id_pos]
101 | targets[index_x, index_y, np.array(index_z) * 7 + 1] = \
102 | (gt_xyzhwlr[id_pos_gt, 1] - self.anchors[id_pos, 1]) / anchors_d[id_pos]
103 | targets[index_x, index_y, np.array(index_z) * 7 + 2] = \
104 | (gt_xyzhwlr[id_pos_gt, 2] - self.anchors[id_pos, 2]) / self.anchors[id_pos, 3]
105 | targets[index_x, index_y, np.array(index_z) * 7 + 3] = torch.log(
106 | gt_xyzhwlr[id_pos_gt, 3] / self.anchors[id_pos, 3])
107 | targets[index_x, index_y, np.array(index_z) * 7 + 4] = torch.log(
108 | gt_xyzhwlr[id_pos_gt, 4] / self.anchors[id_pos, 4])
109 | targets[index_x, index_y, np.array(index_z) * 7 + 5] = torch.log(
110 | gt_xyzhwlr[id_pos_gt, 5] / self.anchors[id_pos, 5])
111 | targets[index_x, index_y, np.array(index_z) * 7 + 6] = (
112 | gt_xyzhwlr[id_pos_gt, 6] - self.anchors[id_pos, 6])
113 | index_x, index_y, index_z = np.unravel_index(
114 | id_neg, (*self.feature_map_shape, self.anchors_per_position))
115 | neg_equal_one[index_x, index_y, index_z] = 1
116 |
117 | return pos_equal_one, neg_equal_one, targets
118 |
119 | def preprocess(self, lidar):
120 | # This function groups the points of each voxel and builds 7-dim point features.
121 | # shuffle the points
122 | np.random.shuffle(lidar)
123 |
124 | voxel_coords = ((lidar[:, :3] - np.array([self.xrange[0], self.yrange[0], self.zrange[0]])) / (
125 | self.vw, self.vh, self.vd)).astype(np.int32)
126 |
127 | # convert to (D, H, W)
128 | voxel_coords = voxel_coords[:,[2,1,0]]
129 | voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, \
130 | return_inverse=True, return_counts=True)
131 |
132 | voxel_features = []
133 |
134 | for i in range(len(voxel_coords)):
135 | voxel = np.zeros((self.T, 7), dtype=np.float32)
136 | pts = lidar[inv_ind == i]
137 | if voxel_counts[i] > self.T:
138 | pts = pts[:self.T, :]
139 | voxel_counts[i] = self.T
140 | # append each point's offset from its voxel centroid: (x, y, z, r) -> 7-dim feature
141 | voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)
142 | voxel_features.append(voxel)
143 | return np.array(voxel_features), voxel_coords
144 |
145 | def __getitem__(self, i):
146 |
147 | lidar_file = self.lidar_path + '/' + self.file_list[i] + '.bin'
148 | calib_file = self.calib_path + '/' + self.file_list[i] + '.txt'
149 | label_file = self.label_path + '/' + self.file_list[i] + '.txt'
150 | image_file = self.image_path + '/' + self.file_list[i] + '.png'
151 |
152 | calib = utils.load_kitti_calib(calib_file)
153 | Tr = calib['Tr_velo2cam']
154 | gt_box3d_corner, gt_box3d = utils.load_kitti_label(label_file, Tr)
155 | lidar = np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
156 |
157 |
158 | if self.type == 'velodyne_train':
159 | image = cv2.imread(image_file)
160 |
161 | # data augmentation
162 | # lidar, gt_box3d = aug_data(lidar, gt_box3d)
163 |
164 | # specify a range
165 | lidar, gt_box3d_corner, gt_box3d = utils.get_filtered_lidar(lidar, gt_box3d_corner, gt_box3d)
166 |
167 | # voxelize
168 | voxel_features, voxel_coords = self.preprocess(lidar)
169 |
170 | # bounding-box encoding
171 | # pos_equal_one, neg_equal_one, targets = self.cal_target(gt_box3d_corner, gt_box3d)
172 |
173 | return voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, image, calib, self.file_list[i] # pos_equal_one, neg_equal_one, targets, image, calib, self.file_list[i]
174 |
175 | elif self.type == 'velodyne_test':
176 | image = cv2.imread(image_file)
177 |
178 | lidar, gt_box3d_corner, gt_box3d = utils.get_filtered_lidar(lidar, gt_box3d_corner, gt_box3d)
179 |
180 | voxel_features, voxel_coords = self.preprocess(lidar)
181 |
182 | return voxel_features, voxel_coords, image, calib, self.file_list[i]
183 |
184 | else:
185 | raise ValueError('invalid dataset type: %s' % self.type)
186 |
187 |
188 | def __len__(self):
189 | return len(self.file_list)
--------------------------------------------------------------------------------
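The per-anchor target encoding used in `cal_target`, collected in one place (a NumPy sketch; boxes are `(x, y, z, h, w, l, r)` as in config.py):

```python
import numpy as np

def encode_box(gt, anchor):
    """Standard VoxelNet deltas, matching the target assignments in cal_target."""
    x, y, z, h, w, l, r = gt
    xa, ya, za, ha, wa, la, ra = anchor
    da = np.sqrt(wa**2 + la**2)  # anchors_d in cal_target
    return np.array([(x - xa) / da, (y - ya) / da, (z - za) / ha,
                     np.log(h / ha), np.log(w / wa), np.log(l / la), r - ra])
```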
/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | from config import config as cfg
4 | import math
5 | # import mayavi.mlab as mlab
6 | import cv2
7 | from box_overlaps import *
8 | from data_aug import aug_data
9 | import matplotlib.pyplot as plt
10 | import matplotlib
11 | import os
12 | import torch
13 |
14 | def get_filtered_lidar(lidar, boxes3d=None, gt_box3d=None):
15 |
16 | pxs = lidar[:, 0]
17 | pys = lidar[:, 1]
18 | pzs = lidar[:, 2]
19 |
20 | filter_x = np.where((pxs >= cfg.xrange[0]) & (pxs < cfg.xrange[1]))[0]
21 | filter_y = np.where((pys >= cfg.yrange[0]) & (pys < cfg.yrange[1]))[0]
22 | filter_z = np.where((pzs >= cfg.zrange[0]) & (pzs < cfg.zrange[1]))[0]
23 | filter_xy = np.intersect1d(filter_x, filter_y)
24 | filter_xyz = np.intersect1d(filter_xy, filter_z)
25 |
26 | if boxes3d is not None:
27 | box_x = (boxes3d[:, :, 0] >= cfg.xrange[0]) & (boxes3d[:, :, 0] < cfg.xrange[1])
28 | box_y = (boxes3d[:, :, 1] >= cfg.yrange[0]) & (boxes3d[:, :, 1] < cfg.yrange[1])
29 | box_z = (boxes3d[:, :, 2] >= cfg.zrange[0]) & (boxes3d[:, :, 2] < cfg.zrange[1])
30 | box_xyz = np.sum(box_x & box_y & box_z,axis=1)
31 |
32 | return lidar[filter_xyz], boxes3d[box_xyz>0], gt_box3d[box_xyz>0]
33 |
34 | return lidar[filter_xyz]
35 |
36 | def lidar_to_bev(lidar):
37 |
38 | X0, Xn = 0, cfg.W
39 | Y0, Yn = 0, cfg.H
40 | Z0, Zn = 0, cfg.D
41 |
42 | width = Yn - Y0
43 | height = Xn - X0
44 | channel = Zn - Z0 + 2
45 |
46 | pxs = lidar[:, 0]
47 | pys = lidar[:, 1]
48 | pzs = lidar[:, 2]
49 | prs = lidar[:, 3]
50 |
51 | qxs=((pxs-cfg.xrange[0])/cfg.vw).astype(np.int32)
52 | qys=((pys-cfg.yrange[0])/cfg.vh).astype(np.int32)
53 | qzs=((pzs-cfg.zrange[0])/cfg.vd).astype(np.int32)
54 |
55 | print('height,width,channel=%d,%d,%d'%(height,width,channel))
56 | top = np.zeros(shape=(height,width,channel), dtype=np.float32)
57 | mask = np.ones(shape=(height,width,channel-1), dtype=np.float32)* -5
58 |
59 | for i in range(len(pxs)):
60 | top[-qxs[i], -qys[i], -1]= 1+ top[-qxs[i], -qys[i], -1]
61 | if pzs[i]>mask[-qxs[i], -qys[i],qzs[i]]:
62 | top[-qxs[i], -qys[i], qzs[i]] = max(0,pzs[i]-cfg.zrange[0])
63 | mask[-qxs[i], -qys[i],qzs[i]]=pzs[i]
64 | if pzs[i]>mask[-qxs[i], -qys[i],-1]:
65 | mask[-qxs[i], -qys[i],-1]=pzs[i]
66 | top[-qxs[i], -qys[i], -2]=prs[i]
67 |
68 |
69 | top[:,:,-1] = np.log(top[:,:,-1]+1)/math.log(64)
70 |
71 | if 1:
72 | # top_image = np.sum(top[:,:,:-1],axis=2)
73 | density_image = top[:,:,-1]
74 | density_image = density_image-np.min(density_image)
75 | density_image = (density_image/np.max(density_image)*255).astype(np.uint8)
76 | # top_image = np.dstack((top_image, top_image, top_image)).astype(np.uint8)
77 |
78 |
79 | return top, density_image
80 |
81 |
82 | # def draw_lidar(lidar, is_grid=False, is_axis = True, is_top_region=True, fig=None):
83 |
84 | # pxs=lidar[:,0]
85 | # pys=lidar[:,1]
86 | # pzs=lidar[:,2]
87 | # prs=lidar[:,3]
88 |
89 | # if fig is None: fig = mlab.figure(figure=None, bgcolor=(0,0,0), fgcolor=None, engine=None, size=(1000, 500))
90 |
91 | # mlab.points3d(
92 | # pxs, pys, pzs, prs,
93 | # mode='point', # 'point' 'sphere'
94 | # colormap='gnuplot', #'bone', #'spectral', #'copper',
95 | # scale_factor=1,
96 | # figure=fig)
97 |
98 | # #draw grid
99 | # if is_grid:
100 | # mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=0.2)
101 |
102 | # for y in np.arange(-50,50,1):
103 | # x1,y1,z1 = -50, y, 0
104 | # x2,y2,z2 = 50, y, 0
105 | # mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
106 |
107 | # for x in np.arange(-50,50,1):
108 | # x1,y1,z1 = x,-50, 0
109 | # x2,y2,z2 = x, 50, 0
110 | # mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
111 |
112 | # #draw axis
113 | # if is_grid:
114 | # mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=0.2)
115 |
116 | # axes=np.array([
117 | # [2.,0.,0.,0.],
118 | # [0.,2.,0.,0.],
119 | # [0.,0.,2.,0.],
120 | # ],dtype=np.float64)
121 | # fov=np.array([ ## : now is 45 deg. use actual setting later ...
122 | # [20., 20., 0.,0.],
123 | # [20.,-20., 0.,0.],
124 | # ],dtype=np.float64)
125 |
126 |
127 | # mlab.plot3d([0, axes[0,0]], [0, axes[0,1]], [0, axes[0,2]], color=(1,0,0), tube_radius=None, figure=fig)
128 | # mlab.plot3d([0, axes[1,0]], [0, axes[1,1]], [0, axes[1,2]], color=(0,1,0), tube_radius=None, figure=fig)
129 | # mlab.plot3d([0, axes[2,0]], [0, axes[2,1]], [0, axes[2,2]], color=(0,0,1), tube_radius=None, figure=fig)
130 | # mlab.plot3d([0, fov[0,0]], [0, fov[0,1]], [0, fov[0,2]], color=(1,1,1), tube_radius=None, line_width=1, figure=fig)
131 | # mlab.plot3d([0, fov[1,0]], [0, fov[1,1]], [0, fov[1,2]], color=(1,1,1), tube_radius=None, line_width=1, figure=fig)
132 |
133 | # #draw top_image feature area
134 | # if is_top_region:
135 | # x1 = cfg.xrange[0]
136 | # x2 = cfg.xrange[1]
137 | # y1 = cfg.yrange[0]
138 | # y2 = cfg.yrange[1]
139 | # mlab.plot3d([x1, x1], [y1, y2], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
140 | # mlab.plot3d([x2, x2], [y1, y2], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
141 | # mlab.plot3d([x1, x2], [y1, y1], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
142 | # mlab.plot3d([x1, x2], [y2, y2], [0,0], color=(0.5,0.5,0.5), tube_radius=None, line_width=1, figure=fig)
143 |
144 | # mlab.orientation_axes()
145 | # mlab.view(azimuth=180,elevation=None,distance=50,focalpoint=[ 12.0909996 , -1.04700089, -2.03249991])#2.0909996 , -1.04700089, -2.03249991
146 |
147 | # return fig
148 |
149 | # def draw_gt_boxes3d(gt_boxes3d, fig, color=(1,0,0), line_width=2):
150 |
151 | # num = len(gt_boxes3d)
152 | # for n in range(num):
153 | # b = gt_boxes3d[n]
154 |
155 | # for k in range(0,4):
156 |
157 | # i,j=k,(k+1)%4
158 | # mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]], color=color, tube_radius=None, line_width=line_width, figure=fig)
159 |
160 | # i,j=k+4,(k+3)%4 + 4
161 | # mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]], color=color, tube_radius=None, line_width=line_width, figure=fig)
162 |
163 | # i,j=k,k+4
164 | # mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]], color=color, tube_radius=None, line_width=line_width, figure=fig)
165 |
166 | # mlab.view(azimuth=180,elevation=None,distance=50,focalpoint=[ 12.0909996 , -1.04700089, -2.03249991])#2.0909996 , -1.04700089, -2.03249991
167 |
168 | def project_velo2rgb(velo,calib):
169 | T=np.zeros([4,4],dtype=np.float32)
170 | T[:3,:]=calib['Tr_velo2cam']
171 | T[3,3]=1
172 | R=np.zeros([4,4],dtype=np.float32)
173 | R[:3,:3]=calib['R0']
174 | R[3,3]=1
175 | num=len(velo)
176 | projections = np.zeros((num,8,2), dtype=np.int32)
177 | M = np.dot(np.dot(calib['P2'], R), T)  # full velodyne -> image projection, hoisted out of the loop
178 | for i in range(num):
179 | box3d = np.ones([8, 4], dtype=np.float32)
180 | box3d[:, :3] = velo[i]
181 | box2d = np.dot(M, box3d.T)
182 | box2d = box2d[:2, :].T / box2d[2, :].reshape(8, 1)  # perspective divide by depth
183 | projections[i] = box2d
184 | return projections
186 |
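# A minimal single-point version of the same projection chain, kept as a
# hedged sketch: image ~ P2 @ R0 @ Tr_velo2cam @ point in homogeneous
# coordinates, followed by the perspective divide. `pt_velo` is a
# hypothetical (3,) velodyne point; `calib` comes from load_kitti_calib.
def project_point_velo2img(pt_velo, calib):
    T = np.eye(4, dtype=np.float32)
    T[:3, :] = calib['Tr_velo2cam']
    R = np.eye(4, dtype=np.float32)
    R[:3, :3] = calib['R0']
    p = np.dot(np.dot(calib['P2'], R), np.dot(T, np.append(pt_velo, 1.0)))
    return p[:2] / p[2]  # (u, v) pixel coordinates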
187 | def draw_rgb_projections(image, projections, color=(255,255,255), thickness=2, darker=1):
188 |
189 | img = image.copy()*darker
190 | num=len(projections)
191 | forward_color=(255,255,0)
192 | for n in range(num):
193 | qs = projections[n]
194 | for k in range(0,4):
195 | i,j=k,(k+1)%4
196 |
197 | cv2.line(img, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.LINE_AA)
198 |
199 | i,j=k+4,(k+1)%4 + 4
200 | cv2.line(img, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.LINE_AA)
201 |
202 | i,j=k,k+4
203 | cv2.line(img, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.LINE_AA)
204 |
205 | cv2.line(img, (qs[3,0],qs[3,1]), (qs[7,0],qs[7,1]), forward_color, thickness, cv2.LINE_AA)
206 | cv2.line(img, (qs[7,0],qs[7,1]), (qs[6,0],qs[6,1]), forward_color, thickness, cv2.LINE_AA)
207 | cv2.line(img, (qs[6,0],qs[6,1]), (qs[2,0],qs[2,1]), forward_color, thickness, cv2.LINE_AA)
208 | cv2.line(img, (qs[2,0],qs[2,1]), (qs[3,0],qs[3,1]), forward_color, thickness, cv2.LINE_AA)
209 | cv2.line(img, (qs[3,0],qs[3,1]), (qs[6,0],qs[6,1]), forward_color, thickness, cv2.LINE_AA)
210 | cv2.line(img, (qs[2,0],qs[2,1]), (qs[7,0],qs[7,1]), forward_color, thickness, cv2.LINE_AA)
211 |
212 | return img
213 |
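# Hedged note on corner ordering: draw_rgb_projections assumes the layout
# produced by box3d_cam_to_velo below (indices 0-3 bottom face, 4-7 top
# face), and marks the forward (+x) face through corners 2, 3, 6, 7 with
# the yellow `forward_color` lines plus an X across that face.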
214 | def _quantize_coords(x, y):
215 | xx = cfg.H - int((y - cfg.yrange[0]) / cfg.vh)
216 | yy = cfg.W - int((x - cfg.xrange[0]) / cfg.vw)
217 | return xx, yy
218 |
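# Hedged example of the metric -> BEV-pixel mapping above (both axes are
# flipped, so the near-left corner of the range lands bottom-right):
#
#   # (x, y) = (cfg.xrange[0], cfg.yrange[0]) maps to pixel (cfg.H, cfg.W)
#   # _quantize_coords(cfg.xrange[0], cfg.yrange[0]) == (cfg.H, cfg.W)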
219 | def draw_polygons(image, polygons, color=(255,255,255), thickness=1, darken=1):
220 |
221 | img = image.copy() * darken
222 | for polygon in polygons:
223 | tup0, tup1, tup2, tup3 = [_quantize_coords(*tup) for tup in polygon]
224 | cv2.line(img, tup0, tup1, color, thickness, cv2.LINE_AA)
225 | cv2.line(img, tup1, tup2, color, thickness, cv2.LINE_AA)
226 | cv2.line(img, tup2, tup3, color, thickness, cv2.LINE_AA)
227 | cv2.line(img, tup3, tup0, color, thickness, cv2.LINE_AA)
228 | return img
229 |
230 | def draw_rects(image, rects, color=(255,255,255), thickness=1, darken=1):
231 |
232 | img = image.copy() * darken
233 | for rect in rects:
234 | tup0,tup1 = [_quantize_coords(*tup) for tup in list(zip(rect[0::2], rect[1::2]))]
235 | cv2.rectangle(img, tup0, tup1, color, thickness, cv2.LINE_AA)
236 | return img
237 |
238 | def load_kitti_calib(calib_file):
239 | """
240 | load projection matrix
241 | """
242 | with open(calib_file) as fi:
243 | lines = fi.readlines()
244 | assert (len(lines) == 8)
245 |
246 | obj = lines[0].strip().split(' ')[1:]
247 | P0 = np.array(obj, dtype=np.float32)
248 | obj = lines[1].strip().split(' ')[1:]
249 | P1 = np.array(obj, dtype=np.float32)
250 | obj = lines[2].strip().split(' ')[1:]
251 | P2 = np.array(obj, dtype=np.float32)
252 | obj = lines[3].strip().split(' ')[1:]
253 | P3 = np.array(obj, dtype=np.float32)
254 | obj = lines[4].strip().split(' ')[1:]
255 | R0 = np.array(obj, dtype=np.float32)
256 | obj = lines[5].strip().split(' ')[1:]
257 | Tr_velo_to_cam = np.array(obj, dtype=np.float32)
258 | obj = lines[6].strip().split(' ')[1:]
259 | Tr_imu_to_velo = np.array(obj, dtype=np.float32)
260 |
261 | return {'P2': P2.reshape(3, 4),
262 | 'R0': R0.reshape(3, 3),
263 | 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)}
264 |
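# Hedged usage sketch, assuming the standard KITTI calib layout
# (P0, P1, P2, P3, R0_rect, Tr_velo_to_cam, Tr_imu_to_velo, one per line;
# the file id below is hypothetical):
#
#   calib = load_kitti_calib('./data/KITTI/training/calib/000000.txt')
#   # calib['P2'].shape == (3, 4); calib['R0'].shape == (3, 3)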
265 | def angle_in_limit(angle):
266 | # wrap the angle into [-pi/2, pi/2); snap values within 5 degrees of -pi/2 up to pi/2
267 | limit_degree = 5
268 | while angle >= np.pi / 2:
269 | angle -= np.pi
270 | while angle < -np.pi / 2:
271 | angle += np.pi
272 | if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi:
273 | angle = np.pi / 2
274 | return angle
275 |
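# Hedged sanity checks for angle_in_limit (illustrative only):
#
#   angle_in_limit(np.pi)        # -> 0.0, wrapped by a half turn
#   angle_in_limit(-np.pi / 2)   # -> pi/2, snapped by the 5-degree rule
#   angle_in_limit(0.3)          # -> 0.3, already inside [-pi/2, pi/2)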
276 | def box3d_cam_to_velo(box3d, Tr):
277 |
278 | def project_cam2velo(cam, Tr):
279 | T = np.zeros([4, 4], dtype=np.float32)
280 | T[:3, :] = Tr
281 | T[3, 3] = 1
282 | T_inv = np.linalg.inv(T)
283 | lidar_loc_ = np.dot(T_inv, cam)
284 | lidar_loc = lidar_loc_[:3]
285 | return lidar_loc.reshape(1, 3)
286 |
287 | h,w,l,tx,ty,tz,ry = [float(i) for i in box3d]
288 | cam = np.ones([4, 1])
289 | cam[0] = tx
290 | cam[1] = ty
291 | cam[2] = tz
292 | t_lidar = project_cam2velo(cam, Tr)
293 |
294 | Box = np.array([[-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2],
295 | [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2],
296 | [0, 0, 0, 0, h, h, h, h]])
297 |
298 | rz = angle_in_limit(-ry-np.pi/2)
299 |
300 | rotMat = np.array([
301 | [np.cos(rz), -np.sin(rz), 0.0],
302 | [np.sin(rz), np.cos(rz), 0.0],
303 | [0.0, 0.0, 1.0]])
304 |
305 | velo_box = np.dot(rotMat, Box)
306 |
307 | cornerPosInVelo = velo_box + np.tile(t_lidar, (8, 1)).T
308 |
309 | box3d_corner = cornerPosInVelo.transpose()
310 |
311 | return box3d_corner.astype(np.float32), np.array([t_lidar[0,0], t_lidar[0,1], t_lidar[0,2], h, w, l, rz])
312 |
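# Hedged usage note: the input is the last seven fields of a KITTI label
# line (h, w, l, x, y, z, ry in camera coordinates), as sliced in
# load_kitti_label below:
#
#   corners, box7 = box3d_cam_to_velo(obj[8:], calib['Tr_velo2cam'])
#   # corners.shape == (8, 3); box7 is (x, y, z, h, w, l, rz) in the velodyne frame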
313 | def anchors_center_to_corner(anchors):
314 | N = anchors.shape[0]
315 | anchor_corner = np.zeros((N, 4, 2))
316 | for i in range(N):
317 | anchor = anchors[i]
318 | translation = anchor[0:3]
319 | h, w, l = anchor[3:6]
320 | rz = anchor[-1]
321 | Box = np.array([
322 | [-l / 2, -l / 2, l / 2, l / 2], \
323 | [w / 2, -w / 2, -w / 2, w / 2]])
324 | # re-create 3D bounding box in velodyne coordinate system
325 | rotMat = np.array([
326 | [np.cos(rz), -np.sin(rz)],
327 | [np.sin(rz), np.cos(rz)]])
328 | velo_box = np.dot(rotMat, Box)
329 | cornerPosInVelo = velo_box + np.tile(translation[:2], (4, 1)).T
330 | box2d = cornerPosInVelo.transpose()
331 | anchor_corner[i] = box2d
332 | return anchor_corner
333 |
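# Hedged example: one axis-aligned anchor (rz = 0) at the origin, using a
# hypothetical (x y z h w l rz) parametrisation:
#
#   a = np.array([[0., 0., -1., 1.56, 1.6, 3.9, 0.]])
#   anchors_center_to_corner(a)   # (1, 4, 2) BEV corners of the footprint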
334 |
335 | def corner_to_standup_box2d_batch(boxes_corner):
336 | # (N, 4, 2) -> (N, 4) x1, y1, x2, y2
337 | N = boxes_corner.shape[0]
338 | standup_boxes2d = np.zeros((N, 4))
339 | standup_boxes2d[:, 0] = np.min(boxes_corner[:, :, 0], axis=1)
340 | standup_boxes2d[:, 1] = np.min(boxes_corner[:, :, 1], axis=1)
341 | standup_boxes2d[:, 2] = np.max(boxes_corner[:, :, 0], axis=1)
342 | standup_boxes2d[:, 3] = np.max(boxes_corner[:, :, 1], axis=1)
343 | return standup_boxes2d
344 |
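# Hedged example: the "standup" box is just the min/max envelope of the
# four footprint corners of each box:
#
#   sq = np.array([[[0., 0.], [1., 0.], [1., 1.], [0., 1.]]])
#   corner_to_standup_box2d_batch(sq)   # -> [[0., 0., 1., 1.]]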
345 | def box3d_corner_to_center_batch(box3d_corner):
346 | # (N, 8, 3) -> (N, 7)
347 | assert box3d_corner.ndim == 3
348 | batch_size = box3d_corner.shape[0]
349 |
350 | xyz = np.mean(box3d_corner[:, :4, :], axis=1)
351 |
352 | h = abs(np.mean(box3d_corner[:, 4:, 2] - box3d_corner[:, :4, 2], axis=1, keepdims=True))
353 |
354 | w = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 1, [0, 1]]) ** 2, axis=1, keepdims=True)) +
355 | np.sqrt(np.sum((box3d_corner[:, 2, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
356 | np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 5, [0, 1]]) ** 2, axis=1, keepdims=True)) +
357 | np.sqrt(np.sum((box3d_corner[:, 6, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4
358 |
359 | l = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
360 | np.sqrt(np.sum((box3d_corner[:, 1, [0, 1]] - box3d_corner[:, 2, [0, 1]]) ** 2, axis=1, keepdims=True)) +
361 | np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True)) +
362 | np.sqrt(np.sum((box3d_corner[:, 5, [0, 1]] - box3d_corner[:, 6, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4
363 |
364 | theta = (np.arctan2(box3d_corner[:, 2, 1] - box3d_corner[:, 1, 1],
365 | box3d_corner[:, 2, 0] - box3d_corner[:, 1, 0]) +
366 | np.arctan2(box3d_corner[:, 3, 1] - box3d_corner[:, 0, 1],
367 | box3d_corner[:, 3, 0] - box3d_corner[:, 0, 0]) +
368 | np.arctan2(box3d_corner[:, 2, 0] - box3d_corner[:, 3, 0],
369 | box3d_corner[:, 3, 1] - box3d_corner[:, 2, 1]) +
370 | np.arctan2(box3d_corner[:, 1, 0] - box3d_corner[:, 0, 0],
371 | box3d_corner[:, 0, 1] - box3d_corner[:, 1, 1]))[:, np.newaxis] / 4
372 |
373 | return np.concatenate([xyz, h, w, l, theta], axis=1).reshape(batch_size, 7)
374 |
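# Hedged self-consistency sketch: corners from box3d_cam_to_velo, pushed
# back through box3d_corner_to_center_batch, should approximately recover
# the same (x, y, z, h, w, l, rz) vector; useful when debugging the corner
# ordering assumed by the averages above:
#
#   corners, box7 = box3d_cam_to_velo(obj[8:], calib['Tr_velo2cam'])
#   box3d_corner_to_center_batch(corners[np.newaxis, ...])  # ~ box7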
375 | def get_anchor3d(anchors):
376 | num = anchors.shape[0]
377 | anchors3d = np.zeros((num,8,3))
378 | anchors3d[:, :4, :2] = anchors
379 | anchors3d[:, :, 2] = cfg.z_a
380 | anchors3d[:, 4:, :2] = anchors
381 | anchors3d[:, 4:, 2] = cfg.z_a + cfg.h_a
382 | return anchors3d
383 |
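# Hedged note: get_anchor3d lifts (N, 4, 2) BEV footprints to 3D corners
# using cfg.z_a (anchor bottom height) and cfg.h_a (anchor height), which
# are assumed to be defined elsewhere in config.py:
#
#   a3d = get_anchor3d(anchors_center_to_corner(a))   # (N, 8, 3)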
384 | def load_kitti_label(label_file, Tr):
385 |
386 | with open(label_file,'r') as f:
387 | lines = f.readlines()
388 |
389 | gt_boxes3d_corner = []
390 | gt_boxes3d = []
391 |
392 | num_obj = len(lines)
393 |
394 | for j in range(num_obj):
395 | obj = lines[j].strip().split(' ')
396 |
397 | obj_class = obj[0].strip()
398 | if obj_class not in cfg.class_list:
399 | continue
400 |
401 | box3d_corner, box3d_vertice = box3d_cam_to_velo(obj[8:], Tr)
402 |
403 | gt_boxes3d.append(box3d_vertice)
404 | gt_boxes3d_corner.append(box3d_corner)
405 |
406 | gt_boxes3d_corner = np.array(gt_boxes3d_corner).reshape(-1,8,3)
407 | gt_boxes3d = np.array(gt_boxes3d).reshape(-1, 7)
408 |
409 | return gt_boxes3d_corner, gt_boxes3d
410 |
411 |
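# Hedged usage note: load_kitti_label returns a pair, so callers must
# unpack both arrays, as test() does below:
#
#   gt_corner, gt_box7 = load_kitti_label(label_file, calib['Tr_velo2cam'])
#   # gt_corner: (M, 8, 3) corners; gt_box7: (M, 7) center parametrisation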
412 | def test():
413 | import os
415 | import matplotlib.pyplot as plt
416 |
417 | lidar_path = os.path.join('./data/KITTI/training', "crop/")
418 | image_path = os.path.join('./data/KITTI/training', "image_2/")
419 | calib_path = os.path.join('./data/KITTI/training', "calib/")
420 | label_path = os.path.join('./data/KITTI/training', "label_2/")
421 |
422 |
423 | files = [f.strip()[:-4] for f in sorted(os.listdir(label_path))]  # sample ids without the .txt suffix
424 |
425 | i=2600
426 |
427 | lidar_file = os.path.join(lidar_path, files[i] + '.bin')
428 | calib_file = os.path.join(calib_path, files[i] + '.txt')
429 | label_file = os.path.join(label_path, files[i] + '.txt')
430 | image_file = os.path.join(image_path, files[i] + '.png')
431 |
432 | image = cv2.imread(image_file)
433 | print("Processing: ", lidar_file)
434 | lidar = np.fromfile(lidar_file, dtype=np.float32)
435 | lidar = lidar.reshape((-1, 4))
436 |
437 | calib = load_kitti_calib(calib_file)
438 | gt_box3d_corner, gt_box3d = load_kitti_label(label_file, calib['Tr_velo2cam'])
439 |
440 | # augmentation
441 | #lidar, gt_box3d = aug_data(lidar, gt_box3d)
442 |
443 | # filtering
444 | lidar, gt_box3d_corner, gt_box3d = get_filtered_lidar(lidar, gt_box3d_corner, gt_box3d)
445 |
446 | # view in point cloud
447 |
448 | # fig = draw_lidar(lidar, is_grid=False, is_top_region=True)
449 | # draw_gt_boxes3d(gt_boxes3d=gt_box3d_corner, fig=fig)
450 | # mlab.show()
451 |
452 | # view in image
453 |
454 | # gt_3dTo2D = project_velo2rgb(gt_box3d_corner, calib)
455 | # img_with_box = draw_rgb_projections(image,gt_3dTo2D, color=(0,0,255),thickness=1)
456 | # plt.imshow(img_with_box[:,:,[2,1,0]])
457 | # plt.show()
458 |
459 | # view in bird-eye view
460 |
461 | top_new, density_image = lidar_to_bev(lidar)
462 | # gt_box3d_top = corner_to_standup_box2d_batch(gt_box3d_corner)
463 | # density_with_box = draw_rects(density_image, gt_box3d_top)
464 | density_with_box = draw_polygons(density_image, gt_box3d_corner[:, :4, :2])
465 | plt.imshow(density_with_box,cmap='gray')
466 | plt.show()
467 |
468 | def plot_grad(grad, epoch, name):
469 | if not os.path.exists('./vis/%d' % (epoch)):
470 | os.makedirs('./vis/%d' % (epoch))  # makedirs also creates ./vis itself if missing
471 | plt.figure()
472 | grad = grad.detach().cpu().numpy()
473 | matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # use SimHei so CJK glyphs render
474 | matplotlib.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
475 | plt.hist(grad, bins=40, facecolor="blue", edgecolor="black", alpha=0.7)
476 | # axis labels
477 | plt.xlabel("value")
478 | plt.ylabel("frequency")
479 | # title: summary statistics of the gradient values
480 | mean = np.mean(grad)
481 | var = np.var(grad)
482 | plt.title("mean: %.4f, var: %.4f" % (mean, var))
483 | plt.savefig('./vis/%d/%s.png' % (epoch, name))
484 | plt.close()  # avoid accumulating open figures across calls
485 |
486 | def print_prob(score, name):
487 | score = score.permute(0, 3, 1, 2)
488 | score = score.detach().cpu().numpy()
489 | score_1 = score[0, ...]
490 | up = score_1[0, ...]
491 | down = score_1[1, ...]
492 | score = np.logical_or(up > 0.8, down > 0.8)  # cells where either anchor is confident
493 |
494 | plt.figure()
495 | plt.imshow(score.astype(np.uint8), cmap='hot')
496 | plt.colorbar()
497 | plt.savefig(name)
498 |
499 | if __name__ == '__main__':
500 | test()
--------------------------------------------------------------------------------