├── README.md
├── data
│   ├── __init__.py
│   ├── common.py
│   ├── const.py
│   ├── dataset.py
│   ├── image.py
│   ├── lidar.py
│   ├── my_dataset.py
│   ├── rasterize.py
│   ├── txt
│   ├── utils.py
│   ├── vector_map.py
│   └── viz_data.py
├── evaluate.py
├── evaluate_json.py
├── export_gt_to_json.py
├── export_pred_to_json.py
├── loss.py
├── model
│   ├── HDMapNet
│   │   ├── base.py
│   │   ├── hdmapnet.py
│   │   ├── homography.py
│   │   ├── init
│   │   ├── ipm_net.py
│   │   ├── pointpillar.py
│   │   ├── utils.py
│   │   └── voxel.py
│   ├── VPN.py
│   ├── __init__.py
│   ├── base.py
│   ├── hdmapnet.py
│   ├── homography.py
│   ├── init
│   ├── lift_splat.py
│   └── utils.py
├── pic
│   ├── ex.md
│   ├── img1.png
│   ├── img2.png
│   └── img3.png
├── requirement.txt
├── train.py
├── utils.py
├── vis_label.py
└── vis_pred.py
/README.md:
--------------------------------------------------------------------------------
1 | # Bi-Mapper: Holistic BEV Semantic Mapping for Autonomous Driving (IEEE RA-L)
2 |
3 | Siyu Li, [Kailun Yang](https://yangkailun.com/), Hao Shi, [Jiaming Zhang](https://jamycheung.github.io/), Jiacheng Lin, Zhifeng Teng, and Zhiyong Li
4 |
5 | **IEEE Robotics and Automation Letters** [[PDF](https://arxiv.org/pdf/2305.04205.pdf)]
6 |
7 | ## Motivation
8 |
9 |
10 |
11 |
12 |
13 | ### Abstract
14 |
15 | A semantic map of the road scene, covering fundamental road elements, is an essential ingredient in autonomous driving systems. It provides important perception foundations for positioning and planning when rendered in the Bird's-Eye-View (BEV). Currently, the learning of prior knowledge, such as the hypothetical depth, has two sides in the research of BEV scene understanding. It can guide the learning of translating front perspective views into BEV directly with the help of calibration parameters. However, it suffers from geometric distortions in the representation of distant objects. In this paper, we propose a Bi-Mapper framework for top-down road-scene semantic understanding. The dual streams incorporate a global view and local prior knowledge, which are learned asynchronously according to the learning timing. At the same time, an Across-Space Loss (ASL) is designed to mitigate the negative impact of geometric distortions. Extensive results verify the effectiveness of each module in the proposed Bi-Mapper framework. Compared with existing road mapping networks, the proposed Bi-Mapper achieves 5.0 higher IoU on the nuScenes dataset. Moreover, we verify the generalization performance of Bi-Mapper in a real-world driving scenario.
16 |
17 | ### Method
18 | 
19 |
20 | ### Result
21 |
22 | 
23 |
24 | ### Update
25 |
26 | - 2023.04.28 Init repository.
27 | - 2024.07.24 Update.
28 |
29 | ### Data
30 | Download the [nuScenes dataset](https://www.nuscenes.org/) and change the dataset path in the code.
31 |
32 | ### Environment
33 | `pip install -r requirement.txt`
34 |
35 | ### Training
36 | Note: you need to change the dataset path before training (see the example below).
37 | `python train.py`
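A minimal invocation sketch, assuming `train.py` accepts the same nuScenes arguments as `evaluate.py` (`--dataroot`, `--version`, `--bsz`); the dataset path below is a placeholder:
```
python train.py --dataroot /path/to/nuScenes --version v1.0-trainval --bsz 4
```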
38 |
39 | ### Evaluation
40 | Note: you need to change the dataset path (`--dataroot`) and the checkpoint path (`--modelf`); see the example below.
41 | `python evaluate.py`
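For example (the flags are taken from the argparse setup in `evaluate.py`; the paths below are placeholders):
```
python evaluate.py --dataroot /path/to/nuScenes --version v1.0-trainval --modelf /path/to/checkpoint.pt
```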
42 |
43 | ## Publication
44 | If you find this repo useful, please consider referencing the following paper:
45 | ```
46 | @article{li2023bimapper,
47 | title={Bi-Mapper: Holistic BEV Semantic Mapping for Autonomous Driving},
48 | author={Li, Siyu and Yang, Kailun and Shi, Hao and Zhang, Jiaming and Lin, Jiacheng and Teng, Zhifeng and Li, Zhiyong},
49 | journal={IEEE Robotics and Automation Letters},
50 | year={2023}
51 | }
52 | ```
53 | ### Acknowledgement
54 | The code framework of this project is based on , thanks to this excellent work.
55 | ### Contact
56 |
57 | Feel free to contact me if you have additional questions or are interested in collaboration. Please drop me an email at lsynn@hnu.edu.cn.
58 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/data/__init__.py
--------------------------------------------------------------------------------
/data/common.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | from pathlib import Path
5 | from pyquaternion import Quaternion
6 |
7 |
8 | INTERPOLATION = cv2.LINE_8
9 |
10 |
11 | def get_split(split, dataset_name):
12 | split_dir = Path(__file__).parent / 'splits' / dataset_name
13 | split_path = split_dir / f'{split}.txt'
14 |
15 | return split_path.read_text().strip().split('\n')  # returns the list of scene names, e.g. ['scene-0001', ...]
16 |
17 |
18 | def get_view_matrix(h=200, w=400, h_meters=30.0, w_meters=60.0, h_offset=2, w_offset=-2):
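# Homography from ego-frame metres to BEV pixel coordinates: scale by pixels-per-metre
# (sh, sw) and apply a pixel offset; t1 is an unused axis-swapped variant, t2 is returned.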
19 | sh = h / h_meters
20 | sw = w / w_meters
21 | t1 = np.float32([
22 | [ 0., sw, w*w_offset+w/2.],
23 | [sh, 0., h*h_offset+h/2.],
24 | [ 0., 0., 1.]
25 | ])
26 | t2 = np.float32([
27 | [ sw, 0., w*w_offset+w/2.],
28 | [0., sh, h*h_offset+h/2.],
29 | [ 0., 0., 1.]
30 | ])
31 | return t2
32 |
33 |
34 | def get_transformation_matrix(R, t, inv=False):
35 | pose = np.eye(4, dtype=np.float32)
36 | pose[:3, :3] = R if not inv else R.T
37 | pose[:3, -1] = t if not inv else R.T @ -t
38 |
39 | return pose
40 |
41 |
42 | def get_pose(rotation, translation, inv=False, flat=False):
43 | if flat:
44 | yaw = Quaternion(rotation).yaw_pitch_roll[0]
45 | R = Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).rotation_matrix
46 | else:
47 | R = Quaternion(rotation).rotation_matrix
48 |
49 | t = np.array(translation, dtype=np.float32)
50 |
51 | return get_transformation_matrix(R, t, inv=inv)
52 |
53 |
54 | def encode(x):
55 | """
56 | (h, w, c) np.uint8 {0, 255}
57 | """
58 | n = x.shape[2]
59 |
60 | # assert n < 16
61 | assert x.ndim == 3
62 | assert x.dtype == np.uint8
63 | assert all(x in [0, 255] for x in np.unique(x))
64 |
65 | shift = np.arange(n, dtype=np.int32)[None, None]
66 |
67 | binary = (x > 0)
68 | binary = (binary << shift).sum(-1)
69 | binary = binary.astype(np.int32)
70 |
71 | return binary
72 |
73 |
74 | def decode(img, n):
75 | """
76 | returns (h, w, n) np.int32 {0, 1}
77 | """
78 | shift = np.arange(n, dtype=np.int32)[None, None]
79 |
80 | x = np.array(img)[..., None]
81 | x = (x >> shift) & 1
82 |
83 | return x
84 |
85 |
86 | if __name__ == '__main__':
87 | from PIL import Image
88 |
89 | n = 12
90 |
91 | x = np.random.rand(64, 64, n)
92 | x = 255 * (x > 0.5).astype(np.uint8)
93 |
94 | x_encoded = encode(x)
95 | x_img = Image.fromarray(x_encoded)
96 | x_img.save('tmp.png')
97 | x_loaded = Image.open('tmp.png')
98 | x_decoded = 255 * decode(x_loaded, 12)
99 | x_decoded = x_decoded[..., :n]
100 |
101 | print(abs(x_decoded - x).max())
102 |
--------------------------------------------------------------------------------
/data/const.py:
--------------------------------------------------------------------------------
1 | MAP = ['boston-seaport', 'singapore-hollandvillage', 'singapore-onenorth', 'singapore-queenstown']
2 | CAMS = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']
3 | CLASS2LABEL = {
4 | 'road_divider': 0,
5 | 'lane_divider': 0,
6 | 'ped_crossing': 1,
7 | 'contours': 2,
8 | 'car':3,
9 | 'others': -1
10 | }
11 | NUM_CLASSES = 3
12 | IMG_ORIGIN_H = 900
13 | IMG_ORIGIN_W = 1600
14 |
--------------------------------------------------------------------------------
/data/image.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 |
4 | import torch
5 | import torchvision
6 |
7 | class NormalizeInverse(torchvision.transforms.Normalize):
8 | # https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/8
9 | def __init__(self, mean, std):
10 | mean = torch.as_tensor(mean)
11 | std = torch.as_tensor(std)
12 | std_inv = 1 / (std + 1e-7)
13 | mean_inv = -mean * std_inv
14 | super().__init__(mean=mean_inv, std=std_inv)
15 |
16 | def __call__(self, tensor):
17 | return super().__call__(tensor.clone())
18 |
19 |
20 | normalize_img = torchvision.transforms.Compose((
21 | torchvision.transforms.ToTensor(),
22 | torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
23 | std=[0.229, 0.224, 0.225]),
24 | ))  # normalize with the ImageNet mean/std
25 |
26 | nor_img = torchvision.transforms.Compose((
27 | torchvision.transforms.ToTensor(),
28 | ))
29 | denor_img = torchvision.transforms.Compose((
30 | torchvision.transforms.ToPILImage(),
31 | ))
32 | denormalize_img = torchvision.transforms.Compose((
33 | NormalizeInverse(mean=[0.485, 0.456, 0.406],
34 | std=[0.229, 0.224, 0.225]),
35 | torchvision.transforms.ToPILImage(),
36 | ))
37 |
38 |
39 | def img_transform(img, resize, resize_dims):
40 | post_rot2 = torch.eye(2)
41 | post_tran2 = torch.zeros(2)
42 |
43 | img = img.resize(resize_dims)
44 |
45 | rot_resize = torch.Tensor([[resize[0], 0],
46 | [0, resize[1]]])
47 | post_rot2 = rot_resize @ post_rot2  # matrix multiplication
48 | post_tran2 = rot_resize @ post_tran2
49 |
50 | post_tran = torch.zeros(3)
51 | post_rot = torch.eye(3)
52 | post_tran[:2] = post_tran2
53 | post_rot[:2, :2] = post_rot2
54 | return img, post_rot, post_tran
55 |
56 |
57 | def get_rot(h):
58 | return torch.Tensor([
59 | [np.cos(h), np.sin(h)],
60 | [-np.sin(h), np.cos(h)],
61 | ])
62 |
63 | def img_transform_rot(img, resize, resize_dims, crop, flip, rotate):
64 | post_rot2 = torch.eye(2)
65 | post_tran2 = torch.zeros(2)
66 |
67 | # adjust image
68 | img = img.resize(resize_dims)
69 | img = img.crop(crop)
70 | if flip:  # horizontal flip
71 | img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
72 | img = img.rotate(rotate)
73 |
74 | # post-homography transformation
75 | post_rot2 *= resize
76 | post_tran2 -= torch.Tensor(crop[:2])
77 | if flip:
78 | A = torch.Tensor([[-1, 0], [0, 1]])
79 | b = torch.Tensor([crop[2] - crop[0], 0])
80 | post_rot2 = A.matmul(post_rot2)
81 | post_tran2 = A.matmul(post_tran2) + b
82 | A = get_rot(rotate/180*np.pi)
83 | b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
84 | b = A.matmul(-b) + b
85 | post_rot2 = A.matmul(post_rot2)
86 | post_tran2 = A.matmul(post_tran2) + b
87 |
88 | post_tran = torch.zeros(3)
89 | post_rot = torch.eye(3)
90 | post_tran[:2] = post_tran2
91 | post_rot[:2, :2] = post_rot2
92 | return img, post_rot, post_tran
93 |
94 |
--------------------------------------------------------------------------------
/data/lidar.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from functools import reduce
4 |
5 | from pyquaternion import Quaternion
6 |
7 | from nuscenes.utils.data_classes import LidarPointCloud
8 | from nuscenes.utils.geometry_utils import transform_matrix
9 |
10 |
11 | def get_lidar_data(nusc, sample_rec, nsweeps, min_distance):
12 | """
13 | Returns at most nsweeps of lidar in the ego frame.
14 | Returned tensor is 5(x, y, z, reflectance, dt) x N
15 | Adapted from https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/data_classes.py#L56
16 | """
17 | points = np.zeros((5, 0))
18 |
19 | # Get reference pose and timestamp.
20 | ref_sd_token = sample_rec['data']['LIDAR_TOP']
21 | ref_sd_rec = nusc.get('sample_data', ref_sd_token)
22 | ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
23 | ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
24 | ref_time = 1e-6 * ref_sd_rec['timestamp']
25 |
26 | # Homogeneous transformation matrix from global to _current_ ego car frame.
27 | car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']),
28 | inverse=True)
29 |
30 | # Aggregate current and previous sweeps.
31 | sample_data_token = sample_rec['data']['LIDAR_TOP']
32 | current_sd_rec = nusc.get('sample_data', sample_data_token)
33 | for _ in range(nsweeps):
34 | # Load up the pointcloud and remove points close to the sensor.
35 | current_pc = LidarPointCloud.from_file(os.path.join(nusc.dataroot, current_sd_rec['filename']))
36 | current_pc.remove_close(min_distance)
37 |
38 | # Get past pose.
39 | current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token'])
40 | global_from_car = transform_matrix(current_pose_rec['translation'],
41 | Quaternion(current_pose_rec['rotation']), inverse=False)
42 |
43 | # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
44 | current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token'])
45 | car_from_current = transform_matrix(current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']),
46 | inverse=False)
47 |
48 | # Fuse four transformation matrices into one and perform transform.
49 | trans_matrix = reduce(np.dot, [car_from_global, global_from_car, car_from_current])
50 | current_pc.transform(trans_matrix)
51 |
52 | # Add time vector which can be used as a temporal feature.
53 | time_lag = ref_time - 1e-6 * current_sd_rec['timestamp']
54 | times = time_lag * np.ones((1, current_pc.nbr_points()))
55 |
56 | new_points = np.concatenate((current_pc.points, times), 0)
57 | points = np.concatenate((points, new_points), 1)
58 |
59 | # Abort if there are no previous sweeps.
60 | if current_sd_rec['prev'] == '':
61 | break
62 | else:
63 | current_sd_rec = nusc.get('sample_data', current_sd_rec['prev'])
64 |
65 | return points
--------------------------------------------------------------------------------
/data/my_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import torch
4 | from PIL import Image
5 | from torch.utils.data import Dataset
6 | import matplotlib.pyplot as plt
from .image import img_transform, normalize_img, nor_img, denor_img
from .const import IMG_ORIGIN_H, IMG_ORIGIN_W
7 | Img_path = ['ImgLeft','ImgFront','Imgright','ImgBack']
8 | class my_dataset(Dataset):
9 | def __init__(self, dataroot):
10 | super(my_dataset, self).__init__()
11 | self.dataroot = dataroot
12 | self.data_conf = {
13 | 'image_size': [128, 352],
14 | 'xbound': [-30.0, 30.0, 0.15],
15 | 'ybound': [-15.0, 15.0, 0.15],
16 | 'thickness': 5,
17 | 'angle_class': 36,
18 | }
19 | self.len = 100
20 | def __len__(self):
21 | return self.len
22 | def get_intrinsic(self,i):
23 | d = dict()
24 | d[0] = np.array([[273.7, 0.0, 334.9], [0, 273.4, 244.8], [0, 0, 1]])  # left
25 | d[1] = np.array([[462.9, 0.0, 328.1], [0, 462.3, 183.2], [0, 0, 1]])  # front
26 | d[2] = np.array([[273.2, 0.0, 328.7], [0, 274.1, 219.2], [0, 0, 1]])  # right
27 | d[3] = np.array([[274.2, 0.0, 332.2], [0, 272.8, 239.1], [0, 0, 1]])  # rear
28 | return d[i]
29 | def get_rot(self,i):
30 | d = dict()
31 | d[0] = np.array([[0.99, -0.015, -0.00], [0.005, -0.004, 0.99], [-0.015, -0.99, -0.004]])
32 | d[1] = np.array([[0.0, 0.0, 1], [-1, 0, 0], [0, -1, 0]])
33 | d[2] = np.array([[-0.99, -0.0168, 0.0025], [-0.00223, -0.0160, -0.9998], [0.0168, -0.9997, 0.016]])
34 | d[3] = np.array([[-0.0027, -0.0307, -0.9995], [0.99992, -0.0117, -0.00237], [-0.0116, -0.9994, 0.0308]])
35 | return d[i]
36 | def get_trans(self,i):
37 | d = dict()
38 | d[0] = np.array([0, 0.263, 0.92])
39 | d[1] = np.array([0.35, 0.599, 0.92])
40 | d[2] = np.array([0, -0.263, 0.92])
41 | d[3] = np.array([ -0.365, 0.263, 0.92])
42 | return d[i]
43 | def sample_augmentation(self):
44 | fH, fW = self.data_conf['image_size']  # 128, 352
45 | resize = (fW / IMG_ORIGIN_W, fH / IMG_ORIGIN_H)  # resize scale (W, H)
46 | resize_dims = (fW, fH)  # target image size (W, H) = (352, 128)
47 | return resize, resize_dims
48 | def get_img(self, id):
49 | imgs = []
50 | img_origin = []
51 | feas = []
trans, rots, intrins = [], [], []
52 | for i in range(len(Img_path)):
53 | if i == 4:  # padding branch for a fifth camera; never taken with the four cameras above
54 | imgs.append(torch.zeros_like(img))
55 | a = np.array([[0,0,0],[0,0,0],[0,0,0]])
56 | b = np.array([0,0,0])
57 | trans.append(torch.Tensor(b))  # calibration translation (camera to vehicle)
58 | rots.append(torch.Tensor(a))  # calibration rotation
59 | intrins.append(torch.Tensor(a))
60 | feas.append(torch.zeros_like(fea))
61 | root = os.path.join(self.dataroot, Img_path[i])
62 | n = str(id)
63 | n = n.zfill(6)
64 | root = root + n + ".jpg"
65 | img = Image.open(root)
66 | resize, resize_dims = self.sample_augmentation()  # resize scale and target image size
67 | img_o_res, _, _ = img_transform(img, resize, resize_dims)  # also returns 3x3 rotation and translation matrices
68 | img_o = nor_img(img)  # 3 x 900 x 1600
69 | img_origin.append(nor_img(img_o_res))
70 | img, post_rot, post_tran = img_transform(img, resize, resize_dims)  # 3x3 post rotation and translation matrices
71 | img = normalize_img(img)  # normalized, 3 x 128 x 352
72 | imgs.append(img)  # accumulate images from the different cameras
73 |
74 | trans.append(torch.Tensor(self.get_trans(i)))  # calibration translation (camera to vehicle)
75 | rots.append(torch.Tensor(self.get_rot(i)))  # calibration rotation
76 | intrins.append(torch.Tensor(self.get_intrinsic(i)))
77 |
78 | fea = get_bev_cam(img_o, torch.Tensor(self.get_intrinsic(i)), 1)  # get_bev_cam is provided elsewhere in the project (not defined in this file)
79 | fea_p = denor_img(fea)
80 | fea_p = fea_p.resize(resize_dims)
81 | fea = normalize_img(fea_p)
82 | # fea = nor_img(fea_p)  # show the un-normalized image instead
83 | feas.append(fea)
84 | imgs.append(torch.zeros_like(img))
85 | a = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
86 | b = np.array([0, 0, 0])
87 | trans.append(torch.Tensor(b))  # calibration translation (camera to vehicle)
88 | rots.append(torch.Tensor(a))  # calibration rotation
89 | intrins.append(torch.Tensor(a))
90 | feas.append(torch.zeros_like(fea))
91 | return torch.stack(img_origin),torch.stack(feas),torch.stack(imgs), torch.stack(trans), torch.stack(rots), torch.stack(intrins), None, None
92 | def __getitem__(self, idx):
93 | img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = self.get_img(idx)
94 | return img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots, None, None, None, None
--------------------------------------------------------------------------------
/data/rasterize.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | import torch
5 |
6 | from shapely import affinity
7 | from shapely.geometry import LineString, box
8 |
9 |
10 | def get_patch_coord(patch_box, patch_angle=0.0):
11 | patch_x, patch_y, patch_h, patch_w = patch_box
12 |
13 | x_min = patch_x - patch_w / 2.0
14 | y_min = patch_y - patch_h / 2.0
15 | x_max = patch_x + patch_w / 2.0
16 | y_max = patch_y + patch_h / 2.0
17 |
18 | patch = box(x_min, y_min, x_max, y_max)
19 | patch = affinity.rotate(patch, patch_angle, origin=(patch_x, patch_y), use_radians=False)
20 |
21 | return patch
22 |
23 |
24 | def get_discrete_degree(vec, angle_class=36):
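# Map a 2D direction vector to a discrete angle bin in [1, angle_class]; the background value 0 is kept for "no direction".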
25 | deg = np.mod(np.degrees(np.arctan2(vec[1], vec[0])), 360)
26 | deg = (int(deg / (360 / angle_class) + 0.5) % angle_class) + 1
27 | return deg
28 |
29 |
30 | def mask_for_lines(lines, mask, thickness, idx, type='index', angle_class=36):
31 | coords = np.asarray(list(lines.coords), np.int32)
32 | coords = coords.reshape((-1, 2))
33 | if len(coords) < 2:
34 | return mask, idx
35 | if type == 'backward':
36 | coords = np.flip(coords, 0)  # reverse the point order
37 |
38 | if type == 'index':
39 | cv2.polylines(mask, [coords], False, color=idx, thickness=thickness)
40 | idx += 1
41 | else:
42 | for i in range(len(coords) - 1):
43 | cv2.polylines(mask, [coords[i:]], False, color=get_discrete_degree(coords[i + 1] - coords[i], angle_class=angle_class), thickness=thickness)
44 | return mask, idx
45 |
46 |
47 | def line_geom_to_mask(layer_geom, confidence_levels, local_box, canvas_size, thickness, idx, type='index', angle_class=36):
48 | patch_x, patch_y, patch_h, patch_w = local_box
49 |
50 | patch = get_patch_coord(local_box)
51 |
52 | canvas_h = canvas_size[0]
53 | canvas_w = canvas_size[1]
54 | scale_height = canvas_h / patch_h
55 | scale_width = canvas_w / patch_w
56 |
57 | trans_x = -patch_x + patch_w / 2.0
58 | trans_y = -patch_y + patch_h / 2.0
59 |
60 | map_mask = np.zeros(canvas_size, np.uint8)
61 |
62 | for line in layer_geom:
63 | if isinstance(line, tuple):
64 | line, confidence = line
65 | else:
66 | confidence = None
67 | new_line = line.intersection(patch)
68 | if not new_line.is_empty:
69 | new_line = affinity.affine_transform(new_line, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
70 | new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
71 | confidence_levels.append(confidence)
72 | if new_line.geom_type == 'MultiLineString':
73 | for new_single_line in new_line.geoms:
74 | map_mask, idx = mask_for_lines(new_single_line, map_mask, thickness, idx, type, angle_class)
75 | else:
76 | map_mask, idx = mask_for_lines(new_line, map_mask, thickness, idx, type, angle_class)
77 | return map_mask, idx
78 |
79 |
80 | def overlap_filter(mask, filter_mask):
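# Give higher-indexed classes priority: wherever class c appears in the (thicker) filter_mask,
# zero out all lower-indexed channels so overlapping lines keep a single label.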
81 | C, _, _ = mask.shape
82 | for c in range(C-1, -1, -1):
83 | filter = np.repeat((filter_mask[c] != 0)[None, :], c, axis=0)
84 | mask[:c][filter] = 0
85 |
86 | return mask
87 |
88 |
89 | def preprocess_map(vectors, patch_size, canvas_size, num_classes, thickness, angle_class):
90 | confidence_levels = [-1]
91 | vector_num_list = {}
92 | for i in range(num_classes):
93 | vector_num_list[i] = []
94 |
95 | for vector in vectors:
96 | if vector['pts_num'] >= 2:
97 | vector_num_list[vector['type']].append(LineString(vector['pts'][:vector['pts_num']]))
98 |
99 | local_box = (0.0, 0.0, patch_size[0], patch_size[1])
100 |
101 | idx = 1
102 | filter_masks = []
103 | instance_masks = []
104 | forward_masks = []
105 | backward_masks = []
106 | for i in range(num_classes):
107 | map_mask, idx = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, idx)
108 | instance_masks.append(map_mask)
109 | filter_mask, _ = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness + 4, 1)
110 | filter_masks.append(filter_mask)
111 | forward_mask, _ = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, 1, type='forward', angle_class=angle_class)
112 | forward_masks.append(forward_mask)
113 | backward_mask, _ = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, 1, type='backward', angle_class=angle_class)
114 | backward_masks.append(backward_mask)
115 |
116 | filter_masks = np.stack(filter_masks)
117 | instance_masks = np.stack(instance_masks)
118 | forward_masks = np.stack(forward_masks)
119 | backward_masks = np.stack(backward_masks)
120 |
121 | instance_masks = overlap_filter(instance_masks, filter_masks)
122 | forward_masks = overlap_filter(forward_masks, filter_masks).sum(0).astype('int32')
123 | backward_masks = overlap_filter(backward_masks, filter_masks).sum(0).astype('int32')
124 |
125 | return torch.tensor(instance_masks), torch.tensor(forward_masks), torch.tensor(backward_masks)
126 |
127 |
128 | def rasterize_map(vectors, patch_size, canvas_size, num_classes, thickness):
129 | confidence_levels = [-1]
130 | vector_num_list = {}
131 | for i in range(num_classes):
132 | vector_num_list[i] = []
133 |
134 | for vector in vectors:
135 | if vector['pts_num'] >= 2:
136 | vector_num_list[vector['type']].append((LineString(vector['pts'][:vector['pts_num']]), vector.get('confidence_level', 1)))
137 |
138 | local_box = (0.0, 0.0, patch_size[0], patch_size[1])
139 |
140 | idx = 1
141 | masks = []
142 | for i in range(num_classes):
143 | map_mask, idx = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, idx)
144 | masks.append(map_mask)
145 |
146 | return np.stack(masks), confidence_levels
147 |
--------------------------------------------------------------------------------
/data/txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/data/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xml.etree.ElementTree as xmlET
3 | import torch
4 | import os
5 | import math
6 |
7 | def get_proj_mat(intrins, rots, trans):
8 | K = np.eye(4)
9 | K[:3, :3] = intrins
10 | R = np.eye(4)
11 | R[:3, :3] = rots.transpose(-1, -2)
12 | T = np.eye(4)
13 | T[:3, 3] = -trans
14 | RT = R @ T
15 | return K @ RT
16 |
17 |
18 | def perspective(cam_coords, proj_mat):
19 | pix_coords = proj_mat @ cam_coords
20 | valid_idx = pix_coords[2, :] > 0
21 | pix_coords = pix_coords[:, valid_idx]
22 | pix_coords = pix_coords[:2, :] / (pix_coords[2, :] + 1e-7)
23 | pix_coords = pix_coords.transpose(1, 0)
24 | return pix_coords
25 |
26 |
27 | def label_onehot_decoding(onehot):
28 | return torch.argmax(onehot, axis=0)
29 |
30 |
31 | def label_onehot_encoding(label, num_classes=4):
32 | H, W = label.shape
33 | onehot = torch.zeros((num_classes, H, W))
34 | onehot.scatter_(0, label[None].long(), 1)
35 | return onehot
36 |
37 |
38 | def gen_dx_bx(xbound, ybound, zbound):
39 | dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]])
40 | bx = torch.Tensor([row[0] + row[2] / 2.0 for row in [xbound, ybound, zbound]])
41 | nx = torch.LongTensor([int((row[1] - row[0]) / row[2]) for row in [xbound, ybound, zbound]])
42 | return dx, bx, nx
43 |
44 | def eulerAngles2rotationMat(theta, format='degree'):
45 | """
46 | Calculates a rotation matrix from Euler angles.
47 | :param theta: 1-by-3 list [rx, ry, rz], angles in degrees
48 | :return:
49 | RPY angles (ZYX Euler angles): rotations about the fixed X, Y, Z axes by [rx, ry, rz] in turn
50 | """
51 | if format == 'degree':
52 | theta = [i * math.pi / 180.0 for i in theta]
53 |
54 | R_x = np.array([[1, 0, 0],
55 | [0, math.cos(theta[0]), -math.sin(theta[0])],
56 | [0, math.sin(theta[0]), math.cos(theta[0])]
57 | ])
58 |
59 | R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
60 | [0, 1, 0],
61 | [-math.sin(theta[1]), 0, math.cos(theta[1])]
62 | ])
63 |
64 | R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
65 | [math.sin(theta[2]), math.cos(theta[2]), 0],
66 | [0, 0, 1]
67 | ])
68 | R = np.dot(R_z, np.dot(R_y, R_x))
69 | return R
70 | def get_files_in_folder(folder):
71 |
72 | return sorted([os.path.join(folder, f) for f in os.listdir(folder)])  # returns the sorted list of file paths in the folder
73 |
74 | def one_hot_encode_image(image, palette):
75 |
76 | one_hot_map = []
77 |
78 | # find instances of class colors and append layer to one-hot-map
79 | for class_colors in palette:
80 | class_map = np.zeros(image.shape[0:2], dtype=bool)
81 | for color in class_colors:
82 | # temp = np.ones_like(image)
83 | # temp = temp [:,:,0] * color[0]
84 | # temp = temp[:, :, 1] * color[1]
85 | # temp = temp[:, :, 2] * color[2]
86 | class_map = class_map | (image==color).all(axis=-1)
87 | one_hot_map.append(class_map)
88 |
89 | # finalize one-hot-map
90 | one_hot_map = np.stack(one_hot_map, axis=-1)
91 | one_hot_map = one_hot_map.astype(np.float32)
92 |
93 | return one_hot_map
94 | def parse_convert_xml(conversion_file_path):
95 |
96 | defRoot = xmlET.parse(conversion_file_path).getroot()
97 |
98 | one_hot_palette = []
99 | class_list = []
100 | for idx, defElement in enumerate(defRoot.findall("SLabel")):
101 | from_color = np.fromstring(defElement.get("fromColour"), dtype=int, sep=" ")
102 | to_class = np.fromstring(defElement.get("toValue"), dtype=int, sep=" ")
103 | if to_class in class_list:
104 | one_hot_palette[class_list.index(to_class)].append(from_color)
105 | else:
106 | one_hot_palette.append([from_color])
107 | class_list.append(to_class)
108 |
109 | return one_hot_palette
110 |
--------------------------------------------------------------------------------
/data/vector_map.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer
4 | from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
5 | from shapely import affinity, ops
6 | from shapely.geometry import LineString, box, MultiPolygon, MultiLineString
7 | import math
8 | from .const import CLASS2LABEL
9 |
10 |
11 | def isRotationMatrix(R):
12 | Rt = np.transpose(R)
13 | shouldBeIdentity = np.dot(Rt, R)
14 | I = np.identity(3, dtype=R.dtype)
15 | n = np.linalg.norm(I - shouldBeIdentity)
16 | return n < 1e-6
17 |
18 |
19 | def rotationMatrixToEulerAngles(R):
20 | assert (isRotationMatrix(R))
21 | sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
22 | singular = sy < 1e-6
23 |
24 | if not singular:
25 | x = math.atan2(R[2, 1], R[2, 2])
26 | y = math.atan2(-R[2, 0], sy)
27 | z = math.atan2(R[1, 0], R[0, 0])
28 | else:
29 | x = math.atan2(-R[1, 2], R[1, 1])
30 | y = math.atan2(-R[2, 0], sy)
31 | z = 0
32 |
33 | return np.array([x, y, z])
34 |
35 |
36 | def eulerAnglesToRotationMatrix(theta):
37 | R_x = np.array([[1, 0, 0],
38 | [0, math.cos(theta[0]), -math.sin(theta[0])],
39 | [0, math.sin(theta[0]), math.cos(theta[0])]
40 | ])
41 |
42 | R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
43 | [0, 1, 0],
44 | [-math.sin(theta[1]), 0, math.cos(theta[1])]
45 | ])
46 |
47 | R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
48 | [math.sin(theta[2]), math.cos(theta[2]), 0],
49 | [0, 0, 1]
50 | ])
51 |
52 | R = np.dot(R_z, np.dot(R_y, R_x))
53 |
54 | return R
55 |
56 |
57 | def EulerAndQuaternionTransform(intput_data):
58 | """
59 | Convert between Euler angles and quaternions.
60 | """
61 | data_len = len(intput_data)
62 | angle_is_not_rad = False
63 |
64 | if data_len == 3:
65 | r = 0
66 | p = 0
67 | y = 0
68 | if angle_is_not_rad: # 180 ->pi
69 | r = math.radians(intput_data[0])
70 | p = math.radians(intput_data[1])
71 | y = math.radians(intput_data[2])
72 | else:
73 | r = intput_data[0]
74 | p = intput_data[1]
75 | y = intput_data[2]
76 |
77 | sinp = math.sin(p / 2)
78 | siny = math.sin(y / 2)
79 | sinr = math.sin(r / 2)
80 |
81 | cosp = math.cos(p / 2)
82 | cosy = math.cos(y / 2)
83 | cosr = math.cos(r / 2)
84 |
85 | w = cosr * cosp * cosy + sinr * sinp * siny
86 | x = sinr * cosp * cosy - cosr * sinp * siny
87 | y = cosr * sinp * cosy + sinr * cosp * siny
88 | z = cosr * cosp * siny - sinr * sinp * cosy
89 | return [w, x, y, z]
90 |
91 | elif data_len == 4:
92 |
93 | w = intput_data[0]
94 | x = intput_data[1]
95 | y = intput_data[2]
96 | z = intput_data[3]
97 |
98 | r = math.atan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y))
99 | p = math.asin(2 * (w * y - z * x))
100 | y = math.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z))
101 |
102 | if angle_is_not_rad: # pi -> 180
103 | r = math.degrees(r)
104 | p = math.degrees(p)
105 | y = math.degrees(y)
106 | return [r, p, y]
107 |
108 |
109 | class VectorizedLocalMap(object):
110 | def __init__(self,
111 | dataroot,
112 | patch_size,
113 | canvas_size,
114 | line_classes=['road_divider', 'lane_divider'],
115 | ped_crossing_classes=['ped_crossing'],
116 | contour_classes=['road_segment', 'lane'],
117 | sample_dist=1,
118 | num_samples=250,
119 | padding=False,
120 | normalize=False,
121 | fixed_num=-1):
122 | '''
123 | Args:
124 | fixed_num = -1 : no fixed num
125 | '''
126 | super().__init__()
127 | self.data_root = dataroot
128 | self.MAPS = ['boston-seaport', 'singapore-hollandvillage',
129 | 'singapore-onenorth', 'singapore-queenstown']
130 | self.line_classes = line_classes
131 | self.ped_crossing_classes = ped_crossing_classes
132 | self.polygon_classes = contour_classes
133 | self.nusc_maps = {}
134 | self.map_explorer = {}
135 | for loc in self.MAPS:
136 | self.nusc_maps[loc] = NuScenesMap(dataroot=self.data_root, map_name=loc)
137 | self.map_explorer[loc] = NuScenesMapExplorer(self.nusc_maps[loc])
138 |
139 | self.patch_size = patch_size
140 | self.canvas_size = canvas_size
141 | self.sample_dist = sample_dist
142 | self.num_samples = num_samples
143 | self.padding = padding
144 | self.normalize = normalize
145 | self.fixed_num = fixed_num
146 |
147 |
148 |
149 | def gen_vectorized_samples(self, location, ego2global_translation, ego2global_rotation):
150 | map_pose = ego2global_translation[:2]#
151 | rotation = Quaternion(ego2global_rotation)
152 | #print('ego',map_pose)
153 | patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])#[x_center, y_center, height, width]
154 | patch_angle = quaternion_yaw(rotation) / np.pi * 180
155 | #print(patch_angle)
156 | line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes, location)#list 2 :
157 | line_vector_dict = self.line_geoms_to_vectors(line_geom)
158 |
159 | ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes, location)
160 | # ped_vector_list = self.ped_geoms_to_vectors(ped_geom)
161 | ped_vector_list = self.line_geoms_to_vectors(ped_geom)['ped_crossing']
162 |
163 | polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes, location)
164 | poly_bound_list = self.poly_geoms_to_vectors(polygon_geom)
165 |
166 | vectors = []
167 | for line_type, vects in line_vector_dict.items():
168 | for line, length in vects:
169 | vectors.append((line.astype(float), length, CLASS2LABEL.get(line_type, -1)))
170 |
171 | for ped_line, length in ped_vector_list:
172 | vectors.append((ped_line.astype(float), length, CLASS2LABEL.get('ped_crossing', -1)))
173 |
174 | for contour, length in poly_bound_list:
175 | vectors.append((contour.astype(float), length, CLASS2LABEL.get('contours', -1)))
176 |
177 | # filter out -1
178 | filtered_vectors = []
179 | for pts, pts_num, type in vectors:
180 | if type != -1:
181 | filtered_vectors.append({
182 | 'pts': pts,
183 | 'pts_num': pts_num,
184 | 'type': type
185 | })
186 |
187 | return filtered_vectors
188 |
189 |
190 | def get_map_geom(self, patch_box, patch_angle, layer_names, location):
191 | map_geom = []
192 | for layer_name in layer_names:
193 | if layer_name in self.line_classes:
194 | geoms = self.map_explorer[location]._get_layer_line(patch_box, patch_angle, layer_name)
195 | map_geom.append((layer_name, geoms))
196 | elif layer_name in self.polygon_classes:
197 | geoms = self.map_explorer[location]._get_layer_polygon(patch_box, patch_angle, layer_name)
198 | map_geom.append((layer_name, geoms))
199 | elif layer_name in self.ped_crossing_classes:
200 | geoms = self.get_ped_crossing_line(patch_box, patch_angle, location)
201 | # geoms = self.map_explorer[location]._get_layer_polygon(patch_box, patch_angle, layer_name)
202 | map_geom.append((layer_name, geoms))
203 | return map_geom
204 |
205 | def _one_type_line_geom_to_vectors(self, line_geom):
206 | line_vectors = []
207 | for line in line_geom:
208 | if not line.is_empty:
209 | if line.geom_type == 'MultiLineString':
210 | for single_line in line.geoms:
211 | line_vectors.append(self.sample_pts_from_line(single_line))
212 | elif line.geom_type == 'LineString':
213 | line_vectors.append(self.sample_pts_from_line(line))
214 | else:
215 | raise NotImplementedError
216 | return line_vectors
217 |
218 | def poly_geoms_to_vectors(self, polygon_geom):
219 | roads = polygon_geom[0][1]
220 | lanes = polygon_geom[1][1]
221 | union_roads = ops.unary_union(roads)
222 | union_lanes = ops.unary_union(lanes)
223 | union_segments = ops.unary_union([union_roads, union_lanes])
224 | max_x = self.patch_size[1] / 2
225 | max_y = self.patch_size[0] / 2
226 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)
227 | exteriors = []
228 | interiors = []
229 | if union_segments.geom_type != 'MultiPolygon':
230 | union_segments = MultiPolygon([union_segments])
231 | for poly in union_segments.geoms:
232 | exteriors.append(poly.exterior)
233 | for inter in poly.interiors:
234 | interiors.append(inter)
235 |
236 | results = []
237 | for ext in exteriors:
238 | if ext.is_ccw:
239 | ext.coords = list(ext.coords)[::-1]
240 | lines = ext.intersection(local_patch)
241 | if isinstance(lines, MultiLineString):
242 | lines = ops.linemerge(lines)
243 | results.append(lines)
244 |
245 | for inter in interiors:
246 | if not inter.is_ccw:
247 | inter.coords = list(inter.coords)[::-1]
248 | lines = inter.intersection(local_patch)
249 | if isinstance(lines, MultiLineString):
250 | lines = ops.linemerge(lines)
251 | results.append(lines)
252 |
253 | return self._one_type_line_geom_to_vectors(results)
254 |
255 | def line_geoms_to_vectors(self, line_geom):
256 | line_vectors_dict = dict()
257 | for line_type, a_type_of_lines in line_geom:  # the list of lines for each map-element type
258 | one_type_vectors = self._one_type_line_geom_to_vectors(a_type_of_lines)
259 | line_vectors_dict[line_type] = one_type_vectors
260 |
261 | return line_vectors_dict  # e.g. {'road_divider': [...], 'lane_divider': [...]}, sampled points per line
262 |
263 | def ped_geoms_to_vectors(self, ped_geom):
264 | ped_geom = ped_geom[0][1]
265 | union_ped = ops.unary_union(ped_geom)
266 | if union_ped.geom_type != 'MultiPolygon':
267 | union_ped = MultiPolygon([union_ped])
268 |
269 | max_x = self.patch_size[1] / 2
270 | max_y = self.patch_size[0] / 2
271 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)
272 | results = []
273 | for ped_poly in union_ped.geoms:
274 | # rect = ped_poly.minimum_rotated_rectangle
275 | ext = ped_poly.exterior
276 | if not ext.is_ccw:
277 | ext.coords = list(ext.coords)[::-1]
278 | lines = ext.intersection(local_patch)
279 | results.append(lines)
280 |
281 | return self._one_type_line_geom_to_vectors(results)
282 |
283 | def get_ped_crossing_line(self, patch_box, patch_angle, location):
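# For each ped-crossing polygon, keep only its two longest edges,
# rotated and translated into the ego-centred patch frame.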
284 | def add_line(poly_xy, idx, patch, patch_angle, patch_x, patch_y, line_list):
285 | points = [(p0, p1) for p0, p1 in zip(poly_xy[0, idx:idx + 2], poly_xy[1, idx:idx + 2])]
286 | line = LineString(points)
287 | line = line.intersection(patch)
288 | if not line.is_empty:
289 | line = affinity.rotate(line, -patch_angle, origin=(patch_x, patch_y), use_radians=False)
290 | line = affinity.affine_transform(line, [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])
291 | line_list.append(line)
292 |
293 | patch_x = patch_box[0]
294 | patch_y = patch_box[1]
295 |
296 | patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
297 | line_list = []
298 | records = getattr(self.nusc_maps[location], 'ped_crossing')
299 | for record in records:
300 | polygon = self.map_explorer[location].extract_polygon(record['polygon_token'])
301 | poly_xy = np.array(polygon.exterior.xy)
302 | dist = np.square(poly_xy[:, 1:] - poly_xy[:, :-1]).sum(0)
303 | x1, x2 = np.argsort(dist)[-2:]
304 |
305 | add_line(poly_xy, x1, patch, patch_angle, patch_x, patch_y, line_list)
306 | add_line(poly_xy, x2, patch, patch_angle, patch_x, patch_y, line_list)
307 |
308 | return line_list
309 |
310 | def sample_pts_from_line(self, line):
311 | if self.fixed_num < 0:
312 | distances = np.arange(0, line.length, self.sample_dist)
313 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)  # points sampled in order along the line
314 | else:
315 | # fixed number of points, so distance is line.length / self.fixed_num
316 | distances = np.linspace(0, line.length, self.fixed_num)
317 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
318 |
319 | if self.normalize:
320 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]])
321 |
322 | num_valid = len(sampled_points)
323 |
324 | if not self.padding or self.fixed_num > 0:
325 | # fixed num sample can return now!
326 | return sampled_points, num_valid
327 |
328 | # fixed distance sampling need padding!
329 | num_valid = len(sampled_points)
330 |
331 | if self.fixed_num < 0:
332 | if num_valid < self.num_samples:
333 | padding = np.zeros((self.num_samples - len(sampled_points), 2))
334 | sampled_points = np.concatenate([sampled_points, padding], axis=0)
335 | else:
336 | sampled_points = sampled_points[:self.num_samples, :]
337 | num_valid = self.num_samples
338 |
339 | if self.normalize:
340 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]])
341 | num_valid = len(sampled_points)
342 |
343 | return sampled_points, num_valid
344 |
--------------------------------------------------------------------------------
/data/viz_data.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import cv2
4 |
5 |
6 | from matplotlib.pyplot import get_cmap
7 | DIVIDER = ['road_divider and lane_divider']
8 | Ped = ['ped_crossing']
9 | STATIC = ['lane and road_segment']
10 | DYNAMIC = [
11 | 'car', 'truck', 'bus',
12 | 'trailer', 'construction',
13 | 'pedestrian',
14 | 'motorcycle', 'bicycle',
15 | ]
16 |
17 | # many colors from
18 | # https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/color_map.py
19 | COLORS = {
20 | # dividers
21 | 'road_divider and lane_divider': (255, 200, 0),
22 | 'ped_crossing': (255, 158, 0),
23 | # static
24 | 'lane and road_segment': (90, 90, 90),
25 | # dynamic
26 | 'car': (255, 158, 0),
27 | 'truck': (255, 99, 71),
28 | 'bus': (255, 127, 80),
29 | 'trailer': (255, 140, 0),
30 | 'construction': (233, 150, 70),
31 | 'pedestrian': (0, 0, 230),
32 | 'motorcycle': (255, 61, 99),
33 | 'bicycle': (220, 20, 60),
34 |
35 | 'nothing': (200, 200, 200)
36 | }
37 |
38 |
39 | def colorize(x, colormap=None):
40 | """
41 | x: (h w) np.uint8 0-255
42 | colormap
43 | """
44 | try:
45 | return (255 * get_cmap(colormap)(x)[..., :3]).astype(np.uint8)
46 | except:
47 | pass
48 |
49 | if x.dtype == np.float32:
50 | x = (255 * x).astype(np.uint8)
51 |
52 | if colormap is None:
53 | return x[..., None].repeat(3, 2)
54 |
55 | return cv2.applyColorMap(x, getattr(cv2, f'COLORMAP_{colormap.upper()}'))
56 |
57 |
58 | def get_colors(semantics):
59 | return np.array([COLORS[s] for s in semantics], dtype=np.uint8)
60 |
61 |
62 | def to_image(x):
63 | return (255 * x).byte().cpu().numpy().transpose(1, 2, 0)
64 | def to_image_single(x):
65 |
66 |
67 | return (255 * x).astype(np.uint8)
68 |
69 | def to_color(x):
70 |
71 | t = 255 * np.ones_like(x)
72 | x = x[np.newaxis,:,:]
73 | t = t[np.newaxis, :, :]
74 | c = np.concatenate([t, t, x], axis=0).transpose(1, 2, 0)
75 |
76 | return c.astype(np.uint8)
77 | def greyscale(x):
78 | return (255 * x.repeat(3, 2)).astype(np.uint8)
79 |
80 |
81 | def resize(src, dst=None, shape=None, idx=0):
82 | if dst is not None:
83 | ratio = dst.shape[idx] / src.shape[idx]
84 | elif shape is not None:
85 | ratio = shape[idx] / src.shape[idx]
86 |
87 | width = int(ratio * src.shape[1])
88 | height = int(ratio * src.shape[0])
89 |
90 | return cv2.resize(src, (width, height), interpolation=cv2.INTER_CUBIC)
91 |
92 |
93 | class BaseViz:
94 | SEMANTICS = DIVIDER + Ped +STATIC +DYNAMIC
95 |
96 | def __init__(self, label_indices=None, colormap='inferno'):
97 | self.label_indices = label_indices
98 | self.colors = get_colors(self.SEMANTICS)  # (12, 3) RGB colors
99 | self.colormap = colormap
100 |
101 | def visualize_pred(self, bev, pred, threshold=None):
102 | """
103 | (c, h, w) torch float {0, 1}
104 | (c, h, w) torch float [0-1]
105 | """
106 | if isinstance(bev, torch.Tensor):
107 | bev = bev.cpu().numpy().transpose(1, 2, 0)
108 |
109 | if isinstance(pred, torch.Tensor):
110 | pred = pred.cpu().numpy().transpose(1, 2, 0)
111 |
112 | if self.label_indices is not None:
113 | bev = [bev[..., idx].max(-1) for idx in self.label_indices]
114 | bev = np.stack(bev, -1)
115 |
116 | if threshold is not None:
117 | pred = (pred > threshold).astype(np.float32)
118 |
119 | result = colorize((255 * pred.squeeze(2)).astype(np.uint8), self.colormap)
120 |
121 | return result
122 |
123 | # def visualize_pred_unused(self, bev, pred, threshold=None):
124 | # h, w, c = pred.shape
125 |
126 | # img = np.zeros((h, w, 3), dtype=np.float32)
127 | # img[...] = 0.5
128 | # colors = np.float32([
129 | # [0, .6, 0],
130 | # [1, .7, 0],
131 | # [1, 0, 0]
132 | # ])
133 | # tp = (pred > threshold) & (bev > threshold)
134 | # fp = (pred > threshold) & (bev < threshold)
135 | # fn = (pred <= threshold) & (bev > threshold)
136 |
137 | # for channel in range(c):
138 | # for i, m in enumerate([tp, fp, fn]):
139 | # img[m[..., channel]] = colors[i][None]
140 |
141 | # return (255 * img).astype(np.uint8)
142 |
143 | def visualize_bev(self, bev):
144 | """
145 | (c, h, w) torch [0, 1] float
146 |
147 | returns (h, w, 3) np.uint8
148 | """
149 | if isinstance(bev, torch.Tensor):
150 | bev = bev.cpu().numpy().transpose(1, 2, 0)
151 |
152 | h, w, c = bev.shape
153 |
154 | #assert c == len(self.SEMANTICS)
155 |
156 | # Prioritize higher class labels
157 | eps = (1e-5 * np.arange(c))[None, None]  # (1, 1, c)
158 | idx = (bev + eps).argmax(axis=-1)  # (200, 400) index of the dominant class per BEV cell
159 | val = np.take_along_axis(bev, idx[..., None], -1)
160 |
161 | # Spots with no labels are light grey
162 | empty = np.uint8(COLORS['nothing'])[None, None]
163 |
164 | result = (val * self.colors[idx]) + ((1 - val) * empty)
165 | result = np.uint8(result)
166 |
167 | return result
168 |
169 | def visualize_custom(self, batch, pred):
170 | return []
171 |
172 | @torch.no_grad()
173 | def visualize(self, bev, img, pred=None, b_max=8, **kwargs):
174 |
175 | b, h, w = bev.shape  # (5, h, w)
176 | t = torch.ones([b,h,w]).cuda()
177 | bev = t * bev
178 | if True:
179 | if pred is not None:
180 | right = self.visualize_pred(bev, pred['bev'].sigmoid())
181 | else:
182 | right = self.visualize_bev(bev)
183 |
184 | right = [right] + self.visualize_custom(bev, pred)
185 | right = [x for x in right if x is not None]
186 | right = np.hstack(right)
187 |
188 | image = img  # (6, 3, 128, 352)
189 |
190 | if image is not None:
191 | imgs = [to_image(image[i]) for i in range(image.shape[0])]
192 |
193 | if len(imgs) == 6:
194 | a = np.hstack(imgs[:3])
195 | b = np.hstack(imgs[3:])
196 | left = resize(np.vstack((a, b)), right)
197 | elif len(imgs) == 4:
198 | a = np.hstack(imgs[:2])
199 | b = np.hstack(imgs[2:])
200 | left = resize(np.vstack((a, b)), right)
201 | else:
202 | left = np.hstack([resize(x, right) for x in imgs])
203 |
204 | yield np.hstack((left, right))
205 | else:
206 | yield right
207 |
208 | def __call__(self, bev, img, pred=None, **kwargs):
209 |
210 | return list(self.visualize(bev=bev[1:].cuda(), img = img, pred=pred, **kwargs))
--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import tqdm
3 | import os
4 | import torch
5 | from data.dataset import semantic_dataset,mydataset,vir_semantic_dataset
6 | from data.const import NUM_CLASSES
7 | from evaluation.iou import get_batch_iou
8 | from model import get_model
9 |
10 | from loss import view_loss
11 |
12 | def onehot_encoding(logits, dim=1):
13 | max_idx = torch.argmax(logits, dim, keepdim=True)
14 | one_hot = logits.new_full(logits.shape, 0)
15 | one_hot.scatter_(dim, max_idx, 1)
16 | return one_hot
17 |
18 | def eval_iou(model, val_loader):
19 | model.eval()
20 | total_intersects = 0
21 | total_union = 0
22 | total_intersects1 = 0
23 | total_union1 = 0
24 | with torch.no_grad():
25 | for img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots, car_trans, yaw_pitch_roll, semantic_gt, instance_gt in tqdm.tqdm(val_loader):
26 |
27 | semantic, x_view_pre, view_ipm, view_fore = model(feas.cuda(), imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
28 | post_trans.cuda(), post_rots.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda())
29 | #, embedding, direction
30 | semantic_gt = semantic_gt.cuda().float()
31 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
32 | total_intersects += intersects
33 | total_union += union
34 | view_img_gt = view_loss(semantic_gt.cuda(), trans.cuda(), rots.cuda()) # b*n 5 h w
35 | intersects1, union1 = get_batch_iou(onehot_encoding(x_view_pre), view_img_gt)
36 | total_intersects1 += intersects1
37 | total_union1 += union1
38 | return total_intersects / (total_union + 1e-7),total_intersects1 / (total_union1 + 1e-7)
39 |
40 | def main(args):
41 | data_conf = {
42 | 'num_channels': NUM_CLASSES + 1,
43 | 'image_size': args.image_size,
44 | 'xbound': args.xbound,
45 | 'ybound': args.ybound,
46 | 'zbound': args.zbound,
47 | 'dbound': args.dbound,
48 | 'thickness': args.thickness,
49 | 'angle_class': args.angle_class,
50 | 'B': args.bsz,
51 | 'merge': True
52 | }
53 |
54 | train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)
55 | #train_loader, val_loader = vir_semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)
56 |
57 | model = get_model(args.model, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class)
58 | model.load_state_dict(torch.load(args.modelf), strict=False)
59 | model.cuda()
60 | res,_ = eval_iou(model,val_loader)
61 | print(res)
62 |
63 |
64 |
65 | if __name__ == '__main__':
66 | parser = argparse.ArgumentParser()
67 | # logging config
68 | parser.add_argument("--logdir", type=str, default='./runs')
69 |
70 | # nuScenes config
71 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0')
72 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini'])
73 |
74 | # model config
75 | parser.add_argument("--model", type=str, default='BiMapper')
76 |
77 | # training config
78 | parser.add_argument("--nepochs", type=int, default=30)
79 | parser.add_argument("--max_grad_norm", type=float, default=5.0)
80 | parser.add_argument("--pos_weight", type=float, default=2.13)
81 | parser.add_argument("--bsz", type=int, default=1)
82 | parser.add_argument("--nworkers", type=int, default=4)
83 | parser.add_argument("--lr", type=float, default=1e-3)
84 | parser.add_argument("--weight_decay", type=float, default=1e-7)
85 |
86 | # finetune config
87 | parser.add_argument('--finetune', default=False, action='store_true')
88 | parser.add_argument('--modelf', type=str, default='')
89 |
90 | # data config
91 | parser.add_argument("--thickness", type=int, default=5)
92 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352])
93 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
94 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
95 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0])
96 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0])
97 |
98 | # embedding config
99 | parser.add_argument('--instance_seg', action='store_true')
100 | parser.add_argument("--embedding_dim", type=int, default=16)
101 | parser.add_argument("--delta_v", type=float, default=0.5)
102 | parser.add_argument("--delta_d", type=float, default=3.0)
103 |
104 | # direction config
105 | parser.add_argument('--direction_pred', action='store_true')
106 | parser.add_argument('--angle_class', type=int, default=36)
107 |
108 | # loss config
109 | parser.add_argument("--scale_seg", type=float, default=1.0)
110 | parser.add_argument("--scale_var", type=float, default=1.0)
111 | parser.add_argument("--scale_dist", type=float, default=1.0)
112 | parser.add_argument("--scale_direction", type=float, default=0.2)
113 |
114 | args = parser.parse_args()
115 | main(args)
116 |
--------------------------------------------------------------------------------
/evaluate_json.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import tqdm
3 |
4 | from evaluation.dataset import HDMapNetEvalDataset
5 | from evaluation.chamfer_distance import semantic_mask_chamfer_dist_cum
6 | from evaluation.AP import instance_mask_AP
7 | from evaluation.iou import get_batch_iou
8 |
9 | SAMPLED_RECALLS = torch.linspace(0.1, 1, 10)
10 | THRESHOLDS = [2, 4, 6]
11 |
12 |
13 | def get_val_info(args):
14 | data_conf = {
15 | 'xbound': args.xbound,
16 | 'ybound': args.ybound,
17 | 'thickness': args.thickness,
18 | }
19 |
20 | dataset = HDMapNetEvalDataset(args.version, args.dataroot, args.eval_set, args.result_path, data_conf)
21 |
22 | data_loader = torch.utils.data.DataLoader(dataset, batch_size=args.bsz, shuffle=False, drop_last=False)
23 |
24 | total_CD1 = torch.zeros(args.max_channel).cuda()
25 | total_CD2 = torch.zeros(args.max_channel).cuda()
26 | total_CD_num1 = torch.zeros(args.max_channel).cuda()
27 | total_CD_num2 = torch.zeros(args.max_channel).cuda()
28 | total_intersect = torch.zeros(args.max_channel).cuda()
29 | total_union = torch.zeros(args.max_channel).cuda()
30 | AP_matrix = torch.zeros((args.max_channel, len(THRESHOLDS))).cuda()  # (max_channel, len(THRESHOLDS)) = (3, 3)
31 | AP_count_matrix = torch.zeros((args.max_channel, len(THRESHOLDS))).cuda()
32 |
33 | print('running eval...')
34 | for pred_map, confidence_level, gt_map in tqdm.tqdm(data_loader):
35 | # iou
36 | pred_map = pred_map.cuda()
37 | confidence_level = confidence_level.cuda()
38 | gt_map = gt_map.cuda()
39 |
40 | intersect, union = get_batch_iou(pred_map, gt_map)
41 | CD1, CD2, num1, num2 = semantic_mask_chamfer_dist_cum(pred_map, gt_map, args.xbound[2], args.ybound[2], threshold=args.CD_threshold)#5
42 |
43 | instance_mask_AP(AP_matrix, AP_count_matrix, pred_map, gt_map, args.xbound[2], args.ybound[2],
44 | confidence_level, THRESHOLDS, sampled_recalls=SAMPLED_RECALLS)
45 |
46 | total_intersect += intersect.cuda()
47 | total_union += union.cuda()
48 | total_CD1 += CD1
49 | total_CD2 += CD2
50 | total_CD_num1 += num1
51 | total_CD_num2 += num2
52 |
53 | CD_pred = total_CD1 / total_CD_num1
54 | CD_label = total_CD2 / total_CD_num2
55 | CD = (total_CD1 + total_CD2) / (total_CD_num1 + total_CD_num2)
56 | CD_pred[CD_pred > args.CD_threshold] = args.CD_threshold
57 | CD_label[CD_label > args.CD_threshold] = args.CD_threshold
58 | CD[CD > args.CD_threshold] = args.CD_threshold
59 | return {
60 | 'iou': total_intersect / total_union,
61 | 'CD_pred': CD_pred,
62 | 'CD_label': CD_label,
63 | 'CD': CD,
64 | 'Average_precision': AP_matrix / AP_count_matrix,
65 | }
66 |
67 |
68 | if __name__ == '__main__':
69 | import argparse
70 | parser = argparse.ArgumentParser(description='Evaluate nuScenes local HD Map Construction Results.')
71 | parser.add_argument('--result_path', type=str)
72 | parser.add_argument('--dataroot', type=str, default='dataset/nuScenes/')
73 | parser.add_argument('--bsz', type=int, default=4)
74 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini'])
75 | parser.add_argument('--eval_set', type=str, default='mini_val', choices=['train', 'val', 'test', 'mini_train', 'mini_val'])
76 | parser.add_argument('--thickness', type=int, default=5)
77 | parser.add_argument('--max_channel', type=int, default=3)
78 | parser.add_argument('--CD_threshold', type=int, default=5)
79 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
80 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
81 |
82 | args = parser.parse_args()
83 |
84 | print(get_val_info(args))
85 |
--------------------------------------------------------------------------------
/export_gt_to_json.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from tqdm import tqdm
4 |
5 | from nuscenes import NuScenes
6 | from nuscenes.utils.splits import create_splits_scenes
7 |
8 | from data.vector_map import VectorizedLocalMap
9 | from data.rasterize import rasterize_map
10 |
11 | import mmcv
12 |
13 |
14 | def main(args):
15 | patch_h = args.ybound[1] - args.ybound[0]
16 | patch_w = args.xbound[1] - args.xbound[0]
17 | canvas_h = int(patch_h / args.ybound[2])
18 | canvas_w = int(patch_w / args.xbound[2])
19 | patch_size = (patch_h, patch_w)
20 | canvas_size = (canvas_h, canvas_w)
21 |
22 | submission = {
23 | "meta": {
24 | "use_camera": True,
25 | "use_lidar": False,
26 | "use_radar": False,
27 | "use_external": False,
28 | "vector": not args.raster
29 | },
30 | "results": {}
31 | }
32 |
33 | scenes = create_splits_scenes()[args.eval_set]
34 | nusc = NuScenes(version=args.version, dataroot=args.dataroot, verbose=False)
35 | vector_map = VectorizedLocalMap(args.dataroot, patch_size=patch_size, canvas_size=canvas_size)
36 |
37 | samples = [samp for samp in nusc.sample if nusc.get('scene', samp['scene_token'])['name'] in scenes]
38 | for rec in tqdm(samples):
39 | location = nusc.get('log', nusc.get('scene', rec['scene_token'])['log_token'])['location']
40 | ego_pose = nusc.get('ego_pose', nusc.get('sample_data', rec['data']['LIDAR_TOP'])['ego_pose_token'])
41 | vectors = vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation'])
42 | if args.raster:
43 | pred_map, confidence_level = rasterize_map(vectors, patch_size, canvas_size, args.max_channel, args.thickness)
44 | submission['results'][rec['token']] = {
45 | 'map': pred_map,
46 | 'confidence_level': confidence_level
47 | }
48 | else:
49 | for vector in vectors:
50 | vector['confidence_level'] = 1
51 | vector['pts'] = vector['pts'].tolist()
52 | submission['results'][rec['token']] = vectors
53 |
54 | with open(args.output, 'w') as f:
55 | json.dump(submission, f)
56 | print(f"exported to {args.output}")
57 |
58 |
59 | if __name__ == '__main__':
60 | import argparse
61 | parser = argparse.ArgumentParser(description='Demo Script to turn GT into json submission.')
62 | parser.add_argument('--dataroot', type=str, default='dataset/nuScenes/')
63 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini'])
64 | parser.add_argument('--eval_set', type=str, default='mini_val', choices=['train', 'val', 'test', 'mini_train', 'mini_val'])
65 | parser.add_argument('--output', type=str, default='submission.json')
66 | parser.add_argument('--thickness', type=int, default=1)
67 | parser.add_argument('--max_channel', type=int, default=3)
68 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
69 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
70 | parser.add_argument('--raster', action='store_true', default=False)
71 | args = parser.parse_args()
72 |
73 | main(args)
74 |
--------------------------------------------------------------------------------
/export_pred_to_json.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import mmcv
3 | import tqdm
4 | import torch
5 |
6 | from data.dataset import semantic_dataset
7 | from data.const import NUM_CLASSES
8 | from model import get_model
9 | from postprocess.vectorize import vectorize
10 |
11 |
12 | def gen_dx_bx(xbound, ybound):
13 | dx = [row[2] for row in [xbound, ybound]]
14 | bx = [row[0] + row[2] / 2.0 for row in [xbound, ybound]]
15 | nx = [(row[1] - row[0]) / row[2] for row in [xbound, ybound]]
16 | return dx, bx, nx
17 |
18 | def export_to_json(model, val_loader, angle_class, args):
19 | submission = {
20 | "meta": {
21 | "use_camera": True,
22 | "use_lidar": False,
23 | "use_radar": False,
24 | "use_external": False,
25 | "vector": True,
26 | },
27 | "results": {}
28 | }
29 |
30 | dx, bx, nx = gen_dx_bx(args.xbound, args.ybound)
31 |
32 | model.eval()
33 | with torch.no_grad():
34 | for batchi, (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll, segmentation_gt, instance_gt, direction_gt) in enumerate(tqdm.tqdm(val_loader)):
35 | segmentation, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
36 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(),
37 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda())
38 |
39 | for si in range(segmentation.shape[0]):
40 | coords, confidences, line_types = vectorize(segmentation[si], embedding[si], direction[si], angle_class)
41 | vectors = []
42 | for coord, confidence, line_type in zip(coords, confidences, line_types):
43 | vector = {'pts': coord * dx + bx, 'pts_num': len(coord), "type": line_type, "confidence_level": confidence}
44 | vectors.append(vector)
45 | rec = val_loader.dataset.samples[batchi * val_loader.batch_size + si]
46 | submission['results'][rec['token']] = vectors
47 |
48 | mmcv.dump(submission, args.output)
49 |
50 |
51 | def main(args):
52 | data_conf = {
53 | 'num_channels': NUM_CLASSES + 1,
54 | 'image_size': args.image_size,
55 | 'xbound': args.xbound,
56 | 'ybound': args.ybound,
57 | 'zbound': args.zbound,
58 | 'dbound': args.dbound,
59 | 'thickness': args.thickness,
60 | 'angle_class': args.angle_class,
61 | }
62 |
63 | train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)
64 | model = get_model(args.model, data_conf, True, args.embedding_dim, True, args.angle_class)
65 | model.load_state_dict(torch.load(args.modelf), strict=False)
66 | model.cuda()
67 | export_to_json(model, val_loader, args.angle_class, args)
68 |
69 |
70 | if __name__ == '__main__':
71 | parser = argparse.ArgumentParser()
72 | # nuScenes config
73 | parser.add_argument('--dataroot', type=str, default='dataset/nuScenes/')
74 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini'])
75 |
76 | # model config
77 | parser.add_argument("--model", type=str, default='HDMapNet_cam')
78 |
79 | # training config
80 | parser.add_argument("--bsz", type=int, default=4)
81 | parser.add_argument("--nworkers", type=int, default=10)
82 |
83 | parser.add_argument('--modelf', type=str, default=None)
84 |
85 | # data config
86 | parser.add_argument("--thickness", type=int, default=5)
87 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352])
88 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
89 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
90 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0])
91 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0])
92 |
93 | # embedding config
94 | parser.add_argument("--embedding_dim", type=int, default=16)
95 |
96 | # direction config
97 | parser.add_argument('--angle_class', type=int, default=36)
98 |
99 | # output
100 | parser.add_argument("--output", type=str, default='output.json')
101 |
102 | args = parser.parse_args()
103 | main(args)
104 |
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 |
6 | class FocalLoss(nn.Module):
7 | def __init__(self, alpha=1, gamma=2, reduce='mean'):
8 | super(FocalLoss, self).__init__()
9 | self.alpha = alpha
10 | self.gamma = gamma
11 | self.reduce = reduce
12 |
13 | def forward(self, inputs, targets):
14 |         BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
15 | pt = torch.exp(-BCE_loss)
16 | F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss
17 |
18 | if self.reduce == 'mean':
19 | return torch.mean(F_loss)
20 | elif self.reduce == 'sum':
21 | return torch.sum(F_loss)
22 | else:
23 | raise NotImplementedError
24 |
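# For reference, FocalLoss above implements FL = alpha * (1 - p_t)^gamma * BCE,
# where p_t = exp(-BCE) is the predicted probability of the true class; gamma > 0
# down-weights easy, well-classified pixels so training focuses on hard ones.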
25 |
26 | class SimpleLoss(torch.nn.Module):
27 | def __init__(self, pos_weight):
28 | super(SimpleLoss, self).__init__()
29 | self.loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([pos_weight]))  # applies a sigmoid internally
30 |
31 | def forward(self, ypred, ytgt):
32 | loss = self.loss_fn(ypred, ytgt)
33 | return loss
34 |
35 |
36 | class DiscriminativeLoss(nn.Module):
37 | def __init__(self, embed_dim, delta_v, delta_d):
38 | super(DiscriminativeLoss, self).__init__()
39 | self.embed_dim = embed_dim
40 | self.delta_v = delta_v
41 | self.delta_d = delta_d
42 |
43 | def forward(self, embedding, seg_gt):
44 | if embedding is None:
45 | return 0, 0, 0
46 | bs = embedding.shape[0]  # batch size
47 |
48 | var_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device)
49 | dist_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device)
50 | reg_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device)
51 |
52 | for b in range(bs):
53 | embedding_b = embedding[b]  # (embed_dim, H, W), e.g. 16 x 200 x 400; one embedding vector per BEV pixel
54 | seg_gt_b = seg_gt[b]  # (H, W) instance-id map, e.g. 200 x 400
55 |
56 | labels = torch.unique(seg_gt_b)  # all distinct instance ids (1, 2, ...), one per object
57 | labels = labels[labels != 0]
58 | num_lanes = len(labels)
59 | if num_lanes == 0:
60 | # please refer to issue here: https://github.com/harryhan618/LaneNet/issues/12
61 | _nonsense = embedding.sum()
62 | _zero = torch.zeros_like(_nonsense)
63 | var_loss = var_loss + _nonsense * _zero
64 | dist_loss = dist_loss + _nonsense * _zero
65 | reg_loss = reg_loss + _nonsense * _zero
66 | continue
67 |
68 | centroid_mean = []
69 | for lane_idx in labels:  # iterate over instance ids
70 | seg_mask_i = (seg_gt_b == lane_idx)  # mask of pixels belonging to this instance
71 | if not seg_mask_i.any():
72 | continue
73 | embedding_i = embedding_b[:, seg_mask_i]
74 |
75 | mean_i = torch.mean(embedding_i, dim=1)
76 | centroid_mean.append(mean_i)
77 |
78 | # ---------- var_loss -------------
79 | var_loss = var_loss + torch.mean(F.relu(torch.norm(embedding_i-mean_i.reshape(self.embed_dim, 1), dim=0) - self.delta_v) ** 2) / num_lanes
80 | centroid_mean = torch.stack(centroid_mean) # (n_lane, embed_dim)
81 |
82 | if num_lanes > 1:
83 | centroid_mean1 = centroid_mean.reshape(-1, 1, self.embed_dim)
84 | centroid_mean2 = centroid_mean.reshape(1, -1, self.embed_dim)
85 | dist = torch.norm(centroid_mean1-centroid_mean2, dim=2) # shape (num_lanes, num_lanes)
86 | dist = dist + torch.eye(num_lanes, dtype=dist.dtype, device=dist.device) * self.delta_d # diagonal elements are 0, now mask above delta_d
87 |
88 | # divided by two for double calculated loss above, for implementation convenience
89 | dist_loss = dist_loss + torch.sum(F.relu(-dist + self.delta_d)**2) / (num_lanes * (num_lanes-1)) / 2
90 |
91 | # reg_loss is not used in original paper
92 | # reg_loss = reg_loss + torch.mean(torch.norm(centroid_mean, dim=1))
93 |
94 | var_loss = var_loss / bs
95 | dist_loss = dist_loss / bs
96 | reg_loss = reg_loss / bs
97 | return var_loss, dist_loss, reg_loss
98 |
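# DiscriminativeLoss above follows the pull/push instance-embedding formulation
# (De Brabandere et al., 2017): var_loss pulls the embeddings of one instance toward
# their mean within a margin delta_v, dist_loss pushes the means of different
# instances at least delta_d apart, and reg_loss is computed but effectively disabled.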
99 |
100 | def calc_loss():
101 | pass
102 |
103 | xmin, xmax = -5, 5  # x range of the ground-plane grid in the camera frame (BEV x range is -30..30)
104 | xx_l = xmax - xmin
105 | num_x = 32
106 | zmin, zmax = 3, 29
107 | zz_l = zmax - zmin
108 | num_z = 88
109 | #h = 0.5
110 | def plane_grid_cam(trans, rots,h):
111 |
112 | B, N, _ = trans.size()  # batch size, number of cameras, xyz
113 |
114 |
115 | # y = torch.linspace(xmin, xmax, num_x, dtype=torch.double).cuda()
116 | # x = torch.linspace(ymin, ymax, num_y, dtype=torch.double).cuda()
117 | x = torch.linspace(xmin, xmax, num_x)
118 | z = torch.linspace(zmin, zmax, num_z)
119 |
120 | x = x.cuda()
121 | z = z.cuda()
122 |
123 | x, z = torch.meshgrid(x, z)
124 |
125 | x = x.flatten()#1 3200
126 | z = z.flatten()
127 | x = x.unsqueeze(0).repeat(B*N, 1)#24 3200
128 | z = z.unsqueeze(0).repeat(B*N, 1)
129 |
130 | y = torch.ones_like(x)
131 | y = y * h  # constant plane height
132 | d = torch.ones_like(x)  # homogeneous coordinate
133 | y = y.cuda()
134 | d = d.cuda()
135 |
136 | coords = torch.stack([x, y, z, d], axis=1)  # (B*N, 4, num_x*num_z) homogeneous plane points
137 | Rs = torch.eye(4, device=rots.device).repeat(B, N, 1, 1)  # transform into the ego (vehicle) frame
138 | Rs[:, :, :3, :3] = rots
139 | Ts = torch.eye(4, device=trans.device).repeat(B, N, 1, 1)
140 | Ts[:, :, :3, 3] = trans
141 | RTs = Rs @ Ts  # (B, N, 4, 4)
142 | RTs = RTs.view(-1,4,4)
143 | # rots = rots.view(-1,3,3)
144 | # ego_coords = rots @ coords#24 4 4 * 24 4 3200 = 24 4 3200
145 | # trans = trans.view(-1,3,1)
146 | # ego_coords += trans
147 | #RTs = RTs.cuda()
148 | ego_coords = RTs @ coords
149 | ego_coords = torch.stack([ego_coords[:, 0], ego_coords[:, 1]], axis=1)  # keep x, y -> (B*N, 2, num_x*num_z)
150 | ego_coords = ego_coords.view(B*N, 2, num_x, num_z)
151 | ego_coords = ego_coords.view(B, N, 2, num_x, num_z)
152 | ego_coords = ego_coords.permute(0, 1, 3, 4, 2).contiguous()  # (B, N, num_x, num_z, 2)
153 | return ego_coords
154 |
155 | def get_cam_gt(img, ego_coords):  # img is the rasterized BEV ground truth
156 | _, img_c, img_h, img_w = img.shape  # (B, C, H, W), e.g. 4 x 5 x 200 x 400
157 | img = img.permute(0, 2, 3, 1).contiguous()  # (B, H, W, C)
158 | B, N, pix_h, pix_w, pix_c = ego_coords.shape  # (B, N, grid_h, grid_w, 2)
159 | out_shape = (B, N, pix_h, pix_w, img_c)
160 |
161 | pix_x, pix_y = torch.split(ego_coords, 1, dim=-1) # [B, n,pix_h, pix_w,1]
162 | pix_x = pix_x / 0.15 + 200
163 | pix_y = pix_y / 0.15 + 100
164 | # Rounding
165 | pix_x0 = torch.floor(pix_x)
166 | pix_x1 = pix_x0 + 1
167 | pix_y0 = torch.floor(pix_y)
168 | pix_y1 = pix_y0 + 1
169 |
170 | # Clip within image boundary
171 | y_max = (img_h - 1)
172 | x_max = (img_w - 1)
173 |
174 | pix_x0 = torch.clip(pix_x0, 0, x_max)
175 | pix_y0 = torch.clip(pix_y0, 0, y_max)
176 | pix_x1 = torch.clip(pix_x1, 0, x_max)
177 | pix_y1 = torch.clip(pix_y1, 0, y_max) #
178 |
179 | # Weights [B, n,pix_h, pix_w, 1]
180 | wt_x0 = pix_x1 - pix_x
181 | wt_x1 = pix_x - pix_x0
182 | wt_y0 = pix_y1 - pix_y
183 | wt_y1 = pix_y - pix_y0
184 |
185 | # indices in the image to sample from
186 | dim = img_w
187 |
188 | # Apply the lower and upper bound pix coord
189 | base_y0 = pix_y0 * dim
190 | base_y1 = pix_y1 * dim
191 |
192 | # 4 corner vertices
193 | idx00 = (pix_x0 + base_y0).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long()
194 | idx01 = (pix_x0 + base_y1).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long()
195 | idx10 = (pix_x1 + base_y0).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long()
196 | idx11 = (pix_x1 + base_y1).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long()
197 |
198 | # Gather pixels from image using vertices
199 |
200 | imgs_flat = img.reshape([B, -1, img_c]).unsqueeze(1)
201 | imgs_flat = imgs_flat.repeat(1,N,1,1)
202 | #imgs_flat = imgs_flat.cuda()
203 | im00 = torch.gather(imgs_flat, 2, idx00).reshape(out_shape)
204 | im01 = torch.gather(imgs_flat, 2, idx01).reshape(out_shape)
205 | im10 = torch.gather(imgs_flat, 2, idx10).reshape(out_shape)
206 | im11 = torch.gather(imgs_flat, 2, idx11).reshape(out_shape)
207 |
208 | # Apply weights [pix_h, pix_w, 1]
209 | w00 = wt_x0 * wt_y0
210 | w01 = wt_x0 * wt_y1
211 | w10 = wt_x1 * wt_y0
212 | w11 = wt_x1 * wt_y1
213 | gt = w00 * im00 + w01 * im01 + w10 * im10 + w11 * im11
214 | return gt  # (B, N, pix_h, pix_w, C)
215 |
216 |
217 | def view_loss(gt, trans, rots, h=0.5):
218 |
219 | ego_coords = plane_grid_cam(trans, rots, h)  # (B, N, grid_h, grid_w, 2)
220 | cam_gt = get_cam_gt(gt, ego_coords)  # (B, N, grid_h, grid_w, C)
221 | cam_gt = cam_gt.permute(0,1,4,2,3).contiguous()
222 | b,n,c,h,w = cam_gt.size()
223 | cam_gt = cam_gt.view(-1, c, h, w)
224 | return cam_gt
225 |
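# A minimal usage sketch (names other than view_loss/SimpleLoss are placeholders, not
# taken from this repo): view_loss projects the rasterized BEV ground truth onto a
# ground plane at height h in every camera frame, so a per-view prediction can be
# supervised in camera space, e.g.
#
#   cam_gt = view_loss(bev_seg_gt, trans, rots, h=0.5)      # (B*N, C, grid_h, grid_w)
#   across_space_loss = SimpleLoss(pos_weight)(cam_view_pred, cam_gt)
#
# where bev_seg_gt, pos_weight and cam_view_pred come from the surrounding pipeline.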
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
--------------------------------------------------------------------------------
/model/HDMapNet/base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from efficientnet_pytorch import EfficientNet
5 | from torchvision.models.resnet import resnet18
6 |
7 |
8 | class Up(nn.Module):
9 | def __init__(self, in_channels, out_channels, scale_factor=2):
10 | super().__init__()
11 |
12 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear',
13 | align_corners=True)
14 |
15 | self.conv = nn.Sequential(
16 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
17 | nn.BatchNorm2d(out_channels),
18 | nn.ReLU(inplace=True),
19 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
20 | nn.BatchNorm2d(out_channels),
21 | nn.ReLU(inplace=True)
22 | )
23 |
24 | def forward(self, x1, x2):
25 | x1 = self.up(x1)
26 | x1 = torch.cat([x2, x1], dim=1)
27 | return self.conv(x1)
28 |
29 |
30 | class CamEncode(nn.Module):
31 | def __init__(self, C):
32 | super(CamEncode, self).__init__()
33 | self.C = C
34 |
35 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0")
36 | self.up1 = Up(320+112, self.C)
37 |
38 | def get_eff_depth(self, x):
39 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231
40 | endpoints = dict()
41 |
42 | # Stem
43 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x)))
44 | prev_x = x
45 |
46 | # Blocks
47 | for idx, block in enumerate(self.trunk._blocks):
48 | drop_connect_rate = self.trunk._global_params.drop_connect_rate
49 | if drop_connect_rate:
50 | drop_connect_rate *= float(idx) / len(self.trunk._blocks)  # scale drop_connect_rate
51 | x = block(x, drop_connect_rate=drop_connect_rate)
52 | if prev_x.size(2) > x.size(2):
53 | endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x
54 | prev_x = x
55 |
56 | # Head
57 | endpoints['reduction_{}'.format(len(endpoints)+1)] = x
58 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4'])
59 | return x
60 |
61 | def forward(self, x):
62 | return self.get_eff_depth(x)
63 |
64 |
65 | class BevEncode(nn.Module):
66 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37):
67 | super(BevEncode, self).__init__()
68 | trunk = resnet18(pretrained=False, zero_init_residual=True)
69 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3,
70 | bias=False)
71 | self.bn1 = trunk.bn1
72 | self.relu = trunk.relu
73 |
74 | self.layer1 = trunk.layer1
75 | self.layer2 = trunk.layer2
76 | self.layer3 = trunk.layer3
77 |
78 | self.up1 = Up(64 + 256, 256, scale_factor=4)
79 | self.up2 = nn.Sequential(
80 | nn.Upsample(scale_factor=2, mode='bilinear',
81 | align_corners=True),
82 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
83 | nn.BatchNorm2d(128),
84 | nn.ReLU(inplace=True),
85 | nn.Conv2d(128, outC, kernel_size=1, padding=0),
86 | )
87 |
88 | self.instance_seg = instance_seg
89 | if instance_seg:
90 | self.up1_embedded = Up(64 + 256, 256, scale_factor=4)
91 | self.up2_embedded = nn.Sequential(
92 | nn.Upsample(scale_factor=2, mode='bilinear',
93 | align_corners=True),
94 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
95 | nn.BatchNorm2d(128),
96 | nn.ReLU(inplace=True),
97 | nn.Conv2d(128, embedded_dim, kernel_size=1, padding=0),
98 | )
99 |
100 | self.direction_pred = direction_pred
101 | if direction_pred:
102 | self.up1_direction = Up(64 + 256, 256, scale_factor=4)
103 | self.up2_direction = nn.Sequential(
104 | nn.Upsample(scale_factor=2, mode='bilinear',
105 | align_corners=True),
106 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
107 | nn.BatchNorm2d(128),
108 | nn.ReLU(inplace=True),
109 | nn.Conv2d(128, direction_dim, kernel_size=1, padding=0),
110 | )
111 |
112 | def forward(self, x):
113 | x = self.conv1(x)
114 | x = self.bn1(x)
115 | x = self.relu(x)
116 |
117 | x1 = self.layer1(x)
118 | x = self.layer2(x1)
119 | x2 = self.layer3(x)
120 |
121 | x = self.up1(x2, x1)
122 | x = self.up2(x)
123 |
124 | if self.instance_seg:
125 | x_embedded = self.up1_embedded(x2, x1)
126 | x_embedded = self.up2_embedded(x_embedded)
127 | else:
128 | x_embedded = None
129 |
130 | if self.direction_pred:
131 | x_direction = self.up1_direction(x2, x1)
132 | x_direction = self.up2_direction(x_direction)
133 | else:
134 | x_direction = None
135 |
136 | return x, x_embedded, x_direction
137 |
--------------------------------------------------------------------------------
/model/HDMapNet/hdmapnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from .homography import bilinear_sampler, IPM
5 | from .utils import plane_grid_2d, get_rot_2d, cam_to_pixel
6 | from .pointpillar import PointPillarEncoder
7 | from .base import CamEncode, BevEncode
8 | from data.utils import gen_dx_bx
9 |
10 |
11 | class ViewTransformation(nn.Module):
12 | def __init__(self, fv_size, bv_size, n_views=6):
13 | super(ViewTransformation, self).__init__()
14 | self.n_views = n_views
15 | self.hw_mat = []
16 | self.bv_size = bv_size
17 | fv_dim = fv_size[0] * fv_size[1]
18 | bv_dim = bv_size[0] * bv_size[1]
19 | for i in range(self.n_views):
20 | fc_transform = nn.Sequential(
21 | nn.Linear(fv_dim, bv_dim),
22 | nn.ReLU(),
23 | nn.Linear(bv_dim, bv_dim),
24 | nn.ReLU()
25 | )
26 | self.hw_mat.append(fc_transform)
27 | self.hw_mat = nn.ModuleList(self.hw_mat)
28 |
29 | def forward(self, feat):
30 | B, N, C, H, W = feat.shape
31 | feat = feat.view(B, N, C, H*W)
32 | outputs = []
33 | for i in range(N):
34 | output = self.hw_mat[i](feat[:, i]).view(B, C, self.bv_size[0], self.bv_size[1])
35 | outputs.append(output)
36 | outputs = torch.stack(outputs, 1)
37 | return outputs
38 |
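# ViewTransformation learns one two-layer MLP per camera view: the flattened
# front-view feature map (fv_size[0]*fv_size[1] values per channel) is mapped to a
# flattened bird's-eye grid (bv_size[0]*bv_size[1]), so the perspective-to-BEV
# correspondence is learned per view rather than derived from calibration.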
39 |
40 | class HDMapNet(nn.Module):
41 | def __init__(self, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=36, lidar=False):
42 | super(HDMapNet, self).__init__()
43 | self.camC = 64
44 | self.downsample = 16
45 |
46 | dx, bx, nx = gen_dx_bx(data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
47 | #nx = [400,200,1]
48 | final_H, final_W = nx[1].item(), nx[0].item()
49 |
50 | self.camencode = CamEncode(self.camC)  # per-camera image feature encoder
51 | fv_size = (data_conf['image_size'][0]//self.downsample, data_conf['image_size'][1]//self.downsample)  # front-view feature size after downsampling
52 | bv_size = (final_H//5, final_W//5)  # intermediate bird's-eye-view grid size
53 | self.view_fusion = ViewTransformation(fv_size=fv_size, bv_size=bv_size)  # learned view transformation
54 |
55 | res_x = bv_size[1] * 3 // 4
56 | ipm_xbound = [-res_x, res_x, 4*res_x/final_W]
57 | ipm_ybound = [-res_x/2, res_x/2, 2*res_x/final_H]
58 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=6, C=self.camC, extrinsic=True)
59 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
60 | # self.up_sampler = nn.Upsample(scale_factor=5, mode='bilinear', align_corners=True)
61 |
62 | self.lidar = lidar
63 | if lidar:
64 | self.pp = PointPillarEncoder(128, data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
65 | self.bevencode = BevEncode(inC=self.camC+128, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1)
66 | else:
67 | self.bevencode = BevEncode(inC=self.camC, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1)
68 |
69 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
70 | B, N, _, _ = intrins.shape
71 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
72 |
73 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
74 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous()
75 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
76 | Ts[:, :, :3, 3] = -trans
77 | RTs = Rs @ Ts
78 |
79 | post_RTs = None
80 |
81 | return Ks, RTs, post_RTs
82 |
83 | def get_cam_feats(self, x):
84 | B, N, C, imH, imW = x.shape
85 | x = x.view(B*N, C, imH, imW)
86 | x = self.camencode(x)
87 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample)
88 | return x
89 |
90 | def forward(self, img, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll):
91 | x = self.get_cam_feats(img)
92 | x = self.view_fusion(x)
93 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
94 | topdown = self.ipm(x, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs)
95 | topdown = self.up_sampler(topdown)
96 | if self.lidar:
97 | lidar_feature = self.pp(lidar_data, lidar_mask)
98 | topdown = torch.cat([topdown, lidar_feature], dim=1)
99 | return self.bevencode(topdown)
100 |
--------------------------------------------------------------------------------
/model/HDMapNet/homography.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import cv2
5 |
6 | CAM_FL = 0
7 | CAM_F = 1
8 | CAM_FR = 2
9 | CAM_BL = 3
10 | CAM_B = 4
11 | CAM_BR = 5
12 |
13 |
14 | # =========================================================
15 | # Projections
16 | # =========================================================
17 | def rotation_from_euler(rolls, pitchs, yaws, cuda=True):
18 | """
19 | Get rotation matrix
20 | Args:
21 | rolls, pitchs, yaws: in radians
22 |
23 | Returns:
24 | R: [B, 4, 4]
25 | """
26 | B = len(rolls)
27 |
28 | si, sj, sk = torch.sin(rolls), torch.sin(pitchs), torch.sin(yaws)
29 | ci, cj, ck = torch.cos(rolls), torch.cos(pitchs), torch.cos(yaws)
30 | cc, cs = ci * ck, ci * sk
31 | sc, ss = si * ck, si * sk
32 |
33 | R = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
34 | if cuda:
35 | R = R.cuda()
36 | R[:, 0, 0] = cj * ck
37 | R[:, 0, 1] = sj * sc - cs
38 | R[:, 0, 2] = sj * cc + ss
39 | R[:, 1, 0] = cj * sk
40 | R[:, 1, 1] = sj * ss + cc
41 | R[:, 1, 2] = sj * cs - sc
42 | R[:, 2, 0] = -sj
43 | R[:, 2, 1] = cj * si
44 | R[:, 2, 2] = cj * ci
45 | return R
46 |
47 |
48 | def perspective(cam_coords, proj_mat, h, w, extrinsic, offset=None):
49 | """
50 | P = proj_mat @ (x, y, z, 1)
51 | Project cam2pixel
52 |
53 | Args:
54 | cam_coords: [B, 4, npoints]
55 | proj_mat: [B, 4, 4]
56 |
57 | Returns:
58 | pix coords: [B, h, w, 2]
59 | """
60 | eps = 1e-7
61 | pix_coords = proj_mat @ cam_coords
62 |
63 | N, _, _ = pix_coords.shape
64 |
65 | if extrinsic:
66 | pix_coords[:, 0] += offset[0] / 2
67 | pix_coords[:, 2] -= offset[1] / 8
68 | pix_coords = torch.stack([pix_coords[:, 2], pix_coords[:, 0]], axis=1)
69 | else:
70 | pix_coords = pix_coords[:, :2, :] / (pix_coords[:, 2, :][:, None, :] + eps)
71 | pix_coords = pix_coords.view(N, 2, h, w)
72 | pix_coords = pix_coords.permute(0, 2, 3, 1).contiguous()
73 | return pix_coords
74 |
75 |
76 | def bilinear_sampler(imgs, pix_coords):
77 | """
78 | Construct a new image by bilinear sampling from the input image.
79 | Args:
80 | imgs: [B, H, W, C]
81 | pix_coords: [B, h, w, 2]
82 | :return:
83 | sampled image [B, h, w, c]
84 | """
85 | B, img_h, img_w, img_c = imgs.shape
86 | B, pix_h, pix_w, pix_c = pix_coords.shape
87 | out_shape = (B, pix_h, pix_w, img_c)
88 |
89 | pix_x, pix_y = torch.split(pix_coords, 1, dim=-1) # [B, pix_h, pix_w, 1]
90 |
91 | # Rounding
92 | pix_x0 = torch.floor(pix_x)
93 | pix_x1 = pix_x0 + 1
94 | pix_y0 = torch.floor(pix_y)
95 | pix_y1 = pix_y0 + 1
96 |
97 | # Clip within image boundary
98 | y_max = (img_h - 1)
99 | x_max = (img_w - 1)
100 |
101 | pix_x0 = torch.clip(pix_x0, 0, x_max)
102 | pix_y0 = torch.clip(pix_y0, 0, y_max)
103 | pix_x1 = torch.clip(pix_x1, 0, x_max)
104 | pix_y1 = torch.clip(pix_y1, 0, y_max)
105 |
106 | # Weights [B, pix_h, pix_w, 1]
107 | wt_x0 = pix_x1 - pix_x
108 | wt_x1 = pix_x - pix_x0
109 | wt_y0 = pix_y1 - pix_y
110 | wt_y1 = pix_y - pix_y0
111 |
112 | # indices in the image to sample from
113 | dim = img_w
114 |
115 | # Apply the lower and upper bound pix coord
116 | base_y0 = pix_y0 * dim
117 | base_y1 = pix_y1 * dim
118 |
119 | # 4 corner vertices
120 | idx00 = (pix_x0 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long()
121 | idx01 = (pix_x0 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long()
122 | idx10 = (pix_x1 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long()
123 | idx11 = (pix_x1 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long()
124 |
125 | # Gather pixels from image using vertices
126 | imgs_flat = imgs.reshape([B, -1, img_c])
127 |
128 | im00 = torch.gather(imgs_flat, 1, idx00).reshape(out_shape)
129 | im01 = torch.gather(imgs_flat, 1, idx01).reshape(out_shape)
130 | im10 = torch.gather(imgs_flat, 1, idx10).reshape(out_shape)
131 | im11 = torch.gather(imgs_flat, 1, idx11).reshape(out_shape)
132 |
133 | # Apply weights [pix_h, pix_w, 1]
134 | w00 = wt_x0 * wt_y0
135 | w01 = wt_x0 * wt_y1
136 | w10 = wt_x1 * wt_y0
137 | w11 = wt_x1 * wt_y1
138 | output = w00 * im00 + w01 * im01 + w10 * im10 + w11 * im11
139 | return output
140 |
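# The bilinear weights above come from the fractional parts of the sampling
# coordinates: e.g. for pix_x = 10.3, pix_y = 5.7 the four neighbours (10, 5),
# (10, 6), (11, 5) and (11, 6) receive weights 0.21, 0.49, 0.09 and 0.21.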
141 |
142 | def plane_grid(xbound, ybound, zs, yaws, rolls, pitchs, cuda=True):
143 | B = len(zs)
144 |
145 | xmin, xmax = xbound[0], xbound[1]
146 | num_x = int((xbound[1] - xbound[0]) / xbound[2])
147 | ymin, ymax = ybound[0], ybound[1]
148 | num_y = int((ybound[1] - ybound[0]) / ybound[2])
149 |
150 | # y = torch.linspace(xmin, xmax, num_x, dtype=torch.double).cuda()
151 | # x = torch.linspace(ymin, ymax, num_y, dtype=torch.double).cuda()
152 | y = torch.linspace(xmin, xmax, num_x)
153 | x = torch.linspace(ymin, ymax, num_y)
154 | if cuda:
155 | x = x.cuda()
156 | y = y.cuda()
157 |
158 | y, x = torch.meshgrid(x, y)
159 |
160 | x = x.flatten()
161 | y = y.flatten()
162 |
163 | x = x.unsqueeze(0).repeat(B, 1)
164 | y = y.unsqueeze(0).repeat(B, 1)
165 |
166 | # z = torch.ones_like(x, dtype=torch.double).cuda() * zs.view(-1, 1)
167 | # d = torch.ones_like(x, dtype=torch.double).cuda()
168 | z = torch.ones_like(x) * zs.view(-1, 1)
169 | d = torch.ones_like(x)
170 | if cuda:
171 | z = z.cuda()
172 | d = d.cuda()
173 |
174 | coords = torch.stack([x, y, z, d], axis=1)
175 |
176 | rotation_matrix = rotation_from_euler(pitchs, rolls, yaws, cuda)
177 |
178 | coords = rotation_matrix @ coords
179 | return coords
180 |
181 |
182 | def ipm_from_parameters(image, xyz, K, RT, target_h, target_w, extrinsic, post_RT=None):
183 | """
184 | :param image: [B, H, W, C]
185 | :param xyz: [B, 4, npoints]
186 | :param K: [B, 4, 4]
187 | :param RT: [B, 4, 4]
188 | :param target_h: int
189 | :param target_w: int
190 | :return: warped_images: [B, target_h, target_w, C]
191 | """
192 | P = K @ RT
193 | if post_RT is not None:
194 | P = post_RT @ P
195 | P = P.reshape(-1, 4, 4)
196 | pixel_coords = perspective(xyz, P, target_h, target_w, extrinsic, image.shape[1:3])
197 | image2 = bilinear_sampler(image, pixel_coords)
198 | image2 = image2.type_as(image)
199 | return image2
200 |
201 |
202 | class PlaneEstimationModule(nn.Module):
203 | def __init__(self, N, C):
204 | super(PlaneEstimationModule, self).__init__()
205 | self.max_pool = nn.AdaptiveMaxPool2d(1)
206 | self.linear = nn.Linear(N*C, 3)
207 |
208 | self.linear.weight.data.fill_(0.)
209 | self.linear.bias.data.fill_(0.)
210 |
211 | def forward(self, x):
212 | B, N, C, H, W = x.shape
213 | x = x.view(B*N, C, H, W)
214 | x = self.max_pool(x)
215 | x = x.view(B, N*C)
216 | x = self.linear(x)
217 | z, pitch, roll = x[:, 0], x[:, 1], x[:, 2]
218 | return z, pitch, roll
219 |
220 |
221 | class IPM(nn.Module):
222 | def __init__(self, xbound, ybound, N, C, z_roll_pitch=False, visual=False, extrinsic=False, cuda=True):
223 | super(IPM, self).__init__()
224 | self.visual = visual
225 | self.z_roll_pitch = z_roll_pitch
226 | self.xbound = xbound
227 | self.ybound = ybound
228 | self.extrinsic = extrinsic
229 | self.w = int((xbound[1] - xbound[0]) / xbound[2])
230 | self.h = int((ybound[1] - ybound[0]) / ybound[2])
231 |
232 | if z_roll_pitch:
233 | self.plane_esti = PlaneEstimationModule(N, C)
234 | else:
235 | zs = torch.tensor([0.]).cuda()
236 | yaws = torch.tensor([0.]).cuda()
237 | rolls = torch.tensor([0.]).cuda()
238 | pitchs = torch.tensor([0.]).cuda()
239 | self.planes = plane_grid(self.xbound, self.ybound, zs, yaws, rolls, pitchs)[0]
240 |
241 | tri_mask = np.zeros((self.h, self.w))
242 | vertices = np.array([[0, 0], [0, self.h], [self.w, self.h]], np.int32)
243 | pts = vertices.reshape((-1, 1, 2))
244 | cv2.fillPoly(tri_mask, [pts], color=1.)
245 | self.tri_mask = torch.tensor(tri_mask[None, :, :, None])
246 | self.flipped_tri_mask = torch.flip(self.tri_mask, [2]).bool()
247 | if cuda:
248 | self.tri_mask = self.tri_mask.cuda()
249 | self.flipped_tri_mask = self.flipped_tri_mask.cuda()
250 | self.tri_mask = self.tri_mask.bool()
251 |
252 | def mask_warped(self, warped_fv_images):
253 | warped_fv_images[:, CAM_F, :, :self.w//2, :] *= 0 # CAM_FRONT
254 | warped_fv_images[:, CAM_FL] *= self.flipped_tri_mask # CAM_FRONT_LEFT
255 | warped_fv_images[:, CAM_FR] *= ~self.tri_mask # CAM_FRONT_RIGHT
256 | warped_fv_images[:, CAM_B, :, self.w//2:, :] *= 0 # CAM_BACK
257 | warped_fv_images[:, CAM_BL] *= self.tri_mask # CAM_BACK_LEFT
258 | warped_fv_images[:, CAM_BR] *= ~self.flipped_tri_mask # CAM_BACK_RIGHT
259 | return warped_fv_images
260 |
261 | def forward(self, images, Ks, RTs, translation, yaw_roll_pitch, post_RTs=None):
262 | images = images.permute(0, 1, 3, 4, 2).contiguous()
263 | B, N, H, W, C = images.shape
264 |
265 | if self.z_roll_pitch:
266 | # z, roll, pitch = self.plane_esti(images)
267 | zs = translation[:, 2]
268 | rolls = yaw_roll_pitch[:, 1]
269 | pitchs = yaw_roll_pitch[:, 2]
270 | # zs += z
271 | # rolls += roll
272 | # pitchs += pitch
273 | planes = plane_grid(self.xbound, self.ybound, zs, torch.zeros_like(rolls), rolls, pitchs)
274 | planes = planes.repeat(N, 1, 1)
275 | else:
276 | planes = self.planes
277 |
278 | images = images.reshape(B*N, H, W, C)
279 | warped_fv_images = ipm_from_parameters(images, planes, Ks, RTs, self.h, self.w, self.extrinsic, post_RTs)
280 | warped_fv_images = warped_fv_images.reshape((B, N, self.h, self.w, C))
281 | if self.visual:
282 | warped_fv_images = self.mask_warped(warped_fv_images)
283 |
284 | if self.visual:
285 | warped_topdown = warped_fv_images[:, CAM_F] + warped_fv_images[:, CAM_B] # CAM_FRONT + CAM_BACK
286 | warped_mask = warped_topdown == 0
287 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_FL][warped_mask] + warped_fv_images[:, CAM_FR][warped_mask]
288 | warped_mask = warped_topdown == 0
289 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_BL][warped_mask] + warped_fv_images[:, CAM_BR][warped_mask]
290 | return warped_topdown.permute(0, 3, 1, 2).contiguous()
291 | else:
292 | warped_topdown, _ = warped_fv_images.max(1)
293 | warped_topdown = warped_topdown.permute(0, 3, 1, 2).contiguous()
294 | warped_topdown = warped_topdown.view(B, C, self.h, self.w)
295 | return warped_topdown
296 |
297 |
298 |
--------------------------------------------------------------------------------
/model/HDMapNet/init:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/model/HDMapNet/ipm_net.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from .homography import IPM, bilinear_sampler
5 | from .utils import plane_grid_2d, get_rot_2d, cam_to_pixel
6 | from .base import CamEncode, BevEncode
7 |
8 |
9 | class IPMNet(nn.Module):
10 | def __init__(self, xbound, ybound, outC, camC=64, instance_seg=True, embedded_dim=16, cam_encoding=True, bev_encoding=True, z_roll_pitch=False):
11 | super(IPMNet, self).__init__()
12 | self.xbound = xbound
13 | self.ybound = ybound
14 | self.camC = camC
15 | self.downsample = 16
16 | if cam_encoding:
17 | self.ipm = IPM(xbound, ybound, N=6, C=camC, z_roll_pitch=z_roll_pitch, extrinsic=False)
18 | else:
19 | self.ipm = IPM(xbound, ybound, N=6, C=camC, visual=True, z_roll_pitch=z_roll_pitch, extrinsic=False)
20 | self.cam_encoding = cam_encoding
21 | if cam_encoding:
22 | self.camencode = CamEncode(camC)
23 | self.bev_encoding = bev_encoding
24 | if bev_encoding:
25 | self.bevencode = BevEncode(inC=camC, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim)
26 |
27 | def get_cam_feats(self, x):
28 | """Return B x N x D x H/downsample x W/downsample x C
29 | """
30 | B, N, C, imH, imW = x.shape
31 |
32 | x = x.view(B*N, C, imH, imW)
33 | x = self.camencode(x)
34 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample)
35 | return x
36 |
37 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
38 | B, N, _, _ = intrins.shape
39 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
40 | Ks[:, :, :3, :3] = intrins
41 |
42 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
43 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous()
44 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
45 | Ts[:, :, :3, 3] = -trans
46 | RTs = Rs @ Ts
47 |
48 | post_RTs = torch.eye(4, device=post_rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
49 | post_RTs[:, :, :3, :3] = post_rots
50 | post_RTs[:, :, :3, 3] = post_trans
51 |
52 | if self.cam_encoding:
53 | scale = torch.Tensor([
54 | [1/self.downsample, 0, 0, 0],
55 | [0, 1/self.downsample, 0, 0],
56 | [0, 0, 1, 0],
57 | [0, 0, 0, 1]
58 | ]).cuda()
59 | post_RTs = scale @ post_RTs
60 |
61 | return Ks, RTs, post_RTs
62 |
63 | def forward(self, points, points_mask, x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll):
64 | if self.cam_encoding:
65 | x = self.get_cam_feats(x)
66 |
67 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
68 | topdown = self.ipm(x, Ks, RTs, translation, yaw_pitch_roll, post_RTs)
69 |
70 | if self.bev_encoding:
71 | return self.bevencode(topdown)
72 | else:
73 | return topdown
74 |
75 |
76 | class TemporalIPMNet(IPMNet):
77 | def __init__(self, xbound, ybound, outC, camC=64, instance_seg=True, embedded_dim=16):
78 | super(TemporalIPMNet, self).__init__(xbound, ybound, outC, camC, instance_seg, embedded_dim)
79 |
80 | def get_cam_feats(self, x):
81 | """Return B x T x N x H/downsample x W/downsample x C
82 | """
83 | B, T, N, C, imH, imW = x.shape
84 |
85 | x = x.view(B*T*N, C, imH, imW)
86 | x = self.camencode(x)
87 | x = x.view(B, T, N, self.camC, imH//self.downsample, imW//self.downsample)
88 | return x
89 |
90 | def temporal_fusion(self, topdown, translation, yaw):
91 | B, T, C, H, W = topdown.shape
92 |
93 | if T == 1:
94 | return topdown[:, 0]
95 |
96 | grid = plane_grid_2d(self.xbound, self.ybound).view(1, 1, 2, H*W).repeat(B, T-1, 1, 1)
97 | rot0 = get_rot_2d(yaw[:, 1:])
98 | trans0 = translation[:, 1:, :2].view(B, T-1, 2, 1)
99 | rot1 = get_rot_2d(yaw[:, 0].view(B, 1).repeat(1, T-1))
100 | trans1 = translation[:, 0, :2].view(B, 1, 2, 1).repeat(1, T-1, 1, 1)
101 | grid = rot1.transpose(2, 3) @ grid
102 | grid = grid + trans1
103 | grid = grid - trans0
104 | grid = rot0 @ grid
105 | grid = grid.view(B*(T-1), 2, H, W).permute(0, 2, 3, 1).contiguous()
106 | grid = cam_to_pixel(grid, self.xbound, self.ybound)
107 | topdown = topdown.permute(0, 1, 3, 4, 2).contiguous()
108 | prev_topdown = topdown[:, 1:]
109 | warped_prev_topdown = bilinear_sampler(prev_topdown.reshape(B*(T-1), H, W, C), grid).view(B, T-1, H, W, C)
110 | topdown = torch.cat([topdown[:, 0].unsqueeze(1), warped_prev_topdown], axis=1)
111 | topdown = topdown.view(B, T, H, W, C)
112 | topdown = topdown.max(1)[0]
113 | topdown = topdown.permute(0, 3, 1, 2).contiguous()
114 | return topdown
115 |
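# temporal_fusion above warps the BEV features of previous frames into the current
# ego frame: a 2-D ground-plane grid is rotated and translated by the relative pose
# between each past frame and frame 0, the past top-down maps are bilinearly resampled
# on that grid, and all frames are fused with an element-wise max over time.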
116 | def forward(self, points, points_mask, x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll):
117 | x = self.get_cam_feats(x)
118 | B, T, N, C, h, w = x.shape
119 |
120 | x = x.view(B*T, N, C, h, w)
121 | intrins = intrins.view(B*T, N, 3, 3)
122 | rots = rots.view(B*T, N, 3, 3)
123 | trans = trans.view(B*T, N, 3)
124 | post_rots = post_rots.view(B*T, N, 3, 3)
125 | post_trans = post_trans.view(B*T, N, 3)
126 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
127 | topdown = self.ipm(x, Ks, RTs, translation, yaw_pitch_roll, post_RTs)
128 | _, C, H, W = topdown.shape
129 | topdown = topdown.view(B, T, C, H, W)
130 | topdown = self.temporal_fusion(topdown, translation, yaw_pitch_roll[..., 0])
131 | return self.bevencode(topdown)
132 |
--------------------------------------------------------------------------------
/model/HDMapNet/pointpillar.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch_scatter
4 |
5 | from .voxel import points_to_voxels
6 |
7 |
8 | class PillarBlock(nn.Module):
9 | def __init__(self, idims=64, dims=64, num_layers=1,
10 | stride=1):
11 | super(PillarBlock, self).__init__()
12 | layers = []
13 | self.idims = idims
14 | self.stride = stride
15 | for i in range(num_layers):
16 | layers.append(nn.Conv2d(self.idims, dims, 3, stride=self.stride,
17 | padding=1, bias=False))
18 | layers.append(nn.BatchNorm2d(dims))
19 | layers.append(nn.ReLU(inplace=True))
20 | self.idims = dims
21 | self.stride = 1
22 | self.layers = nn.Sequential(*layers)
23 |
24 | def forward(self, x):
25 | return self.layers(x)
26 |
27 |
28 | class PointNet(nn.Module):
29 | def __init__(self, idims=64, odims=64):
30 | super(PointNet, self).__init__()
31 | self.pointnet = nn.Sequential(
32 | nn.Conv1d(idims, odims, kernel_size=1, bias=False),
33 | nn.BatchNorm1d(odims),
34 | nn.ReLU(inplace=True)
35 | )
36 |
37 | def forward(self, points_feature, points_mask):
38 | batch_size, num_points, num_dims = points_feature.shape
39 | points_feature = points_feature.permute(0, 2, 1)
40 | mask = points_mask.view(batch_size, 1, num_points)
41 | return self.pointnet(points_feature) * mask
42 |
43 |
44 | class PointPillar(nn.Module):
45 | def __init__(self, C, xbound, ybound, zbound, embedded_dim=16, direction_dim=37):
46 | super(PointPillar, self).__init__()
47 | self.xbound = xbound
48 | self.ybound = ybound
49 | self.zbound = zbound
50 | self.embedded_dim = embedded_dim
51 | self.pn = PointNet(15, 64)
52 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1)
53 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2)
54 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2)
55 | self.up1 = nn.Sequential(
56 | nn.Conv2d(64, 64, 3, padding=1, bias=False),
57 | nn.BatchNorm2d(64),
58 | nn.ReLU(inplace=True)
59 | )
60 | self.up2 = nn.Sequential(
61 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
62 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False),
63 | nn.BatchNorm2d(128),
64 | nn.ReLU(inplace=True)
65 | )
66 | self.up3 = nn.Sequential(
67 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True),
68 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False),
69 | nn.BatchNorm2d(256),
70 | nn.ReLU(inplace=True)
71 | )
72 | self.conv_out = nn.Sequential(
73 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
74 | nn.BatchNorm2d(256),
75 | nn.ReLU(inplace=True),
76 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
77 | nn.BatchNorm2d(128),
78 | nn.ReLU(inplace=True),
79 | nn.Conv2d(128, C, 1),
80 | )
81 | self.instance_conv_out = nn.Sequential(
82 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
83 | nn.BatchNorm2d(256),
84 | nn.ReLU(inplace=True),
85 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
86 | nn.BatchNorm2d(128),
87 | nn.ReLU(inplace=True),
88 | nn.Conv2d(128, embedded_dim, 1),
89 | )
90 | self.direction_conv_out = nn.Sequential(
91 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
92 | nn.BatchNorm2d(256),
93 | nn.ReLU(inplace=True),
94 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
95 | nn.BatchNorm2d(128),
96 | nn.ReLU(inplace=True),
97 | nn.Conv2d(128, direction_dim, 1),
98 | )
99 |
100 | def forward(self, points, points_mask,
101 | x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll):
102 | points_xyz = points[:, :, :3]
103 | points_feature = points[:, :, 3:]
104 | voxels = points_to_voxels(
105 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound
106 | )
107 | points_feature = torch.cat(
108 | [points, # 5
109 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1
110 | voxels['local_points_xyz'], # 3
111 | voxels['point_centroids'], # 3
112 | points_xyz - voxels['voxel_centers'], # 3
113 | ], dim=-1
114 | )
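# Per-point feature is 5 (raw point) + 1 (voxel point count) + 3 (voxel-local xyz)
# + 3 (voxel centroid) + 3 (offset to voxel center) = 15 channels, matching PointNet(15, 64).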
115 | points_feature = self.pn(points_feature, voxels['points_mask'])
116 | voxel_feature = torch_scatter.scatter_mean(
117 | points_feature,
118 | torch.unsqueeze(voxels['voxel_indices'], dim=1),
119 | dim=2,
120 | dim_size=voxels['num_voxels'])
121 | batch_size = points.size(0)
122 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1])
123 | voxel_feature1 = self.block1(voxel_feature)
124 | voxel_feature2 = self.block2(voxel_feature1)
125 | voxel_feature3 = self.block3(voxel_feature2)
126 | voxel_feature1 = self.up1(voxel_feature1)
127 | voxel_feature2 = self.up2(voxel_feature2)
128 | voxel_feature3 = self.up3(voxel_feature3)
129 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1)
130 | return self.conv_out(voxel_feature).transpose(3, 2), self.instance_conv_out(voxel_feature).transpose(3, 2), self.direction_conv_out(voxel_feature).transpose(3, 2)
131 |
132 |
133 | class PointPillarEncoder(nn.Module):
134 | def __init__(self, C, xbound, ybound, zbound):
135 | super(PointPillarEncoder, self).__init__()
136 | self.xbound = xbound
137 | self.ybound = ybound
138 | self.zbound = zbound
139 | self.pn = PointNet(15, 64)
140 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1)
141 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2)
142 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2)
143 | self.up1 = nn.Sequential(
144 | nn.Conv2d(64, 64, 3, padding=1, bias=False),
145 | nn.BatchNorm2d(64),
146 | nn.ReLU(inplace=True)
147 | )
148 | self.up2 = nn.Sequential(
149 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
150 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False),
151 | nn.BatchNorm2d(128),
152 | nn.ReLU(inplace=True)
153 | )
154 | self.up3 = nn.Sequential(
155 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True),
156 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False),
157 | nn.BatchNorm2d(256),
158 | nn.ReLU(inplace=True)
159 | )
160 | self.conv_out = nn.Sequential(
161 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
162 | nn.BatchNorm2d(256),
163 | nn.ReLU(inplace=True),
164 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
165 | nn.BatchNorm2d(128),
166 | nn.ReLU(inplace=True),
167 | nn.Conv2d(128, C, 1),
168 | )
169 |
170 | def forward(self, points, points_mask):
171 | points_xyz = points[:, :, :3]
172 | points_feature = points[:, :, 3:]
173 | voxels = points_to_voxels(
174 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound
175 | )
176 | points_feature = torch.cat(
177 | [points, # 5
178 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1
179 | voxels['local_points_xyz'], # 3
180 | voxels['point_centroids'], # 3
181 | points_xyz - voxels['voxel_centers'], # 3
182 | ], dim=-1
183 | )
184 | points_feature = self.pn(points_feature, voxels['points_mask'])
185 | voxel_feature = torch_scatter.scatter_mean(
186 | points_feature,
187 | torch.unsqueeze(voxels['voxel_indices'], dim=1),
188 | dim=2,
189 | dim_size=voxels['num_voxels'])
190 | batch_size = points.size(0)
191 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1])
192 | voxel_feature1 = self.block1(voxel_feature)
193 | voxel_feature2 = self.block2(voxel_feature1)
194 | voxel_feature3 = self.block3(voxel_feature2)
195 | voxel_feature1 = self.up1(voxel_feature1)
196 | voxel_feature2 = self.up2(voxel_feature2)
197 | voxel_feature3 = self.up3(voxel_feature3)
198 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1)
199 | return self.conv_out(voxel_feature).transpose(3, 2)
200 |
--------------------------------------------------------------------------------
/model/HDMapNet/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def plane_grid_2d(xbound, ybound):
4 | xmin, xmax = xbound[0], xbound[1]
5 | num_x = int((xbound[1] - xbound[0]) / xbound[2])
6 | ymin, ymax = ybound[0], ybound[1]
7 | num_y = int((ybound[1] - ybound[0]) / ybound[2])
8 |
9 | y = torch.linspace(xmin, xmax, num_x).cuda()
10 | x = torch.linspace(ymin, ymax, num_y).cuda()
11 | y, x = torch.meshgrid(x, y)
12 | x = x.flatten()
13 | y = y.flatten()
14 |
15 | coords = torch.stack([x, y], axis=0)
16 | return coords
17 |
18 |
19 | def cam_to_pixel(points, xbound, ybound):
20 | new_points = torch.zeros_like(points)
21 | new_points[..., 0] = (points[..., 0] - xbound[0]) / xbound[2]
22 | new_points[..., 1] = (points[..., 1] - ybound[0]) / ybound[2]
23 | return new_points
24 |
25 |
26 | def get_rot_2d(yaw):
27 | sin_yaw = torch.sin(yaw)
28 | cos_yaw = torch.cos(yaw)
29 | rot = torch.zeros(list(yaw.shape) + [2, 2]).cuda()
30 | rot[..., 0, 0] = cos_yaw
31 | rot[..., 0, 1] = sin_yaw
32 | rot[..., 1, 0] = -sin_yaw
33 | rot[..., 1, 1] = cos_yaw
34 | return rot
35 |
36 |
37 |
--------------------------------------------------------------------------------
/model/HDMapNet/voxel.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch_scatter
4 |
5 |
6 | def pad_or_trim_to_np(x, shape, pad_val=0):
7 | shape = np.asarray(shape)
8 | pad = shape - np.minimum(np.shape(x), shape)
9 | zeros = np.zeros_like(pad)
10 | x = np.pad(x, np.stack([zeros, pad], axis=1), constant_values=pad_val)
11 | return x[:shape[0], :shape[1]]
12 |
13 |
14 | def raval_index(coords, dims):
15 | dims = torch.cat((dims, torch.ones(1, device=dims.device)), dim=0)[1:]
16 | dims = torch.flip(dims, dims=[0])
17 | dims = torch.cumprod(dims, dim=0) / dims[0]
18 | multiplier = torch.flip(dims, dims=[0])
19 | indices = torch.sum(coords * multiplier, dim=1)
20 | return indices
21 |
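# raval_index flattens integer (x, y, z) voxel coordinates into a single linear index
# in row-major order (the torch analogue of numpy.ravel_multi_index), so that
# torch_scatter can aggregate per-voxel statistics along one dimension.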
22 |
23 | def points_to_voxels(
24 | points_xyz,
25 | points_mask,
26 | grid_range_x,
27 | grid_range_y,
28 | grid_range_z
29 | ):
30 | batch_size, num_points, _ = points_xyz.shape
31 | voxel_size_x = grid_range_x[2]
32 | voxel_size_y = grid_range_y[2]
33 | voxel_size_z = grid_range_z[2]
34 | grid_size = np.asarray([
35 | (grid_range_x[1]-grid_range_x[0]) / voxel_size_x,
36 | (grid_range_y[1]-grid_range_y[0]) / voxel_size_y,
37 | (grid_range_z[1]-grid_range_z[0]) / voxel_size_z
38 | ]).astype('int32')
39 | voxel_size = np.asarray([voxel_size_x, voxel_size_y, voxel_size_z])
40 | voxel_size = torch.Tensor(voxel_size).to(points_xyz.device)
41 | num_voxels = grid_size[0] * grid_size[1] * grid_size[2]
42 | grid_offset = torch.Tensor([grid_range_x[0], grid_range_y[0], grid_range_z[0]]).to(points_xyz.device)
43 | shifted_points_xyz = points_xyz - grid_offset
44 | voxel_xyz = shifted_points_xyz / voxel_size
45 | voxel_coords = voxel_xyz.int()
46 | grid_size = torch.from_numpy(grid_size).to(points_xyz.device)
47 | grid_size = grid_size.int()
48 | zeros = torch.zeros_like(grid_size)
49 | voxel_paddings = ((points_mask < 1.0) |
50 | torch.any((voxel_coords >= grid_size) |
51 | (voxel_coords < zeros), dim=-1))
52 | voxel_indices = raval_index(
53 | torch.reshape(voxel_coords, [batch_size * num_points, 3]), grid_size)
54 | voxel_indices = torch.reshape(voxel_indices, [batch_size, num_points])
55 | voxel_indices = torch.where(voxel_paddings,
56 | torch.zeros_like(voxel_indices),
57 | voxel_indices)
58 | voxel_centers = ((0.5 + voxel_coords.float()) * voxel_size + grid_offset)
59 | voxel_coords = torch.where(torch.unsqueeze(voxel_paddings, dim=-1),
60 | torch.zeros_like(voxel_coords),
61 | voxel_coords)
62 | voxel_xyz = torch.where(torch.unsqueeze(voxel_paddings, dim=-1),
63 | torch.zeros_like(voxel_xyz),
64 | voxel_xyz)
65 | voxel_paddings = voxel_paddings.float()
66 |
67 | voxel_indices = voxel_indices.long()
68 | points_per_voxel = torch_scatter.scatter_sum(
69 | torch.ones((batch_size, num_points), dtype=voxel_coords.dtype, device=voxel_coords.device) * (1-voxel_paddings),
70 | voxel_indices,
71 | dim=1,
72 | dim_size=num_voxels
73 | )
74 |
75 | voxel_point_count = torch.gather(points_per_voxel,
76 | dim=1,
77 | index=voxel_indices)
78 |
79 |
80 | voxel_centroids = torch_scatter.scatter_mean(
81 | points_xyz,
82 | voxel_indices,
83 | dim=1,
84 | dim_size=num_voxels)
85 | point_centroids = torch.gather(voxel_centroids, dim=1, index=torch.unsqueeze(voxel_indices, dim=-1).repeat(1, 1, 3))
86 | local_points_xyz = points_xyz - point_centroids
87 |
88 | result = {
89 | 'local_points_xyz': local_points_xyz,
90 | 'shifted_points_xyz': shifted_points_xyz,
91 | 'point_centroids': point_centroids,
92 | 'points_xyz': points_xyz,
93 | 'grid_offset': grid_offset,
94 | 'voxel_coords': voxel_coords,
95 | 'voxel_centers': voxel_centers,
96 | 'voxel_indices': voxel_indices,
97 | 'voxel_paddings': voxel_paddings,
98 | 'points_mask': 1 - voxel_paddings,
99 | 'num_voxels': num_voxels,
100 | 'grid_size': grid_size,
101 | 'voxel_xyz': voxel_xyz,
102 | 'voxel_size': voxel_size,
103 | 'voxel_point_count': voxel_point_count,
104 | 'points_per_voxel': points_per_voxel
105 | }
106 |
107 |
108 | return result
109 |
--------------------------------------------------------------------------------
/model/VPN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .base import CamEncode, BevEncode
5 | from .pointpillar import PointPillarEncoder
6 | from .homography import IPM
7 |
8 | class TransformModule(nn.Module):
9 | def __init__(self,dim, num_view=4):
10 | super(TransformModule, self).__init__()
11 | self.num_view = num_view
12 | self.dim = dim
13 |
14 | self.mat_list = nn.ModuleList()
15 | for i in range(self.num_view):
16 | fc_transform = nn.Sequential(
17 | nn.Linear(self.dim[0] * self.dim[1], self.dim[2] * self.dim[3]),
18 | nn.ReLU(),
19 | nn.Linear(self.dim[2] * self.dim[3], self.dim[2] * self.dim[3]),
20 | nn.ReLU()
21 | )
22 | self.mat_list += [fc_transform]
23 |
24 | def forward(self, x):
25 | # shape x: B, V, C, H, W
26 | #B, N, C, H, W = x.shape
27 | #x = x.view(B, N, C, H * W)
28 | x = x.view(list(x.size()[:3]) + [self.dim[0] * self.dim[1]])
29 | view_comb = self.mat_list[0](x[:, 0])
30 | for index in range(x.size(1))[1:]:
31 | view_comb = view_comb + self.mat_list[index](x[:, index])
32 | view_comb = view_comb.view(list(view_comb.size()[:2]) + [self.dim[2], self.dim[3]])
33 | #view_comb = view_comb.view(B, C,self.dim[2], self.dim[3])
34 | return view_comb
35 |
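# TransformModule is a view-parsing-style transformer: each camera view gets its own
# fully connected mapping from the flattened front-view feature (dim[0]*dim[1]) to a
# flattened top-view feature (dim[2]*dim[3]), and the per-view outputs are summed
# into one fused BEV representation.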
36 |
37 | class VPNModel(nn.Module):
38 | def __init__(self, outC, camC=64, instance_seg=True, embedded_dim=16, extrinsic=False, lidar=False, xbound=None, ybound=None, zbound=None):
39 | super(VPNModel, self).__init__()
40 | self.camC = camC
41 | self.extrinsic = extrinsic
42 | self.downsample = 16
43 |
44 | self.camencode = CamEncode(camC)
45 |
46 | ipm_xbound = [-60,60,0.6] # -60 60 0.6 //200
47 | ipm_ybound = [-30,30,0.6] # -30 30 0.6 //100
48 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=6, C=self.camC, extrinsic=True)
49 | self.view_fusion = TransformModule(dim=(8, 22,32,88))
50 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
51 | self.up_sampler_2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
52 | self.lidar = lidar
53 | if lidar:
54 | self.pp = PointPillarEncoder(128, xbound, ybound, zbound)
55 | self.bevencode = BevEncode(inC=camC+128, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim)
56 | else:
57 | self.bevencode = BevEncode(inC=camC, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim)
58 |
59 |
60 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
61 | B, N, _, _ = intrins.shape
62 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
63 | # Ks[:, :, :3, :3] = intrins
64 |
65 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
66 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous()
67 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
68 | Ts[:, :, :3, 3] = -trans
69 | RTs = Rs @ Ts
70 |
71 | post_RTs = None
72 |
73 | return Ks, RTs, post_RTs
74 |
75 | def get_cam_feats(self, x):
76 | """Return B x N x D x H/downsample x W/downsample x C
77 | """
78 | B, N, C, imH, imW = x.shape
79 |
80 | x = x.view(B*N, C, imH, imW)
81 | x = self.camencode(x)
82 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample)
83 | return x
84 |
85 | def forward(self, x, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll):
86 | x = self.get_cam_feats(x)
87 | x = self.view_fusion(x)  # (B, C, dim[2], dim[3]) fused top-view features
88 | x = x.unsqueeze(1).repeat(1,4,1,1,1)
89 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
90 | topdown = self.ipm(x, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs)
91 | #topdown = x.mean(1)
92 | #topdown = self.up_sampler(x)
93 | topdown = self.up_sampler_2(topdown)
94 | # if self.lidar:
95 | # lidar_feature = self.pp(points, points_mask)
96 | # topdown = torch.cat([topdown, lidar_feature], dim=1)
97 | if self.lidar:
98 | lidar_feature = self.pp(lidar_data, lidar_mask)
99 | topdown = torch.cat([topdown, lidar_feature], dim=1)
100 | return self.bevencode(topdown)
101 |
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .hdmapnet import BiMapper
2 |
3 |
4 | def get_model(method, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, angle_class=36):
5 |
6 |
7 | if method == 'BiMapper':
8 | model = BiMapper(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred,
9 | direction_dim=angle_class, lidar=False)
10 | else:
11 | raise NotImplementedError(f'no model named {method!r}')
12 |
13 | return model
14 |
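# A minimal usage sketch (values are illustrative defaults, not taken from this file):
#
#   data_conf = {
#       'num_channels': 4, 'image_size': [128, 352],
#       'xbound': [-30.0, 30.0, 0.15], 'ybound': [-15.0, 15.0, 0.15],
#       'zbound': [-10.0, 10.0, 20.0], 'dbound': [4.0, 45.0, 1.0],
#       'thickness': 5, 'angle_class': 36,
#   }
#   model = get_model('BiMapper', data_conf, instance_seg=True,
#                     embedded_dim=16, direction_pred=True, angle_class=36)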
--------------------------------------------------------------------------------
/model/base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable, Function
4 | from efficientnet_pytorch import EfficientNet
5 | from torchvision.models.resnet import resnet18,resnet34
6 | import numpy as np
7 | import torch.nn.functional as F
8 | class DeformConv2D(nn.Module):
9 | def __init__(self, inc, outc, kernel_size=3, padding=1, bias=None):
10 | super(DeformConv2D, self).__init__()
11 | self.kernel_size = kernel_size
12 | self.padding = padding
13 | self.zero_padding = nn.ZeroPad2d(padding)
14 | self.conv_kernel = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)
15 |
16 | def forward(self, x, offset):
17 | dtype = offset.data.type()
18 | ks = self.kernel_size
19 | N = offset.size(1) // 2
20 |
21 | # Change offset's order from [x1, x2, ..., y1, y2, ...] to [x1, y1, x2, y2, ...]
22 | # Codes below are written to make sure same results of MXNet implementation.
23 | # You can remove them, and it won't influence the module's performance.
24 | offsets_index = Variable(torch.cat([torch.arange(0, 2*N, 2), torch.arange(1, 2*N+1, 2)]), requires_grad=False).type_as(x).long()
25 | offsets_index = offsets_index.unsqueeze(dim=0).unsqueeze(dim=-1).unsqueeze(dim=-1).expand(*offset.size())
26 | offset = torch.gather(offset, dim=1, index=offsets_index)
27 | # ------------------------------------------------------------------------
28 |
29 | if self.padding:
30 | x = self.zero_padding(x)
31 |
32 | # (b, 2N, h, w)
33 | p = self._get_p(offset, dtype)
34 |
35 | # (b, h, w, 2N)
36 | p = p.contiguous().permute(0, 2, 3, 1)
37 | q_lt = Variable(p.data, requires_grad=False).floor()
38 | q_rb = q_lt + 1
39 |
40 | q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long()
41 | q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long()
42 | q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], -1)
43 | q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], -1)
44 |
45 | # (b, h, w, N)
46 | mask = torch.cat([p[..., :N].lt(self.padding)+p[..., :N].gt(x.size(2)-1-self.padding),
47 | p[..., N:].lt(self.padding)+p[..., N:].gt(x.size(3)-1-self.padding)], dim=-1).type_as(p)
48 | mask = mask.detach()
49 | floor_p = p - (p - torch.floor(p))
50 | p = p*(1-mask) + floor_p*mask
51 | p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1)
52 |
53 | # bilinear kernel (b, h, w, N)
54 | g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
55 | g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
56 | g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
57 | g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))
58 |
59 | # (b, c, h, w, N)
60 | x_q_lt = self._get_x_q(x, q_lt, N)
61 | x_q_rb = self._get_x_q(x, q_rb, N)
62 | x_q_lb = self._get_x_q(x, q_lb, N)
63 | x_q_rt = self._get_x_q(x, q_rt, N)
64 |
65 | # (b, c, h, w, N)
66 | x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
67 | g_rb.unsqueeze(dim=1) * x_q_rb + \
68 | g_lb.unsqueeze(dim=1) * x_q_lb + \
69 | g_rt.unsqueeze(dim=1) * x_q_rt
70 |
71 | x_offset = self._reshape_x_offset(x_offset, ks)
72 | out = self.conv_kernel(x_offset)
73 |
74 | return out
75 |
76 | def _get_p_n(self, N, dtype):
77 | p_n_x, p_n_y = np.meshgrid(range(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1),
78 | range(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), indexing='ij')
79 | # (2N, 1)
80 | p_n = np.concatenate((p_n_x.flatten(), p_n_y.flatten()))
81 | p_n = np.reshape(p_n, (1, 2*N, 1, 1))
82 | p_n = Variable(torch.from_numpy(p_n).type(dtype), requires_grad=False)
83 |
84 | return p_n
85 |
86 | @staticmethod
87 | def _get_p_0(h, w, N, dtype):
88 | p_0_x, p_0_y = np.meshgrid(range(1, h+1), range(1, w+1), indexing='ij')
89 | p_0_x = p_0_x.flatten().reshape(1, 1, h, w).repeat(N, axis=1)
90 | p_0_y = p_0_y.flatten().reshape(1, 1, h, w).repeat(N, axis=1)
91 | p_0 = np.concatenate((p_0_x, p_0_y), axis=1)
92 | p_0 = Variable(torch.from_numpy(p_0).type(dtype), requires_grad=False)
93 |
94 | return p_0
95 |
96 | def _get_p(self, offset, dtype):
97 | N, h, w = offset.size(1)//2, offset.size(2), offset.size(3)
98 |
99 | # (1, 2N, 1, 1)
100 | p_n = self._get_p_n(N, dtype)
101 | # (1, 2N, h, w)
102 | p_0 = self._get_p_0(h, w, N, dtype)
103 | p = p_0 + p_n + offset
104 | return p
105 |
106 | def _get_x_q(self, x, q, N):
107 | b, h, w, _ = q.size()
108 | padded_w = x.size(3)
109 | c = x.size(1)
110 | # (b, c, h*w)
111 | x = x.contiguous().view(b, c, -1)
112 |
113 | # (b, h, w, N)
114 | index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y
115 | # (b, c, h*w*N)
116 | index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)
117 |
118 | x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)
119 |
120 | return x_offset
121 |
122 | @staticmethod
123 | def _reshape_x_offset(x_offset, ks):
124 | b, c, h, w, N = x_offset.size()
125 | x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1)
126 | x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks)
127 |
128 | return x_offset
129 |
130 | class Up(nn.Module):
131 | def __init__(self, in_channels, out_channels, scale_factor=2):
132 | super().__init__()
133 |
134 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear',
135 | align_corners=True)
136 |
137 | self.conv = nn.Sequential(
138 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
139 | nn.BatchNorm2d(out_channels),
140 | nn.ReLU(inplace=True),
141 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
142 | nn.BatchNorm2d(out_channels),
143 | nn.ReLU(inplace=True)
144 | )
145 | # self.offset = nn.Conv2d(out_channels, 18, kernel_size=3, padding=1, bias=False)
146 | # self.deconv =DeformConv2D(out_channels, out_channels, kernel_size=3, padding=1)
147 | # self.bn4 = nn.BatchNorm2d(out_channels)
148 |
149 |
150 | def forward(self, x1, x2):
151 | x1 = self.up(x1)
152 | x1 = torch.cat([x2, x1], dim=1)
153 | x3 = self.conv(x1)
154 | # offsets = self.offset(x3)
155 | # x4 = F.relu(self.deconv(x3, offsets))
156 | # x5 = self.bn4(x4)
157 | return x3
158 |
159 |
160 | class CamEncode(nn.Module):
161 | def __init__(self, C):
162 | super(CamEncode, self).__init__()
163 | self.C = C
164 |
165 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0")
166 | #self.trunk = resnet34(pretrained=False, zero_init_residual=True)
167 | self.up1 = Up(320+112, self.C)
168 | self.up2 = Up(40 + 64, self.C)
169 | self.up3 = Up(24 + 64, self.C)
170 | def get_eff_depth(self, x):
171 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231
172 | endpoints = dict()
173 |
174 | # Stem
175 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x)))
176 | prev_x = x
177 |
178 | # Blocks
179 | for idx, block in enumerate(self.trunk._blocks):
180 | drop_connect_rate = self.trunk._global_params.drop_connect_rate
181 | if drop_connect_rate:
182 | drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate
183 |             x = block(x, drop_connect_rate=drop_connect_rate)  # block forward pass
184 | if prev_x.size(2) > x.size(2):
185 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x
186 | prev_x = x
187 |
188 | # Head
189 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
190 | # 1 24 16 64 176
191 | # 2 24 24 32 88
192 | # 3 24 40 16 44
193 | # 4 24 112 8 22
194 | # 5 24 320 4 11
195 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4']) # 24 64 8 22
196 | x_1 = self.up2(x, endpoints['reduction_3']) # 24 64 16 44
197 | x_2 = self.up3(x_1,endpoints['reduction_2'] )#24 24 32 88
198 | # x_1 = endpoints['reduction_3'] # 64 16 44
199 | # x_2 = endpoints['reduction_2'] # 64 32 88
200 | return x,x_1,x_2
201 |
202 |
203 | def forward(self, x):
204 | return self.get_eff_depth(x)
205 |
206 |
207 | class BevEncode(nn.Module):
208 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37):
209 | super(BevEncode, self).__init__()
210 | trunk = resnet18(pretrained=False, zero_init_residual=True)
211 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3,
212 | bias=False)
213 | self.bn1 = trunk.bn1
214 | self.relu = trunk.relu
215 |
216 | self.layer1 = trunk.layer1
217 | self.layer2 = trunk.layer2
218 | self.layer3 = trunk.layer3
219 |
220 | self.up1 = Up(64 + 256, 256, scale_factor=4)
221 | self.up2 = nn.Sequential(
222 | nn.Upsample(scale_factor=2, mode='bilinear',
223 | align_corners=True),
224 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
225 | nn.BatchNorm2d(128),
226 | nn.ReLU(inplace=True),
227 | nn.Conv2d(128, outC, kernel_size=1, padding=0),
228 | )
229 |
230 | self.instance_seg = instance_seg
231 | if instance_seg:
232 | self.up1_embedded = Up(64 + 256, 256, scale_factor=4)
233 | self.up2_embedded = nn.Sequential(
234 | nn.Upsample(scale_factor=2, mode='bilinear',
235 | align_corners=True),
236 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
237 | nn.BatchNorm2d(128),
238 | nn.ReLU(inplace=True),
239 | nn.Conv2d(128, embedded_dim, kernel_size=1, padding=0),
240 | )
241 |
242 | self.direction_pred = direction_pred
243 | if direction_pred:
244 | self.up1_direction = Up(64 + 256, 256, scale_factor=4)
245 | self.up2_direction = nn.Sequential(
246 | nn.Upsample(scale_factor=2, mode='bilinear',
247 | align_corners=True),
248 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
249 | nn.BatchNorm2d(128),
250 | nn.ReLU(inplace=True),
251 | nn.Conv2d(128, direction_dim, kernel_size=1, padding=0),
252 | )
253 |
254 | def forward(self, x):
255 | x = self.conv1(x)# 64 100 200
256 | x = self.bn1(x)
257 | x = self.relu(x)
258 |
259 | x1 = self.layer1(x)#64 100 200
260 | x = self.layer2(x1)#128 50 100
261 | x2 = self.layer3(x)#256 25 50
262 |
263 | x = self.up1(x2, x1) #256 50 100
264 | x = self.up2(x)
265 |
266 | if self.instance_seg:
267 | x_embedded = self.up1_embedded(x2, x1)
268 | x_embedded = self.up2_embedded(x_embedded)
269 | else:
270 | x_embedded = None
271 |
272 | if self.direction_pred:
273 |             x_direction = self.up1_direction(x2, x1)
274 | x_direction = self.up2_direction(x_direction)
275 | else:
276 | x_direction = None
277 |
278 | return x, x1, x_direction
279 |
280 |
281 | class CamEncode2(nn.Module):
282 | def __init__(self,c):
283 | super(CamEncode2, self).__init__()
284 | trunk = resnet34(pretrained=False, zero_init_residual=True)
285 | self.c=c
286 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
287 | bias=False)
288 | self.layer1 = trunk.layer1
289 | self.layer2 = trunk.layer2
290 | self.layer3 = trunk.layer3
291 | self.layer4 = trunk.layer4
292 |
293 | self.up1 = Up(64 + 256, 256, scale_factor=4)
294 | self.up2 = nn.Sequential(
295 | # nn.Upsample(scale_factor=2, mode='bilinear',
296 | # align_corners=True),
297 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
298 | nn.BatchNorm2d(128),
299 | nn.ReLU(inplace=True),
300 | nn.Conv2d(128, self.c, kernel_size=1, padding=0),
301 | )
302 | self.up3 = nn.Sequential(
303 | nn.Conv2d(512, 128, kernel_size=3, padding=1, bias=False),
304 | nn.BatchNorm2d(128),
305 | nn.ReLU(inplace=True),
306 | nn.Conv2d(128, self.c, kernel_size=1, padding=0),
307 | )
308 | self.up4 = Up(64 + 256,64, scale_factor=2)
309 | self.up5 = Up(64 + 128, 64, scale_factor=2)
310 |
311 | def forward(self, x):
312 |
313 | x = self.conv1(x)
314 | x1 = self.layer1(x)#64 64 176
315 | x = self.layer2(x1)#128 32 88
316 | x2 = self.layer3(x)# 256 16 44
317 | x3 = self.layer4(x2)#512 8 22
318 | x3 = self.up3(x3)
319 | x4 = self.up4(x3,x2)#64 16 44
320 | x5 = self.up5(x4,x)#64 32 88
321 | # x = self.up1(x2, x)#32 88
322 | # x = self.up2(x)
323 |
324 | return x3,x4,x5
325 |
326 | class STNNet(nn.Module):
327 | def __init__(self):
328 | super(STNNet, self).__init__()
329 | # Spatial transformer localization-network
330 | self.localization = nn.Sequential(
331 | nn.Conv2d(64, 10, kernel_size=7),
332 | nn.MaxPool2d(2, stride=2),
333 | nn.ReLU(True),
334 | nn.Conv2d(10, 10, kernel_size=7),
335 | nn.MaxPool2d(2, stride=2),
336 | nn.ReLU(True)
337 | )
338 |
339 | # Regressor for the 3 * 2 affine matrix
340 | self.fc_loc = nn.Sequential(
341 | nn.Linear(10 * 20 * 45, 32),
342 | nn.ReLU(True),
343 |
344 | nn.Linear(32, 3 * 2)
345 | )
346 |
347 | # Initialize the weights/bias with identity transformation
348 | self.fc_loc[2].weight.data.zero_()
349 | self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))
350 |
351 | def forward(self, x):#4 64 100 200
352 | xs = self.localization(x)#4 10 20 45
353 | b,c,h,w = xs.shape
354 | xs = xs.view(-1, c * h * w)
355 | theta = self.fc_loc(xs)
356 | theta = theta.view(-1, 2, 3)
357 |
358 | grid = F.affine_grid(theta, x.size())
359 | x = F.grid_sample(x, grid)
360 |
361 | return x
362 |
363 | if __name__ == '__main__':
364 | stn = STNNet()
365 | x = torch.ones((4,64,100,200))
366 | y = stn(x)
--------------------------------------------------------------------------------
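The `DeformConv2D` block above expects a separate offset map with `2 * kernel_size**2` channels (18 for the default 3x3 kernel), as hinted by the commented-out offset head in `Up`. A minimal CPU sketch, assuming the repo root is on `PYTHONPATH`:

```python
# Illustrative sketch (not part of the repo): driving DeformConv2D with a learned offset map.
import torch
import torch.nn as nn
from model.base import DeformConv2D

feat = torch.randn(2, 64, 32, 88)                        # (B, C, H, W)
offset_head = nn.Conv2d(64, 2 * 3 * 3, kernel_size=3, padding=1)  # 18 offset channels
deform = DeformConv2D(inc=64, outc=64, kernel_size=3, padding=1)

offsets = offset_head(feat)                              # (B, 18, H, W)
out = deform(feat, offsets)                              # (B, 64, H, W)
print(out.shape)
```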
/model/hdmapnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from .homography import IPM
5 |
6 | from .base import CamEncode, BevEncode
7 | from data.utils import gen_dx_bx
8 | import torch.nn.functional as F
9 |
10 | class IPMTransformation(nn.Module):
11 | def __init__(self ,n_views=6):
12 | super(IPMTransformation, self).__init__()
13 | self.n_views = n_views
14 | self.hw_mat = []
15 | self.up1 = []
16 | self.up2 = []
17 |
18 |
19 | for i in range(self.n_views):
20 | fc_transform = nn.Sequential(
21 | nn.Conv2d(64, 128, kernel_size=1, padding=0),
22 | nn.ReLU(),
23 | nn.Dropout(0.5),
24 | nn.Conv2d(128, 128, kernel_size=1, padding=0),
25 | nn.ReLU(),
26 | nn.Dropout(0.5),
27 | )
28 | self.hw_mat.append(fc_transform)
29 | self.conv1 = nn.Sequential(
30 | nn.Conv2d(128+64, 128, kernel_size=3, padding=1, bias=False),
31 | nn.BatchNorm2d(128),
32 | nn.ReLU(inplace=True),
33 | nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
34 | nn.BatchNorm2d(128),
35 | nn.ReLU(inplace=True)
36 | )
37 | self.up1.append(self.conv1)
38 | self.conv2 = nn.Sequential(
39 | nn.Conv2d(128 + 64, 64, kernel_size=3, padding=1, bias=False),
40 | nn.BatchNorm2d(64),
41 | nn.ReLU(inplace=True),
42 | nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
43 | nn.BatchNorm2d(64),
44 | nn.ReLU(inplace=True)
45 | )
46 | self.up2.append(self.conv2)
47 | # fc_transform = nn.Sequential(
48 | # nn.Conv2d(768, 768, kernel_size=1, padding=0),
49 | # nn.ReLU(),
50 | # nn.Dropout(0.5),
51 | # nn.Conv2d(768, 768, kernel_size=1, padding=0),
52 | # nn.ReLU(),
53 | # nn.Dropout(0.5),
54 | # )
55 | # self.hw_mat.append(fc_transform)
56 | # self.conv1 = nn.Sequential(
57 | # nn.Conv2d(768+384, 384, kernel_size=3, padding=1, bias=False),
58 | # nn.LayerNorm(384),
59 | # nn.ReLU(inplace=True),
60 | # nn.Conv2d(384, 384, kernel_size=3, padding=1, bias=False),
61 | # nn.LayerNorm(384),
62 | # nn.ReLU(inplace=True)
63 | # )
64 | # self.up1.append(self.conv1)
65 | # self.conv2 = nn.Sequential(
66 | # nn.Conv2d(384 + 192, 64, kernel_size=3, padding=1, bias=False),
67 | # nn.LayerNorm(64),
68 | # nn.ReLU(inplace=True),
69 | # nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
70 | # nn.LayerNorm(64),
71 | # nn.ReLU(inplace=True)
72 | # )
73 | # self.up2.append(self.conv2)
74 | self.hw_mat = nn.ModuleList(self.hw_mat)
75 | self.up1 = nn.ModuleList(self.up1)
76 | self.up2 = nn.ModuleList(self.up2)
77 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
78 | def forward(self, x1,x2,x3):
79 | B, N, C, H, W = x1.shape
80 | outputs = []
81 | for i in range(N):
82 | tt = x1[:, i] #+ self.position[i](feat[:, i])
83 | output_1 = self.hw_mat[i](tt)#4 128 8 22
84 | output_1 = self.up_sampler(output_1)#4 128 16 44
85 | output_2 = self.up1[i](torch.cat([output_1,x2[:,i]],dim=1))#
86 | output_2 = self.up_sampler(output_2)
87 | output_3 = self.up2[i](torch.cat([output_2,x3[:, i]], dim=1))
88 | outputs.append(output_3)
89 | outputs = torch.stack(outputs, 1)
90 | return outputs
91 |
92 | class TransformModule(nn.Module):
93 | def __init__(self, fv_size, bv_size, n_view=6):
94 | super(TransformModule, self).__init__()
95 | self.num_view = n_view
96 | fv_dim = fv_size[0] * fv_size[1]
97 | bv_dim = bv_size[0] * bv_size[1]
98 | self.bv_size = bv_size
99 | self.mat_list = nn.ModuleList()
100 | for i in range(self.num_view):
101 | fc_transform = nn.Sequential(
102 | nn.Linear(fv_dim, bv_dim),
103 | nn.ReLU(),
104 | nn.Linear(bv_dim, bv_dim),
105 | nn.ReLU()
106 | )
107 | self.mat_list += [fc_transform]
108 |
109 | def forward(self, x):
110 | # shape x: B, V, C, H, W
111 | # x = x.view(list(x.size()[:3]) + [self.dim * self.dim])
112 | B, N, C, H, W = x.shape
113 | x = x.view(B, N, C, H * W)
114 | view_comb = self.mat_list[0](x[:, 0])
115 | for index in range(N)[1:]:
116 | view_comb = view_comb + self.mat_list[index](x[:, index])
117 | view_comb = view_comb.view(list(view_comb.size()[:2]) + [self.bv_size[0], self.bv_size[1]])
118 | #view_comb = view_comb.unsqueeze(dim=1).repeat(1,self.num_view,1,1,1)
119 | return view_comb
120 |
121 |
122 | class BiMapper(nn.Module):
123 | def __init__(self, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=36,
124 | lidar=False):
125 | super(BiMapper, self).__init__()
126 | self.camC = 64
127 | self.downsample = 16
128 | self.merge_flag = data_conf["merge"]
129 | dx, bx, nx = gen_dx_bx(data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
130 | self.view = 6
131 |
132 |         self.camencode = CamEncode(self.camC)  # image encoder for the img_origin branch
133 |         self.camencode_2 = CamEncode(self.camC)  # image encoder for the img branch
134 |         fv_size = (data_conf['image_size_i'][0] // self.downsample, data_conf['image_size_i'][1] // self.downsample)  # feature size after downsampling
135 | self.ipm_tran = IPMTransformation(n_views=self.view)
136 |
137 | bv_size_2 = (50,100)
138 | self.view_fore = TransformModule(fv_size=fv_size, bv_size=bv_size_2,n_view=self.view)
139 |
140 | ipm_xbound = [-30, 30, 0.3] # -60 60 0.6 //200
141 | ipm_ybound = [-15, 15, 0.3] # -30 30 0.6 //100
142 |
143 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=6, C=self.camC, extrinsic=True)
144 |
145 | self.up_sampler_fore = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
146 |
147 | self.merge = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1, padding=0),nn.BatchNorm2d(64),nn.ReLU(inplace=True))
148 |         # upsampling after fusion
149 | self.up_sampler_merge = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
150 |         # single-branch loss head
151 | self.conv = nn.Conv2d(64, 4, kernel_size=1, padding=0)
152 | self.conv1 = nn.Conv2d(4, 64, kernel_size=1, padding=0)
153 |
154 | self.lidar = lidar
155 |
156 | self.i_one = nn.Conv2d(64, 2, kernel_size=1, padding=0)
157 | self.f_one = nn.Conv2d(64, 2, kernel_size=1, padding=0)
158 | self.mode = 'cross'
159 |         self.loss_fn_i = torch.nn.BCEWithLogitsLoss()  # applies a sigmoid internally
160 |         self.loss_fn_f = torch.nn.BCEWithLogitsLoss()  # applies a sigmoid internally
161 |
162 | self.bevencode_merge = BevEncode(inC=self.camC, outC=data_conf['num_channels'], instance_seg=True,
163 | embedded_dim=embedded_dim, direction_pred=direction_pred,
164 | direction_dim=direction_dim + 1)
165 | self.B = data_conf['B']
166 | self.N = 6
167 | self.xmin, self.xmax = -10, 10
168 | self.num_x = 300
169 | self.zmin, self.zmax = 0, 20
170 | self.num_z = 600
171 |
172 |
173 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
174 | B, N, _, _ = intrins.shape
175 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
176 | # print(intrins[0][0])
177 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
178 | rots = torch.transpose(rots, dim0=2, dim1=3)
179 | Rs[:, :, :3, :3] = rots
180 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
181 | Ts[:, :, :3, 3] = -trans#-trans
182 | RTs = Rs @ Ts
183 |
184 | post_RTs = None
185 |
186 | return Ks, RTs, post_RTs
187 |
188 | def get_cam_feats(self, x):
189 | B, N, C, imH, imW = x.shape
190 | x = x.view(B * N, C, imH, imW)
191 | x, x_1,x_2 = self.camencode_2(x)
192 | x = x.view(B, N, self.camC, imH // self.downsample, imW // self.downsample)
193 | x_1 = x_1.view(B, N, self.camC, 2 * imH // self.downsample, 2 * imW // self.downsample)
194 | x_2 = x_2.view(B, N, self.camC, 4 * imH // self.downsample, 4 * imW // self.downsample)
195 | return x, x_1,x_2
196 |
197 | def get_cam_origin_fea(self, x):
198 | B, N, C, imH, imW = x.shape
199 | x = x.view(B * N, C, imH, imW)
200 | x, x_1, x_2 = self.camencode(x)
201 | x = x.view(B, N, self.camC, imH // self.downsample, imW // self.downsample)
202 | x_1 = x_1.view(B, N, self.camC, 2 * imH // self.downsample, 2 * imW // self.downsample)
203 | x_2 = x_2.view(B, N, self.camC, 4 * imH // self.downsample, 4 * imW // self.downsample)
204 | return x, x_1, x_2
205 |
206 |
207 | def cam_loss(self,v):
208 |
209 | v2 = self.conv(v)
210 | v3 = self.conv1(v2)
211 | return v2,v3+v
212 |
213 |
214 | def mutual_loss(self,v_i,v_f):
215 | if self.mode=="kl" :
216 | i_m = torch.sum(v_i, dim=1) # 4 100 200
217 | f_m = torch.sum(v_f, dim=1)
218 |
219 | B, h, w = i_m.shape
220 | i_m_flat = i_m.view(B, h * w)
221 | i_m_soft = F.softmax(i_m_flat, dim=-1)
222 | i_m_soft = i_m_soft.view(B, h, w)
223 |
224 | f_m_flat = f_m.view(B, h * w)
225 | f_m_soft = F.softmax(f_m_flat, dim=-1)
226 | f_m_soft = f_m_soft.view(B, h, w)
227 |
228 | mask = torch.ones_like(i_m_soft) * 1e-7
229 | i_m_soft = i_m_soft + mask
230 | f_m_soft = f_m_soft + mask
231 |             kl_2 = F.kl_div(f_m_soft.log(), i_m_soft, reduction='mean')  # first argument: predicted distribution, second: target distribution
232 |             kl_1 = F.kl_div(i_m_soft.log(), f_m_soft, reduction='mean')  # first argument: predicted distribution, second: target distribution
233 | if self.mode == "cross":
234 | i_m = self.i_one(v_i)
235 | f_m = self.f_one(v_f)#4 2 100 200
236 | i_m_soft = F.softmax(i_m,dim=1)
237 | f_m_soft = F.softmax(f_m, dim=1)
238 | i_m_mask = torch.ones_like(i_m_soft)
239 | i_m_mask[:,0] = i_m_soft[:,0]>0.5
240 | i_m_mask[:,1] = i_m_soft[:, 1] > 0.5
241 | kl_2 = self.loss_fn_i(f_m_soft,i_m_mask)
242 | f_m_mask = torch.ones_like(f_m_soft)
243 | f_m_mask[:, 0] = f_m_soft[:, 0] > 0.5
244 | f_m_mask[:, 1] = f_m_soft[:, 1] > 0.5
245 |             kl_1 = self.loss_fn_f(i_m_soft, f_m_mask)  # the global (VPN-style) branch acts as the target
246 |
247 | return kl_1,kl_2
248 |
249 | def forward(self, img_origin, img, trans, rots, intrins, post_trans, post_rots, car_trans,
250 | yaw_pitch_roll):
251 |
252 |
253 | x_31, x_41,x_51 = self.get_cam_origin_fea(img_origin) # 4 6 64 8 22plane_fea_1.contiguous()
254 | x_5 = self.ipm_tran(x_31, x_41,x_51) # 4 6 64 32 88
255 |
256 | x, x_1,x_2 = self.get_cam_feats(img) # 4 6 64 8 22
257 | x_6 = self.view_fore(x)# 4 64 50 100
258 | x_6 = self.up_sampler_fore(x_6)# 100 200
259 |
260 |
261 | b,n,c,h,w = x_5.shape
262 | x_5 = x_5.view(-1,c,h,w)
263 | x_view_l,x_51 = self.cam_loss(x_5)
264 | x_51 = x_51.view(b, n, c, h, w)
265 |
266 |
267 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
268 | x_71 = self.ipm(x_51, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs)#100,200
269 |
270 |
271 | if True:
272 | topdown = 0.1*x_71 + x_6 #
273 | topdown = self.merge(topdown)#xg
274 | mu_loss,mu_loss_i = self.mutual_loss(x_71,x_6)
275 | topdown = self.up_sampler_merge(topdown) # 200,400
276 | semantic,x1, direction = self.bevencode_merge(topdown)#
277 |
278 |
279 |
280 | return semantic,x_view_l,mu_loss,mu_loss_i#,mu_loss_m#,embedding#, direction
281 |
282 |
283 |
284 |
285 |
286 |
287 |
--------------------------------------------------------------------------------
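In `BiMapper.mutual_loss` with `mode='cross'`, each BEV stream produces a 2-channel softmax, thresholds it at 0.5 to form a pseudo-mask, and that mask supervises the other stream through `BCEWithLogitsLoss`. A self-contained CPU sketch of that mechanism (illustration only; it re-states the logic outside the class, mirroring the repo's choice of feeding softmax outputs into the logits-based loss):

```python
# Standalone sketch of the 'cross' mutual supervision between the two BEV streams.
import torch
import torch.nn as nn
import torch.nn.functional as F

i_one = nn.Conv2d(64, 2, kernel_size=1)      # head for the IPM stream
f_one = nn.Conv2d(64, 2, kernel_size=1)      # head for the global (VPN-style) stream
bce = nn.BCEWithLogitsLoss()

v_i = torch.randn(4, 64, 100, 200)           # IPM-stream BEV features
v_f = torch.randn(4, 64, 100, 200)           # global-stream BEV features

i_soft = F.softmax(i_one(v_i), dim=1)
f_soft = F.softmax(f_one(v_f), dim=1)

i_mask = (i_soft > 0.5).float()              # pseudo-target from the IPM stream
f_mask = (f_soft > 0.5).float()              # pseudo-target from the global stream

loss_f = bce(f_soft, i_mask)                 # supervise global stream with the IPM mask
loss_i = bce(i_soft, f_mask)                 # supervise IPM stream with the global mask
print(loss_f.item(), loss_i.item())
```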
/model/homography.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import cv2
6 | from .base import STNNet
7 | CAM_FL = 0
8 | CAM_F = 1
9 | CAM_FR = 2
10 | CAM_BL = 3
11 | CAM_B = 4
12 | CAM_BR = 5
13 |
14 |
15 | # =========================================================
16 | # Projections
17 | # =========================================================
18 | def rotation_from_euler(rolls, pitchs, yaws, cuda=True):
19 | """
20 | Get rotation matrix
21 | Args:
22 |         roll, pitch, yaw: rotation angles in radians (used directly with sin/cos)
23 |
24 | Returns:
25 | R: [B, 4, 4]
26 | """
27 | B = len(rolls)
28 |
29 | si, sj, sk = torch.sin(rolls), torch.sin(pitchs), torch.sin(yaws)
30 | ci, cj, ck = torch.cos(rolls), torch.cos(pitchs), torch.cos(yaws)
31 | cc, cs = ci * ck, ci * sk
32 | sc, ss = si * ck, si * sk
33 |
34 | R = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
35 | if cuda:
36 | R = R.cuda()
37 | R[:, 0, 0] = cj * ck
38 | R[:, 0, 1] = sj * sc - cs
39 | R[:, 0, 2] = sj * cc + ss
40 | R[:, 1, 0] = cj * sk
41 | R[:, 1, 1] = sj * ss + cc
42 | R[:, 1, 2] = sj * cs - sc
43 | R[:, 2, 0] = -sj
44 | R[:, 2, 1] = cj * si
45 | R[:, 2, 2] = cj * ci
46 | return R
47 |
48 |
49 | def perspective(cam_coords, proj_mat, h, w, extrinsic, offset=None):
50 | """
51 | P = proj_mat @ (x, y, z, 1)
52 | Project cam2pixel
53 |
54 | Args:
55 | cam_coords: [B, 4, npoints]
56 | proj_mat: [B, 4, 4]
57 |
58 | Returns:
59 | pix coords: [B, h, w, 2]
60 | """
61 | eps = 1e-7
62 | pix_coords = proj_mat @ cam_coords#24 4 20000 = 24 4 4 * 4 20000
63 |
64 | N, _, _ = pix_coords.shape
65 |
66 | if extrinsic:
67 | # pix_coords[:, 0] += offset[0] / 2
68 | # pix_coords[:, 2] -= offset[1] / 8
69 | # pix_coords[:, 0] = pix_coords[:, 0] / pix_coords[:, 1]
70 | # pix_coords[:, 2] = pix_coords[:, 2] / pix_coords[:, 1]
71 | pix_coords = torch.stack([pix_coords[:, 2], pix_coords[:, 0]], axis=1)
72 | else:
73 | pix_coords = pix_coords[:, :2, :] / (pix_coords[:, 2, :][:, None, :] + eps)
74 | pix_coords = pix_coords.view(N, 2, h, w)
75 | pix_coords = pix_coords.permute(0, 2, 3, 1).contiguous()
76 | return pix_coords
77 |
78 |
79 | def bilinear_sampler(imgs, pix_coords):
80 | """
81 | Construct a new image by bilinear sampling from the input image.
82 | Args:
83 | imgs: [B, H, W, C]
84 | pix_coords: [B, h, w, 2]
85 | :return:
86 | sampled image [B, h, w, c]
87 | """
88 | B, img_h, img_w, img_c = imgs.shape
89 | B, pix_h, pix_w, pix_c = pix_coords.shape
90 | out_shape = (B, pix_h, pix_w, img_c)
91 |
92 | pix_x, pix_y = torch.split(pix_coords, 1, dim=-1) # [B, pix_h, pix_w, 1]
93 |
94 |     # convert metric (ego-frame) coordinates into pixel coordinates
95 | pix_x = (pix_x-3)/0.3
96 | pix_y = (pix_y+5)/0.3
97 |
98 |
99 | # pix_x = pix_x/ 0.1 + 44
100 | # pix_y = pix_y/ 0.5 + 16
101 |
102 | # Rounding
103 | pix_x0 = torch.floor(pix_x)
104 | pix_x1 = pix_x0 + 1
105 | pix_y0 = torch.floor(pix_y)
106 | pix_y1 = pix_y0 + 1
107 |
108 | # Clip within image boundary
109 | y_max = (img_h - 1)#32
110 | x_max = (img_w - 1)#88
111 | pix_x = torch.clip(pix_x, 0, x_max) # z,
112 | pix_y = torch.clip(pix_y, 0, y_max) # x,
113 |
114 | pix_x0 = torch.clip(pix_x0, 0, x_max)#z,
115 | pix_y0 = torch.clip(pix_y0, 0, y_max)#x,
116 | pix_x1 = torch.clip(pix_x1, 0, x_max)#x_max
117 | pix_y1 = torch.clip(pix_y1, 0, y_max)#0, y_max
118 |
119 |
120 | # Weights [B, pix_h, pix_w, 1]
121 | wt_x0 = pix_x1 - pix_x
122 | wt_x1 = pix_x - pix_x0
123 | wt_y0 = pix_y1 - pix_y
124 | wt_y1 = pix_y - pix_y0
125 |
126 | # indices in the image to sample from
127 | dim = img_w
128 |
129 | # Apply the lower and upper bound pix coord
130 | base_y0 = pix_y0 * dim
131 | base_y1 = pix_y1 * dim
132 |
133 |     # 4 corner vertices
134 | idx00 = (pix_x0 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long()
135 | idx01 = (pix_x0 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long()
136 | idx10 = (pix_x1 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long()
137 | idx11 = (pix_x1 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long()
138 |
139 | # Gather pixels from image using vertices
140 | imgs_flat = imgs.reshape([B, -1, img_c])
141 |
142 | im00 = torch.gather(imgs_flat, 1, idx00).reshape(out_shape)
143 | im01 = torch.gather(imgs_flat, 1, idx01).reshape(out_shape)
144 | im10 = torch.gather(imgs_flat, 1, idx10).reshape(out_shape)
145 | im11 = torch.gather(imgs_flat, 1, idx11).reshape(out_shape)
146 |
147 | # Apply weights [pix_h, pix_w, 1]
148 | w00 = wt_x0 * wt_y0
149 | w01 = wt_x0 * wt_y1
150 | w10 = wt_x1 * wt_y0
151 | w11 = wt_x1 * wt_y1
152 | output = w00 * im00 + w01 * im01 + w10 * im10 + w11 * im11
153 | return output
154 |
155 |
156 | def plane_grid(xbound, ybound, zs, yaws, rolls, pitchs, h_w, cuda=True):
157 | B = len(zs)
158 |
159 | xmin, xmax = xbound[0], xbound[1]#-30 30
160 | num_x = int((xbound[1] - xbound[0]) / xbound[2])
161 | ymin, ymax = ybound[0], ybound[1]
162 | num_y = int((ybound[1] - ybound[0]) / ybound[2])
163 |
164 | # y = torch.linspace(xmin, xmax, num_x, dtype=torch.double).cuda()
165 | # x = torch.linspace(ymin, ymax, num_y, dtype=torch.double).cuda()
166 | y = torch.linspace(xmin, xmax, num_x)
167 | x = torch.linspace(ymin, ymax, num_y)
168 | if cuda:
169 | x = x.cuda()
170 | y = y.cuda()
171 |
172 | y, x = torch.meshgrid(x, y)
173 |
174 | x = x.flatten()
175 | y = y.flatten()
176 |
177 | x = x.unsqueeze(0).repeat(B, 1)
178 | y = y.unsqueeze(0).repeat(B, 1)
179 |
180 | # z = torch.ones_like(x, dtype=torch.double).cuda() * zs.view(-1, 1)
181 | # d = torch.ones_like(x, dtype=torch.double).cuda()
182 | z = torch.ones_like(x)
183 | z = z*h_w
184 | d = torch.ones_like(x)
185 | if cuda:
186 | z = z.cuda()
187 | d = d.cuda()
188 |
189 | coords = torch.stack([x, y, z, d], axis=1)
190 |
191 | rotation_matrix = rotation_from_euler(pitchs, rolls, yaws, cuda)
192 |
193 | coords = rotation_matrix @ coords
194 | coords_v = coords.cpu().numpy()
195 | return coords
196 |
197 |
198 | def ipm_from_parameters(image, xyz, K, RT, target_h, target_w, extrinsic, post_RT=None):
199 | """
200 | :param image: [B, H, W, C]
201 | :param xyz: [B, 4, npoints]
202 | :param K: [B, 4, 4]
203 | :param RT: [B, 4, 4]
204 | :param target_h: int
205 | :param target_w: int
206 | :return: warped_images: [B, target_h, target_w, C]
207 | """
208 | P = K @ RT#4 6 4 4
209 | if post_RT is not None:
210 | P = post_RT @ P
211 | P = P.reshape(-1, 4, 4)#24 4 4
212 |     pixel_coords = perspective(xyz, P, target_h, target_w, extrinsic, image.shape[1:3])  # project plane points to pixel coordinates
213 | image2 = bilinear_sampler(image, pixel_coords)
214 |
215 | image2 = image2.type_as(image)
216 | return image2
217 |
218 |
219 | class PlaneEstimationModule(nn.Module):
220 | def __init__(self, N, C):
221 | super(PlaneEstimationModule, self).__init__()
222 | self.max_pool = nn.AdaptiveMaxPool2d(1)
223 | self.linear = nn.Linear(N*C, 3)
224 |
225 | self.linear.weight.data.fill_(0.)
226 | self.linear.bias.data.fill_(0.)
227 |
228 | def forward(self, x):
229 | B, N, C, H, W = x.shape
230 | x = x.view(B*N, C, H, W)
231 | x = self.max_pool(x)
232 | x = x.view(B, N*C)
233 | x = self.linear(x)
234 | z, pitch, roll = x[:, 0], x[:, 1], x[:, 2]
235 | return z, pitch, roll
236 |
237 |
238 | class IPM(nn.Module):
239 | def __init__(self, xbound, ybound, N, C, z_roll_pitch=False, visual=False, extrinsic=False, cuda=True):
240 | super(IPM, self).__init__()
241 | self.visual = visual
242 | self.z_roll_pitch = z_roll_pitch
243 | self.xbound = xbound
244 | self.ybound = ybound
245 | self.extrinsic = extrinsic
246 | self.w = int((xbound[1] - xbound[0]) / xbound[2])
247 | self.h = int((ybound[1] - ybound[0]) / ybound[2])
248 | self.conv = nn.Conv2d(128, 64, kernel_size=1, padding=0)
249 | if z_roll_pitch:
250 | self.plane_esti = PlaneEstimationModule(N, C)
251 | else:
252 | zs = torch.tensor([0]).cuda()
253 | yaws = torch.tensor([0]).cuda()
254 | rolls = torch.tensor([0]).cuda()
255 | pitchs = torch.tensor([0]).cuda()
256 | self.planes_1 = plane_grid(self.xbound, self.ybound, zs, yaws, rolls, pitchs,0.5)[0]
257 | #self.planes_2 = plane_grid(self.xbound, self.ybound, zs, yaws, rolls, pitchs, 1.3)[0]
258 |
259 | tri_mask = np.zeros((self.h, self.w))
260 | vertices = np.array([[0, 0], [0, self.h], [self.w, self.h]], np.int32)
261 | pts = vertices.reshape((-1, 1, 2))
262 | cv2.fillPoly(tri_mask, [pts], color=1.)
263 | self.tri_mask = torch.tensor(tri_mask[None, :, :, None])
264 | self.flipped_tri_mask = torch.flip(self.tri_mask, [2]).bool()
265 | if cuda:
266 | self.tri_mask = self.tri_mask.cuda()
267 | self.flipped_tri_mask = self.flipped_tri_mask.cuda()
268 | self.tri_mask = self.tri_mask.bool()
269 | # self.STN_n = []
270 | # for i in range(6):
271 | # self.STN_n.append(STNNet())
272 | # self.STN_n = nn.ModuleList(self.STN_n)
273 | def mask_warped(self, warped_fv_images):
274 | warped_fv_images[:, CAM_F, :, :self.w//2, :] *= 0 # CAM_FRONT
275 | warped_fv_images[:, CAM_FL] *= self.flipped_tri_mask # CAM_FRONT_LEFT
276 | warped_fv_images[:, CAM_FR] *= ~self.tri_mask # CAM_FRONT_RIGHT
277 | warped_fv_images[:, CAM_B, :, self.w//2:, :] *= 0 # CAM_BACK
278 | warped_fv_images[:, CAM_BL] *= self.tri_mask # CAM_BACK_LEFT
279 | warped_fv_images[:, CAM_BR] *= ~self.flipped_tri_mask # CAM_BACK_RIGHT
280 | return warped_fv_images
281 | def STN_enhance(self,x):
282 | #b,n,h,w,c = x.shape
283 | x = x.permute(0, 1, 4, 2, 3).contiguous()
284 | outputs = []
285 | for i in range(6):
286 | x_i = self.STN_n[i](x[:,i,:,:,:])
287 | outputs.append(x_i)
288 | outputs = torch.stack(outputs, 1)
289 | return outputs
290 | def forward(self, images, Ks, RTs, translation, yaw_roll_pitch, post_RTs=None):
291 | images = images.permute(0, 1, 3, 4, 2).contiguous()
292 | B, N, H, W, C = images.shape
293 |
294 | if self.z_roll_pitch:
295 | # z, roll, pitch = self.plane_esti(images)
296 | zs = translation[:, 2]
297 | rolls = yaw_roll_pitch[:, 1]
298 | pitchs = yaw_roll_pitch[:, 2]
299 | # zs += z
300 | # rolls += roll
301 | # pitchs += pitch
302 | planes = plane_grid(self.xbound, self.ybound, zs, torch.zeros_like(rolls), rolls, pitchs)
303 | planes = planes.repeat(N, 1, 1)
304 | else:
305 | planes_1 = self.planes_1#4 20000
306 | #planes_2 = self.planes_2 # 4 20000
307 |
308 | images = images.reshape(B*N, H, W, C)
309 | warped_fv_images_1 = ipm_from_parameters(images, planes_1, Ks, RTs, self.h, self.w, self.extrinsic, post_RTs)
310 | # warped_fv_images_2 = ipm_from_parameters(images, planes_2, Ks, RTs, self.h, self.w, self.extrinsic, post_RTs)
311 | # warped_fv_images = torch.cat((warped_fv_images_1,warped_fv_images_2),dim=-1)#24 100 200 128
312 | # warped_fv_images = warped_fv_images.permute(0,3,1,2).contiguous()
313 | # warped_fv_images = self.conv(warped_fv_images)#24 64 100 200
314 | # warped_fv_images = warped_fv_images.permute(0, 2, 3, 1).contiguous()
315 | warped_fv_images = warped_fv_images_1.reshape((B, N, self.h, self.w, C))
316 | if self.visual:
317 | warped_fv_images = self.mask_warped(warped_fv_images)
318 |
319 | if self.visual:
320 | warped_topdown = warped_fv_images[:, CAM_F] + warped_fv_images[:, CAM_B] # CAM_FRONT + CAM_BACK
321 | warped_mask = warped_topdown == 0
322 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_FL][warped_mask] + warped_fv_images[:, CAM_FR][warped_mask]
323 | warped_mask = warped_topdown == 0
324 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_BL][warped_mask] + warped_fv_images[:, CAM_BR][warped_mask]
325 | return warped_topdown.permute(0, 3, 1, 2).contiguous()
326 | else:
327 |
328 | #warped_fv_images = self.STN_enhance(warped_fv_images)#4 6 64 100 200
329 |
330 | warped_topdown, _ = warped_fv_images.max(1)
331 |             #warped_topdown = torch.sum(warped_fv_images,dim=1)  # sum over camera views
332 | warped_topdown = warped_topdown.permute(0, 3, 1, 2).contiguous()
333 | warped_topdown = warped_topdown.view(B, C, self.h, self.w)
334 | return warped_topdown
335 |
336 |
337 |
--------------------------------------------------------------------------------
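A usage sketch for the `IPM` module, mirroring how `BiMapper` constructs it in `model/hdmapnet.py`. A CUDA device is assumed (the module hard-codes `.cuda()` calls), and the identity intrinsics/extrinsics are placeholders, not calibrated values:

```python
# Sketch only: project per-view feature maps onto the BEV plane with the IPM module.
import torch
from model.homography import IPM

ipm = IPM(xbound=[-30, 30, 0.3], ybound=[-15, 15, 0.3], N=6, C=64, extrinsic=True).cuda()

B, N = 2, 6
feats = torch.randn(B, N, 64, 32, 88).cuda()                 # per-view feature maps
Ks = torch.eye(4).view(1, 1, 4, 4).repeat(B, N, 1, 1).cuda()  # placeholder intrinsics
RTs = torch.eye(4).view(1, 1, 4, 4).repeat(B, N, 1, 1).cuda() # placeholder extrinsics
car_trans = torch.zeros(B, 3).cuda()
yaw_pitch_roll = torch.zeros(B, 3).cuda()

topdown = ipm(feats, Ks, RTs, car_trans, yaw_pitch_roll)      # (B, 64, 100, 200)
print(topdown.shape)
```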
/model/init:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/model/lift_splat.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
3 | Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot.
4 | Authors: Jonah Philion and Sanja Fidler
5 | """
6 |
7 | import torch
8 | from torch import nn
9 |
10 | from efficientnet_pytorch import EfficientNet
11 | from torchvision.models.resnet import resnet18
12 | from data.utils import gen_dx_bx
13 | class Up(nn.Module):
14 | def __init__(self, in_channels, out_channels, scale_factor=2):
15 | super().__init__()
16 |
17 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear',
18 | align_corners=True)
19 |
20 | self.conv = nn.Sequential(
21 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
22 | nn.BatchNorm2d(out_channels),
23 | nn.ReLU(inplace=True),
24 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
25 | nn.BatchNorm2d(out_channels),
26 | nn.ReLU(inplace=True)
27 | )
28 |
29 | def forward(self, x1, x2):
30 | x1 = self.up(x1)
31 | x1 = torch.cat([x2, x1], dim=1)
32 | return self.conv(x1)
33 |
34 | class CamEncode(nn.Module):
35 | def __init__(self, D, C, downsample):
36 | super(CamEncode, self).__init__()
37 | self.D = D
38 | self.C = C
39 |
40 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0")
41 |
42 | self.up1 = Up(320+112, 512)
43 | self.depthnet = nn.Conv2d(512, self.D + self.C, kernel_size=1, padding=0)
44 |
45 | def get_depth_dist(self, x, eps=1e-20):
46 | return x.softmax(dim=1)
47 |
48 | def get_depth_feat(self, x):
49 | x = self.get_eff_depth(x)# 20 512 8 22
50 |         # Depth
51 |         x = self.depthnet(x)# 20 105 8 22
52 | 
53 |         depth = self.get_depth_dist(x[:, :self.D])# 20 41 8 22, learned depth distribution
54 | 
55 |         new_x = depth.unsqueeze(1) * x[:, self.D:(self.D + self.C)].unsqueeze(2)# 64-dim image features weighted by the predicted depth
56 |
57 | return depth, new_x
58 |
59 | def get_eff_depth(self, x):
60 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231
61 | endpoints = dict()
62 |
63 | # Stem
64 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x)))
65 | prev_x = x
66 |
67 | # Blocks
68 | for idx, block in enumerate(self.trunk._blocks):
69 | drop_connect_rate = self.trunk._global_params.drop_connect_rate
70 | if drop_connect_rate:
71 | drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate
72 | x = block(x, drop_connect_rate=drop_connect_rate)
73 | if prev_x.size(2) > x.size(2):
74 | endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x
75 | prev_x = x
76 |
77 | # Head
78 | endpoints['reduction_{}'.format(len(endpoints)+1)] = x
79 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4'])
80 | return x
81 |
82 | def forward(self, x):
83 | depth, x = self.get_depth_feat(x)
84 |
85 | return x
86 |
87 |
88 | class BevEncode(nn.Module):
89 | def __init__(self, inC, outC):
90 | super(BevEncode, self).__init__()
91 |
92 | trunk = resnet18(pretrained=False, zero_init_residual=True)
93 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3,
94 | bias=False)
95 | self.bn1 = trunk.bn1
96 | self.relu = trunk.relu
97 |
98 | self.layer1 = trunk.layer1
99 | self.layer2 = trunk.layer2
100 | self.layer3 = trunk.layer3
101 |
102 | self.up1 = Up(64+256, 256, scale_factor=4)
103 | self.up2 = nn.Sequential(
104 | nn.Upsample(scale_factor=2, mode='bilinear',
105 | align_corners=True),
106 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
107 | nn.BatchNorm2d(128),
108 | nn.ReLU(inplace=True),
109 | nn.Conv2d(128, outC, kernel_size=1, padding=0),
110 | )
111 |
112 | def forward(self, x):
113 | x = self.conv1(x)
114 | x = self.bn1(x)
115 | x = self.relu(x)
116 |
117 | x1 = self.layer1(x)
118 | x = self.layer2(x1)
119 | x = self.layer3(x)
120 |
121 | x = self.up1(x, x1)
122 | x = self.up2(x)
123 |
124 | return x
125 |
126 |
127 | def cumsum_trick(x, geom_feats, ranks):
128 | x = x.cumsum(0)
129 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool)
130 | kept[:-1] = (ranks[1:] != ranks[:-1])
131 |
132 | x, geom_feats = x[kept], geom_feats[kept]
133 | x = torch.cat((x[:1], x[1:] - x[:-1]))
134 |
135 | return x, geom_feats
136 |
137 |
138 | class QuickCumsum(torch.autograd.Function):
139 | @staticmethod
140 | def forward(ctx, x, geom_feats, ranks):
141 | x = x.cumsum(0)
142 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool)
143 | kept[:-1] = (ranks[1:] != ranks[:-1])
144 |
145 | x, geom_feats = x[kept], geom_feats[kept]
146 | x = torch.cat((x[:1], x[1:] - x[:-1]))
147 |
148 | # save kept for backward
149 | ctx.save_for_backward(kept)
150 |
151 | # no gradient for geom_feats
152 | ctx.mark_non_differentiable(geom_feats)
153 |
154 | return x, geom_feats
155 |
156 | @staticmethod
157 | def backward(ctx, gradx, gradgeom):
158 | kept, = ctx.saved_tensors
159 | back = torch.cumsum(kept, 0)
160 | back[kept] -= 1
161 |
162 | val = gradx[back]
163 |
164 | return val, None, None
165 |
166 |
167 | class LiftSplat(nn.Module):
168 | def __init__(self, grid_conf, instance_seg, embedded_dim):#, data_aug_conf
169 | super(LiftSplat, self).__init__()
170 | self.grid_conf = grid_conf
171 | self.data_aug_conf = data_aug_conf = {
172 | 'resize_lim': (0.193, 0.225),
173 | 'final_dim': (128, 352),
174 |
175 | }
176 |
177 | dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'],
178 | self.grid_conf['ybound'],
179 | self.grid_conf['zbound'],
180 | )
181 | self.dx = nn.Parameter(dx, requires_grad=False)
182 | self.bx = nn.Parameter(bx, requires_grad=False)
183 | self.nx = nn.Parameter(nx, requires_grad=False)
184 |
185 | self.downsample = 16
186 | self.camC = 64
187 | self.frustum = self.create_frustum()
188 |         # D x H/downsample x W/downsample x 3
189 | self.D, _, _, _ = self.frustum.shape
190 | self.camencode = CamEncode( self.D, self.camC, self.downsample)
191 | self.bevencode = BevEncode(inC=self.camC, outC=grid_conf['num_channels'])
192 |
193 | # toggle using QuickCumsum vs. autograd
194 | self.use_quickcumsum = True
195 |
196 | def create_frustum(self):
197 | # make grid in image plane
198 | ogfH, ogfW = self.data_aug_conf['final_dim']
199 | fH, fW = ogfH // self.downsample, ogfW // self.downsample
200 | ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW)
201 | D, _, _ = ds.shape
202 | xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW)
203 | ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW)
204 |
205 | # D x H x W x 3
206 | frustum = torch.stack((xs, ys, ds), -1)
207 | return nn.Parameter(frustum, requires_grad=False)
208 |
209 | def get_geometry(self, rots, trans, intrins, post_rots, post_trans):
210 | """Determine the (x,y,z) locations (in the ego frame)
211 | of the points in the point cloud.
212 | Returns B x N x D x H/downsample x W/downsample x 3
213 | """
214 | B, N, _ = trans.shape
215 |
216 | # *undo* post-transformation
217 | # B x N x D x H x W x 3
218 | points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
219 | points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1))
220 |
221 | # cam_to_ego
222 | points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],
223 | points[:, :, :, :, :, 2:3]
224 | ), 5)
225 | combine = rots.matmul(torch.inverse(intrins))
226 | points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
227 | points += trans.view(B, N, 1, 1, 1, 3)
228 |
229 | return points
230 |
231 | def get_cam_feats(self, x):
232 | """Return B x N x D x H/downsample x W/downsample x C
233 | """
234 | B, N, C, imH, imW = x.shape
235 |
236 | x = x.view(B*N, C, imH, imW)
237 | x = self.camencode(x)
238 | x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample)
239 | x = x.permute(0, 1, 3, 4, 5, 2)
240 |
241 | return x
242 |
243 | def voxel_pooling(self, geom_feats, x):
244 | B, N, D, H, W, C = x.shape
245 | Nprime = B*N*D*H*W
246 |
247 | # flatten x
248 | x = x.reshape(Nprime, C)
249 |
250 | # flatten indices
251 | # B x N x D x H/downsample x W/downsample x 3
252 | geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long()
253 | geom_feats = geom_feats.view(Nprime, 3)
254 | batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, device=x.device, dtype=torch.long) for ix in range(B)])
255 | geom_feats = torch.cat((geom_feats, batch_ix), 1) # x, y, z, b
256 |
257 | # filter out points that are outside box
258 | kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\
259 | & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\
260 | & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2])
261 | x = x[kept]
262 | geom_feats = geom_feats[kept]
263 |
264 | # get tensors from the same voxel next to each other
265 | ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\
266 | + geom_feats[:, 1] * (self.nx[2] * B)\
267 | + geom_feats[:, 2] * B\
268 | + geom_feats[:, 3]
269 | sorts = ranks.argsort()
270 | x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts]
271 |
272 | # cumsum trick
273 | if not self.use_quickcumsum:
274 | x, geom_feats = cumsum_trick(x, geom_feats, ranks)
275 | else:
276 | x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks)
277 |
278 | # griddify (B x C x Z x X x Y)
279 | final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device)
280 | final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x
281 |
282 | # collapse Z
283 | final = torch.cat(final.unbind(dim=2), 1)
284 |
285 | return final
286 |
287 | def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans):
288 | # B x N x D x H/downsample x W/downsample x 3: (x,y,z) locations (in the ego frame)
289 | geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans)
290 | # B x N x D x H/downsample x W/downsample x C: cam feats
291 | x = self.get_cam_feats(x)
292 |
293 | x = self.voxel_pooling(geom, x)
294 |
295 | return x
296 |
297 | def forward(self, x, trans, rots, intrins, post_trans, post_rots, translation, yaw_pitch_roll):
298 | x = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans)
299 | x = self.bevencode(x)
300 | return x
301 |
--------------------------------------------------------------------------------
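The `cumsum_trick` / `QuickCumsum` used in `voxel_pooling` sum-pools all points that fall into the same voxel: after sorting by voxel rank, a cumulative sum followed by differencing at each rank boundary yields the per-voxel sums in one pass. A tiny CPU illustration (not part of the repo):

```python
# Tiny worked example of the cumsum trick on two voxels with two points each.
import torch

x = torch.tensor([[1.], [2.], [3.], [4.]])       # per-point features, already sorted by rank
ranks = torch.tensor([0, 0, 1, 1])               # voxel id of each point
geom_feats = torch.tensor([[0], [0], [1], [1]])  # voxel coords (only kept rows survive)

x = x.cumsum(0)                                   # [[1],[3],[6],[10]]
kept = torch.ones(x.shape[0], dtype=torch.bool)
kept[:-1] = ranks[1:] != ranks[:-1]               # keep the last point of each voxel
x, geom_feats = x[kept], geom_feats[kept]         # [[3],[10]]
x = torch.cat((x[:1], x[1:] - x[:-1]))            # [[3],[7]] -> per-voxel sums (1+2, 3+4)
print(x.squeeze(1).tolist(), geom_feats.squeeze(1).tolist())
```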
/model/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def plane_grid_2d(xbound, ybound):
4 | xmin, xmax = xbound[0], xbound[1]
5 | num_x = int((xbound[1] - xbound[0]) / xbound[2])
6 | ymin, ymax = ybound[0], ybound[1]
7 | num_y = int((ybound[1] - ybound[0]) / ybound[2])
8 |
9 | y = torch.linspace(xmin, xmax, num_x).cuda()
10 | x = torch.linspace(ymin, ymax, num_y).cuda()
11 | y, x = torch.meshgrid(x, y)
12 | x = x.flatten()
13 | y = y.flatten()
14 |
15 | coords = torch.stack([x, y], axis=0)
16 | return coords
17 |
18 |
19 | def cam_to_pixel(points, xbound, ybound):
20 | new_points = torch.zeros_like(points)
21 | new_points[..., 0] = (points[..., 0] - xbound[0]) / xbound[2]
22 | new_points[..., 1] = (points[..., 1] - ybound[0]) / ybound[2]
23 | return new_points
24 |
25 |
26 | def get_rot_2d(yaw):
27 | sin_yaw = torch.sin(yaw)
28 | cos_yaw = torch.cos(yaw)
29 | rot = torch.zeros(list(yaw.shape) + [2, 2]).cuda()
30 | rot[..., 0, 0] = cos_yaw
31 | rot[..., 0, 1] = sin_yaw
32 | rot[..., 1, 0] = -sin_yaw
33 | rot[..., 1, 1] = cos_yaw
34 | return rot
35 |
36 |
37 |
--------------------------------------------------------------------------------
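A tiny CPU example (illustration only, assuming the repo root is on `PYTHONPATH`) of `cam_to_pixel`, which maps ego-frame metric points to BEV pixel indices given `xbound`/`ybound` as `[min, max, resolution]`:

```python
# Sketch: metric ego-frame points -> BEV pixel indices.
import torch
from model.utils import cam_to_pixel

xbound, ybound = [-30.0, 30.0, 0.15], [-15.0, 15.0, 0.15]
pts = torch.tensor([[-30.0, -15.0], [0.0, 0.0], [29.85, 14.85]])
print(cam_to_pixel(pts, xbound, ybound))
# -> approximately [[0, 0], [200, 100], [399, 199]]
```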
/pic/ex.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/pic/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/pic/img1.png
--------------------------------------------------------------------------------
/pic/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/pic/img2.png
--------------------------------------------------------------------------------
/pic/img3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/pic/img3.png
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | addict==2.4.0
2 | argon2-cffi==21.3.0
3 | argon2-cffi-bindings==21.2.0
4 | asttokens==2.0.5
5 | attrs==21.4.0
6 | backcall==0.2.0
7 | black==22.1.0
8 | bleach==4.1.0
9 | cachetools==5.0.0
10 | certifi==2021.10.8
11 | cffi==1.15.0
12 | click==8.0.4
13 | cycler==0.11.0
14 | debugpy==1.5.1
15 | decorator==5.1.1
16 | defusedxml==0.7.1
17 | descartes==1.1.0
18 | efficientnet-pytorch==0.7.1
19 | entrypoints==0.4
20 | executing==0.8.2
21 | fire==0.4.0
22 | fonttools==4.29.1
23 | importlib-resources==5.4.0
24 | ipdb==0.13.9
25 | ipykernel==6.9.1
26 | ipython==8.0.1
27 | ipython-genutils==0.2.0
28 | ipywidgets==7.6.5
29 | jedi==0.18.1
30 | Jinja2==3.0.3
31 | joblib==1.1.0
32 | jsonschema==4.4.0
33 | jupyter==1.0.0
34 | jupyter-client==7.1.2
35 | jupyter-console==6.4.0
36 | jupyter-core==4.9.2
37 | jupyterlab-pygments==0.1.2
38 | jupyterlab-widgets==1.0.2
39 | kiwisolver==1.3.2
40 | MarkupSafe==2.1.0
41 | matplotlib==3.5.1
42 | matplotlib-inline==0.1.3
43 | mistune==0.8.4
44 | mmcv==1.4.5
45 | mypy-extensions==0.4.3
46 | nbclient==0.5.11
47 | nbconvert==6.4.2
48 | nbformat==5.1.3
49 | nest-asyncio==1.5.4
50 | notebook==6.4.8
51 | numpy==1.22.2
52 | nuscenes-devkit==1.1.9
53 | opencv-python==4.5.5.62
54 | packaging==21.3
55 | pandocfilters==1.5.0
56 | parso==0.8.3
57 | pathspec==0.9.0
58 | pexpect==4.8.0
59 | pickleshare==0.7.5
60 | Pillow==9.0.1
61 | platformdirs==2.5.1
62 | prometheus-client==0.13.1
63 | prompt-toolkit==3.0.28
64 | protobuf==3.19.4
65 | ptyprocess==0.7.0
66 | pure-eval==0.2.2
67 | pycocotools==2.0.4
68 | pycparser==2.21
69 | Pygments==2.11.2
70 | pyparsing==3.0.7
71 | pyquaternion==0.9.9
72 | pyrsistent==0.18.1
73 | python-dateutil==2.8.2
74 | PyYAML==6.0
75 | pyzmq==22.3.0
76 | qtconsole==5.2.2
77 | QtPy==2.0.1
78 | scikit-learn==1.0.2
79 | scipy==1.8.0
80 | Send2Trash==1.8.0
81 | Shapely==1.8.1.post1
82 | six==1.16.0
83 | stack-data==0.2.0
84 | tensorboardX==2.4.1
85 | termcolor==1.1.0
86 | terminado==0.13.1
87 | testpath==0.5.0
88 | threadpoolctl==3.1.0
89 | toml==0.10.2
90 | tomli==2.0.1
91 | torch==1.10.2+cu113
92 | torch-scatter==2.0.9
93 | torchaudio==0.10.2+cu113
94 | torchvision==0.11.3+cu113
95 | tornado==6.1
96 | tqdm==4.62.3
97 | traitlets==5.1.1
98 | typing_extensions==4.1.1
99 | wcwidth==0.2.5
100 | webencodings==0.5.1
101 | widgetsnbextension==3.5.2
102 | yapf==0.32.0
103 | zipp==3.7.0
104 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
3 | import numpy as np
4 | import sys
5 | import logging
6 | from time import time
7 | from tensorboardX import SummaryWriter
8 | import argparse
9 | import cv2
10 | import torch
11 | from torch.optim.lr_scheduler import StepLR
12 | from loss import SimpleLoss, DiscriminativeLoss, view_loss,FocalLoss
13 | import torch.nn as nn
14 | from data.dataset import semantic_dataset,vir_semantic_dataset
15 | from data.const import NUM_CLASSES
16 | from evaluation.iou import get_batch_iou
17 | from evaluation.angle_diff import calc_angle_diff
18 | from model import get_model
19 | from evaluate import onehot_encoding, eval_iou
20 |
21 | def write_log(writer, ious, title, counter):
22 | writer.add_scalar(f'{title}/iou', torch.mean(ious[1:]), counter)
23 |
24 | for i, iou in enumerate(ious):
25 | writer.add_scalar(f'{title}/class_{i}/iou', iou, counter)
26 | def train(args):
27 | if not os.path.exists(args.logdir):
28 | os.mkdir(args.logdir)
29 | logging.basicConfig(filename=os.path.join(args.logdir, "results.log"),
30 | filemode='w',
31 | format='%(asctime)s: %(message)s',
32 | datefmt='%Y-%m-%d %H:%M:%S',
33 | level=logging.INFO)
34 | logging.getLogger('shapely.geos').setLevel(logging.CRITICAL)
35 |
36 | logger = logging.getLogger()
37 | logger.addHandler(logging.StreamHandler(sys.stdout))
38 |
39 | data_conf = {
40 | 'num_channels': NUM_CLASSES + 1,
41 | 'image_size': args.image_size,
42 | 'image_size_i': args.image_size_i,
43 | 'xbound': args.xbound,
44 | 'ybound': args.ybound,
45 | 'zbound': args.zbound,
46 | 'dbound': args.dbound,
47 | 'thickness': args.thickness,
48 | 'angle_class': args.angle_class,
49 | 'B':args.bsz,
50 | 'merge': True
51 | }
52 | #train_loader, val_loader = vir_semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)
53 |     train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)  # build the training and validation loaders
54 |     model = get_model(args.model, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class)  # select the network for the chosen sensor configuration
55 | read_last = args.read_last
56 | if read_last:#args.finetune
57 | model.load_state_dict(torch.load(args.modelf), strict=False)
58 | for name, param in model.named_parameters():
59 | param.requires_grad = True
60 |
61 |
62 | model.cuda()
63 |
64 | opt = torch.optim.AdamW([{'params': model.parameters(), 'initial_lr': 1e-3}], lr=1e-3, weight_decay=args.weight_decay)
65 |
66 |
67 | if read_last:
68 | sched = StepLR(opt, 10, 0.1, last_epoch=read_last-1)
69 | else:
70 | sched = StepLR(opt, 10, 0.1)
71 |
72 | writer = SummaryWriter(logdir=args.logdir)
73 |
74 |     loss_fn = SimpleLoss(args.pos_weight).cuda()  # BEV segmentation loss
75 |     loss_fn_view = SimpleLoss(args.pos_weight).cuda()  # per-view (camera-space) segmentation loss
76 |
77 |
78 |
79 | model.train()
80 | counter = 0
81 | last_idx = len(train_loader) - 1
82 |
83 |
84 | for epoch in range(read_last,args.nepochs):
85 | sched.last_epoch = -1
86 | for batchi, (img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots,car_trans,
87 | yaw_pitch_roll, semantic_gt, instance_gt) in enumerate(train_loader):#, direction_gt
88 | t0 = time()
89 |
90 |
91 | opt.zero_grad()
92 | semantic , x_view_pre, mu_loss, mu_loss_i= model(feas.cuda(),imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
93 | post_trans.cuda(), post_rots.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda())
94 |
95 |
96 | semantic_gt = semantic_gt.cuda().float()
97 |
98 |
99 | view_img_gt = view_loss(semantic_gt.cuda(),trans.cuda(),rots.cuda())#b*n 5 h w
100 | view_img_loss = loss_fn_view(x_view_pre,view_img_gt)
101 | seg_loss = loss_fn(semantic, semantic_gt)
102 |
103 |
104 | if epoch<5:
105 | final_loss = seg_loss * args.scale_seg + view_img_loss +0.1*mu_loss_i#+seg_loss_f#+ var_loss * args.scale_var + dist_loss * args.scale_dist+ mu_loss# + direction_loss * args.scale_direction
106 |
107 | else:
108 | final_loss = seg_loss * args.scale_seg + view_img_loss + 0.1*mu_loss_i+ 0.1*mu_loss
109 |
110 | final_loss.requires_grad_(True)
111 | final_loss.backward()
112 |
113 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm, norm_type=2)
114 | opt.step()
115 | counter += 1
116 | t1 = time()
117 | #sched.step()
118 |
119 | if counter % 10 == 0:
120 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
121 | iou = intersects / (union + 1e-7)
122 | intersects1, union1 = get_batch_iou(onehot_encoding(x_view_pre), view_img_gt)
123 | iou_cam = intersects1 / (union1 + 1e-7)
124 |
125 | logger.info(f"TRAIN[{epoch:>3d}]: [{batchi:>4d}/{last_idx}] "
126 | f"Time: {t1-t0:>7.4f} "
127 | f"Loss: [{final_loss.item():>7.4f}],[{seg_loss.item():>7.4f}],[{view_img_loss.item():>7.4f}] "#
128 | f"mutual_Loss:[{mu_loss_i:>7.4f}],[{mu_loss.item():>7.4f}]"
129 | f"IOU: {np.array2string(iou[1:].numpy(), precision=3, floatmode='fixed')} "
130 | f"IOU_cam: {np.array2string(iou_cam[1:].numpy(), precision=3, floatmode='fixed')} "
131 | )
132 |
133 | write_log(writer, iou, 'train', counter)
134 | writer.add_scalar('train/step_time', t1 - t0, counter)
135 | writer.add_scalar('train/seg_loss', seg_loss, counter)
136 |
137 |
138 | iou,iou_cam = eval_iou(model, val_loader)
139 | logger.info(f"EVAL[{epoch:>2d}]: "
140 | f"IOU: {np.array2string(iou[1:].cpu().numpy(), precision=3, floatmode='fixed')} "
141 | f"IOU_cam: {np.array2string(iou_cam[1:].cpu().numpy(), precision=3, floatmode='fixed')}"
142 | )
143 | write_log(writer, iou, 'eval', counter)
144 |
145 | model_name = os.path.join(args.logdir, f"model{epoch}.pt")
146 | torch.save(model.state_dict(), model_name)
147 | logger.info(f"{model_name} saved")
148 | model.train()
149 |
150 | sched.step()
151 |
152 |
153 | if __name__ == '__main__':
154 | parser = argparse.ArgumentParser(description='HDMapNet training.')
155 | # logging config
156 | parser.add_argument("--logdir", type=str, default='./runs')#
157 |
158 | # nuScenes config
159 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0')
160 | parser.add_argument('--version', type=str, default='v1.0-trainval', choices=['v1.0-trainval', 'v1.0-mini'])
161 | parser.add_argument('--read_last', type=int, default=0)
162 | # model config
163 | parser.add_argument("--model", type=str, default='BiMapper')
164 |
165 | # training config
166 | parser.add_argument("--nepochs", type=int, default=24)
167 | parser.add_argument("--max_grad_norm", type=float, default=35.0)
168 | parser.add_argument("--pos_weight", type=float, default=2.13)
169 | parser.add_argument("--bsz", type=int, default=4)
170 | parser.add_argument("--nworkers", type=int, default=10)
171 | parser.add_argument("--lr", type=float, default=1e-3)
172 | parser.add_argument("--weight_decay", type=float, default=0.01)
173 |
174 | # finetune config
175 | parser.add_argument('--finetune', action='store_true')
176 | parser.add_argument('--modelf', type=str)
177 |
178 | # data config
179 | parser.add_argument("--thickness", type=int, default=5)
180 | parser.add_argument("--image_size", nargs=2, type=int, default=[256, 704])
181 | parser.add_argument("--image_size_i", nargs=2, type=int, default=[128, 352])
182 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
183 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])  # 200 cells at 0.15 m resolution
184 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0])
185 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0])
186 |
187 | # embedding config
188 | parser.add_argument('--instance_seg', action='store_true')
189 | parser.add_argument("--embedding_dim", type=int, default=16)
190 | parser.add_argument("--delta_v", type=float, default=0.5)
191 | parser.add_argument("--delta_d", type=float, default=3.0)
192 |
193 | # direction config
194 | parser.add_argument('--direction_pred', action='store_true')
195 | parser.add_argument('--angle_class', type=int, default=36)
196 |
197 | # loss config
198 | parser.add_argument("--scale_seg", type=float, default=1.0)
199 | parser.add_argument("--scale_var", type=float, default=1.0)
200 | parser.add_argument("--scale_dist", type=float, default=1.0)
201 | parser.add_argument("--scale_direction", type=float, default=0.2)
202 |
203 | args = parser.parse_args()
204 | train(args)
205 |
--------------------------------------------------------------------------------
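A quick aside on the loss schedule in train.py above: the supervision switches from a warm-up objective to the full objective once `epoch` reaches 5, enabling the second mutual-loss term only after the early epochs. Below is a minimal, self-contained sketch of that schedule; `combine_losses` and `warmup_epochs` are names introduced here for illustration, not part of the repository, and the 0.1 weights simply mirror the snippet above.

```
import torch

def combine_losses(seg_loss, view_img_loss, mu_loss_i, mu_loss,
                   epoch, scale_seg=1.0, warmup_epochs=5):
    """Sketch of the two-phase loss weighting used in train.py.

    During the first `warmup_epochs` epochs only the per-view mutual term
    (mu_loss_i) is added; afterwards the cross-stream mutual term (mu_loss)
    is enabled as well.
    """
    loss = seg_loss * scale_seg + view_img_loss + 0.1 * mu_loss_i
    if epoch >= warmup_epochs:
        loss = loss + 0.1 * mu_loss
    return loss

# Tiny smoke test with scalar stand-ins for the real losses.
if __name__ == "__main__":
    l = combine_losses(torch.tensor(0.8), torch.tensor(0.3),
                       torch.tensor(0.2), torch.tensor(0.1), epoch=2)
    print(l)  # 0.8 + 0.3 + 0.02 -> tensor(1.1200)
```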
/utils.py:
--------------------------------------------------------------------------------
1 | # nuScenes dev-kit.
2 | # Code written by Holger Caesar, 2018.
3 |
4 | from typing import List, Dict, Any
5 |
6 | import numpy as np
7 | from pyquaternion import Quaternion
8 |
9 | from nuscenes.eval.common.data_classes import EvalBox
10 | from nuscenes.utils.data_classes import Box
11 |
12 | DetectionBox = Any # Workaround as direct imports lead to cyclic dependencies.
13 |
14 |
15 | def center_distance(gt_box: EvalBox, pred_box: EvalBox) -> float:
16 | """
17 | L2 distance between the box centers (xy only).
18 | :param gt_box: GT annotation sample.
19 | :param pred_box: Predicted sample.
20 | :return: L2 distance.
21 | """
22 | return np.linalg.norm(np.array(pred_box.translation[:2]) - np.array(gt_box.translation[:2]))
23 |
24 |
25 | def velocity_l2(gt_box: EvalBox, pred_box: EvalBox) -> float:
26 | """
27 | L2 distance between the velocity vectors (xy only).
28 | If the predicted velocities are nan, we return inf, which is subsequently clipped to 1.
29 | :param gt_box: GT annotation sample.
30 | :param pred_box: Predicted sample.
31 | :return: L2 distance.
32 | """
33 | return np.linalg.norm(np.array(pred_box.velocity) - np.array(gt_box.velocity))
34 |
35 |
36 | def yaw_diff(gt_box: EvalBox, eval_box: EvalBox, period: float = 2*np.pi) -> float:
37 | """
38 | Returns the yaw angle difference between the orientation of two boxes.
39 | :param gt_box: Ground truth box.
40 | :param eval_box: Predicted box.
41 | :param period: Periodicity in radians for assessing angle difference.
42 | :return: Yaw angle difference in radians in [0, pi].
43 | """
44 | yaw_gt = quaternion_yaw(Quaternion(gt_box.rotation))
45 | yaw_est = quaternion_yaw(Quaternion(eval_box.rotation))
46 |
47 | return abs(angle_diff(yaw_gt, yaw_est, period))
48 |
49 |
50 | def angle_diff(x: float, y: float, period: float) -> float:
51 | """
52 | Get the smallest angle difference between 2 angles: the angle from y to x.
53 | :param x: To angle.
54 | :param y: From angle.
55 | :param period: Periodicity in radians for assessing angle difference.
56 |     :return: Signed smallest between-angle difference in range (-pi, pi).
57 | """
58 |
59 | # calculate angle difference, modulo to [0, 2*pi]
60 | diff = (x - y + period / 2) % period - period / 2
61 | if diff > np.pi:
62 | diff = diff - (2 * np.pi) # shift (pi, 2*pi] to (-pi, 0]
63 |
64 | return diff
65 |
66 |
67 | def attr_acc(gt_box: DetectionBox, pred_box: DetectionBox) -> float:
68 | """
69 | Computes the classification accuracy for the attribute of this class (if any).
70 | If the GT class has no attributes or the annotation is missing attributes, we assign an accuracy of nan, which is
71 | ignored later on.
72 | :param gt_box: GT annotation sample.
73 | :param pred_box: Predicted sample.
74 | :return: Attribute classification accuracy (0 or 1) or nan if GT annotation does not have any attributes.
75 | """
76 | if gt_box.attribute_name == '':
77 | # If the class does not have attributes or this particular sample is missing attributes, return nan, which is
78 | # ignored later. Note that about 0.4% of the sample_annotations have no attributes, although they should.
79 | acc = np.nan
80 | else:
81 | # Check that label is correct.
82 | acc = float(gt_box.attribute_name == pred_box.attribute_name)
83 | return acc
84 |
85 |
86 | def scale_iou(sample_annotation: EvalBox, sample_result: EvalBox) -> float:
87 | """
88 | This method compares predictions to the ground truth in terms of scale.
89 | It is equivalent to intersection over union (IOU) between the two boxes in 3D,
90 | if we assume that the boxes are aligned, i.e. translation and rotation are considered identical.
91 | :param sample_annotation: GT annotation sample.
92 | :param sample_result: Predicted sample.
93 | :return: Scale IOU.
94 | """
95 | # Validate inputs.
96 | sa_size = np.array(sample_annotation.size)
97 | sr_size = np.array(sample_result.size)
98 | assert all(sa_size > 0), 'Error: sample_annotation sizes must be >0.'
99 | assert all(sr_size > 0), 'Error: sample_result sizes must be >0.'
100 |
101 | # Compute IOU.
102 | min_wlh = np.minimum(sa_size, sr_size)
103 | volume_annotation = np.prod(sa_size)
104 | volume_result = np.prod(sr_size)
105 | intersection = np.prod(min_wlh) # type: float
106 | union = volume_annotation + volume_result - intersection # type: float
107 | iou = intersection / union
108 |
109 | return iou
110 |
111 |
112 | def quaternion_yaw(q: Quaternion) -> float:
113 | """
114 | Calculate the yaw angle from a quaternion.
115 | Note that this only works for a quaternion that represents a box in lidar or global coordinate frame.
116 | It does not work for a box in the camera frame.
117 | :param q: Quaternion of interest.
118 | :return: Yaw angle in radians.
119 | """
120 |
121 | # Project into xy plane.
122 | v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
123 |
124 | # Measure yaw using arctan.
125 | yaw = np.arctan2(v[1], v[0])
126 |
127 | return yaw
128 |
129 |
130 | def boxes_to_sensor(boxes: List[EvalBox], pose_record: Dict, cs_record: Dict):
131 | """
132 | Map boxes from global coordinates to the vehicle's sensor coordinate system.
133 | :param boxes: The boxes in global coordinates.
134 | :param pose_record: The pose record of the vehicle at the current timestamp.
135 | :param cs_record: The calibrated sensor record of the sensor.
136 | :return: The transformed boxes.
137 | """
138 | boxes_out = []
139 | for box in boxes:
140 | # Create Box instance.
141 | box = Box(box.translation, box.size, Quaternion(box.rotation))
142 |
143 | # Move box to ego vehicle coord system.
144 | box.translate(-np.array(pose_record['translation']))
145 | box.rotate(Quaternion(pose_record['rotation']).inverse)
146 |
147 | # Move box to sensor coord system.
148 | box.translate(-np.array(cs_record['translation']))
149 | box.rotate(Quaternion(cs_record['rotation']).inverse)
150 |
151 | boxes_out.append(box)
152 |
153 | return boxes_out
154 |
155 |
156 | def cummean(x: np.array) -> np.array:
157 | """
158 |     Computes the cumulative mean up to each position in a NaN-sensitive way:
159 |     - If all values are NaN, return an array of ones.
160 |     - If some values are NaN, accumulate arrays discarding those entries.
161 | """
162 | if sum(np.isnan(x)) == len(x):
163 |         # If all numbers in the array are NaN.
164 | return np.ones(len(x)) # If all errors are NaN set to error to 1 for all operating points.
165 | else:
166 | # Accumulate in a nan-aware manner.
167 | sum_vals = np.nancumsum(x.astype(float)) # Cumulative sum ignoring nans.
168 | count_vals = np.cumsum(~np.isnan(x)) # Number of non-nans up to each position.
169 | return np.divide(sum_vals, count_vals, out=np.zeros_like(sum_vals), where=count_vals != 0)
170 |
--------------------------------------------------------------------------------
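A quick usage sketch for two of the metric helpers above, `angle_diff` and `cummean`; the numbers are illustrative only, and the import assumes the snippet is run from the repository root so that `utils.py` resolves.

```
import numpy as np
from utils import angle_diff, cummean

# Signed smallest difference from y to x: wrapping across +/-pi is handled.
d = angle_diff(x=0.9 * np.pi, y=-0.9 * np.pi, period=2 * np.pi)
print(d)  # ~ -0.2 * pi, i.e. about -0.628

# NaN-aware cumulative mean: NaN entries are skipped, not propagated.
print(cummean(np.array([1.0, np.nan, 3.0])))  # [1. 1. 2.]
```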
/vis_label.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['CUDA_VISIBLE_DEVICES'] = '3'
3 | import tqdm
4 | import argparse
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from PIL import Image
8 | from loss import SimpleLoss, DiscriminativeLoss, view_loss, FocalLoss
9 | from data.dataset import HDMapNetSemanticDataset_all, CAMS, HDMapNetSemanticDataset_cam, HDMapNetDataset
10 | from data.utils import get_proj_mat, perspective
11 | from data.image import denormalize_img
12 | from data.viz_data import BaseViz
13 | import cv2
14 | def to_image(x):
15 | return (255 * x).byte().cpu().numpy().transpose(1, 2, 0)
16 | def vis_label(dataroot, version, xbound, ybound):
17 | data_conf = {
18 | 'image_size': (900, 1600),
19 | 'xbound': xbound,
20 | 'ybound': ybound,
21 | 'zbound': args.zbound,
22 | 'dbound': args.dbound
23 | }
24 |
25 | color_map = np.random.randint(0, 256, (256, 3))
26 | color_map[0] = np.array([0, 0, 0])
27 | colors_plt = ['r', 'b', 'g']
28 |
29 | dataset = HDMapNetDataset(version=version, dataroot=dataroot, data_conf=data_conf, is_train=False)
30 | gt_path = os.path.join(dataroot, 'samples', 'GT')
31 | if not os.path.exists(gt_path):
32 | os.mkdir(gt_path)
33 |
34 | car_img = Image.open('icon/car.png')  # ego-vehicle icon
35 | for idx in tqdm.tqdm(range(len(dataset))):
36 | rec = dataset.nusc.sample[idx]  # sample record for this time step
37 | img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = dataset.get_imgs(rec, 1)  # per-camera images and calibration
38 | vectors = dataset.get_vectors(rec)
39 |
40 | lidar_top_path = dataset.nusc.get_sample_data_path(rec['data']['LIDAR_TOP'])
41 |
42 | base_path = lidar_top_path.split('/')[-1].replace('__LIDAR_TOP__', '_').split('.')[0]
43 | base_path = os.path.join(gt_path, base_path)
44 |
45 | if not os.path.exists(base_path):
46 | os.mkdir(base_path)
47 |
48 | plt.figure(figsize=(4, 2))
49 | plt.xlim(-30, 30)
50 | plt.ylim(-15, 15)
51 | plt.axis('off')
52 | for vector in vectors:
53 | pts, pts_num, line_type = vector['pts'], vector['pts_num'], vector['type']
54 | pts = pts[:pts_num]
55 | x = np.array([pt[0] for pt in pts])
56 | y = np.array([pt[1] for pt in pts])
57 | plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, color=colors_plt[line_type])
58 |
59 | plt.imshow(car_img, extent=[-1.5, 1.5, -1.2, 1.2])
60 |
61 | map_path = os.path.join(base_path, 'MAP.png')
62 | plt.savefig(map_path, bbox_inches='tight', dpi=400)
63 | plt.close()
64 |
65 | for img, intrin, rot, tran, cam in zip(imgs, intrins, rots, trans, CAMS):
66 | img = denormalize_img(img)
67 | P = get_proj_mat(intrin, rot, tran)
68 | plt.figure(figsize=(9, 16))
69 | fig = plt.imshow(img)
70 | fig.axes.get_xaxis().set_visible(False)
71 | fig.axes.get_yaxis().set_visible(False)
72 | plt.xlim(1600, 0)
73 | plt.ylim(900, 0)
74 | plt.axis('off')
75 | for vector in vectors:
76 | pts, pts_num, line_type = vector['pts'], vector['pts_num'], vector['type']
77 | pts = pts[:pts_num]
78 | zeros = np.zeros((pts_num, 1))
79 | ones = np.ones((pts_num, 1))
80 | world_coords = np.concatenate([pts, zeros, ones], axis=1).transpose(1, 0)
81 | pix_coords = perspective(world_coords, P)
82 | x = np.array([pts[0] for pts in pix_coords])
83 | y = np.array([pts[1] for pts in pix_coords])
84 | plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy',
85 | angles='xy', scale=1, color=colors_plt[line_type])
86 |
87 | cam_path = os.path.join(base_path, f'{cam}.png')
88 | plt.savefig(cam_path, bbox_inches='tight', pad_inches=0, dpi=400)
89 | plt.close()
90 |
91 | def vis_gt(dataroot, version, xbound, ybound):
92 | data_conf = {
93 | 'xbound': xbound,
94 | 'ybound': ybound,
95 | 'zbound': args.zbound,
96 | 'thickness': args.thickness,
97 | 'angle_class': args.angle_class,
98 | 'image_size': args.image_size,
99 | 'dbound': args.dbound
100 | }
101 | dataset = HDMapNetSemanticDataset_all(version=version, dataroot=dataroot, data_conf=data_conf, is_train=False)
102 | for idx in tqdm.tqdm(range(len(dataset))):
103 | rec = dataset.nusc.sample[idx]  # sample record for this time step
104 | # img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = dataset.get_imgs(rec, 1)
105 | # for j in range(6):
106 | # plt.imshow(to_image(img_origin[j]))
107 | # plt.show()
108 | semantic_masks, instance_masks, forward_masks, backward_masks, direction_masks = dataset.get_semantic_map(rec)
109 |
110 | # plt.imshow(semantic_masks[1], vmin=0, cmap='Blues', vmax=1, alpha=0.6)
111 | # plt.imshow(semantic_masks[2], vmin=0, cmap='Reds', vmax=1, alpha=0.6)
112 | # plt.imshow(semantic_masks[3], vmin=0, cmap='Greens', vmax=1, alpha=0.6)
113 | # plt.show()
114 | img_origin,feas,imgs, trans, rots, intrins, post_trans, post_rots = dataset.get_imgs(rec, 1)
115 | #cam_to_world_t, cam_to_world_r =dataset.get_cam_pose(rec, trans.numpy(), rots.numpy())
116 | #semantic_map_cam = dataset.get_cam(rec, cam_to_world_t, cam_to_world_r)
117 |
118 | # plt.axis('off')  # hide axes
119 | # plt.xticks([])  # hide x-axis ticks
120 | # plt.yticks([])  # hide y-axis ticks
121 | view_img_gt = view_loss(semantic_masks.unsqueeze(0).cuda(), trans.unsqueeze(0).cuda(), rots.unsqueeze(0).cuda()) # b*n 5 h w
122 |
123 | view_img_gt = view_img_gt.cpu().numpy()
124 |
125 |
126 | for j in range(6):
127 | plt.axis('off')  # hide axes
128 | plt.xticks([])  # hide x-axis ticks
129 | plt.yticks([])  # hide y-axis ticks
130 | plt.imshow(to_image(img_origin[j]))
131 | #plt.show()
132 | plt.savefig('/data/lsy/HDmap_1/vis_gt_cam/img_{}_{}_{}'.format(idx,j,0))
133 |
134 |
135 | plt.axis('off')  # hide axes
136 | plt.xticks([])  # hide x-axis ticks
137 | plt.yticks([])  # hide y-axis ticks
138 | plt.imshow(view_img_gt[j][1], vmin=0, cmap='Blues', vmax=1, alpha=0.6)
139 | plt.imshow(view_img_gt[j][2], vmin=0, cmap='Reds', vmax=1, alpha=0.6)
140 | plt.imshow(view_img_gt[j][3], vmin=0, cmap='Greens', vmax=1, alpha=0.6)
141 | #plt.show()
142 | plt.savefig('/data/lsy/HDmap_1/vis_gt_cam/img_{}_{}_{}'.format(idx,j,1))
143 |
144 | # plt.axis('off')  # hide axes
145 | # plt.xticks([])  # hide x-axis ticks
146 | # plt.yticks([])  # hide y-axis ticks
147 | # plt.imshow(semantic_map_cam[j][0][:,110:190], vmin=0, cmap='Blues', vmax=1, alpha=0.6)
148 | # plt.imshow(semantic_map_cam[j][1][:,110:190], vmin=0, cmap='Reds', vmax=1, alpha=0.6)
149 | # plt.imshow(semantic_map_cam[j][2][:,110:190], vmin=0, cmap='Greens', vmax=1, alpha=0.6)
150 | # plt.show()
151 |
152 | # print('saving', idx)
153 | # plt.savefig('/data/lsy/HDmap_1/vis_lable/img_{}'.format(idx))
154 | # plt.close()
155 |
156 |
157 |
158 | if __name__ == '__main__':
159 | parser = argparse.ArgumentParser(description='Local HD Map Demo.')
160 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0')
161 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini'])
162 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
163 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
164 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0])
165 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0])
166 | parser.add_argument("--thickness", type=int, default=5)
167 | parser.add_argument('--angle_class', type=int, default=36)
168 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352])
169 | args = parser.parse_args()
170 | 
171 | # Render GT map vectors in BEV and projected onto each camera image (vis_gt can be called instead for the rasterized masks).
172 | vis_label(args.dataroot, args.version, args.xbound, args.ybound)
--------------------------------------------------------------------------------
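The camera overlays in vis_label rely on `get_proj_mat` and `perspective` from `data/utils.py`. As a rough mental model (not the repository implementation), projecting a BEV map point onto a camera image is a standard pinhole projection of a ground-plane point. The NumPy sketch below illustrates that; `project_ground_point` and the calibration values `K`, `R`, `t` are made-up placeholders.

```
import numpy as np

def project_ground_point(pts_xy, intrin, rot, tran):
    """Illustrative pinhole projection of ground-plane points (z = 0) into pixels.

    pts_xy: (N, 2) ego-frame map points, intrin: (3, 3) camera intrinsics,
    rot / tran: camera-to-ego rotation (3, 3) and translation (3,).
    Roughly what get_proj_mat + perspective achieve, in plain NumPy.
    """
    pts_ego = np.concatenate([pts_xy, np.zeros((len(pts_xy), 1))], axis=1)  # lift to z = 0
    pts_cam = rot.T @ (pts_ego - tran).T      # ego frame -> camera frame
    pix = intrin @ pts_cam                    # camera frame -> image plane
    return (pix[:2] / pix[2:]).T              # perspective divide -> (N, 2) pixels

# Placeholder front-camera calibration, purely for demonstration.
K = np.array([[1266.0, 0.0, 816.0],
              [0.0, 1266.0, 491.0],
              [0.0, 0.0, 1.0]])
R = np.array([[0.0, 0.0, 1.0],     # camera looks along ego +x:
              [-1.0, 0.0, 0.0],    # x_cam = -y_ego, y_cam = -z_ego
              [0.0, -1.0, 0.0]])
t = np.array([1.7, 0.0, 1.5])      # camera mounted 1.5 m above the ground
print(project_ground_point(np.array([[15.0, 0.0]]), K, R, t))  # ~[[816., 634.]]
```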
/vis_pred.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | from PIL import Image
4 | import os
5 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
6 | import matplotlib.pyplot as plt
7 |
8 | import tqdm
9 | import torch
10 |
11 | from data.dataset import semantic_dataset, my_semantic_dataset, vir_semantic_dataset
12 | from data.const import NUM_CLASSES
13 | from model import get_model
14 | from postprocess.vectorize import vectorize
15 | from data.viz_data import BaseViz,to_image_single,to_color,to_image
16 | import cv2
17 | from loss import view_loss
18 | def onehot_encoding(logits, dim=1):
19 | max_idx = torch.argmax(logits, dim, keepdim=True)
20 | one_hot = logits.new_full(logits.shape, 0)
21 | one_hot.scatter_(dim, max_idx, 1)
22 | return one_hot
23 |
24 | def vis_gt(val_loader):
25 | viz = BaseViz()
26 | with torch.no_grad():
27 | for batchi, (img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots,car_trans,
28 | yaw_pitch_roll, semantic_gt, instance_gt) in enumerate(val_loader):
29 |
30 | if batchi >= 0:
31 | print(batchi)
32 | for si in range(semantic_gt.shape[0]):
33 | # img = to_image(img_origin[si][1])
34 | # plt.imshow(img)
35 | # plt.show()
36 | # img = to_image(feas[si][1])
37 | # plt.imshow(img)
38 | # plt.show()
39 |                     # Display the ground-truth semantic map
40 | plt.figure(figsize=(4, 2), dpi=100)
41 |                 plt.axis('off')  # hide axes
42 |                 plt.xticks([])  # hide x-axis ticks
43 |                 plt.yticks([])  # hide y-axis ticks
44 |
45 | plt.imshow(semantic_gt[si][1], vmin=0, cmap='Blues', vmax=1, alpha=0.6)
46 | plt.imshow(semantic_gt[si][2], vmin=0, cmap='Reds', vmax=1, alpha=0.6)
47 | plt.imshow(semantic_gt[si][3], vmin=0, cmap='Greens', vmax=1, alpha=0.6)
48 | #plt.imshow(semantic[si][4], vmin=0, cmap='Reds', vmax=1, alpha=0.6)
49 | plt.show()
50 |
51 | imname = f'eval{batchi:06}_{si:03}.jpg'
52 | print('saving', imname)
53 | plt.savefig('/data/lsy/HDmap_1/vis_lable_gt/img_{}_{}'.format(batchi,si))
54 | plt.close()
55 |
56 | # image = np.vstack(viz(semantic_gt[si], img_origin[si]))
57 | # image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
58 |                 # plt.axis('off')  # hide axes
59 |                 # plt.xticks([])  # hide x-axis ticks
60 |                 # plt.yticks([])  # hide y-axis ticks
61 | # plt.imshow(image)
62 | # #plt.show()
63 | # imname = f'eval{batchi:06}_{si:03}.jpg'
64 | # print('saving', imname)
65 | # plt.savefig('/data/lsy/HDmap_1/vis_img/img_{}_{}'.format(batchi,si))
66 | # plt.close()
67 | def vis_segmentation(model, val_loader):
68 | model.eval()
69 |
70 | with torch.no_grad():
71 | for batchi, (img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots,car_trans,
72 | yaw_pitch_roll, semantic_gt, instance_gt) in enumerate(val_loader):
73 |
74 | semantic, view_img, embedding, direction = model(feas.cuda(),imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
75 | post_trans.cuda(), post_rots.cuda(),car_trans.cuda(), yaw_pitch_roll.cuda())
76 | semantic = semantic.softmax(1).cpu().numpy()
77 | semantic[semantic < 0.1] = np.nan
78 |
79 |
80 |             if batchi < 51:
81 | print(batchi)
82 |
83 | for si in range(semantic.shape[0]):
84 |
85 |
86 |                     # Display the predicted semantic map
87 | plt.figure(figsize=(4, 2), dpi=100)
88 |                     plt.axis('off')  # hide axes
89 |                     plt.xticks([])  # hide x-axis ticks
90 |                     plt.yticks([])  # hide y-axis ticks
91 | plt.imshow(semantic[si][1], vmin=0, cmap='Blues', vmax=1, alpha=0.6)
92 | plt.imshow(semantic[si][2], vmin=0, cmap='Reds', vmax=1, alpha=0.6)
93 | plt.imshow(semantic[si][3], vmin=0, cmap='Greens', vmax=1, alpha=0.6)
94 |
95 |
96 |
97 |
98 | else:break
99 |
100 |
101 |
102 | def main(args):
103 | data_conf = {
104 | 'num_channels': NUM_CLASSES + 1,
105 | 'image_size': args.image_size,
106 | 'xbound': args.xbound,
107 | 'ybound': args.ybound,
108 | 'zbound': args.zbound,
109 | 'dbound': args.dbound,
110 | 'thickness': args.thickness,
111 | 'angle_class': args.angle_class,
112 | 'B': args.bsz,
113 | 'merge': True,
114 | }
115 |
116 | #train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)
117 | val_loader = my_semantic_dataset("/data/lsy/dataset/my_dataset/ww",args.bsz, args.nworkers)
118 | #train_loader, val_loader = vir_semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)
119 | model = get_model(args.model, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class)
120 | model.load_state_dict(torch.load(args.modelf), strict=False)
121 | model.cuda()
122 |
123 |
124 | vis_segmentation(model, val_loader)
125 |
126 |
127 | if __name__ == '__main__':
128 | parser = argparse.ArgumentParser()
129 | # logging config
130 | parser.add_argument("--logdir", type=str, default='./runs')
131 |
132 | # nuScenes config
133 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0')
134 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini'])
135 |
136 | # model config
137 | parser.add_argument("--model", type=str, default='BiMapper')
138 |
139 | # training config
140 | parser.add_argument("--nepochs", type=int, default=30)
141 | parser.add_argument("--max_grad_norm", type=float, default=5.0)
142 | parser.add_argument("--pos_weight", type=float, default=2.13)
143 | parser.add_argument("--bsz", type=int, default=4)
144 | parser.add_argument("--nworkers", type=int, default=5)
145 | parser.add_argument("--lr", type=float, default=1e-3)
146 | parser.add_argument("--weight_decay", type=float, default=1e-7)
147 |
148 | # finetune config
149 | parser.add_argument('--finetune', action='store_true')
150 |     parser.add_argument('--modelf', type=str)
151 |
152 | # data config
153 | parser.add_argument("--thickness", type=int, default=5)
154 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352])
155 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
156 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
157 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0])
158 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0])
159 |
160 | # embedding config
161 | parser.add_argument('--instance_seg', action='store_true')
162 | parser.add_argument("--embedding_dim", type=int, default=16)
163 | parser.add_argument("--delta_v", type=float, default=0.5)
164 | parser.add_argument("--delta_d", type=float, default=3.0)
165 |
166 | # direction config
167 | parser.add_argument('--direction_pred', action='store_true')
168 | parser.add_argument('--angle_class', type=int, default=36)
169 |
170 | # loss config
171 | parser.add_argument("--scale_seg", type=float, default=1.0)
172 | parser.add_argument("--scale_var", type=float, default=1.0)
173 | parser.add_argument("--scale_dist", type=float, default=1.0)
174 | parser.add_argument("--scale_direction", type=float, default=0.2)
175 |
176 | args = parser.parse_args()
177 | main(args)
178 |
--------------------------------------------------------------------------------
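The `onehot_encoding` helper at the top of vis_pred.py converts per-class logits into a hard one-hot map via a channel-wise argmax followed by a scatter of ones. A tiny self-contained check of that behaviour, reusing the same logic with a toy 3-class, 2x2 input:

```
import torch

def onehot_encoding(logits, dim=1):
    # Same logic as in vis_pred.py: argmax over the class dimension, then scatter 1s.
    max_idx = torch.argmax(logits, dim, keepdim=True)
    one_hot = logits.new_full(logits.shape, 0)
    one_hot.scatter_(dim, max_idx, 1)
    return one_hot

# Batch of 1, three classes, 2x2 spatial grid.
logits = torch.tensor([[[[0.1, 2.0], [0.3, 0.3]],
                        [[1.5, 0.2], [0.1, 0.9]],
                        [[0.4, 0.1], [2.2, 0.4]]]])
print(onehot_encoding(logits)[0].long())
# tensor([[[0, 1], [0, 0]],
#         [[1, 0], [0, 1]],
#         [[0, 0], [1, 0]]])
```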