├── README.md ├── data ├── __init__.py ├── common.py ├── const.py ├── dataset.py ├── image.py ├── lidar.py ├── my_dataset.py ├── rasterize.py ├── txt ├── utils.py ├── vector_map.py └── viz_data.py ├── evaluate.py ├── evaluate_json.py ├── export_gt_to_json.py ├── export_pred_to_json.py ├── loss.py ├── model ├── HDMapNet │ ├── base.py │ ├── hdmapnet.py │ ├── homography.py │ ├── init │ ├── ipm_net.py │ ├── pointpillar.py │ ├── utils.py │ └── voxel.py ├── VPN.py ├── __init__.py ├── base.py ├── hdmapnet.py ├── homography.py ├── init ├── lift_splat.py └── utils.py ├── pic ├── ex.md ├── img1.png ├── img2.png └── img3.png ├── requirement.txt ├── train.py ├── utils.py ├── vis_label.py └── vis_pred.py /README.md: -------------------------------------------------------------------------------- 1 | # Bi-Mapper: Holistic BEV Semantic Mapping for Autonomous Driving (IEEE RA-L) 2 | 3 | Siyu Li, [Kailun Yang](https://yangkailun.com/), Hao Shi, [Jiaming Zhang](https://jamycheung.github.io/), Jiacheng Lin, Zhifeng Teng, and Zhiyong Li 4 | 5 | **IEEE Robotics and Automation Letters** [[PDF](https://arxiv.org/pdf/2305.04205.pdf)] 6 | 7 | ## Motivation 8 |
9 | ![img1](https://github.com/lynn-yu/Bi-Mapper/blob/main/pic/img1.png)
10 | 
11 | 
12 | 
13 | ### Abstract
14 | 
15 | A semantic map of the road scene, covering fundamental road elements, is an essential ingredient in autonomous driving systems. It provides important perception foundations for positioning and planning when rendered in the Bird's-Eye-View (BEV). Currently, the learning of prior knowledge, namely the hypothetical depth, has two sides in the research of BEV scene understanding. It can guide the learning of translating front perspective views into BEV directly with the help of calibration parameters. However, it suffers from geometric distortions in the representation of distant objects. In this paper, we propose a Bi-Mapper framework for top-down road-scene semantic understanding. The dual streams incorporate global view and local prior knowledge, which are learned asynchronously according to the learning timing. At the same time, an Across-Space Loss (ASL) is designed to mitigate the negative impact of geometric distortions. Extensive results verify the effectiveness of each module in the proposed Bi-Mapper framework. Compared with existing road mapping networks, the proposed Bi-Mapper achieves a 5.0 higher IoU on the nuScenes dataset. Moreover, we verify the generalization performance of Bi-Mapper in a real-world driving scenario.
16 | 
17 | ### Method
18 | ![img2](https://github.com/lynn-yu/Bi-Mapper/blob/main/pic/img2.png)
19 | 
20 | ### Result
21 | 
22 | ![img3](https://github.com/lynn-yu/Bi-Mapper/blob/main/pic/img3.png)
23 | 
24 | ### Update
25 | 
26 | 2023.04.28 Initialized the repository.
27 | 2024.07.24 Updated the repository.
28 | 
29 | ### Data
30 | Download the [nuScenes dataset](https://www.nuscenes.org/) and change the dataset path in the code.
31 | 
32 | ### Environment
33 | pip install -r requirement.txt
34 | 
35 | ### Training
36 | # Note: the dataset path needs to be changed first!
37 | python train.py
38 | 
39 | ### Evaluation
40 | # Note: the dataset path and the trained model file (--modelf) need to be set first!
41 | python evaluate.py
42 | 
43 | ## Publication
44 | If you find this repo useful, please consider citing the following paper:
45 | ```
46 | @article{li2023bimapper,
47 |   title={Bi-Mapper: Holistic BEV Semantic Mapping for Autonomous Driving},
48 |   author={Li, Siyu and Yang, Kailun and Shi, Hao and Zhang, Jiaming and Lin, Jiacheng and Teng, Zhifeng and Li, Zhiyong},
49 |   journal={IEEE Robotics and Automation Letters},
50 |   year={2023}
51 | }
52 | ```
53 | ### Acknowledgement
54 | The code framework of this project is based on [HDMapNet](https://github.com/Tsinghua-MARS-Lab/HDMapNet); thanks for this excellent work.
55 | ### Contact
56 | 
57 | Feel free to contact me if you have additional questions or are interested in collaboration. 
Please drop me an email at lsynn@hnu.edu.cn 58 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/data/__init__.py -------------------------------------------------------------------------------- /data/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from pathlib import Path 5 | from pyquaternion import Quaternion 6 | 7 | 8 | INTERPOLATION = cv2.LINE_8 9 | 10 | 11 | def get_split(split, dataset_name): 12 | split_dir = Path(__file__).parent / 'splits' / dataset_name 13 | split_path = split_dir / f'{split}.txt' 14 | 15 | return split_path.read_text().strip().split('\n')#返回场景序号列表[scenes-0001,....] 16 | 17 | 18 | def get_view_matrix(h=200, w=400, h_meters=30.0, w_meters=60.0, h_offset=2, w_offset=-2): 19 | sh = h / h_meters 20 | sw = w / w_meters 21 | t1 = np.float32([ 22 | [ 0., sw, w*w_offset+w/2.], 23 | [sh, 0., h*h_offset+h/2.], 24 | [ 0., 0., 1.] 25 | ]) 26 | t2 = np.float32([ 27 | [ sw, 0., w*w_offset+w/2.], 28 | [0., sh, h*h_offset+h/2.], 29 | [ 0., 0., 1.] 30 | ]) 31 | return t2 32 | 33 | 34 | def get_transformation_matrix(R, t, inv=False): 35 | pose = np.eye(4, dtype=np.float32) 36 | pose[:3, :3] = R if not inv else R.T 37 | pose[:3, -1] = t if not inv else R.T @ -t 38 | 39 | return pose 40 | 41 | 42 | def get_pose(rotation, translation, inv=False, flat=False): 43 | if flat: 44 | yaw = Quaternion(rotation).yaw_pitch_roll[0] 45 | R = Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).rotation_matrix 46 | else: 47 | R = Quaternion(rotation).rotation_matrix 48 | 49 | t = np.array(translation, dtype=np.float32) 50 | 51 | return get_transformation_matrix(R, t, inv=inv) 52 | 53 | 54 | def encode(x): 55 | """ 56 | (h, w, c) np.uint8 {0, 255} 57 | """ 58 | n = x.shape[2] 59 | 60 | # assert n < 16 61 | assert x.ndim == 3 62 | assert x.dtype == np.uint8 63 | assert all(x in [0, 255] for x in np.unique(x)) 64 | 65 | shift = np.arange(n, dtype=np.int32)[None, None] 66 | 67 | binary = (x > 0) 68 | binary = (binary << shift).sum(-1) 69 | binary = binary.astype(np.int32) 70 | 71 | return binary 72 | 73 | 74 | def decode(img, n): 75 | """ 76 | returns (h, w, n) np.int32 {0, 1} 77 | """ 78 | shift = np.arange(n, dtype=np.int32)[None, None] 79 | 80 | x = np.array(img)[..., None] 81 | x = (x >> shift) & 1 82 | 83 | return x 84 | 85 | 86 | if __name__ == '__main__': 87 | from PIL import Image 88 | 89 | n = 12 90 | 91 | x = np.random.rand(64, 64, n) 92 | x = 255 * (x > 0.5).astype(np.uint8) 93 | 94 | x_encoded = encode(x) 95 | x_img = Image.fromarray(x_encoded) 96 | x_img.save('tmp.png') 97 | x_loaded = Image.open('tmp.png') 98 | x_decoded = 255 * decode(x_loaded, 12) 99 | x_decoded = x_decoded[..., :n] 100 | 101 | print(abs(x_decoded - x).max()) 102 | -------------------------------------------------------------------------------- /data/const.py: -------------------------------------------------------------------------------- 1 | MAP = ['boston-seaport', 'singapore-hollandvillage', 'singapore-onenorth', 'singapore-queenstown'] 2 | CAMS = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'] 3 | CLASS2LABEL = { 4 | 'road_divider': 0, 5 | 'lane_divider': 0, 6 | 'ped_crossing': 1, 7 | 'contours': 2, 8 | 'car':3, 9 | 'others': -1 10 | } 11 | 
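# Note (illustrative, not part of the original file): CLASS2LABEL above collapses the
# nuScenes map layers into the training label indices used by the rasterizer in
# data/rasterize.py -- both divider layers share label 0, pedestrian crossings are
# label 1, road/lane contours are label 2, vehicles ('car') are label 3, and anything
# mapped to -1 ('others') is filtered out before rasterization.
# A minimal lookup sketch (the layer-name list here is hypothetical):
#
#     layers = ['lane_divider', 'ped_crossing', 'contours', 'walkway']
#     labels = [CLASS2LABEL.get(name, -1) for name in layers]   # -> [0, 1, 2, -1]
#     kept   = [lbl for lbl in labels if lbl != -1]             # -> [0, 1, 2]
#
# NUM_CLASSES below counts only the three static map classes; the background channel
# is added separately (e.g. data_conf['num_channels'] = NUM_CLASSES + 1 in train.py
# and evaluate.py).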
NUM_CLASSES = 3 12 | IMG_ORIGIN_H = 900 13 | IMG_ORIGIN_W = 1600 14 | -------------------------------------------------------------------------------- /data/image.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | import torch 5 | import torchvision 6 | 7 | class NormalizeInverse(torchvision.transforms.Normalize): 8 | # https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/8 9 | def __init__(self, mean, std): 10 | mean = torch.as_tensor(mean) 11 | std = torch.as_tensor(std) 12 | std_inv = 1 / (std + 1e-7) 13 | mean_inv = -mean * std_inv 14 | super().__init__(mean=mean_inv, std=std_inv) 15 | 16 | def __call__(self, tensor): 17 | return super().__call__(tensor.clone()) 18 | 19 | 20 | normalize_img = torchvision.transforms.Compose(( 21 | torchvision.transforms.ToTensor(), 22 | torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], 23 | std=[0.229, 0.224, 0.225]), 24 | ))#对比度增强 25 | 26 | nor_img = torchvision.transforms.Compose(( 27 | torchvision.transforms.ToTensor(), 28 | )) 29 | denor_img = torchvision.transforms.Compose(( 30 | torchvision.transforms.ToPILImage(), 31 | )) 32 | denormalize_img = torchvision.transforms.Compose(( 33 | NormalizeInverse(mean=[0.485, 0.456, 0.406], 34 | std=[0.229, 0.224, 0.225]), 35 | torchvision.transforms.ToPILImage(), 36 | )) 37 | 38 | 39 | def img_transform(img, resize, resize_dims): 40 | post_rot2 = torch.eye(2) 41 | post_tran2 = torch.zeros(2) 42 | 43 | img = img.resize(resize_dims) 44 | 45 | rot_resize = torch.Tensor([[resize[0], 0], 46 | [0, resize[1]]]) 47 | post_rot2 = rot_resize @ post_rot2#矩阵乘法 48 | post_tran2 = rot_resize @ post_tran2 49 | 50 | post_tran = torch.zeros(3) 51 | post_rot = torch.eye(3) 52 | post_tran[:2] = post_tran2 53 | post_rot[:2, :2] = post_rot2 54 | return img, post_rot, post_tran 55 | 56 | 57 | def get_rot(h): 58 | return torch.Tensor([ 59 | [np.cos(h), np.sin(h)], 60 | [-np.sin(h), np.cos(h)], 61 | ]) 62 | 63 | def img_transform_rot(img, resize, resize_dims, crop, flip, rotate): 64 | post_rot2 = torch.eye(2) 65 | post_tran2 = torch.zeros(2) 66 | 67 | # adjust image 68 | img = img.resize(resize_dims) 69 | img = img.crop(crop) 70 | if flip:#翻转 71 | img = img.transpose(method=Image.FLIP_LEFT_RIGHT) 72 | img = img.rotate(rotate) 73 | 74 | # post-homography transformation 75 | post_rot2 *= resize 76 | post_tran2 -= torch.Tensor(crop[:2]) 77 | if flip: 78 | A = torch.Tensor([[-1, 0], [0, 1]]) 79 | b = torch.Tensor([crop[2] - crop[0], 0]) 80 | post_rot2 = A.matmul(post_rot2) 81 | post_tran2 = A.matmul(post_tran2) + b 82 | A = get_rot(rotate/180*np.pi) 83 | b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2 84 | b = A.matmul(-b) + b 85 | post_rot2 = A.matmul(post_rot2) 86 | post_tran2 = A.matmul(post_tran2) + b 87 | 88 | post_tran = torch.zeros(3) 89 | post_rot = torch.eye(3) 90 | post_tran[:2] = post_tran2 91 | post_rot[:2, :2] = post_rot2 92 | return img, post_rot, post_tran 93 | 94 | -------------------------------------------------------------------------------- /data/lidar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from functools import reduce 4 | 5 | from pyquaternion import Quaternion 6 | 7 | from nuscenes.utils.data_classes import LidarPointCloud 8 | from nuscenes.utils.geometry_utils import transform_matrix 9 | 10 | 11 | def get_lidar_data(nusc, sample_rec, nsweeps, min_distance): 12 | """ 13 | Returns at most 
nsweeps of lidar in the ego frame. 14 | Returned tensor is 5(x, y, z, reflectance, dt) x N 15 | Adapted from https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/data_classes.py#L56 16 | """ 17 | points = np.zeros((5, 0)) 18 | 19 | # Get reference pose and timestamp. 20 | ref_sd_token = sample_rec['data']['LIDAR_TOP'] 21 | ref_sd_rec = nusc.get('sample_data', ref_sd_token) 22 | ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token']) 23 | ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token']) 24 | ref_time = 1e-6 * ref_sd_rec['timestamp'] 25 | 26 | # Homogeneous transformation matrix from global to _current_ ego car frame. 27 | car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), 28 | inverse=True) 29 | 30 | # Aggregate current and previous sweeps. 31 | sample_data_token = sample_rec['data']['LIDAR_TOP'] 32 | current_sd_rec = nusc.get('sample_data', sample_data_token) 33 | for _ in range(nsweeps): 34 | # Load up the pointcloud and remove points close to the sensor. 35 | current_pc = LidarPointCloud.from_file(os.path.join(nusc.dataroot, current_sd_rec['filename'])) 36 | current_pc.remove_close(min_distance) 37 | 38 | # Get past pose. 39 | current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token']) 40 | global_from_car = transform_matrix(current_pose_rec['translation'], 41 | Quaternion(current_pose_rec['rotation']), inverse=False) 42 | 43 | # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. 44 | current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token']) 45 | car_from_current = transform_matrix(current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), 46 | inverse=False) 47 | 48 | # Fuse four transformation matrices into one and perform transform. 49 | trans_matrix = reduce(np.dot, [car_from_global, global_from_car, car_from_current]) 50 | current_pc.transform(trans_matrix) 51 | 52 | # Add time vector which can be used as a temporal feature. 53 | time_lag = ref_time - 1e-6 * current_sd_rec['timestamp'] 54 | times = time_lag * np.ones((1, current_pc.nbr_points())) 55 | 56 | new_points = np.concatenate((current_pc.points, times), 0) 57 | points = np.concatenate((points, new_points), 1) 58 | 59 | # Abort if there are no previous sweeps. 
60 | if current_sd_rec['prev'] == '': 61 | break 62 | else: 63 | current_sd_rec = nusc.get('sample_data', current_sd_rec['prev']) 64 | 65 | return points -------------------------------------------------------------------------------- /data/my_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | from PIL import Image 5 | from torch.utils.data import Dataset 6 | import matplotlib.pyplot as plt 7 | Img_path = ['ImgLeft','ImgFront','Imgright','ImgBack'] 8 | def my_dataset(Dataset): 9 | def __init__(self, dataroot): 10 | super(my_dataset, self).__init__() 11 | self.dataroot = dataroot 12 | self.data_conf = { 13 | 'image_size': [128, 352], 14 | 'xbound': [-30.0, 30.0, 0.15], 15 | 'ybound': [-15.0, 15.0, 0.15], 16 | 'thickness': 5, 17 | 'angle_class': 36, 18 | } 19 | self.len = 100 20 | def __len__(self): 21 | return self.len 22 | def get_intrinsic(self,i): 23 | d = dict() 24 | d[0] = np.array([[273.7, 0.0, 334.9], [0, 273.4, 244.8], [0, 0, 1]])#左 25 | d[1] = np.array([[462.9, 0.0, 328.1], [0, 462.3, 183.2], [0, 0, 1]])#前 26 | d[2] = np.array([[273.2, 0.0, 328.7], [0, 274.1, 219.2], [0, 0, 1]])#右 27 | d[3] = np.array([[274.2, 0.0, 332.2], [0, 272.8, 239.1], [0, 0, 1]])#后 28 | return d[i] 29 | def get_rot(self,i): 30 | d = dict() 31 | d[0] = np.array([[0.99, -0.015, -0.00], [0.005, -0.004, 0.99], [-0.015, -0.99, -0.004]]) 32 | d[1] = np.array([[0.0, 0.0, 1], [-1, 0, 0], [0, -1, 0]]) 33 | d[2] = np.array([[-0.99, -0.0168, 0.0025], [-0.00223, -0.0160, -0.9998], [0.0168, -0.9997, 0.016]]) 34 | d[3] = np.array([[-0.0027, -0.0307, -0.9995], [0.99992, -0.0117, -0.00237], [-0.0116, -0.9994, 0.0308]]) 35 | return d[i] 36 | def get_trans(self,i): 37 | d = dict() 38 | d[0] = np.array([0, 0.263, 0.92]) 39 | d[1] = np.array([0.35, 0.599, 0.92]) 40 | d[2] = np.array([0, -0.263, 0.92]) 41 | d[3] = np.array([ -0.365, 0.263, 0.92]) 42 | return d[i] 43 | def sample_augmentation(self): 44 | fH, fW = self.data_conf['image_size'] # 128 352 45 | resize = (fW / IMG_ORIGIN_W, fH / IMG_ORIGIN_H) # (1,1)变换尺度 46 | resize_dims = (fW, fH) # (128,352)目标数据尺寸 47 | return resize, resize_dims 48 | def get_img(self,id): 49 | imgs = [] 50 | img_origin = [] 51 | feas = [] 52 | for i in range(len(Img_path)): 53 | if i == 4: 54 | imgs.append(torch.zeros_like(img)) 55 | a = np.array([[0,0,0],[0,0,0],[0,0,0]]) 56 | b = np.array([0,0,0]) 57 | trans.append(torch.Tensor(b)) # 标定位移变换,相机到车辆的变换 58 | rots.append(torch.Tensor(a)) # 标定旋转变换 59 | intrins.append(torch.Tensor(a)) 60 | feas.append(torch.zeros_like(fea)) 61 | root = os.path.join(self.path,Img_path[i]) 62 | n = str(id) 63 | n = n.zfill(6) 64 | root = root + n + ".jpg" 65 | img = Image.open(root) 66 | resize, resize_dims = self.sample_augmentation() # 图片变换尺度,变换大小,目标数据尺寸 67 | img_o_res, _, _ = img_transform(img, resize, resize_dims) # 输出3*3旋转和平移矩阵 68 | img_o = nor_img(img) # 3 900 1600 69 | img_origin.append(nor_img(img_o_res)) 70 | img, post_rot, post_tran = img_transform(img, resize, resize_dims) # 输出3*3旋转和平移矩阵 71 | img = normalize_img(img) # 归一化3 128 325 72 | imgs.append(img) # 不同角度相机累积 73 | 74 | trans.append(torch.Tensor(get_trans(i))) # 标定位移变换,相机到车辆的变换 75 | rots.append(torch.Tensor(get_rot(i))) # 标定旋转变换 76 | intrins.append(torch.Tensor(get_intrinsic(i))) 77 | 78 | fea = get_bev_cam(img_o,torch.Tensor(get_intrinsic(i)) , 1) # ,fea_1 79 | fea_p = denor_img(fea) 80 | fea_p = fea_p.resize(resize_dims) 81 | fea = normalize_img(fea_p) 82 | # fea= nor_img(fea_p)#显示原图 83 | feas.append(fea) 84 | 
imgs.append(torch.zeros_like(img)) 85 | a = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]]) 86 | b = np.array([0, 0, 0]) 87 | trans.append(torch.Tensor(b)) # 标定位移变换,相机到车辆的变换 88 | rots.append(torch.Tensor(a)) # 标定旋转变换 89 | intrins.append(torch.Tensor(a)) 90 | feas.append(torch.zeros_like(fea)) 91 | return torch.stack(img_origin),torch.stack(feas),torch.stack(imgs), torch.stack(trans), torch.stack(rots), torch.stack(intrins), None, None 92 | def __getitem__(self, idx): 93 | img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = self.get_imgs(idx) 94 | return img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots, None, None, None, None -------------------------------------------------------------------------------- /data/rasterize.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from shapely import affinity 7 | from shapely.geometry import LineString, box 8 | 9 | 10 | def get_patch_coord(patch_box, patch_angle=0.0): 11 | patch_x, patch_y, patch_h, patch_w = patch_box 12 | 13 | x_min = patch_x - patch_w / 2.0 14 | y_min = patch_y - patch_h / 2.0 15 | x_max = patch_x + patch_w / 2.0 16 | y_max = patch_y + patch_h / 2.0 17 | 18 | patch = box(x_min, y_min, x_max, y_max) 19 | patch = affinity.rotate(patch, patch_angle, origin=(patch_x, patch_y), use_radians=False) 20 | 21 | return patch 22 | 23 | 24 | def get_discrete_degree(vec, angle_class=36): 25 | deg = np.mod(np.degrees(np.arctan2(vec[1], vec[0])), 360) 26 | deg = (int(deg / (360 / angle_class) + 0.5) % angle_class) + 1 27 | return deg 28 | 29 | 30 | def mask_for_lines(lines, mask, thickness, idx, type='index', angle_class=36): 31 | coords = np.asarray(list(lines.coords), np.int32) 32 | coords = coords.reshape((-1, 2)) 33 | if len(coords) < 2: 34 | return mask, idx 35 | if type == 'backward': 36 | coords = np.flip(coords, 0)#翻转 37 | 38 | if type == 'index': 39 | cv2.polylines(mask, [coords], False, color=idx, thickness=thickness) 40 | idx += 1 41 | else: 42 | for i in range(len(coords) - 1): 43 | cv2.polylines(mask, [coords[i:]], False, color=get_discrete_degree(coords[i + 1] - coords[i], angle_class=angle_class), thickness=thickness) 44 | return mask, idx 45 | 46 | 47 | def line_geom_to_mask(layer_geom, confidence_levels, local_box, canvas_size, thickness, idx, type='index', angle_class=36): 48 | patch_x, patch_y, patch_h, patch_w = local_box 49 | 50 | patch = get_patch_coord(local_box) 51 | 52 | canvas_h = canvas_size[0] 53 | canvas_w = canvas_size[1] 54 | scale_height = canvas_h / patch_h 55 | scale_width = canvas_w / patch_w 56 | 57 | trans_x = -patch_x + patch_w / 2.0 58 | trans_y = -patch_y + patch_h / 2.0 59 | 60 | map_mask = np.zeros(canvas_size, np.uint8) 61 | 62 | for line in layer_geom: 63 | if isinstance(line, tuple): 64 | line, confidence = line 65 | else: 66 | confidence = None 67 | new_line = line.intersection(patch) 68 | if not new_line.is_empty: 69 | new_line = affinity.affine_transform(new_line, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y]) 70 | new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0)) 71 | confidence_levels.append(confidence) 72 | if new_line.geom_type == 'MultiLineString': 73 | for new_single_line in new_line: 74 | map_mask, idx = mask_for_lines(new_single_line, map_mask, thickness, idx, type, angle_class) 75 | else: 76 | map_mask, idx = mask_for_lines(new_line, map_mask, thickness, idx, type, angle_class) 77 | return map_mask, idx 78 | 79 | 80 | def 
overlap_filter(mask, filter_mask): 81 | C, _, _ = mask.shape 82 | for c in range(C-1, -1, -1): 83 | filter = np.repeat((filter_mask[c] != 0)[None, :], c, axis=0) 84 | mask[:c][filter] = 0 85 | 86 | return mask 87 | 88 | 89 | def preprocess_map(vectors, patch_size, canvas_size, num_classes, thickness, angle_class): 90 | confidence_levels = [-1] 91 | vector_num_list = {} 92 | for i in range(num_classes): 93 | vector_num_list[i] = [] 94 | 95 | for vector in vectors: 96 | if vector['pts_num'] >= 2: 97 | vector_num_list[vector['type']].append(LineString(vector['pts'][:vector['pts_num']])) 98 | 99 | local_box = (0.0, 0.0, patch_size[0], patch_size[1]) 100 | 101 | idx = 1 102 | filter_masks = [] 103 | instance_masks = [] 104 | forward_masks = [] 105 | backward_masks = [] 106 | for i in range(num_classes): 107 | map_mask, idx = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, idx) 108 | instance_masks.append(map_mask) 109 | filter_mask, _ = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness + 4, 1) 110 | filter_masks.append(filter_mask) 111 | forward_mask, _ = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, 1, type='forward', angle_class=angle_class) 112 | forward_masks.append(forward_mask) 113 | backward_mask, _ = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, 1, type='backward', angle_class=angle_class) 114 | backward_masks.append(backward_mask) 115 | 116 | filter_masks = np.stack(filter_masks) 117 | instance_masks = np.stack(instance_masks) 118 | forward_masks = np.stack(forward_masks) 119 | backward_masks = np.stack(backward_masks) 120 | 121 | instance_masks = overlap_filter(instance_masks, filter_masks) 122 | forward_masks = overlap_filter(forward_masks, filter_masks).sum(0).astype('int32') 123 | backward_masks = overlap_filter(backward_masks, filter_masks).sum(0).astype('int32') 124 | 125 | return torch.tensor(instance_masks), torch.tensor(forward_masks), torch.tensor(backward_masks) 126 | 127 | 128 | def rasterize_map(vectors, patch_size, canvas_size, num_classes, thickness): 129 | confidence_levels = [-1] 130 | vector_num_list = {} 131 | for i in range(num_classes): 132 | vector_num_list[i] = [] 133 | 134 | for vector in vectors: 135 | if vector['pts_num'] >= 2: 136 | vector_num_list[vector['type']].append((LineString(vector['pts'][:vector['pts_num']]), vector.get('confidence_level', 1))) 137 | 138 | local_box = (0.0, 0.0, patch_size[0], patch_size[1]) 139 | 140 | idx = 1 141 | masks = [] 142 | for i in range(num_classes): 143 | map_mask, idx = line_geom_to_mask(vector_num_list[i], confidence_levels, local_box, canvas_size, thickness, idx) 144 | masks.append(map_mask) 145 | 146 | return np.stack(masks), confidence_levels 147 | -------------------------------------------------------------------------------- /data/txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xml.etree.ElementTree as xmlET 3 | import torch 4 | import os 5 | import math 6 | 7 | def get_proj_mat(intrins, rots, trans): 8 | K = np.eye(4) 9 | K[:3, :3] = intrins 10 | R = np.eye(4) 11 | R[:3, :3] = rots.transpose(-1, -2) 12 | T = np.eye(4) 13 | T[:3, 3] = -trans 14 | RT = R @ T 15 | return K @ RT 16 | 17 | 
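# Illustrative sketch (added, not part of the original code): how get_proj_mat and
# perspective (defined just below) combine to project ego-frame 3D points into image
# pixels. All intrinsic/extrinsic values here are made-up placeholders that only
# roughly resemble a nuScenes front camera.
def _example_project_points_to_image():
    # Hypothetical pinhole intrinsics (fx, fy, cx, cy) for a 1600x900 image.
    intrins = np.array([[1266.4, 0.0, 816.3],
                        [0.0, 1266.4, 491.5],
                        [0.0, 0.0, 1.0]])
    # Camera-to-ego rotation of a forward-looking camera (camera z -> ego x,
    # camera x -> ego -y, camera y -> ego -z) and its position in the ego frame.
    rots = np.array([[0.0, 0.0, 1.0],
                     [-1.0, 0.0, 0.0],
                     [0.0, -1.0, 0.0]])
    trans = np.array([1.5, 0.0, 1.6])

    # 4x4 matrix mapping homogeneous ego-frame coordinates to homogeneous pixels.
    proj_mat = get_proj_mat(intrins, rots, trans)

    # Homogeneous ego-frame points of shape (4, N): two points on the ground,
    # 10 m and 20 m ahead of the car.
    pts = np.array([[10.0, 20.0],
                    [0.0, 0.0],
                    [0.0, 0.0],
                    [1.0, 1.0]])

    # perspective() keeps only points in front of the camera and returns (N, 2) pixels.
    return perspective(pts, proj_mat)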
18 | def perspective(cam_coords, proj_mat): 19 | pix_coords = proj_mat @ cam_coords 20 | valid_idx = pix_coords[2, :] > 0 21 | pix_coords = pix_coords[:, valid_idx] 22 | pix_coords = pix_coords[:2, :] / (pix_coords[2, :] + 1e-7) 23 | pix_coords = pix_coords.transpose(1, 0) 24 | return pix_coords 25 | 26 | 27 | def label_onehot_decoding(onehot): 28 | return torch.argmax(onehot, axis=0) 29 | 30 | 31 | def label_onehot_encoding(label, num_classes=4): 32 | H, W = label.shape 33 | onehot = torch.zeros((num_classes, H, W)) 34 | onehot.scatter_(0, label[None].long(), 1) 35 | return onehot 36 | 37 | 38 | def gen_dx_bx(xbound, ybound, zbound): 39 | dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]]) 40 | bx = torch.Tensor([row[0] + row[2] / 2.0 for row in [xbound, ybound, zbound]]) 41 | nx = torch.LongTensor([int((row[1] - row[0]) / row[2]) for row in [xbound, ybound, zbound]]) 42 | return dx, bx, nx 43 | 44 | def eulerAngles2rotationMat(theta, format='degree'): 45 | """ 46 | Calculates Rotation Matrix given euler angles. 47 | :param theta: 1-by-3 list [rx, ry, rz] angle in degree 48 | :return: 49 | RPY角,是ZYX欧拉角,依次 绕定轴XYZ转动[rx, ry, rz] 50 | """ 51 | if format == 'degree': 52 | theta = [i * math.pi / 180.0 for i in theta] 53 | 54 | R_x = np.array([[1, 0, 0], 55 | [0, math.cos(theta[0]), -math.sin(theta[0])], 56 | [0, math.sin(theta[0]), math.cos(theta[0])] 57 | ]) 58 | 59 | R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])], 60 | [0, 1, 0], 61 | [-math.sin(theta[1]), 0, math.cos(theta[1])] 62 | ]) 63 | 64 | R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0], 65 | [math.sin(theta[2]), math.cos(theta[2]), 0], 66 | [0, 0, 1] 67 | ]) 68 | R = np.dot(R_z, np.dot(R_y, R_x)) 69 | return R 70 | def get_files_in_folder(folder): 71 | 72 | return sorted([os.path.join(folder, f) for f in os.listdir(folder)])#返回图片名字路径 73 | 74 | def one_hot_encode_image(image, palette): 75 | 76 | one_hot_map = [] 77 | 78 | # find instances of class colors and append layer to one-hot-map 79 | for class_colors in palette: 80 | class_map = np.zeros(image.shape[0:2], dtype=bool) 81 | for color in class_colors: 82 | # temp = np.ones_like(image) 83 | # temp = temp [:,:,0] * color[0] 84 | # temp = temp[:, :, 1] * color[1] 85 | # temp = temp[:, :, 2] * color[2] 86 | class_map = class_map | (image==color).all(axis=-1) 87 | one_hot_map.append(class_map) 88 | 89 | # finalize one-hot-map 90 | one_hot_map = np.stack(one_hot_map, axis=-1) 91 | one_hot_map = one_hot_map.astype(np.float32) 92 | 93 | return one_hot_map 94 | def parse_convert_xml(conversion_file_path): 95 | 96 | defRoot = xmlET.parse(conversion_file_path).getroot() 97 | 98 | one_hot_palette = [] 99 | class_list = [] 100 | for idx, defElement in enumerate(defRoot.findall("SLabel")): 101 | from_color = np.fromstring(defElement.get("fromColour"), dtype=int, sep=" ") 102 | to_class = np.fromstring(defElement.get("toValue"), dtype=int, sep=" ") 103 | if to_class in class_list: 104 | one_hot_palette[class_list.index(to_class)].append(from_color) 105 | else: 106 | one_hot_palette.append([from_color]) 107 | class_list.append(to_class) 108 | 109 | return one_hot_palette 110 | -------------------------------------------------------------------------------- /data/vector_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer 4 | from nuscenes.eval.common.utils import quaternion_yaw, Quaternion 5 | from shapely 
import affinity, ops 6 | from shapely.geometry import LineString, box, MultiPolygon, MultiLineString 7 | import math 8 | from .const import CLASS2LABEL 9 | 10 | 11 | def isRotationMatrix(R): 12 | Rt = np.transpose(R) 13 | shouldBeIdentity = np.dot(Rt, R) 14 | I = np.identity(3, dtype=R.dtype) 15 | n = np.linalg.norm(I - shouldBeIdentity) 16 | return n < 1e-6 17 | 18 | 19 | def rotationMatrixToEulerAngles(R): 20 | assert (isRotationMatrix(R)) 21 | sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0]) 22 | singular = sy < 1e-6 23 | 24 | if not singular: 25 | x = math.atan2(R[2, 1], R[2, 2]) 26 | y = math.atan2(-R[2, 0], sy) 27 | z = math.atan2(R[1, 0], R[0, 0]) 28 | else: 29 | x = math.atan2(-R[1, 2], R[1, 1]) 30 | y = math.atan2(-R[2, 0], sy) 31 | z = 0 32 | 33 | return np.array([x, y, z]) 34 | 35 | 36 | def eulerAnglesToRotationMatrix(theta): 37 | R_x = np.array([[1, 0, 0], 38 | [0, math.cos(theta[0]), -math.sin(theta[0])], 39 | [0, math.sin(theta[0]), math.cos(theta[0])] 40 | ]) 41 | 42 | R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])], 43 | [0, 1, 0], 44 | [-math.sin(theta[1]), 0, math.cos(theta[1])] 45 | ]) 46 | 47 | R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0], 48 | [math.sin(theta[2]), math.cos(theta[2]), 0], 49 | [0, 0, 1] 50 | ]) 51 | 52 | R = np.dot(R_z, np.dot(R_y, R_x)) 53 | 54 | return R 55 | 56 | 57 | def EulerAndQuaternionTransform(intput_data): 58 | """ 59 | 四元素与欧拉角互换 60 | """ 61 | data_len = len(intput_data) 62 | angle_is_not_rad = False 63 | 64 | if data_len == 3: 65 | r = 0 66 | p = 0 67 | y = 0 68 | if angle_is_not_rad: # 180 ->pi 69 | r = math.radians(intput_data[0]) 70 | p = math.radians(intput_data[1]) 71 | y = math.radians(intput_data[2]) 72 | else: 73 | r = intput_data[0] 74 | p = intput_data[1] 75 | y = intput_data[2] 76 | 77 | sinp = math.sin(p / 2) 78 | siny = math.sin(y / 2) 79 | sinr = math.sin(r / 2) 80 | 81 | cosp = math.cos(p / 2) 82 | cosy = math.cos(y / 2) 83 | cosr = math.cos(r / 2) 84 | 85 | w = cosr * cosp * cosy + sinr * sinp * siny 86 | x = sinr * cosp * cosy - cosr * sinp * siny 87 | y = cosr * sinp * cosy + sinr * cosp * siny 88 | z = cosr * cosp * siny - sinr * sinp * cosy 89 | return [w, x, y, z] 90 | 91 | elif data_len == 4: 92 | 93 | w = intput_data[0] 94 | x = intput_data[1] 95 | y = intput_data[2] 96 | z = intput_data[3] 97 | 98 | r = math.atan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y)) 99 | p = math.asin(2 * (w * y - z * x)) 100 | y = math.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)) 101 | 102 | if angle_is_not_rad: # pi -> 180 103 | r = math.degrees(r) 104 | p = math.degrees(p) 105 | y = math.degrees(y) 106 | return [r, p, y] 107 | 108 | 109 | class VectorizedLocalMap(object): 110 | def __init__(self, 111 | dataroot, 112 | patch_size, 113 | canvas_size, 114 | line_classes=['road_divider', 'lane_divider'], 115 | ped_crossing_classes=['ped_crossing'], 116 | contour_classes=['road_segment', 'lane'], 117 | sample_dist=1, 118 | num_samples=250, 119 | padding=False, 120 | normalize=False, 121 | fixed_num=-1): 122 | ''' 123 | Args: 124 | fixed_num = -1 : no fixed num 125 | ''' 126 | super().__init__() 127 | self.data_root = dataroot 128 | self.MAPS = ['boston-seaport', 'singapore-hollandvillage', 129 | 'singapore-onenorth', 'singapore-queenstown'] 130 | self.line_classes = line_classes 131 | self.ped_crossing_classes = ped_crossing_classes 132 | self.polygon_classes = contour_classes 133 | self.nusc_maps = {} 134 | self.map_explorer = {} 135 | for loc in self.MAPS: 136 | self.nusc_maps[loc] = 
NuScenesMap(dataroot=self.data_root, map_name=loc) 137 | self.map_explorer[loc] = NuScenesMapExplorer(self.nusc_maps[loc]) 138 | 139 | self.patch_size = patch_size 140 | self.canvas_size = canvas_size 141 | self.sample_dist = sample_dist 142 | self.num_samples = num_samples 143 | self.padding = padding 144 | self.normalize = normalize 145 | self.fixed_num = fixed_num 146 | 147 | 148 | 149 | def gen_vectorized_samples(self, location, ego2global_translation, ego2global_rotation): 150 | map_pose = ego2global_translation[:2]# 151 | rotation = Quaternion(ego2global_rotation) 152 | #print('ego',map_pose) 153 | patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])#[x_center, y_center, height, width] 154 | patch_angle = quaternion_yaw(rotation) / np.pi * 180 155 | #print(patch_angle) 156 | line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes, location)#list 2 : 157 | line_vector_dict = self.line_geoms_to_vectors(line_geom) 158 | 159 | ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes, location) 160 | # ped_vector_list = self.ped_geoms_to_vectors(ped_geom) 161 | ped_vector_list = self.line_geoms_to_vectors(ped_geom)['ped_crossing'] 162 | 163 | polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes, location) 164 | poly_bound_list = self.poly_geoms_to_vectors(polygon_geom) 165 | 166 | vectors = [] 167 | for line_type, vects in line_vector_dict.items(): 168 | for line, length in vects: 169 | vectors.append((line.astype(float), length, CLASS2LABEL.get(line_type, -1))) 170 | 171 | for ped_line, length in ped_vector_list: 172 | vectors.append((ped_line.astype(float), length, CLASS2LABEL.get('ped_crossing', -1))) 173 | 174 | for contour, length in poly_bound_list: 175 | vectors.append((contour.astype(float), length, CLASS2LABEL.get('contours', -1))) 176 | 177 | # filter out -1 178 | filtered_vectors = [] 179 | for pts, pts_num, type in vectors: 180 | if type != -1: 181 | filtered_vectors.append({ 182 | 'pts': pts, 183 | 'pts_num': pts_num, 184 | 'type': type 185 | }) 186 | 187 | return filtered_vectors 188 | 189 | 190 | def get_map_geom(self, patch_box, patch_angle, layer_names, location): 191 | map_geom = [] 192 | for layer_name in layer_names: 193 | if layer_name in self.line_classes: 194 | geoms = self.map_explorer[location]._get_layer_line(patch_box, patch_angle, layer_name) 195 | map_geom.append((layer_name, geoms)) 196 | elif layer_name in self.polygon_classes: 197 | geoms = self.map_explorer[location]._get_layer_polygon(patch_box, patch_angle, layer_name) 198 | map_geom.append((layer_name, geoms)) 199 | elif layer_name in self.ped_crossing_classes: 200 | geoms = self.get_ped_crossing_line(patch_box, patch_angle, location) 201 | # geoms = self.map_explorer[location]._get_layer_polygon(patch_box, patch_angle, layer_name) 202 | map_geom.append((layer_name, geoms)) 203 | return map_geom 204 | 205 | def _one_type_line_geom_to_vectors(self, line_geom): 206 | line_vectors = [] 207 | for line in line_geom: 208 | if not line.is_empty: 209 | if line.geom_type == 'MultiLineString': 210 | for single_line in line.geoms: 211 | line_vectors.append(self.sample_pts_from_line(single_line)) 212 | elif line.geom_type == 'LineString': 213 | line_vectors.append(self.sample_pts_from_line(line)) 214 | else: 215 | raise NotImplementedError 216 | return line_vectors 217 | 218 | def poly_geoms_to_vectors(self, polygon_geom): 219 | roads = polygon_geom[0][1] 220 | lanes = polygon_geom[1][1] 221 | union_roads = 
ops.unary_union(roads) 222 | union_lanes = ops.unary_union(lanes) 223 | union_segments = ops.unary_union([union_roads, union_lanes]) 224 | max_x = self.patch_size[1] / 2 225 | max_y = self.patch_size[0] / 2 226 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2) 227 | exteriors = [] 228 | interiors = [] 229 | if union_segments.geom_type != 'MultiPolygon': 230 | union_segments = MultiPolygon([union_segments]) 231 | for poly in union_segments.geoms: 232 | exteriors.append(poly.exterior) 233 | for inter in poly.interiors: 234 | interiors.append(inter) 235 | 236 | results = [] 237 | for ext in exteriors: 238 | if ext.is_ccw: 239 | ext.coords = list(ext.coords)[::-1] 240 | lines = ext.intersection(local_patch) 241 | if isinstance(lines, MultiLineString): 242 | lines = ops.linemerge(lines) 243 | results.append(lines) 244 | 245 | for inter in interiors: 246 | if not inter.is_ccw: 247 | inter.coords = list(inter.coords)[::-1] 248 | lines = inter.intersection(local_patch) 249 | if isinstance(lines, MultiLineString): 250 | lines = ops.linemerge(lines) 251 | results.append(lines) 252 | 253 | return self._one_type_line_geom_to_vectors(results) 254 | 255 | def line_geoms_to_vectors(self, line_geom): 256 | line_vectors_dict = dict() 257 | for line_type, a_type_of_lines in line_geom:#每一种对象的直线数 258 | one_type_vectors = self._one_type_line_geom_to_vectors(a_type_of_lines) 259 | line_vectors_dict[line_type] = one_type_vectors 260 | 261 | return line_vectors_dict#dict[road_divide: ,lane_divied:list[5],5个对象均分点] 262 | 263 | def ped_geoms_to_vectors(self, ped_geom): 264 | ped_geom = ped_geom[0][1] 265 | union_ped = ops.unary_union(ped_geom) 266 | if union_ped.geom_type != 'MultiPolygon': 267 | union_ped = MultiPolygon([union_ped]) 268 | 269 | max_x = self.patch_size[1] / 2 270 | max_y = self.patch_size[0] / 2 271 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2) 272 | results = [] 273 | for ped_poly in union_ped: 274 | # rect = ped_poly.minimum_rotated_rectangle 275 | ext = ped_poly.exterior 276 | if not ext.is_ccw: 277 | ext.coords = list(ext.coords)[::-1] 278 | lines = ext.intersection(local_patch) 279 | results.append(lines) 280 | 281 | return self._one_type_line_geom_to_vectors(results) 282 | 283 | def get_ped_crossing_line(self, patch_box, patch_angle, location): 284 | def add_line(poly_xy, idx, patch, patch_angle, patch_x, patch_y, line_list): 285 | points = [(p0, p1) for p0, p1 in zip(poly_xy[0, idx:idx + 2], poly_xy[1, idx:idx + 2])] 286 | line = LineString(points) 287 | line = line.intersection(patch) 288 | if not line.is_empty: 289 | line = affinity.rotate(line, -patch_angle, origin=(patch_x, patch_y), use_radians=False) 290 | line = affinity.affine_transform(line, [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y]) 291 | line_list.append(line) 292 | 293 | patch_x = patch_box[0] 294 | patch_y = patch_box[1] 295 | 296 | patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle) 297 | line_list = [] 298 | records = getattr(self.nusc_maps[location], 'ped_crossing') 299 | for record in records: 300 | polygon = self.map_explorer[location].extract_polygon(record['polygon_token']) 301 | poly_xy = np.array(polygon.exterior.xy) 302 | dist = np.square(poly_xy[:, 1:] - poly_xy[:, :-1]).sum(0) 303 | x1, x2 = np.argsort(dist)[-2:] 304 | 305 | add_line(poly_xy, x1, patch, patch_angle, patch_x, patch_y, line_list) 306 | add_line(poly_xy, x2, patch, patch_angle, patch_x, patch_y, line_list) 307 | 308 | return line_list 309 | 310 | def sample_pts_from_line(self, line): 311 
| if self.fixed_num < 0: 312 | distances = np.arange(0, line.length, self.sample_dist) 313 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)#按照顺序坐标的? 314 | else: 315 | # fixed number of points, so distance is line.length / self.fixed_num 316 | distances = np.linspace(0, line.length, self.fixed_num) 317 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 318 | 319 | if self.normalize: 320 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]]) 321 | 322 | num_valid = len(sampled_points) 323 | 324 | if not self.padding or self.fixed_num > 0: 325 | # fixed num sample can return now! 326 | return sampled_points, num_valid 327 | 328 | # fixed distance sampling need padding! 329 | num_valid = len(sampled_points) 330 | 331 | if self.fixed_num < 0: 332 | if num_valid < self.num_samples: 333 | padding = np.zeros((self.num_samples - len(sampled_points), 2)) 334 | sampled_points = np.concatenate([sampled_points, padding], axis=0) 335 | else: 336 | sampled_points = sampled_points[:self.num_samples, :] 337 | num_valid = self.num_samples 338 | 339 | if self.normalize: 340 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]]) 341 | num_valid = len(sampled_points) 342 | 343 | return sampled_points, num_valid 344 | -------------------------------------------------------------------------------- /data/viz_data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | from matplotlib.pyplot import get_cmap 7 | DIVIDER = ['road_divider and lane_divider'] 8 | Ped = ['ped_crossing'] 9 | STATIC = ['lane and road_segment'] 10 | DYNAMIC = [ 11 | 'car', 'truck', 'bus', 12 | 'trailer', 'construction', 13 | 'pedestrian', 14 | 'motorcycle', 'bicycle', 15 | ] 16 | 17 | # many colors from 18 | # https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/color_map.py 19 | COLORS = { 20 | # dividers 21 | 'road_divider and lane_divider': (255, 200, 0), 22 | 'ped_crossing': (255, 158, 0), 23 | # static 24 | 'lane and road_segment': (90, 90, 90), 25 | # dynamic 26 | 'car': (255, 158, 0), 27 | 'truck': (255, 99, 71), 28 | 'bus': (255, 127, 80), 29 | 'trailer': (255, 140, 0), 30 | 'construction': (233, 150, 70), 31 | 'pedestrian': (0, 0, 230), 32 | 'motorcycle': (255, 61, 99), 33 | 'bicycle': (220, 20, 60), 34 | 35 | 'nothing': (200, 200, 200) 36 | } 37 | 38 | 39 | def colorize(x, colormap=None): 40 | """ 41 | x: (h w) np.uint8 0-255 42 | colormap 43 | """ 44 | try: 45 | return (255 * get_cmap(colormap)(x)[..., :3]).astype(np.uint8) 46 | except: 47 | pass 48 | 49 | if x.dtype == np.float32: 50 | x = (255 * x).astype(np.uint8) 51 | 52 | if colormap is None: 53 | return x[..., None].repeat(3, 2) 54 | 55 | return cv2.applyColorMap(x, getattr(cv2, f'COLORMAP_{colormap.upper()}')) 56 | 57 | 58 | def get_colors(semantics): 59 | return np.array([COLORS[s] for s in semantics], dtype=np.uint8) 60 | 61 | 62 | def to_image(x): 63 | return (255 * x).byte().cpu().numpy().transpose(1, 2, 0) 64 | def to_image_single(x): 65 | 66 | 67 | return (255 * x).astype(np.uint8) 68 | 69 | def to_color(x): 70 | 71 | t = 255 * np.ones_like(x) 72 | x = x[np.newaxis,:,:] 73 | t = t[np.newaxis, :, :] 74 | c = np.concatenate([t, t, x], axis=0).transpose(1, 2, 0) 75 | 76 | return c.astype(np.uint8) 77 | def greyscale(x): 78 | return (255 * x.repeat(3, 
2)).astype(np.uint8) 79 | 80 | 81 | def resize(src, dst=None, shape=None, idx=0): 82 | if dst is not None: 83 | ratio = dst.shape[idx] / src.shape[idx] 84 | elif shape is not None: 85 | ratio = shape[idx] / src.shape[idx] 86 | 87 | width = int(ratio * src.shape[1]) 88 | height = int(ratio * src.shape[0]) 89 | 90 | return cv2.resize(src, (width, height), interpolation=cv2.INTER_CUBIC) 91 | 92 | 93 | class BaseViz: 94 | SEMANTICS = DIVIDER + Ped +STATIC +DYNAMIC 95 | 96 | def __init__(self, label_indices=None, colormap='inferno'): 97 | self.label_indices = label_indices 98 | self.colors = get_colors(self.SEMANTICS)#12 3 99 | self.colormap = colormap 100 | 101 | def visualize_pred(self, bev, pred, threshold=None): 102 | """ 103 | (c, h, w) torch float {0, 1} 104 | (c, h, w) torch float [0-1] 105 | """ 106 | if isinstance(bev, torch.Tensor): 107 | bev = bev.cpu().numpy().transpose(1, 2, 0) 108 | 109 | if isinstance(pred, torch.Tensor): 110 | pred = pred.cpu().numpy().transpose(1, 2, 0) 111 | 112 | if self.label_indices is not None: 113 | bev = [bev[..., idx].max(-1) for idx in self.label_indices] 114 | bev = np.stack(bev, -1) 115 | 116 | if threshold is not None: 117 | pred = (pred > threshold).astype(np.float32) 118 | 119 | result = colorize((255 * pred.squeeze(2)).astype(np.uint8), self.colormap) 120 | 121 | return result 122 | 123 | # def visualize_pred_unused(self, bev, pred, threshold=None): 124 | # h, w, c = pred.shape 125 | 126 | # img = np.zeros((h, w, 3), dtype=np.float32) 127 | # img[...] = 0.5 128 | # colors = np.float32([ 129 | # [0, .6, 0], 130 | # [1, .7, 0], 131 | # [1, 0, 0] 132 | # ]) 133 | # tp = (pred > threshold) & (bev > threshold) 134 | # fp = (pred > threshold) & (bev < threshold) 135 | # fn = (pred <= threshold) & (bev > threshold) 136 | 137 | # for channel in range(c): 138 | # for i, m in enumerate([tp, fp, fn]): 139 | # img[m[..., channel]] = colors[i][None] 140 | 141 | # return (255 * img).astype(np.uint8) 142 | 143 | def visualize_bev(self, bev): 144 | """ 145 | (c, h, w) torch [0, 1] float 146 | 147 | returns (h, w, 3) np.uint8 148 | """ 149 | if isinstance(bev, torch.Tensor): 150 | bev = bev.cpu().numpy().transpose(1, 2, 0) 151 | 152 | h, w, c = bev.shape 153 | 154 | #assert c == len(self.SEMANTICS) 155 | 156 | # Prioritize higher class labels 157 | eps = (1e-5 * np.arange(c))[None, None]#1 1 12 158 | idx = (bev + eps).argmax(axis=-1)#200 400 给地图利用数字划分区域,取最大的区域 159 | val = np.take_along_axis(bev, idx[..., None], -1) 160 | 161 | # Spots with no labels are light grey 162 | empty = np.uint8(COLORS['nothing'])[None, None] 163 | 164 | result = (val * self.colors[idx]) + ((1 - val) * empty) 165 | result = np.uint8(result) 166 | 167 | return result 168 | 169 | def visualize_custom(self, batch, pred): 170 | return [] 171 | 172 | @torch.no_grad() 173 | def visualize(self, bev, img, pred=None, b_max=8, **kwargs): 174 | 175 | b,h,w= bev.shape# 5 h w 176 | t = torch.ones([b,h,w]).cuda() 177 | bev = t * bev 178 | if True: 179 | if pred is not None: 180 | right = self.visualize_pred(bev, pred['bev'].sigmoid()) 181 | else: 182 | right = self.visualize_bev(bev) 183 | 184 | right = [right] + self.visualize_custom(bev, pred) 185 | right = [x for x in right if x is not None] 186 | right = np.hstack(right) 187 | 188 | image = img#6 3 128 352 189 | 190 | if image is not None: 191 | imgs = [to_image(image[i]) for i in range(image.shape[0])] 192 | 193 | if len(imgs) == 6: 194 | a = np.hstack(imgs[:3]) 195 | b = np.hstack(imgs[3:]) 196 | left = resize(np.vstack((a, b)), right) 197 | if 
len(imgs) == 4: 198 | a = np.hstack(imgs[:2]) 199 | b = np.hstack(imgs[2:]) 200 | left = resize(np.vstack((a, b)), right) 201 | else: 202 | left = np.hstack([resize(x, right) for x in imgs]) 203 | 204 | yield np.hstack((left, right)) 205 | else: 206 | yield right 207 | 208 | def __call__(self, bev, img, pred=None, **kwargs): 209 | 210 | return list(self.visualize(bev=bev[1:].cuda(), img = img, pred=pred, **kwargs)) -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tqdm 3 | import os 4 | import torch 5 | from data.dataset import semantic_dataset,mydataset,vir_semantic_dataset 6 | from data.const import NUM_CLASSES 7 | from evaluation.iou import get_batch_iou 8 | from model import get_model 9 | 10 | from loss import view_loss 11 | 12 | def onehot_encoding(logits, dim=1): 13 | max_idx = torch.argmax(logits, dim, keepdim=True) 14 | one_hot = logits.new_full(logits.shape, 0) 15 | one_hot.scatter_(dim, max_idx, 1) 16 | return one_hot 17 | 18 | def eval_iou(model, val_loader): 19 | model.eval() 20 | total_intersects = 0 21 | total_union = 0 22 | total_intersects1 = 0 23 | total_union1 = 0 24 | with torch.no_grad(): 25 | for img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots, car_trans, yaw_pitch_roll, semantic_gt, instance_gt in tqdm.tqdm(val_loader): 26 | 27 | semantic, x_view_pre,view_ipm, viw_fore = model(feas.cuda(),imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(), 28 | post_trans.cuda(), post_rots.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda()) 29 | #, embedding, direction 30 | semantic_gt = semantic_gt.cuda().float() 31 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt) 32 | total_intersects += intersects 33 | total_union += union 34 | view_img_gt = view_loss(semantic_gt.cuda(), trans.cuda(), rots.cuda()) # b*n 5 h w 35 | intersects1, union1 = get_batch_iou(onehot_encoding(x_view_pre), view_img_gt) 36 | total_intersects1 += intersects1 37 | total_union1 += union1 38 | return total_intersects / (total_union + 1e-7),total_intersects1 / (total_union1 + 1e-7) 39 | 40 | def main(args): 41 | data_conf = { 42 | 'num_channels': NUM_CLASSES + 1, 43 | 'image_size': args.image_size, 44 | 'xbound': args.xbound, 45 | 'ybound': args.ybound, 46 | 'zbound': args.zbound, 47 | 'dbound': args.dbound, 48 | 'thickness': args.thickness, 49 | 'angle_class': args.angle_class, 50 | 'B': args.bsz, 51 | 'merge': True 52 | } 53 | 54 | train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers) 55 | #train_loader, val_loader = vir_semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers) 56 | 57 | model = get_model(args.model, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class) 58 | model.load_state_dict(torch.load(args.modelf), strict=False) 59 | model.cuda() 60 | res,_ = eval_iou(model,val_loader) 61 | print(res) 62 | 63 | 64 | 65 | if __name__ == '__main__': 66 | parser = argparse.ArgumentParser() 67 | # logging config 68 | parser.add_argument("--logdir", type=str, default='./runs') 69 | 70 | # nuScenes config 71 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0') 72 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini']) 73 | 74 | # model config 75 | parser.add_argument("--model", type=str, default='BiMapper') 76 | 77 | # 
training config 78 | parser.add_argument("--nepochs", type=int, default=30) 79 | parser.add_argument("--max_grad_norm", type=float, default=5.0) 80 | parser.add_argument("--pos_weight", type=float, default=2.13) 81 | parser.add_argument("--bsz", type=int, default=1) 82 | parser.add_argument("--nworkers", type=int, default=4) 83 | parser.add_argument("--lr", type=float, default=1e-3) 84 | parser.add_argument("--weight_decay", type=float, default=1e-7) 85 | 86 | # finetune config 87 | parser.add_argument('--finetune', default=False, action='store_true') 88 | parser.add_argument('--modelf', type=str, default='') 89 | 90 | # data config 91 | parser.add_argument("--thickness", type=int, default=5) 92 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352]) 93 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 94 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 95 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0]) 96 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0]) 97 | 98 | # embedding config 99 | parser.add_argument('--instance_seg', action='store_true') 100 | parser.add_argument("--embedding_dim", type=int, default=16) 101 | parser.add_argument("--delta_v", type=float, default=0.5) 102 | parser.add_argument("--delta_d", type=float, default=3.0) 103 | 104 | # direction config 105 | parser.add_argument('--direction_pred', action='store_true') 106 | parser.add_argument('--angle_class', type=int, default=36) 107 | 108 | # loss config 109 | parser.add_argument("--scale_seg", type=float, default=1.0) 110 | parser.add_argument("--scale_var", type=float, default=1.0) 111 | parser.add_argument("--scale_dist", type=float, default=1.0) 112 | parser.add_argument("--scale_direction", type=float, default=0.2) 113 | 114 | args = parser.parse_args() 115 | main(args) 116 | -------------------------------------------------------------------------------- /evaluate_json.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import tqdm 3 | 4 | from evaluation.dataset import HDMapNetEvalDataset 5 | from evaluation.chamfer_distance import semantic_mask_chamfer_dist_cum 6 | from evaluation.AP import instance_mask_AP 7 | from evaluation.iou import get_batch_iou 8 | 9 | SAMPLED_RECALLS = torch.linspace(0.1, 1, 10) 10 | THRESHOLDS = [2, 4, 6] 11 | 12 | 13 | def get_val_info(args): 14 | data_conf = { 15 | 'xbound': args.xbound, 16 | 'ybound': args.ybound, 17 | 'thickness': args.thickness, 18 | } 19 | 20 | dataset = HDMapNetEvalDataset(args.version, args.dataroot, args.eval_set, args.result_path, data_conf) 21 | 22 | data_loader = torch.utils.data.DataLoader(dataset, batch_size=args.bsz, shuffle=False, drop_last=False) 23 | 24 | total_CD1 = torch.zeros(args.max_channel).cuda() 25 | total_CD2 = torch.zeros(args.max_channel).cuda() 26 | total_CD_num1 = torch.zeros(args.max_channel).cuda() 27 | total_CD_num2 = torch.zeros(args.max_channel).cuda() 28 | total_intersect = torch.zeros(args.max_channel).cuda() 29 | total_union = torch.zeros(args.max_channel).cuda() 30 | AP_matrix = torch.zeros((args.max_channel, len(THRESHOLDS))).cuda()#3 3 31 | AP_count_matrix = torch.zeros((args.max_channel, len(THRESHOLDS))).cuda() 32 | 33 | print('running eval...') 34 | for pred_map, confidence_level, gt_map in tqdm.tqdm(data_loader): 35 | # iou 36 | pred_map = pred_map.cuda() 37 | confidence_level = confidence_level.cuda() 38 | gt_map = 
gt_map.cuda() 39 | 40 | intersect, union = get_batch_iou(pred_map, gt_map) 41 | CD1, CD2, num1, num2 = semantic_mask_chamfer_dist_cum(pred_map, gt_map, args.xbound[2], args.ybound[2], threshold=args.CD_threshold)#5 42 | 43 | instance_mask_AP(AP_matrix, AP_count_matrix, pred_map, gt_map, args.xbound[2], args.ybound[2], 44 | confidence_level, THRESHOLDS, sampled_recalls=SAMPLED_RECALLS) 45 | 46 | total_intersect += intersect.cuda() 47 | total_union += union.cuda() 48 | total_CD1 += CD1 49 | total_CD2 += CD2 50 | total_CD_num1 += num1 51 | total_CD_num2 += num2 52 | 53 | CD_pred = total_CD1 / total_CD_num1 54 | CD_label = total_CD2 / total_CD_num2 55 | CD = (total_CD1 + total_CD2) / (total_CD_num1 + total_CD_num2) 56 | CD_pred[CD_pred > args.CD_threshold] = args.CD_threshold 57 | CD_label[CD_label > args.CD_threshold] = args.CD_threshold 58 | CD[CD > args.CD_threshold] = args.CD_threshold 59 | return { 60 | 'iou': total_intersect / total_union, 61 | 'CD_pred': CD_pred, 62 | 'CD_label': CD_label, 63 | 'CD': CD, 64 | 'Average_precision': AP_matrix / AP_count_matrix, 65 | } 66 | 67 | 68 | if __name__ == '__main__': 69 | import argparse 70 | parser = argparse.ArgumentParser(description='Evaluate nuScenes local HD Map Construction Results.') 71 | parser.add_argument('--result_path', type=str) 72 | parser.add_argument('--dataroot', type=str, default='dataset/nuScenes/') 73 | parser.add_argument('--bsz', type=int, default=4) 74 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini']) 75 | parser.add_argument('--eval_set', type=str, default='mini_val', choices=['train', 'val', 'test', 'mini_train', 'mini_val']) 76 | parser.add_argument('--thickness', type=int, default=5) 77 | parser.add_argument('--max_channel', type=int, default=3) 78 | parser.add_argument('--CD_threshold', type=int, default=5) 79 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 80 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 81 | 82 | args = parser.parse_args() 83 | 84 | print(get_val_info(args)) 85 | -------------------------------------------------------------------------------- /export_gt_to_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tqdm import tqdm 4 | 5 | from nuscenes import NuScenes 6 | from nuscenes.utils.splits import create_splits_scenes 7 | 8 | from data.vector_map import VectorizedLocalMap 9 | from data.rasterize import rasterize_map 10 | 11 | import mmcv 12 | 13 | 14 | def main(args): 15 | patch_h = args.ybound[1] - args.ybound[0] 16 | patch_w = args.xbound[1] - args.xbound[0] 17 | canvas_h = int(patch_h / args.ybound[2]) 18 | canvas_w = int(patch_w / args.xbound[2]) 19 | patch_size = (patch_h, patch_w) 20 | canvas_size = (canvas_h, canvas_w) 21 | 22 | submission = { 23 | "meta": { 24 | "use_camera": True, 25 | "use_lidar": False, 26 | "use_radar": False, 27 | "use_external": False, 28 | "vector": not args.raster 29 | }, 30 | "results": {} 31 | } 32 | 33 | scenes = create_splits_scenes()[args.eval_set] 34 | nusc = NuScenes(version=args.version, dataroot=args.dataroot, verbose=False) 35 | vector_map = VectorizedLocalMap(args.dataroot, patch_size=patch_size, canvas_size=canvas_size) 36 | 37 | samples = [samp for samp in nusc.sample if nusc.get('scene', samp['scene_token'])['name'] in scenes] 38 | for rec in tqdm(samples): 39 | location = nusc.get('log', nusc.get('scene', rec['scene_token'])['log_token'])['location'] 40 | 
ego_pose = nusc.get('ego_pose', nusc.get('sample_data', rec['data']['LIDAR_TOP'])['ego_pose_token']) 41 | vectors = vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation']) 42 | if args.raster: 43 | pred_map, confidence_level = rasterize_map(vectors, patch_size, canvas_size, args.max_channel, args.thickness) 44 | submission['results'][rec['token']] = { 45 | 'map': pred_map, 46 | 'confidence_level': confidence_level 47 | } 48 | else: 49 | for vector in vectors: 50 | vector['confidence_level'] = 1 51 | vector['pts'] = vector['pts'].tolist() 52 | submission['results'][rec['token']] = vectors 53 | 54 | with open(args.output, 'w') as f: 55 | json.dump(submission, f) 56 | print(f"exported to {args.output}") 57 | 58 | 59 | if __name__ == '__main__': 60 | import argparse 61 | parser = argparse.ArgumentParser(description='Demo Script to turn GT into json submission.') 62 | parser.add_argument('--dataroot', type=str, default='dataset/nuScenes/') 63 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini']) 64 | parser.add_argument('--eval_set', type=str, default='mini_val', choices=['train', 'val', 'test', 'mini_train', 'mini_val']) 65 | parser.add_argument('--output', type=str, default='submission.json') 66 | parser.add_argument('--thickness', type=int, default=1) 67 | parser.add_argument('--max_channel', type=int, default=3) 68 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 69 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 70 | parser.add_argument('--raster', action='store_true', default=False) 71 | args = parser.parse_args() 72 | 73 | main(args) 74 | -------------------------------------------------------------------------------- /export_pred_to_json.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import mmcv 3 | import tqdm 4 | import torch 5 | 6 | from data.dataset import semantic_dataset 7 | from data.const import NUM_CLASSES 8 | from model import get_model 9 | from postprocess.vectorize import vectorize 10 | 11 | 12 | def gen_dx_bx(xbound, ybound): 13 | dx = [row[2] for row in [xbound, ybound]] 14 | bx = [row[0] + row[2] / 2.0 for row in [xbound, ybound]] 15 | nx = [(row[1] - row[0]) / row[2] for row in [xbound, ybound]] 16 | return dx, bx, nx 17 | 18 | def export_to_json(model, val_loader, angle_class, args): 19 | submission = { 20 | "meta": { 21 | "use_camera": True, 22 | "use_lidar": False, 23 | "use_radar": False, 24 | "use_external": False, 25 | "vector": True, 26 | }, 27 | "results": {} 28 | } 29 | 30 | dx, bx, nx = gen_dx_bx(args.xbound, args.ybound) 31 | 32 | model.eval() 33 | with torch.no_grad(): 34 | for batchi, (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll, segmentation_gt, instance_gt, direction_gt) in enumerate(tqdm.tqdm(val_loader)): 35 | segmentation, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(), 36 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(), 37 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda()) 38 | 39 | for si in range(segmentation.shape[0]): 40 | coords, confidences, line_types = vectorize(segmentation[si], embedding[si], direction[si], angle_class) 41 | vectors = [] 42 | for coord, confidence, line_type in zip(coords, confidences, line_types): 43 | vector = {'pts': coord * dx + bx, 'pts_num': len(coord), "type": line_type, "confidence_level": 
confidence} 44 | vectors.append(vector) 45 | rec = val_loader.dataset.samples[batchi * val_loader.batch_size + si] 46 | submission['results'][rec['token']] = vectors 47 | 48 | mmcv.dump(submission, args.output) 49 | 50 | 51 | def main(args): 52 | data_conf = { 53 | 'num_channels': NUM_CLASSES + 1, 54 | 'image_size': args.image_size, 55 | 'xbound': args.xbound, 56 | 'ybound': args.ybound, 57 | 'zbound': args.zbound, 58 | 'dbound': args.dbound, 59 | 'thickness': args.thickness, 60 | 'angle_class': args.angle_class, 61 | } 62 | 63 | train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers) 64 | model = get_model(args.model, data_conf, True, args.embedding_dim, True, args.angle_class) 65 | model.load_state_dict(torch.load(args.modelf), strict=False) 66 | model.cuda() 67 | export_to_json(model, val_loader, args.angle_class, args) 68 | 69 | 70 | if __name__ == '__main__': 71 | parser = argparse.ArgumentParser() 72 | # nuScenes config 73 | parser.add_argument('--dataroot', type=str, default='dataset/nuScenes/') 74 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini']) 75 | 76 | # model config 77 | parser.add_argument("--model", type=str, default='HDMapNet_cam') 78 | 79 | # training config 80 | parser.add_argument("--bsz", type=int, default=4) 81 | parser.add_argument("--nworkers", type=int, default=10) 82 | 83 | parser.add_argument('--modelf', type=str, default=None) 84 | 85 | # data config 86 | parser.add_argument("--thickness", type=int, default=5) 87 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352]) 88 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 89 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 90 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0]) 91 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0]) 92 | 93 | # embedding config 94 | parser.add_argument("--embedding_dim", type=int, default=16) 95 | 96 | # direction config 97 | parser.add_argument('--angle_class', type=int, default=36) 98 | 99 | # output 100 | parser.add_argument("--output", type=str, default='output.json') 101 | 102 | args = parser.parse_args() 103 | main(args) 104 | -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class FocalLoss(nn.Module): 7 | def __init__(self, alpha=1, gamma=2, reduce='mean'): 8 | super(FocalLoss, self).__init__() 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.reduce = reduce 12 | 13 | def forward(self, inputs, targets): 14 | BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduce=False) 15 | pt = torch.exp(-BCE_loss) 16 | F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss 17 | 18 | if self.reduce == 'mean': 19 | return torch.mean(F_loss) 20 | elif self.reduce == 'sum': 21 | return torch.sum(F_loss) 22 | else: 23 | raise NotImplementedError 24 | 25 | 26 | class SimpleLoss(torch.nn.Module): 27 | def __init__(self, pos_weight): 28 | super(SimpleLoss, self).__init__() 29 | self.loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([pos_weight]))#内部含有sigmod 30 | 31 | def forward(self, ypred, ytgt): 32 | loss = self.loss_fn(ypred, ytgt) 33 | return loss 34 | 35 | 36 | class 
DiscriminativeLoss(nn.Module): 37 | def __init__(self, embed_dim, delta_v, delta_d): 38 | super(DiscriminativeLoss, self).__init__() 39 | self.embed_dim = embed_dim 40 | self.delta_v = delta_v 41 | self.delta_d = delta_d 42 | 43 | def forward(self, embedding, seg_gt): 44 | if embedding is None: 45 | return 0, 0, 0 46 | bs = embedding.shape[0]#4 47 | 48 | var_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device) 49 | dist_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device) 50 | reg_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device) 51 | 52 | for b in range(bs): 53 | embedding_b = embedding[b] # (embed_dim, H, W) 16 200 400 预测16个对象位置 54 | seg_gt_b = seg_gt[b]# 200 400 55 | 56 | labels = torch.unique(seg_gt_b)#挑出所有不重复数字1 2 ...,所有对象 57 | labels = labels[labels != 0] 58 | num_lanes = len(labels) 59 | if num_lanes == 0: 60 | # please refer to issue here: https://github.com/harryhan618/LaneNet/issues/12 61 | _nonsense = embedding.sum() 62 | _zero = torch.zeros_like(_nonsense) 63 | var_loss = var_loss + _nonsense * _zero 64 | dist_loss = dist_loss + _nonsense * _zero 65 | reg_loss = reg_loss + _nonsense * _zero 66 | continue 67 | 68 | centroid_mean = [] 69 | for lane_idx in labels:#每个类别数字 70 | seg_mask_i = (seg_gt_b == lane_idx)#取出其中一个类别的数字 71 | if not seg_mask_i.any(): 72 | continue 73 | embedding_i = embedding_b[:, seg_mask_i] 74 | 75 | mean_i = torch.mean(embedding_i, dim=1) 76 | centroid_mean.append(mean_i) 77 | 78 | # ---------- var_loss ------------- 79 | var_loss = var_loss + torch.mean(F.relu(torch.norm(embedding_i-mean_i.reshape(self.embed_dim, 1), dim=0) - self.delta_v) ** 2) / num_lanes 80 | centroid_mean = torch.stack(centroid_mean) # (n_lane, embed_dim) 81 | 82 | if num_lanes > 1: 83 | centroid_mean1 = centroid_mean.reshape(-1, 1, self.embed_dim) 84 | centroid_mean2 = centroid_mean.reshape(1, -1, self.embed_dim) 85 | dist = torch.norm(centroid_mean1-centroid_mean2, dim=2) # shape (num_lanes, num_lanes) 86 | dist = dist + torch.eye(num_lanes, dtype=dist.dtype, device=dist.device) * self.delta_d # diagonal elements are 0, now mask above delta_d 87 | 88 | # divided by two for double calculated loss above, for implementation convenience 89 | dist_loss = dist_loss + torch.sum(F.relu(-dist + self.delta_d)**2) / (num_lanes * (num_lanes-1)) / 2 90 | 91 | # reg_loss is not used in original paper 92 | # reg_loss = reg_loss + torch.mean(torch.norm(centroid_mean, dim=1)) 93 | 94 | var_loss = var_loss / bs 95 | dist_loss = dist_loss / bs 96 | reg_loss = reg_loss / bs 97 | return var_loss, dist_loss, reg_loss 98 | 99 | 100 | def calc_loss(): 101 | pass 102 | 103 | xmin, xmax = -5, 5#-30 30 104 | xx_l = xmax- xmin 105 | num_x = 32 106 | zmin, zmax = 3, 29 107 | zz_l = zmax - zmin 108 | num_z = 88 109 | #h = 0.5 110 | def plane_grid_cam(trans, rots,h): 111 | 112 | B,N,_ = trans.size()#4 6 3 113 | 114 | 115 | # y = torch.linspace(xmin, xmax, num_x, dtype=torch.double).cuda() 116 | # x = torch.linspace(ymin, ymax, num_y, dtype=torch.double).cuda() 117 | x = torch.linspace(xmin, xmax, num_x) 118 | z = torch.linspace(zmin, zmax, num_z) 119 | 120 | x = x.cuda() 121 | z = z.cuda() 122 | 123 | x, z = torch.meshgrid(x, z) 124 | 125 | x = x.flatten()#1 3200 126 | z = z.flatten() 127 | x = x.unsqueeze(0).repeat(B*N, 1)#24 3200 128 | z = z.unsqueeze(0).repeat(B*N, 1) 129 | 130 | y = torch.ones_like(x) 131 | y= y*h 132 | d = torch.ones_like(x) 133 | y = y.cuda() 134 | d = d.cuda() 135 | 136 | coords = torch.stack([x, y, z,d], axis=1)# 24 3 3200 137 | Rs = 
torch.eye(4, device=rots.device).repeat(B, N, 1, 1)# to ego (vehicle) coordinates 138 | Rs[:, :, :3, :3] = rots 139 | Ts = torch.eye(4, device=trans.device).repeat(B, N, 1, 1) 140 | Ts[:, :, :3, 3] = trans 141 | RTs = Rs @ Ts#b 6 4 4 142 | RTs = RTs.view(-1,4,4) 143 | # rots = rots.view(-1,3,3) 144 | # ego_coords = rots @ coords#24 4 4 * 24 4 3200 = 24 4 3200 145 | # trans = trans.view(-1,3,1) 146 | # ego_coords += trans 147 | #RTs = RTs.cuda() 148 | ego_coords = RTs @ coords 149 | ego_coords = torch.stack([ego_coords[:, 0], ego_coords[:, 1]], axis=1)# 24 2 3200 150 | ego_coords = ego_coords.view(B*N,2,num_x,num_z) 151 | ego_coords = ego_coords.view(B, N, 2, num_x, num_z) 152 | ego_coords = ego_coords.permute(0, 1, 3, 4, 2).contiguous()#4 6 40 80 2 153 | return ego_coords 154 | 155 | def get_cam_gt(img,ego_coords):# BEV ground truth passed in 156 | _, img_c , img_h, img_w= img.shape#4 5 200 400 157 | img = img.permute(0,2,3,1).contiguous()#4 200 400 5 158 | B, N, pix_h, pix_w, pix_c = ego_coords.shape#4 6 40 80 2 159 | out_shape = (B, N, pix_h, pix_w, img_c) 160 | 161 | pix_x, pix_y = torch.split(ego_coords, 1, dim=-1) # [B, n,pix_h, pix_w,1] 162 | pix_x = pix_x / 0.15 + 200 163 | pix_y = pix_y / 0.15 + 100 164 | # Rounding 165 | pix_x0 = torch.floor(pix_x) 166 | pix_x1 = pix_x0 + 1 167 | pix_y0 = torch.floor(pix_y) 168 | pix_y1 = pix_y0 + 1 169 | 170 | # Clip within image boundary 171 | y_max = (img_h - 1) 172 | x_max = (img_w - 1) 173 | 174 | pix_x0 = torch.clip(pix_x0, 0, x_max) 175 | pix_y0 = torch.clip(pix_y0, 0, y_max) 176 | pix_x1 = torch.clip(pix_x1, 0, x_max) 177 | pix_y1 = torch.clip(pix_y1, 0, y_max) # 178 | 179 | # Weights [B, n,pix_h, pix_w, 1] 180 | wt_x0 = pix_x1 - pix_x 181 | wt_x1 = pix_x - pix_x0 182 | wt_y0 = pix_y1 - pix_y 183 | wt_y1 = pix_y - pix_y0 184 | 185 | # indices in the image to sample from 186 | dim = img_w 187 | 188 | # Apply the lower and upper bound pix coord 189 | base_y0 = pix_y0 * dim 190 | base_y1 = pix_y1 * dim 191 | 192 | # 4 corner vertices 193 | idx00 = (pix_x0 + base_y0).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long() 194 | idx01 = (pix_x0 + base_y1).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long() 195 | idx10 = (pix_x1 + base_y0).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long() 196 | idx11 = (pix_x1 + base_y1).view(B,N, -1, 1).repeat(1, 1, 1, img_c).long() 197 | 198 | # Gather pixels from image using vertices 199 | 200 | imgs_flat = img.reshape([B, -1, img_c]).unsqueeze(1) 201 | imgs_flat = imgs_flat.repeat(1,N,1,1) 202 | #imgs_flat = imgs_flat.cuda() 203 | im00 = torch.gather(imgs_flat, 2, idx00).reshape(out_shape) 204 | im01 = torch.gather(imgs_flat, 2, idx01).reshape(out_shape) 205 | im10 = torch.gather(imgs_flat, 2, idx10).reshape(out_shape) 206 | im11 = torch.gather(imgs_flat, 2, idx11).reshape(out_shape) 207 | 208 | # Apply weights [pix_h, pix_w, 1] 209 | w00 = wt_x0 * wt_y0 210 | w01 = wt_x0 * wt_y1 211 | w10 = wt_x1 * wt_y0 212 | w11 = wt_x1 * wt_y1 213 | gt = w00 * im00 + w01 * im01 + w10 * im10 + w11 * im11 214 | return gt#b n h w 5 215 | 216 | 217 | def view_loss(gt,trans,rots,h=0.5): 218 | 219 | ego_coords = plane_grid_cam(trans, rots,h)#4 6 2 40 80 220 | cam_gt = get_cam_gt(gt,ego_coords)#b n h w 5 221 | cam_gt = cam_gt.permute(0,1,4,2,3).contiguous() 222 | b,n,c,h,w = cam_gt.size() 223 | cam_gt = cam_gt.view(-1, c, h, w) 224 | return cam_gt 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /model/HDMapNet/base.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from efficientnet_pytorch import EfficientNet 5 | from torchvision.models.resnet import resnet18 6 | 7 | 8 | class Up(nn.Module): 9 | def __init__(self, in_channels, out_channels, scale_factor=2): 10 | super().__init__() 11 | 12 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', 13 | align_corners=True) 14 | 15 | self.conv = nn.Sequential( 16 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), 17 | nn.BatchNorm2d(out_channels), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), 20 | nn.BatchNorm2d(out_channels), 21 | nn.ReLU(inplace=True) 22 | ) 23 | 24 | def forward(self, x1, x2): 25 | x1 = self.up(x1) 26 | x1 = torch.cat([x2, x1], dim=1) 27 | return self.conv(x1) 28 | 29 | 30 | class CamEncode(nn.Module): 31 | def __init__(self, C): 32 | super(CamEncode, self).__init__() 33 | self.C = C 34 | 35 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0") 36 | self.up1 = Up(320+112, self.C) 37 | 38 | def get_eff_depth(self, x): 39 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 40 | endpoints = dict() 41 | 42 | # Stem 43 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x))) 44 | prev_x = x 45 | 46 | # Blocks 47 | for idx, block in enumerate(self.trunk._blocks): 48 | drop_connect_rate = self.trunk._global_params.drop_connect_rate 49 | if drop_connect_rate: 50 | drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate 51 | x = block(x, drop_connect_rate=drop_connect_rate) 52 | if prev_x.size(2) > x.size(2): 53 | endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x 54 | prev_x = x 55 | 56 | # Head 57 | endpoints['reduction_{}'.format(len(endpoints)+1)] = x 58 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4']) 59 | return x 60 | 61 | def forward(self, x): 62 | return self.get_eff_depth(x) 63 | 64 | 65 | class BevEncode(nn.Module): 66 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37): 67 | super(BevEncode, self).__init__() 68 | trunk = resnet18(pretrained=False, zero_init_residual=True) 69 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, 70 | bias=False) 71 | self.bn1 = trunk.bn1 72 | self.relu = trunk.relu 73 | 74 | self.layer1 = trunk.layer1 75 | self.layer2 = trunk.layer2 76 | self.layer3 = trunk.layer3 77 | 78 | self.up1 = Up(64 + 256, 256, scale_factor=4) 79 | self.up2 = nn.Sequential( 80 | nn.Upsample(scale_factor=2, mode='bilinear', 81 | align_corners=True), 82 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 83 | nn.BatchNorm2d(128), 84 | nn.ReLU(inplace=True), 85 | nn.Conv2d(128, outC, kernel_size=1, padding=0), 86 | ) 87 | 88 | self.instance_seg = instance_seg 89 | if instance_seg: 90 | self.up1_embedded = Up(64 + 256, 256, scale_factor=4) 91 | self.up2_embedded = nn.Sequential( 92 | nn.Upsample(scale_factor=2, mode='bilinear', 93 | align_corners=True), 94 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 95 | nn.BatchNorm2d(128), 96 | nn.ReLU(inplace=True), 97 | nn.Conv2d(128, embedded_dim, kernel_size=1, padding=0), 98 | ) 99 | 100 | self.direction_pred = direction_pred 101 | if direction_pred: 102 | self.up1_direction = Up(64 + 256, 256, scale_factor=4) 103 | self.up2_direction = nn.Sequential( 104 | 
nn.Upsample(scale_factor=2, mode='bilinear', 105 | align_corners=True), 106 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 107 | nn.BatchNorm2d(128), 108 | nn.ReLU(inplace=True), 109 | nn.Conv2d(128, direction_dim, kernel_size=1, padding=0), 110 | ) 111 | 112 | def forward(self, x): 113 | x = self.conv1(x) 114 | x = self.bn1(x) 115 | x = self.relu(x) 116 | 117 | x1 = self.layer1(x) 118 | x = self.layer2(x1) 119 | x2 = self.layer3(x) 120 | 121 | x = self.up1(x2, x1) 122 | x = self.up2(x) 123 | 124 | if self.instance_seg: 125 | x_embedded = self.up1_embedded(x2, x1) 126 | x_embedded = self.up2_embedded(x_embedded) 127 | else: 128 | x_embedded = None 129 | 130 | if self.direction_pred: 131 | x_direction = self.up1_embedded(x2, x1) 132 | x_direction = self.up2_direction(x_direction) 133 | else: 134 | x_direction = None 135 | 136 | return x, x_embedded, x_direction 137 | -------------------------------------------------------------------------------- /model/HDMapNet/hdmapnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .homography import bilinear_sampler, IPM 5 | from .utils import plane_grid_2d, get_rot_2d, cam_to_pixel 6 | from .pointpillar import PointPillarEncoder 7 | from .base import CamEncode, BevEncode 8 | from data.utils import gen_dx_bx 9 | 10 | 11 | class ViewTransformation(nn.Module): 12 | def __init__(self, fv_size, bv_size, n_views=6): 13 | super(ViewTransformation, self).__init__() 14 | self.n_views = n_views 15 | self.hw_mat = [] 16 | self.bv_size = bv_size 17 | fv_dim = fv_size[0] * fv_size[1] 18 | bv_dim = bv_size[0] * bv_size[1] 19 | for i in range(self.n_views): 20 | fc_transform = nn.Sequential( 21 | nn.Linear(fv_dim, bv_dim), 22 | nn.ReLU(), 23 | nn.Linear(bv_dim, bv_dim), 24 | nn.ReLU() 25 | ) 26 | self.hw_mat.append(fc_transform) 27 | self.hw_mat = nn.ModuleList(self.hw_mat) 28 | 29 | def forward(self, feat): 30 | B, N, C, H, W = feat.shape 31 | feat = feat.view(B, N, C, H*W) 32 | outputs = [] 33 | for i in range(N): 34 | output = self.hw_mat[i](feat[:, i]).view(B, C, self.bv_size[0], self.bv_size[1]) 35 | outputs.append(output) 36 | outputs = torch.stack(outputs, 1) 37 | return outputs 38 | 39 | 40 | class HDMapNet(nn.Module): 41 | def __init__(self, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=36, lidar=False): 42 | super(HDMapNet, self).__init__() 43 | self.camC = 64 44 | self.downsample = 16 45 | 46 | dx, bx, nx = gen_dx_bx(data_conf['xbound'], data_conf['ybound'], data_conf['zbound']) 47 | #nx = [400,200,1] 48 | final_H, final_W = nx[1].item(), nx[0].item() 49 | 50 | self.camencode = CamEncode(self.camC)# camera feature encoder 51 | fv_size = (data_conf['image_size'][0]//self.downsample, data_conf['image_size'][1]//self.downsample)# image feature size after downsampling 52 | bv_size = (final_H//5, final_W//5)# BEV feature size 
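# Shape note (a sketch, assuming the default bounds used by the export/eval scripts in this
# repo: image_size = [128, 352], xbound = [-30.0, 30.0, 0.15], ybound = [-15.0, 15.0, 0.15]):
#   nx      = [400, 200, 1]            -> final_H, final_W = 200, 400 (BEV canvas)
#   fv_size = (128 // 16, 352 // 16)   = (8, 22)  per-camera feature map after CamEncode
#   bv_size = (200 // 5, 400 // 5)     = (40, 80) coarse BEV grid produced by ViewTransformation
# ViewTransformation learns one MLP per camera mapping the flattened 8 * 22 = 176 front-view
# cells to the flattened 40 * 80 = 3200 BEV cells; with these defaults the IPM below warps the
# result onto a 100 x 200 grid, which the up_sampler doubles to the final 200 x 400 canvas.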
53 | self.view_fusion = ViewTransformation(fv_size=fv_size, bv_size=bv_size)#初始化变换对象 54 | 55 | res_x = bv_size[1] * 3 // 4 56 | ipm_xbound = [-res_x, res_x, 4*res_x/final_W] 57 | ipm_ybound = [-res_x/2, res_x/2, 2*res_x/final_H] 58 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=6, C=self.camC, extrinsic=True) 59 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 60 | # self.up_sampler = nn.Upsample(scale_factor=5, mode='bilinear', align_corners=True) 61 | 62 | self.lidar = lidar 63 | if lidar: 64 | self.pp = PointPillarEncoder(128, data_conf['xbound'], data_conf['ybound'], data_conf['zbound']) 65 | self.bevencode = BevEncode(inC=self.camC+128, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1) 66 | else: 67 | self.bevencode = BevEncode(inC=self.camC, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1) 68 | 69 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans): 70 | B, N, _, _ = intrins.shape 71 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 72 | 73 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 74 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous() 75 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 76 | Ts[:, :, :3, 3] = -trans 77 | RTs = Rs @ Ts 78 | 79 | post_RTs = None 80 | 81 | return Ks, RTs, post_RTs 82 | 83 | def get_cam_feats(self, x): 84 | B, N, C, imH, imW = x.shape 85 | x = x.view(B*N, C, imH, imW) 86 | x = self.camencode(x) 87 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample) 88 | return x 89 | 90 | def forward(self, img, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll): 91 | x = self.get_cam_feats(img) 92 | x = self.view_fusion(x) 93 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans) 94 | topdown = self.ipm(x, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs) 95 | topdown = self.up_sampler(topdown) 96 | if self.lidar: 97 | lidar_feature = self.pp(lidar_data, lidar_mask) 98 | topdown = torch.cat([topdown, lidar_feature], dim=1) 99 | return self.bevencode(topdown) 100 | -------------------------------------------------------------------------------- /model/HDMapNet/homography.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import cv2 5 | 6 | CAM_FL = 0 7 | CAM_F = 1 8 | CAM_FR = 2 9 | CAM_BL = 3 10 | CAM_B = 4 11 | CAM_BR = 5 12 | 13 | 14 | # ========================================================= 15 | # Projections 16 | # ========================================================= 17 | def rotation_from_euler(rolls, pitchs, yaws, cuda=True): 18 | """ 19 | Get rotation matrix 20 | Args: 21 | roll, pitch, yaw: In degrees 22 | 23 | Returns: 24 | R: [B, 4, 4] 25 | """ 26 | B = len(rolls) 27 | 28 | si, sj, sk = torch.sin(rolls), torch.sin(pitchs), torch.sin(yaws) 29 | ci, cj, ck = torch.cos(rolls), torch.cos(pitchs), torch.cos(yaws) 30 | cc, cs = ci * ck, ci * sk 31 | sc, ss = si * ck, si * sk 32 | 33 | R = torch.eye(4).unsqueeze(0).repeat(B, 1, 1) 34 | if cuda: 35 | R = R.cuda() 36 | R[:, 0, 0] = cj * ck 37 | R[:, 0, 1] = sj * sc - cs 38 | R[:, 0, 2] = sj * cc + ss 39 | R[:, 1, 0] = cj * sk 40 | R[:, 1, 1] = sj * ss + cc 41 | 
R[:, 1, 2] = sj * cs - sc 42 | R[:, 2, 0] = -sj 43 | R[:, 2, 1] = cj * si 44 | R[:, 2, 2] = cj * ci 45 | return R 46 | 47 | 48 | def perspective(cam_coords, proj_mat, h, w, extrinsic, offset=None): 49 | """ 50 | P = proj_mat @ (x, y, z, 1) 51 | Project cam2pixel 52 | 53 | Args: 54 | cam_coords: [B, 4, npoints] 55 | proj_mat: [B, 4, 4] 56 | 57 | Returns: 58 | pix coords: [B, h, w, 2] 59 | """ 60 | eps = 1e-7 61 | pix_coords = proj_mat @ cam_coords 62 | 63 | N, _, _ = pix_coords.shape 64 | 65 | if extrinsic: 66 | pix_coords[:, 0] += offset[0] / 2 67 | pix_coords[:, 2] -= offset[1] / 8 68 | pix_coords = torch.stack([pix_coords[:, 2], pix_coords[:, 0]], axis=1) 69 | else: 70 | pix_coords = pix_coords[:, :2, :] / (pix_coords[:, 2, :][:, None, :] + eps) 71 | pix_coords = pix_coords.view(N, 2, h, w) 72 | pix_coords = pix_coords.permute(0, 2, 3, 1).contiguous() 73 | return pix_coords 74 | 75 | 76 | def bilinear_sampler(imgs, pix_coords): 77 | """ 78 | Construct a new image by bilinear sampling from the input image. 79 | Args: 80 | imgs: [B, H, W, C] 81 | pix_coords: [B, h, w, 2] 82 | :return: 83 | sampled image [B, h, w, c] 84 | """ 85 | B, img_h, img_w, img_c = imgs.shape 86 | B, pix_h, pix_w, pix_c = pix_coords.shape 87 | out_shape = (B, pix_h, pix_w, img_c) 88 | 89 | pix_x, pix_y = torch.split(pix_coords, 1, dim=-1) # [B, pix_h, pix_w, 1] 90 | 91 | # Rounding 92 | pix_x0 = torch.floor(pix_x) 93 | pix_x1 = pix_x0 + 1 94 | pix_y0 = torch.floor(pix_y) 95 | pix_y1 = pix_y0 + 1 96 | 97 | # Clip within image boundary 98 | y_max = (img_h - 1) 99 | x_max = (img_w - 1) 100 | 101 | pix_x0 = torch.clip(pix_x0, 0, x_max) 102 | pix_y0 = torch.clip(pix_y0, 0, y_max) 103 | pix_x1 = torch.clip(pix_x1, 0, x_max) 104 | pix_y1 = torch.clip(pix_y1, 0, y_max) 105 | 106 | # Weights [B, pix_h, pix_w, 1] 107 | wt_x0 = pix_x1 - pix_x 108 | wt_x1 = pix_x - pix_x0 109 | wt_y0 = pix_y1 - pix_y 110 | wt_y1 = pix_y - pix_y0 111 | 112 | # indices in the image to sample from 113 | dim = img_w 114 | 115 | # Apply the lower and upper bound pix coord 116 | base_y0 = pix_y0 * dim 117 | base_y1 = pix_y1 * dim 118 | 119 | # 4 corner vert ices 120 | idx00 = (pix_x0 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long() 121 | idx01 = (pix_x0 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long() 122 | idx10 = (pix_x1 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long() 123 | idx11 = (pix_x1 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long() 124 | 125 | # Gather pixels from image using vertices 126 | imgs_flat = imgs.reshape([B, -1, img_c]) 127 | 128 | im00 = torch.gather(imgs_flat, 1, idx00).reshape(out_shape) 129 | im01 = torch.gather(imgs_flat, 1, idx01).reshape(out_shape) 130 | im10 = torch.gather(imgs_flat, 1, idx10).reshape(out_shape) 131 | im11 = torch.gather(imgs_flat, 1, idx11).reshape(out_shape) 132 | 133 | # Apply weights [pix_h, pix_w, 1] 134 | w00 = wt_x0 * wt_y0 135 | w01 = wt_x0 * wt_y1 136 | w10 = wt_x1 * wt_y0 137 | w11 = wt_x1 * wt_y1 138 | output = w00 * im00 + w01 * im01 + w10 * im10 + w11 * im11 139 | return output 140 | 141 | 142 | def plane_grid(xbound, ybound, zs, yaws, rolls, pitchs, cuda=True): 143 | B = len(zs) 144 | 145 | xmin, xmax = xbound[0], xbound[1] 146 | num_x = int((xbound[1] - xbound[0]) / xbound[2]) 147 | ymin, ymax = ybound[0], ybound[1] 148 | num_y = int((ybound[1] - ybound[0]) / ybound[2]) 149 | 150 | # y = torch.linspace(xmin, xmax, num_x, dtype=torch.double).cuda() 151 | # x = torch.linspace(ymin, ymax, num_y, dtype=torch.double).cuda() 152 | y = torch.linspace(xmin, xmax, num_x) 153 | x = 
torch.linspace(ymin, ymax, num_y) 154 | if cuda: 155 | x = x.cuda() 156 | y = y.cuda() 157 | 158 | y, x = torch.meshgrid(x, y) 159 | 160 | x = x.flatten() 161 | y = y.flatten() 162 | 163 | x = x.unsqueeze(0).repeat(B, 1) 164 | y = y.unsqueeze(0).repeat(B, 1) 165 | 166 | # z = torch.ones_like(x, dtype=torch.double).cuda() * zs.view(-1, 1) 167 | # d = torch.ones_like(x, dtype=torch.double).cuda() 168 | z = torch.ones_like(x) * zs.view(-1, 1) 169 | d = torch.ones_like(x) 170 | if cuda: 171 | z = z.cuda() 172 | d = d.cuda() 173 | 174 | coords = torch.stack([x, y, z, d], axis=1) 175 | 176 | rotation_matrix = rotation_from_euler(pitchs, rolls, yaws, cuda) 177 | 178 | coords = rotation_matrix @ coords 179 | return coords 180 | 181 | 182 | def ipm_from_parameters(image, xyz, K, RT, target_h, target_w, extrinsic, post_RT=None): 183 | """ 184 | :param image: [B, H, W, C] 185 | :param xyz: [B, 4, npoints] 186 | :param K: [B, 4, 4] 187 | :param RT: [B, 4, 4] 188 | :param target_h: int 189 | :param target_w: int 190 | :return: warped_images: [B, target_h, target_w, C] 191 | """ 192 | P = K @ RT 193 | if post_RT is not None: 194 | P = post_RT @ P 195 | P = P.reshape(-1, 4, 4) 196 | pixel_coords = perspective(xyz, P, target_h, target_w, extrinsic, image.shape[1:3]) 197 | image2 = bilinear_sampler(image, pixel_coords) 198 | image2 = image2.type_as(image) 199 | return image2 200 | 201 | 202 | class PlaneEstimationModule(nn.Module): 203 | def __init__(self, N, C): 204 | super(PlaneEstimationModule, self).__init__() 205 | self.max_pool = nn.AdaptiveMaxPool2d(1) 206 | self.linear = nn.Linear(N*C, 3) 207 | 208 | self.linear.weight.data.fill_(0.) 209 | self.linear.bias.data.fill_(0.) 210 | 211 | def forward(self, x): 212 | B, N, C, H, W = x.shape 213 | x = x.view(B*N, C, H, W) 214 | x = self.max_pool(x) 215 | x = x.view(B, N*C) 216 | x = self.linear(x) 217 | z, pitch, roll = x[:, 0], x[:, 1], x[:, 2] 218 | return z, pitch, roll 219 | 220 | 221 | class IPM(nn.Module): 222 | def __init__(self, xbound, ybound, N, C, z_roll_pitch=False, visual=False, extrinsic=False, cuda=True): 223 | super(IPM, self).__init__() 224 | self.visual = visual 225 | self.z_roll_pitch = z_roll_pitch 226 | self.xbound = xbound 227 | self.ybound = ybound 228 | self.extrinsic = extrinsic 229 | self.w = int((xbound[1] - xbound[0]) / xbound[2]) 230 | self.h = int((ybound[1] - ybound[0]) / ybound[2]) 231 | 232 | if z_roll_pitch: 233 | self.plane_esti = PlaneEstimationModule(N, C) 234 | else: 235 | zs = torch.tensor([0.]).cuda() 236 | yaws = torch.tensor([0.]).cuda() 237 | rolls = torch.tensor([0.]).cuda() 238 | pitchs = torch.tensor([0.]).cuda() 239 | self.planes = plane_grid(self.xbound, self.ybound, zs, yaws, rolls, pitchs)[0] 240 | 241 | tri_mask = np.zeros((self.h, self.w)) 242 | vertices = np.array([[0, 0], [0, self.h], [self.w, self.h]], np.int32) 243 | pts = vertices.reshape((-1, 1, 2)) 244 | cv2.fillPoly(tri_mask, [pts], color=1.) 
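# Note on the masks built above: tri_mask is an (h, w) array whose lower-left triangle
# (vertices (0, 0), (0, h), (w, h)) is filled with 1, and flipped_tri_mask is its left-right
# mirror. When visual=True, mask_warped() combines these complementary triangles with the
# half-width cuts for the front/back cameras so that each of the six warped images only
# contributes the region its camera actually observes before the views are merged.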
245 | self.tri_mask = torch.tensor(tri_mask[None, :, :, None]) 246 | self.flipped_tri_mask = torch.flip(self.tri_mask, [2]).bool() 247 | if cuda: 248 | self.tri_mask = self.tri_mask.cuda() 249 | self.flipped_tri_mask = self.flipped_tri_mask.cuda() 250 | self.tri_mask = self.tri_mask.bool() 251 | 252 | def mask_warped(self, warped_fv_images): 253 | warped_fv_images[:, CAM_F, :, :self.w//2, :] *= 0 # CAM_FRONT 254 | warped_fv_images[:, CAM_FL] *= self.flipped_tri_mask # CAM_FRONT_LEFT 255 | warped_fv_images[:, CAM_FR] *= ~self.tri_mask # CAM_FRONT_RIGHT 256 | warped_fv_images[:, CAM_B, :, self.w//2:, :] *= 0 # CAM_BACK 257 | warped_fv_images[:, CAM_BL] *= self.tri_mask # CAM_BACK_LEFT 258 | warped_fv_images[:, CAM_BR] *= ~self.flipped_tri_mask # CAM_BACK_RIGHT 259 | return warped_fv_images 260 | 261 | def forward(self, images, Ks, RTs, translation, yaw_roll_pitch, post_RTs=None): 262 | images = images.permute(0, 1, 3, 4, 2).contiguous() 263 | B, N, H, W, C = images.shape 264 | 265 | if self.z_roll_pitch: 266 | # z, roll, pitch = self.plane_esti(images) 267 | zs = translation[:, 2] 268 | rolls = yaw_roll_pitch[:, 1] 269 | pitchs = yaw_roll_pitch[:, 2] 270 | # zs += z 271 | # rolls += roll 272 | # pitchs += pitch 273 | planes = plane_grid(self.xbound, self.ybound, zs, torch.zeros_like(rolls), rolls, pitchs) 274 | planes = planes.repeat(N, 1, 1) 275 | else: 276 | planes = self.planes 277 | 278 | images = images.reshape(B*N, H, W, C) 279 | warped_fv_images = ipm_from_parameters(images, planes, Ks, RTs, self.h, self.w, self.extrinsic, post_RTs) 280 | warped_fv_images = warped_fv_images.reshape((B, N, self.h, self.w, C)) 281 | if self.visual: 282 | warped_fv_images = self.mask_warped(warped_fv_images) 283 | 284 | if self.visual: 285 | warped_topdown = warped_fv_images[:, CAM_F] + warped_fv_images[:, CAM_B] # CAM_FRONT + CAM_BACK 286 | warped_mask = warped_topdown == 0 287 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_FL][warped_mask] + warped_fv_images[:, CAM_FR][warped_mask] 288 | warped_mask = warped_topdown == 0 289 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_BL][warped_mask] + warped_fv_images[:, CAM_BR][warped_mask] 290 | return warped_topdown.permute(0, 3, 1, 2).contiguous() 291 | else: 292 | warped_topdown, _ = warped_fv_images.max(1) 293 | warped_topdown = warped_topdown.permute(0, 3, 1, 2).contiguous() 294 | warped_topdown = warped_topdown.view(B, C, self.h, self.w) 295 | return warped_topdown 296 | 297 | 298 | -------------------------------------------------------------------------------- /model/HDMapNet/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /model/HDMapNet/ipm_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .homography import IPM, bilinear_sampler 5 | from .utils import plane_grid_2d, get_rot_2d, cam_to_pixel 6 | from .base import CamEncode, BevEncode 7 | 8 | 9 | class IPMNet(nn.Module): 10 | def __init__(self, xbound, ybound, outC, camC=64, instance_seg=True, embedded_dim=16, cam_encoding=True, bev_encoding=True, z_roll_pitch=False): 11 | super(IPMNet, self).__init__() 12 | self.xbound = xbound 13 | self.ybound = ybound 14 | self.camC = camC 15 | self.downsample = 16 16 | if cam_encoding: 17 | self.ipm = IPM(xbound, ybound, N=6, C=camC, z_roll_pitch=z_roll_pitch, extrinsic=False) 18 | else: 19 | self.ipm = IPM(xbound, ybound, N=6, 
C=camC, visual=True, z_roll_pitch=z_roll_pitch, extrinsic=False) 20 | self.cam_encoding = cam_encoding 21 | if cam_encoding: 22 | self.camencode = CamEncode(camC) 23 | self.bev_encoding = bev_encoding 24 | if bev_encoding: 25 | self.bevencode = BevEncode(inC=camC, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim) 26 | 27 | def get_cam_feats(self, x): 28 | """Return B x N x D x H/downsample x W/downsample x C 29 | """ 30 | B, N, C, imH, imW = x.shape 31 | 32 | x = x.view(B*N, C, imH, imW) 33 | x = self.camencode(x) 34 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample) 35 | return x 36 | 37 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans): 38 | B, N, _, _ = intrins.shape 39 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 40 | Ks[:, :, :3, :3] = intrins 41 | 42 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 43 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous() 44 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 45 | Ts[:, :, :3, 3] = -trans 46 | RTs = Rs @ Ts 47 | 48 | post_RTs = torch.eye(4, device=post_rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 49 | post_RTs[:, :, :3, :3] = post_rots 50 | post_RTs[:, :, :3, 3] = post_trans 51 | 52 | if self.cam_encoding: 53 | scale = torch.Tensor([ 54 | [1/self.downsample, 0, 0, 0], 55 | [0, 1/self.downsample, 0, 0], 56 | [0, 0, 1, 0], 57 | [0, 0, 0, 1] 58 | ]).cuda() 59 | post_RTs = scale @ post_RTs 60 | 61 | return Ks, RTs, post_RTs 62 | 63 | def forward(self, points, points_mask, x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll): 64 | if self.cam_encoding: 65 | x = self.get_cam_feats(x) 66 | 67 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans) 68 | topdown = self.ipm(x, Ks, RTs, translation, yaw_pitch_roll, post_RTs) 69 | 70 | if self.bev_encoding: 71 | return self.bevencode(topdown) 72 | else: 73 | return topdown 74 | 75 | 76 | class TemporalIPMNet(IPMNet): 77 | def __init__(self, xbound, ybound, outC, camC=64, instance_seg=True, embedded_dim=16): 78 | super(IPMNet, self).__init__(xbound, ybound, outC, camC, instance_seg, embedded_dim) 79 | 80 | def get_cam_feats(self, x): 81 | """Return B x T x N x H/downsample x W/downsample x C 82 | """ 83 | B, T, N, C, imH, imW = x.shape 84 | 85 | x = x.view(B*T*N, C, imH, imW) 86 | x = self.camencode(x) 87 | x = x.view(B, T, N, self.camC, imH//self.downsample, imW//self.downsample) 88 | return x 89 | 90 | def temporal_fusion(self, topdown, translation, yaw): 91 | B, T, C, H, W = topdown.shape 92 | 93 | if T == 1: 94 | return topdown[:, 0] 95 | 96 | grid = plane_grid_2d(self.xbound, self.ybound).view(1, 1, 2, H*W).repeat(B, T-1, 1, 1) 97 | rot0 = get_rot_2d(yaw[:, 1:]) 98 | trans0 = translation[:, 1:, :2].view(B, T-1, 2, 1) 99 | rot1 = get_rot_2d(yaw[:, 0].view(B, 1).repeat(1, T-1)) 100 | trans1 = translation[:, 0, :2].view(B, 1, 2, 1).repeat(1, T-1, 1, 1) 101 | grid = rot1.transpose(2, 3) @ grid 102 | grid = grid + trans1 103 | grid = grid - trans0 104 | grid = rot0 @ grid 105 | grid = grid.view(B*(T-1), 2, H, W).permute(0, 2, 3, 1).contiguous() 106 | grid = cam_to_pixel(grid, self.xbound, self.ybound) 107 | topdown = topdown.permute(0, 1, 3, 4, 2).contiguous() 108 | prev_topdown = topdown[:, 1:] 109 | warped_prev_topdown = bilinear_sampler(prev_topdown.reshape(B*(T-1), H, W, C), grid).view(B, T-1, H, W, C) 110 | topdown = torch.cat([topdown[:, 0].unsqueeze(1), 
warped_prev_topdown], axis=1) 111 | topdown = topdown.view(B, T, H, W, C) 112 | topdown = topdown.max(1)[0] 113 | topdown = topdown.permute(0, 3, 1, 2).contiguous() 114 | return topdown 115 | 116 | def forward(self, points, points_mask, x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll): 117 | x = self.get_cam_feats(x) 118 | B, T, N, C, h, w = x.shape 119 | 120 | x = x.view(B*T, N, C, h, w) 121 | intrins = intrins.view(B*T, N, 3, 3) 122 | rots = rots.view(B*T, N, 3, 3) 123 | trans = trans.view(B*T, N, 3) 124 | post_rots = post_rots.view(B*T, N, 3, 3) 125 | post_trans = post_trans.view(B*T, N, 3) 126 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans) 127 | topdown = self.ipm(x, Ks, RTs, translation, yaw_pitch_roll, post_RTs) 128 | _, C, H, W = topdown.shape 129 | topdown = topdown.view(B, T, C, H, W) 130 | topdown = self.temporal_fusion(topdown, translation, yaw_pitch_roll[..., 0]) 131 | return self.bevencode(topdown) 132 | -------------------------------------------------------------------------------- /model/HDMapNet/pointpillar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch_scatter 4 | 5 | from .voxel import points_to_voxels 6 | 7 | 8 | class PillarBlock(nn.Module): 9 | def __init__(self, idims=64, dims=64, num_layers=1, 10 | stride=1): 11 | super(PillarBlock, self).__init__() 12 | layers = [] 13 | self.idims = idims 14 | self.stride = stride 15 | for i in range(num_layers): 16 | layers.append(nn.Conv2d(self.idims, dims, 3, stride=self.stride, 17 | padding=1, bias=False)) 18 | layers.append(nn.BatchNorm2d(dims)) 19 | layers.append(nn.ReLU(inplace=True)) 20 | self.idims = dims 21 | self.stride = 1 22 | self.layers = nn.Sequential(*layers) 23 | 24 | def forward(self, x): 25 | return self.layers(x) 26 | 27 | 28 | class PointNet(nn.Module): 29 | def __init__(self, idims=64, odims=64): 30 | super(PointNet, self).__init__() 31 | self.pointnet = nn.Sequential( 32 | nn.Conv1d(idims, odims, kernel_size=1, bias=False), 33 | nn.BatchNorm1d(odims), 34 | nn.ReLU(inplace=True) 35 | ) 36 | 37 | def forward(self, points_feature, points_mask): 38 | batch_size, num_points, num_dims = points_feature.shape 39 | points_feature = points_feature.permute(0, 2, 1) 40 | mask = points_mask.view(batch_size, 1, num_points) 41 | return self.pointnet(points_feature) * mask 42 | 43 | 44 | class PointPillar(nn.Module): 45 | def __init__(self, C, xbound, ybound, zbound, embedded_dim=16, direction_dim=37): 46 | super(PointPillar, self).__init__() 47 | self.xbound = xbound 48 | self.ybound = ybound 49 | self.zbound = zbound 50 | self.embedded_dim = embedded_dim 51 | self.pn = PointNet(15, 64) 52 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1) 53 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2) 54 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2) 55 | self.up1 = nn.Sequential( 56 | nn.Conv2d(64, 64, 3, padding=1, bias=False), 57 | nn.BatchNorm2d(64), 58 | nn.ReLU(inplace=True) 59 | ) 60 | self.up2 = nn.Sequential( 61 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 62 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False), 63 | nn.BatchNorm2d(128), 64 | nn.ReLU(inplace=True) 65 | ) 66 | self.up3 = nn.Sequential( 67 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True), 68 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False), 69 | nn.BatchNorm2d(256), 70 | 
nn.ReLU(inplace=True) 71 | ) 72 | self.conv_out = nn.Sequential( 73 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 74 | nn.BatchNorm2d(256), 75 | nn.ReLU(inplace=True), 76 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 77 | nn.BatchNorm2d(128), 78 | nn.ReLU(inplace=True), 79 | nn.Conv2d(128, C, 1), 80 | ) 81 | self.instance_conv_out = nn.Sequential( 82 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 83 | nn.BatchNorm2d(256), 84 | nn.ReLU(inplace=True), 85 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 86 | nn.BatchNorm2d(128), 87 | nn.ReLU(inplace=True), 88 | nn.Conv2d(128, embedded_dim, 1), 89 | ) 90 | self.direction_conv_out = nn.Sequential( 91 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 92 | nn.BatchNorm2d(256), 93 | nn.ReLU(inplace=True), 94 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 95 | nn.BatchNorm2d(128), 96 | nn.ReLU(inplace=True), 97 | nn.Conv2d(128, direction_dim, 1), 98 | ) 99 | 100 | def forward(self, points, points_mask, 101 | x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll): 102 | points_xyz = points[:, :, :3] 103 | points_feature = points[:, :, 3:] 104 | voxels = points_to_voxels( 105 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound 106 | ) 107 | points_feature = torch.cat( 108 | [points, # 5 109 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1 110 | voxels['local_points_xyz'], # 3 111 | voxels['point_centroids'], # 3 112 | points_xyz - voxels['voxel_centers'], # 3 113 | ], dim=-1 114 | ) 115 | points_feature = self.pn(points_feature, voxels['points_mask']) 116 | voxel_feature = torch_scatter.scatter_mean( 117 | points_feature, 118 | torch.unsqueeze(voxels['voxel_indices'], dim=1), 119 | dim=2, 120 | dim_size=voxels['num_voxels']) 121 | batch_size = points.size(0) 122 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1]) 123 | voxel_feature1 = self.block1(voxel_feature) 124 | voxel_feature2 = self.block2(voxel_feature1) 125 | voxel_feature3 = self.block3(voxel_feature2) 126 | voxel_feature1 = self.up1(voxel_feature1) 127 | voxel_feature2 = self.up2(voxel_feature2) 128 | voxel_feature3 = self.up3(voxel_feature3) 129 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1) 130 | return self.conv_out(voxel_feature).transpose(3, 2), self.instance_conv_out(voxel_feature).transpose(3, 2), self.direction_conv_out(voxel_feature).transpose(3, 2) 131 | 132 | 133 | class PointPillarEncoder(nn.Module): 134 | def __init__(self, C, xbound, ybound, zbound): 135 | super(PointPillarEncoder, self).__init__() 136 | self.xbound = xbound 137 | self.ybound = ybound 138 | self.zbound = zbound 139 | self.pn = PointNet(15, 64) 140 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1) 141 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2) 142 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2) 143 | self.up1 = nn.Sequential( 144 | nn.Conv2d(64, 64, 3, padding=1, bias=False), 145 | nn.BatchNorm2d(64), 146 | nn.ReLU(inplace=True) 147 | ) 148 | self.up2 = nn.Sequential( 149 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 150 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False), 151 | nn.BatchNorm2d(128), 152 | nn.ReLU(inplace=True) 153 | ) 154 | self.up3 = nn.Sequential( 155 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True), 156 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False), 157 | nn.BatchNorm2d(256), 158 | nn.ReLU(inplace=True) 159 | ) 160 | self.conv_out = 
nn.Sequential( 161 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 162 | nn.BatchNorm2d(256), 163 | nn.ReLU(inplace=True), 164 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 165 | nn.BatchNorm2d(128), 166 | nn.ReLU(inplace=True), 167 | nn.Conv2d(128, C, 1), 168 | ) 169 | 170 | def forward(self, points, points_mask): 171 | points_xyz = points[:, :, :3] 172 | points_feature = points[:, :, 3:] 173 | voxels = points_to_voxels( 174 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound 175 | ) 176 | points_feature = torch.cat( 177 | [points, # 5 178 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1 179 | voxels['local_points_xyz'], # 3 180 | voxels['point_centroids'], # 3 181 | points_xyz - voxels['voxel_centers'], # 3 182 | ], dim=-1 183 | ) 184 | points_feature = self.pn(points_feature, voxels['points_mask']) 185 | voxel_feature = torch_scatter.scatter_mean( 186 | points_feature, 187 | torch.unsqueeze(voxels['voxel_indices'], dim=1), 188 | dim=2, 189 | dim_size=voxels['num_voxels']) 190 | batch_size = points.size(0) 191 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1]) 192 | voxel_feature1 = self.block1(voxel_feature) 193 | voxel_feature2 = self.block2(voxel_feature1) 194 | voxel_feature3 = self.block3(voxel_feature2) 195 | voxel_feature1 = self.up1(voxel_feature1) 196 | voxel_feature2 = self.up2(voxel_feature2) 197 | voxel_feature3 = self.up3(voxel_feature3) 198 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1) 199 | return self.conv_out(voxel_feature).transpose(3, 2) 200 | -------------------------------------------------------------------------------- /model/HDMapNet/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def plane_grid_2d(xbound, ybound): 4 | xmin, xmax = xbound[0], xbound[1] 5 | num_x = int((xbound[1] - xbound[0]) / xbound[2]) 6 | ymin, ymax = ybound[0], ybound[1] 7 | num_y = int((ybound[1] - ybound[0]) / ybound[2]) 8 | 9 | y = torch.linspace(xmin, xmax, num_x).cuda() 10 | x = torch.linspace(ymin, ymax, num_y).cuda() 11 | y, x = torch.meshgrid(x, y) 12 | x = x.flatten() 13 | y = y.flatten() 14 | 15 | coords = torch.stack([x, y], axis=0) 16 | return coords 17 | 18 | 19 | def cam_to_pixel(points, xbound, ybound): 20 | new_points = torch.zeros_like(points) 21 | new_points[..., 0] = (points[..., 0] - xbound[0]) / xbound[2] 22 | new_points[..., 1] = (points[..., 1] - ybound[0]) / ybound[2] 23 | return new_points 24 | 25 | 26 | def get_rot_2d(yaw): 27 | sin_yaw = torch.sin(yaw) 28 | cos_yaw = torch.cos(yaw) 29 | rot = torch.zeros(list(yaw.shape) + [2, 2]).cuda() 30 | rot[..., 0, 0] = cos_yaw 31 | rot[..., 0, 1] = sin_yaw 32 | rot[..., 1, 0] = -sin_yaw 33 | rot[..., 1, 1] = cos_yaw 34 | return rot 35 | 36 | 37 | -------------------------------------------------------------------------------- /model/HDMapNet/voxel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch_scatter 4 | 5 | 6 | def pad_or_trim_to_np(x, shape, pad_val=0): 7 | shape = np.asarray(shape) 8 | pad = shape - np.minimum(np.shape(x), shape) 9 | zeros = np.zeros_like(pad) 10 | x = np.pad(x, np.stack([zeros, pad], axis=1), constant_values=pad_val) 11 | return x[:shape[0], :shape[1]] 12 | 13 | 14 | def raval_index(coords, dims): 15 | dims = torch.cat((dims, torch.ones(1, device=dims.device)), dim=0)[1:] 16 | dims = torch.flip(dims, dims=[0]) 17 | dims = 
torch.cumprod(dims, dim=0) / dims[0] 18 | multiplier = torch.flip(dims, dims=[0]) 19 | indices = torch.sum(coords * multiplier, dim=1) 20 | return indices 21 | 22 | 23 | def points_to_voxels( 24 | points_xyz, 25 | points_mask, 26 | grid_range_x, 27 | grid_range_y, 28 | grid_range_z 29 | ): 30 | batch_size, num_points, _ = points_xyz.shape 31 | voxel_size_x = grid_range_x[2] 32 | voxel_size_y = grid_range_y[2] 33 | voxel_size_z = grid_range_z[2] 34 | grid_size = np.asarray([ 35 | (grid_range_x[1]-grid_range_x[0]) / voxel_size_x, 36 | (grid_range_y[1]-grid_range_y[0]) / voxel_size_y, 37 | (grid_range_z[1]-grid_range_z[0]) / voxel_size_z 38 | ]).astype('int32') 39 | voxel_size = np.asarray([voxel_size_x, voxel_size_y, voxel_size_z]) 40 | voxel_size = torch.Tensor(voxel_size).to(points_xyz.device) 41 | num_voxels = grid_size[0] * grid_size[1] * grid_size[2] 42 | grid_offset = torch.Tensor([grid_range_x[0], grid_range_y[0], grid_range_z[0]]).to(points_xyz.device) 43 | shifted_points_xyz = points_xyz - grid_offset 44 | voxel_xyz = shifted_points_xyz / voxel_size 45 | voxel_coords = voxel_xyz.int() 46 | grid_size = torch.from_numpy(grid_size).to(points_xyz.device) 47 | grid_size = grid_size.int() 48 | zeros = torch.zeros_like(grid_size) 49 | voxel_paddings = ((points_mask < 1.0) | 50 | torch.any((voxel_coords >= grid_size) | 51 | (voxel_coords < zeros), dim=-1)) 52 | voxel_indices = raval_index( 53 | torch.reshape(voxel_coords, [batch_size * num_points, 3]), grid_size) 54 | voxel_indices = torch.reshape(voxel_indices, [batch_size, num_points]) 55 | voxel_indices = torch.where(voxel_paddings, 56 | torch.zeros_like(voxel_indices), 57 | voxel_indices) 58 | voxel_centers = ((0.5 + voxel_coords.float()) * voxel_size + grid_offset) 59 | voxel_coords = torch.where(torch.unsqueeze(voxel_paddings, dim=-1), 60 | torch.zeros_like(voxel_coords), 61 | voxel_coords) 62 | voxel_xyz = torch.where(torch.unsqueeze(voxel_paddings, dim=-1), 63 | torch.zeros_like(voxel_xyz), 64 | voxel_xyz) 65 | voxel_paddings = voxel_paddings.float() 66 | 67 | voxel_indices = voxel_indices.long() 68 | points_per_voxel = torch_scatter.scatter_sum( 69 | torch.ones((batch_size, num_points), dtype=voxel_coords.dtype, device=voxel_coords.device) * (1-voxel_paddings), 70 | voxel_indices, 71 | dim=1, 72 | dim_size=num_voxels 73 | ) 74 | 75 | voxel_point_count = torch.gather(points_per_voxel, 76 | dim=1, 77 | index=voxel_indices) 78 | 79 | 80 | voxel_centroids = torch_scatter.scatter_mean( 81 | points_xyz, 82 | voxel_indices, 83 | dim=1, 84 | dim_size=num_voxels) 85 | point_centroids = torch.gather(voxel_centroids, dim=1, index=torch.unsqueeze(voxel_indices, dim=-1).repeat(1, 1, 3)) 86 | local_points_xyz = points_xyz - point_centroids 87 | 88 | result = { 89 | 'local_points_xyz': local_points_xyz, 90 | 'shifted_points_xyz': shifted_points_xyz, 91 | 'point_centroids': point_centroids, 92 | 'points_xyz': points_xyz, 93 | 'grid_offset': grid_offset, 94 | 'voxel_coords': voxel_coords, 95 | 'voxel_centers': voxel_centers, 96 | 'voxel_indices': voxel_indices, 97 | 'voxel_paddings': voxel_paddings, 98 | 'points_mask': 1 - voxel_paddings, 99 | 'num_voxels': num_voxels, 100 | 'grid_size': grid_size, 101 | 'voxel_xyz': voxel_xyz, 102 | 'voxel_size': voxel_size, 103 | 'voxel_point_count': voxel_point_count, 104 | 'points_per_voxel': points_per_voxel 105 | } 106 | 107 | 108 | return result 109 | -------------------------------------------------------------------------------- /model/VPN.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .base import CamEncode, BevEncode 5 | from .pointpillar import PointPillarEncoder 6 | from .homography import IPM 7 | 8 | class TransformModule(nn.Module): 9 | def __init__(self,dim, num_view=4): 10 | super(TransformModule, self).__init__() 11 | self.num_view = num_view 12 | self.dim = dim 13 | 14 | self.mat_list = nn.ModuleList() 15 | for i in range(self.num_view): 16 | fc_transform = nn.Sequential( 17 | nn.Linear(self.dim[0] * self.dim[1], self.dim[2] * self.dim[3]), 18 | nn.ReLU(), 19 | nn.Linear(self.dim[2] * self.dim[3], self.dim[2] * self.dim[3]), 20 | nn.ReLU() 21 | ) 22 | self.mat_list += [fc_transform] 23 | 24 | def forward(self, x): 25 | # shape x: B, V, C, H, W 26 | #B, N, C, H, W = x.shape 27 | #x = x.view(B, N, C, H * W) 28 | x = x.view(list(x.size()[:3]) + [self.dim[0] * self.dim[1]]) 29 | view_comb = self.mat_list[0](x[:, 0]) 30 | for index in range(x.size(1))[1:]: 31 | view_comb = view_comb + self.mat_list[index](x[:, index]) 32 | view_comb = view_comb.view(list(view_comb.size()[:2]) + [self.dim[2], self.dim[3]]) 33 | #view_comb = view_comb.view(B, C,self.dim[2], self.dim[3]) 34 | return view_comb 35 | 36 | 37 | class VPNModel(nn.Module): 38 | def __init__(self, outC, camC=64, instance_seg=True, embedded_dim=16, extrinsic=False, lidar=False, xbound=None, ybound=None, zbound=None): 39 | super(VPNModel, self).__init__() 40 | self.camC = camC 41 | self.extrinsic = extrinsic 42 | self.downsample = 16 43 | 44 | self.camencode = CamEncode(camC) 45 | 46 | ipm_xbound = [-60,60,0.6] # -60 60 0.6 //200 47 | ipm_ybound = [-30,30,0.6] # -30 30 0.6 //100 48 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=6, C=self.camC, extrinsic=True) 49 | self.view_fusion = TransformModule(dim=(8, 22,32,88)) 50 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 51 | self.up_sampler_2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 52 | self.lidar = lidar 53 | if lidar: 54 | self.pp = PointPillarEncoder(128, xbound, ybound, zbound) 55 | self.bevencode = BevEncode(inC=camC+128, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim) 56 | else: 57 | self.bevencode = BevEncode(inC=camC, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim) 58 | 59 | 60 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans): 61 | B, N, _, _ = intrins.shape 62 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 63 | # Ks[:, :, :3, :3] = intrins 64 | 65 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 66 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous() 67 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 68 | Ts[:, :, :3, 3] = -trans 69 | RTs = Rs @ Ts 70 | 71 | post_RTs = None 72 | 73 | return Ks, RTs, post_RTs 74 | 75 | def get_cam_feats(self, x): 76 | """Return B x N x D x H/downsample x W/downsample x C 77 | """ 78 | B, N, C, imH, imW = x.shape 79 | 80 | x = x.view(B*N, C, imH, imW) 81 | x = self.camencode(x) 82 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample) 83 | return x 84 | 85 | def forward(self, x, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll): 86 | x = self.get_cam_feats(x) 87 | x = self.view_fusion(x)#4 64 50 100 88 | x = x.unsqueeze(1).repeat(1,4,1,1,1) 89 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, 
trans, post_rots, post_trans) 90 | topdown = self.ipm(x, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs) 91 | #topdown = x.mean(1) 92 | #topdown = self.up_sampler(x) 93 | topdown = self.up_sampler_2(topdown) 94 | # if self.lidar: 95 | # lidar_feature = self.pp(points, points_mask) 96 | # topdown = torch.cat([topdown, lidar_feature], dim=1) 97 | if self.lidar: 98 | lidar_feature = self.pp(lidar_data, lidar_mask) 99 | topdown = torch.cat([topdown, lidar_feature], dim=1) 100 | return self.bevencode(topdown) 101 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | from .hdmapnet import BiMapper 2 | 3 | 4 | def get_model(method, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, angle_class=36): 5 | 6 | 7 | if method == 'BiMapper': 8 | model = BiMapper(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, 9 | direction_dim=angle_class, lidar=False) 10 | else: 11 | print('no model find') 12 | 13 | return model 14 | -------------------------------------------------------------------------------- /model/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable, Function 4 | from efficientnet_pytorch import EfficientNet 5 | from torchvision.models.resnet import resnet18,resnet34 6 | import numpy as np 7 | import torch.nn.functional as F 8 | class DeformConv2D(nn.Module): 9 | def __init__(self, inc, outc, kernel_size=3, padding=1, bias=None): 10 | super(DeformConv2D, self).__init__() 11 | self.kernel_size = kernel_size 12 | self.padding = padding 13 | self.zero_padding = nn.ZeroPad2d(padding) 14 | self.conv_kernel = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias) 15 | 16 | def forward(self, x, offset): 17 | dtype = offset.data.type() 18 | ks = self.kernel_size 19 | N = offset.size(1) // 2 20 | 21 | # Change offset's order from [x1, x2, ..., y1, y2, ...] to [x1, y1, x2, y2, ...] 22 | # Codes below are written to make sure same results of MXNet implementation. 23 | # You can remove them, and it won't influence the module's performance. 
24 | offsets_index = Variable(torch.cat([torch.arange(0, 2*N, 2), torch.arange(1, 2*N+1, 2)]), requires_grad=False).type_as(x).long() 25 | offsets_index = offsets_index.unsqueeze(dim=0).unsqueeze(dim=-1).unsqueeze(dim=-1).expand(*offset.size()) 26 | offset = torch.gather(offset, dim=1, index=offsets_index) 27 | # ------------------------------------------------------------------------ 28 | 29 | if self.padding: 30 | x = self.zero_padding(x) 31 | 32 | # (b, 2N, h, w) 33 | p = self._get_p(offset, dtype) 34 | 35 | # (b, h, w, 2N) 36 | p = p.contiguous().permute(0, 2, 3, 1) 37 | q_lt = Variable(p.data, requires_grad=False).floor() 38 | q_rb = q_lt + 1 39 | 40 | q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long() 41 | q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long() 42 | q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], -1) 43 | q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], -1) 44 | 45 | # (b, h, w, N) 46 | mask = torch.cat([p[..., :N].lt(self.padding)+p[..., :N].gt(x.size(2)-1-self.padding), 47 | p[..., N:].lt(self.padding)+p[..., N:].gt(x.size(3)-1-self.padding)], dim=-1).type_as(p) 48 | mask = mask.detach() 49 | floor_p = p - (p - torch.floor(p)) 50 | p = p*(1-mask) + floor_p*mask 51 | p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1) 52 | 53 | # bilinear kernel (b, h, w, N) 54 | g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:])) 55 | g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:])) 56 | g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:])) 57 | g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:])) 58 | 59 | # (b, c, h, w, N) 60 | x_q_lt = self._get_x_q(x, q_lt, N) 61 | x_q_rb = self._get_x_q(x, q_rb, N) 62 | x_q_lb = self._get_x_q(x, q_lb, N) 63 | x_q_rt = self._get_x_q(x, q_rt, N) 64 | 65 | # (b, c, h, w, N) 66 | x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \ 67 | g_rb.unsqueeze(dim=1) * x_q_rb + \ 68 | g_lb.unsqueeze(dim=1) * x_q_lb + \ 69 | g_rt.unsqueeze(dim=1) * x_q_rt 70 | 71 | x_offset = self._reshape_x_offset(x_offset, ks) 72 | out = self.conv_kernel(x_offset) 73 | 74 | return out 75 | 76 | def _get_p_n(self, N, dtype): 77 | p_n_x, p_n_y = np.meshgrid(range(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), 78 | range(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), indexing='ij') 79 | # (2N, 1) 80 | p_n = np.concatenate((p_n_x.flatten(), p_n_y.flatten())) 81 | p_n = np.reshape(p_n, (1, 2*N, 1, 1)) 82 | p_n = Variable(torch.from_numpy(p_n).type(dtype), requires_grad=False) 83 | 84 | return p_n 85 | 86 | @staticmethod 87 | def _get_p_0(h, w, N, dtype): 88 | p_0_x, p_0_y = np.meshgrid(range(1, h+1), range(1, w+1), indexing='ij') 89 | p_0_x = p_0_x.flatten().reshape(1, 1, h, w).repeat(N, axis=1) 90 | p_0_y = p_0_y.flatten().reshape(1, 1, h, w).repeat(N, axis=1) 91 | p_0 = np.concatenate((p_0_x, p_0_y), axis=1) 92 | p_0 = Variable(torch.from_numpy(p_0).type(dtype), requires_grad=False) 93 | 94 | return p_0 95 | 96 | def _get_p(self, offset, dtype): 97 | N, h, w = offset.size(1)//2, offset.size(2), offset.size(3) 98 | 99 | # (1, 2N, 1, 1) 100 | p_n = self._get_p_n(N, dtype) 101 | # (1, 2N, h, w) 102 | p_0 = self._get_p_0(h, w, N, dtype) 103 | p = p_0 + p_n + offset 104 | return p 105 | 106 | 
def _get_x_q(self, x, q, N): 107 | b, h, w, _ = q.size() 108 | padded_w = x.size(3) 109 | c = x.size(1) 110 | # (b, c, h*w) 111 | x = x.contiguous().view(b, c, -1) 112 | 113 | # (b, h, w, N) 114 | index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y 115 | # (b, c, h*w*N) 116 | index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1) 117 | 118 | x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N) 119 | 120 | return x_offset 121 | 122 | @staticmethod 123 | def _reshape_x_offset(x_offset, ks): 124 | b, c, h, w, N = x_offset.size() 125 | x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1) 126 | x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks) 127 | 128 | return x_offset 129 | 130 | class Up(nn.Module): 131 | def __init__(self, in_channels, out_channels, scale_factor=2): 132 | super().__init__() 133 | 134 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', 135 | align_corners=True) 136 | 137 | self.conv = nn.Sequential( 138 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), 139 | nn.BatchNorm2d(out_channels), 140 | nn.ReLU(inplace=True), 141 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), 142 | nn.BatchNorm2d(out_channels), 143 | nn.ReLU(inplace=True) 144 | ) 145 | # self.offset = nn.Conv2d(out_channels, 18, kernel_size=3, padding=1, bias=False) 146 | # self.deconv =DeformConv2D(out_channels, out_channels, kernel_size=3, padding=1) 147 | # self.bn4 = nn.BatchNorm2d(out_channels) 148 | 149 | 150 | def forward(self, x1, x2): 151 | x1 = self.up(x1) 152 | x1 = torch.cat([x2, x1], dim=1) 153 | x3 = self.conv(x1) 154 | # offsets = self.offset(x3) 155 | # x4 = F.relu(self.deconv(x3, offsets)) 156 | # x5 = self.bn4(x4) 157 | return x3 158 | 159 | 160 | class CamEncode(nn.Module): 161 | def __init__(self, C): 162 | super(CamEncode, self).__init__() 163 | self.C = C 164 | 165 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0") 166 | #self.trunk = resnet34(pretrained=False, zero_init_residual=True) 167 | self.up1 = Up(320+112, self.C) 168 | self.up2 = Up(40 + 64, self.C) 169 | self.up3 = Up(24 + 64, self.C) 170 | def get_eff_depth(self, x): 171 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 172 | endpoints = dict() 173 | 174 | # Stem 175 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x))) 176 | prev_x = x 177 | 178 | # Blocks 179 | for idx, block in enumerate(self.trunk._blocks): 180 | drop_connect_rate = self.trunk._global_params.drop_connect_rate 181 | if drop_connect_rate: 182 | drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate 183 | x = block(x, drop_connect_rate=drop_connect_rate) # 卷积过程 184 | if prev_x.size(2) > x.size(2): 185 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x 186 | prev_x = x 187 | 188 | # Head 189 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = x 190 | # 1 24 16 64 176 191 | # 2 24 24 32 88 192 | # 3 24 40 16 44 193 | # 4 24 112 8 22 194 | # 5 24 320 4 11 195 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4']) # 24 64 8 22 196 | x_1 = self.up2(x, endpoints['reduction_3']) # 24 64 16 44 197 | x_2 = self.up3(x_1,endpoints['reduction_2'] )#24 24 32 88 198 | # x_1 = endpoints['reduction_3'] # 64 16 44 199 | # x_2 = endpoints['reduction_2'] # 64 32 88 200 | return x,x_1,x_2 201 | 202 | 203 | def forward(self, x): 
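        # NOTE: forward() simply delegates to get_eff_depth() and returns three feature
        # scales. Per the shape comments above (which assume 128x352 crops and B*N=24),
        # the outputs are roughly (B*N, 64, 8, 22), (B*N, 64, 16, 44) and (B*N, 64, 32, 88).
        # Hypothetical standalone usage (downloads EfficientNet-B0 weights on first call):
        #   enc = CamEncode(C=64)
        #   f16, f8, f4 = enc(torch.randn(24, 3, 128, 352))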
204 | return self.get_eff_depth(x) 205 | 206 | 207 | class BevEncode(nn.Module): 208 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37): 209 | super(BevEncode, self).__init__() 210 | trunk = resnet18(pretrained=False, zero_init_residual=True) 211 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, 212 | bias=False) 213 | self.bn1 = trunk.bn1 214 | self.relu = trunk.relu 215 | 216 | self.layer1 = trunk.layer1 217 | self.layer2 = trunk.layer2 218 | self.layer3 = trunk.layer3 219 | 220 | self.up1 = Up(64 + 256, 256, scale_factor=4) 221 | self.up2 = nn.Sequential( 222 | nn.Upsample(scale_factor=2, mode='bilinear', 223 | align_corners=True), 224 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 225 | nn.BatchNorm2d(128), 226 | nn.ReLU(inplace=True), 227 | nn.Conv2d(128, outC, kernel_size=1, padding=0), 228 | ) 229 | 230 | self.instance_seg = instance_seg 231 | if instance_seg: 232 | self.up1_embedded = Up(64 + 256, 256, scale_factor=4) 233 | self.up2_embedded = nn.Sequential( 234 | nn.Upsample(scale_factor=2, mode='bilinear', 235 | align_corners=True), 236 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 237 | nn.BatchNorm2d(128), 238 | nn.ReLU(inplace=True), 239 | nn.Conv2d(128, embedded_dim, kernel_size=1, padding=0), 240 | ) 241 | 242 | self.direction_pred = direction_pred 243 | if direction_pred: 244 | self.up1_direction = Up(64 + 256, 256, scale_factor=4) 245 | self.up2_direction = nn.Sequential( 246 | nn.Upsample(scale_factor=2, mode='bilinear', 247 | align_corners=True), 248 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 249 | nn.BatchNorm2d(128), 250 | nn.ReLU(inplace=True), 251 | nn.Conv2d(128, direction_dim, kernel_size=1, padding=0), 252 | ) 253 | 254 | def forward(self, x): 255 | x = self.conv1(x)# 64 100 200 256 | x = self.bn1(x) 257 | x = self.relu(x) 258 | 259 | x1 = self.layer1(x)#64 100 200 260 | x = self.layer2(x1)#128 50 100 261 | x2 = self.layer3(x)#256 25 50 262 | 263 | x = self.up1(x2, x1) #256 50 100 264 | x = self.up2(x) 265 | 266 | if self.instance_seg: 267 | x_embedded = self.up1_embedded(x2, x1) 268 | x_embedded = self.up2_embedded(x_embedded) 269 | else: 270 | x_embedded = None 271 | 272 | if self.direction_pred: 273 | x_direction = self.up1_embedded(x2, x1) 274 | x_direction = self.up2_direction(x_direction) 275 | else: 276 | x_direction = None 277 | 278 | return x, x1, x_direction 279 | 280 | 281 | class CamEncode2(nn.Module): 282 | def __init__(self,c): 283 | super(CamEncode2, self).__init__() 284 | trunk = resnet34(pretrained=False, zero_init_residual=True) 285 | self.c=c 286 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 287 | bias=False) 288 | self.layer1 = trunk.layer1 289 | self.layer2 = trunk.layer2 290 | self.layer3 = trunk.layer3 291 | self.layer4 = trunk.layer4 292 | 293 | self.up1 = Up(64 + 256, 256, scale_factor=4) 294 | self.up2 = nn.Sequential( 295 | # nn.Upsample(scale_factor=2, mode='bilinear', 296 | # align_corners=True), 297 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 298 | nn.BatchNorm2d(128), 299 | nn.ReLU(inplace=True), 300 | nn.Conv2d(128, self.c, kernel_size=1, padding=0), 301 | ) 302 | self.up3 = nn.Sequential( 303 | nn.Conv2d(512, 128, kernel_size=3, padding=1, bias=False), 304 | nn.BatchNorm2d(128), 305 | nn.ReLU(inplace=True), 306 | nn.Conv2d(128, self.c, kernel_size=1, padding=0), 307 | ) 308 | self.up4 = Up(64 + 256,64, scale_factor=2) 309 | self.up5 = Up(64 + 128, 64, scale_factor=2) 310 | 
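    # NOTE: CamEncode2 is an alternative ResNet-34 image encoder. As far as this repo
    # shows, BiMapper in model/hdmapnet.py instantiates CamEncode (EfficientNet-B0) for
    # both branches, so this class is defined here but not wired into the default model.
    # Like CamEncode, forward() below returns three 64-channel scales; for a hypothetical
    # 128x352 input they come out at about 8x22, 16x44 and 32x88.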
311 | def forward(self, x): 312 | 313 | x = self.conv1(x) 314 | x1 = self.layer1(x)#64 64 176 315 | x = self.layer2(x1)#128 32 88 316 | x2 = self.layer3(x)# 256 16 44 317 | x3 = self.layer4(x2)#512 8 22 318 | x3 = self.up3(x3) 319 | x4 = self.up4(x3,x2)#64 16 44 320 | x5 = self.up5(x4,x)#64 32 88 321 | # x = self.up1(x2, x)#32 88 322 | # x = self.up2(x) 323 | 324 | return x3,x4,x5 325 | 326 | class STNNet(nn.Module): 327 | def __init__(self): 328 | super(STNNet, self).__init__() 329 | # Spatial transformer localization-network 330 | self.localization = nn.Sequential( 331 | nn.Conv2d(64, 10, kernel_size=7), 332 | nn.MaxPool2d(2, stride=2), 333 | nn.ReLU(True), 334 | nn.Conv2d(10, 10, kernel_size=7), 335 | nn.MaxPool2d(2, stride=2), 336 | nn.ReLU(True) 337 | ) 338 | 339 | # Regressor for the 3 * 2 affine matrix 340 | self.fc_loc = nn.Sequential( 341 | nn.Linear(10 * 20 * 45, 32), 342 | nn.ReLU(True), 343 | 344 | nn.Linear(32, 3 * 2) 345 | ) 346 | 347 | # Initialize the weights/bias with identity transformation 348 | self.fc_loc[2].weight.data.zero_() 349 | self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) 350 | 351 | def forward(self, x):#4 64 100 200 352 | xs = self.localization(x)#4 10 20 45 353 | b,c,h,w = xs.shape 354 | xs = xs.view(-1, c * h * w) 355 | theta = self.fc_loc(xs) 356 | theta = theta.view(-1, 2, 3) 357 | 358 | grid = F.affine_grid(theta, x.size()) 359 | x = F.grid_sample(x, grid) 360 | 361 | return x 362 | 363 | if __name__ == '__main__': 364 | stn = STNNet() 365 | x = torch.ones((4,64,100,200)) 366 | y = stn(x) -------------------------------------------------------------------------------- /model/hdmapnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .homography import IPM 5 | 6 | from .base import CamEncode, BevEncode 7 | from data.utils import gen_dx_bx 8 | import torch.nn.functional as F 9 | 10 | class IPMTransformation(nn.Module): 11 | def __init__(self ,n_views=6): 12 | super(IPMTransformation, self).__init__() 13 | self.n_views = n_views 14 | self.hw_mat = [] 15 | self.up1 = [] 16 | self.up2 = [] 17 | 18 | 19 | for i in range(self.n_views): 20 | fc_transform = nn.Sequential( 21 | nn.Conv2d(64, 128, kernel_size=1, padding=0), 22 | nn.ReLU(), 23 | nn.Dropout(0.5), 24 | nn.Conv2d(128, 128, kernel_size=1, padding=0), 25 | nn.ReLU(), 26 | nn.Dropout(0.5), 27 | ) 28 | self.hw_mat.append(fc_transform) 29 | self.conv1 = nn.Sequential( 30 | nn.Conv2d(128+64, 128, kernel_size=3, padding=1, bias=False), 31 | nn.BatchNorm2d(128), 32 | nn.ReLU(inplace=True), 33 | nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), 34 | nn.BatchNorm2d(128), 35 | nn.ReLU(inplace=True) 36 | ) 37 | self.up1.append(self.conv1) 38 | self.conv2 = nn.Sequential( 39 | nn.Conv2d(128 + 64, 64, kernel_size=3, padding=1, bias=False), 40 | nn.BatchNorm2d(64), 41 | nn.ReLU(inplace=True), 42 | nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False), 43 | nn.BatchNorm2d(64), 44 | nn.ReLU(inplace=True) 45 | ) 46 | self.up2.append(self.conv2) 47 | # fc_transform = nn.Sequential( 48 | # nn.Conv2d(768, 768, kernel_size=1, padding=0), 49 | # nn.ReLU(), 50 | # nn.Dropout(0.5), 51 | # nn.Conv2d(768, 768, kernel_size=1, padding=0), 52 | # nn.ReLU(), 53 | # nn.Dropout(0.5), 54 | # ) 55 | # self.hw_mat.append(fc_transform) 56 | # self.conv1 = nn.Sequential( 57 | # nn.Conv2d(768+384, 384, kernel_size=3, padding=1, bias=False), 58 | # nn.LayerNorm(384), 59 | # nn.ReLU(inplace=True), 60 | # 
nn.Conv2d(384, 384, kernel_size=3, padding=1, bias=False), 61 | # nn.LayerNorm(384), 62 | # nn.ReLU(inplace=True) 63 | # ) 64 | # self.up1.append(self.conv1) 65 | # self.conv2 = nn.Sequential( 66 | # nn.Conv2d(384 + 192, 64, kernel_size=3, padding=1, bias=False), 67 | # nn.LayerNorm(64), 68 | # nn.ReLU(inplace=True), 69 | # nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False), 70 | # nn.LayerNorm(64), 71 | # nn.ReLU(inplace=True) 72 | # ) 73 | # self.up2.append(self.conv2) 74 | self.hw_mat = nn.ModuleList(self.hw_mat) 75 | self.up1 = nn.ModuleList(self.up1) 76 | self.up2 = nn.ModuleList(self.up2) 77 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 78 | def forward(self, x1,x2,x3): 79 | B, N, C, H, W = x1.shape 80 | outputs = [] 81 | for i in range(N): 82 | tt = x1[:, i] #+ self.position[i](feat[:, i]) 83 | output_1 = self.hw_mat[i](tt)#4 128 8 22 84 | output_1 = self.up_sampler(output_1)#4 128 16 44 85 | output_2 = self.up1[i](torch.cat([output_1,x2[:,i]],dim=1))# 86 | output_2 = self.up_sampler(output_2) 87 | output_3 = self.up2[i](torch.cat([output_2,x3[:, i]], dim=1)) 88 | outputs.append(output_3) 89 | outputs = torch.stack(outputs, 1) 90 | return outputs 91 | 92 | class TransformModule(nn.Module): 93 | def __init__(self, fv_size, bv_size, n_view=6): 94 | super(TransformModule, self).__init__() 95 | self.num_view = n_view 96 | fv_dim = fv_size[0] * fv_size[1] 97 | bv_dim = bv_size[0] * bv_size[1] 98 | self.bv_size = bv_size 99 | self.mat_list = nn.ModuleList() 100 | for i in range(self.num_view): 101 | fc_transform = nn.Sequential( 102 | nn.Linear(fv_dim, bv_dim), 103 | nn.ReLU(), 104 | nn.Linear(bv_dim, bv_dim), 105 | nn.ReLU() 106 | ) 107 | self.mat_list += [fc_transform] 108 | 109 | def forward(self, x): 110 | # shape x: B, V, C, H, W 111 | # x = x.view(list(x.size()[:3]) + [self.dim * self.dim]) 112 | B, N, C, H, W = x.shape 113 | x = x.view(B, N, C, H * W) 114 | view_comb = self.mat_list[0](x[:, 0]) 115 | for index in range(N)[1:]: 116 | view_comb = view_comb + self.mat_list[index](x[:, index]) 117 | view_comb = view_comb.view(list(view_comb.size()[:2]) + [self.bv_size[0], self.bv_size[1]]) 118 | #view_comb = view_comb.unsqueeze(dim=1).repeat(1,self.num_view,1,1,1) 119 | return view_comb 120 | 121 | 122 | class BiMapper(nn.Module): 123 | def __init__(self, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=36, 124 | lidar=False): 125 | super(BiMapper, self).__init__() 126 | self.camC = 64 127 | self.downsample = 16 128 | self.merge_flag = data_conf["merge"] 129 | dx, bx, nx = gen_dx_bx(data_conf['xbound'], data_conf['ybound'], data_conf['zbound']) 130 | self.view = 6 131 | 132 | self.camencode = CamEncode(self.camC) # 初始化对象origin 133 | self.camencode_2 = CamEncode(self.camC) # 初始化对象img 134 | fv_size = (data_conf['image_size_i'][0] // self.downsample, data_conf['image_size_i'][1] // self.downsample) # 下采样后图片大小 135 | self.ipm_tran = IPMTransformation(n_views=self.view) 136 | 137 | bv_size_2 = (50,100) 138 | self.view_fore = TransformModule(fv_size=fv_size, bv_size=bv_size_2,n_view=self.view) 139 | 140 | ipm_xbound = [-30, 30, 0.3] # -60 60 0.6 //200 141 | ipm_ybound = [-15, 15, 0.3] # -30 30 0.6 //100 142 | 143 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=6, C=self.camC, extrinsic=True) 144 | 145 | self.up_sampler_fore = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 146 | 147 | self.merge = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1, padding=0),nn.BatchNorm2d(64),nn.ReLU(inplace=True)) 148 | # 
融合后上采样 149 | self.up_sampler_merge = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 150 | #单分支损失 151 | self.conv = nn.Conv2d(64, 4, kernel_size=1, padding=0) 152 | self.conv1 = nn.Conv2d(4, 64, kernel_size=1, padding=0) 153 | 154 | self.lidar = lidar 155 | 156 | self.i_one = nn.Conv2d(64, 2, kernel_size=1, padding=0) 157 | self.f_one = nn.Conv2d(64, 2, kernel_size=1, padding=0) 158 | self.mode = 'cross' 159 | self.loss_fn_i = torch.nn.BCEWithLogitsLoss()#内部含有sigmod 160 | self.loss_fn_f = torch.nn.BCEWithLogitsLoss()#内部含有sigmod 161 | 162 | self.bevencode_merge = BevEncode(inC=self.camC, outC=data_conf['num_channels'], instance_seg=True, 163 | embedded_dim=embedded_dim, direction_pred=direction_pred, 164 | direction_dim=direction_dim + 1) 165 | self.B = data_conf['B'] 166 | self.N = 6 167 | self.xmin, self.xmax = -10, 10 168 | self.num_x = 300 169 | self.zmin, self.zmax = 0, 20 170 | self.num_z = 600 171 | 172 | 173 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans): 174 | B, N, _, _ = intrins.shape 175 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 176 | # print(intrins[0][0]) 177 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 178 | rots = torch.transpose(rots, dim0=2, dim1=3) 179 | Rs[:, :, :3, :3] = rots 180 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1) 181 | Ts[:, :, :3, 3] = -trans#-trans 182 | RTs = Rs @ Ts 183 | 184 | post_RTs = None 185 | 186 | return Ks, RTs, post_RTs 187 | 188 | def get_cam_feats(self, x): 189 | B, N, C, imH, imW = x.shape 190 | x = x.view(B * N, C, imH, imW) 191 | x, x_1,x_2 = self.camencode_2(x) 192 | x = x.view(B, N, self.camC, imH // self.downsample, imW // self.downsample) 193 | x_1 = x_1.view(B, N, self.camC, 2 * imH // self.downsample, 2 * imW // self.downsample) 194 | x_2 = x_2.view(B, N, self.camC, 4 * imH // self.downsample, 4 * imW // self.downsample) 195 | return x, x_1,x_2 196 | 197 | def get_cam_origin_fea(self, x): 198 | B, N, C, imH, imW = x.shape 199 | x = x.view(B * N, C, imH, imW) 200 | x, x_1, x_2 = self.camencode(x) 201 | x = x.view(B, N, self.camC, imH // self.downsample, imW // self.downsample) 202 | x_1 = x_1.view(B, N, self.camC, 2 * imH // self.downsample, 2 * imW // self.downsample) 203 | x_2 = x_2.view(B, N, self.camC, 4 * imH // self.downsample, 4 * imW // self.downsample) 204 | return x, x_1, x_2 205 | 206 | 207 | def cam_loss(self,v): 208 | 209 | v2 = self.conv(v) 210 | v3 = self.conv1(v2) 211 | return v2,v3+v 212 | 213 | 214 | def mutual_loss(self,v_i,v_f): 215 | if self.mode=="kl" : 216 | i_m = torch.sum(v_i, dim=1) # 4 100 200 217 | f_m = torch.sum(v_f, dim=1) 218 | 219 | B, h, w = i_m.shape 220 | i_m_flat = i_m.view(B, h * w) 221 | i_m_soft = F.softmax(i_m_flat, dim=-1) 222 | i_m_soft = i_m_soft.view(B, h, w) 223 | 224 | f_m_flat = f_m.view(B, h * w) 225 | f_m_soft = F.softmax(f_m_flat, dim=-1) 226 | f_m_soft = f_m_soft.view(B, h, w) 227 | 228 | mask = torch.ones_like(i_m_soft) * 1e-7 229 | i_m_soft = i_m_soft + mask 230 | f_m_soft = f_m_soft + mask 231 | kl_2 = F.kl_div(f_m_soft.log(), i_m_soft, reduction='mean') # 第一个为预测分布,第二个为真实分布 232 | kl_1 = F.kl_div(i_m_soft.log(), f_m_soft, reduction='mean') # 第一个为预测分布,第二个为真实分布 233 | if self.mode == "cross": 234 | i_m = self.i_one(v_i) 235 | f_m = self.f_one(v_f)#4 2 100 200 236 | i_m_soft = F.softmax(i_m,dim=1) 237 | f_m_soft = F.softmax(f_m, dim=1) 238 | i_m_mask = torch.ones_like(i_m_soft) 239 | i_m_mask[:,0] = i_m_soft[:,0]>0.5 240 | i_m_mask[:,1] = 
i_m_soft[:, 1] > 0.5 241 | kl_2 = self.loss_fn_i(f_m_soft,i_m_mask) 242 | f_m_mask = torch.ones_like(f_m_soft) 243 | f_m_mask[:, 0] = f_m_soft[:, 0] > 0.5 244 | f_m_mask[:, 1] = f_m_soft[:, 1] > 0.5 245 | kl_1 = self.loss_fn_f(i_m_soft, f_m_mask)#vpn为真值 246 | 247 | return kl_1,kl_2 248 | 249 | def forward(self, img_origin, img, trans, rots, intrins, post_trans, post_rots, car_trans, 250 | yaw_pitch_roll): 251 | 252 | 253 | x_31, x_41,x_51 = self.get_cam_origin_fea(img_origin) # 4 6 64 8 22plane_fea_1.contiguous() 254 | x_5 = self.ipm_tran(x_31, x_41,x_51) # 4 6 64 32 88 255 | 256 | x, x_1,x_2 = self.get_cam_feats(img) # 4 6 64 8 22 257 | x_6 = self.view_fore(x)# 4 64 50 100 258 | x_6 = self.up_sampler_fore(x_6)# 100 200 259 | 260 | 261 | b,n,c,h,w = x_5.shape 262 | x_5 = x_5.view(-1,c,h,w) 263 | x_view_l,x_51 = self.cam_loss(x_5) 264 | x_51 = x_51.view(b, n, c, h, w) 265 | 266 | 267 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans) 268 | x_71 = self.ipm(x_51, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs)#100,200 269 | 270 | 271 | if True: 272 | topdown = 0.1*x_71 + x_6 # 273 | topdown = self.merge(topdown)#xg 274 | mu_loss,mu_loss_i = self.mutual_loss(x_71,x_6) 275 | topdown = self.up_sampler_merge(topdown) # 200,400 276 | semantic,x1, direction = self.bevencode_merge(topdown)# 277 | 278 | 279 | 280 | return semantic,x_view_l,mu_loss,mu_loss_i#,mu_loss_m#,embedding#, direction 281 | 282 | 283 | 284 | 285 | 286 | 287 | -------------------------------------------------------------------------------- /model/homography.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import cv2 6 | from .base import STNNet 7 | CAM_FL = 0 8 | CAM_F = 1 9 | CAM_FR = 2 10 | CAM_BL = 3 11 | CAM_B = 4 12 | CAM_BR = 5 13 | 14 | 15 | # ========================================================= 16 | # Projections 17 | # ========================================================= 18 | def rotation_from_euler(rolls, pitchs, yaws, cuda=True): 19 | """ 20 | Get rotation matrix 21 | Args: 22 | roll, pitch, yaw: In degrees 23 | 24 | Returns: 25 | R: [B, 4, 4] 26 | """ 27 | B = len(rolls) 28 | 29 | si, sj, sk = torch.sin(rolls), torch.sin(pitchs), torch.sin(yaws) 30 | ci, cj, ck = torch.cos(rolls), torch.cos(pitchs), torch.cos(yaws) 31 | cc, cs = ci * ck, ci * sk 32 | sc, ss = si * ck, si * sk 33 | 34 | R = torch.eye(4).unsqueeze(0).repeat(B, 1, 1) 35 | if cuda: 36 | R = R.cuda() 37 | R[:, 0, 0] = cj * ck 38 | R[:, 0, 1] = sj * sc - cs 39 | R[:, 0, 2] = sj * cc + ss 40 | R[:, 1, 0] = cj * sk 41 | R[:, 1, 1] = sj * ss + cc 42 | R[:, 1, 2] = sj * cs - sc 43 | R[:, 2, 0] = -sj 44 | R[:, 2, 1] = cj * si 45 | R[:, 2, 2] = cj * ci 46 | return R 47 | 48 | 49 | def perspective(cam_coords, proj_mat, h, w, extrinsic, offset=None): 50 | """ 51 | P = proj_mat @ (x, y, z, 1) 52 | Project cam2pixel 53 | 54 | Args: 55 | cam_coords: [B, 4, npoints] 56 | proj_mat: [B, 4, 4] 57 | 58 | Returns: 59 | pix coords: [B, h, w, 2] 60 | """ 61 | eps = 1e-7 62 | pix_coords = proj_mat @ cam_coords#24 4 20000 = 24 4 4 * 4 20000 63 | 64 | N, _, _ = pix_coords.shape 65 | 66 | if extrinsic: 67 | # pix_coords[:, 0] += offset[0] / 2 68 | # pix_coords[:, 2] -= offset[1] / 8 69 | # pix_coords[:, 0] = pix_coords[:, 0] / pix_coords[:, 1] 70 | # pix_coords[:, 2] = pix_coords[:, 2] / pix_coords[:, 1] 71 | pix_coords = torch.stack([pix_coords[:, 2], pix_coords[:, 0]], axis=1) 72 | else: 73 | pix_coords = 
pix_coords[:, :2, :] / (pix_coords[:, 2, :][:, None, :] + eps) 74 | pix_coords = pix_coords.view(N, 2, h, w) 75 | pix_coords = pix_coords.permute(0, 2, 3, 1).contiguous() 76 | return pix_coords 77 | 78 | 79 | def bilinear_sampler(imgs, pix_coords): 80 | """ 81 | Construct a new image by bilinear sampling from the input image. 82 | Args: 83 | imgs: [B, H, W, C] 84 | pix_coords: [B, h, w, 2] 85 | :return: 86 | sampled image [B, h, w, c] 87 | """ 88 | B, img_h, img_w, img_c = imgs.shape 89 | B, pix_h, pix_w, pix_c = pix_coords.shape 90 | out_shape = (B, pix_h, pix_w, img_c) 91 | 92 | pix_x, pix_y = torch.split(pix_coords, 1, dim=-1) # [B, pix_h, pix_w, 1] 93 | 94 | #将真实坐标换为像素坐标 95 | pix_x = (pix_x-3)/0.3 96 | pix_y = (pix_y+5)/0.3 97 | 98 | 99 | # pix_x = pix_x/ 0.1 + 44 100 | # pix_y = pix_y/ 0.5 + 16 101 | 102 | # Rounding 103 | pix_x0 = torch.floor(pix_x) 104 | pix_x1 = pix_x0 + 1 105 | pix_y0 = torch.floor(pix_y) 106 | pix_y1 = pix_y0 + 1 107 | 108 | # Clip within image boundary 109 | y_max = (img_h - 1)#32 110 | x_max = (img_w - 1)#88 111 | pix_x = torch.clip(pix_x, 0, x_max) # z, 112 | pix_y = torch.clip(pix_y, 0, y_max) # x, 113 | 114 | pix_x0 = torch.clip(pix_x0, 0, x_max)#z, 115 | pix_y0 = torch.clip(pix_y0, 0, y_max)#x, 116 | pix_x1 = torch.clip(pix_x1, 0, x_max)#x_max 117 | pix_y1 = torch.clip(pix_y1, 0, y_max)#0, y_max 118 | 119 | 120 | # Weights [B, pix_h, pix_w, 1] 121 | wt_x0 = pix_x1 - pix_x 122 | wt_x1 = pix_x - pix_x0 123 | wt_y0 = pix_y1 - pix_y 124 | wt_y1 = pix_y - pix_y0 125 | 126 | # indices in the image to sample from 127 | dim = img_w 128 | 129 | # Apply the lower and upper bound pix coord 130 | base_y0 = pix_y0 * dim 131 | base_y1 = pix_y1 * dim 132 | 133 | # 4 corner vert ices 134 | idx00 = (pix_x0 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long() 135 | idx01 = (pix_x0 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long() 136 | idx10 = (pix_x1 + base_y0).view(B, -1, 1).repeat(1, 1, img_c).long() 137 | idx11 = (pix_x1 + base_y1).view(B, -1, 1).repeat(1, 1, img_c).long() 138 | 139 | # Gather pixels from image using vertices 140 | imgs_flat = imgs.reshape([B, -1, img_c]) 141 | 142 | im00 = torch.gather(imgs_flat, 1, idx00).reshape(out_shape) 143 | im01 = torch.gather(imgs_flat, 1, idx01).reshape(out_shape) 144 | im10 = torch.gather(imgs_flat, 1, idx10).reshape(out_shape) 145 | im11 = torch.gather(imgs_flat, 1, idx11).reshape(out_shape) 146 | 147 | # Apply weights [pix_h, pix_w, 1] 148 | w00 = wt_x0 * wt_y0 149 | w01 = wt_x0 * wt_y1 150 | w10 = wt_x1 * wt_y0 151 | w11 = wt_x1 * wt_y1 152 | output = w00 * im00 + w01 * im01 + w10 * im10 + w11 * im11 153 | return output 154 | 155 | 156 | def plane_grid(xbound, ybound, zs, yaws, rolls, pitchs, h_w, cuda=True): 157 | B = len(zs) 158 | 159 | xmin, xmax = xbound[0], xbound[1]#-30 30 160 | num_x = int((xbound[1] - xbound[0]) / xbound[2]) 161 | ymin, ymax = ybound[0], ybound[1] 162 | num_y = int((ybound[1] - ybound[0]) / ybound[2]) 163 | 164 | # y = torch.linspace(xmin, xmax, num_x, dtype=torch.double).cuda() 165 | # x = torch.linspace(ymin, ymax, num_y, dtype=torch.double).cuda() 166 | y = torch.linspace(xmin, xmax, num_x) 167 | x = torch.linspace(ymin, ymax, num_y) 168 | if cuda: 169 | x = x.cuda() 170 | y = y.cuda() 171 | 172 | y, x = torch.meshgrid(x, y) 173 | 174 | x = x.flatten() 175 | y = y.flatten() 176 | 177 | x = x.unsqueeze(0).repeat(B, 1) 178 | y = y.unsqueeze(0).repeat(B, 1) 179 | 180 | # z = torch.ones_like(x, dtype=torch.double).cuda() * zs.view(-1, 1) 181 | # d = torch.ones_like(x, dtype=torch.double).cuda() 
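    # NOTE: the grid built below is a set of homogeneous ground-plane points
    # [x, y, h_w, 1] covering xbound x ybound at the given resolution; the Euler
    # rotation further down then tilts that plane by the ego attitude so every BEV
    # cell has a 3D location ready to be projected into the camera views by perspective().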
182 | z = torch.ones_like(x) 183 | z = z*h_w 184 | d = torch.ones_like(x) 185 | if cuda: 186 | z = z.cuda() 187 | d = d.cuda() 188 | 189 | coords = torch.stack([x, y, z, d], axis=1) 190 | 191 | rotation_matrix = rotation_from_euler(pitchs, rolls, yaws, cuda) 192 | 193 | coords = rotation_matrix @ coords 194 | coords_v = coords.cpu().numpy() 195 | return coords 196 | 197 | 198 | def ipm_from_parameters(image, xyz, K, RT, target_h, target_w, extrinsic, post_RT=None): 199 | """ 200 | :param image: [B, H, W, C] 201 | :param xyz: [B, 4, npoints] 202 | :param K: [B, 4, 4] 203 | :param RT: [B, 4, 4] 204 | :param target_h: int 205 | :param target_w: int 206 | :return: warped_images: [B, target_h, target_w, C] 207 | """ 208 | P = K @ RT#4 6 4 4 209 | if post_RT is not None: 210 | P = post_RT @ P 211 | P = P.reshape(-1, 4, 4)#24 4 4 212 | pixel_coords = perspective(xyz, P, target_h, target_w, extrinsic, image.shape[1:3])#转为相机坐标系 213 | image2 = bilinear_sampler(image, pixel_coords) 214 | 215 | image2 = image2.type_as(image) 216 | return image2 217 | 218 | 219 | class PlaneEstimationModule(nn.Module): 220 | def __init__(self, N, C): 221 | super(PlaneEstimationModule, self).__init__() 222 | self.max_pool = nn.AdaptiveMaxPool2d(1) 223 | self.linear = nn.Linear(N*C, 3) 224 | 225 | self.linear.weight.data.fill_(0.) 226 | self.linear.bias.data.fill_(0.) 227 | 228 | def forward(self, x): 229 | B, N, C, H, W = x.shape 230 | x = x.view(B*N, C, H, W) 231 | x = self.max_pool(x) 232 | x = x.view(B, N*C) 233 | x = self.linear(x) 234 | z, pitch, roll = x[:, 0], x[:, 1], x[:, 2] 235 | return z, pitch, roll 236 | 237 | 238 | class IPM(nn.Module): 239 | def __init__(self, xbound, ybound, N, C, z_roll_pitch=False, visual=False, extrinsic=False, cuda=True): 240 | super(IPM, self).__init__() 241 | self.visual = visual 242 | self.z_roll_pitch = z_roll_pitch 243 | self.xbound = xbound 244 | self.ybound = ybound 245 | self.extrinsic = extrinsic 246 | self.w = int((xbound[1] - xbound[0]) / xbound[2]) 247 | self.h = int((ybound[1] - ybound[0]) / ybound[2]) 248 | self.conv = nn.Conv2d(128, 64, kernel_size=1, padding=0) 249 | if z_roll_pitch: 250 | self.plane_esti = PlaneEstimationModule(N, C) 251 | else: 252 | zs = torch.tensor([0]).cuda() 253 | yaws = torch.tensor([0]).cuda() 254 | rolls = torch.tensor([0]).cuda() 255 | pitchs = torch.tensor([0]).cuda() 256 | self.planes_1 = plane_grid(self.xbound, self.ybound, zs, yaws, rolls, pitchs,0.5)[0] 257 | #self.planes_2 = plane_grid(self.xbound, self.ybound, zs, yaws, rolls, pitchs, 1.3)[0] 258 | 259 | tri_mask = np.zeros((self.h, self.w)) 260 | vertices = np.array([[0, 0], [0, self.h], [self.w, self.h]], np.int32) 261 | pts = vertices.reshape((-1, 1, 2)) 262 | cv2.fillPoly(tri_mask, [pts], color=1.) 
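        # NOTE: tri_mask is a triangular wedge over the BEV grid; together with its
        # flipped copy it is used by mask_warped() (visual mode only) so that each of the
        # six warped camera views contributes only its own region when the views are stitched.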
263 | self.tri_mask = torch.tensor(tri_mask[None, :, :, None]) 264 | self.flipped_tri_mask = torch.flip(self.tri_mask, [2]).bool() 265 | if cuda: 266 | self.tri_mask = self.tri_mask.cuda() 267 | self.flipped_tri_mask = self.flipped_tri_mask.cuda() 268 | self.tri_mask = self.tri_mask.bool() 269 | # self.STN_n = [] 270 | # for i in range(6): 271 | # self.STN_n.append(STNNet()) 272 | # self.STN_n = nn.ModuleList(self.STN_n) 273 | def mask_warped(self, warped_fv_images): 274 | warped_fv_images[:, CAM_F, :, :self.w//2, :] *= 0 # CAM_FRONT 275 | warped_fv_images[:, CAM_FL] *= self.flipped_tri_mask # CAM_FRONT_LEFT 276 | warped_fv_images[:, CAM_FR] *= ~self.tri_mask # CAM_FRONT_RIGHT 277 | warped_fv_images[:, CAM_B, :, self.w//2:, :] *= 0 # CAM_BACK 278 | warped_fv_images[:, CAM_BL] *= self.tri_mask # CAM_BACK_LEFT 279 | warped_fv_images[:, CAM_BR] *= ~self.flipped_tri_mask # CAM_BACK_RIGHT 280 | return warped_fv_images 281 | def STN_enhance(self,x): 282 | #b,n,h,w,c = x.shape 283 | x = x.permute(0, 1, 4, 2, 3).contiguous() 284 | outputs = [] 285 | for i in range(6): 286 | x_i = self.STN_n[i](x[:,i,:,:,:]) 287 | outputs.append(x_i) 288 | outputs = torch.stack(outputs, 1) 289 | return outputs 290 | def forward(self, images, Ks, RTs, translation, yaw_roll_pitch, post_RTs=None): 291 | images = images.permute(0, 1, 3, 4, 2).contiguous() 292 | B, N, H, W, C = images.shape 293 | 294 | if self.z_roll_pitch: 295 | # z, roll, pitch = self.plane_esti(images) 296 | zs = translation[:, 2] 297 | rolls = yaw_roll_pitch[:, 1] 298 | pitchs = yaw_roll_pitch[:, 2] 299 | # zs += z 300 | # rolls += roll 301 | # pitchs += pitch 302 | planes = plane_grid(self.xbound, self.ybound, zs, torch.zeros_like(rolls), rolls, pitchs) 303 | planes = planes.repeat(N, 1, 1) 304 | else: 305 | planes_1 = self.planes_1#4 20000 306 | #planes_2 = self.planes_2 # 4 20000 307 | 308 | images = images.reshape(B*N, H, W, C) 309 | warped_fv_images_1 = ipm_from_parameters(images, planes_1, Ks, RTs, self.h, self.w, self.extrinsic, post_RTs) 310 | # warped_fv_images_2 = ipm_from_parameters(images, planes_2, Ks, RTs, self.h, self.w, self.extrinsic, post_RTs) 311 | # warped_fv_images = torch.cat((warped_fv_images_1,warped_fv_images_2),dim=-1)#24 100 200 128 312 | # warped_fv_images = warped_fv_images.permute(0,3,1,2).contiguous() 313 | # warped_fv_images = self.conv(warped_fv_images)#24 64 100 200 314 | # warped_fv_images = warped_fv_images.permute(0, 2, 3, 1).contiguous() 315 | warped_fv_images = warped_fv_images_1.reshape((B, N, self.h, self.w, C)) 316 | if self.visual: 317 | warped_fv_images = self.mask_warped(warped_fv_images) 318 | 319 | if self.visual: 320 | warped_topdown = warped_fv_images[:, CAM_F] + warped_fv_images[:, CAM_B] # CAM_FRONT + CAM_BACK 321 | warped_mask = warped_topdown == 0 322 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_FL][warped_mask] + warped_fv_images[:, CAM_FR][warped_mask] 323 | warped_mask = warped_topdown == 0 324 | warped_topdown[warped_mask] = warped_fv_images[:, CAM_BL][warped_mask] + warped_fv_images[:, CAM_BR][warped_mask] 325 | return warped_topdown.permute(0, 3, 1, 2).contiguous() 326 | else: 327 | 328 | #warped_fv_images = self.STN_enhance(warped_fv_images)#4 6 64 100 200 329 | 330 | warped_topdown, _ = warped_fv_images.max(1) 331 | #warped_topdown = torch.sum(warped_fv_images,dim=1)#多视角相加 332 | warped_topdown = warped_topdown.permute(0, 3, 1, 2).contiguous() 333 | warped_topdown = warped_topdown.view(B, C, self.h, self.w) 334 | return warped_topdown 335 | 336 | 337 | 
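A minimal, CPU-only sketch of how the helpers in this file fit together: build the BEV ground-plane grid with `plane_grid` and project it with `perspective`. It assumes the repository root is on `PYTHONPATH` with the packages from requirement.txt installed, and it passes `cuda=False` explicitly because the defaults above call `.cuda()`; the identity matrix stands in for a real `K @ RT` from nuScenes calibration, so only the shapes are meaningful here.

```python
import torch
from model.homography import plane_grid, perspective

# BEV extent and resolution as configured in model/hdmapnet.py: a 200 x 100 grid
xbound, ybound = [-30, 30, 0.3], [-15, 15, 0.3]

# One "sample" with a flat, untilted ground plane at height h_w = 0.5 m
zs = yaws = rolls = pitchs = torch.zeros(1)
plane = plane_grid(xbound, ybound, zs, yaws, rolls, pitchs, h_w=0.5, cuda=False)
print(plane.shape)  # torch.Size([1, 4, 20000]) -- homogeneous [x, y, z, 1] points

# Placeholder projection matrix (identity instead of a real K @ RT)
P = torch.eye(4).unsqueeze(0)
pix = perspective(plane, P, h=100, w=200, extrinsic=True, offset=None)
print(pix.shape)    # torch.Size([1, 100, 200, 2]) -- sampling coords per BEV cell,
                    # later converted to feature-map pixels inside bilinear_sampler
```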
-------------------------------------------------------------------------------- /model/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /model/lift_splat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. 4 | Authors: Jonah Philion and Sanja Fidler 5 | """ 6 | 7 | import torch 8 | from torch import nn 9 | 10 | from efficientnet_pytorch import EfficientNet 11 | from torchvision.models.resnet import resnet18 12 | from data.utils import gen_dx_bx 13 | class Up(nn.Module): 14 | def __init__(self, in_channels, out_channels, scale_factor=2): 15 | super().__init__() 16 | 17 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', 18 | align_corners=True) 19 | 20 | self.conv = nn.Sequential( 21 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), 22 | nn.BatchNorm2d(out_channels), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), 25 | nn.BatchNorm2d(out_channels), 26 | nn.ReLU(inplace=True) 27 | ) 28 | 29 | def forward(self, x1, x2): 30 | x1 = self.up(x1) 31 | x1 = torch.cat([x2, x1], dim=1) 32 | return self.conv(x1) 33 | 34 | class CamEncode(nn.Module): 35 | def __init__(self, D, C, downsample): 36 | super(CamEncode, self).__init__() 37 | self.D = D 38 | self.C = C 39 | 40 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0") 41 | 42 | self.up1 = Up(320+112, 512) 43 | self.depthnet = nn.Conv2d(512, self.D + self.C, kernel_size=1, padding=0) 44 | 45 | def get_depth_dist(self, x, eps=1e-20): 46 | return x.softmax(dim=1) 47 | 48 | def get_depth_feat(self, x): 49 | x = self.get_eff_depth(x)# 20 512 8 22 50 | # Depth# 51 | x = self.depthnet(x)# 20 105 8 22 52 | 53 | depth = self.get_depth_dist(x[:, :self.D])#20 41 8 22 学习深度预测 54 | 55 | new_x = depth.unsqueeze(1) * x[:, self.D:(self.D + self.C)].unsqueeze(2)#选取64维图像特征 * 深度预测 56 | 57 | return depth, new_x 58 | 59 | def get_eff_depth(self, x): 60 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 61 | endpoints = dict() 62 | 63 | # Stem 64 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x))) 65 | prev_x = x 66 | 67 | # Blocks 68 | for idx, block in enumerate(self.trunk._blocks): 69 | drop_connect_rate = self.trunk._global_params.drop_connect_rate 70 | if drop_connect_rate: 71 | drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate 72 | x = block(x, drop_connect_rate=drop_connect_rate) 73 | if prev_x.size(2) > x.size(2): 74 | endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x 75 | prev_x = x 76 | 77 | # Head 78 | endpoints['reduction_{}'.format(len(endpoints)+1)] = x 79 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4']) 80 | return x 81 | 82 | def forward(self, x): 83 | depth, x = self.get_depth_feat(x) 84 | 85 | return x 86 | 87 | 88 | class BevEncode(nn.Module): 89 | def __init__(self, inC, outC): 90 | super(BevEncode, self).__init__() 91 | 92 | trunk = resnet18(pretrained=False, zero_init_residual=True) 93 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, 94 | bias=False) 95 | self.bn1 = trunk.bn1 96 | self.relu = trunk.relu 97 | 98 | self.layer1 = trunk.layer1 
99 | self.layer2 = trunk.layer2 100 | self.layer3 = trunk.layer3 101 | 102 | self.up1 = Up(64+256, 256, scale_factor=4) 103 | self.up2 = nn.Sequential( 104 | nn.Upsample(scale_factor=2, mode='bilinear', 105 | align_corners=True), 106 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 107 | nn.BatchNorm2d(128), 108 | nn.ReLU(inplace=True), 109 | nn.Conv2d(128, outC, kernel_size=1, padding=0), 110 | ) 111 | 112 | def forward(self, x): 113 | x = self.conv1(x) 114 | x = self.bn1(x) 115 | x = self.relu(x) 116 | 117 | x1 = self.layer1(x) 118 | x = self.layer2(x1) 119 | x = self.layer3(x) 120 | 121 | x = self.up1(x, x1) 122 | x = self.up2(x) 123 | 124 | return x 125 | 126 | 127 | def cumsum_trick(x, geom_feats, ranks): 128 | x = x.cumsum(0) 129 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) 130 | kept[:-1] = (ranks[1:] != ranks[:-1]) 131 | 132 | x, geom_feats = x[kept], geom_feats[kept] 133 | x = torch.cat((x[:1], x[1:] - x[:-1])) 134 | 135 | return x, geom_feats 136 | 137 | 138 | class QuickCumsum(torch.autograd.Function): 139 | @staticmethod 140 | def forward(ctx, x, geom_feats, ranks): 141 | x = x.cumsum(0) 142 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) 143 | kept[:-1] = (ranks[1:] != ranks[:-1]) 144 | 145 | x, geom_feats = x[kept], geom_feats[kept] 146 | x = torch.cat((x[:1], x[1:] - x[:-1])) 147 | 148 | # save kept for backward 149 | ctx.save_for_backward(kept) 150 | 151 | # no gradient for geom_feats 152 | ctx.mark_non_differentiable(geom_feats) 153 | 154 | return x, geom_feats 155 | 156 | @staticmethod 157 | def backward(ctx, gradx, gradgeom): 158 | kept, = ctx.saved_tensors 159 | back = torch.cumsum(kept, 0) 160 | back[kept] -= 1 161 | 162 | val = gradx[back] 163 | 164 | return val, None, None 165 | 166 | 167 | class LiftSplat(nn.Module): 168 | def __init__(self, grid_conf, instance_seg, embedded_dim):#, data_aug_conf 169 | super(LiftSplat, self).__init__() 170 | self.grid_conf = grid_conf 171 | self.data_aug_conf = data_aug_conf = { 172 | 'resize_lim': (0.193, 0.225), 173 | 'final_dim': (128, 352), 174 | 175 | } 176 | 177 | dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], 178 | self.grid_conf['ybound'], 179 | self.grid_conf['zbound'], 180 | ) 181 | self.dx = nn.Parameter(dx, requires_grad=False) 182 | self.bx = nn.Parameter(bx, requires_grad=False) 183 | self.nx = nn.Parameter(nx, requires_grad=False) 184 | 185 | self.downsample = 16 186 | self.camC = 64 187 | self.frustum = self.create_frustum() 188 | # D x H/downsample x D/downsample x 3 189 | self.D, _, _, _ = self.frustum.shape 190 | self.camencode = CamEncode( self.D, self.camC, self.downsample) 191 | self.bevencode = BevEncode(inC=self.camC, outC=grid_conf['num_channels']) 192 | 193 | # toggle using QuickCumsum vs. 
autograd 194 | self.use_quickcumsum = True 195 | 196 | def create_frustum(self): 197 | # make grid in image plane 198 | ogfH, ogfW = self.data_aug_conf['final_dim'] 199 | fH, fW = ogfH // self.downsample, ogfW // self.downsample 200 | ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW) 201 | D, _, _ = ds.shape 202 | xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW) 203 | ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW) 204 | 205 | # D x H x W x 3 206 | frustum = torch.stack((xs, ys, ds), -1) 207 | return nn.Parameter(frustum, requires_grad=False) 208 | 209 | def get_geometry(self, rots, trans, intrins, post_rots, post_trans): 210 | """Determine the (x,y,z) locations (in the ego frame) 211 | of the points in the point cloud. 212 | Returns B x N x D x H/downsample x W/downsample x 3 213 | """ 214 | B, N, _ = trans.shape 215 | 216 | # *undo* post-transformation 217 | # B x N x D x H x W x 3 218 | points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3) 219 | points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1)) 220 | 221 | # cam_to_ego 222 | points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3], 223 | points[:, :, :, :, :, 2:3] 224 | ), 5) 225 | combine = rots.matmul(torch.inverse(intrins)) 226 | points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1) 227 | points += trans.view(B, N, 1, 1, 1, 3) 228 | 229 | return points 230 | 231 | def get_cam_feats(self, x): 232 | """Return B x N x D x H/downsample x W/downsample x C 233 | """ 234 | B, N, C, imH, imW = x.shape 235 | 236 | x = x.view(B*N, C, imH, imW) 237 | x = self.camencode(x) 238 | x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample) 239 | x = x.permute(0, 1, 3, 4, 5, 2) 240 | 241 | return x 242 | 243 | def voxel_pooling(self, geom_feats, x): 244 | B, N, D, H, W, C = x.shape 245 | Nprime = B*N*D*H*W 246 | 247 | # flatten x 248 | x = x.reshape(Nprime, C) 249 | 250 | # flatten indices 251 | # B x N x D x H/downsample x W/downsample x 3 252 | geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long() 253 | geom_feats = geom_feats.view(Nprime, 3) 254 | batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, device=x.device, dtype=torch.long) for ix in range(B)]) 255 | geom_feats = torch.cat((geom_feats, batch_ix), 1) # x, y, z, b 256 | 257 | # filter out points that are outside box 258 | kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\ 259 | & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\ 260 | & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2]) 261 | x = x[kept] 262 | geom_feats = geom_feats[kept] 263 | 264 | # get tensors from the same voxel next to each other 265 | ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\ 266 | + geom_feats[:, 1] * (self.nx[2] * B)\ 267 | + geom_feats[:, 2] * B\ 268 | + geom_feats[:, 3] 269 | sorts = ranks.argsort() 270 | x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts] 271 | 272 | # cumsum trick 273 | if not self.use_quickcumsum: 274 | x, geom_feats = cumsum_trick(x, geom_feats, ranks) 275 | else: 276 | x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks) 277 | 278 | # griddify (B x C x Z x X x Y) 279 | final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device) 280 | final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x 281 | 282 | # collapse Z 283 | final = 
torch.cat(final.unbind(dim=2), 1) 284 | 285 | return final 286 | 287 | def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans): 288 | # B x N x D x H/downsample x W/downsample x 3: (x,y,z) locations (in the ego frame) 289 | geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans) 290 | # B x N x D x H/downsample x W/downsample x C: cam feats 291 | x = self.get_cam_feats(x) 292 | 293 | x = self.voxel_pooling(geom, x) 294 | 295 | return x 296 | 297 | def forward(self, x, trans, rots, intrins, post_trans, post_rots, translation, yaw_pitch_roll): 298 | x = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans) 299 | x = self.bevencode(x) 300 | return x 301 | -------------------------------------------------------------------------------- /model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def plane_grid_2d(xbound, ybound): 4 | xmin, xmax = xbound[0], xbound[1] 5 | num_x = int((xbound[1] - xbound[0]) / xbound[2]) 6 | ymin, ymax = ybound[0], ybound[1] 7 | num_y = int((ybound[1] - ybound[0]) / ybound[2]) 8 | 9 | y = torch.linspace(xmin, xmax, num_x).cuda() 10 | x = torch.linspace(ymin, ymax, num_y).cuda() 11 | y, x = torch.meshgrid(x, y) 12 | x = x.flatten() 13 | y = y.flatten() 14 | 15 | coords = torch.stack([x, y], axis=0) 16 | return coords 17 | 18 | 19 | def cam_to_pixel(points, xbound, ybound): 20 | new_points = torch.zeros_like(points) 21 | new_points[..., 0] = (points[..., 0] - xbound[0]) / xbound[2] 22 | new_points[..., 1] = (points[..., 1] - ybound[0]) / ybound[2] 23 | return new_points 24 | 25 | 26 | def get_rot_2d(yaw): 27 | sin_yaw = torch.sin(yaw) 28 | cos_yaw = torch.cos(yaw) 29 | rot = torch.zeros(list(yaw.shape) + [2, 2]).cuda() 30 | rot[..., 0, 0] = cos_yaw 31 | rot[..., 0, 1] = sin_yaw 32 | rot[..., 1, 0] = -sin_yaw 33 | rot[..., 1, 1] = cos_yaw 34 | return rot 35 | 36 | 37 | -------------------------------------------------------------------------------- /pic/ex.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pic/img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/pic/img1.png -------------------------------------------------------------------------------- /pic/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/pic/img2.png -------------------------------------------------------------------------------- /pic/img3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lynn-yu/Bi-Mapper/06ca19664fb93d5404d6f25fc4dd8d5efba1c686/pic/img3.png -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | addict==2.4.0 2 | argon2-cffi==21.3.0 3 | argon2-cffi-bindings==21.2.0 4 | asttokens==2.0.5 5 | attrs==21.4.0 6 | backcall==0.2.0 7 | black==22.1.0 8 | bleach==4.1.0 9 | cachetools==5.0.0 10 | certifi==2021.10.8 11 | cffi==1.15.0 12 | click==8.0.4 13 | cycler==0.11.0 14 | debugpy==1.5.1 15 | decorator==5.1.1 16 | defusedxml==0.7.1 17 | descartes==1.1.0 18 | efficientnet-pytorch==0.7.1 19 | entrypoints==0.4 20 | 
executing==0.8.2 21 | fire==0.4.0 22 | fonttools==4.29.1 23 | importlib-resources==5.4.0 24 | ipdb==0.13.9 25 | ipykernel==6.9.1 26 | ipython==8.0.1 27 | ipython-genutils==0.2.0 28 | ipywidgets==7.6.5 29 | jedi==0.18.1 30 | Jinja2==3.0.3 31 | joblib==1.1.0 32 | jsonschema==4.4.0 33 | jupyter==1.0.0 34 | jupyter-client==7.1.2 35 | jupyter-console==6.4.0 36 | jupyter-core==4.9.2 37 | jupyterlab-pygments==0.1.2 38 | jupyterlab-widgets==1.0.2 39 | kiwisolver==1.3.2 40 | MarkupSafe==2.1.0 41 | matplotlib==3.5.1 42 | matplotlib-inline==0.1.3 43 | mistune==0.8.4 44 | mmcv==1.4.5 45 | mypy-extensions==0.4.3 46 | nbclient==0.5.11 47 | nbconvert==6.4.2 48 | nbformat==5.1.3 49 | nest-asyncio==1.5.4 50 | notebook==6.4.8 51 | numpy==1.22.2 52 | nuscenes-devkit==1.1.9 53 | opencv-python==4.5.5.62 54 | packaging==21.3 55 | pandocfilters==1.5.0 56 | parso==0.8.3 57 | pathspec==0.9.0 58 | pexpect==4.8.0 59 | pickleshare==0.7.5 60 | Pillow==9.0.1 61 | platformdirs==2.5.1 62 | prometheus-client==0.13.1 63 | prompt-toolkit==3.0.28 64 | protobuf==3.19.4 65 | ptyprocess==0.7.0 66 | pure-eval==0.2.2 67 | pycocotools==2.0.4 68 | pycparser==2.21 69 | Pygments==2.11.2 70 | pyparsing==3.0.7 71 | pyquaternion==0.9.9 72 | pyrsistent==0.18.1 73 | python-dateutil==2.8.2 74 | PyYAML==6.0 75 | pyzmq==22.3.0 76 | qtconsole==5.2.2 77 | QtPy==2.0.1 78 | scikit-learn==1.0.2 79 | scipy==1.8.0 80 | Send2Trash==1.8.0 81 | Shapely==1.8.1.post1 82 | six==1.16.0 83 | stack-data==0.2.0 84 | tensorboardX==2.4.1 85 | termcolor==1.1.0 86 | terminado==0.13.1 87 | testpath==0.5.0 88 | threadpoolctl==3.1.0 89 | toml==0.10.2 90 | tomli==2.0.1 91 | torch==1.10.2+cu113 92 | torch-scatter==2.0.9 93 | torchaudio==0.10.2+cu113 94 | torchvision==0.11.3+cu113 95 | tornado==6.1 96 | tqdm==4.62.3 97 | traitlets==5.1.1 98 | typing_extensions==4.1.1 99 | wcwidth==0.2.5 100 | webencodings==0.5.1 101 | widgetsnbextension==3.5.2 102 | yapf==0.32.0 103 | zipp==3.7.0 104 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 3 | import numpy as np 4 | import sys 5 | import logging 6 | from time import time 7 | from tensorboardX import SummaryWriter 8 | import argparse 9 | import cv2 10 | import torch 11 | from torch.optim.lr_scheduler import StepLR 12 | from loss import SimpleLoss, DiscriminativeLoss, view_loss,FocalLoss 13 | import torch.nn as nn 14 | from data.dataset import semantic_dataset,vir_semantic_dataset 15 | from data.const import NUM_CLASSES 16 | from evaluation.iou import get_batch_iou 17 | from evaluation.angle_diff import calc_angle_diff 18 | from model import get_model 19 | from evaluate import onehot_encoding, eval_iou 20 | 21 | def write_log(writer, ious, title, counter): 22 | writer.add_scalar(f'{title}/iou', torch.mean(ious[1:]), counter) 23 | 24 | for i, iou in enumerate(ious): 25 | writer.add_scalar(f'{title}/class_{i}/iou', iou, counter) 26 | def train(args): 27 | if not os.path.exists(args.logdir): 28 | os.mkdir(args.logdir) 29 | logging.basicConfig(filename=os.path.join(args.logdir, "results.log"), 30 | filemode='w', 31 | format='%(asctime)s: %(message)s', 32 | datefmt='%Y-%m-%d %H:%M:%S', 33 | level=logging.INFO) 34 | logging.getLogger('shapely.geos').setLevel(logging.CRITICAL) 35 | 36 | logger = logging.getLogger() 37 | logger.addHandler(logging.StreamHandler(sys.stdout)) 38 | 39 | data_conf = { 40 | 'num_channels': NUM_CLASSES + 1, 41 | 'image_size': 
args.image_size, 42 | 'image_size_i': args.image_size_i, 43 | 'xbound': args.xbound, 44 | 'ybound': args.ybound, 45 | 'zbound': args.zbound, 46 | 'dbound': args.dbound, 47 | 'thickness': args.thickness, 48 | 'angle_class': args.angle_class, 49 | 'B':args.bsz, 50 | 'merge': True 51 | } 52 | #train_loader, val_loader = vir_semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers) 53 | train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers)#构建训练和验证集 54 | model = get_model(args.model, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class)#选择不同传感器布局方式的网络 55 | read_last = args.read_last 56 | if read_last:#args.finetune 57 | model.load_state_dict(torch.load(args.modelf), strict=False) 58 | for name, param in model.named_parameters(): 59 | param.requires_grad = True 60 | 61 | 62 | model.cuda() 63 | 64 | opt = torch.optim.AdamW([{'params': model.parameters(), 'initial_lr': 1e-3}], lr=1e-3, weight_decay=args.weight_decay) 65 | 66 | 67 | if read_last: 68 | sched = StepLR(opt, 10, 0.1, last_epoch=read_last-1) 69 | else: 70 | sched = StepLR(opt, 10, 0.1) 71 | 72 | writer = SummaryWriter(logdir=args.logdir) 73 | 74 | loss_fn = SimpleLoss(args.pos_weight).cuda()#损失对象 75 | loss_fn_view = SimpleLoss(args.pos_weight).cuda() # 损失对象 76 | 77 | 78 | 79 | model.train() 80 | counter = 0 81 | last_idx = len(train_loader) - 1 82 | 83 | 84 | for epoch in range(read_last,args.nepochs): 85 | sched.last_epoch = -1 86 | for batchi, (img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots,car_trans, 87 | yaw_pitch_roll, semantic_gt, instance_gt) in enumerate(train_loader):#, direction_gt 88 | t0 = time() 89 | 90 | 91 | opt.zero_grad() 92 | semantic , x_view_pre, mu_loss, mu_loss_i= model(feas.cuda(),imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(), 93 | post_trans.cuda(), post_rots.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda()) 94 | 95 | 96 | semantic_gt = semantic_gt.cuda().float() 97 | 98 | 99 | view_img_gt = view_loss(semantic_gt.cuda(),trans.cuda(),rots.cuda())#b*n 5 h w 100 | view_img_loss = loss_fn_view(x_view_pre,view_img_gt) 101 | seg_loss = loss_fn(semantic, semantic_gt) 102 | 103 | 104 | if epoch<5: 105 | final_loss = seg_loss * args.scale_seg + view_img_loss +0.1*mu_loss_i#+seg_loss_f#+ var_loss * args.scale_var + dist_loss * args.scale_dist+ mu_loss# + direction_loss * args.scale_direction 106 | 107 | else: 108 | final_loss = seg_loss * args.scale_seg + view_img_loss + 0.1*mu_loss_i+ 0.1*mu_loss 109 | 110 | final_loss.requires_grad_(True) 111 | final_loss.backward() 112 | 113 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm, norm_type=2) 114 | opt.step() 115 | counter += 1 116 | t1 = time() 117 | #sched.step() 118 | 119 | if counter % 10 == 0: 120 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt) 121 | iou = intersects / (union + 1e-7) 122 | intersects1, union1 = get_batch_iou(onehot_encoding(x_view_pre), view_img_gt) 123 | iou_cam = intersects1 / (union1 + 1e-7) 124 | 125 | logger.info(f"TRAIN[{epoch:>3d}]: [{batchi:>4d}/{last_idx}] " 126 | f"Time: {t1-t0:>7.4f} " 127 | f"Loss: [{final_loss.item():>7.4f}],[{seg_loss.item():>7.4f}],[{view_img_loss.item():>7.4f}] "# 128 | f"mutual_Loss:[{mu_loss_i:>7.4f}],[{mu_loss.item():>7.4f}]" 129 | f"IOU: {np.array2string(iou[1:].numpy(), precision=3, floatmode='fixed')} " 130 | f"IOU_cam: {np.array2string(iou_cam[1:].numpy(), precision=3, floatmode='fixed')} " 131 | ) 132 | 133 | 
write_log(writer, iou, 'train', counter) 134 | writer.add_scalar('train/step_time', t1 - t0, counter) 135 | writer.add_scalar('train/seg_loss', seg_loss, counter) 136 | 137 | 138 | iou,iou_cam = eval_iou(model, val_loader) 139 | logger.info(f"EVAL[{epoch:>2d}]: " 140 | f"IOU: {np.array2string(iou[1:].cpu().numpy(), precision=3, floatmode='fixed')} " 141 | f"IOU_cam: {np.array2string(iou_cam[1:].cpu().numpy(), precision=3, floatmode='fixed')}" 142 | ) 143 | write_log(writer, iou, 'eval', counter) 144 | 145 | model_name = os.path.join(args.logdir, f"model{epoch}.pt") 146 | torch.save(model.state_dict(), model_name) 147 | logger.info(f"{model_name} saved") 148 | model.train() 149 | 150 | sched.step() 151 | 152 | 153 | if __name__ == '__main__': 154 | parser = argparse.ArgumentParser(description='HDMapNet training.') 155 | # logging config 156 | parser.add_argument("--logdir", type=str, default='./runs')# 157 | 158 | # nuScenes config 159 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0') 160 | parser.add_argument('--version', type=str, default='v1.0-trainval', choices=['v1.0-trainval', 'v1.0-mini']) 161 | parser.add_argument('--read_last', type=int, default=0) 162 | # model config 163 | parser.add_argument("--model", type=str, default='BiMapper') 164 | 165 | # training config 166 | parser.add_argument("--nepochs", type=int, default=24) 167 | parser.add_argument("--max_grad_norm", type=float, default=35.0) 168 | parser.add_argument("--pos_weight", type=float, default=2.13) 169 | parser.add_argument("--bsz", type=int, default=4) 170 | parser.add_argument("--nworkers", type=int, default=10) 171 | parser.add_argument("--lr", type=float, default=1e-3) 172 | parser.add_argument("--weight_decay", type=float, default=0.01) 173 | 174 | # finetune config 175 | parser.add_argument('--finetune', action='store_true') 176 | parser.add_argument('--modelf', type=str) 177 | 178 | # data config 179 | parser.add_argument("--thickness", type=int, default=5) 180 | parser.add_argument("--image_size", nargs=2, type=int, default=[256, 704]) 181 | parser.add_argument("--image_size_i", nargs=2, type=int, default=[128, 352]) 182 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 183 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])#200 184 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0]) 185 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0]) 186 | 187 | # embedding config 188 | parser.add_argument('--instance_seg', action='store_true') 189 | parser.add_argument("--embedding_dim", type=int, default=16) 190 | parser.add_argument("--delta_v", type=float, default=0.5) 191 | parser.add_argument("--delta_d", type=float, default=3.0) 192 | 193 | # direction config 194 | parser.add_argument('--direction_pred', action='store_true') 195 | parser.add_argument('--angle_class', type=int, default=36) 196 | 197 | # loss config 198 | parser.add_argument("--scale_seg", type=float, default=1.0) 199 | parser.add_argument("--scale_var", type=float, default=1.0) 200 | parser.add_argument("--scale_dist", type=float, default=1.0) 201 | parser.add_argument("--scale_direction", type=float, default=0.2) 202 | 203 | args = parser.parse_args() 204 | train(args) 205 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # nuScenes dev-kit. 
2 | # Code written by Holger Caesar, 2018. 3 | 4 | from typing import List, Dict, Any 5 | 6 | import numpy as np 7 | from pyquaternion import Quaternion 8 | 9 | from nuscenes.eval.common.data_classes import EvalBox 10 | from nuscenes.utils.data_classes import Box 11 | 12 | DetectionBox = Any # Workaround as direct imports lead to cyclic dependencies. 13 | 14 | 15 | def center_distance(gt_box: EvalBox, pred_box: EvalBox) -> float: 16 | """ 17 | L2 distance between the box centers (xy only). 18 | :param gt_box: GT annotation sample. 19 | :param pred_box: Predicted sample. 20 | :return: L2 distance. 21 | """ 22 | return np.linalg.norm(np.array(pred_box.translation[:2]) - np.array(gt_box.translation[:2])) 23 | 24 | 25 | def velocity_l2(gt_box: EvalBox, pred_box: EvalBox) -> float: 26 | """ 27 | L2 distance between the velocity vectors (xy only). 28 | If the predicted velocities are nan, we return inf, which is subsequently clipped to 1. 29 | :param gt_box: GT annotation sample. 30 | :param pred_box: Predicted sample. 31 | :return: L2 distance. 32 | """ 33 | return np.linalg.norm(np.array(pred_box.velocity) - np.array(gt_box.velocity)) 34 | 35 | 36 | def yaw_diff(gt_box: EvalBox, eval_box: EvalBox, period: float = 2*np.pi) -> float: 37 | """ 38 | Returns the yaw angle difference between the orientation of two boxes. 39 | :param gt_box: Ground truth box. 40 | :param eval_box: Predicted box. 41 | :param period: Periodicity in radians for assessing angle difference. 42 | :return: Yaw angle difference in radians in [0, pi]. 43 | """ 44 | yaw_gt = quaternion_yaw(Quaternion(gt_box.rotation)) 45 | yaw_est = quaternion_yaw(Quaternion(eval_box.rotation)) 46 | 47 | return abs(angle_diff(yaw_gt, yaw_est, period)) 48 | 49 | 50 | def angle_diff(x: float, y: float, period: float) -> float: 51 | """ 52 | Get the smallest angle difference between 2 angles: the angle from y to x. 53 | :param x: To angle. 54 | :param y: From angle. 55 | :param period: Periodicity in radians for assessing angle difference. 56 | :return: . Signed smallest between-angle difference in range (-pi, pi). 57 | """ 58 | 59 | # calculate angle difference, modulo to [0, 2*pi] 60 | diff = (x - y + period / 2) % period - period / 2 61 | if diff > np.pi: 62 | diff = diff - (2 * np.pi) # shift (pi, 2*pi] to (-pi, 0] 63 | 64 | return diff 65 | 66 | 67 | def attr_acc(gt_box: DetectionBox, pred_box: DetectionBox) -> float: 68 | """ 69 | Computes the classification accuracy for the attribute of this class (if any). 70 | If the GT class has no attributes or the annotation is missing attributes, we assign an accuracy of nan, which is 71 | ignored later on. 72 | :param gt_box: GT annotation sample. 73 | :param pred_box: Predicted sample. 74 | :return: Attribute classification accuracy (0 or 1) or nan if GT annotation does not have any attributes. 75 | """ 76 | if gt_box.attribute_name == '': 77 | # If the class does not have attributes or this particular sample is missing attributes, return nan, which is 78 | # ignored later. Note that about 0.4% of the sample_annotations have no attributes, although they should. 79 | acc = np.nan 80 | else: 81 | # Check that label is correct. 82 | acc = float(gt_box.attribute_name == pred_box.attribute_name) 83 | return acc 84 | 85 | 86 | def scale_iou(sample_annotation: EvalBox, sample_result: EvalBox) -> float: 87 | """ 88 | This method compares predictions to the ground truth in terms of scale. 
89 | It is equivalent to intersection over union (IOU) between the two boxes in 3D, 90 | if we assume that the boxes are aligned, i.e. translation and rotation are considered identical. 91 | :param sample_annotation: GT annotation sample. 92 | :param sample_result: Predicted sample. 93 | :return: Scale IOU. 94 | """ 95 | # Validate inputs. 96 | sa_size = np.array(sample_annotation.size) 97 | sr_size = np.array(sample_result.size) 98 | assert all(sa_size > 0), 'Error: sample_annotation sizes must be >0.' 99 | assert all(sr_size > 0), 'Error: sample_result sizes must be >0.' 100 | 101 | # Compute IOU. 102 | min_wlh = np.minimum(sa_size, sr_size) 103 | volume_annotation = np.prod(sa_size) 104 | volume_result = np.prod(sr_size) 105 | intersection = np.prod(min_wlh) # type: float 106 | union = volume_annotation + volume_result - intersection # type: float 107 | iou = intersection / union 108 | 109 | return iou 110 | 111 | 112 | def quaternion_yaw(q: Quaternion) -> float: 113 | """ 114 | Calculate the yaw angle from a quaternion. 115 | Note that this only works for a quaternion that represents a box in lidar or global coordinate frame. 116 | It does not work for a box in the camera frame. 117 | :param q: Quaternion of interest. 118 | :return: Yaw angle in radians. 119 | """ 120 | 121 | # Project into xy plane. 122 | v = np.dot(q.rotation_matrix, np.array([1, 0, 0])) 123 | 124 | # Measure yaw using arctan. 125 | yaw = np.arctan2(v[1], v[0]) 126 | 127 | return yaw 128 | 129 | 130 | def boxes_to_sensor(boxes: List[EvalBox], pose_record: Dict, cs_record: Dict): 131 | """ 132 | Map boxes from global coordinates to the vehicle's sensor coordinate system. 133 | :param boxes: The boxes in global coordinates. 134 | :param pose_record: The pose record of the vehicle at the current timestamp. 135 | :param cs_record: The calibrated sensor record of the sensor. 136 | :return: The transformed boxes. 137 | """ 138 | boxes_out = [] 139 | for box in boxes: 140 | # Create Box instance. 141 | box = Box(box.translation, box.size, Quaternion(box.rotation)) 142 | 143 | # Move box to ego vehicle coord system. 144 | box.translate(-np.array(pose_record['translation'])) 145 | box.rotate(Quaternion(pose_record['rotation']).inverse) 146 | 147 | # Move box to sensor coord system. 148 | box.translate(-np.array(cs_record['translation'])) 149 | box.rotate(Quaternion(cs_record['rotation']).inverse) 150 | 151 | boxes_out.append(box) 152 | 153 | return boxes_out 154 | 155 | 156 | def cummean(x: np.array) -> np.array: 157 | """ 158 | Computes the cumulative mean up to each position in a NaN-sensitive way: 159 | - If all values are NaN, return an array of ones. 160 | - If some values are NaN, accumulate while discarding those entries. 161 | """ 162 | if sum(np.isnan(x)) == len(x): 163 | # All values in the array are NaN. 164 | return np.ones(len(x)) # If all errors are NaN, set the error to 1 for all operating points. 165 | else: 166 | # Accumulate in a nan-aware manner. 167 | sum_vals = np.nancumsum(x.astype(float)) # Cumulative sum ignoring nans. 168 | count_vals = np.cumsum(~np.isnan(x)) # Number of non-nans up to each position.
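# Positions with no finite samples yet have count_vals == 0; the where= guard below keeps the output at 0 there instead of dividing by zero.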
169 | return np.divide(sum_vals, count_vals, out=np.zeros_like(sum_vals), where=count_vals != 0) 170 | -------------------------------------------------------------------------------- /vis_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_VISIBLE_DEVICES'] = '3' 3 | import tqdm 4 | import argparse 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from PIL import Image 8 | from loss import SimpleLoss, DiscriminativeLoss, view_loss, FocalLoss 9 | from data.dataset import HDMapNetSemanticDataset_all, CAMS, HDMapNetSemanticDataset_cam, HDMapNetDataset 10 | from data.utils import get_proj_mat, perspective 11 | from data.image import denormalize_img 12 | from data.viz_data import BaseViz 13 | import cv2 14 | def to_image(x): 15 | return (255 * x).byte().cpu().numpy().transpose(1, 2, 0) 16 | def vis_label(dataroot, version, xbound, ybound): 17 | data_conf = { 18 | 'image_size': (900, 1600), 19 | 'xbound': xbound, 20 | 'ybound': ybound, 21 | 'zbound': args.zbound, 22 | 'dbound': args.dbound 23 | } 24 | 25 | color_map = np.random.randint(0, 256, (256, 3)) 26 | color_map[0] = np.array([0, 0, 0]) 27 | colors_plt = ['r', 'b', 'g'] 28 | 29 | dataset = HDMapNetDataset(version=version, dataroot=dataroot, data_conf=data_conf, is_train=False) 30 | gt_path = os.path.join(dataroot, 'samples', 'GT') 31 | if not os.path.exists(gt_path): 32 | os.mkdir(gt_path) 33 | 34 | car_img = Image.open('icon/car.png') # ego-vehicle icon 35 | for idx in tqdm.tqdm(range(dataset.__len__())): 36 | rec = dataset.nusc.sample[idx] # sample record at each timestamp 37 | img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = dataset.get_imgs(rec, 1) # data from each camera 38 | vectors = dataset.get_vectors(rec) 39 | 40 | lidar_top_path = dataset.nusc.get_sample_data_path(rec['data']['LIDAR_TOP']) 41 | 42 | base_path = lidar_top_path.split('/')[-1].replace('__LIDAR_TOP__', '_').split('.')[0] 43 | base_path = os.path.join(gt_path, base_path) 44 | 45 | if not os.path.exists(base_path): 46 | os.mkdir(base_path) 47 | 48 | plt.figure(figsize=(4, 2)) 49 | plt.xlim(-30, 30) 50 | plt.ylim(-15, 15) 51 | plt.axis('off') 52 | for vector in vectors: 53 | pts, pts_num, line_type = vector['pts'], vector['pts_num'], vector['type'] 54 | pts = pts[:pts_num] 55 | x = np.array([pt[0] for pt in pts]) 56 | y = np.array([pt[1] for pt in pts]) 57 | plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy', angles='xy', scale=1, color=colors_plt[line_type]) 58 | 59 | plt.imshow(car_img, extent=[-1.5, 1.5, -1.2, 1.2]) 60 | 61 | map_path = os.path.join(base_path, 'MAP.png') 62 | plt.savefig(map_path, bbox_inches='tight', dpi=400) 63 | plt.close() 64 | 65 | for img, intrin, rot, tran, cam in zip(imgs, intrins, rots, trans, CAMS): 66 | img = denormalize_img(img) 67 | P = get_proj_mat(intrin, rot, tran) 68 | plt.figure(figsize=(9, 16)) 69 | fig = plt.imshow(img) 70 | fig.axes.get_xaxis().set_visible(False) 71 | fig.axes.get_yaxis().set_visible(False) 72 | plt.xlim(1600, 0) 73 | plt.ylim(900, 0) 74 | plt.axis('off') 75 | for vector in vectors: 76 | pts, pts_num, line_type = vector['pts'], vector['pts_num'], vector['type'] 77 | pts = pts[:pts_num] 78 | zeros = np.zeros((pts_num, 1)) 79 | ones = np.ones((pts_num, 1)) 80 | world_coords = np.concatenate([pts, zeros, ones], axis=1).transpose(1, 0) 81 | pix_coords = perspective(world_coords, P) 82 | x = np.array([pts[0] for pts in pix_coords]) 83 | y = np.array([pts[1] for pts in pix_coords]) 84 | plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], scale_units='xy',
85 | angles='xy', scale=1, color=colors_plt[line_type]) 86 | 87 | cam_path = os.path.join(base_path, f'{cam}.png') 88 | plt.savefig(cam_path, bbox_inches='tight', pad_inches=0, dpi=400) 89 | plt.close() 90 | 91 | def vis_gt(dataroot, version, xbound, ybound): 92 | data_conf = { 93 | 'xbound': xbound, 94 | 'ybound': ybound, 95 | 'zbound': args.zbound, 96 | 'thickness': args.thickness, 97 | 'angle_class': args.angle_class, 98 | 'image_size': args.image_size, 99 | 'dbound': args.dbound 100 | } 101 | dataset = HDMapNetSemanticDataset_all(version=version, dataroot=dataroot, data_conf=data_conf, is_train=False) 102 | for idx in tqdm.tqdm(range(dataset.__len__())): 103 | rec = dataset.nusc.sample[idx] # sample record at each timestamp 104 | # img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = dataset.get_imgs(rec, 1) 105 | # for j in range(6): 106 | # plt.imshow(to_image(img_origin[j])) 107 | # plt.show() 108 | semantic_masks, instance_masks, forward_masks, backward_masks, direction_masks = dataset.get_semantic_map(rec) 109 | 110 | # plt.imshow(semantic_masks[1], vmin=0, cmap='Blues', vmax=1, alpha=0.6) 111 | # plt.imshow(semantic_masks[2], vmin=0, cmap='Reds', vmax=1, alpha=0.6) 112 | # plt.imshow(semantic_masks[3], vmin=0, cmap='Greens', vmax=1, alpha=0.6) 113 | # plt.show() 114 | img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots = dataset.get_imgs(rec, 1) 115 | # cam_to_world_t, cam_to_world_r = dataset.get_cam_pose(rec, trans.numpy(), rots.numpy()) 116 | # semantic_map_cam = dataset.get_cam(rec, cam_to_world_t, cam_to_world_r) 117 | 118 | # plt.axis('off') # hide the axes 119 | # plt.xticks([]) # remove x-axis ticks 120 | # plt.yticks([]) # remove y-axis ticks 121 | view_img_gt = view_loss(semantic_masks.unsqueeze(0).cuda(), trans.unsqueeze(0).cuda(), rots.unsqueeze(0).cuda()) # b*n 5 h w 122 | 123 | view_img_gt = view_img_gt.cpu().numpy() 124 | 125 | 126 | for j in range(6): 127 | plt.axis('off') # hide the axes 128 | plt.xticks([]) # remove x-axis ticks 129 | plt.yticks([]) # remove y-axis ticks 130 | plt.imshow(to_image(img_origin[j])) 131 | #plt.show() 132 | plt.savefig('/data/lsy/HDmap_1/vis_gt_cam/img_{}_{}_{}'.format(idx, j, 0)) 133 | 134 | 135 | plt.axis('off') # hide the axes 136 | plt.xticks([]) # remove x-axis ticks 137 | plt.yticks([]) # remove y-axis ticks 138 | plt.imshow(view_img_gt[j][1], vmin=0, cmap='Blues', vmax=1, alpha=0.6) 139 | plt.imshow(view_img_gt[j][2], vmin=0, cmap='Reds', vmax=1, alpha=0.6) 140 | plt.imshow(view_img_gt[j][3], vmin=0, cmap='Greens', vmax=1, alpha=0.6) 141 | #plt.show() 142 | plt.savefig('/data/lsy/HDmap_1/vis_gt_cam/img_{}_{}_{}'.format(idx, j, 1)) 143 | 144 | # plt.axis('off') # hide the axes 145 | # plt.xticks([]) # remove x-axis ticks 146 | # plt.yticks([]) # remove y-axis ticks 147 | # plt.imshow(semantic_map_cam[j][0][:,110:190], vmin=0, cmap='Blues', vmax=1, alpha=0.6) 148 | # plt.imshow(semantic_map_cam[j][1][:,110:190], vmin=0, cmap='Reds', vmax=1, alpha=0.6) 149 | # plt.imshow(semantic_map_cam[j][2][:,110:190], vmin=0, cmap='Greens', vmax=1, alpha=0.6) 150 | # plt.show() 151 | 152 | # print('saving', idx) 153 | # plt.savefig('/data/lsy/HDmap_1/vis_lable/img_{}'.format(idx)) 154 | # plt.close() 155 | 156 | 157 | 158 | if __name__ == '__main__': 159 | parser = argparse.ArgumentParser(description='Local HD Map Demo.') 160 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0') 161 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini']) 162 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
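# xbound of [-30, 30] m and ybound of [-15, 15] m at 0.15 m per cell correspond to a 400 x 200 BEV grid.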
parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 164 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0]) 165 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0]) 166 | parser.add_argument("--thickness", type=int, default=5) 167 | parser.add_argument('--angle_class', type=int, default=36) 168 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352]) 169 | args = parser.parse_args() 170 | 171 | 172 | -------------------------------------------------------------------------------- /vis_pred.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from PIL import Image 4 | import os 5 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 6 | import matplotlib.pyplot as plt 7 | 8 | import tqdm 9 | import torch 10 | 11 | from data.dataset import semantic_dataset,my_semantic_dataset,vir_semantic_dataset 12 | from data.const import NUM_CLASSES 13 | from model import get_model 14 | from postprocess.vectorize import vectorize 15 | from data.viz_data import BaseViz,to_image_single,to_color,to_image 16 | import cv2 17 | from loss import view_loss 18 | def onehot_encoding(logits, dim=1): 19 | max_idx = torch.argmax(logits, dim, keepdim=True) 20 | one_hot = logits.new_full(logits.shape, 0) 21 | one_hot.scatter_(dim, max_idx, 1) 22 | return one_hot 23 | 24 | def vis_gt(val_loader): 25 | viz = BaseViz() 26 | with torch.no_grad(): 27 | for batchi, (img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots,car_trans, 28 | yaw_pitch_roll, semantic_gt, instance_gt) in enumerate(val_loader): 29 | 30 | if batchi >= 0: 31 | print(batchi) 32 | for si in range(semantic_gt.shape[0]): 33 | # img = to_image(img_origin[si][1]) 34 | # plt.imshow(img) 35 | # plt.show() 36 | # img = to_image(feas[si][1]) 37 | # plt.imshow(img) 38 | # plt.show() 39 | # 展示预测结果 40 | plt.figure(figsize=(4, 2), dpi=100) 41 | plt.axis('off') # 去坐标轴 42 | plt.xticks([]) # 去 x 轴刻度 43 | plt.yticks([]) # 去 y 轴刻度 44 | 45 | plt.imshow(semantic_gt[si][1], vmin=0, cmap='Blues', vmax=1, alpha=0.6) 46 | plt.imshow(semantic_gt[si][2], vmin=0, cmap='Reds', vmax=1, alpha=0.6) 47 | plt.imshow(semantic_gt[si][3], vmin=0, cmap='Greens', vmax=1, alpha=0.6) 48 | #plt.imshow(semantic[si][4], vmin=0, cmap='Reds', vmax=1, alpha=0.6) 49 | plt.show() 50 | 51 | imname = f'eval{batchi:06}_{si:03}.jpg' 52 | print('saving', imname) 53 | plt.savefig('/data/lsy/HDmap_1/vis_lable_gt/img_{}_{}'.format(batchi,si)) 54 | plt.close() 55 | 56 | # image = np.vstack(viz(semantic_gt[si], img_origin[si])) 57 | # image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 58 | # plt.axis('off') # 去坐标轴 59 | # plt.xticks([]) # 去 x 轴刻度 60 | # plt.yticks([]) # 去 y 轴刻度 61 | # plt.imshow(image) 62 | # #plt.show() 63 | # imname = f'eval{batchi:06}_{si:03}.jpg' 64 | # print('saving', imname) 65 | # plt.savefig('/data/lsy/HDmap_1/vis_img/img_{}_{}'.format(batchi,si)) 66 | # plt.close() 67 | def vis_segmentation(model, val_loader): 68 | model.eval() 69 | 70 | with torch.no_grad(): 71 | for batchi, (img_origin, feas, imgs, trans, rots, intrins, post_trans, post_rots,car_trans, 72 | yaw_pitch_roll, semantic_gt, instance_gt) in enumerate(val_loader): 73 | 74 | semantic, view_img, embedding, direction = model(feas.cuda(),imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(), 75 | post_trans.cuda(), post_rots.cuda(),car_trans.cuda(), yaw_pitch_roll.cuda()) 76 | semantic = semantic.softmax(1).cpu().numpy() 77 | semantic[semantic 
< 0.1] = np.nan 78 | 79 | 80 | if batchi <51 :#and batchi < 3 81 | print(batchi) 82 | 83 | for si in range(semantic.shape[0]): 84 | 85 | 86 | # 展示预测结果 87 | plt.figure(figsize=(4, 2), dpi=100) 88 | plt.axis('off') # 去坐标轴 89 | plt.xticks([]) # 去 x 轴刻度 90 | plt.yticks([]) # 去 y 轴刻度 91 | plt.imshow(semantic[si][1], vmin=0, cmap='Blues', vmax=1, alpha=0.6) 92 | plt.imshow(semantic[si][2], vmin=0, cmap='Reds', vmax=1, alpha=0.6) 93 | plt.imshow(semantic[si][3], vmin=0, cmap='Greens', vmax=1, alpha=0.6) 94 | 95 | 96 | 97 | 98 | else:break 99 | 100 | 101 | 102 | def main(args): 103 | data_conf = { 104 | 'num_channels': NUM_CLASSES + 1, 105 | 'image_size': args.image_size, 106 | 'xbound': args.xbound, 107 | 'ybound': args.ybound, 108 | 'zbound': args.zbound, 109 | 'dbound': args.dbound, 110 | 'thickness': args.thickness, 111 | 'angle_class': args.angle_class, 112 | 'B': args.bsz, 113 | 'merge': True, 114 | } 115 | 116 | #train_loader, val_loader = semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers) 117 | val_loader = my_semantic_dataset("/data/lsy/dataset/my_dataset/ww",args.bsz, args.nworkers) 118 | #train_loader, val_loader = vir_semantic_dataset(args.version, args.dataroot, data_conf, args.bsz, args.nworkers) 119 | model = get_model(args.model, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class) 120 | model.load_state_dict(torch.load(args.modelf), strict=False) 121 | model.cuda() 122 | 123 | 124 | vis_segmentation(model, val_loader) 125 | 126 | 127 | if __name__ == '__main__': 128 | parser = argparse.ArgumentParser() 129 | # logging config 130 | parser.add_argument("--logdir", type=str, default='./runs') 131 | 132 | # nuScenes config 133 | parser.add_argument('--dataroot', type=str, default='/dataset/nuScenes/v1.0') 134 | parser.add_argument('--version', type=str, default='v1.0-mini', choices=['v1.0-trainval', 'v1.0-mini']) 135 | 136 | # model config 137 | parser.add_argument("--model", type=str, default='BiMapper') 138 | 139 | # training config 140 | parser.add_argument("--nepochs", type=int, default=30) 141 | parser.add_argument("--max_grad_norm", type=float, default=5.0) 142 | parser.add_argument("--pos_weight", type=float, default=2.13) 143 | parser.add_argument("--bsz", type=int, default=4) 144 | parser.add_argument("--nworkers", type=int, default=5) 145 | parser.add_argument("--lr", type=float, default=1e-3) 146 | parser.add_argument("--weight_decay", type=float, default=1e-7) 147 | 148 | # finetune config 149 | parser.add_argument('--finetune', action='store_true') 150 | parser.add_argument('--modelf', type=str, ) 151 | 152 | # data config 153 | parser.add_argument("--thickness", type=int, default=5) 154 | parser.add_argument("--image_size", nargs=2, type=int, default=[128, 352]) 155 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 156 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 157 | parser.add_argument("--zbound", nargs=3, type=float, default=[-10.0, 10.0, 20.0]) 158 | parser.add_argument("--dbound", nargs=3, type=float, default=[4.0, 45.0, 1.0]) 159 | 160 | # embedding config 161 | parser.add_argument('--instance_seg', action='store_true') 162 | parser.add_argument("--embedding_dim", type=int, default=16) 163 | parser.add_argument("--delta_v", type=float, default=0.5) 164 | parser.add_argument("--delta_d", type=float, default=3.0) 165 | 166 | # direction config 167 | parser.add_argument('--direction_pred', action='store_true') 168 | 
parser.add_argument('--angle_class', type=int, default=36) 169 | 170 | # loss config 171 | parser.add_argument("--scale_seg", type=float, default=1.0) 172 | parser.add_argument("--scale_var", type=float, default=1.0) 173 | parser.add_argument("--scale_dist", type=float, default=1.0) 174 | parser.add_argument("--scale_direction", type=float, default=0.2) 175 | 176 | args = parser.parse_args() 177 | main(args) 178 | --------------------------------------------------------------------------------
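A minimal, hypothetical smoke test for the geometric helpers in /utils.py above (not part of the repository). It assumes the repository root is on PYTHONPATH and that numpy and pyquaternion are installed; the SimpleNamespace boxes are stand-ins that only expose the .size attribute that scale_iou actually reads.

```
# Hypothetical usage sketch for the helpers in utils.py; not part of the repository.
import numpy as np
from types import SimpleNamespace
from pyquaternion import Quaternion

from utils import angle_diff, quaternion_yaw, scale_iou, cummean

# Signed difference from y to x, wrapped by the given period: 350 deg -> 10 deg is +20 deg.
print(np.degrees(angle_diff(np.radians(10.0), np.radians(350.0), period=2 * np.pi)))  # ~20.0

# Yaw of the rotated x-axis in the xy plane: a 90 deg rotation about z gives pi/2.
print(quaternion_yaw(Quaternion(axis=[0, 0, 1], angle=np.pi / 2)))  # ~1.5708

# scale_iou compares the sizes of aligned boxes: halving the height halves the IoU.
gt = SimpleNamespace(size=(2.0, 4.0, 1.5))
pred = SimpleNamespace(size=(2.0, 4.0, 0.75))
print(scale_iou(gt, pred))  # 0.5

# cummean averages up to each position while ignoring NaN entries.
print(cummean(np.array([1.0, np.nan, 3.0])))  # [1. 1. 2.]
```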