├── .gitignore
├── README.md
├── checkpoint
│   └── checkpoint
├── logs
│   ├── pred
│   │   └── 1
│   └── tensorboard
│       └── 1
└── src
    ├── __init__.py
    ├── config
    │   ├── __init__.py
    │   ├── anchors
    │   │   ├── bev_anchors.txt
    │   │   └── image_anchors.txt
    │   └── config.py
    ├── data
    │   ├── __init__.py
    │   ├── dataset.py
    │   ├── labels.py
    │   ├── lidar_preprocess
    │   │   ├── CMakeLists.txt
    │   │   ├── cuda_create_maps.cu
    │   │   ├── cuda_create_maps.h
    │   │   ├── data_types.h
    │   │   ├── debug_utils.cu
    │   │   ├── debug_utils.h
    │   │   ├── preprocessor.cu
    │   │   ├── preprocessor.h
    │   │   └── timer.h
    │   ├── loader.py
    │   ├── postprocess.py
    │   └── preprocess.py
    ├── models
    │   ├── __init__.py
    │   ├── backbone.py
    │   ├── basic_layers.py
    │   ├── contfuse_network.py
    │   ├── headnet.py
    │   └── loss.py
    ├── predict.py
    ├── scripts
    │   ├── __init__.py
    │   ├── check_fusion_map.py
    │   ├── check_gather_result.py
    │   ├── check_labelmap.py
    │   ├── check_postprocess.py
    │   ├── gen_dataset_idx.py
    │   └── statistic.py
    ├── temp.py
    ├── train.py
    └── utils
        ├── math.py
        ├── tensorboard_tools.py
        ├── timer.py
        ├── transform.py
        ├── utils.py
        └── vis_tools.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.so
2 | *.o
3 | */build/
4 | */__pycache__/
5 | *.pyc
6 | */__pycache__/*
7 |
8 | # tensorflow weights/
9 | *.index
10 | *.meta
11 | *.data-*
12 | output/
13 | ckpt/
14 | out/
15 | pre_weights/
16 | *.pb
17 |
18 |
19 |
20 | # Distribution / packaging
21 | .Python
22 | env/
23 | build/
24 | develop-eggs/
25 | dist/
26 | downloads/
27 | eggs/
28 | .eggs/
29 | lib/
30 | lib64/
31 | parts/
32 | sdist/
33 | var/
34 | *.egg-info/
35 | .installed.cfg
36 | *.egg
37 | *.pyc
38 | # PyInstaller
39 | # Usually these files are written by a python script from a template
40 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
41 | *.manifest
42 | *.spec
43 |
44 | # Installer logs
45 | pip-log.txt
46 | pip-delete-this-directory.txt
47 |
48 | # Unit test / coverage reports
49 | htmlcov/
50 | .tox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 |
65 | # Sphinx documentation
66 | docs/_build/
67 |
68 | # PyBuilder
69 | target/
70 | src/utils/__pycache__/dataset.cpython-37.pyc
71 | src/utils/__pycache__/utils.cpython-37.pyc
72 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ContFuse: Deep Continuous Fusion for Multi-Sensor 3D Object Detection
2 | ## Introduction
3 |
4 | This is an unofficial TensorFlow implementation of ContFuse (Deep Continuous Fusion for Multi-Sensor 3D Object Detection), written in C++, CUDA C, and Python.
5 |
6 | ## Train on KITTI Dataset
7 |
8 | The 7481-frame KITTI training set is split into training, validation, and testing subsets:
9 |
10 | ```shell
11 | kitti dataset: <-- 7481 train data
12 | |-- data_object_calib <-- 7481
13 |     |-- calib
14 | |-- image_2 <-- 7481
15 | |-- lidar_files <-- 7481
16 | |-- testing
17 |     |-- label_files <-- 1000
18 | |-- training
19 |     |-- label_files <-- 6431
20 | |-- val
21 |     |-- label_files <-- 50
22 | ```
23 |
24 | ## How to use it?
25 |
26 | ### Dependencies
27 |
28 | - tensorflow 1.14
29 | - numpy 1.16
30 | - opencv 3.4
31 | - easydict
32 | - cudnn 7.6.0
33 | - cuda 10.1
34 | - python 3.7
35 | - tqdm
36 |
37 | ### Train
38 |
39 | Step 1. Generate the dataset index files (modify the dataset path for your machine first; the expected index-line format is shown below):
40 |
41 | ```shell
42 | cd src/scripts
43 | python gen_dataset_idx.py
44 | ```
45 |
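Each line of the generated index file is expected to hold four whitespace-separated paths (lidar, image, label, and calib file), since `Dataset.preprocess_data` in `src/data/dataset.py` unpacks each annotation with `annotation.split()` in exactly that order. A hypothetical entry, with illustrative paths only:

```shell
/data/kitti/lidar_files/000123.bin /data/kitti/image_2/000123.png /data/kitti/training/label_files/000123.txt /data/kitti/data_object_calib/calib/000123.txt
```

Note that the frame id is taken from the lidar path as `lidar_file[-10:-4]`, so lidar filenames should keep the six-digit KITTI stem.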
46 | Step 2. Train the model:
47 |
48 | ```shell
49 | python train.py
50 | ```
51 |
52 | ### Predict
53 |
54 | ```shell
55 | python predict.py
56 | ```
57 |
58 | ## Credit
59 |
60 | CONTFUSE: Deep Continuous Fusion for Multi-Sensor 3D Object Detection
61 |
62 | PIXOR: Real-time 3D Object Detection from Point Clouds
--------------------------------------------------------------------------------
/checkpoint/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "contfuse_last_epoch-Sat Feb 29 00:01:11 2020.ckpt-16"
--------------------------------------------------------------------------------
/logs/pred/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/logs/pred/1
--------------------------------------------------------------------------------
/logs/tensorboard/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/logs/tensorboard/1
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/src/__init__.py
--------------------------------------------------------------------------------
/src/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/src/config/__init__.py
--------------------------------------------------------------------------------
/src/config/anchors/bev_anchors.txt:
--------------------------------------------------------------------------------
1 | 1.5 1.6 3.8 2.5 7.0
2 | 2.2 1.9 5.0 3.0 9.5
3 | 3.2 2.5 10.0 4.5 19.5
4 | 1.7 0.6 0.8 0.5 1.0
5 | 1.7 0.5 1.7 0.5 3.0
6 | 1.9 1.5 3.5 2.5 6.5
--------------------------------------------------------------------------------
/src/config/anchors/image_anchors.txt:
--------------------------------------------------------------------------------
1 | 37.234535 39.074062
2 | 55.515236 94.413540
3 | 164.34821 79.551384
4 | 91.609245 167.24788
5 | 153.37099 252.82355
6 | 206.96756 368.34634
--------------------------------------------------------------------------------
/src/config/config.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import numpy as np
3 | from easydict import EasyDict as edict
4 |
5 |
6 | __C = edict()
7 | cfg = __C
8 |
9 |
10 | __C.CONTFUSE = edict()
11 | __C.CONTFUSE.CLASSES_LIST = ['Car','Van','Truck','Pedestrian','Cyclist','Misc']
12 | __C.CONTFUSE.CLASSES_COLOR = [(255,0,0),(255,255,0),(255,0,255),(0,255,0),(128,64,255),(0,255,255)]
13 | __C.CONTFUSE.CLASSES_NUM = len(__C.CONTFUSE.CLASSES_LIST)
14 | __C.CONTFUSE.EPSILON = 0.00001
15 | __C.CONTFUSE.MAX_PTS_NUM = 200000
16 | __C.CONTFUSE.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '../..'))
17 | __C.CONTFUSE.LOG_DIR = osp.join(__C.CONTFUSE.ROOT_DIR, 'logs')
18 | __C.CONTFUSE.DATASETS_DIR = "/home/ljh/dataset/detection_3d/kitti_compitetion"
19 | __C.CONTFUSE.TRAIN_DATA = osp.join(__C.CONTFUSE.DATASETS_DIR, "training.txt")
20 | __C.CONTFUSE.VAL_DATA = osp.join(__C.CONTFUSE.DATASETS_DIR, "val.txt")
21 | __C.CONTFUSE.TEST_DATA = osp.join(__C.CONTFUSE.DATASETS_DIR, 
"testing.txt") 22 | __C.CONTFUSE.PREPROCESS_LIB = osp.join(__C.CONTFUSE.ROOT_DIR, 'src/utils/liblidar_preprocessor.a') 23 | __C.CONTFUSE.MOVING_AVE_DECAY = 0.9995 24 | __C.CONTFUSE.IS_USE_THREAD = False 25 | 26 | 27 | __C.BEV = edict() 28 | __C.BEV.ANCHORS = __C.CONTFUSE.ROOT_DIR + "/src/config/anchors/bev_anchors.txt" 29 | # __C.BEV.LOSS_SCALE = np.array([1.00, 6.47, 6.37, 72.97, 107.18, 35.13]) 30 | __C.BEV.LOSS_SCALE = np.array([1.00, 5.00, 5.00, 20.0, 20.0, 10.0]) 31 | __C.BEV.X_MAX = 80 32 | __C.BEV.X_MIN = 0 33 | __C.BEV.Y_MAX = 40 34 | __C.BEV.Y_MIN = -40 35 | __C.BEV.Z_MAX = 1 36 | __C.BEV.Z_MIN = -2.5 37 | __C.BEV.X_RESOLUTION = 0.125 38 | __C.BEV.Y_RESOLUTION = 0.125 39 | __C.BEV.Z_RESOLUTION = 0.5 40 | __C.BEV.Z_STATISTIC_DIM = 6 41 | __C.BEV.STRIDE = 4 42 | __C.BEV.BBOX_DIM = 6 43 | __C.BEV.PROB_DECAY = 0.98 44 | __C.BEV.IS_LIDAR_AUG = False 45 | __C.BEV.INPUT_X = int((__C.BEV.X_MAX - __C.BEV.X_MIN) / __C.BEV.X_RESOLUTION) 46 | __C.BEV.INPUT_Y = int((__C.BEV.Y_MAX - __C.BEV.Y_MIN) / __C.BEV.Y_RESOLUTION) 47 | __C.BEV.LAYERED_DIM = int((__C.BEV.Z_MAX - __C.BEV.Z_MIN)/ __C.BEV.Z_RESOLUTION) 48 | __C.BEV.INPUT_Z = __C.BEV.LAYERED_DIM + __C.BEV.Z_STATISTIC_DIM 49 | __C.BEV.LABEL_Z = int(1 + 1 + 1 + __C.BEV.BBOX_DIM * __C.CONTFUSE.CLASSES_NUM) 50 | __C.BEV.OUTPUT_X = int(__C.BEV.INPUT_X / __C.BEV.STRIDE) 51 | __C.BEV.OUTPUT_Y = int(__C.BEV.INPUT_Y / __C.BEV.STRIDE) 52 | __C.BEV.DISTANCE_THRESHOLDS = [1.5, 3.0, 3.0, 1.0, 1.0, 1.5] 53 | 54 | 55 | __C.IMAGE = edict() 56 | __C.IMAGE.ANCHORS = __C.CONTFUSE.ROOT_DIR + "/src/config/anchors/image_anchors.txt" 57 | # __C.IMAGE.LOSS_SCALE = np.array([1.00, 9.87, 26.22, 6.41, 17.81, 29.92]) 58 | __C.IMAGE.LOSS_SCALE = np.array([1.00, 5.00, 10.0, 5.00, 10.0, 10.0]) 59 | __C.IMAGE.INPUT_H = 192 60 | __C.IMAGE.INPUT_W = 640 61 | __C.IMAGE.H_SCALE_RATIO = __C.IMAGE.INPUT_H / 375 62 | __C.IMAGE.W_SCALE_RATIO = __C.IMAGE.INPUT_W / 1242 63 | __C.IMAGE.BBOX_DIM = 4 64 | __C.IMAGE.STRIDE = 8 65 | __C.IMAGE.IS_IMG_AUG = False 66 | __C.IMAGE.OUTPUT_H = int(__C.IMAGE.INPUT_H/ __C.IMAGE.STRIDE) 67 | __C.IMAGE.OUTPUT_W = int(__C.IMAGE.INPUT_W / __C.IMAGE.STRIDE) 68 | __C.IMAGE.ANCHORS_NUM = 6 69 | __C.IMAGE.LABEL_Z = int(1 + (__C.IMAGE.BBOX_DIM + 1 + 1) * __C.IMAGE.ANCHORS_NUM) 70 | __C.IMAGE.IOU_THRESHOLD = 0.5 71 | 72 | __C.TRAIN = edict() 73 | 74 | __C.TRAIN.PRETRAIN_WEIGHT = "../checkpoint/contfuse_val_loss=1266.4186.ckpt-10" 75 | __C.TRAIN.SAVING_STEPS = 6000 76 | __C.TRAIN.BATCH_SIZE = 1 77 | __C.TRAIN.FRIST_STAGE_EPOCHS = 1 78 | __C.TRAIN.SECOND_STAGE_EPOCHS = 15 79 | __C.TRAIN.WARMUP_EPOCHS = 0 80 | __C.TRAIN.LEARN_RATE_INIT = 1e-3 81 | __C.TRAIN.LEARN_RATE_END = 1e-5 82 | __C.TRAIN.IS_DATA_AUG = True 83 | 84 | 85 | __C.EVAL = edict() 86 | __C.EVAL.BATCH_SIZE = 1 87 | __C.EVAL.WEIGHT = "../checkpoint/contfuse_val_loss=1266.4186.ckpt-10" 88 | __C.EVAL.OUTPUT_GT_PATH = osp.join(__C.CONTFUSE.LOG_DIR, "gt") 89 | __C.EVAL.OUTPUT_PRED_PATH = osp.join(__C.CONTFUSE.LOG_DIR, "pred") 90 | 91 | 92 | -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import ctypes 5 | import threading 6 | 
import numpy as np 7 | from config.config import cfg 8 | from utils import utils 9 | from utils import vis_tools 10 | from utils import timer 11 | from utils import transform 12 | from data import loader 13 | from data import labels 14 | from data import preprocess 15 | 16 | 17 | class Dataset(object): 18 | def __init__(self, preprocessor, dataset_type): 19 | if dataset_type == 'train': 20 | self.anno_path = cfg.CONTFUSE.TRAIN_DATA 21 | self.batch_size = cfg.TRAIN.BATCH_SIZE 22 | self.is_data_aug = cfg.TRAIN.IS_DATA_AUG 23 | if dataset_type == 'val': 24 | self.anno_path = cfg.CONTFUSE.VAL_DATA 25 | self.batch_size = cfg.EVAL.BATCH_SIZE 26 | self.is_data_aug = False 27 | if dataset_type == 'test': 28 | self.anno_path = cfg.CONTFUSE.TEST_DATA 29 | self.batch_size = cfg.EVAL.BATCH_SIZE 30 | self.is_data_aug = False 31 | 32 | 33 | self.img_anchors = loader.load_anchors(cfg.IMAGE.ANCHORS) 34 | self.bev_anchors = loader.load_anchors(cfg.BEV.ANCHORS) 35 | self.annotations = loader.load_annotations(self.anno_path) 36 | self.num_samples = len(self.annotations) 37 | self.num_batchs = int(np.ceil(self.num_samples / self.batch_size)) 38 | self.batch_count = 0 39 | self.is_use_thread = cfg.CONTFUSE.IS_USE_THREAD 40 | 41 | self.cuda_preprocessor = preprocessor.preprocessor 42 | 43 | 44 | self.loader_need_exit = 0 45 | self.timer = timer.Timer() 46 | 47 | if self.is_use_thread: 48 | self.prepr_data = [] 49 | self.max_cache_size = 10 50 | self.lodaer_processing = threading.Thread(target=self.loader) 51 | self.lodaer_processing.start() 52 | 53 | def __exit__(self, exc_type, exc_val, exc_tb): 54 | self.loader_need_exit = True 55 | print('set loader_need_exit True') 56 | self.lodaer_processing.join() 57 | print('exit lodaer_processing') 58 | 59 | def __len__(self): 60 | return len(self.annotations) 61 | 62 | def __iter__(self): 63 | return self 64 | 65 | def __next__(self): 66 | if self.batch_count < self.num_batchs: 67 | self.batch_count += 1 68 | return self.load() 69 | else: 70 | self.batch_count = 0 71 | np.random.shuffle(self.annotations) 72 | raise StopIteration 73 | 74 | 75 | 76 | def preprocess_data(self): 77 | batch_bev = np.zeros((self.batch_size, cfg.BEV.INPUT_X, cfg.BEV.INPUT_Y, cfg.BEV.INPUT_Z), dtype=np.float32) 78 | batch_img = np.zeros((self.batch_size, cfg.IMAGE.INPUT_H, cfg.IMAGE.INPUT_W, 3), dtype=np.float32) 79 | batch_mapping1x = np.zeros((self.batch_size, cfg.BEV.INPUT_X, cfg.BEV.INPUT_Y, 2), dtype=np.int32) 80 | batch_mapping2x = np.zeros((self.batch_size, int(cfg.BEV.INPUT_X/2), int(cfg.BEV.INPUT_Y/2), 2), dtype=np.int32) 81 | batch_mapping4x = np.zeros((self.batch_size, int(cfg.BEV.INPUT_X/4), int(cfg.BEV.INPUT_Y/4), 2), dtype=np.int32) 82 | batch_mapping8x = np.zeros((self.batch_size, int(cfg.BEV.INPUT_X/8), int(cfg.BEV.INPUT_Y/8), 2), dtype=np.int32) 83 | batch_bev_label = np.zeros((self.batch_size, cfg.BEV.OUTPUT_X, cfg.BEV.OUTPUT_Y, cfg.BEV.LABEL_Z), dtype=np.float32) 84 | batch_img_label = np.zeros((self.batch_size, cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.LABEL_Z), dtype=np.float32) 85 | batch_tr = np.zeros((self.batch_size, 3, 4), dtype=np.float32) 86 | 87 | batch_frame_id = [] 88 | num = 0 89 | while num < self.batch_size: 90 | index = self.batch_count * self.batch_size + num 91 | if index == self.num_samples: index=0 92 | annotation = self.annotations[index] 93 | if not annotation: continue 94 | lidar_file , image_file, label_file, calib_file = annotation.split() 95 | frame_id = lidar_file[-10:-4] 96 | p20, r0, tr_lidar2cam = loader.load_calib(calib_file) 97 | img = 
loader.load_image(image_file) 98 | point_cloud = loader.load_lidar(lidar_file) 99 | bev, mapping1x, mapping2x, mapping4x, mapping8x = preprocess.lidar_preprocess(point_cloud, 100 | p20, r0, tr_lidar2cam, self.cuda_preprocessor) 101 | types, dimensions, box2d_corners, locations, rzs = loader.load_label(label_file) 102 | bev_label = labels.create_bev_label(locations, dimensions, rzs, types, tr_lidar2cam, self.bev_anchors) 103 | img_label = labels.create_img_label(types, box2d_corners, self.img_anchors) 104 | # vis_tools.imshow_image(img) 105 | batch_bev[num, ...] = bev 106 | batch_img[num, ...] = img / 255.0 107 | batch_mapping1x[num, ...] = mapping1x 108 | batch_mapping2x[num, ...] = mapping2x 109 | batch_mapping4x[num, ...] = mapping4x 110 | batch_mapping8x[num, ...] = mapping8x 111 | batch_bev_label[num, ...] = bev_label 112 | batch_img_label[num, ...] = img_label 113 | batch_tr[num, ...] = tr_lidar2cam 114 | batch_frame_id.append(frame_id) 115 | num += 1 116 | return (batch_bev, batch_img, batch_mapping1x, batch_mapping2x, batch_mapping4x, 117 | batch_mapping8x, batch_bev_label, batch_img_label, batch_tr, batch_frame_id) 118 | 119 | 120 | def loader(self): 121 | while(not self.loader_need_exit): 122 | if len(self.prepr_data) < self.max_cache_size: 123 | self.prepr_data = self.preprocess_data() + self.prepr_data 124 | else: 125 | time.sleep(0.1) 126 | self.loader_need_exit = False 127 | 128 | def load(self): 129 | if self.is_use_thread: 130 | while len(self.prepr_data) == 0: 131 | time.sleep(0.1) 132 | data_ori = self.prepr_data.pop() 133 | else: 134 | data_ori = self.preprocess_data() 135 | return data_ori 136 | 137 | 138 | 139 | if __name__ == "__main__": 140 | pass -------------------------------------------------------------------------------- /src/data/labels.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from utils import transform 4 | from config.config import cfg 5 | 6 | 7 | 8 | def create_img_label(types, box2d_corners, anchors): 9 | def iou_wh(r1, r2): 10 | min_w = min(r1[0],r2[0]) 11 | min_h = min(r1[1],r2[1]) 12 | area_r1 = r1[0]*r1[1] 13 | area_r2 = r2[0]*r2[1] 14 | intersect = min_w * min_h 15 | union = area_r1 + area_r2 - intersect 16 | return intersect/union 17 | 18 | def get_active_anchors(roi, anchors): 19 | indxs = [] 20 | iou_max, index_max = 0, 0 21 | for i,a in enumerate(anchors): 22 | iou = iou_wh(roi, a) 23 | if iou>0.5: 24 | indxs.append(i) 25 | if iou > iou_max: 26 | iou_max, index_max = iou, i 27 | if len(indxs) == 0: 28 | indxs.append(index_max) 29 | return indxs 30 | 31 | obj_num = len(types) 32 | s = 1 + 1 + cfg.IMAGE.BBOX_DIM 33 | label = np.zeros((cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.LABEL_Z), dtype=np.float32) 34 | for i in range(obj_num): 35 | h = (box2d_corners[i][3]-box2d_corners[i][1]) * cfg.IMAGE.H_SCALE_RATIO 36 | w = (box2d_corners[i][2]-box2d_corners[i][0]) * cfg.IMAGE.W_SCALE_RATIO 37 | center_h = (box2d_corners[i][3]+box2d_corners[i][1])/2 * cfg.IMAGE.H_SCALE_RATIO 38 | center_w = (box2d_corners[i][2]+box2d_corners[i][0])/2 * cfg.IMAGE.W_SCALE_RATIO 39 | grid_h = int(center_h / cfg.IMAGE.STRIDE) 40 | grid_w = int(center_w / cfg.IMAGE.STRIDE) 41 | grid_h_offset = center_h / cfg.IMAGE.STRIDE - grid_h 42 | grid_w_offset = center_w / cfg.IMAGE.STRIDE - grid_w 43 | active_idxs = get_active_anchors([h, w], anchors) 44 | label[grid_h, grid_w, 0] = 1 45 | for idx in active_idxs: 46 | dh = h / anchors[idx][0] 47 | dw = w / anchors[idx][1] 48 | label[grid_h, 
grid_w, s*idx+1:s*(idx+1)+1] = np.array([1, types[i], grid_h_offset, grid_w_offset, dh, dw]) 49 | return label 50 | 51 | 52 | def create_bev_label(locations, dimensions, rys, types, tr, anchors): 53 | obj_num = len(types) 54 | objectness_class_map = np.zeros((cfg.BEV.OUTPUT_X, cfg.BEV.OUTPUT_Y, 3), dtype=np.float32) 55 | bev_center_map = np.zeros((cfg.BEV.OUTPUT_X, cfg.BEV.OUTPUT_Y, 3), dtype=np.float32) 56 | rzs = transform.ry_to_rz(rys) 57 | for i in range(obj_num): 58 | location = transform.location_cam2lidar(locations[i], tr) 59 | bev_location = transform.location_lidar2bevlabel(location) 60 | xx, yy, _ = bev_location 61 | hwl = np.array(dimensions[i]) / (cfg.BEV.X_RESOLUTION * 4) 62 | box = transform.bevbox_compose(xx, yy, hwl[1], hwl[2], rzs[i]) 63 | cv2.fillConvexPoly(objectness_class_map, box, [i+1, types[i], 0.0]) 64 | cv2.fillConvexPoly(bev_center_map, box, [float(xx), float(yy), 0.0]) 65 | bev_label = np.zeros([cfg.BEV.OUTPUT_X, cfg.BEV.OUTPUT_Y, cfg.BEV.LABEL_Z], np.float32) 66 | for i in range(cfg.BEV.OUTPUT_X): 67 | for j in range(cfg.BEV.OUTPUT_Y): 68 | if objectness_class_map[i][j][0] < 0.1: continue 69 | type_id = int(objectness_class_map[i][j][1]) 70 | idx = int(objectness_class_map[i][j][0]-1) 71 | rz = rzs[idx] 72 | dim = dimensions[idx] 73 | theta = rz if rz < 0 else rz + 3.14 74 | center_x, center_y, _ = bev_center_map[i][j] 75 | delta_x = (i-center_x) / anchors[type_id][3] 76 | delta_y = (j-center_y) / anchors[type_id][4] 77 | offset_xy = np.sqrt(pow((center_x-i), 2) + pow((center_y-j), 2)) 78 | prob = pow(cfg.BEV.PROB_DECAY, offset_xy) 79 | h, w, l= dim / anchors[type_id][:3] 80 | box = np.array([delta_x, delta_y, h, w, l, theta], np.float32) 81 | bev_label[i][j][:3] = np.array([1, type_id, prob]) 82 | bev_label[i][j][3+type_id*cfg.BEV.BBOX_DIM:3+(type_id+1)*cfg.BEV.BBOX_DIM] = box 83 | return bev_label 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /src/data/lidar_preprocess/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(cuda_preprocessor) 3 | # set(CMAKE_CXX_STANDARD 11) 4 | set(BUILD_SHARED_LIBS ON) 5 | # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fPIC -std=c++11") 6 | 7 | 8 | find_package(Boost 1.71 REQUIRED) 9 | # FIND_PACKAGE(Boost 1.71 COMPONENTS python) 10 | if(Boost_FOUND) 11 | message(" * BOOST ${BOOST_VERSION} was found") 12 | endif() 13 | 14 | 15 | find_package(CUDA) 16 | if (CUDA_FOUND) 17 | message(" * CUDA ${CUDA_VERSION} was found") 18 | 19 | include(FindCUDA) 20 | include_directories(${CUDA_INCLUDE_DIRS} 21 | ) 22 | 23 | set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "") 24 | 25 | if (NOT DEFINED CUDA_CAPABILITY_VERSION_CHECKER) 26 | set(CUDA_CAPABILITY_VERSION_CHECKER 27 | "${CATKIN_DEVEL_PREFIX}/lib/capability_version_checker") 28 | endif () 29 | 30 | execute_process(COMMAND ${CUDA_CAPABILITY_VERSION_CHECKER} 31 | OUTPUT_VARIABLE CUDA_CAPABILITY_VERSION 32 | OUTPUT_STRIP_TRAILING_WHITESPACE) 33 | 34 | if ("${CUDA_CAPABILITY_VERSION}" MATCHES "^[1-9][0-9]+$") 35 | set(CUDA_ARCH "sm_${CUDA_CAPABILITY_VERSION}") 36 | else () 37 | set(CUDA_ARCH "sm_52") 38 | endif () 39 | 40 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=${CUDA_ARCH};-std=c++11;--ptxas-options=-v) 41 | 42 | set(SUBSYS_DESC "Point cloud ndt gpu library") 43 | 44 | else () 45 | message("CUDA was not found.") 46 | endif () 47 | 48 | file(GLOB SOURCES "*.cc" "*.cu") 49 | 50 | include_directories( 51 | 
${CUDA_INCLUDE_DIRS} 52 | ${Boost_INCLUDE_DIRS} 53 | ./ 54 | ) 55 | 56 | 57 | link_libraries(cuda_preprocessor 58 | boost_python-py35 59 | boost_numpy35 60 | ${Boost_LIBRARIES}) 61 | CUDA_ADD_LIBRARY(cuda_preprocessor ${SOURCES}) 62 | -------------------------------------------------------------------------------- /src/data/lidar_preprocess/cuda_create_maps.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_create_maps.h" 2 | 3 | __device__ static float atomicMax(float* address, float val) 4 | { 5 | int* address_as_i = (int*) address; 6 | int old = *address_as_i, assumed; 7 | do { 8 | assumed = old; 9 | old = ::atomicCAS(address_as_i, assumed, 10 | __float_as_int(::fmaxf(val, __int_as_float(assumed)))); 11 | } while (assumed != old); 12 | return __int_as_float(old); 13 | } 14 | 15 | __global__ void CreatePreBevMapOnGPU(float *bev_d, const float *pts_d, 16 | const int pts_num, PreprocessParams params) { 17 | 18 | int col = blockIdx.x * blockDim.x + threadIdx.x; 19 | if(colparams.bev_x_min && pt_xparams.bev_y_min && 25 | pt_yparams.bev_z_min && pt_z bev_d[bev_idx_5]) { 61 | old = atomicExch(bev_d+bev_idx_3, pt_i); 62 | old = atomicExch(bev_d+bev_idx_5, point_height); 63 | } 64 | __syncthreads(); 65 | } 66 | } 67 | } 68 | 69 | __global__ void CreatePreFusionIdxMapOnGPU(float *mapping1x_d, float *mapping2x_d, 70 | float *mapping4x_d, float *mapping8x_d, 71 | const float *pts_d, const int pts_num, 72 | PreprocessParams params) { 73 | int col = blockIdx.x * blockDim.x + threadIdx.x; 74 | 75 | if(colparams.bev_x_min && pt_xparams.bev_y_min && 81 | pt_yparams.bev_z_min && pt_z mapping1x_d[bev_idx1+2]) { 89 | float old = atomicExch(mapping1x_d+bev_idx1+0, pt_x); 90 | old = atomicExch(mapping1x_d+bev_idx1+1, pt_y); 91 | old = atomicExch(mapping1x_d+bev_idx1+2, point_height); 92 | } 93 | // mapping2x_d 94 | int x2 = (pt_x-params.bev_x_min) / (params.bev_x_resolution*2) + 1; 95 | int y2 = (pt_y-params.bev_y_min) / (params.bev_y_resolution*2) + 1; 96 | int bev_idx2 = params.bev_input_y * params.premapping_z_d * (params.bev_input_x/2 - x2) / 2 97 | + params.premapping_z_d * (params.bev_input_y/2 - y2); 98 | 99 | if(point_height > mapping2x_d[bev_idx2+2]) { 100 | float old = atomicExch(mapping2x_d+bev_idx2+0, pt_x); 101 | old = atomicExch(mapping2x_d+bev_idx2+1, pt_y); 102 | old = atomicExch(mapping2x_d+bev_idx2+2, point_height); 103 | } 104 | // mapping4x_d 105 | int x4 = (pt_x-params.bev_x_min) / (params.bev_x_resolution*4) + 1; 106 | int y4 = (pt_y-params.bev_y_min) / (params.bev_y_resolution*4) + 1; 107 | int bev_idx4 = params.bev_input_y * params.premapping_z_d * (params.bev_input_x/4 - x4) / 4 108 | + params.premapping_z_d * (params.bev_input_y/4 - y4); 109 | if(point_height > mapping4x_d[bev_idx4+2]) { 110 | float old = atomicExch(mapping4x_d+bev_idx4+0, pt_x); 111 | old = atomicExch(mapping4x_d+bev_idx4+1, pt_y); 112 | old = atomicExch(mapping4x_d+bev_idx4+2, point_height); 113 | } 114 | // mapping8x_d 115 | int x8 = (pt_x-params.bev_x_min) / (params.bev_x_resolution*8) + 1; 116 | int y8 = (pt_y-params.bev_y_min) / (params.bev_y_resolution*8) + 1; 117 | int bev_idx8 = params.bev_input_y * params.premapping_z_d * (params.bev_input_x/8 - x8) / 8 118 | + params.premapping_z_d * (params.bev_input_y/8 - y8); 119 | if(point_height > mapping8x_d[bev_idx8+2]) { 120 | float old = atomicExch(mapping8x_d+bev_idx8+0, pt_x); 121 | old = atomicExch(mapping8x_d+bev_idx8+1, pt_y); 122 | old = atomicExch(mapping8x_d+bev_idx8+2, point_height); 123 | } 124 | 125 
| } 126 | __syncthreads(); 127 | } 128 | } 129 | 130 | __global__ void CreateBevMapOnGPU(float *bev_flip_d, PreprocessParams params) { 131 | int density_map_idx = params.bev_layered_dim + 0; 132 | int var_map_idx = params.bev_layered_dim + 2; 133 | int row = blockIdx.y * blockDim.y + threadIdx.y; 134 | int col = blockIdx.x * blockDim.x + threadIdx.x; 135 | if(row < params.bev_input_y && col < params.bev_input_x) { 136 | int idx = row * params.bev_input_y * params.bev_input_z + col * params.bev_input_z; 137 | if(bev_flip_d[idx+var_map_idx]>2) { 138 | bev_flip_d[idx+var_map_idx] = 2; 139 | } 140 | __syncthreads(); 141 | float density = log(bev_flip_d[idx+density_map_idx]+1) / log(32.0); 142 | if(density < 1) { 143 | bev_flip_d[idx+density_map_idx] = density; 144 | } else { 145 | bev_flip_d[idx+density_map_idx] = 1; 146 | } 147 | __syncthreads(); 148 | } 149 | } 150 | 151 | __global__ void CreateFusionIdxMapOnGPU(float *pre_mapping, float *mapping, const float *tr, 152 | const int downsample_ratio, PreprocessParams params) { 153 | int row = blockIdx.y * blockDim.y + threadIdx.y; 154 | int col = blockIdx.x * blockDim.x + threadIdx.x; 155 | int x_size = params.bev_input_x / downsample_ratio; 156 | int y_size = params.bev_input_y / downsample_ratio; 157 | int premapping_z_d = params.premapping_z_d; 158 | int mapping_z_h = params.mapping_z_h; 159 | int tr_size = params.tr_size; 160 | int idx0 = row * y_size * premapping_z_d + col * premapping_z_d; 161 | int idx1 = row * y_size * mapping_z_h + col * mapping_z_h; 162 | if(row < y_size && col < x_size) { 163 | float sum[4] = {0.0f,0.0f,0.0f,0.0f}; 164 | for(int i=0; i0.1f){ 166 | sum[i] = pre_mapping[idx0+0] * tr[i*tr_size+0] + pre_mapping[idx0+1] * tr[i*tr_size+1]; 167 | sum[i] += (pre_mapping[idx0+2]+params.bev_z_min) * tr[i*tr_size+2] + 1 * tr[i*tr_size+3]; 168 | } 169 | } 170 | __syncthreads(); 171 | float x = sum[0] * params.img_w_scale / (sum[2]*downsample_ratio+params.epsilon); 172 | float y = sum[1] * params.img_h_scale / (sum[2]*downsample_ratio+params.epsilon); 173 | if(y>0 && y< params.resized_img_h / downsample_ratio && x > 0 174 | && x < params.resized_img_w / downsample_ratio) { 175 | mapping[idx1+0] = x; 176 | mapping[idx1+1] = y; 177 | } 178 | } 179 | } -------------------------------------------------------------------------------- /src/data/lidar_preprocess/cuda_create_maps.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDACREATEMAPS_H 2 | #define _CUDACREATEMAPS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "data_types.h" 9 | 10 | 11 | 12 | __device__ static float atomicMax(float* address, float val); 13 | 14 | __global__ void CreatePreBevMapOnGPU(float *bev_d, const float *pts_d, 15 | const int pts_num, PreprocessParams params); 16 | 17 | __global__ void CreatePreFusionIdxMapOnGPU(float *mapping1x_d, float *mapping2x_d, 18 | float *mapping4x_d, float *mapping8x_d, 19 | const float *pts_d, const int pts_num, 20 | PreprocessParams params); 21 | 22 | __global__ void CreateBevMapOnGPU(float *bev_flip_d, PreprocessParams params); 23 | 24 | __global__ void CreateFusionIdxMapOnGPU(float *pre_mapping, float *mapping, const float *tr, 25 | const int downsample_ratio, PreprocessParams params); 26 | 27 | #endif -------------------------------------------------------------------------------- /src/data/lidar_preprocess/data_types.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _DATATYPES_H 3 | #define _DATATYPES_H 4 | 5 | 
#include 6 | #include 7 | #include 8 | 9 | inline void gassert(cudaError_t err_code, const char *file, int line) 10 | { 11 | if (err_code != cudaSuccess) { 12 | fprintf(stderr, "Error: %s %s %d\n", cudaGetErrorString(err_code), file, line); 13 | cudaDeviceReset(); 14 | exit(EXIT_FAILURE); 15 | } 16 | } 17 | #define CheckCudaErrors(err_code) gassert(err_code, __FILE__, __LINE__); 18 | 19 | struct PreprocessParams { 20 | 21 | PreprocessParams() = default; 22 | PreprocessParams(float x_min, float x_max, float y_min, float y_max, float z_min, 23 | float z_max, float x_resolution, float y_resolution, float z_resolution, 24 | size_t sat_z, size_t h, size_t w, float h_scale, float w_scale) : 25 | bev_x_min(x_min), bev_x_max(x_max), 26 | bev_y_min(y_min), bev_y_max(y_max), 27 | bev_z_min(z_min), bev_z_max(z_max), 28 | bev_x_resolution(x_resolution), bev_y_resolution(y_resolution), 29 | bev_z_resolution(z_resolution), bev_sat_z(sat_z), 30 | resized_img_h(h), resized_img_w(w), img_h_scale(h_scale), img_w_scale(w_scale) { 31 | bev_input_x = ceil((bev_x_max - bev_x_min) / bev_x_resolution - epsilon); 32 | bev_input_y = ceil((bev_y_max - bev_y_min) / bev_y_resolution - epsilon); 33 | bev_layered_dim = ceil((bev_z_max - bev_z_min) / bev_z_resolution - epsilon); 34 | bev_input_z = bev_sat_z + bev_layered_dim; 35 | } 36 | 37 | float epsilon = 0.0001; 38 | size_t max_pts_num = 200000; 39 | size_t premapping_z_d = 3; 40 | size_t mapping_z_h = 2; 41 | size_t pt_size = 4; 42 | size_t tr_size = 4; 43 | float bev_x_min; 44 | float bev_x_max; 45 | float bev_y_min; 46 | float bev_y_max; 47 | float bev_z_min; 48 | float bev_z_max; 49 | float bev_x_resolution; 50 | float bev_y_resolution; 51 | float bev_z_resolution; 52 | size_t bev_sat_z; 53 | size_t resized_img_w; 54 | size_t resized_img_h; 55 | float img_w_scale; 56 | float img_h_scale; 57 | size_t bev_input_x; 58 | size_t bev_input_y; 59 | size_t bev_input_z; 60 | size_t bev_layered_dim; 61 | 62 | }; 63 | 64 | struct MemorySize { 65 | MemorySize() = default; 66 | MemorySize(const PreprocessParams &p) { 67 | 68 | max_pts_bts = sizeof(float) * p.max_pts_num * p.pt_size; 69 | bev_bts = sizeof(float) * p.bev_input_x * p.bev_input_y * p.bev_input_z; 70 | tr_bts = sizeof(float) * p.tr_size * p.tr_size; 71 | premapping1x_d_bts = sizeof(float) * p.bev_input_x * p.bev_input_y * p.premapping_z_d; 72 | premapping2x_d_bts = premapping1x_d_bts / 4; 73 | premapping4x_d_bts = premapping1x_d_bts / 16; 74 | premapping8x_d_bts = premapping1x_d_bts / 64; 75 | mapping1x_bts = sizeof(float) * p.bev_input_x * p.bev_input_y * p.mapping_z_h; 76 | mapping2x_bts = mapping1x_bts / 4; 77 | mapping4x_bts = mapping1x_bts / 16; 78 | mapping8x_bts = mapping1x_bts / 64; 79 | } 80 | size_t max_pts_bts ; 81 | size_t bev_bts; 82 | size_t tr_bts; 83 | size_t premapping1x_d_bts; 84 | size_t premapping2x_d_bts; 85 | size_t premapping4x_d_bts; 86 | size_t premapping8x_d_bts; 87 | size_t mapping1x_bts; 88 | size_t mapping2x_bts; 89 | size_t mapping4x_bts; 90 | size_t mapping8x_bts; 91 | }; 92 | 93 | #endif -------------------------------------------------------------------------------- /src/data/lidar_preprocess/debug_utils.cu: -------------------------------------------------------------------------------- 1 | #include "debug_utils.h" 2 | 3 | void PrintArray(const float* array, int size) { 4 | printf("============ %d ===========", size); 5 | for(int i=0; i 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "data_types.h" 10 | 11 | 12 | void PrintArray(const float* array, 
int size); 13 | 14 | void ParamsPrint(const PreprocessParams ¶ms, const MemorySize &mz); 15 | 16 | #endif //_DEBUGUTILS_H -------------------------------------------------------------------------------- /src/data/lidar_preprocess/preprocessor.cu: -------------------------------------------------------------------------------- 1 | #include "preprocessor.h" 2 | 3 | 4 | 5 | double cpuSecond() { 6 | struct timeval tp; 7 | gettimeofday(&tp, NULL); 8 | return ((double)tp.tv_sec + (double)tp.tv_usec*1.e-6); 9 | } 10 | 11 | size_t Preprocessor::GetPtsMemorySize(size_t pts_size, size_t pts_num) { 12 | assert(pts_size > 0 && pts_num > 0); 13 | size_t pts_bts = sizeof(float) * pts_num * pts_size; 14 | return pts_bts; 15 | } 16 | 17 | void Preprocessor::FreeMemory() { 18 | CheckCudaErrors(cudaFree(pts_d_)); 19 | CheckCudaErrors(cudaFree(bev_d_)); 20 | CheckCudaErrors(cudaFree(tr_d_)); 21 | CheckCudaErrors(cudaFree(premapping1x_d_)); 22 | CheckCudaErrors(cudaFree(premapping2x_d_)); 23 | CheckCudaErrors(cudaFree(premapping4x_d_)); 24 | CheckCudaErrors(cudaFree(premapping8x_d_)); 25 | CheckCudaErrors(cudaFree(mapping1x_d_)); 26 | CheckCudaErrors(cudaFree(mapping2x_d_)); 27 | CheckCudaErrors(cudaFree(mapping4x_d_)); 28 | CheckCudaErrors(cudaFree(mapping8x_d_)); 29 | free(bev_h_); 30 | free(mapping1x_h_); 31 | free(mapping2x_h_); 32 | free(mapping4x_h_); 33 | free(mapping8x_h_); 34 | } 35 | 36 | void Preprocessor::MemoryReset(const MemorySize &mz) { 37 | CheckCudaErrors(cudaMemset(pts_d_, 0.0f, mz.max_pts_bts)); 38 | CheckCudaErrors(cudaMemset(bev_d_, 0.0f, mz.bev_bts)); 39 | CheckCudaErrors(cudaMemset(tr_d_, 0.0f, mz.tr_bts)); 40 | CheckCudaErrors(cudaMemset(mapping1x_d_, 0.0f, mz.mapping1x_bts)); 41 | CheckCudaErrors(cudaMemset(mapping2x_d_, 0.0f, mz.mapping2x_bts)); 42 | CheckCudaErrors(cudaMemset(mapping4x_d_, 0.0f, mz.mapping4x_bts)); 43 | CheckCudaErrors(cudaMemset(mapping8x_d_, 0.0f, mz.mapping8x_bts)); 44 | CheckCudaErrors(cudaMemset(premapping1x_d_, 0.0f, mz.premapping1x_d_bts)); 45 | CheckCudaErrors(cudaMemset(premapping2x_d_, 0.0f, mz.premapping2x_d_bts)); 46 | CheckCudaErrors(cudaMemset(premapping4x_d_, 0.0f, mz.premapping4x_d_bts)); 47 | CheckCudaErrors(cudaMemset(premapping8x_d_, 0.0f, mz.premapping8x_d_bts)); 48 | memset(bev_h_, 0.0f, mz.bev_bts); 49 | memset(mapping1x_h_, 0.0f, mz.mapping1x_bts); 50 | memset(mapping2x_h_, 0.0f, mz.mapping2x_bts); 51 | memset(mapping4x_h_, 0.0f, mz.mapping4x_bts); 52 | memset(mapping8x_h_, 0.0f, mz.mapping8x_bts); 53 | } 54 | 55 | 56 | void Preprocessor::MemoryAlloc(const MemorySize &mz) { 57 | int nb_devices; 58 | CheckCudaErrors(cudaGetDeviceCount(&nb_devices)); 59 | CheckCudaErrors(cudaSetDevice(0)); 60 | CheckCudaErrors(cudaMalloc((float **) &mapping1x_d_, mz.mapping1x_bts)); 61 | CheckCudaErrors(cudaMalloc((float **) &mapping2x_d_, mz.mapping2x_bts)); 62 | CheckCudaErrors(cudaMalloc((float **) &mapping4x_d_, mz.mapping4x_bts)); 63 | CheckCudaErrors(cudaMalloc((float **) &mapping8x_d_, mz.mapping8x_bts)); 64 | CheckCudaErrors(cudaMalloc((float **) &premapping1x_d_, mz.premapping1x_d_bts)); 65 | CheckCudaErrors(cudaMalloc((float **) &premapping2x_d_, mz.premapping2x_d_bts)); 66 | CheckCudaErrors(cudaMalloc((float **) &premapping4x_d_, mz.premapping4x_d_bts)); 67 | CheckCudaErrors(cudaMalloc((float **) &premapping8x_d_, mz.premapping8x_d_bts)); 68 | CheckCudaErrors(cudaMalloc((float **) &pts_d_, mz.max_pts_bts)); 69 | CheckCudaErrors(cudaMalloc((float **) &bev_d_, mz.bev_bts)); 70 | CheckCudaErrors(cudaMalloc((float **) &tr_d_, mz.tr_bts)); 71 | bev_h_ = 
(float*) malloc(mz.bev_bts);
72 |   mapping1x_h_ = (float*) malloc(mz.mapping1x_bts);
73 |   mapping2x_h_ = (float*) malloc(mz.mapping2x_bts);
74 |   mapping4x_h_ = (float*) malloc(mz.mapping4x_bts);
75 |   mapping8x_h_ = (float*) malloc(mz.mapping8x_bts);
76 | }
77 |
78 |
79 |
80 | void Preprocessor::CopyDataFromHostToDevice(const bn::ndarray &pts,
81 |                                             const bn::ndarray &tr, const MemorySize &mz,
82 |                                             size_t pts_size, size_t pts_num) {
83 |   size_t pts_bts = GetPtsMemorySize(pts_size, pts_num);
84 |   CheckCudaErrors(cudaMemcpy(pts_d_, pts.get_data(), pts_bts, cudaMemcpyHostToDevice));
85 |   CheckCudaErrors(cudaMemcpy(tr_d_, tr.get_data(), mz.tr_bts, cudaMemcpyHostToDevice));
86 | }
87 |
88 |
89 | void Preprocessor::CopyDataFromDeviceToHost(const MemorySize &mz) {
90 |   CheckCudaErrors(cudaMemcpy(bev_h_, bev_d_, mz.bev_bts, cudaMemcpyDeviceToHost));
91 |   CheckCudaErrors(cudaMemcpy(mapping1x_h_, mapping1x_d_, mz.mapping1x_bts,
92 |                              cudaMemcpyDeviceToHost));
93 |   CheckCudaErrors(cudaMemcpy(mapping2x_h_, mapping2x_d_, mz.mapping2x_bts,
94 |                              cudaMemcpyDeviceToHost));
95 |   CheckCudaErrors(cudaMemcpy(mapping4x_h_, mapping4x_d_, mz.mapping4x_bts,
96 |                              cudaMemcpyDeviceToHost));
97 |   CheckCudaErrors(cudaMemcpy(mapping8x_h_, mapping8x_d_, mz.mapping8x_bts,
98 |                              cudaMemcpyDeviceToHost));
99 | }
100 |
101 | void Preprocessor::TestArrayMemoryReset(int bts) {
102 |   memset(test_array_h_, 0.0f, bts);
103 | }
104 |
105 | void Preprocessor::TestArrayMemoryAlloc(int bts) {
106 |   test_array_h_ = (float *) malloc(bts);
107 | }
108 |
109 | void Preprocessor::CopyTestArrayDataFromDeviceToHost(float *array_h, float *arrray_d, int bts) {
110 |   CheckCudaErrors(cudaMemcpy(array_h, arrray_d, bts, cudaMemcpyDeviceToHost));
111 | }
112 |
113 | void Preprocessor::PreprocessorInit(float bev_x_min, float bev_x_max,
114 |                                     float bev_y_min, float bev_y_max,
115 |                                     float bev_z_min, float bev_z_max,
116 |                                     float bev_x_resolution, float bev_y_resolution,
117 |                                     float bev_z_resolution, size_t bev_sat_z,
118 |                                     size_t h, size_t w,
119 |                                     float h_scale, float w_scale) {
120 |   Py_Initialize();
121 |   bn::initialize();
122 |   params_ = PreprocessParams(bev_x_min, bev_x_max, bev_y_min, bev_y_max,
123 |                              bev_z_min, bev_z_max, bev_x_resolution, bev_y_resolution,
124 |                              bev_z_resolution, bev_sat_z, h, w, h_scale, w_scale);
125 |   memory_size_ = MemorySize(params_);
126 |   ParamsPrint(params_, memory_size_);
127 |   MemoryAlloc(memory_size_);
128 |   // TestArrayMemoryAlloc(memory_size_.premapping1x_d_bts);
129 | }
130 |
131 | void Preprocessor::PreprocessData(const bn::ndarray &lidar, const bn::ndarray &tr, size_t pts_num) {
132 |   // timer.start();
133 |   MemoryReset(memory_size_);
134 |   // TestArrayMemoryReset(memory_size_.premapping1x_d_bts);
135 |   CopyDataFromHostToDevice(lidar, tr, memory_size_, params_.pt_size, pts_num);
136 |   // printf("CopyDataFromHostToDevice: %4f.\n", timer.stop());
137 |   dim3 grid_0(256, 1, 1);
138 |   dim3 block_0(768, 1, 1);
139 |   CreatePreBevMapOnGPU<<<grid_0, block_0>>>(bev_d_, pts_d_, pts_num, params_);
140 |   CheckCudaErrors(cudaDeviceSynchronize());
141 |   CheckCudaErrors(cudaGetLastError());
142 |   // printf("CreatePreBevMapOnGPU: %4f.\n", timer.stop());
143 |   CreatePreFusionIdxMapOnGPU<<<grid_0, block_0>>>(premapping1x_d_, premapping2x_d_,
144 |                                                   premapping4x_d_, premapping8x_d_,
145 |                                                   pts_d_, pts_num,
146 |                                                   params_);
147 |   CheckCudaErrors(cudaDeviceSynchronize());
148 |   CheckCudaErrors(cudaGetLastError());
149 |   // printf("CreatePreFusionIdxMapOnGPU: %4f.\n", timer.stop());
150 |   dim3 grid_1(32, 32);
151 |   dim3 block_1(32, 32);
152 |   CreateBevMapOnGPU<<<grid_1, block_1>>>(bev_d_, params_);
153 |   CheckCudaErrors(cudaDeviceSynchronize());
154 |   CheckCudaErrors(cudaGetLastError());
155 |   // printf("CreateBevMapOnGPU: %4f.\n", timer.stop());
156 |   dim3 grid_2(16,16);
157 |   dim3 grid_3(8,8);
158 |   dim3 grid_4(4,4);
159 |   CreateFusionIdxMapOnGPU<<<grid_1, block_1>>>(premapping1x_d_, mapping1x_d_, tr_d_, 1, params_);
160 |   CreateFusionIdxMapOnGPU<<<grid_2, block_1>>>(premapping2x_d_, mapping2x_d_, tr_d_, 2, params_);
161 |   CreateFusionIdxMapOnGPU<<<grid_3, block_1>>>(premapping4x_d_, mapping4x_d_, tr_d_, 4, params_);
162 |   CreateFusionIdxMapOnGPU<<<grid_4, block_1>>>(premapping8x_d_, mapping8x_d_, tr_d_, 8, params_);
163 |   CheckCudaErrors(cudaDeviceSynchronize());
164 |   CheckCudaErrors(cudaGetLastError());
165 |   // printf("CreateFusionIdxMapOnGPU: %4f.\n", timer.stop());
166 |   CopyDataFromDeviceToHost(memory_size_);
167 |   // CopyTestArrayDataFromDeviceToHost(test_array_h_, premapping1x_d_, memory_size_.premapping1x_d_bts);
168 |   CheckCudaErrors(cudaDeviceSynchronize());
169 |   CheckCudaErrors(cudaGetLastError());
170 |   // printf("CopyDataFromDeviceToHost: %4f \n.", timer.stop());
171 | }
172 |
173 | bn::ndarray Preprocessor::GetBev() {
174 |   bp::tuple shape = bp::make_tuple(params_.bev_input_x, params_.bev_input_y, params_.bev_input_z);
175 |   bp::tuple stride = bp::make_tuple(params_.bev_input_y * params_.bev_input_z * sizeof(float),
176 |                                     params_.bev_input_z * sizeof(float), sizeof(float));
177 |   bn::dtype dt1 = bn::dtype::get_builtin<float>();
178 |   return bn::from_data(bev_h_, dt1, shape, stride, bp::object());
179 | }
180 |
181 | bn::ndarray Preprocessor::GetMapping1x() {
182 |   bp::tuple shape = bp::make_tuple(params_.bev_input_x, params_.bev_input_y, params_.mapping_z_h);
183 |   bp::tuple stride = bp::make_tuple(params_.bev_input_y * params_.mapping_z_h * sizeof(float),
184 |                                     params_.mapping_z_h * sizeof(float), sizeof(float));
185 |   bn::dtype dt1 = bn::dtype::get_builtin<float>();
186 |   return bn::from_data(mapping1x_h_, dt1, shape, stride, bp::object());
187 | }
188 |
189 | bn::ndarray Preprocessor::GetMapping2x() {
190 |   bp::tuple shape = bp::make_tuple(params_.bev_input_x/2, params_.bev_input_y/2, params_.mapping_z_h);
191 |   bp::tuple stride = bp::make_tuple(params_.bev_input_y * params_.mapping_z_h * sizeof(float) / 2,
192 |                                     params_.mapping_z_h * sizeof(float), sizeof(float));
193 |   bn::dtype dt1 = bn::dtype::get_builtin<float>();
194 |   return bn::from_data(mapping2x_h_, dt1, shape, stride, bp::object());
195 | }
196 |
197 | bn::ndarray Preprocessor::GetMapping4x() {
198 |   bp::tuple shape = bp::make_tuple(params_.bev_input_x/4, params_.bev_input_y/4, params_.mapping_z_h);
199 |   bp::tuple stride = bp::make_tuple(params_.bev_input_y * params_.mapping_z_h * sizeof(float) / 4,
200 |                                     params_.mapping_z_h * sizeof(float), sizeof(float));
201 |   bn::dtype dt1 = bn::dtype::get_builtin<float>();
202 |   return bn::from_data(mapping4x_h_, dt1, shape, stride, bp::object());
203 | }
204 |
205 | bn::ndarray Preprocessor::GetMapping8x() {
206 |   bp::tuple shape = bp::make_tuple(params_.bev_input_x/8, params_.bev_input_y/8, params_.mapping_z_h);
207 |   bp::tuple stride = bp::make_tuple(params_.bev_input_y * params_.mapping_z_h * sizeof(float) / 8,
208 |                                     params_.mapping_z_h * sizeof(float), sizeof(float));
209 |   bn::dtype dt1 = bn::dtype::get_builtin<float>();
210 |   return bn::from_data(mapping8x_h_, dt1, shape, stride, bp::object());
211 | }
212 |
213 | bn::ndarray Preprocessor::GetTestArray() {
214 |   bp::tuple shape = bp::make_tuple(params_.bev_input_x, params_.bev_input_y, params_.premapping_z_d);
215 |   bp::tuple stride = bp::make_tuple(params_.bev_input_y * params_.premapping_z_d * 
sizeof(float),
216 |                                     params_.premapping_z_d * sizeof(float), sizeof(float));
217 |   bn::dtype dt1 = bn::dtype::get_builtin<float>();
218 |   return bn::from_data(test_array_h_, dt1, shape, stride, bp::object());
219 | }
220 |
221 | BOOST_PYTHON_MODULE(libcuda_preprocessor) {
222 |   Py_Initialize();
223 |   bn::initialize();
224 |   bp::class_<Preprocessor>("Preprocessor")
225 |     .def("PreprocessorInit", &Preprocessor::PreprocessorInit)
226 |     .def("PreprocessData", &Preprocessor::PreprocessData)
227 |     .def("GetMapping1x", &Preprocessor::GetMapping1x)
228 |     .def("GetMapping2x", &Preprocessor::GetMapping2x)
229 |     .def("GetMapping4x", &Preprocessor::GetMapping4x)
230 |     .def("GetMapping8x", &Preprocessor::GetMapping8x)
231 |     // .def("GetTestArray", &Preprocessor::GetTestArray)
232 |     .def("GetBev", &Preprocessor::GetBev);
233 | }
234 |
235 |
--------------------------------------------------------------------------------
/src/data/lidar_preprocess/preprocessor.h:
--------------------------------------------------------------------------------
1 | #ifndef _PREPROCESSOR_H
2 | #define _PREPROCESSOR_H
3 |
4 | #include <stdio.h>
5 | #include <stdlib.h>
6 | #include <string.h>
7 | #include <assert.h>
8 | #include <sys/time.h>
9 | #include "boost/python.hpp"
10 | #include "boost/python/numpy.hpp"
11 |
12 | #include "debug_utils.h"
13 | #include "cuda_create_maps.h"
14 | #include "data_types.h"
15 | #include "timer.h"
16 |
17 |
18 | namespace bp = boost::python;
19 | namespace bn = boost::python::numpy;
20 |
21 |
22 | double cpuSecond();
23 |
24 | class Preprocessor
25 | {
26 | public:
27 |   Preprocessor() {
28 |   }
29 |   ~Preprocessor() {
30 |     FreeMemory();
31 |   }
32 |   // Preprocessor(const Preprocessor&) = delete;
33 |   // Preprocessor &operator=(const Preprocessor&) = delete;
34 |   void PreprocessorInit(float bev_x_min, float bev_x_max,
35 |                         float bev_y_min, float bev_y_max,
36 |                         float bev_z_min, float bev_z_max,
37 |                         float bev_x_resolution, float bev_y_resolution,
38 |                         float bev_z_resolution, size_t sat_z,
39 |                         size_t h, size_t w,
40 |                         float h_scale, float w_scale);
41 |   void PreprocessData(const bn::ndarray &lidar, const bn::ndarray &tr, size_t pts_num);
42 |   bn::ndarray GetBev();
43 |   bn::ndarray GetMapping1x();
44 |   bn::ndarray GetMapping2x();
45 |   bn::ndarray GetMapping4x();
46 |   bn::ndarray GetMapping8x();
47 |   bn::ndarray GetTestArray();
48 |
49 | private:
50 |   float *pts_d_;
51 |   float *bev_d_;
52 |   float *tr_d_;
53 |   float *premapping1x_d_;
54 |   float *premapping2x_d_;
55 |   float *premapping4x_d_;
56 |   float *premapping8x_d_;
57 |   float *mapping1x_d_;
58 |   float *mapping2x_d_;
59 |   float *mapping4x_d_;
60 |   float *mapping8x_d_;
61 |
62 |
63 |   float *bev_h_;
64 |   float *mapping1x_h_;
65 |   float *mapping2x_h_;
66 |   float *mapping4x_h_;
67 |   float *mapping8x_h_;
68 |   float *test_array_h_;
69 |
70 |   MemorySize memory_size_;
71 |   PreprocessParams params_;
72 |   Timer timer;
73 |
74 |   void FreeMemory();
75 |   void MemoryReset(const MemorySize &mz);
76 |   void MemoryAlloc(const MemorySize &mz);
77 |   void FreeMemoryOnDevice();
78 |   void GetArraybnytesSize();
79 |   size_t GetPtsMemorySize(size_t pts_size, size_t pts_num);
80 |   void CopyDataFromDeviceToHost(const MemorySize &mz);
81 |   void CopyDataFromHostToDevice(const bn::ndarray &pts, const bn::ndarray &tr,
82 |                                 const MemorySize &mz, size_t pts_size, size_t pts_num);
83 |
84 |   void TestArrayMemoryReset(int bts);
85 |
86 |   void TestArrayMemoryAlloc(int bts);
87 |
88 |   void CopyTestArrayDataFromDeviceToHost(float *array_h, float *arrray_d, int bts);
89 |
90 |
91 | };
92 |
93 |
94 | #endif //_PREPROCESSOR_H
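For orientation, here is a minimal smoke-test sketch of the compiled module (not part of the repository). It assumes the library has been built into `src/data/lidar_preprocess/build/` under the module name `libcuda_preprocessor` declared in `BOOST_PYTHON_MODULE` above, and it reuses the grid parameters from `src/config/config.py`; the random point cloud and identity transform are placeholders for real KITTI inputs:

```python
import numpy as np
from data.lidar_preprocess.build import libcuda_preprocessor  # run from src/, as preprocess.py does

pre = libcuda_preprocessor.Preprocessor()
# Argument order follows Preprocessor::PreprocessorInit: BEV x/y/z range and
# resolution, Z_STATISTIC_DIM, resized image H/W, and image H/W scale ratios.
pre.PreprocessorInit(0.0, 80.0, -40.0, 40.0, -2.5, 1.0,
                     0.125, 0.125, 0.5, 6,
                     192, 640, 192.0 / 375, 640.0 / 1242)

pts = np.random.rand(1000, 4).astype(np.float32)  # placeholder (x, y, z, intensity) points
tr = np.eye(4, dtype=np.float32)                  # placeholder lidar-to-image transform
pre.PreprocessData(pts, tr, pts.shape[0])

bev = pre.GetBev()              # (640, 640, 13): 7 layered height slices + 6 statistic channels
mapping1x = pre.GetMapping1x()  # (640, 640, 2): per-BEV-cell image coordinates for fusion
print(bev.shape, mapping1x.shape)
```

Because every getter wraps the preprocessor's host buffers via `bn::from_data` with no owning object, the returned arrays alias memory that is reset on the next `PreprocessData` call; copy them (for example with `np.array(bev)`) if they need to outlive it.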
-------------------------------------------------------------------------------- /src/data/lidar_preprocess/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _TIMER_H 2 | #define _TIMER_H 3 | 4 | #include 5 | 6 | class Timer 7 | { 8 | 9 | public: 10 | 11 | clock_t time; 12 | 13 | Timer() 14 | { 15 | } 16 | 17 | ~Timer() 18 | { 19 | } 20 | 21 | 22 | void start() 23 | { 24 | time = (double) clock(); 25 | } 26 | 27 | double stop() 28 | { 29 | double diff_time = ((double) clock() - time) / CLOCKS_PER_SEC; 30 | time = (double)clock(); 31 | return diff_time; 32 | } 33 | }; 34 | 35 | 36 | 37 | #endif -------------------------------------------------------------------------------- /src/data/loader.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from config.config import cfg 4 | from utils import transform 5 | 6 | 7 | def load_annotations(annot_path): 8 | with open(annot_path, 'r') as f: 9 | txt = f.readlines() 10 | annotations = [line.strip() for line in txt if len(line.strip().split())!= 0] 11 | np.random.shuffle(annotations) 12 | return annotations 13 | 14 | 15 | def load_anchors(anchors_path): 16 | with open(anchors_path) as f: 17 | anchors = f.readlines() 18 | new_anchors = np.zeros([len(anchors), len(anchors[0].split())], dtype=np.float32) 19 | for i in range(len(anchors)): 20 | new_anchors[i] = np.array(anchors[i].split(), dtype=np.float32) 21 | return new_anchors 22 | 23 | 24 | def load_calib(calib_file): 25 | 26 | with open(calib_file) as fi: 27 | lines = fi.readlines() 28 | assert (len(lines) == 8) 29 | obj = lines[0].strip().split(' ')[1:] 30 | P00 = np.array(obj, dtype=np.float32) 31 | obj = lines[1].strip().split(' ')[1:] 32 | P10 = np.array(obj, dtype=np.float32) 33 | obj = lines[2].strip().split(' ')[1:] 34 | P20 = np.array(obj, dtype=np.float32) 35 | obj = lines[3].strip().split(' ')[1:] 36 | P30 = np.array(obj, dtype=np.float32) 37 | obj = lines[4].strip().split(' ')[1:] 38 | R0 = np.array(obj, dtype=np.float32) 39 | obj = lines[5].strip().split(' ')[1:] 40 | tr_lidar2cam = np.array(obj, dtype=np.float32) 41 | obj = lines[6].strip().split(' ')[1:] 42 | tr_imu2lidar = np.array(obj, dtype=np.float32) 43 | return P20.reshape(3, 4),R0.reshape(3, 3),tr_lidar2cam.reshape(3, 4) 44 | 45 | 46 | def load_lidar(lidar_file): 47 | lidar = np.fromfile(lidar_file, np.float32).reshape((-1, 4)) 48 | return lidar 49 | 50 | 51 | def load_label(label_file): 52 | with open(label_file, "r") as f: 53 | lines = f.read().split("\n") 54 | types = [] 55 | dimensions = [] 56 | box2d_corners = [] 57 | locations = [] 58 | rzs = [] 59 | 60 | for line in lines: 61 | if not line: 62 | continue 63 | line = line.split(" ") 64 | if(line[0] not in cfg.CONTFUSE.CLASSES_LIST): 65 | continue 66 | types.append(cfg.CONTFUSE.CLASSES_LIST.index(line[0])) 67 | dimensions.append(np.array(line[8:11]).astype(np.float32)) 68 | box2d_corners.append(np.array(line[4:8]).astype(np.float32)) 69 | locations.append(np.array(line[11:14]).astype(np.float32)) 70 | rzs.append(float(line[14])) 71 | return types, dimensions, box2d_corners, locations, rzs 72 | 73 | 74 | def load_image(image_file): 75 | img = cv2.imread(image_file) 76 | out_img = cv2.resize(img, (cfg.IMAGE.INPUT_W, cfg.IMAGE.INPUT_H), cv2.INTER_CUBIC) 77 | return out_img 78 | -------------------------------------------------------------------------------- /src/data/postprocess.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import numpy as np 4 | import cv2 5 | from utils import vis_tools 6 | from utils import utils 7 | from utils import math 8 | from utils import transform 9 | from config.config import cfg 10 | 11 | 12 | def parse_bev_predmap(predmap, anchors): 13 | xmap = np.tile(np.array(range(cfg.BEV.OUTPUT_Y))[:, np.newaxis], [1, cfg.BEV.OUTPUT_X]) 14 | ymap = np.tile(np.array(range(cfg.BEV.OUTPUT_X))[np.newaxis, :], [cfg.BEV.OUTPUT_Y, 1]) 15 | xy_grid = np.stack((xmap,ymap), axis=-1) 16 | predmap = np.concatenate((predmap, xy_grid), axis=-1) 17 | preds = predmap[math.sigmoid(predmap[..., 0])>0.6] 18 | objness = math.sigmoid(preds[..., 0])[..., np.newaxis] 19 | clsness = math.sigmoid(preds[..., 1:cfg.CONTFUSE.CLASSES_NUM+1]) 20 | box = preds[..., cfg.CONTFUSE.CLASSES_NUM+1:-2].reshape(-1, cfg.CONTFUSE.CLASSES_NUM, cfg.BEV.BBOX_DIM) 21 | prob = clsness * objness 22 | cls_max_prob = np.max(prob, axis=-1) 23 | cls_idx = np.argmax(prob, axis=-1) 24 | box = box[np.arange(box.shape[0]), cls_idx] 25 | xx = preds[..., -2] - box[..., 0] * anchors[cls_idx, 3] 26 | yy = preds[..., -1] - box[..., 1] * anchors[cls_idx, 4] 27 | x = cfg.BEV.X_MAX - xx * cfg.BEV.X_RESOLUTION * cfg.BEV.STRIDE 28 | y = cfg.BEV.Y_MAX - yy * cfg.BEV.Y_RESOLUTION * cfg.BEV.STRIDE 29 | hwl = box[..., 2:5] * anchors[cls_idx][..., :3] 30 | theta = np.arctan2(np.sin(box[..., 5]), np.cos(box[..., 5])) 31 | result = np.stack([cls_idx, cls_max_prob, x, y, hwl[..., 0], hwl[..., 1], hwl[..., 2], theta], axis=-1) 32 | return result[cls_max_prob>0.6] 33 | 34 | 35 | def parse_img_predmap(predmap, anchors): 36 | anchor_shape = [cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, anchors.shape[0], anchors.shape[1]] 37 | anchors = np.broadcast_to(np.array(anchors), anchor_shape) 38 | h = np.tile(np.array(range(cfg.IMAGE.OUTPUT_H))[:, np.newaxis], [1, cfg.IMAGE.OUTPUT_W]) 39 | w = np.tile(np.array(range(cfg.IMAGE.OUTPUT_W))[np.newaxis, :], [cfg.IMAGE.OUTPUT_H, 1]) 40 | hw_grid = np.stack((h, w), axis=-1) 41 | hw_shape = [cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.ANCHORS_NUM, 2] 42 | hw_grid = np.tile(hw_grid, cfg.IMAGE.ANCHORS_NUM).reshape(hw_shape) 43 | box_shape = [cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.ANCHORS_NUM, cfg.CONTFUSE.CLASSES_NUM+cfg.IMAGE.BBOX_DIM+1] 44 | predmap = predmap.reshape(box_shape) 45 | predmap = np.concatenate((predmap, hw_grid, anchors), axis=-1) 46 | preds = predmap[math.sigmoid(predmap[..., 0])>0.5] 47 | objness = math.sigmoid(preds[..., 0])[..., np.newaxis] 48 | clsness = math.sigmoid(preds[..., 1:cfg.CONTFUSE.CLASSES_NUM+1]) 49 | box = preds[..., cfg.CONTFUSE.CLASSES_NUM+1:] 50 | prob = objness * clsness 51 | cls_max_prob = np.max(prob, axis=-1) 52 | cls_idx = np.argmax(prob, axis=-1) 53 | x = (box[:, 0] + box[:, -4]) * cfg.IMAGE.STRIDE / cfg.IMAGE.H_SCALE_RATIO 54 | y = (box[:, 1] + box[:, -3]) * cfg.IMAGE.STRIDE / cfg.IMAGE.W_SCALE_RATIO 55 | h = box[:, 2] / cfg.IMAGE.H_SCALE_RATIO * box[:, -2] 56 | w = box[:, 3] / cfg.IMAGE.W_SCALE_RATIO * box[:, -1] 57 | left = y - w / 2 58 | top = x - h / 2 59 | right = y + w / 2 60 | bottom = x + h / 2 61 | result = np.stack([cls_idx, cls_max_prob, left, top, right, bottom], axis=-1) 62 | return result[cls_max_prob>0.5] 63 | 64 | 65 | def img_nms(bboxes, iou_threshold, sigma=0.3, method='nms'): 66 | 67 | def bboxes_iou(boxes1, boxes2): 68 | boxes1 = np.array(boxes1) 69 | boxes2 = np.array(boxes2) 70 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] 
- boxes1[..., 1]) 71 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) 72 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 73 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 74 | inter_section = np.maximum(right_down - left_up, 0.0) 75 | inter_area = inter_section[..., 0] * inter_section[..., 1] 76 | union_area = boxes1_area + boxes2_area - inter_area 77 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 78 | return ious 79 | 80 | classes_in_img = list(set(bboxes[:, 0])) 81 | best_bboxes = [] 82 | for cls_type in classes_in_img: 83 | cls_mask = (bboxes[:, 0] == cls_type) 84 | cls_bboxes = bboxes[cls_mask] 85 | while len(cls_bboxes): 86 | max_ind = np.argmax(cls_bboxes[:, 1]) 87 | best_bbox = cls_bboxes[max_ind] 88 | best_bboxes.append(best_bbox) 89 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind+1:]]) 90 | iou = bboxes_iou(best_bbox[np.newaxis, 2:], cls_bboxes[:, 2:]) 91 | weight = np.ones((len(iou),), dtype=np.float32) 92 | assert method in ['nms', 'soft-nms'] 93 | if method == 'nms': 94 | iou_mask = iou > iou_threshold 95 | weight[iou_mask] = 0.0 96 | if method == 'soft-nms': 97 | weight = np.exp(-(1.0 * iou ** 2 / sigma)) 98 | cls_bboxes[:, 1] = cls_bboxes[:, 1] * weight 99 | score_mask = cls_bboxes[:, 1] > 0. 100 | cls_bboxes = cls_bboxes[score_mask] 101 | return best_bboxes 102 | 103 | 104 | def bev_nms(bboxes, thresholds): 105 | 106 | def is_close_center(bbox1, bbox2, threshold): 107 | xy = bbox2[..., 2:4] - bbox1[..., 2:4] 108 | distance = np.sqrt(xy[:,0]*xy[:,0]+ xy[:,1]*xy[:,1]) 109 | return distance < threshold 110 | 111 | def merge_obj_bboxes(bboxes, cls_type): 112 | new_box = np.zeros(bboxes.shape[-1]) 113 | new_box[0] = cls_type 114 | # print(bboxes[..., -1]) 115 | new_box[1] = np.mean(bboxes[..., 1]) 116 | new_box[2:] = np.sum(bboxes[..., 2:]*bboxes[..., 1][..., np.newaxis], axis=0) 117 | sum_of_prob = np.sum(bboxes[..., 1]) 118 | new_box[2:] = new_box[2:] / sum_of_prob 119 | area = new_box[5] * new_box[6] 120 | if sum_of_prob / area < 1: 121 | return [] 122 | return new_box 123 | 124 | classes_in_bev = list(set(bboxes[:, 0])) 125 | best_bboxes = [] 126 | for cls_type in classes_in_bev: 127 | cls_mask = (bboxes[:, 0] == cls_type) 128 | cls_bboxes = bboxes[cls_mask] 129 | while len(cls_bboxes): 130 | max_ind = np.argmax(cls_bboxes[:, 1]) 131 | sample_bbox = cls_bboxes[max_ind] 132 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind+1: ]]) 133 | distance_mask = is_close_center(sample_bbox, cls_bboxes, thresholds[int(cls_type)]) 134 | obj_bboxes = cls_bboxes[distance_mask] 135 | merged_bbox = merge_obj_bboxes(obj_bboxes, cls_type) 136 | if len(merged_bbox): 137 | best_bboxes.append(merged_bbox) 138 | cls_bboxes = cls_bboxes[np.logical_not(distance_mask)] 139 | return best_bboxes 140 | 141 | 142 | def save_lidar_results(bev_bboxes, tr, frame_id, save_path): 143 | bev_file = os.path.join(save_path, frame_id+'.txt') 144 | f1 = open(bev_file,'w') 145 | for box in bev_bboxes: 146 | pred_cls = cfg.CONTFUSE.CLASSES_LIST[int(box[0])] 147 | location = transform.location_lidar2cam(box[2:5], tr) 148 | ry = transform.rz_to_ry(box[-1]) 149 | line = pred_cls + " -1.0 -1.0 -10.0 -1.0 -1.0 -1.0 -1.0 " 150 | line += "{:.2f} {:.2f} {:.2f} ".format(box[3], box[4], box[5]) 151 | line += "{:.2f} {:.2f} {:.2f} ".format(location[0], location[1], location[2]) 152 | line += "{:.2f} {:.2f}\n".format(box[1], ry) 153 | f1.write(line) 154 | f1.close() 155 | return 156 | 157 | 158 | 
def save_image_results(img_bboxes, frame_id, save_path): 159 | img_file = os.path.join(save_path, frame_id+'.txt') 160 | f1 = open(img_file,'w') 161 | for box in img_bboxes: 162 | pred_cls = cfg.CONTFUSE.CLASSES_LIST[int(box[0])] 163 | line = pred_cls + " -1.0 -1.0 -10.0 " 164 | line += "{:.2f} {:.2f} {:.2f} {:.2f} ".format(box[2], box[3], box[4], box[5]) 165 | line += "-10.0 -10.0 -10.0 -10.0 -10.0 -10.0 -10.0" 166 | line += "{:.2f}\n".format(box[1]) 167 | f1.write(line) 168 | f1.close() 169 | return 170 | 171 | -------------------------------------------------------------------------------- /src/data/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from config.config import cfg 3 | from data.lidar_preprocess.build import libcuda_preprocessor 4 | from utils import transform 5 | 6 | class LidarPreprocessor(): 7 | def __init__(self): 8 | self.preprocessor = libcuda_preprocessor.Preprocessor() 9 | self.preprocessor.PreprocessorInit(cfg.BEV.X_MIN, cfg.BEV.X_MAX, 10 | cfg.BEV.Y_MIN, cfg.BEV.Y_MAX, 11 | cfg.BEV.Z_MIN, cfg.BEV.Z_MAX, 12 | cfg.BEV.X_RESOLUTION, cfg.BEV.Y_RESOLUTION, 13 | cfg.BEV.Z_RESOLUTION, cfg.BEV.Z_STATISTIC_DIM, 14 | cfg.IMAGE.INPUT_H, cfg.IMAGE.INPUT_W, 15 | cfg.IMAGE.H_SCALE_RATIO, cfg.IMAGE.W_SCALE_RATIO) 16 | 17 | 18 | 19 | 20 | def lidar_preprocess(point_cloud, p20, r0, tr_lidar2cam, cuda_preprocessor): 21 | tr_lidar2img = transform.get_tr_lidar2img(p20, r0, tr_lidar2cam).astype(np.float32) 22 | cuda_preprocessor.PreprocessData(point_cloud, tr_lidar2img, int(point_cloud.shape[0])) 23 | bev = cuda_preprocessor.GetBev().astype(np.float32) 24 | mapping1x = cuda_preprocessor.GetMapping1x() 25 | mapping2x = cuda_preprocessor.GetMapping2x() 26 | mapping4x = cuda_preprocessor.GetMapping4x() 27 | mapping8x = cuda_preprocessor.GetMapping8x() 28 | return bev, mapping1x, mapping2x, mapping4x, mapping8x 29 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/backbone.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import models.basic_layers as bl 3 | import tensorflow as tf 4 | 5 | 6 | def resnet_backbone(bev_input, img_input, mapping1x, mapping2x, mapping4x, mapping8x, trainable): 7 | with tf.variable_scope('rb_block_0') as scope: 8 | fused_block_0 = tf.gather_nd(img_input, mapping1x, name='fusion_gather_0', batch_dims=1) 9 | fused_block_0 = tf.stop_gradient(fused_block_0, name='fused_block_0') 10 | fused_block_0 = tf.concat([bev_input, fused_block_0], -1) 11 | 12 | with tf.variable_scope('rb_block_1') as scope: 13 | bev_block = bl.convolutional(bev_input, (3, 32), trainable, 'bev_conv1') 14 | bev_block = bl.convolutional(bev_input, (1, 16), trainable, 'bev_conv2') 15 | bev_block_1 = bl.convolutional(bev_block, (3, 32), trainable, 'bev_conv3', downsample=True) 16 | img_block = bl.convolutional(img_input, (3, 32), trainable, 'img_conv1') 17 | img_block = bl.convolutional(img_block, (1, 16), trainable, 'img_conv2') 18 | img_block = bl.convolutional(img_block, (3, 32), trainable, 'img_conv3', downsample=True) 19 | fused_block_1 = tf.gather_nd(img_block, mapping2x, name='fusion_gather_1', batch_dims=1) 20 | 
20 |         fused_block_1 = tf.stop_gradient(fused_block_1, name='fused_block_1')
21 |         fused_block_1 = tf.concat([bev_block_1, fused_block_1], -1)
22 | 
23 |     with tf.variable_scope('rb_block_2') as scope:
24 |         bev_block = bl.resnet_block(fused_block_1, 32, trainable, 'bev_res1')
25 |         bev_block = bl.resnet_block(bev_block, 32, trainable, 'bev_res2')
26 |         bev_block_2 = bl.convolutional(bev_block, (3, 96), trainable, 'bev_conv1', downsample=True)
27 |         img_block = bl.resnet_block(img_block, 18, trainable, 'img_res1')
28 |         img_block = bl.resnet_block(img_block, 18, trainable, 'img_res2')
29 |         img_block = bl.convolutional(img_block, (3, 64), trainable, 'img_conv1', downsample=True)
30 |         fused_block_2 = tf.gather_nd(img_block, mapping4x, name='fusion_gather_2', batch_dims=1)
31 |         fused_block_2 = tf.stop_gradient(fused_block_2, name='fused_block_2')
32 |         fused_block_2 = tf.concat([bev_block_2, fused_block_2], -1)
33 | 
34 |     with tf.variable_scope('rb_block_3') as scope:
35 |         bev_block = bl.resnet_block(fused_block_2, 80, trainable, 'bev_res1')
36 |         bev_block = bl.resnet_block(bev_block, 80, trainable, 'bev_res2')
37 |         bev_block = bl.resnet_block(bev_block, 80, trainable, 'bev_res3')
38 |         bev_block_3 = bl.convolutional(bev_block, (3, 240), trainable, 'bev_conv1', downsample=True)
39 |         img_block = bl.resnet_block(img_block, 32, trainable, 'img_res1')
40 |         img_block = bl.resnet_block(img_block, 32, trainable, 'img_res2')
41 |         img_block = bl.resnet_block(img_block, 32, trainable, 'img_res3')
42 |         img_last_block = bl.convolutional(img_block, (3, 128), trainable, 'img_conv1', downsample=True)
43 |         fused_block_3 = tf.gather_nd(img_last_block, mapping8x, name='fusion_gather_3', batch_dims=1)
44 |         fused_block_3 = tf.stop_gradient(fused_block_3, name='fused_block_3')
45 |         fused_block_3 = tf.concat([bev_block_3, fused_block_3], -1)
46 | 
47 |     with tf.variable_scope('rb_upsample_block') as scope:
48 |         up_block_4d = bl.upsample(fused_block_3, "deconv2d_1")
49 |         up_block_4d = bl.convolutional(up_block_4d, (3, 256), trainable, 'bev_conv1')
50 |         up_block_4d = bl.resnet_block(up_block_4d, 128, trainable, 'bev_res1')
51 |         up_block_4d = tf.concat([up_block_4d, bev_block_2], -1)
52 |         return up_block_4d, img_last_block
--------------------------------------------------------------------------------
/src/models/basic_layers.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def _upsample_by_deconv(inputs, filters, kernel_size=3, strides=2):
4 |     net = tf.layers.conv2d_transpose(inputs, filters, kernel_size=kernel_size, strides=strides, padding='SAME')
5 |     return net
6 | 
7 | @tf.contrib.framework.add_arg_scope
8 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1, activation_fn=None, is_training=False):
9 |     if strides > 1: inputs = _fixed_padding(inputs, kernel_size)
10 |     net = tf.layers.conv2d(inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, use_bias=False,
11 |                            padding=('SAME' if strides == 1 else 'VALID'), activation=None)
12 |     net = tf.layers.batch_normalization(inputs=net, training=is_training)
13 |     if activation_fn is not None:
14 |         net = activation_fn(net)
15 |     return net
16 | 
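# NOTE: with the explicit padding below, the output size of a strided conv
# depends only on the stride, never on the input parity: pad_total = k - 1,
# e.g. kernel_size=3 pads (1, 1) and kernel_size=5 pads (2, 2), so a stride-2
# conv always halves the spatial dimensions exactly.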
17 | @tf.contrib.framework.add_arg_scope
18 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs):
19 |     """
20 |     Pads the input along the spatial dimensions independently of input size.
21 | 
22 |     Args:
23 |       inputs: A tensor of size [batch, channels, height_in, width_in] or
24 |         [batch, height_in, width_in, channels] depending on data_format.
25 |       kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
26 |         Should be a positive integer.
27 |       mode: The mode for tf.pad.
28 | 
29 |     Returns:
30 |       A tensor with the same format as the input with the data either intact
31 |       (if kernel_size == 1) or padded (if kernel_size > 1).
32 |     """
33 |     pad_total = kernel_size - 1
34 |     pad_beg = pad_total // 2
35 |     pad_end = pad_total - pad_beg
36 | 
37 |     padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
38 |                                     [pad_beg, pad_end], [0, 0]], mode=mode)
39 |     return padded_inputs
40 | 
41 | 
42 | def convolutional(input_data, filters_shape, trainable, name, downsample=False, activate=True, bn=True):
43 | 
44 |     with tf.variable_scope(name):
45 |         if downsample:
46 |             pad_h, pad_w = (filters_shape[0] - 2) // 2 + 1, (filters_shape[0] - 2) // 2 + 1
47 |             paddings = tf.constant([[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]])
48 |             input_data = tf.pad(input_data, paddings, 'CONSTANT')
49 |             strides = (1, 2, 2, 1)
50 |             padding = 'VALID'
51 |         else:
52 |             strides = (1, 1, 1, 1)
53 |             padding = "SAME"
54 | 
55 |         input_shape = input_data.get_shape()
56 |         C = input_shape[-1]
57 |         H = filters_shape[0]
58 |         W = filters_shape[0]
59 |         K = filters_shape[1]
60 |         weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=True,
61 |                                  shape=[H, W, C, K], initializer=tf.truncated_normal_initializer(stddev=0.01))
62 |         conv = tf.nn.conv2d(input=input_data, filter=weight, strides=strides, padding=padding)
63 | 
64 |         if bn:
65 |             conv = tf.layers.batch_normalization(conv, beta_initializer=tf.zeros_initializer(),
66 |                                                  gamma_initializer=tf.ones_initializer(),
67 |                                                  moving_mean_initializer=tf.zeros_initializer(),
68 |                                                  moving_variance_initializer=tf.ones_initializer(), training=trainable)
69 |         else:
70 |             bias = tf.get_variable(name='bias', shape=[filters_shape[-1]], trainable=True,
71 |                                    dtype=tf.float32, initializer=tf.constant_initializer(0.0))
72 |             conv = tf.nn.bias_add(conv, bias)
73 |         if activate: conv = tf.nn.leaky_relu(conv, alpha=0.1)
74 |     return conv
75 | 
76 | 
77 | def resnet_block(input_data, mid_channel, trainable, name):
78 |     short_cut = input_data
79 |     input_shape = input_data.get_shape()
80 |     output_channel = input_shape[-1]
81 |     with tf.variable_scope(name):
82 |         input_data = convolutional(input_data, (1, mid_channel), trainable, 'conv1')
83 |         input_data = convolutional(input_data, (3, mid_channel), trainable, 'conv2')
84 |         input_data = convolutional(input_data, (1, output_channel), trainable, 'conv3')
85 |         output_data = input_data + short_cut
86 |     return output_data
87 | 
88 | 
89 | def route(name, previous_output, current_output):
90 |     with tf.variable_scope(name):
91 |         output = tf.concat([current_output, previous_output], axis=-1)
92 | 
93 |     return output
94 | 
95 | 
96 | def upsample(input_data, name, method="deconv"):
97 |     assert method in ["resize", "deconv"]
98 | 
99 |     if method == "resize":
100 |         with tf.variable_scope(name):
101 |             input_shape = tf.shape(input_data)
102 |             output = tf.image.resize_nearest_neighbor(input_data, (input_shape[1] * 2, input_shape[2] * 2))
103 | 
104 |     if method == "deconv":
105 |         # replace resize_nearest_neighbor with conv2d_transpose to support TensorRT optimization
106 |         num_filter = input_data.shape.as_list()[-1]
107 |         output = tf.layers.conv2d_transpose(input_data, num_filter, kernel_size=2, padding='same',
108 |                                             strides=(2, 2), kernel_initializer=tf.random_normal_initializer())
109 | 
110 |     return output
--------------------------------------------------------------------------------
/src/models/contfuse_network.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import models.basic_layers as bl 3 | import tensorflow as tf 4 | from config.config import cfg 5 | from data import loader 6 | from models import backbone 7 | from models import headnet 8 | from models import loss 9 | 10 | class ContfuseNetwork(object): 11 | def __init__(self): 12 | self.cls_num = cfg.CONTFUSE.CLASSES_NUM 13 | self.bev_bbox_dim = self.cls_num * cfg.BEV.BBOX_DIM 14 | self.img_anchor_num = len(loader.load_anchors(cfg.IMAGE.ANCHORS)) 15 | self.img_output_z = self.img_anchor_num * (cfg.IMAGE.BBOX_DIM + cfg.CONTFUSE.CLASSES_NUM + 1) 16 | 17 | 18 | def net(self, bev_input, img_input, mapping1x, mapping2x, mapping4x, mapping8x, trainable): 19 | with tf.variable_scope('contfuse_backbone') as scope: 20 | bev_block, img_block = backbone.resnet_backbone(bev_input, img_input, mapping1x, mapping2x, 21 | mapping4x, mapping8x, trainable) 22 | with tf.variable_scope('contfuse_headnet') as scope: 23 | bev_pred, img_pred = headnet.res_headnet(bev_block, img_block, self.cls_num, self.bev_bbox_dim, 24 | self.img_output_z, trainable) 25 | return bev_pred, img_pred 26 | 27 | 28 | def load(self): 29 | bev_shape = [None, cfg.BEV.INPUT_X, cfg.BEV.INPUT_Y, cfg.BEV.INPUT_Z] 30 | img_shape = [None, cfg.IMAGE.INPUT_H, cfg.IMAGE.INPUT_W, 3] 31 | bev_label_shape = [None, cfg.BEV.OUTPUT_X, cfg.BEV.OUTPUT_Y, cfg.BEV.LABEL_Z] 32 | img_label_shape = [None, cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.LABEL_Z] 33 | mapping1x_shape = [None, cfg.BEV.INPUT_X, cfg.BEV.INPUT_Y, 2] 34 | mapping2x_shape = [None, int(cfg.BEV.INPUT_X / 2), int(cfg.BEV.INPUT_Y / 2), 2] 35 | mapping4x_shape = [None, int(cfg.BEV.INPUT_X / 4), int(cfg.BEV.INPUT_Y / 4), 2] 36 | mapping8x_shape = [None, int(cfg.BEV.INPUT_X / 8), int(cfg.BEV.INPUT_Y / 8), 2] 37 | bev_input = tf.placeholder(dtype=tf.float32, shape=bev_shape, name='bev_input_placeholder') 38 | img_input = tf.placeholder(dtype=tf.float32, shape=img_shape, name='img_input_placeholder') 39 | bev_label = tf.placeholder(dtype=tf.float32, shape=bev_label_shape, name='bev_label_placeholder') 40 | img_label = tf.placeholder(dtype=tf.float32, shape=img_label_shape, name='img_label_placeholder') 41 | mapping1x = tf.placeholder(dtype=tf.int32, shape=mapping1x_shape, name='mapping1x_placeholder') 42 | mapping2x = tf.placeholder(dtype=tf.int32, shape=mapping2x_shape, name='mapping2x_placeholder') 43 | mapping4x = tf.placeholder(dtype=tf.int32, shape=mapping4x_shape, name='mapping4x_placeholder') 44 | mapping8x = tf.placeholder(dtype=tf.int32, shape=mapping8x_shape, name='mapping8x_placeholder') 45 | bev_loss_scale = tf.placeholder(dtype=tf.float32, shape=[6], name='bev_loss_scale') 46 | img_loss_scale = tf.placeholder(dtype=tf.float32, shape=[6], name='img_loss_scale') 47 | trainable = tf.placeholder(dtype=tf.bool, name='training') 48 | bev_pred, img_pred = self.net(bev_input, img_input, mapping1x, mapping2x, 49 | mapping4x, mapping8x, trainable) 50 | 51 | with tf.variable_scope('bev_loss') as scope: 52 | bev_loss = loss.bev_loss(bev_pred, bev_label, bev_loss_scale) 53 | 54 | with tf.variable_scope('img_loss') as scope: 55 | img_loss = loss.img_loss(img_pred, img_label, self.img_anchor_num, img_loss_scale) 56 | 57 | return {'bev_input':bev_input, 58 | 'img_input':img_input, 59 | 'bev_label':bev_label, 60 | 'img_label':img_label, 61 | 'bev_pred':bev_pred, 62 | 'img_pred':img_pred, 63 | 'mapping1x':mapping1x, 64 | 'mapping2x':mapping2x, 65 | 'mapping4x':mapping4x, 66 | 
'mapping8x':mapping8x, 67 | 'bev_loss_scale':bev_loss_scale, 68 | 'img_loss_scale':img_loss_scale, 69 | 'trainable':trainable, 70 | 'contfuse_loss': bev_loss[0] + img_loss[0], 71 | 'bev_loss': bev_loss[0], 72 | 'bev_obj_loss': bev_loss[1], 73 | 'bev_cls_loss': bev_loss[2], 74 | 'bev_bbox_loss': bev_loss[3], 75 | 'img_loss': img_loss[0], 76 | 'img_obj_loss': img_loss[1], 77 | 'img_cls_loss': img_loss[2], 78 | 'img_bbox_loss': img_loss[3] 79 | } 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/models/headnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import models.basic_layers as bl 3 | import tensorflow as tf 4 | 5 | 6 | def res_headnet(bev_block, img_block, cls_num, bev_bbox_dim, img_output_z, trainable): 7 | with tf.variable_scope('rh_block') as scope: 8 | bev_block = bl.resnet_block(bev_block, 180, trainable, 'head_bev_res1') 9 | bev_block = bl.resnet_block(bev_block, 180, trainable, 'head_bev_res2') 10 | bev_block = bl.resnet_block(bev_block, 180, trainable, 'head_bev_res3') 11 | bev_block = bl.resnet_block(bev_block, 180, trainable, 'head_bev_res4') 12 | bev_obj_cls = bl.convolutional(bev_block, (1, 1 + cls_num), trainable, 'bev_obj_cls') 13 | bev_bbox = bl.convolutional(bev_block, (3, bev_bbox_dim), trainable, 'bev_bbox') 14 | bev_pred = tf.concat([bev_obj_cls, bev_bbox], -1) 15 | img_block = bl.resnet_block(img_block, 64, trainable, 'head_img_res1') 16 | img_block = bl.resnet_block(img_block, 64, trainable, 'head_img_res2') 17 | img_block = bl.resnet_block(img_block, 64, trainable, 'head_img_res3') 18 | img_block = bl.resnet_block(img_block, 64, trainable, 'head_img_res4') 19 | img_pred = bl.convolutional(img_block, (1, img_output_z), trainable, 'img_pred') 20 | return bev_pred, img_pred -------------------------------------------------------------------------------- /src/models/loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from config.config import cfg 3 | 4 | 5 | def smooth_l1(deltas, targets, sigma=2.0): 6 | ''' 7 | ResultLoss = outside_weights * SmoothL1(inside_weights * (box_pred - box_targets)) 8 | SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2 9 | |x| - 0.5 / sigma^2, otherwise 10 | ''' 11 | sigma2 = sigma * sigma 12 | diffs = tf.subtract(deltas, targets) 13 | l1_signs = tf.cast(tf.less(tf.abs(diffs), 1.0 / sigma2), tf.float32) 14 | 15 | l1_option1 = tf.multiply(diffs, diffs) * 0.5 * sigma2 16 | l1_option2 = tf.abs(diffs) - 0.5 / sigma2 17 | l1_add = tf.multiply(l1_option1, l1_signs) + \ 18 | tf.multiply(l1_option2, 1-l1_signs) 19 | l1 = l1_add 20 | 21 | return l1 22 | 23 | 24 | def bev_loss(pred, label, bev_loss_scale): 25 | cls_num = cfg.CONTFUSE.CLASSES_NUM 26 | epsilon = cfg.CONTFUSE.EPSILON 27 | mask = tf.cast(label[...,0] ,tf.bool) 28 | with tf.name_scope('mask'): 29 | masked_label = tf.boolean_mask(label, mask) 30 | masked_pred = tf.boolean_mask(pred, mask) 31 | masked_neg_pred = tf.boolean_mask(pred, tf.logical_not(mask)) 32 | 33 | with tf.name_scope('pred'): 34 | pred_o = tf.sigmoid(masked_pred[...,0]) 35 | pred_no_o = tf.sigmoid(masked_neg_pred[...,0]) 36 | pred_c = tf.sigmoid(masked_pred[..., 1:cls_num+1]) 37 | box_shape = (-1, cls_num, cfg.BEV.BBOX_DIM) 38 | pred_box = tf.reshape(masked_pred[..., 1+cls_num:], shape = box_shape) 39 | pred_xyhwl = pred_box[...,:-1] 40 | pred_re = tf.cos(pred_box[...,-1]) 41 | pred_im = tf.sin(pred_box[...,-1]) 42 | 43 | 
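    # NOTE (layout inferred from the slicing below): bev label channels are
    # [0] cell-has-object mask, [1] class id, [2] objectness weight,
    # [3:] one BEV.BBOX_DIM regression block per class; the last box element
    # is the yaw, supervised through its (cos, sin) decomposition so the loss
    # stays continuous across the angle wrap.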
    with tf.name_scope('label'):
44 |         label_c = tf.one_hot(tf.cast(masked_label[..., 1], tf.int32), depth=cls_num)
45 |         label_c_scale = tf.gather(bev_loss_scale, tf.cast(masked_label[..., 1], tf.int32))
46 |         label_prob = masked_label[..., 2]
47 |         box_shape = (-1, cls_num, cfg.BEV.BBOX_DIM)
48 |         label_box = tf.reshape(masked_label[..., 3:], shape=box_shape)
49 |         label_xyhwl = label_box[...,:-1]
50 |         label_re = tf.cos(label_box[...,-1])
51 |         label_im = tf.sin(label_box[...,-1])
52 | 
53 |     with tf.name_scope('loss'):
54 |         xyhwl_loss = tf.reduce_sum(tf.reduce_sum(smooth_l1(pred_xyhwl, label_xyhwl), axis=-1)*label_c)
55 |         re_loss = tf.reduce_sum(smooth_l1(pred_re, label_re)*label_c) * 2
56 |         im_loss = tf.reduce_sum(smooth_l1(pred_im, label_im)*label_c) * 2
57 |         has_obj_loss = tf.reduce_sum(-tf.log(pred_o + epsilon) * label_prob) * 5
58 |         no_obj_loss = tf.reduce_sum(-tf.log(1 - pred_no_o + epsilon)) * 0.1
59 |         cls_loss = tf.reduce_sum(tf.reduce_sum(-tf.log(pred_c + epsilon)*label_c, axis=-1)*label_c_scale) * 5 + \
60 |                    tf.reduce_sum(-tf.log(1-pred_c + epsilon)*(1-label_c))
61 |         cls_loss = cls_loss / cfg.TRAIN.BATCH_SIZE
62 |         bbox_loss = (xyhwl_loss + re_loss + im_loss) / cfg.TRAIN.BATCH_SIZE
63 |         objness_loss = (has_obj_loss + no_obj_loss) / cfg.TRAIN.BATCH_SIZE
64 |         total_loss = bbox_loss * 5 + objness_loss + cls_loss
65 | 
66 |     return total_loss, objness_loss, cls_loss, bbox_loss
67 | 
68 | 
69 | def img_loss(pred, label, img_anchor_num, img_loss_scale):
70 |     epsilon = cfg.CONTFUSE.EPSILON
71 |     cls_num = cfg.CONTFUSE.CLASSES_NUM
72 |     mask_0 = tf.cast(label[...,0], tf.bool)
73 |     with tf.name_scope('mask'):
74 |         box_shape = (-1, img_anchor_num, cfg.IMAGE.BBOX_DIM+1+1)
75 |         masked_label_0 = tf.reshape(tf.boolean_mask(label[..., 1:], mask_0), shape=box_shape)
76 |         masked_1 = tf.cast(masked_label_0[...,0], tf.bool)
77 |         masked_label = tf.boolean_mask(masked_label_0, masked_1)
78 |         box_shape = (-1, img_anchor_num, cfg.IMAGE.BBOX_DIM+cfg.CONTFUSE.CLASSES_NUM+1)
79 |         masked_pred_0 = tf.reshape(tf.boolean_mask(pred, mask_0), shape=box_shape)
80 |         masked_pred = tf.boolean_mask(masked_pred_0, masked_1)
81 |         masked_pred_no = tf.boolean_mask(masked_pred_0, tf.logical_not(masked_1))
82 |         masked_neg_pred = tf.reshape(tf.boolean_mask(pred, tf.logical_not(mask_0)), shape=box_shape)
83 |     with tf.name_scope('pred'):
84 |         pred_o = tf.sigmoid(masked_pred[..., 0])
85 |         pred_no_o = tf.sigmoid(masked_pred_no[..., 0])
86 |         pred_neg_o = tf.sigmoid(masked_neg_pred[..., 0])
87 |         pred_c = tf.sigmoid(masked_pred[..., 1:cls_num+1])
88 |         pred_box = masked_pred[..., cls_num+1:]
89 |     with tf.name_scope('label'):
90 |         label_c = tf.one_hot(tf.cast(masked_label[..., 1], tf.int32), depth=cls_num)
91 |         label_c_scale = tf.gather(img_loss_scale, tf.cast(masked_label[..., 1], tf.int32), axis=-1)
92 |         label_box = masked_label[..., 1+1:]
93 |     with tf.name_scope('loss'):
94 |         has_obj_loss = tf.reduce_sum(-tf.log(pred_o + epsilon)) * 5 + \
95 |                        tf.reduce_sum(-tf.log(1 - pred_no_o + epsilon))
96 |         no_obj_loss = tf.reduce_sum(-tf.log(1 - pred_neg_o + epsilon)) * 0.1
97 |         no_cls_loss = tf.reduce_sum(tf.reduce_sum(-tf.log(1-pred_c+epsilon)*(1-label_c), axis=-1))
98 |         cls_loss = tf.reduce_sum(tf.reduce_sum(-tf.log(pred_c+epsilon)*label_c, axis=-1)*
99 |                                  label_c_scale) * 5 + no_cls_loss
100 |         bbox_loss = tf.reduce_sum(tf.reduce_sum(smooth_l1(pred_box, label_box), axis=-1))
101 |         cls_loss = cls_loss / cfg.TRAIN.BATCH_SIZE
102 |         bbox_loss = bbox_loss / cfg.TRAIN.BATCH_SIZE
103 |         objness_loss = (has_obj_loss + no_obj_loss) / cfg.TRAIN.BATCH_SIZE
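        # the masking above is two-level: mask_0 keeps grid cells containing at
        # least one assigned anchor, masked_1 then separates assigned anchors
        # from the rest; everything outside only receives a no-object penalty.
        # The total below mirrors bev_loss: box term weighted 5x plus
        # objectness and class terms.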
104 |         total_loss = bbox_loss * 5 + objness_loss + cls_loss
105 |     return total_loss, objness_loss, cls_loss, bbox_loss
--------------------------------------------------------------------------------
/src/predict.py:
--------------------------------------------------------------------------------
1 | import os
2 | import io
3 | import time
4 | import shutil
5 | import cv2
6 | from tqdm import tqdm
7 | import numpy as np
8 | import tensorflow as tf
9 | from config.config import cfg
10 | from utils import utils
11 | from data import dataset
12 | from data import preprocess
13 | from data import postprocess
14 | from data import loader
15 | from utils import vis_tools
16 | from models import contfuse_network
17 | 
18 | 
19 | 
20 | 
21 | 
22 | class predicter(object):
23 | 
24 |     def __init__(self):
25 |         self.initial_weight = cfg.EVAL.WEIGHT
26 |         self.time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
27 |         self.moving_ave_decay = cfg.CONTFUSE.MOVING_AVE_DECAY
28 |         self.eval_logdir = "./data/logs/eval"
29 |         self.lidar_preprocessor = preprocess.LidarPreprocessor()
30 |         self.evalset = dataset.Dataset(self.lidar_preprocessor, 'test')
31 |         self.output_dir = cfg.EVAL.OUTPUT_PRED_PATH
32 |         self.img_anchors = loader.load_anchors(cfg.IMAGE.ANCHORS)
33 |         self.bev_anchors = loader.load_anchors(cfg.BEV.ANCHORS)
34 | 
35 |         with tf.name_scope('model'):
36 |             self.model = contfuse_network.ContfuseNetwork()
37 |             self.net = self.model.load()
38 |             self.img_pred = self.net['img_pred']
39 |             self.bev_pred = self.net['bev_pred']
40 | 
41 |         self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
42 |         self.saver = tf.train.Saver()
43 |         self.saver.restore(self.sess, self.initial_weight)
44 | 
45 | 
46 |     def predict(self):
47 |         bev_imwrite_path = os.path.join(self.output_dir, "bev_imshow_result/")
48 |         img_imwrite_path = os.path.join(self.output_dir, "img_imshow_result/")
49 |         bev_result_path = os.path.join(self.output_dir, "bev_result/")
50 |         img_result_path = os.path.join(self.output_dir, "img_result/")
51 |         img_dir = os.path.join(cfg.CONTFUSE.DATASETS_DIR, "image_2/")
52 |         if os.path.exists(bev_imwrite_path):
53 |             shutil.rmtree(bev_imwrite_path)
54 |         os.mkdir(bev_imwrite_path)
55 |         if os.path.exists(img_imwrite_path):
56 |             shutil.rmtree(img_imwrite_path)
57 |         os.mkdir(img_imwrite_path)
58 |         if os.path.exists(bev_result_path):
59 |             shutil.rmtree(bev_result_path)
60 |         os.mkdir(bev_result_path)
61 |         if os.path.exists(img_result_path):
62 |             shutil.rmtree(img_result_path)
63 |         os.mkdir(img_result_path)
64 |         for epoch in range(len(self.evalset)):
65 |             eval_data = next(self.evalset)
66 |             frame_id = eval_data[9][0]  # eval_data: 0 bev, 1 img, 2-5 mappings, 6-7 labels, 8 calib tr, 9 frame ids
67 |             tr = eval_data[8]
68 |             img_pred, bev_pred = self.sess.run([self.img_pred, self.bev_pred],
69 |                                                feed_dict={self.net["bev_input"]: eval_data[0],
70 |                                                           self.net["img_input"]: eval_data[1],
71 |                                                           self.net["mapping1x"]: eval_data[2],
72 |                                                           self.net["mapping2x"]: eval_data[3],
73 |                                                           self.net["mapping4x"]: eval_data[4],
74 |                                                           self.net["mapping8x"]: eval_data[5],
75 |                                                           self.net["trainable"]: False  # batch norm in inference mode
76 |                                                           })
77 |             bev_bboxes = postprocess.parse_bev_predmap(bev_pred[0], self.bev_anchors)
78 |             bev_bboxes = postprocess.bev_nms(bev_bboxes, cfg.BEV.DISTANCE_THRESHOLDS)
79 |             img_bboxes = postprocess.parse_img_predmap(img_pred[0], self.img_anchors)
80 |             img_bboxes = postprocess.img_nms(img_bboxes, cfg.IMAGE.IOU_THRESHOLD)
81 |             postprocess.save_lidar_results(bev_bboxes, tr, frame_id, bev_result_path)
82 |             postprocess.save_image_results(img_bboxes, frame_id, img_result_path)
83 | 
vis_tools.imwrite_bev_bbox(eval_data[0][0][..., -3:]*200, bev_bboxes, bev_imwrite_path, frame_id) 84 | img_file = os.path.join(img_dir, frame_id+".png") 85 | img = cv2.imread(img_file) 86 | vis_tools.imwrite_img_bbox(img, img_bboxes, img_imwrite_path, frame_id) 87 | print("{}/{}, bev bboxes:\n".format(epoch, len(self.evalset)), bev_bboxes) 88 | 89 | if __name__ == "__main__": 90 | predicter = predicter() 91 | predicter.predict() -------------------------------------------------------------------------------- /src/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/src/scripts/__init__.py -------------------------------------------------------------------------------- /src/scripts/check_fusion_map.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("../") 3 | import numpy as np 4 | import cv2 5 | from data import dataset 6 | from data import preprocess 7 | from utils import vis_tools 8 | from tqdm import tqdm 9 | 10 | 11 | def project_fusionmap_to_img(img, fusionmap, down_ratio): 12 | points = fusionmap[fusionmap[..., 0] > 0] 13 | new_size = (int(img.shape[1]/down_ratio),int(img.shape[0]/down_ratio)) 14 | img = cv2.resize(img, new_size) 15 | for p in points: 16 | img[p[1]][p[0]] = 1.0 17 | vis_tools.imshow_image(img) 18 | vis_tools.imshow_image(fusionmap[..., 0].astype(np.float32)) 19 | 20 | 21 | if __name__ == "__main__": 22 | lidar_preprocessor = preprocess.LidarPreprocessor() 23 | trainset = dataset.Dataset(lidar_preprocessor, 'train') 24 | pbar = tqdm(trainset) 25 | for data in pbar: 26 | img = data[1][0] 27 | # print(img) 28 | mapping1x = data[2][0] 29 | mapping2x = data[3][0] 30 | mapping4x = data[4][0] 31 | mapping8x = data[5][0] 32 | project_fusionmap_to_img(img, mapping1x, 1) 33 | # vis_tools.imshow_image(img) -------------------------------------------------------------------------------- /src/scripts/check_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JaHorL/Contfuse/bd5ac478ef06dcd24c0909136b9da43c8d282513/src/scripts/check_gather_result.py -------------------------------------------------------------------------------- /src/scripts/check_labelmap.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("../") 3 | import os 4 | import cv2 5 | from data import dataset 6 | from utils import utils 7 | from utils import vis_tools 8 | from tqdm import tqdm 9 | from config.config import cfg 10 | from data import postprocess 11 | from data import preprocess 12 | from data import loader 13 | import numpy as np 14 | 15 | 16 | 17 | def get_idx(array): 18 | idx_tuple = np.where(array==1) 19 | u, idx = np.unique(idx_tuple[0], return_index = True) 20 | return u, idx_tuple[1][idx] 21 | 22 | 23 | def parse_bevlabel(bevlabel, anchors): 24 | xmap = np.tile(np.array(range(cfg.BEV.OUTPUT_Y))[:, np.newaxis], [1, cfg.BEV.OUTPUT_X]) 25 | ymap = np.tile(np.array(range(cfg.BEV.OUTPUT_X))[np.newaxis, :], [cfg.BEV.OUTPUT_Y, 1]) 26 | xy_grid = np.stack((xmap,ymap), axis=-1) 27 | bevlabel = np.concatenate((bevlabel, xy_grid), axis=-1) 28 | labels = bevlabel[bevlabel[..., 0]==1] 29 | cls_type = labels[..., 1].astype(np.int32) 30 | prob = np.ones(cls_type.shape[0], dtype=np.float32) 31 | box = labels[..., 3:-2].reshape(-1, cfg.CONTFUSE.CLASSES_NUM, 
cfg.BEV.BBOX_DIM) 32 | box = box[np.arange(box.shape[0]), cls_type] 33 | xx = labels[..., -2] - box[..., 0] * anchors[cls_type,3] 34 | yy = labels[..., -1] - box[..., 1] * anchors[cls_type,4] 35 | x = cfg.BEV.X_MAX - xx * cfg.BEV.X_RESOLUTION * cfg.BEV.STRIDE 36 | y = cfg.BEV.Y_MAX - yy * cfg.BEV.Y_RESOLUTION * cfg.BEV.STRIDE 37 | hwl = box[..., 2:5] * anchors[cls_type, :3] 38 | theta = np.arctan2(np.sin(box[..., 5]), np.cos(box[..., 5])) 39 | return np.stack([cls_type, prob, x, y, hwl[..., 0], hwl[..., 1], hwl[..., 2], theta], axis=-1) 40 | 41 | 42 | def parse_imglabel(imglabel, anchors): 43 | anchor_shape = [cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, anchors.shape[0], anchors.shape[1]] 44 | anchors = np.broadcast_to(np.array(anchors), anchor_shape) 45 | h = np.tile(np.array(range(cfg.IMAGE.OUTPUT_H))[:, np.newaxis], [1, cfg.IMAGE.OUTPUT_W]) 46 | w = np.tile(np.array(range(cfg.IMAGE.OUTPUT_W))[np.newaxis, :], [cfg.IMAGE.OUTPUT_H, 1]) 47 | hw_grid = np.stack((h, w), axis=-1) 48 | hw_shape = [cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.ANCHORS_NUM, 2] 49 | hw_grid = np.tile(hw_grid, cfg.IMAGE.ANCHORS_NUM).reshape(hw_shape) 50 | box_shape = [cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, cfg.IMAGE.ANCHORS_NUM, cfg.IMAGE.BBOX_DIM+1+1] 51 | imglabel = imglabel[..., 1:].reshape(box_shape) 52 | imglabel = np.concatenate((imglabel, hw_grid, anchors), axis=-1) 53 | preds = imglabel[imglabel[..., 0]>0.1] 54 | objness = preds[..., 0] 55 | cls_idx = preds[..., 1] 56 | box = preds[..., 2:] 57 | x = (box[:, 0] + box[:, -4]) * cfg.IMAGE.STRIDE / cfg.IMAGE.H_SCALE_RATIO 58 | y = (box[:, 1] + box[:, -3]) * cfg.IMAGE.STRIDE / cfg.IMAGE.W_SCALE_RATIO 59 | h = box[:, 2] / cfg.IMAGE.H_SCALE_RATIO * box[:, -2] 60 | w = box[:, 3] / cfg.IMAGE.W_SCALE_RATIO * box[:, -1] 61 | left = y - w / 2 62 | top = x - h / 2 63 | right = y + w / 2 64 | bottom = x + h / 2 65 | return np.stack([cls_idx, objness, left, top, right, bottom], axis=-1) 66 | 67 | 68 | 69 | 70 | lidar_preprocessor = preprocess.LidarPreprocessor() 71 | trainset = dataset.Dataset(lidar_preprocessor, 'train') 72 | pbar = tqdm(trainset) 73 | img_anchors = loader.load_anchors(cfg.IMAGE.ANCHORS) 74 | bev_anchors = loader.load_anchors(cfg.BEV.ANCHORS) 75 | img_dir = os.path.join(cfg.CONTFUSE.DATASETS_DIR, "image_2") 76 | lidar_dir = os.path.join(cfg.CONTFUSE.DATASETS_DIR, "lidar_files") 77 | for data in pbar: 78 | vis_tools.imshow_image(data[0][0][..., -1]) 79 | vis_tools.imshow_image(data[1][0]) 80 | vis_tools.imshow_image(data[6][0][..., 0]) 81 | vis_tools.imshow_image(data[7][0][..., 0]) 82 | bevlabel = parse_bevlabel(data[6][0], bev_anchors) 83 | imglabel = parse_imglabel(data[7][0], img_anchors) 84 | img_bboxes = postprocess.img_nms(imglabel, cfg.IMAGE.IOU_THRESHOLD) 85 | bev_bboxes = postprocess.bev_nms(bevlabel, cfg.BEV.DISTANCE_THRESHOLDS) 86 | img_file = os.path.join(img_dir, data[9][0]+'.png') 87 | img = cv2.imread(img_file) 88 | vis_tools.imshow_img_bbox(img, np.array(img_bboxes)) 89 | vis_tools.imshow_bev_bbox(data[0][0][..., -3:], np.array(bev_bboxes)) 90 | 91 | 92 | -------------------------------------------------------------------------------- /src/scripts/check_postprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("../") 3 | import os 4 | import cv2 5 | from glob import glob 6 | from utils import math 7 | from utils import vis_tools 8 | from config.config import cfg 9 | from data import postprocess 10 | from data import loader 11 | import numpy as np 12 | 13 | 14 | 
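# NOTE: this script assumes the *.npy prediction dumps that train.py writes
# during validation into cfg.CONTFUSE.LOG_DIR/pred/{img_pred,bev_pred}; run a
# training session with validation at least once before invoking it.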
img_pred_files = glob(cfg.CONTFUSE.LOG_DIR+"/pred/img_pred/*")
15 | bev_pred_files = glob(cfg.CONTFUSE.LOG_DIR+"/pred/bev_pred/*")
16 | img_anchors = loader.load_anchors(cfg.IMAGE.ANCHORS)
17 | bev_anchors = loader.load_anchors(cfg.BEV.ANCHORS)
18 | img_dir = os.path.join(cfg.CONTFUSE.DATASETS_DIR, "image_2/")
19 | lidar_dir = os.path.join(cfg.CONTFUSE.DATASETS_DIR, "lidar_files/")
20 | 
21 | 
22 | for fi in bev_pred_files:
23 |     bev = np.zeros([640, 640, 3], dtype=np.float32)
24 |     bev_pred = np.load(fi)
25 |     vis_tools.imshow_image(math.sigmoid(bev_pred[..., 0]))
26 |     bev_pred_cls = math.sigmoid(bev_pred[..., 1:cfg.CONTFUSE.CLASSES_NUM+1])
27 |     # vis_tools.imshow_image(math.sigmoid(bev_pre
28 |     bev_bboxes = postprocess.parse_bev_predmap(bev_pred, bev_anchors)
29 |     bev_bboxes = postprocess.bev_nms(bev_bboxes, cfg.BEV.DISTANCE_THRESHOLDS)
30 |     vis_tools.imshow_bev_bbox(bev, np.array(bev_bboxes))
31 |     # vis_tools.imshow_image(bev_pred[..., 0])
32 | 
33 | 
34 | for fi in img_pred_files:
35 |     img_pred = np.load(fi)
36 |     img_map = img_pred.reshape([cfg.IMAGE.OUTPUT_H, cfg.IMAGE.OUTPUT_W, 6, 11])
37 |     vis_tools.imshow_image(np.max(math.sigmoid(img_map[..., 0]), axis=-1))
38 |     vis_tools.imshow_image(np.max(math.sigmoid(img_map[..., 1:cfg.CONTFUSE.CLASSES_NUM+1])[..., 0], axis=-1))
39 |     img_bboxes = postprocess.parse_img_predmap(img_pred, img_anchors)
40 |     img_bboxes = postprocess.img_nms(img_bboxes, cfg.IMAGE.IOU_THRESHOLD)
41 |     img_file = img_dir + fi[-14:-8] + ".png"
42 |     img = cv2.imread(img_file)
43 |     vis_tools.imshow_img_bbox(img, np.array(img_bboxes))
--------------------------------------------------------------------------------
/src/scripts/gen_dataset_idx.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import os.path as osp
4 | 
5 | dataset_dir = "/home/ljh/dataset/detection_3d/kitti_compitetion"
6 | print(dataset_dir)
7 | lidar_dir = osp.join(dataset_dir, "lidar_files")
8 | calib_dir = osp.join(dataset_dir, "data_object_calib/calib")
9 | image_dir = osp.join(dataset_dir, "image_2")
10 | testing_dir = osp.join(dataset_dir, "testing/label_files")
11 | training_dir = osp.join(dataset_dir, "training/label_files")
12 | val_dir = osp.join(dataset_dir, "val/label_files")
13 | 
14 | def gen_dataset_idx(dir_name, data_type):
15 |     total_lines = []
16 |     for fi in os.listdir(dir_name):
17 |         label_fi = osp.join(dir_name, fi)
18 |         if ".txt" not in label_fi:
19 |             print(label_fi)
20 |             continue
21 |         lidar_fi = osp.join(lidar_dir, fi[:-4]+".bin")
22 |         calib_fi = osp.join(calib_dir, fi[:-4]+".txt")
23 |         image_fi = osp.join(image_dir, fi[:-4]+".png")
24 |         if osp.exists(lidar_fi) and osp.exists(calib_fi) and osp.exists(image_fi):
25 |             total_lines.append(lidar_fi + " " + image_fi + " " + label_fi + " " + calib_fi+"\n")
26 |         else:
27 |             print(data_type, fi)
28 |     save_fi = open(osp.join(dataset_dir, data_type+".txt"), "w")
29 |     print(save_fi.name)
30 |     random.shuffle(total_lines)
31 |     for line in total_lines:
32 |         save_fi.write(line)
33 |     save_fi.close()
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     gen_dataset_idx(training_dir, "training")
38 |     gen_dataset_idx(testing_dir, "testing")
39 |     gen_dataset_idx(val_dir, "val")
40 | 
--------------------------------------------------------------------------------
/src/scripts/statistic.py:
--------------------------------------------------------------------------------
1 | 
2 | import sys
3 | sys.path.append("..")
4 | import os
5 | import numpy as np
6 | from sklearn.cluster import KMeans
7 | from
config.config import cfg 8 | 9 | 10 | 11 | class statistic(object): 12 | def __init__(self): 13 | self.anno_path = cfg.CONTFUSE.TRAIN_DATA 14 | self.class_list = cfg.CONTFUSE.CLASSES_LIST 15 | self.annotations = self.load_annotations(self.anno_path) 16 | 17 | 18 | def load_annotations(self, annot_path): 19 | with open(annot_path, 'r') as f: 20 | txt = f.readlines() 21 | annotations = [line.strip() for line in txt if len(line.strip().split())!= 0] 22 | np.random.shuffle(annotations) 23 | return annotations 24 | 25 | def statist_all_labels(self): 26 | total_types = [] 27 | total_dimensions = [] 28 | total_box2d_corners = [] 29 | total_box3d_locations = [] 30 | total_rzs = [] 31 | for fi in self.annotations: 32 | label_file = fi.split()[2] 33 | types, dimensions, box2d_corners, box3d_locations, rzs = self.load_label(label_file) 34 | total_types += types 35 | total_dimensions += dimensions 36 | total_box2d_corners += box2d_corners 37 | total_box3d_locations += box3d_locations 38 | total_rzs += rzs 39 | img_anchors = self.analysis_box2d(total_box2d_corners) 40 | bev_anchors = self.analysis_box3d(total_dimensions, total_types) 41 | loss_scale = self.analysis_types(total_types, bev_anchors) 42 | 43 | 44 | def load_label(self, label_file): 45 | with open(label_file, "r") as f: 46 | lines = f.read().split("\n") 47 | types = [] 48 | dimensions = [] 49 | box2d_corners = [] 50 | box3d_locations = [] 51 | rzs = [] 52 | for line in lines: 53 | if not line: 54 | continue 55 | line = line.split(" ") 56 | if(line[0] not in self.class_list): 57 | continue 58 | types.append(self.class_list.index(line[0])) 59 | dimensions.append(np.array(line[8:11]).astype(np.float32)) 60 | box2d_corners.append(np.array(line[4:8]).astype(np.float32)) 61 | box3d_locations.append(np.array(line[11:14]).astype(np.float32)) 62 | rzs.append(float(line[14])) 63 | return types, dimensions, box2d_corners, box3d_locations, rzs 64 | 65 | 66 | def analysis_box2d(self, box2d_corners): 67 | box_hw = np.zeros([len(box2d_corners), 2], dtype=np.float32) 68 | count = 0 69 | for corner in box2d_corners: 70 | h = corner[3]-corner[1] 71 | w = corner[2]-corner[0] 72 | box_hw[count] = [h, w] 73 | count += 1 74 | cluster = KMeans(n_clusters=6) 75 | cluster.fit(box_hw) 76 | print("cluster_center: ", cluster.cluster_centers_) 77 | return cluster.cluster_centers_ 78 | 79 | def calc_mean_dxdy(self, w, l): 80 | ww = int(w / (cfg.BEV.X_RESOLUTION * 2)) 81 | ll = int(l / (cfg.BEV.X_RESOLUTION * 2)) 82 | dx_sum = 0 83 | dy_sum = 0 84 | for i in range(ww): 85 | dx_sum += i 86 | dx_mean = dx_sum / ww 87 | for i in range(ll): 88 | dy_sum += i 89 | dy_mean = dy_sum / ll 90 | return np.array([dx_mean, dy_mean], dtype=np.float32) 91 | 92 | def analysis_box3d(self, box3d_dimensions, types): 93 | total_box_hwl = np.zeros([len(self.class_list), 4]) 94 | mean_box_hwldxdy = np.zeros([len(self.class_list), 5]) 95 | for i in range(len(box3d_dimensions)): 96 | total_box_hwl[types[i]][:3] += box3d_dimensions[i] 97 | total_box_hwl[types[i]][3] += 1 98 | for i in range(len(total_box_hwl)): 99 | h, w, l = total_box_hwl[i][:3] / total_box_hwl[i][3] 100 | dx, dy = self.calc_mean_dxdy(w, l) 101 | mean_box_hwldxdy[i] = np.array([h, w, l, dx, dy], dtype=np.float32) 102 | print("mean_box_hwldxdy: ", mean_box_hwldxdy) 103 | return mean_box_hwldxdy 104 | 105 | def analysis_types(self, types, bev_anchors): 106 | types_set = set(types) 107 | types = np.array(types) 108 | types_num_array = np.zeros(len(types_set)) 109 | for i in types_set: 110 | types_num_array[i] = 
len(types[types==i])
111 |         img_ratio = 1 / (types_num_array / np.sum(types_num_array))
112 |         img_scale = img_ratio / img_ratio[0]
113 |         bev_types_num = types_num_array * bev_anchors[:, 1] * bev_anchors[:, 2]
114 |         bev_ratio = 1 / (bev_types_num / np.sum(bev_types_num))
115 |         bev_scale = bev_ratio / bev_ratio[0]
116 |         print(bev_ratio, bev_scale)
117 |         return img_scale, bev_scale
118 | 
119 | if __name__ == "__main__":
120 |     sat = statistic()
121 |     sat.statist_all_labels()
122 |     # print(sat.annotations[0])
--------------------------------------------------------------------------------
/src/temp.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | localtime = time.asctime( time.localtime(time.time()) )
4 | print("Local time:", localtime)
--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import io
3 | import time
4 | import shutil
5 | from tqdm import tqdm
6 | import numpy as np
7 | import tensorflow as tf
8 | from config.config import cfg
9 | from data import dataset
10 | from data import preprocess
11 | from models import contfuse_network
12 | 
13 | 
14 | 
15 | 
16 | class Trainer(object):
17 | 
18 |     def __init__(self):
19 |         self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
20 |         self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
21 |         self.first_stage_epochs = cfg.TRAIN.FRIST_STAGE_EPOCHS
22 |         self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
23 |         self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
24 |         self.initial_weight = cfg.TRAIN.PRETRAIN_WEIGHT
25 |         self.time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
26 |         self.moving_ave_decay = cfg.CONTFUSE.MOVING_AVE_DECAY
27 |         self.train_logdir = "./data/log/train"
28 |         self.lidar_preprocessor = preprocess.LidarPreprocessor()
29 |         self.trainset = dataset.Dataset(self.lidar_preprocessor, 'train')
30 |         self.valset = dataset.Dataset(self.lidar_preprocessor, 'val')
31 |         self.steps_per_period = len(self.trainset)
32 |         self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
33 | 
34 |         with tf.name_scope('model'):
35 |             self.model = contfuse_network.ContfuseNetwork()
36 |             self.net = self.model.load()
37 |             self.net_var = tf.global_variables()
38 |             self.loss = self.net["contfuse_loss"]
39 | 
40 |         with tf.name_scope('learn_rate'):
41 |             self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
42 |             warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
43 |                                        dtype=tf.float64, name='warmup_steps')
44 |             train_steps = tf.constant((self.first_stage_epochs + self.second_stage_epochs) * self.steps_per_period,
45 |                                       dtype=tf.float64, name='train_steps')
46 |             self.learn_rate = tf.cond(
47 |                 pred=self.global_step < warmup_steps,
48 |                 true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
49 |                 false_fn=lambda: self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) *
50 |                                  (1 + tf.cos((self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
51 |             )
52 |             global_step_update = tf.assign_add(self.global_step, 1.0)
53 | 
54 |         with tf.name_scope("define_weight_decay"):
55 |             moving_ave = tf.train.ExponentialMovingAverage(self.moving_ave_decay).apply(tf.trainable_variables())
56 | 
57 |         with tf.name_scope("define_first_stage_train"):
58 |             self.first_stage_trainable_var_list = []
59 |             for var in tf.trainable_variables():
60 |                 var_name = var.op.name
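                # stage one freezes the backbone: only variables whose
                # top-level scope is 'contfuse_headnet' are handed to the
                # first-stage optimizer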
61 |                 var_name_mess = str(var_name).split('/')
62 |                 if var_name_mess[0] in ["contfuse_headnet"]:
63 |                     self.first_stage_trainable_var_list.append(var)
64 |             first_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss,
65 |                                                 var_list=self.first_stage_trainable_var_list)
66 |             with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
67 |                 with tf.control_dependencies([first_stage_optimizer, global_step_update]):
68 |                     with tf.control_dependencies([moving_ave]):
69 |                         self.train_op_with_frozen_variables = tf.no_op()
70 | 
71 |         with tf.name_scope("define_second_stage_train"):
72 |             second_stage_trainable_var_list = tf.trainable_variables()
73 |             second_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss,
74 |                                                 var_list=second_stage_trainable_var_list)
75 |             with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
76 |                 with tf.control_dependencies([second_stage_optimizer, global_step_update]):
77 |                     with tf.control_dependencies([moving_ave]):
78 |                         self.train_op_with_all_variables = tf.no_op()
79 | 
80 |         with tf.name_scope('loader_and_saver'):
81 |             self.loader = tf.train.Saver(self.net_var)
82 |             self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
83 | 
84 |         with tf.name_scope('summary'):
85 |             tf.summary.scalar("learn_rate", self.learn_rate)
86 |             tf.summary.scalar("total_loss", self.net["contfuse_loss"])
87 |             tf.summary.scalar("bev_loss", self.net["bev_loss"])
88 |             tf.summary.scalar("bev_obj_loss", self.net["bev_obj_loss"])
89 |             tf.summary.scalar("bev_cls_loss", self.net["bev_cls_loss"])
90 |             tf.summary.scalar("bev_bbox_loss", self.net["bev_bbox_loss"])
91 |             tf.summary.scalar("img_loss", self.net["img_loss"])
92 |             tf.summary.scalar("img_obj_loss", self.net["img_obj_loss"])
93 |             tf.summary.scalar("img_cls_loss", self.net["img_cls_loss"])
94 |             tf.summary.scalar("img_bbox_loss", self.net["img_bbox_loss"])
95 |             logdir = "../logs/tensorboard"
96 |             if os.path.exists(logdir):
97 |                 shutil.rmtree(logdir)
98 |             os.mkdir(logdir)
99 |             self.write_op = tf.summary.merge_all()
100 |             self.summary_writer = tf.summary.FileWriter(logdir, graph=self.sess.graph)
101 |             img_pred_dir = cfg.CONTFUSE.LOG_DIR+"/pred/img_pred/"
102 |             bev_pred_dir = cfg.CONTFUSE.LOG_DIR+"/pred/bev_pred/"
103 |             if os.path.exists(img_pred_dir):
104 |                 shutil.rmtree(img_pred_dir)
105 |             os.mkdir(img_pred_dir)
106 |             if os.path.exists(bev_pred_dir):
107 |                 shutil.rmtree(bev_pred_dir)
108 |             os.mkdir(bev_pred_dir)
109 | 
110 | 
111 |     def train(self):
112 |         self.sess.run(tf.global_variables_initializer())
113 |         try:
114 |             print('=> Restoring weights from: %s ... ' % self.initial_weight)
115 |             self.loader.restore(self.sess, self.initial_weight)
116 |         except Exception:
117 |             print('=> %s does not exist !!!'
                  % self.initial_weight)
118 |             print('=> Now it starts to train CONTFUSE from scratch ...')
119 |             self.first_stage_epochs = 0
120 | 
121 |         for epoch in range(1, 1+self.first_stage_epochs+self.second_stage_epochs):
122 |             if epoch <= self.first_stage_epochs:
123 |                 train_op = self.train_op_with_frozen_variables
124 |             else:
125 |                 train_op = self.train_op_with_all_variables
126 |             pbar = tqdm(self.trainset)
127 |             train_epoch_loss = []
128 |             for train_data in pbar:
129 |                 _, summary, train_step_loss, global_step_val = self.sess.run(
130 |                     [train_op, self.write_op, self.loss, self.global_step], feed_dict={
131 |                                 self.net["bev_input"]: train_data[0],
132 |                                 self.net["img_input"]: train_data[1],
133 |                                 self.net["mapping1x"]: train_data[2],
134 |                                 self.net["mapping2x"]: train_data[3],
135 |                                 self.net["mapping4x"]: train_data[4],
136 |                                 self.net["mapping8x"]: train_data[5],
137 |                                 self.net["bev_label"]: train_data[6],
138 |                                 self.net["img_label"]: train_data[7],
139 |                                 self.net["bev_loss_scale"]: cfg.BEV.LOSS_SCALE,
140 |                                 self.net["img_loss_scale"]: cfg.IMAGE.LOSS_SCALE,
141 |                                 self.net["trainable"]: True
142 |                 })
143 |                 train_epoch_loss.append(train_step_loss)
144 |                 self.summary_writer.add_summary(summary, global_step_val)
145 |                 pbar.set_description("train loss: %.2f" % train_step_loss)
146 | 
147 |             if global_step_val % cfg.TRAIN.SAVING_STEPS == 0:
148 |                 val_epoch_loss = []
149 |                 print("validating...")
150 |                 for val_data in self.valset:
151 |                     val_step_loss, img_pred, bev_pred = self.sess.run(
152 |                         [self.loss, self.net['img_pred'], self.net['bev_pred']], feed_dict={
153 |                                 self.net["bev_input"]: val_data[0],
154 |                                 self.net["img_input"]: val_data[1],
155 |                                 self.net["mapping1x"]: val_data[2],
156 |                                 self.net["mapping2x"]: val_data[3],
157 |                                 self.net["mapping4x"]: val_data[4],
158 |                                 self.net["mapping8x"]: val_data[5],
159 |                                 self.net["bev_label"]: val_data[6],
160 |                                 self.net["img_label"]: val_data[7],
161 |                                 self.net["bev_loss_scale"]: cfg.BEV.LOSS_SCALE,
162 |                                 self.net["img_loss_scale"]: cfg.IMAGE.LOSS_SCALE,
163 |                                 self.net["trainable"]: True
164 |                     })
165 |                     val_epoch_loss.append(val_step_loss)
166 |                     for i in range(cfg.TRAIN.BATCH_SIZE):
167 |                         np.save(cfg.CONTFUSE.LOG_DIR+"/pred/img_pred/"+val_data[9][i]+"_img", img_pred[i])
168 |                         np.save(cfg.CONTFUSE.LOG_DIR+"/pred/bev_pred/"+val_data[9][i]+"_bev", bev_pred[i])
169 |                 print("saving...")
170 |                 train_epoch_loss_m, val_epoch_loss_m = np.mean(train_epoch_loss), np.mean(val_epoch_loss)
171 |                 ckpt_file = "../checkpoint/contfuse_val_loss=%.4f.ckpt" % val_epoch_loss_m
172 |                 log_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
173 |                 print("=> Epoch: %2d Time: %s Train loss: %.2f val loss: %.2f Saving %s ..."
174 |                       % (epoch, log_time, train_epoch_loss_m, val_epoch_loss_m, ckpt_file))
175 |                 self.saver.save(self.sess, ckpt_file, global_step=epoch)
176 |         print("saving...")
177 |         save_time = time.asctime(time.localtime(time.time()))
178 |         ckpt_file = "../checkpoint/contfuse_last_epoch-%s.ckpt" % save_time
179 |         self.saver.save(self.sess, ckpt_file, global_step=epoch)
180 | 
181 | 
182 | 
183 | if __name__ == "__main__":
184 |     trainer = Trainer()
185 |     trainer.train()
--------------------------------------------------------------------------------
/src/utils/math.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def sigmoid(x):
5 |     return 1.0/(1.0 + np.exp(-x))
6 | 
7 | 
8 | def softmax(x):
9 |     e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))  # max-shift for numerical stability
10 |     return e_x/np.sum(e_x, axis=-1)[..., np.newaxis]
--------------------------------------------------------------------------------
/src/utils/tensorboard_tools.py:
--------------------------------------------------------------------------------
1 | import os
2 | import io
3 | import cv2
4 | import tensorflow as tf
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | class TensorboardTools(object):
9 | 
10 |     def __init__(self):
11 |         self.summary_writer = None
12 | 
13 | 
14 |     def summary_image(self, image, tag, new_size=(256, 256), step=None):
15 | 
16 |         # image=cv2.resize(image, new_size)
17 | 
18 |         if self.summary_writer is None:
19 |             raise Exception("summary_writer is None")
20 | 
21 |         im_summaries = []
22 |         # Write the image to a string
23 |         s = io.BytesIO()
24 |         plt.imsave(s, image)
25 | 
26 |         # Create an Image object
27 |         img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
28 |                                    height=new_size[0],
29 |                                    width=new_size[1])
30 |         # Create a Summary value
31 |         im_summaries.append(tf.Summary.Value(tag=tag, image=img_sum))
32 | 
33 |         # Create and write Summary
34 |         summary = tf.Summary(value=im_summaries)
35 |         self.summary_writer.add_summary(summary, step)
36 | 
37 | 
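    # NOTE: summary_image above logs an arbitrary numpy image by encoding it
    # to PNG bytes in memory (plt.imsave into a BytesIO buffer) and wrapping
    # the bytes in tf.Summary.Image, so no image ops are added to the graph;
    # summary_scalar below does the same for plain floats.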
38 |     def summary_scalar(self, value, tag, step=None):
39 |         """Log a scalar variable.
40 |         Parameters
41 |         ----------
42 |         tag : basestring
43 |             Name of the scalar
44 |         value
45 |         step : int
46 |             training iteration
47 |         """
48 |         if self.summary_writer is None:
49 |             raise Exception("summary_writer is None")
50 |         summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
51 |         self.summary_writer.add_summary(summary, step)
--------------------------------------------------------------------------------
/src/utils/timer.py:
--------------------------------------------------------------------------------
1 | from time import time
2 | 
3 | class Timer:
4 |     def __init__(self):
5 |         self.init_time = time()
6 |         self.time_now = self.init_time
7 | 
8 |     def time_diff_per_n_loops(self):
9 |         time_diff = time() - self.time_now
10 |         self.time_now = time()
11 |         return time_diff
12 | 
13 |     def total_time(self):
14 |         return time() - self.init_time
15 | 
16 | 
17 | 
18 | if __name__ == '__main__':
19 |     from time import sleep
20 | 
21 |     timeit = Timer()
22 |     for i in range(10):
23 |         sleep(1)
24 |         print('It takes {} secs per loop.'.format(timeit.time_diff_per_n_loops()))
25 | 
26 |     print('It takes {} secs per whole script.'.format(timeit.total_time()))
27 | 
--------------------------------------------------------------------------------
/src/utils/transform.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from config.config import cfg
3 | 
4 | 
5 | def bevbox_compose(x, y, w, l, rz):
6 |     center_mat = np.array([[y, y, y, y],
7 |                            [x, x, x, x]])
8 |     tracklet_box = np.array([[w / 2, -w / 2, -w / 2, w / 2],
9 |                              [-l / 2, -l / 2, l / 2, l / 2]])
10 |     yaw = -rz
11 |     rot_mat = np.array([[np.cos(yaw), -np.sin(yaw)],
12 |                         [np.sin(yaw), np.cos(yaw)]])
13 |     corner_pos_in_lidar = np.dot(rot_mat, tracklet_box)+center_mat
14 |     bevbox = corner_pos_in_lidar.transpose().astype(np.int32)
15 |     return bevbox
16 | 
17 | 
18 | def box3d_compose(location, dimension, rz):
19 |     box3d = np.zeros((8,3), dtype=np.float32)
20 |     x, y, z = location
21 |     h, w, l = dimension
22 |     center_mat = np.array([[y, y, y, y],
23 |                            [x, x, x, x]])
24 |     tracklet_box = np.array([[w / 2, -w / 2, -w / 2, w / 2],
25 |                              [-l / 2, -l / 2, l / 2, l / 2]])
26 |     yaw = -rz
27 |     rot_mat = np.array([[np.cos(yaw), -np.sin(yaw)],
28 |                         [np.sin(yaw), np.cos(yaw)]])
29 |     corner_pos_in_lidar = np.dot(rot_mat, tracklet_box)+center_mat
30 |     box = corner_pos_in_lidar.transpose()
31 |     bottom = z-h/2
32 |     top = z+h/2
33 |     box3d[:4, :2] = box
34 |     box3d[:4, 2] = bottom
35 |     box3d[4:, :2] = box
36 |     box3d[4:, 2] = top
37 |     return box3d
38 | 
39 | 
40 | def location_lidar2bev(location):
41 |     location[0] = (cfg.BEV.X_MAX - location[0]) / cfg.BEV.X_RESOLUTION
42 |     location[1] = (cfg.BEV.Y_MAX - location[1]) / cfg.BEV.Y_RESOLUTION
43 |     location[2] = (cfg.BEV.Z_MAX - location[2]) / cfg.BEV.Z_RESOLUTION
44 |     return location
45 | 
46 | 
47 | def location_lidar2bevlabel(location):
48 |     location[0] = (cfg.BEV.X_MAX - location[0]) / (cfg.BEV.X_RESOLUTION * cfg.BEV.STRIDE)
49 |     location[1] = (cfg.BEV.Y_MAX - location[1]) / (cfg.BEV.Y_RESOLUTION * cfg.BEV.STRIDE)
50 |     location[2] = (cfg.BEV.Z_MAX - location[2]) / cfg.BEV.Z_RESOLUTION
51 |     return location
52 | 
53 | 
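# NOTE: the two helpers above convert lidar meters to BEV grid cells; the axes
# are mirrored (X_MAX - x, Y_MAX - y) because cell (0, 0) sits at the far
# corner of the detection range, and the *bevlabel* variant additionally
# divides x/y by BEV.STRIDE to land in the downsampled label/output map.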
54 | def bbox3d_lidar2img(lidar_bboxes3d, pp, r0):
55 |     num = len(lidar_bboxes3d)
56 |     img_bboxes3d = np.zeros((num, 8, 2)).astype(np.int32)
57 |     for i in range(num):
58 |         lidar_bbox = lidar_bboxes3d[i]
59 |         corners = np.ones([8, 4], dtype=np.float32)  # 8 box corners in homogeneous coords
60 |         corners[:, :3] = lidar_bbox
61 |         img_bbox = (pp.dot(r0)).dot(corners.transpose())
62 |         img_bboxes3d[i] = np.stack([img_bbox[0]/img_bbox[2], img_bbox[1]/img_bbox[2]], axis=-1).astype(np.int32)
63 |     return img_bboxes3d
64 | 
65 | 
66 | def location_cam2lidar(location, tr):
67 |     location_in_cam = np.ones([4], dtype=np.float32)
68 |     location_in_cam[:3] = location
69 |     t = np.zeros([4, 4], dtype=np.float32)
70 |     t[:3, :] = tr
71 |     t[3, 3] = 1
72 |     t_inv = np.linalg.inv(t)
73 |     location_in_lidar = t_inv.dot(location_in_cam)
74 |     location_in_lidar = location_in_lidar[:3]
75 |     return location_in_lidar
76 | 
77 | 
78 | def location_lidar2cam(location, tr):
79 |     location_in_lidar = np.ones([4], dtype=np.float32)
80 |     location_in_lidar[:3] = location
81 |     t = np.zeros([4, 4], dtype=np.float32)
82 |     t[:3, :] = tr
83 |     t[3, 3] = 1
84 |     location_in_cam = t.dot(location_in_lidar)
85 |     location_in_cam = location_in_cam[:3]
86 |     return location_in_cam
87 | 
88 | 
89 | def get_tr_lidar2img(p20, r0, tr_lidar2cam):
90 |     t_p20 = np.zeros([4,4], dtype=float)
91 |     t_r0 = np.zeros([4,4], dtype=float)
92 |     t_tr_lidar2cam = np.zeros([4,4], dtype=float)
93 |     t_p20[:3,:] = p20
94 |     t_r0[:3,:3] = r0
95 |     t_tr_lidar2cam[:3,:] = tr_lidar2cam
96 |     t_p20[3][3] = 1
97 |     t_r0[3][3] = 1
98 |     t_tr_lidar2cam[3][3] = 1
99 |     tr = (t_p20.dot(t_r0)).dot(t_tr_lidar2cam)
100 |     return tr
101 | 
102 | 
103 | def ry_to_rz(ry):
104 |     ry = np.array(ry).astype(np.float32)
105 |     angle = -ry - np.pi / 2
106 |     return angle
107 | 
108 | 
109 | def rz_to_ry(rz):
110 |     rz = np.array(rz).astype(np.float32)
111 |     angle = -rz - np.pi / 2
112 |     return angle
113 | 
--------------------------------------------------------------------------------
/src/utils/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from config.config import cfg
4 | 
5 | 
6 | def read_pb_return_tensors(graph, pb_file, return_elements):
7 | 
8 |     with tf.gfile.FastGFile(pb_file, 'rb') as f:
9 |         frozen_graph_def = tf.GraphDef()
10 |         frozen_graph_def.ParseFromString(f.read())
11 | 
12 |     with graph.as_default():
13 |         return_elements = tf.import_graph_def(frozen_graph_def,
14 |                                               return_elements=return_elements)
15 |     return return_elements
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/src/utils/vis_tools.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import numpy as np
4 | import mayavi.mlab as mlab
5 | from config.config import cfg
6 | from utils import transform
7 | 
8 | def draw_img_bboxes2d(img, bboxes, cls_types):
9 |     bboxes = bboxes.astype(np.int32)
10 |     num = len(bboxes)
11 |     for n in range(num):
12 |         b = bboxes[n]
13 |         color = cfg.CONTFUSE.CLASSES_COLOR[int(cls_types[n])]
14 |         cv2.line(img, (b[0],b[1]), (b[2],b[1]), color, 2, cv2.LINE_AA)
15 |         cv2.line(img, (b[0],b[1]), (b[0],b[3]), color, 2, cv2.LINE_AA)
16 |         cv2.line(img, (b[0],b[3]), (b[2],b[3]), color, 2, cv2.LINE_AA)
17 |         cv2.line(img, (b[2],b[1]), (b[2],b[3]), color, 2, cv2.LINE_AA)
18 |     return img
19 | 
20 | 
21 | def draw_img_bboxes3d(img, bboxes, cls_types, color=(255,0,255)):
22 |     num = len(bboxes)
23 |     for n in range(num):
24 |         b = bboxes[n]
25 |         color = cfg.CONTFUSE.CLASSES_COLOR[int(cls_types[n])]
26 |         for k in range(0,4):
27 |             i,j = k,(k+1)%4
28 |             cv2.line(img, (b[i,0],b[i,1]), (b[j,0],b[j,1]), color, 2, cv2.LINE_AA)
29 |             i,j = k+4,(k+1)%4 + 4
30 |             cv2.line(img, (b[i,0],b[i,1]), (b[j,0],b[j,1]), color, 2, cv2.LINE_AA)
31 |             i,j = k,k+4
32 |             cv2.line(img, (b[i,0],b[i,1]), (b[j,0],b[j,1]), color, 2, cv2.LINE_AA)
33 |     return img
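# Usage sketch (hypothetical values, for illustration only):
#   boxes2d = np.array([[50, 60, 120, 180]])        # one [x1, y1, x2, y2] box
#   img = draw_img_bboxes2d(img, boxes2d, [0])      # class 0 -> CLASSES_COLOR[0]
# draw_img_bboxes3d instead expects the (8, 2) projected corner arrays that
# transform.bbox3d_lidar2img produces.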
34 | 
35 | 
36 | def draw_bev_bboxes(bev, bev_bboxes, cls_types, color=(255,0,0)):
37 |     bev = bev.copy()
38 |     num = len(bev_bboxes)
39 |     for n in range(num):
40 |         b = bev_bboxes[n]
41 |         color = cfg.CONTFUSE.CLASSES_COLOR[int(cls_types[n])]
42 |         cv2.line(bev, (b[0,0],b[0,1]), (b[1,0],b[1,1]), color, 2, cv2.LINE_AA)
43 |         cv2.line(bev, (b[1,0],b[1,1]), (b[2,0],b[2,1]), color, 2, cv2.LINE_AA)
44 |         cv2.line(bev, (b[2,0],b[2,1]), (b[3,0],b[3,1]), color, 2, cv2.LINE_AA)
45 |         cv2.line(bev, (b[3,0],b[3,1]), (b[0,0],b[0,1]), color, 2, cv2.LINE_AA)
46 |     return bev
47 | 
48 | 
49 | def get_pointcloud_figure(is_grid, is_axis):
50 |     fig = mlab.figure(1, fgcolor=(0, 0, 0), bgcolor=(1,1,1))
51 |     if is_grid:
52 |         L = 25
53 |         dL = 5
54 |         Z = -2
55 |         mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=0.2)
56 |         for y in np.arange(-L,L+1,dL):
57 |             x1,y1,z1 = -L, y, Z
58 |             x2,y2,z2 = L, y, Z
59 |             mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.3,0.3,0.3),
60 |                         tube_radius=None, line_width=1, figure=fig)
61 |         for x in np.arange(-L,L+1,dL):
62 |             x1,y1,z1 = x,-L, Z
63 |             x2,y2,z2 = x, L, Z
64 |             mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.3,0.3,0.3),
65 |                         tube_radius=None, line_width=1, figure=fig)
66 |     if is_axis:
67 |         axes = np.array([
68 |             [2.,0.,0.,0.],
69 |             [0.,2.,0.,0.],
70 |             [0.,0.,2.,0.],
71 |         ], dtype=np.float64)
72 |         mlab.points3d(0, 0, 0, color=(1,1,1), mode='sphere', scale_factor=0.2)
73 |         mlab.plot3d([0, axes[0,0]], [0, axes[0,1]], [0, axes[0,2]], color=(1,0,0),
74 |                     tube_radius=None, line_width=2, figure=fig)
75 |         mlab.plot3d([0, axes[1,0]], [0, axes[1,1]], [0, axes[1,2]], color=(0,1,0),
76 |                     tube_radius=None, line_width=2, figure=fig)
77 |         mlab.plot3d([0, axes[2,0]], [0, axes[2,1]], [0, axes[2,2]], color=(0,0,1),
78 |                     tube_radius=None, line_width=2, figure=fig)
79 |     return fig
80 | 
81 | 
82 | def draw_pointcloud_bboxes(fig, lidar, boxes3d, color=(1,1,1), line_width=1):
83 |     pxs = lidar[:,0]
84 |     pys = lidar[:,1]
85 |     pzs = lidar[:,2]
86 |     prs = lidar[:,3]
87 |     prs = np.clip(prs/15, 0, 1)
88 |     mlab.points3d(pxs, pys, pzs, prs, mode='point', scale_factor=1, figure=fig)
89 |     if boxes3d.shape == (8,3): boxes3d = boxes3d.reshape(1,8,3)
90 |     for n in range(len(boxes3d)):
91 |         b = boxes3d[n]
92 |         for k in range(0,4):
93 |             i,j = k,(k+1)%4
94 |             mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]],
95 |                         color=color, tube_radius=None, line_width=line_width, figure=fig)
96 |             i,j = k+4,(k+1)%4 + 4
97 |             mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]],
98 |                         color=color, tube_radius=None, line_width=line_width, figure=fig)
99 |             i,j = k,k+4
100 |             mlab.plot3d([b[i,0], b[j,0]], [b[i,1], b[j,1]], [b[i,2], b[j,2]],
101 |                         color=color, tube_radius=None, line_width=line_width, figure=fig)
102 | 
103 | 
104 | def imshow_image(img, new_size=None, name=None):
105 |     if not name:
106 |         name = 'img1'
107 |     if new_size:
108 |         img = cv2.resize(img, new_size)
109 |     cv2.imshow(name, img)
110 |     cv2.moveWindow(name, 0, 0)
111 |     cv2.waitKey(0)
112 |     cv2.destroyAllWindows()
113 | 
114 | 
115 | def write_image(img, save_path, idx, size=None):
116 |     name = save_path + "/" + idx + '.jpg'
117 |     if size:
118 |         img = cv2.resize(img, size)
119 |     # img = img.astype(np.int32)
120 |     cv2.imwrite(name, img)
121 | 
122 | def imshow_img_bbox(img, bboxes):
123 |     bboxes = np.array(bboxes)
124 |     if(len(bboxes) == 0):
125 |         return
126 |     corners = np.stack([bboxes[..., 2], bboxes[..., 3], bboxes[..., 4], bboxes[..., 5]], axis=-1)
127 |     img = draw_img_bboxes2d(img, corners, bboxes[..., 0])
128 |     imshow_image(img)
129 | 
130 | 
131 | def
imshow_bev_bbox(bev, bboxes): 132 | bboxes = np.array(bboxes) 133 | if(len(bboxes) == 0): 134 | return 135 | bev_bboxes = [] 136 | for b in bboxes: 137 | xyz = transform.location_lidar2bev(b[2:5]) 138 | w = b[5] / cfg.BEV.X_RESOLUTION 139 | l = b[6] / cfg.BEV.Y_RESOLUTION 140 | bev_bbox = transform.bevbox_compose(xyz[0], xyz[1], w, l, b[7]) 141 | bev_bboxes.append(bev_bbox) 142 | bev = draw_bev_bboxes(bev, bev_bboxes, bboxes[..., 0]) 143 | imshow_image(bev) 144 | 145 | 146 | def imwrite_img_bbox(img, bboxes, save_path, idx): 147 | bboxes = np.array(bboxes) 148 | if(len(bboxes) == 0): 149 | return 150 | corners = np.stack([bboxes[..., 2], bboxes[..., 3], bboxes[..., 4], bboxes[..., 5]], axis=-1) 151 | img = draw_img_bboxes2d(img, corners, bboxes[..., 0]) 152 | write_image(img, save_path, idx) 153 | 154 | 155 | def imwrite_bev_bbox(bev, bboxes, save_path, idx): 156 | bboxes = np.array(bboxes) 157 | if(len(bboxes) == 0): 158 | return 159 | bev_bboxes = [] 160 | for b in bboxes: 161 | xyz = transform.location_lidar2bev(b[2:5]) 162 | w = b[5] / cfg.BEV.X_RESOLUTION 163 | l = b[6] / cfg.BEV.Y_RESOLUTION 164 | bev_bbox = transform.bevbox_compose(xyz[0], xyz[1], w, l, b[7]) 165 | bev_bboxes.append(bev_bbox) 166 | bev = draw_bev_bboxes(bev, bev_bboxes, bboxes[..., 0]) 167 | write_image(bev, save_path, idx) 168 | --------------------------------------------------------------------------------