├── .gitignore ├── LICENSE ├── README.md ├── app_main.py ├── app_run_detector.py ├── bin └── train.py ├── cfg └── unet_bl_voxel_jrdb_0.05_0.1.yaml ├── filter_submission.py ├── lib ├── iou3d │ ├── iou3d │ │ ├── __init__.py │ │ └── src │ │ │ ├── iou3d.cpp │ │ │ └── iou3d_kernel.cu │ └── setup.py ├── jrdb_det3d_eval │ ├── LICENSE │ ├── example.py │ ├── jrdb_det3d_eval │ │ ├── __init__.py │ │ ├── eval.py │ │ ├── evaluate.py │ │ ├── kitti_common.py │ │ └── rotate_iou.py │ └── setup.py └── jrdb_devkit │ ├── detection_eval │ ├── README.md │ ├── convert_dataset_to_KITTI.py │ ├── convert_labels_to_KITTI.py │ ├── evaluate_object │ └── evaluate_object.cpp │ └── tracking_eval │ ├── README.md │ ├── __init__.py │ ├── convert_dataset_to_MOT.py │ ├── eval_mot.py │ └── tools │ ├── __init__.py │ ├── distances.py │ ├── io.py │ ├── lap_util.py │ ├── metrics.py │ ├── mot.py │ └── utils.py ├── lidar_det ├── __init__.py ├── dataset │ ├── __init__.py │ ├── builder.py │ ├── dataset_det3d.py │ ├── depracted │ │ ├── jrdb_detection_3d.py │ │ └── kitti_detection_3d.py │ ├── handles │ │ ├── _pypcd.py │ │ ├── jrdb_handle.py │ │ ├── kitti_handle.py │ │ └── nuscenes_handle.py │ └── utils.py ├── detector.py ├── model │ ├── __init__.py │ ├── builder.py │ ├── model_fn.py │ └── nets │ │ ├── minkresnet.py │ │ ├── minkunet.py │ │ └── minkunet_pillar.py ├── pipeline │ ├── __init__.py │ ├── bin │ │ └── clean_log_dir.py │ ├── logger.py │ ├── loss_lib.py │ ├── optim.py │ ├── pipeline.py │ └── trainer.py ├── tests_common.py └── utils │ ├── __init__.py │ ├── eval_nuscenes.py │ ├── jrdb_transforms.py │ ├── kitti_calibration.py │ ├── utils_box3d.py │ ├── viz_mayavi.py │ ├── viz_plt.py │ └── viz_pyviz.py ├── pyrightconfig.json ├── run.sh └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *__pycache__* 3 | *_ext* 4 | *.bag 5 | *.csv 6 | *.cu.o 7 | *.DS_Store 8 | *.gif 9 | *.ipynb_checkpoints 10 | *.pkl 11 | *.png 12 | *.pth 13 | *.pyc 14 | *.so 15 | *.tfevents* 16 | *.yml 17 | 18 | .idea/ 19 | .vscode/ 20 | *.egg-info/ 21 | build/ 22 | ckpt/ 23 | ckpts/ 24 | dist/ 25 | devel/ 26 | experiments/ 27 | work_dirs/ 28 | 29 | data 30 | logs 31 | wandb 32 | work_dirs 33 | work_dirs_tmp 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Visual Computing Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Person-MinkUNet 2 | 3 | PyTorch implementation of Person-MinkUNet. 4 | Winner of JRDB 3D detection challenge in JRDB-ACT Workshop at CVPR 2021 5 | [`[arXiv]`](https://arxiv.org/abs/2107.06780) 6 | [`[video]`](https://www.youtube.com/watch?v=RnGnONoX9cU) 7 | [`[leaderboard]`](https://jrdb.erc.monash.edu/leaderboards/detection). 8 | 9 | # Prerequisite 10 | 11 | - `python>=3.8` 12 | - `torchsparse==1.2.0` [(link)](https://github.com/mit-han-lab/torchsparse) 13 | - `PyTorch==1.6.0` 14 | 15 | # Quick start 16 | 17 | Download [JackRabbot dataset](https://jrdb.stanford.edu/) under `PROJECT/data/JRDB`. 18 | 19 | ``` 20 | # install lidar_det project 21 | python setup.py develop 22 | 23 | # build libraries 24 | cd lib/iou3d 25 | python setup.py develop 26 | 27 | cd ../jrdb_det3d_eval 28 | python setup.py develop 29 | ``` 30 | 31 | Run 32 | ``` 33 | python bin/train.py --cfg PATH_TO_CFG [--ckpt PATH_TO_CKPT] [--evaluation] 34 | ``` 35 | 36 | # Model zoo 37 | 38 | | Split | Checkpoint | Config | 39 | |-------|------------|--------| 40 | | train | [ckpt](https://github.com/VisualComputingInstitute/Person_MinkUNet/releases/download/v1.0/ckpt_e40_train.pth) | [cfg](https://github.com/VisualComputingInstitute/Person_MinkUNet/releases/download/v1.0/unet_bl_voxel_jrdb_0.05_0.1.yaml) | 41 | | train + val | [ckpt](https://github.com/VisualComputingInstitute/Person_MinkUNet/releases/download/v1.0/ckpt_e40_train_val.pth) | [cfg](https://github.com/VisualComputingInstitute/Person_MinkUNet/releases/download/v1.0/unet_bl_voxel_jrdb_0.05_0.1.yaml) | 42 | 43 | # Acknowledgement 44 | 45 | - torchsparse [(link)](https://github.com/mit-han-lab/torchsparse) 46 | - PointRCNN [(link)](https://github.com/sshaoshuai/PointRCNN/tree/master/lib/utils/iou3d) 47 | 48 | # Citation 49 | ``` 50 | @inproceedings{Jia2021PersonMinkUnet, 51 | title = {{Person-MinkUNet: 3D Person Detection with LiDAR Point Cloud}}, 52 | author = {Dan Jia and Bastian Leibe}, 53 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)}, 54 | year = {2021} 55 | } 56 | ``` 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /app_main.py: -------------------------------------------------------------------------------- 1 | import io 2 | from fastapi import FastAPI, UploadFile, File, HTTPException 3 | from fastapi.responses import StreamingResponse 4 | 5 | from app_run_detector import run_detector_plot_result 6 | from lidar_det.dataset import load_pcb 7 | 8 | 9 | app = FastAPI() 10 | 11 | 12 | @app.get("/") 13 | def hello_world(): 14 | return {"Hello": "world"} 15 | 16 | 17 | @app.post("/detect") 18 | def get_detection(file: UploadFile = File(), score_threshold: float = 0.5): 19 | # validate file 20 | filename = file.filename 21 | file_extension = filename.split(".")[-1] in ("pcd") 22 | if not file_extension: 23 | raise HTTPException( 24 | status_code=415, detail="Unsupported file provided. 
Only supports pcd now.") 25 | 26 | pc_stream = io.BytesIO(file.file.read()) 27 | pc_stream.seek(0) 28 | 29 | pc = load_pcb(pc_stream) 30 | 31 | result_img = run_detector_plot_result(pc, score_threshold) 32 | 33 | memory_stream = io.BytesIO() 34 | result_img.save(memory_stream, format="PNG") 35 | memory_stream.seek(0) 36 | 37 | return StreamingResponse(memory_stream, media_type="image/png") 38 | -------------------------------------------------------------------------------- /app_run_detector.py: -------------------------------------------------------------------------------- 1 | from mayavi import mlab 2 | import numpy as np 3 | from PIL import Image 4 | 5 | import lidar_det.utils.utils_box3d as ub3d 6 | from lidar_det.detector import PersonMinkUNet 7 | 8 | # # https://docs.enthought.com/mayavi/mayavi/tips.html#rendering-using-the-virtual-framebuffer 9 | # from pyvirtualdisplay.display import Display 10 | # display = Display(visible=True, size=(1280, 1024)) 11 | # display.start() 12 | 13 | mlab.options.offscreen = True 14 | 15 | ckpt = "/globalwork/jia/archive/JRDB_cvpr21_workshop/" \ 16 | "logs/unet_bl_voxel_jrdb_0.05_0.1_20210519_232859/ckpt/ckpt_e40.pth" 17 | detector = PersonMinkUNet(ckpt) 18 | 19 | 20 | def draw_pointcloud_and_detections( 21 | pc: np.ndarray, boxes: np.ndarray, scores: np.ndarray 22 | ): 23 | fig = mlab.figure( 24 | bgcolor=(1, 1, 1), fgcolor=(0, 0, 0), engine=None, size=(800, 500), 25 | ) 26 | 27 | # points 28 | s = np.hypot(pc[0], pc[1]) 29 | # print(pc.mean(axis=1)) 30 | mpt = mlab.points3d( 31 | pc[0], 32 | pc[1], 33 | pc[2], 34 | s, 35 | colormap="blue-red", 36 | mode="sphere", 37 | scale_factor=0.06, 38 | figure=fig, 39 | ) 40 | mpt.glyph.scale_mode = "scale_by_vector" 41 | 42 | # boxes 43 | s_argsort = scores.argsort() 44 | scores = scores[s_argsort] 45 | # plot low confidence boxes first (on bottom layer) 46 | boxes = boxes[s_argsort] 47 | boxes_color = [(0.0, 1.0, 0.0)] * len(boxes) 48 | 49 | # limit the number of boxes so it does not take forever to plot 50 | if len(boxes) > 50: 51 | boxes = boxes[-50:] 52 | boxes_color = boxes_color[-50:] 53 | 54 | corners, connect_inds = ub3d.boxes_to_corners(boxes, connect_inds=True) 55 | for corner, color in zip(corners, boxes_color): 56 | for inds in connect_inds: 57 | mlab.plot3d( 58 | corner[0, inds], 59 | corner[1, inds], 60 | corner[2, inds], 61 | color=color, 62 | tube_radius=None, 63 | line_width=1.0, 64 | figure=fig, 65 | ) 66 | 67 | # print(mlab.view()) 68 | 69 | mlab.view( 70 | # azimuth=180, 71 | # elevation=180, 72 | # focalpoint=[12.0909996, -1.04700089, -2.03249991], 73 | # distance="auto", 74 | focalpoint=pc.mean(axis=1), 75 | distance=20, 76 | figure=fig, 77 | ) 78 | 79 | # mlab.show() # for finding a good view interactively 80 | 81 | # convert to image 82 | fig.scene._lift() 83 | img = mlab.screenshot(figure=fig) 84 | mlab.close(fig) 85 | 86 | return Image.fromarray(img) 87 | 88 | 89 | def run_detector_plot_result(pc: np.ndarray, score_threshold: float) -> Image.Image: 90 | boxes, scores = detector(pc) # (B 7), (B) 91 | mask = scores >= score_threshold 92 | img = draw_pointcloud_and_detections(pc, boxes[mask], scores[mask]) 93 | return img 94 | 95 | 96 | if __name__ == "__main__": 97 | import yaml 98 | from lidar_det.dataset import JRDBDet3D 99 | # import matplotlib.pyplot as plt 100 | 101 | cfg_file = "/globalwork/jia/archive/JRDB_cvpr21_workshop/logs/" \ 102 | "unet_bl_voxel_jrdb_0.05_0.1_20210519_232859/backup/" \ 103 | "unet_bl_voxel_jrdb_0.05_0.1.yaml" 104 | with open(cfg_file, "r") as f: 105 | cfg 
= yaml.safe_load(f) 106 | 107 | cfg["dataset"]["target_mode"] = cfg["model"]["target_mode"] 108 | cfg["dataset"]["num_anchors"] = cfg["model"]["kwargs"]["num_anchors"] 109 | cfg["dataset"]["num_ori_bins"] = cfg["model"]["kwargs"]["num_ori_bins"] 110 | 111 | pc_path = "/globalwork/datasets/JRDB_may17" 112 | dataset = JRDBDet3D(pc_path, "test", cfg["dataset"]) 113 | 114 | # pc = dataset[0]["points"] 115 | # img = run_detector_plot_result(pc, 0.5) 116 | 117 | # # plot plt 118 | # fig = plt.figure(figsize=(7, 5)) 119 | # plt.imshow(img) 120 | # plt.show() 121 | -------------------------------------------------------------------------------- /bin/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import yaml 3 | import torch 4 | 5 | from lidar_det.dataset import get_dataloader 6 | from lidar_det.pipeline import Pipeline 7 | from lidar_det.model import get_model 8 | 9 | 10 | def run_training(model, pipeline, cfg): 11 | # main train loop 12 | train_loader = get_dataloader( 13 | split="train", shuffle=True, dataset_cfg=cfg["dataset"], **cfg["dataloader"] 14 | ) 15 | val_loader = get_dataloader( 16 | split="val", shuffle=False, dataset_cfg=cfg["dataset"], **cfg["dataloader"] 17 | ) 18 | status = pipeline.train(model, train_loader, val_loader) 19 | 20 | # # test after training 21 | # if not status: 22 | # test_loader = get_dataloader( 23 | # split="val", 24 | # shuffle=False, 25 | # dataset_cfg=cfg["dataset"], 26 | # num_workers=1, 27 | # batch_size=1, 28 | # ) 29 | # pipeline.evaluate(model, test_loader, tb_prefix="VAL") 30 | 31 | 32 | def run_evaluation(model, pipeline, cfg): 33 | val_loader = get_dataloader( 34 | split="val", 35 | shuffle=False, 36 | dataset_cfg=cfg["dataset"], 37 | num_workers=1, 38 | batch_size=1, 39 | ) 40 | pipeline.evaluate(model, val_loader, tb_prefix="VAL", rm_files=False) 41 | 42 | # test_loader = get_dataloader( 43 | # split="test", 44 | # shuffle=False, 45 | # dataset_cfg=cfg["dataset"], 46 | # num_workers=1, 47 | # batch_size=1, 48 | # ) 49 | # pipeline.evaluate(model, test_loader, tb_prefix="TEST", rm_files=False) 50 | 51 | 52 | if __name__ == "__main__": 53 | # Run benchmark to select fastest implementation of ops. 
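    # (cuDNN autotuning: picks the fastest convolution algorithms, which helps most when
    # input sizes stay constant across iterations and may add warm-up overhead otherwise.)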
54 | torch.backends.cudnn.benchmark = True 55 | 56 | parser = argparse.ArgumentParser(description="arg parser") 57 | parser.add_argument( 58 | "--cfg", type=str, required=True, help="configuration of the experiment" 59 | ) 60 | parser.add_argument("--ckpt", type=str, required=False, default=None) 61 | parser.add_argument("--cont", default=False, action="store_true") 62 | parser.add_argument("--tmp", default=False, action="store_true") 63 | parser.add_argument("--bs_one", default=False, action="store_true") 64 | parser.add_argument("--evaluation", default=False, action="store_true") 65 | args = parser.parse_args() 66 | 67 | with open(args.cfg, "r") as f: 68 | cfg = yaml.safe_load(f) 69 | cfg["pipeline"]["Logger"]["backup_list"].append(args.cfg) 70 | if args.evaluation: 71 | cfg["pipeline"]["Logger"]["tag"] += "_EVAL" 72 | if args.tmp: 73 | cfg["pipeline"]["Logger"]["tag"] = "TMP_" + cfg["pipeline"]["Logger"]["tag"] 74 | if args.bs_one: 75 | cfg["dataloader"]["batch_size"] = 1 76 | 77 | cfg["dataset"]["target_mode"] = cfg["model"]["target_mode"] 78 | cfg["dataset"]["num_anchors"] = cfg["model"]["kwargs"]["num_anchors"] 79 | cfg["dataset"]["num_ori_bins"] = cfg["model"]["kwargs"]["num_ori_bins"] 80 | if cfg["dataset"]["name"] == "nuScenes": 81 | nc = len(cfg["dataset"]["included_classes"]) 82 | cfg["model"]["kwargs"]["num_classes"] = nc if nc > 0 else 10 83 | cfg["model"]["nuscenes"] = True 84 | cfg["model"]["kwargs"]["input_dim"] = 3 + len( 85 | cfg["dataset"]["additional_features"] 86 | ) 87 | else: 88 | cfg["model"]["kwargs"]["num_classes"] = 1 89 | cfg["model"]["nuscenes"] = False 90 | cfg["model"]["kwargs"]["input_dim"] = 3 91 | 92 | model = get_model(cfg["model"]) 93 | model.cuda() 94 | 95 | pipeline = Pipeline(model, cfg["pipeline"]) 96 | 97 | if args.ckpt: 98 | pipeline.load_ckpt(model, args.ckpt) 99 | elif args.cont and pipeline.sigterm_ckpt_exists(): 100 | pipeline.load_sigterm_ckpt(model) 101 | 102 | # training or evaluation 103 | if not args.evaluation: 104 | run_training(model, pipeline, cfg) 105 | else: 106 | run_evaluation(model, pipeline, cfg) 107 | 108 | pipeline.close() 109 | -------------------------------------------------------------------------------- /cfg/unet_bl_voxel_jrdb_0.05_0.1.yaml: -------------------------------------------------------------------------------- 1 | dataloader: 2 | batch_size: 36 3 | num_workers: 4 4 | dataset: 5 | additional_features: 6 | - intensity 7 | - time 8 | augmentation: true 9 | canonical: false 10 | included_classes: 11 | - pedestrian 12 | name: JRDB 13 | nsweeps: 10 14 | num_points: 30000 15 | voxel_size: 16 | - 0.05 17 | - 0.05 18 | - 0.1 19 | model: 20 | disentangled_loss: false 21 | kwargs: 22 | cr: 1.0 23 | fpn: false 24 | num_anchors: 1 25 | num_ori_bins: 12 26 | run_up: true 27 | target_mode: 2 28 | type: MinkUNet 29 | pipeline: 30 | Logger: 31 | backup_list: [] 32 | log_dir: ./logs/ 33 | log_fname: log.txt 34 | tag: unet_bl_voxel_jrdb_0.05_0.1 35 | use_timestamp: true 36 | Optim: 37 | scheduler_kwargs: 38 | epoch0: 15 39 | epoch1: 40 40 | lr0: 0.001 41 | lr1: 1.0e-06 42 | Trainer: 43 | ckpt_interval: 8 44 | epoch: 40 45 | eval_interval: 100000.0 46 | grad_norm_clip: -1.0 47 | -------------------------------------------------------------------------------- /filter_submission.py: -------------------------------------------------------------------------------- 1 | # remove low confidence detections from the submission file 2 | 3 | import os 4 | import numpy as np 5 | 6 | THRESH = 1e-3 7 | SUBMISSION_DIR = 
"/home/jia/jrdb_person_minkunet" 8 | OUT_DIR = "/home/jia/jrdb_person_minkunet_new" 9 | 10 | paths = os.listdir(SUBMISSION_DIR) 11 | for i, path in enumerate(paths): 12 | print(f"process [{i}/{len(paths)}] {path}") 13 | seq_path = os.path.join(SUBMISSION_DIR, path) 14 | seq_path_new = os.path.join(OUT_DIR, path) 15 | os.makedirs(seq_path_new, exist_ok=True) 16 | 17 | for file in os.listdir(seq_path): 18 | det_path = os.path.join(seq_path, file) 19 | with open(det_path, "r") as f: 20 | lines = f.readlines() 21 | confs = np.asarray([float(line.strip("\n").split(" ")[-1]) for line in lines]) 22 | lines_new = np.asarray(lines)[confs > THRESH] 23 | # print(len(lines), len(lines_new)) 24 | 25 | det_path_new = os.path.join(seq_path_new, file) 26 | with open(det_path_new, "w") as f: 27 | f.writelines(lines_new.tolist()) 28 | 29 | -------------------------------------------------------------------------------- /lib/iou3d/iou3d/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import iou3d_cuda 3 | 4 | 5 | # From https://github.com/sshaoshuai/PointRCNN/blob/master/lib/utils/iou3d/iou3d_utils.py 6 | 7 | 8 | def boxes3d_to_bev_torch(boxes3d): 9 | """ 10 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 11 | :return: 12 | boxes_bev: (N, 5) [x1, y1, x2, y2, ry] 13 | """ 14 | boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) 15 | 16 | cu, cv = boxes3d[:, 0], boxes3d[:, 2] 17 | half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2 18 | boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w 19 | boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w 20 | boxes_bev[:, 4] = boxes3d[:, 6] 21 | return boxes_bev 22 | 23 | 24 | def boxes_iou_bev(boxes_a, boxes_b): 25 | """ 26 | :param boxes_a: (M, 5) 27 | :param boxes_b: (N, 5) 28 | :return: 29 | ans_iou: (M, N) 30 | """ 31 | 32 | ans_iou = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() 33 | 34 | iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) 35 | 36 | return ans_iou 37 | 38 | 39 | def boxes_iou3d_gpu(boxes_a, boxes_b): 40 | """ 41 | :param boxes_a: (M, 7) [x, y, z, h, w, l, ry] 42 | :param boxes_b: (N, 7) [x, y, z, h, w, l, ry] 43 | :return: 44 | ans_iou: (M, N) 45 | """ 46 | boxes_a_bev = boxes3d_to_bev_torch(boxes_a) 47 | boxes_b_bev = boxes3d_to_bev_torch(boxes_b) 48 | 49 | # bev overlap 50 | overlaps_bev = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M) 51 | iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), overlaps_bev) 52 | 53 | # height overlap 54 | boxes_a_height_min = (boxes_a[:, 1] - boxes_a[:, 3]).view(-1, 1) 55 | boxes_a_height_max = boxes_a[:, 1].view(-1, 1) 56 | boxes_b_height_min = (boxes_b[:, 1] - boxes_b[:, 3]).view(1, -1) 57 | boxes_b_height_max = boxes_b[:, 1].view(1, -1) 58 | 59 | max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min) 60 | min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max) 61 | overlaps_h = torch.clamp(min_of_max - max_of_min, min=0) 62 | 63 | # 3d iou 64 | overlaps_3d = overlaps_bev * overlaps_h 65 | 66 | vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1) 67 | vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1) 68 | 69 | iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-7) 70 | 71 | return iou3d 72 | 73 | 74 | def nms_gpu(boxes, scores, thresh): 75 | """ 76 | :param boxes: (N, 5) [x1, y1, x2, y2, ry] 77 | :param scores: (N) 78 | 
:param thresh: 79 | :return: 80 | """ 81 | # areas = (x2 - x1) * (y2 - y1) 82 | order = scores.sort(0, descending=True)[1] 83 | 84 | boxes = boxes[order].contiguous() 85 | 86 | keep = torch.LongTensor(boxes.size(0)) 87 | num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh) 88 | return order[keep[:num_out].cuda()].contiguous() 89 | 90 | 91 | def nms_normal_gpu(boxes, scores, thresh): 92 | """ 93 | :param boxes: (N, 5) [x1, y1, x2, y2, ry] 94 | :param scores: (N) 95 | :param thresh: 96 | :return: 97 | """ 98 | # areas = (x2 - x1) * (y2 - y1) 99 | order = scores.sort(0, descending=True)[1] 100 | 101 | boxes = boxes[order].contiguous() 102 | 103 | keep = torch.LongTensor(boxes.size(0)) 104 | num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh) 105 | return order[keep[:num_out].cuda()].contiguous() 106 | 107 | 108 | def nms_dist_gpu(boxes, scores, l_ave, w_ave, thresh): 109 | """ 110 | :param boxes: (N, 5) [x1, y1, x2, y2, ry] 111 | :param scores: (N) 112 | :param thresh: 113 | :return: 114 | """ 115 | # areas = (x2 - x1) * (y2 - y1) 116 | order = scores.sort(0, descending=True)[1] 117 | 118 | boxes = boxes[order].contiguous() 119 | 120 | keep = torch.LongTensor(boxes.size(0)) 121 | num_out = iou3d_cuda.nms_dist_gpu(boxes, keep, l_ave, w_ave, thresh) 122 | return order[keep[:num_out].cuda()].contiguous() 123 | 124 | 125 | if __name__ == '__main__': 126 | pass 127 | -------------------------------------------------------------------------------- /lib/iou3d/iou3d/src/iou3d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ") 8 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 9 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 10 | 11 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 12 | 13 | #define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); } 14 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 15 | { 16 | if (code != cudaSuccess) 17 | { 18 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 19 | if (abort) exit(code); 20 | } 21 | } 22 | 23 | const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; 24 | 25 | 26 | void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap); 27 | void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou); 28 | void nmsLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh); 29 | void nmsNormalLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh); 30 | void nmsDistLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float l_ave, float w_ave, float nms_overlap_thresh); 31 | 32 | int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){ 33 | // params boxes_a: (N, 5) [x1, y1, x2, y2, ry] 34 | // params boxes_b: (M, 5) 35 | // params ans_overlap: (N, M) 36 | 37 | CHECK_INPUT(boxes_a); 38 | CHECK_INPUT(boxes_b); 39 | CHECK_INPUT(ans_overlap); 40 | 41 | int num_a = boxes_a.size(0); 42 | int num_b = boxes_b.size(0); 43 | 44 | const float * boxes_a_data = boxes_a.data(); 45 | const float * boxes_b_data = boxes_b.data(); 46 | float * ans_overlap_data = ans_overlap.data(); 47 | 48 | 
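    // Launch the CUDA kernel that fills the (N, M) ans_overlap matrix with the
    // pairwise overlap areas of the oriented bird's-eye-view boxes.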
boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data); 49 | 50 | return 1; 51 | } 52 | 53 | int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou){ 54 | // params boxes_a: (N, 5) [x1, y1, x2, y2, ry] 55 | // params boxes_b: (M, 5) 56 | // params ans_overlap: (N, M) 57 | 58 | CHECK_INPUT(boxes_a); 59 | CHECK_INPUT(boxes_b); 60 | CHECK_INPUT(ans_iou); 61 | 62 | int num_a = boxes_a.size(0); 63 | int num_b = boxes_b.size(0); 64 | 65 | const float * boxes_a_data = boxes_a.data(); 66 | const float * boxes_b_data = boxes_b.data(); 67 | float * ans_iou_data = ans_iou.data(); 68 | 69 | boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data); 70 | 71 | return 1; 72 | } 73 | 74 | int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){ 75 | // params boxes: (N, 5) [x1, y1, x2, y2, ry] 76 | // params keep: (N) 77 | 78 | CHECK_INPUT(boxes); 79 | CHECK_CONTIGUOUS(keep); 80 | 81 | int boxes_num = boxes.size(0); 82 | const float * boxes_data = boxes.data(); 83 | long * keep_data = keep.data(); 84 | 85 | const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); 86 | 87 | unsigned long long *mask_data = NULL; 88 | CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long))); 89 | nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); 90 | 91 | // unsigned long long mask_cpu[boxes_num * col_blocks]; 92 | // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks]; 93 | std::vector mask_cpu(boxes_num * col_blocks); 94 | 95 | // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); 96 | CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long), 97 | cudaMemcpyDeviceToHost)); 98 | 99 | cudaFree(mask_data); 100 | 101 | unsigned long long remv_cpu[col_blocks]; 102 | memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); 103 | 104 | int num_to_keep = 0; 105 | 106 | for (int i = 0; i < boxes_num; i++){ 107 | int nblock = i / THREADS_PER_BLOCK_NMS; 108 | int inblock = i % THREADS_PER_BLOCK_NMS; 109 | 110 | if (!(remv_cpu[nblock] & (1ULL << inblock))){ 111 | keep_data[num_to_keep++] = i; 112 | unsigned long long *p = &mask_cpu[0] + i * col_blocks; 113 | for (int j = nblock; j < col_blocks; j++){ 114 | remv_cpu[j] |= p[j]; 115 | } 116 | } 117 | } 118 | if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" ); 119 | 120 | return num_to_keep; 121 | } 122 | 123 | 124 | int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){ 125 | // params boxes: (N, 5) [x1, y1, x2, y2, ry] 126 | // params keep: (N) 127 | 128 | CHECK_INPUT(boxes); 129 | CHECK_CONTIGUOUS(keep); 130 | 131 | int boxes_num = boxes.size(0); 132 | const float * boxes_data = boxes.data(); 133 | long * keep_data = keep.data(); 134 | 135 | const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); 136 | 137 | unsigned long long *mask_data = NULL; 138 | CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long))); 139 | nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); 140 | 141 | // unsigned long long mask_cpu[boxes_num * col_blocks]; 142 | // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks]; 143 | std::vector mask_cpu(boxes_num * col_blocks); 144 | 145 | // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); 146 | CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long 
long), 147 | cudaMemcpyDeviceToHost)); 148 | 149 | cudaFree(mask_data); 150 | 151 | unsigned long long remv_cpu[col_blocks]; 152 | memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); 153 | 154 | int num_to_keep = 0; 155 | 156 | for (int i = 0; i < boxes_num; i++){ 157 | int nblock = i / THREADS_PER_BLOCK_NMS; 158 | int inblock = i % THREADS_PER_BLOCK_NMS; 159 | 160 | if (!(remv_cpu[nblock] & (1ULL << inblock))){ 161 | keep_data[num_to_keep++] = i; 162 | unsigned long long *p = &mask_cpu[0] + i * col_blocks; 163 | for (int j = nblock; j < col_blocks; j++){ 164 | remv_cpu[j] |= p[j]; 165 | } 166 | } 167 | } 168 | if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" ); 169 | 170 | return num_to_keep; 171 | } 172 | 173 | 174 | int nms_dist_gpu(at::Tensor boxes, at::Tensor keep, float l_ave, float w_ave, float nms_overlap_thresh){ 175 | // params boxes: (N, 5) [x1, y1, x2, y2, ry] 176 | // params keep: (N) 177 | 178 | CHECK_INPUT(boxes); 179 | CHECK_CONTIGUOUS(keep); 180 | 181 | int boxes_num = boxes.size(0); 182 | const float * boxes_data = boxes.data(); 183 | long * keep_data = keep.data(); 184 | 185 | const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); 186 | 187 | unsigned long long *mask_data = NULL; 188 | CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long))); 189 | nmsDistLauncher(boxes_data, mask_data, boxes_num, l_ave, w_ave, nms_overlap_thresh); 190 | 191 | // unsigned long long mask_cpu[boxes_num * col_blocks]; 192 | // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks]; 193 | std::vector mask_cpu(boxes_num * col_blocks); 194 | 195 | // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); 196 | CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long), 197 | cudaMemcpyDeviceToHost)); 198 | 199 | cudaFree(mask_data); 200 | 201 | unsigned long long remv_cpu[col_blocks]; 202 | memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); 203 | 204 | int num_to_keep = 0; 205 | 206 | for (int i = 0; i < boxes_num; i++){ 207 | int nblock = i / THREADS_PER_BLOCK_NMS; 208 | int inblock = i % THREADS_PER_BLOCK_NMS; 209 | 210 | if (!(remv_cpu[nblock] & (1ULL << inblock))){ 211 | keep_data[num_to_keep++] = i; 212 | unsigned long long *p = &mask_cpu[0] + i * col_blocks; 213 | for (int j = nblock; j < col_blocks; j++){ 214 | remv_cpu[j] |= p[j]; 215 | } 216 | } 217 | } 218 | if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" ); 219 | 220 | return num_to_keep; 221 | } 222 | 223 | 224 | 225 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 226 | m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu, "oriented boxes overlap"); 227 | m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou"); 228 | m.def("nms_gpu", &nms_gpu, "oriented nms gpu"); 229 | m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu"); 230 | m.def("nms_dist_gpu", &nms_dist_gpu, "oriented distance based nms gpu"); 231 | } 232 | 233 | -------------------------------------------------------------------------------- /lib/iou3d/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | # From https://github.com/sshaoshuai/PointRCNN/tree/master/lib/utils/iou3d 5 | 6 | setup( 7 | name='iou3d', 8 | packages=find_packages(), 9 | ext_modules=[ 10 | CUDAExtension('iou3d_cuda', [ 11 | 'iou3d/src/iou3d.cpp', 12 | 
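            # CUDA kernels implementing rotated-box overlap/IoU and the NMS variants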
'iou3d/src/iou3d_kernel.cu', 13 | ], 14 | extra_compile_args={'cxx': ['-g'], 15 | 'nvcc': ['-O2']}) 16 | ], 17 | cmdclass={'build_ext': BuildExtension}) 18 | -------------------------------------------------------------------------------- /lib/jrdb_det3d_eval/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/jrdb_det3d_eval/example.py: -------------------------------------------------------------------------------- 1 | from jrdb_det3d_eval import eval_jrdb 2 | 3 | 4 | det_dir = "/globalwork/jia/dr_spaam_output/logs/20201210_124150_jrdb_det_3d_EVAL/output/val/e000000/detections" 5 | # gt_dir = "/globalwork/jia/jrdb_eval/val" 6 | gt_dir = "/globalwork/datasets/JRDB/train_dataset/labels_kitti" 7 | 8 | ap_dict = eval_jrdb(gt_dir, det_dir) 9 | for k, v in ap_dict.items(): 10 | print(k, v) 11 | -------------------------------------------------------------------------------- /lib/jrdb_det3d_eval/jrdb_det3d_eval/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | import numpy as np 4 | import shutil 5 | 6 | from .kitti_common import get_label_annos 7 | from .eval import get_official_eval_result 8 | 9 | 10 | def _eval_seq(gt_annos, det_annos): 11 | with warnings.catch_warnings(): 12 | # so numba compile warning does not pollute logs 13 | warnings.simplefilter("ignore") 14 | result_str, result_dict = get_official_eval_result(gt_annos, det_annos, 1) 15 | 16 | # print(result_str) 17 | # for k, v in result_dict.items(): 18 | # print(k, v) 19 | seq_ap = result_dict["Pedestrian_3d/moderate_R40"] 20 | 21 | return seq_ap 22 | 23 | 24 | def eval_jrdb(gt_dir, det_dir, rm_det_files=False): 25 | # gt_sequences = sorted(os.listdir(gt_dir)) 26 | det_sequences = sorted(os.listdir(det_dir)) 27 | # assert gt_sequences == det_sequences 28 | 29 | ap_dict = {} 30 | 31 | # per sequence eval 32 | seq_ap, seq_len = [], [] 33 | for idx, seq in enumerate(det_sequences): 34 | print(f"({idx + 1}/{len(det_sequences)}) Evaluating {seq}") 35 | 36 | gt_annos = get_label_annos(os.path.join(gt_dir, seq)) 37 | det_annos = get_label_annos(os.path.join(det_dir, seq)) 38 | 39 | ap_dict[seq] = _eval_seq(gt_annos, det_annos) 40 | print(f"{seq}, AP={ap_dict[seq]:.4f}, len={len(gt_annos)}") 41 | 42 | 
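        # Keep per-sequence AP and frame count for the length-weighted average computed below.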
seq_ap.append(ap_dict[seq]) 43 | seq_len.append(len(gt_annos)) 44 | 45 | # NOTE Jointly evaluating all sequences crashes, don't know why. Use average 46 | # AP of all sequences instead. 47 | print("Evaluating whole set") 48 | seq_ap = np.array(seq_ap) 49 | seq_len = np.array(seq_len) 50 | ap_dict["all"] = np.sum(seq_ap * (seq_len / seq_len.sum())) 51 | print(f"Whole set, AP={ap_dict['all']:.4f}, len={seq_len.sum()}") 52 | 53 | if rm_det_files: 54 | shutil.rmtree(det_dir) 55 | 56 | return ap_dict 57 | -------------------------------------------------------------------------------- /lib/jrdb_det3d_eval/jrdb_det3d_eval/evaluate.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import fire 4 | 5 | import .kitti_common as kitti 6 | from .eval import get_coco_eval_result, get_official_eval_result 7 | 8 | 9 | def _read_imageset_file(path): 10 | with open(path, 'r') as f: 11 | lines = f.readlines() 12 | return [int(line) for line in lines] 13 | 14 | 15 | def evaluate(label_path, 16 | result_path, 17 | label_split_file, 18 | current_class=0, 19 | coco=False, 20 | score_thresh=-1): 21 | dt_annos = kitti.get_label_annos(result_path) 22 | if score_thresh > 0: 23 | dt_annos = kitti.filter_annos_low_score(dt_annos, score_thresh) 24 | val_image_ids = _read_imageset_file(label_split_file) 25 | gt_annos = kitti.get_label_annos(label_path, val_image_ids) 26 | if coco: 27 | return get_coco_eval_result(gt_annos, dt_annos, current_class) 28 | else: 29 | return get_official_eval_result(gt_annos, dt_annos, current_class) 30 | 31 | 32 | if __name__ == '__main__': 33 | fire.Fire() 34 | -------------------------------------------------------------------------------- /lib/jrdb_det3d_eval/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # Modified from https://github.com/sshaoshuai/PointRCNN/tree/master/tools/kitti_object_eval_python 4 | 5 | # NOTE Only intended as a rough estimate and gives 1-4 higher AP than the official 6 | # C++ evaluation code. Does not work for some sequences, possibily because of bounding 7 | # boxes above ground. Takes about 6 minutes for the validation set on my computer 8 | 9 | setup( 10 | name='jrdb_det3d_eval', 11 | packages=find_packages(), 12 | ) 13 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/detection_eval/README.md: -------------------------------------------------------------------------------- 1 | # JRDB 2D/3D object detection evaluation script 2 | 3 | ## Overview 4 | 5 | This file describes the JRDB 2D/3D object detection evaluation script. 6 | For overview of the dataset and documentation of the data and label format, 7 | please refer to our website: https://jrdb.stanford.edu 8 | 9 | ## Label Format 10 | 11 | The evaluation script expects a folder in a following structure: 12 | 13 | ``` 14 | cubberly-auditorium-2019-04-22_1/ 15 | ├── 000000.txt 16 | ├── 000001.txt 17 | │ ... 18 | └── 001078.txt 19 | ... 20 | tressider-2019-04-26_3/ 21 | ├── 000000.txt 22 | ├── 000001.txt 23 | │ ... 24 | └── 001658.txt 25 | ``` 26 | 27 | Each subfolder represents a sequence and each text file within the subfolder 28 | is a label file of a given frame. The label files contain the following 29 | information. All values (numerical or strings) are separated via spaces, each 30 | row corresponds to one object. 
The 17 columns represent: 31 | 32 | ``` 33 | #Values Name Description 34 | ---------------------------------------------------------------------------- 35 | 1 type 'Pedestrian' is the only valid type as of now. 36 | 1 truncated Integer 0 (non-truncated) and 1 (truncated), where 37 | truncated refers to the object leaving image boundaries 38 | * May be an arbitrary value for evaluation. 39 | 1 occluded Integer (0, 1, 2, 3) indicating occlusion state: 40 | 0 = fully visible, 1 = mostly visible 41 | 2 = severely occluded, 3 = fully occluded 42 | * May be an arbitrary value for evaluation. 43 | 1 num_points Integer, number of points within a 3D bounding box. 44 | * May be an arbitrary value for evaluation. 45 | * May be a negative value to indicate a 2D bounding box 46 | without corresponding 3D bounding box. 47 | 1 alpha Observation angle of object, ranging [-pi..pi] 48 | 4 bbox 2D bounding box of object in the image (0-based index): 49 | contains left, top, right, bottom pixel coordinates 50 | * May be a negative value to indicate a 3D bounding box 51 | without corresponding 2D bounding box. 52 | 3 dimensions 3D object dimensions: height, width, length (in meters) 53 | 3 location 3D object location x,y,z in camera coordinates (in meters) 54 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] 55 | 1 conf Float, indicating confidence in detection, needed for p/r 56 | curves, higher is better. 57 | * May be an arbitrary value for groundtruth. 58 | ``` 59 | 60 | ## 2D Object Detection Benchmark 61 | 62 | The goal in the 2D object detection task is to train object detectors for 63 | pedestrian in a 360 panorama image. The object detectors must provide as output 64 | the 2D 0-based bounding box in the image using the format specified above, as 65 | well as a detection score, indicating the confidence in the detection. All 66 | other values must be set to their default values. 67 | 68 | In our evaluation, we only evaluate detections on 2D bounding box larger than 69 | 500^2 pixel^2 in the image and that are not fully occluded. For evaluation 70 | criterion, inspired by PASCAL, we use 41-point interpolated AP and require the 71 | intersection-over-union of bounding boxes to be larger than 50% for an object 72 | to be detected correctly. 73 | 74 | ## 3D Object Detection Benchmark 75 | 76 | The goal in the 3D object detection task is to train object detectors for 77 | pedestrian in a lidar point clouds. The object detectors must provide the 3D 78 | bounding box (in the format specified above, i.e. 3D dimensions and 3D 79 | locations) and the detection score/confidence. All other values must be set to 80 | their default values. 81 | 82 | In our evaluation, we only evaluate detections on 3D bounding box which 83 | encloses more than 10 3D lidar points and lies within 25 meters in bird's eye 84 | view. For evaluation criterion, inspired by PASCAL, we use 41-point 85 | interpolated AP and require the intersection-over-union of bounding boxes to be 86 | larger than 30% for an object to be detected correctly. 87 | 88 | ## Suggested Validation Split 89 | 90 | We provide a suggested validation split to help parameter tune. In the paper, 91 | we show the distribution is very similar to the test dataset. 
Validation split: 92 | 93 | clark-center-2019-02-28_1 94 | gates-ai-lab-2019-02-08_0 95 | huang-2-2019-01-25_0 96 | meyer-green-2019-03-16_0 97 | nvidia-aud-2019-04-18_0 98 | tressider-2019-03-16_1 99 | tressider-2019-04-26_2 100 | 101 | ## Evaluation Protocol 102 | 103 | For transparency, we included the evaluation code. It can be compiled via: 104 | 105 | ``` 106 | g++ -O3 -o evaluate_object evaluate_object.cpp 107 | ``` 108 | 109 | ## Acknowledgement 110 | 111 | This code is a fork of KITTI 3D Object Detection development kit: 112 | http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d 113 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/detection_eval/convert_dataset_to_KITTI.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import collections 3 | import glob 4 | import json 5 | import multiprocessing as mp 6 | import os 7 | import shutil 8 | ​ 9 | import numpy as np 10 | import open3d as o3d 11 | import yaml 12 | ​ 13 | ​ 14 | IN_IMG_PATH = 'images/image_stitched/%s/%s.jpg' 15 | IN_PTC_LOWER_PATH = 'pointclouds/lower_velodyne/%s/%s.pcd' 16 | IN_PTC_UPPER_PATH = 'pointclouds/upper_velodyne/%s/%s.pcd' 17 | IN_LABELS_3D = 'processed_labels/labels_3d/*.json' 18 | IN_LABELS_2D = 'processed_labels/labels_2d_stitched/*.json' 19 | IN_DETECTIONS_2D = 'detections/*.json' 20 | IN_CALIBRATION_F = 'calibration/defaults.yaml' 21 | ​ 22 | OUT_IMG_PATH = 'image_2' 23 | OUT_PTC_PATH = 'velodyne' 24 | OUT_LABEL_PATH = 'label_2' 25 | OUT_DETECTION_PATH = 'detection' 26 | ​ 27 | ​ 28 | def parse_args(): 29 | ap = argparse.ArgumentParser() 30 | ap.add_argument('-o', 31 | '--output_dir', 32 | default='KITTI', 33 | help='location to store dataset in KITTI format') 34 | ap.add_argument('-i', 35 | '--input_dir', 36 | default='dataset', 37 | help='root directory in jrdb format') 38 | return ap.parse_args() 39 | ​ 40 | ​ 41 | def get_file_list(input_dir): 42 | def _filepath2filelist(path): 43 | return set( 44 | tuple(os.path.splitext(f)[0].split(os.sep)[-2:]) 45 | for f in glob.glob(os.path.join(input_dir, path % ('*', '*')))) 46 | ​ 47 | def _label2filelist(path, key='labels'): 48 | seq_dicts = [] 49 | for json_f in glob.glob(os.path.join(input_dir, path)): 50 | with open(json_f) as f: 51 | labels = json.load(f) 52 | seq_name = os.path.basename(os.path.splitext(json_f)[0]) 53 | seq_dicts.append({(seq_name, os.path.splitext(file_name)[0]): label 54 | for file_name, label in labels[key].items() 55 | }) 56 | return dict(collections.ChainMap(*seq_dicts)) 57 | ​ 58 | imgs = _filepath2filelist(IN_IMG_PATH) 59 | lower_ptcs = _filepath2filelist(IN_PTC_LOWER_PATH) 60 | upper_ptcs = _filepath2filelist(IN_PTC_UPPER_PATH) 61 | labels_2d = _label2filelist(IN_LABELS_2D) 62 | labels_3d = _label2filelist(IN_LABELS_3D) 63 | detections_2d = _label2filelist(IN_DETECTIONS_2D, key='detections') 64 | filelist = set.intersection( 65 | imgs, lower_ptcs, upper_ptcs, labels_2d.keys(), labels_3d.keys()) 66 | ​ 67 | return {f: (labels_2d[f], labels_3d[f], detections_2d[f]) 68 | for f in sorted(filelist)} 69 | ​ 70 | ​ 71 | def move_frame(input_dir, output_dir, calib, seq_name, file_name, labels_2d, 72 | labels_3d, detection_2d, file_idx): 73 | def _load_pointcloud(path, calib_key): 74 | ptc = np.asarray(o3d.io.read_point_cloud( 75 | os.path.join(input_dir, path % (seq_name, file_name))).points) 76 | ptc -= np.expand_dims(np.array(calib[calib_key]['translation']), 0) 77 | theta = 
-float(calib[calib_key]['rotation'][-1]) 78 | rotation_matrix = np.array( 79 | ((np.cos(theta), np.sin(theta)), (-np.sin(theta), np.cos(theta)))) 80 | ptc[:, :2] = np.squeeze( 81 | np.matmul(rotation_matrix, np.expand_dims(ptc[:, :2], 2))) 82 | return ptc 83 | ​ 84 | # Copy image 85 | shutil.copy(os.path.join(input_dir, IN_IMG_PATH % (seq_name, file_name)), 86 | os.path.join(output_dir, OUT_IMG_PATH, f'{file_idx:06d}.jpg')) 87 | ​ 88 | # Copy point cloud 89 | lower_ptc = _load_pointcloud(IN_PTC_LOWER_PATH, 'lidar_lower_to_rgb') 90 | upper_ptc = _load_pointcloud(IN_PTC_UPPER_PATH, 'lidar_upper_to_rgb') 91 | ptc = np.vstack((upper_ptc, lower_ptc)) 92 | ptc = np.hstack((ptc, np.ones((ptc.shape[0], 1)))) 93 | np.save(os.path.join(output_dir, OUT_PTC_PATH, f'{file_idx:06d}.npy'), ptc) 94 | ​ 95 | label_id_2d = set(f['label_id'] for f in labels_2d) 96 | label_id_3d = set(f['label_id'] for f in labels_3d) 97 | label_ids = label_id_2d.intersection(label_id_3d) 98 | ​ 99 | # Create label 100 | label_lines = [] 101 | for label_id in label_ids: 102 | if not label_id.startswith('pedestrian:'): 103 | continue 104 | label_2d = [l for l in labels_2d if l['label_id'] == label_id][0] 105 | label_3d = [l for l in labels_3d if l['label_id'] == label_id][0] 106 | rotation_y = (-label_3d['box']['rot_z'] 107 | if label_3d['box']['rot_z'] < np.pi else 2 * np.pi - 108 | label_3d['box']['rot_z']) 109 | label_lines.append( 110 | f"Pedestrian 0 0 {label_3d['observation_angle']} " 111 | f"{label_2d['box'][0]} {label_2d['box'][1]} " 112 | f"{label_2d['box'][0] + label_2d['box'][2]} " 113 | f"{label_2d['box'][1] + label_2d['box'][3]} " 114 | f"{label_3d['box']['h']} {label_3d['box']['w']} " 115 | f"{label_3d['box']['l']} {-label_3d['box']['cy']} " 116 | f"{-label_3d['box']['cz'] + label_3d['box']['h'] / 2} " 117 | f"{label_3d['box']['cx']} {rotation_y} 1\n" 118 | ) 119 | label_out = os.path.join(output_dir, OUT_LABEL_PATH, f'{file_idx:06d}.txt') 120 | with open(label_out, 'w') as f: 121 | f.writelines(label_lines) 122 | ​ 123 | # Create detection 124 | detection_lines = [] 125 | for detection in detection_2d: 126 | if not detection['label_id'].startswith('person:'): 127 | continue 128 | detection_lines.append( 129 | f"Pedestrian 0 0 -1 " 130 | f"{detection['box'][0]} {detection['box'][1]} " 131 | f"{detection['box'][0] + detection['box'][2]} " 132 | f"{detection['box'][1] + detection['box'][3]} " 133 | f"-1 -1 -1 -1 -1 -1 -1 {detection['score']}\n" 134 | ) 135 | detection_out = os.path.join( 136 | output_dir, OUT_DETECTION_PATH, f'{file_idx:06d}.txt') 137 | with open(detection_out, 'w') as f: 138 | f.writelines(detection_lines) 139 | ​ 140 | ​ 141 | def convert_jr2kitti(input_dir, output_dir, file_list): 142 | ​ 143 | os.makedirs(os.path.join(output_dir, OUT_IMG_PATH)) 144 | os.makedirs(os.path.join(output_dir, OUT_PTC_PATH)) 145 | os.makedirs(os.path.join(output_dir, OUT_LABEL_PATH)) 146 | os.makedirs(os.path.join(output_dir, OUT_DETECTION_PATH)) 147 | ​ 148 | with open(os.path.join(output_dir, 'filelist.txt'), 'w') as f: 149 | f.write('\n'.join(a + ' ' + b for a, b in file_list.keys())) 150 | ​ 151 | pool = mp.Pool(20) 152 | with open(os.path.join(input_dir, IN_CALIBRATION_F)) as f: 153 | calib = yaml.safe_load(f)['calibrated'] 154 | ​ 155 | pool.starmap( 156 | move_frame, 157 | [(input_dir, output_dir, calib, seq_name, file_name, label_2d, 158 | label_3d, detection_2d, idx) 159 | for idx, ((seq_name, file_name), 160 | (label_2d, label_3d, detection_2d)) 161 | in enumerate(file_list.items())]) 162 | 
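    # Copy the calibration files over unchanged so the converted KITTI-style dataset stays self-contained.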
shutil.copytree(os.path.join(input_dir, 'calibration'), 163 | os.path.join(output_dir, 'calib')) 164 | ​ 165 | ​ 166 | if __name__ == "__main__": 167 | args = parse_args() 168 | file_list = get_file_list(args.input_dir) 169 | convert_jr2kitti(args.input_dir, args.output_dir, file_list) 170 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/detection_eval/convert_labels_to_KITTI.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import collections 3 | import glob 4 | import json 5 | import os 6 | 7 | import numpy as np 8 | 9 | 10 | INPUT_3D_LABELS_PATH = 'labels_3d/*.json' 11 | INPUT_2D_LABELS_PATH = 'labels_2d_stitched/*.json' 12 | LABEL_ROOT_KEY = 'labels' 13 | ENUM_OCCLUSION = ('Fully_visible', 'Mostly_visible', 'Severely_occluded', 14 | 'Fully_occluded') 15 | 16 | 17 | def parse_args(): 18 | ap = argparse.ArgumentParser() 19 | ap.add_argument('-o', 20 | '--output_kitti_dir', 21 | default='KITTI', 22 | help='location of the output KITTI-like labels') 23 | ap.add_argument('-i', 24 | '--input_jrdb_dir', 25 | default='test_dataset/labels', 26 | help='location of the input jrdb labels') 27 | return ap.parse_args() 28 | 29 | 30 | def get_labels(input_dir): 31 | """Read label directory 32 | 33 | Args: 34 | input_dir (str): Input directory of the jrdb labels. 35 | 36 | Returns: 37 | dict: {(seq_name, seq_idx) -> ([labels_2d, ...], [labels_3d, ...])} 38 | """ 39 | def _parse_label_path(path): 40 | """Read label path of 2D/3D labels 41 | 42 | Args: 43 | path (str): Input path of the jrdb labels. 44 | 45 | Returns: 46 | dict: {(seq_name, seq_idx) -> [labels, ...]} 47 | """ 48 | seq_dicts = [] 49 | for json_f in glob.glob(os.path.join(input_dir, path)): 50 | with open(json_f) as f: 51 | labels = json.load(f) 52 | seq_name = os.path.basename(os.path.splitext(json_f)[0]) 53 | seq_dicts.append({ 54 | (seq_name, os.path.splitext(file_name)[0]): 55 | label for file_name, label in labels[LABEL_ROOT_KEY].items()}) 56 | return dict(collections.ChainMap(*seq_dicts)) 57 | 58 | # Read 2D/3D label files. 59 | labels_2d = _parse_label_path(INPUT_2D_LABELS_PATH) 60 | labels_3d = _parse_label_path(INPUT_3D_LABELS_PATH) 61 | 62 | # Check if all 2D/3D sequence name/index matches. 63 | if set(labels_2d) != set(labels_3d): 64 | raise ValueError('Input jrdb 2D and 3D sequences mismatch') 65 | 66 | return {f: (labels_2d[f], labels_3d[f]) for f in sorted(labels_2d)} 67 | 68 | 69 | def convert_jr2kitti(labels, output_dir): 70 | """Write jrdb labels to output_dir in KITTI-like format of text file. 71 | 72 | Args: 73 | labels (dict): {(seq_name, seq_idx) -> 74 | ([labels_2d, ...], [labels_3d, ...])} 75 | output_dir (str): Output directory of the converted label. 76 | """ 77 | def _label_key(label): 78 | return label['label_id'] 79 | 80 | # Parse all sequences of the given label. 81 | for (seq_name, seq_idx), (labels_2d, labels_3d) in labels.items(): 82 | # Join 2D/3D labels based on the given label key. 83 | labels_2d = {_label_key(label): label for label in labels_2d} 84 | labels_3d = {_label_key(label): label for label in labels_3d} 85 | label_all = { 86 | k: (labels_2d.get(k), labels_3d.get(k)) 87 | for k in set(labels_2d).union(labels_3d) 88 | } 89 | 90 | # Parse each pedestrian in a given sequence. 91 | label_lines = [] 92 | for label_2d, label_3d in label_all.values(): 93 | # Sanity check. 
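            # When both a 2D and a 3D label are present they must share the same
            # label_id; every joined entry must carry at least one of the two.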
94 | if label_2d is not None and label_3d is not None: 95 | assert _label_key(label_2d) == _label_key(label_3d) 96 | assert not (label_2d is None and label_3d is None) 97 | 98 | # Ignore all labels else than pedestrian. 99 | if not _label_key(label_2d or label_3d).startswith('pedestrian:'): 100 | continue 101 | 102 | # Initialize all label attributes 103 | rotation_y, num_points_3d, alpha, height_3d = -1, -1, -1, -1 104 | width_3d, length_3d, centerx_3d, centery_3d = -1, -1, -1, -1 105 | centerz_3d, x1_2d, y1_2d, x2_2d, y2_2d = -1, -1, -1, -1, -1 106 | truncated, occlusion = -1, -1 107 | 108 | # Fill in values extracted from 2D label. 109 | if label_2d is not None: 110 | x1_2d = label_2d['box'][0] 111 | y1_2d = label_2d['box'][1] 112 | x2_2d = label_2d['box'][0] + label_2d['box'][2] 113 | y2_2d = label_2d['box'][1] + label_2d['box'][3] 114 | attributes_2d = label_2d['attributes'] 115 | truncated = int(attributes_2d['truncated'].lower() == 'true') 116 | occlusion = ENUM_OCCLUSION.index(attributes_2d['occlusion']) 117 | 118 | # Fill in values extracted from 3D label. 119 | if label_3d is not None: 120 | rotation_y = (-label_3d['box']['rot_z'] if 121 | label_3d['box']['rot_z'] < np.pi else 122 | 2 * np.pi - label_3d['box']['rot_z']) 123 | attributes_3d = label_3d['attributes'] 124 | num_points_3d = attributes_3d['num_points'] 125 | alpha = label_3d['observation_angle'] 126 | height_3d = label_3d['box']['h'] 127 | width_3d = label_3d['box']['w'] 128 | length_3d = label_3d['box']['l'] 129 | centerx_3d = -label_3d['box']['cy'] 130 | centery_3d = -label_3d['box']['cz'] + label_3d['box']['h'] / 2 131 | centerz_3d = label_3d['box']['cx'] 132 | 133 | # Append a line of text in a KITTI-like format. 134 | label_lines.append( 135 | "Pedestrian %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s 1\n" % \ 136 | (truncated, occlusion, num_points_3d, alpha, x1_2d, y1_2d, x2_2d, y2_2d, height_3d, width_3d, length_3d, centerx_3d, centery_3d, centerz_3d, rotation_y) 137 | ) 138 | 139 | # Write label text file to the output directory. 140 | seq_dir = os.path.join(output_dir, seq_name) 141 | os.makedirs(seq_dir, exist_ok=True) 142 | with open(os.path.join(seq_dir, str(seq_idx)+'.txt'), 'w') as f: 143 | f.writelines(label_lines) 144 | 145 | 146 | if __name__ == "__main__": 147 | args = parse_args() 148 | labels = get_labels(args.input_jrdb_dir) 149 | convert_jr2kitti(labels, args.output_kitti_dir) 150 | 151 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/detection_eval/evaluate_object: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VisualComputingInstitute/Person_MinkUNet/e582bd69ca8fb14268a1414e2534d6c509ac24d2/lib/jrdb_devkit/detection_eval/evaluate_object -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/README.md: -------------------------------------------------------------------------------- 1 | # JRDB 2D/3D tracking evaluation script 2 | 3 | ## Overview 4 | 5 | This file describes the JRDB 2D/3D tracking evaluation script. 6 | For overview of the dataset and documentation of the data and label format, 7 | please refer to our website: https://jrdb.stanford.edu 8 | 9 | ## Label Format 10 | 11 | The evaluation script expects a folder in a following structure: 12 | 13 | ``` 14 | cubberly-auditorium-2019-04-22_1/gt/ 15 | ├── gt.txt 16 | └── 3d_gt.txt 17 | ... 
18 | tressider-2019-04-26_3/gt/ 19 | ├── gt.txt 20 | └── 3d_.txt 21 | ``` 22 | 23 | Each subfolder represents a sequence and there is one text file for 2d and 3d 24 | tracking. The label files contain the following information. All values 25 | (numerical or strings) are separated via spaces, eachrow corresponds to one 26 | object. The 13 columns represent: 27 | 28 | ``` 29 | ----------------------------------------------------------------------------- 30 | #Values Name Description 31 | ----------------------------------------------------------------------------- 32 | 1 frame An integer representing the frame number, 33 | where the object appears, 34 | 1 id An integer identifies a unique ID for the 35 | object, which should be an identical value 36 | for the same object in all frames (belonging to 37 | a trajectory) 38 | 4 bbox 2D bounding box of an object in the image. 39 | Contains top-left pixel coordinates and 40 | box width and height in pixels - , 41 | , , 42 | 3 location 3D object center location in camera coordinates 43 | (in meters) i.e. , , , 44 | 3 dimension 3D object dimensions: length, height and width 45 | (in meter), i.e. , , 46 | 1 rotation_y Rotation ry around Y-axis in camera coordinates 47 | [-pi..pi] 48 | 1 conf Float, indicating confidence in detection 49 | (higher is better). 50 | * May be an arbitrary value for groundtruth. 51 | ----------------------------------------------------------------------------- 52 | ``` 53 | 54 | The conf value contains the detection confidence in the det.txt files. 55 | For a submission, it acts as a flag whether the entry is to be considered. 56 | A value of 0 means that this particular instance is ignored in the 57 | evaluation, while any other value can be used to mark it as active. For 58 | submitted results, all lines in the .txt file with a confidence of 1 are 59 | considered. Fields which are not used, such as 2D bounding box for 3D 60 | tracking or location, dimension, and rotation_y for 2D tracking, must be 61 | set to -1. 62 | 63 | 64 | ## 2D/3D Tracking Benchmark 65 | 66 | The primary metric we use to evaluate tracking is MOTA, which combines 67 | false positives, false negatives, and id switches. We also report MOTP, 68 | which is a measure of the localisation accuracy of the tracking algorithm. 69 | Rank is determined by MOTA. 70 | 71 | ## Suggested Validation Split 72 | 73 | We provide a suggested validation split to help parameter tune. In the paper, 74 | we show the distribution is very similar to the test dataset. Validation split: 75 | 76 | clark-center-2019-02-28_1 77 | gates-ai-lab-2019-02-08_0 78 | huang-2-2019-01-25_0 79 | meyer-green-2019-03-16_0 80 | nvidia-aud-2019-04-18_0 81 | tressider-2019-03-16_1 82 | tressider-2019-04-26_2 83 | 84 | ## Evaluation Protocol 85 | 86 | The MOT conversion script can be used to set up the evaluation groundtruth. 87 | The website provides details on the file structure under preparing submissions. 88 | To evaluate tracking results in 2D, run the script: 89 | 90 | ``` 91 | python evaluation/eval_mot.py path/to/sequences path/to/results output_file.txt 92 | ``` 93 | 94 | As an example: 95 | 96 | ``` 97 | python evaluation/eval_mot.py data/MOT_dataset/sequences/ results/experiment1/ results/experiment1/eval.txt 98 | ``` 99 | 100 | For a detailed explanation of the arguments, run: 101 | 102 | ``` 103 | python evaluation/eval_mot.py -h 104 | ``` 105 | 106 | The evaluation script expects the groundtruth data to be in the format 107 | described before. 
In order to match the results files against the correct 108 | ground truth files, the results must be in the format: 109 | 110 | ``` 111 | [sequence_name]_[image_name].txt 112 | bytes-cafe-2019-02-07_0_image_0.txt 113 | ``` 114 | 115 | To evaluate 3d tracking results, just add the --depth flag on the above calls. 116 | 117 | ``` 118 | python evaluation/eval_mot.py MOT/sequences/ results/ MOT_3d_output.txt -d 119 | ``` 120 | 121 | The results files must be named according to: 122 | 123 | ``` 124 | [sequence_name]_3d.txt 125 | bytes-cafe-2019-02-07_0_3d.txt 126 | ``` 127 | 128 | 129 | ## Acknowledgement 130 | 131 | This code is a fork of pymotmetrics: 132 | https://github.com/cheind/py-motmetrics 133 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/__init__.py: -------------------------------------------------------------------------------- 1 | from . import eval_mot 2 | from . import tools 3 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/eval_mot.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | 3 | Christoph Heindl, 2017 4 | https://github.com/cheind/py-motmetrics 5 | 6 | Files 7 | ----- 8 | All file content, ground truth, and test files have to comply with the 9 | format described in 10 | 11 | Milan, Anton, et al. 12 | "Mot16: A benchmark for multi-object tracking." 13 | arXiv preprint arXiv:1603.00831 (2016). 14 | https://motchallenge.net/ 15 | 16 | Structure 17 | --------- 18 | 19 | Layout for 2D ground truth data: 20 | /tracking///gt.txt (e.g. tracking/cubberly-auditorium-2019-04-22_1/image_stitched/gt.txt) 21 | /tracking///gt.txt (e.g. tracking/discovery-walk-2019-02-28_0/image_2/gt/gt.txt) 22 | ... 23 | 24 | Expected 2D directory structure: 25 | /.txt (e.g. cubberly-auditorium-2019-04-22_1_image_0.txt) 26 | /.txt (e.g. discovery-walk-2019-02-28_0_image_2.txt) 27 | ... 28 | 29 | Layout for 3D ground truth data: 30 | /tracking//pointclouds/3d_gt.txt (e.g. tracking/cubberly-auditorium-2019-04-22_1/pointclouds/3d_gt.txt) 31 | /tracking//pointclouds/3d_gt.txt (e.g. tracking/discovery-walk-2019-02-28_0/pointclouds/3d_gt.txt) 32 | ... 33 | 34 | Expected 3D directory structure: 35 | /.txt (e.g. cubberly-auditorium-2019-04-22_1.txt) 36 | /.txt (e.g. discovery-walk-2019-02-28_0.txt) 37 | ... 38 | 39 | Sequences of ground truth and test will be matched according to the `` 40 | string. 41 | """ 42 | 43 | import argparse 44 | import glob 45 | import os, pdb 46 | import logging 47 | import pandas as pd 48 | from collections import OrderedDict 49 | from pathlib import Path 50 | 51 | # Handles loading the tools library from the server and as a standalone script. 52 | try: 53 | from . 
import tools as mm 54 | except: 55 | import tools as mm 56 | 57 | def compare_dataframes(gts, ts, _3d = False): 58 | accs = [] 59 | names = [] 60 | for k, tsacc in ts.items(): 61 | if k in gts: 62 | logging.info('Comparing {}...'.format(k)) 63 | if _3d: 64 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distfields=['X', 'Y', 'Z', 65 | 'Length', 'Height', 'Width', 'Theta'], distth=0.7, _3d = _3d)) 66 | else: 67 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5, _3d = _3d)) 68 | names.append(k) 69 | else: 70 | logging.warning('No ground truth for {}, skipping.'.format(k)) 71 | 72 | return accs, names 73 | 74 | def get_cameras(tsfiles): 75 | images = [] 76 | for f in tsfiles: 77 | image = ('_').join(f.split('/')[-1].split('.')[0].split('_')[-2:]) 78 | if image not in images: 79 | images.append(image) 80 | 81 | return images 82 | 83 | def downselect_gt(cameras, gtfiles): 84 | new_gts = [] 85 | for g in gtfiles: 86 | gt_cam = Path(g).parts[-2] 87 | if gt_cam == 'gt': 88 | gt_cam = 'image_stitched' 89 | else: 90 | gt_cam = gt_cam[3:] 91 | if gt_cam in cameras: 92 | new_gts.append(g) 93 | 94 | return new_gts 95 | 96 | def evaluate(groundtruths, tests, loglevel='info', fmt='mot15-2D', depth=False, 97 | solver=None): 98 | loglevel = getattr(logging, loglevel.upper(), None) 99 | if not isinstance(loglevel, int): 100 | raise ValueError('Invalid log level: {} '.format(loglevel)) 101 | logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S') 102 | 103 | if solver: 104 | mm.lap_util.default_solver = solver 105 | 106 | if not depth: 107 | gtfiles = glob.glob(os.path.join(groundtruths,'*/gt/gt.txt')) 108 | tsfiles = [f for f in glob.glob(os.path.join(tests, '*.txt')) if("3d.txt" not in f and not os.path.basename(f).startswith('eval') and not 'result' in os.path.basename(f))] 109 | cameras = get_cameras(tsfiles) 110 | gtfiles = downselect_gt(cameras, gtfiles) 111 | else: 112 | gtfiles = glob.glob(os.path.join(groundtruths, '*/gt/3d_gt.txt')) 113 | tsfiles = [f.replace('_3d','') for f in glob.glob(os.path.join(tests, '*.txt')) if(not os.path.basename(f).startswith('eval') and not 'result' in os.path.basename(f))] 114 | 115 | logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 116 | logging.info('Available LAP solvers {}'.format(mm.lap_util.available_solvers)) 117 | logging.info('Default LAP solver \'{}\''.format(mm.lap_util.default_solver)) 118 | logging.info('Loading files.') 119 | 120 | if not depth: 121 | gt_str = ', '.join(sorted(['_'.join(Path(f).parts[-3:-1]).replace('gt_','').replace('gt','image_stitched')+'.txt' for f in gtfiles])) 122 | else: 123 | gt_str = ', '.join(sorted([Path(f).parts[-3]+'.txt' for f in gtfiles])) 124 | ts_str = ', '.join(sorted([Path(f).parts[-1] for f in tsfiles])) 125 | 126 | if len(gtfiles) != len(tsfiles): 127 | return "Error: There are " + str(len(gtfiles)) + " sequences but only " + str(len(tsfiles)) + " were provided. Please ensure that all sequences are provided. If there are 0 provided, a common error is not ensuring the sequences are directly under the root in the zipped file. If there are 0 groundtruth files, that means the submission files were not named properly to indicate the camera (and did not, for example, end with _image_stitched.txt).

The list of gt sequences is: " + gt_str + "

The list of provided sequences is: " + ts_str + ".



Feel free to reach out to jrdb@cs.stanford.edu for assistance." 128 | elif gt_str != ts_str: 129 | return "Error: The sequences are not correctly named.

The list of gt sequences is: " + gt_str + ".

The list of provided sequences is: " + ts_str + ".


Feel free to reach out to jrdb@cs.stanford.edu for assistance." 130 | 131 | if not depth: 132 | gt = OrderedDict([(('_').join(Path(f).parts[-3:-1]).replace('gt_','').replace('gt','image_stitched'), mm.io.loadtxt(f, fmt=fmt, min_confidence=-1, _3d=depth)) for f in gtfiles]) 133 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=fmt, _3d=depth)) for f in tsfiles ]) 134 | else: 135 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=fmt, min_confidence=-1, _3d=depth)) for f in gtfiles]) 136 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=fmt, _3d=depth)) for f in tsfiles]) 137 | 138 | mh = mm.metrics.create() 139 | accs, names = compare_dataframes(gt, ts, _3d=depth) 140 | 141 | indoor_outdoor_split = \ 142 | {'INDOOR': ['cubberly-auditorium-2019-04-22_1', 'gates-ai-lab-2019-04-17_0', 143 | 'gates-basement-elevators-2019-01-17_0', 144 | 'nvidia-aud-2019-01-25_0', 'nvidia-aud-2019-04-18_1', 145 | 'nvidia-aud-2019-04-18_2', 'gates-foyer-2019-01-17_0', 146 | 'stlc-111-2019-04-19_1', 'stlc-111-2019-04-19_2', 147 | 'hewlett-class-2019-01-23_0', 'hewlett-class-2019-01-23_1', 148 | 'huang-2-2019-01-25_1', 'indoor-coupa-cafe-2019-02-06_0', 149 | 'tressider-2019-04-26_0', 'tressider-2019-04-26_1', 150 | 'tressider-2019-04-26_3'], 151 | 'OUTDOOR': ['discovery-walk-2019-02-28_0', 'meyer-green-2019-03-16_1', 152 | 'discovery-walk-2019-02-28_1', 'food-trucks-2019-02-12_0', 153 | 'quarry-road-2019-02-28_0', 'outdoor-coupa-cafe-2019-02-06_0', 154 | 'serra-street-2019-01-30_0', 'gates-to-clark-2019-02-28_0', 155 | 'tressider-2019-03-16_2', 'lomita-serra-intersection-2019-01-30_0', 156 | 'huang-intersection-2019-01-22_0']} 157 | logging.info('Running metrics') 158 | summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, 159 | generate_overall=True, _3d=depth, sequence_splits=indoor_outdoor_split, 160 | dist_cutoffs=[5, 10, 15, 20, 25]) 161 | logging.info('Completed') 162 | return summary 163 | 164 | if __name__ == '__main__': 165 | parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) 166 | parser.add_argument('groundtruths', type=str, help='Directory containing ground truth files.') 167 | parser.add_argument('tests', type=str, help='Directory containing result files.') 168 | parser.add_argument('--loglevel', type=str, help='Log level', default='info') 169 | parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D') 170 | parser.add_argument('-d', '--depth', action='store_true', help='Whether evaluating in 3D') 171 | parser.add_argument('--solver', type=str, help='LAP solver to use') 172 | args = parser.parse_args() 173 | 174 | print(evaluate(args.groundtruths, args.tests, args.loglevel, args.fmt, args.depth, args.solver)) 175 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .mot import MOTAccumulator 2 | 3 | # Handles loading the tools library from the server and as a standalone script. 4 | try: 5 | from . import lap_util 6 | from . import metrics 7 | from . import distances 8 | from . import io 9 | from . 
import utils 10 | except: 11 | import evaluation.lap_util 12 | import evaluation.metrics 13 | import evaluation.distances 14 | import evaluation.io 15 | import evaluation.utils 16 | 17 | 18 | # Needs to be last line 19 | __version__ = '1.1.3' -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/tools/distances.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | 3 | Christoph Heindl, 2017 4 | https://github.com/cheind/py-motmetrics 5 | """ 6 | 7 | import numpy as np 8 | import pdb 9 | 10 | def norm2squared_matrix(objs, hyps, max_d2=float('inf')): 11 | """Computes the squared Euclidean distance matrix between object and hypothesis points. 12 | 13 | Params 14 | ------ 15 | objs : NxM array 16 | Object points of dim M in rows 17 | hyps : KxM array 18 | Hypothesis points of dim M in rows 19 | 20 | Kwargs 21 | ------ 22 | max_d2 : float 23 | Maximum tolerable squared Euclidean distance. Object / hypothesis points 24 | with larger distance are set to np.nan signalling do-not-pair. Defaults 25 | to +inf 26 | 27 | Returns 28 | ------- 29 | C : NxK array 30 | Distance matrix containing pairwise distances or np.nan. 31 | """ 32 | 33 | objs = np.atleast_2d(objs).astype(float) 34 | hyps = np.atleast_2d(hyps).astype(float) 35 | 36 | if objs.size == 0 or hyps.size == 0: 37 | return np.empty((0,0)) 38 | 39 | assert hyps.shape[1] == objs.shape[1], "Dimension mismatch" 40 | 41 | C = np.empty((objs.shape[0], hyps.shape[0])) 42 | 43 | for o in range(objs.shape[0]): 44 | for h in range(hyps.shape[0]): 45 | e = objs[o] - hyps[h] 46 | C[o, h] = e.dot(e) 47 | 48 | C[C > max_d2] = np.nan 49 | return C 50 | 51 | 52 | def iou_matrix(objs, hyps, max_iou=1.): 53 | """Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles. 54 | 55 | The IoU is computed as 56 | 57 | IoU(a,b) = 1. - isect(a, b) / union(a, b) 58 | 59 | where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The 60 | IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is 61 | zero. 62 | 63 | Params 64 | ------ 65 | objs : Nx4 array 66 | Object rectangles (x,y,w,h) in rows 67 | hyps : Kx4 array 68 | Hypothesis rectangles (x,y,w,h) in rows 69 | 70 | Kwargs 71 | ------ 72 | max_iou : float 73 | Maximum tolerable overlap distance. Object / hypothesis points 74 | with larger distance are set to np.nan signalling do-not-pair. Defaults 75 | to 0.5 76 | 77 | Returns 78 | ------- 79 | C : NxK array 80 | Distance matrix containing pairwise distances or np.nan. 81 | """ 82 | 83 | objs = np.atleast_2d(objs).astype(float) 84 | hyps = np.atleast_2d(hyps).astype(float) 85 | 86 | if objs.size == 0 or hyps.size == 0: 87 | return np.empty((0,0)) 88 | 89 | assert objs.shape[1] == 4 90 | assert hyps.shape[1] == 4 91 | 92 | br_objs = objs[:, :2] + objs[:, 2:] 93 | br_hyps = hyps[:, :2] + hyps[:, 2:] 94 | 95 | C = np.empty((objs.shape[0], hyps.shape[0])) 96 | 97 | for o in range(objs.shape[0]): 98 | for h in range(hyps.shape[0]): 99 | isect_xy = np.maximum(objs[o, :2], hyps[h, :2]) 100 | isect_wh = np.maximum(np.minimum(br_objs[o], br_hyps[h]) - isect_xy, 0) 101 | isect_a = isect_wh[0]*isect_wh[1] 102 | union_a = objs[o, 2]*objs[o, 3] + hyps[h, 2]*hyps[h, 3] - isect_a 103 | if union_a != 0: 104 | C[o, h] = 1. 
- isect_a / union_a 105 | else: 106 | C[o, h] = np.nan 107 | 108 | C[C > max_iou] = np.nan 109 | return C 110 | 111 | 112 | def find_area(vertices): 113 | area = 0 114 | for i in range(len(vertices)): 115 | area += vertices[i][0]*(vertices[(i+1)%len(vertices)][1] - vertices[i-1][1]) 116 | return 0.5*abs(area) 117 | 118 | def get_angle(p): 119 | x, y = p 120 | angle = np.arctan2(y,x) 121 | if angle < 0: 122 | angle += np.pi*2 123 | return angle 124 | 125 | def clip_polygon(box1, box2): 126 | #clips box 1 by the edges in box2 127 | x,y,z,l,h,w,theta = box2 128 | theta = -theta 129 | 130 | box2_edges = np.asarray([(-np.cos(theta), -np.sin(theta), l/2-x*np.cos(theta)-z*np.sin(theta)), 131 | (-np.sin(theta), np.cos(theta), w/2-x*np.sin(theta)+z*np.cos(theta)), 132 | (np.cos(theta), np.sin(theta), l/2+x*np.cos(theta)+z*np.sin(theta)), 133 | (np.sin(theta), -np.cos(theta), w/2+x*np.sin(theta)-z*np.cos(theta))]) 134 | x,y,z,l,h,w,theta = box1 135 | theta = -theta 136 | 137 | box1_vertices = [(x+l/2*np.cos(theta)-w/2*np.sin(theta), z+l/2*np.sin(theta)+w/2*np.cos(theta)), 138 | (x+l/2*np.cos(theta)+w/2*np.sin(theta), z+l/2*np.sin(theta)-w/2*np.cos(theta)), 139 | (x-l/2*np.cos(theta)-w/2*np.sin(theta), z-l/2*np.sin(theta)+w/2*np.cos(theta)), 140 | (x-l/2*np.cos(theta)+w/2*np.sin(theta), z-l/2*np.sin(theta)-w/2*np.cos(theta))] 141 | out_vertices = sort_points(box1_vertices, (x, z)) 142 | for edge in box2_edges: 143 | vertex_list = out_vertices.copy() 144 | out_vertices = [] 145 | for idx, current_vertex in enumerate(vertex_list): 146 | previous_vertex = vertex_list[idx-1] 147 | if point_inside_edge(current_vertex, edge): 148 | if not point_inside_edge(previous_vertex, edge): 149 | out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge)) 150 | out_vertices.append(current_vertex) 151 | elif point_inside_edge(previous_vertex, edge): 152 | out_vertices.append(compute_intersection_point(previous_vertex, current_vertex, edge)) 153 | to_remove = [] 154 | for i in range(len(out_vertices)): 155 | if i in to_remove: 156 | continue 157 | for j in range(i+1, len(out_vertices)): 158 | if abs(out_vertices[i][0] - out_vertices[j][0]) < 1e-6 and abs(out_vertices[i][1] - out_vertices[j][1]) < 1e-6: 159 | to_remove.append(j) 160 | out_vertices = sorted([(v[0]-x, v[1]-z) for i,v in enumerate(out_vertices) if i not in to_remove], key = lambda p: get_angle((p[0],p[1]))) 161 | return out_vertices 162 | 163 | def sort_points(pts, center): 164 | x, z = center 165 | sorted_pts = sorted([(i, (v[0]-x, v[1]-z)) for i,v in enumerate(pts)], key = lambda p: get_angle((p[1][0],p[1][1]))) 166 | idx, _ = zip(*sorted_pts) 167 | return [pts[i] for i in idx] 168 | 169 | def compute_intersection_point(pt1, pt2, line1): 170 | if pt1[0] == pt2[0]: 171 | slope = np.inf 172 | else: 173 | slope = (pt1[1]-pt2[1])/(pt1[0] - pt2[0]) 174 | if np.isinf(slope): 175 | line2 = (1, 0, pt1[0]) 176 | else: 177 | line2 = (slope, -1, pt1[0]*slope-pt1[1]) 178 | # print("Line1:", line1) 179 | # print("Line2:", line2) 180 | if line1[1] == 0: 181 | x = line1[2]/line1[0] 182 | y = (line2[2] - line2[0]*x)/line2[1] 183 | elif line1[0] == 0: 184 | y = line1[2]/line1[1] 185 | x = (line2[2] - line2[1]*y)/line2[0] 186 | elif line2[1] == 0: 187 | x = pt1[0] 188 | y = (line1[2]-x*line1[0])/line1[1] 189 | else: 190 | tmp_line = (line2 - line1*(line2[1]/line1[1])) 191 | x = tmp_line[2]/tmp_line[0] 192 | y = (line2[2] - line2[0]*x)/line2[1] 193 | return (x,y) 194 | 195 | def point_inside_edge(pt, edge): 196 | lhs = pt[0]*edge[0] + 
pt[1]*edge[1] 197 | if lhs < edge[2] - 1e-6: 198 | return True 199 | else: 200 | return False 201 | 202 | 203 | def iou_matrix_3d(objs, hyps, max_iou=1.): 204 | """Computes 'intersection over union (IoU)' distance matrix between object and hypothesis rectangles. 205 | 206 | The IoU is computed as 207 | 208 | IoU(a,b) = 1. - isect(a, b) / union(a, b) 209 | 210 | where isect(a,b) is the area of intersection of two rectangles and union(a, b) the area of union. The 211 | IoU is bounded between zero and one. 0 when the rectangles overlap perfectly and 1 when the overlap is 212 | zero. 213 | 214 | Params 215 | ------ 216 | objs : Nx4 array 217 | Object rectangles (x,y,w,h) in rows 218 | hyps : Kx4 array 219 | Hypothesis rectangles (x,y,w,h) in rows 220 | 221 | Kwargs 222 | ------ 223 | max_iou : float 224 | Maximum tolerable overlap distance. Object / hypothesis points 225 | with larger distance are set to np.nan signalling do-not-pair. Defaults 226 | to 0.5 227 | 228 | Returns 229 | ------- 230 | C : NxK array 231 | Distance matrix containing pairwise distances or np.nan. 232 | """ 233 | 234 | objs = np.atleast_2d(objs).astype(float) 235 | hyps = np.atleast_2d(hyps).astype(float) 236 | 237 | if objs.size == 0 or hyps.size == 0: 238 | return np.empty((0,0)) 239 | assert objs.shape[1] == 7 240 | assert hyps.shape[1] == 7 241 | 242 | C = np.empty((objs.shape[0], hyps.shape[0])) 243 | for o in range(objs.shape[0]): 244 | for h in range(hyps.shape[0]): 245 | base_area = find_area(clip_polygon(objs[o], hyps[h])) 246 | height = max(objs[o][1], hyps[h][1]) - min(objs[o][1] - objs[o][4], hyps[h][1]-hyps[h][4]) 247 | intersect = base_area*height 248 | union = objs[o][3]*objs[o][4]*objs[o][5] + hyps[h][3]*hyps[h][4]*hyps[h][5] - intersect 249 | if union != 0: 250 | C[o, h] = 1. - intersect / union 251 | else: 252 | C[o, h] = np.nan 253 | C[C > max_iou] = np.nan 254 | return C 255 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/tools/io.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | 3 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 4 | 5 | Christoph Heindl, 2017 6 | https://github.com/cheind/py-motmetrics 7 | """ 8 | 9 | from enum import Enum 10 | import pandas as pd 11 | import numpy as np 12 | import io 13 | 14 | class Format(Enum): 15 | """Enumerates supported file formats.""" 16 | 17 | MOT16 = 'mot16' 18 | """Milan, Anton, et al. "Mot16: A benchmark for multi-object tracking." arXiv preprint arXiv:1603.00831 (2016).""" 19 | 20 | MOT15_2D = 'mot15-2D' 21 | """Leal-Taixe, Laura, et al. "MOTChallenge 2015: Towards a benchmark for multi-target tracking." arXiv preprint arXiv:1504.01942 (2015).""" 22 | 23 | VATIC_TXT = 'vatic-txt' 24 | """Vondrick, Carl, Donald Patterson, and Deva Ramanan. "Efficiently scaling up crowdsourced video annotation." International Journal of Computer Vision 101.1 (2013): 184-204. 25 | https://github.com/cvondrick/vatic 26 | """ 27 | 28 | 29 | def load_motchallenge(fname, **kwargs): 30 | """Load MOT challenge data. 31 | 32 | Params 33 | ------ 34 | fname : str 35 | Filename to load data from 36 | 37 | Kwargs 38 | ------ 39 | sep : str 40 | Allowed field separators, defaults to '\s+|\t+|,' 41 | min_confidence : float 42 | Rows with confidence less than this threshold are removed. 43 | Defaults to -1. 
You should set this to 1 when loading 44 | ground truth MOTChallenge data, so that invalid rectangles in 45 | the ground truth are not considered during matching. 46 | 47 | Returns 48 | ------ 49 | df : pandas.DataFrame 50 | The returned dataframe has the following columns 51 | 'X', 'Y', 'Width', 'Height', 'Confidence', 'ClassId', 'Visibility' 52 | The dataframe is indexed by ('FrameId', 'Id') 53 | """ 54 | 55 | sep = kwargs.pop('sep', '\s+|\t+|,') 56 | _3d = kwargs.pop('_3d', False) 57 | min_confidence = kwargs.pop('min_confidence', -1) 58 | if not _3d: 59 | df = pd.read_csv( 60 | fname, 61 | sep=sep, 62 | index_col=[0,1], 63 | skipinitialspace=True, 64 | header=None, 65 | names=['FrameId', 'Id', 'X', 'Y', 'Width', 'Height', 'X0', 'Y0', 'Z0', 'Length0', 'Height0', 'Width0', 'Theta0', 'Confidence'], 66 | engine='python' 67 | ) # , 'ClassId', 'Visibility'], 68 | 69 | # Account for matlab convention. 70 | # Removed from original script. We use python. 71 | # df[['X', 'Y']] -= (1, 1) 72 | else: 73 | df = pd.read_csv( 74 | fname, 75 | sep=sep, 76 | index_col=[0,1], 77 | skipinitialspace=True, 78 | header=None, 79 | names=['FrameId', 'Id', 'X0', 'Y0', 'Width0', 'Height0', 'X', 'Y', 'Z', 'Length', 'Height', 'Width', 'Theta', 'Confidence'], 80 | engine='python' 81 | ) # , 'ClassId', 'Visibility'], 82 | # Account for matlab convention. 83 | # Removed from original script. We use python. 84 | # df[['X', 'Y', 'Z']] -= (1, 1, 1) 85 | 86 | # Remove all rows without sufficient confidence 87 | return df[df['Confidence'] >= min_confidence] 88 | 89 | def load_vatictxt(fname, **kwargs): 90 | """Load Vatic text format. 91 | 92 | Loads the vatic CSV text having the following columns per row 93 | 94 | 0 Track ID. All rows with the same ID belong to the same path. 95 | 1 xmin. The top left x-coordinate of the bounding box. 96 | 2 ymin. The top left y-coordinate of the bounding box. 97 | 3 xmax. The bottom right x-coordinate of the bounding box. 98 | 4 ymax. The bottom right y-coordinate of the bounding box. 99 | 5 frame. The frame that this annotation represents. 100 | 6 lost. If 1, the annotation is outside of the view screen. 101 | 7 occluded. If 1, the annotation is occluded. 102 | 8 generated. If 1, the annotation was automatically interpolated. 103 | 9 label. The label for this annotation, enclosed in quotation marks. 104 | 10+ attributes. Each column after this is an attribute set in the current frame 105 | 106 | Params 107 | ------ 108 | fname : str 109 | Filename to load data from 110 | 111 | Returns 112 | ------ 113 | df : pandas.DataFrame 114 | The returned dataframe has the following columns 115 | 'X', 'Y', 'Width', 'Height', 'Lost', 'Occluded', 'Generated', 'ClassId', '', '', ... 116 | where is placeholder for the actual attribute name capitalized (first letter). The order of attribute 117 | columns is sorted in attribute name. 
The dataframe is indexed by ('FrameId', 'Id') 118 | """ 119 | 120 | sep = kwargs.pop('sep', ' ') 121 | 122 | with open(fname) as f: 123 | # First time going over file, we collect the set of all variable activities 124 | activities = set() 125 | for line in f: 126 | [activities.add(c) for c in line.rstrip().split(sep)[10:]] 127 | activitylist = sorted(list(activities)) 128 | 129 | # Second time we construct artificial binary columns for each activity 130 | data = [] 131 | f.seek(0) 132 | for line in f: 133 | fields = line.rstrip().split() 134 | attrs = ['0'] * len(activitylist) 135 | for a in fields[10:]: 136 | attrs[activitylist.index(a)] = '1' 137 | fields = fields[:10] 138 | fields.extend(attrs) 139 | data.append(' '.join(fields)) 140 | 141 | strdata = '\n'.join(data) 142 | 143 | dtype = { 144 | 'Id': np.int64, 145 | 'X': np.float32, 146 | 'Y': np.float32, 147 | 'Width': np.float32, 148 | 'Height': np.float32, 149 | 'FrameId': np.int64, 150 | 'Lost': bool, 151 | 'Occluded': bool, 152 | 'Generated': bool, 153 | 'ClassId': str, 154 | } 155 | 156 | # Remove quotes from activities 157 | activitylist = [a.replace('\"', '').capitalize() for a in activitylist] 158 | 159 | # Add dtypes for activities 160 | for a in activitylist: 161 | dtype[a] = bool 162 | 163 | # Read from CSV 164 | names = ['Id', 'X', 'Y', 'Width', 'Height', 'FrameId', 'Lost', 'Occluded', 'Generated', 'ClassId'] 165 | names.extend(activitylist) 166 | df = pd.read_csv(io.StringIO(strdata), names=names, index_col=['FrameId','Id'], header=None, sep=' ') 167 | 168 | # Correct Width and Height which are actually XMax, Ymax in files. 169 | w = df['Width'] - df['X'] 170 | h = df['Height'] - df['Y'] 171 | df['Width'] = w 172 | df['Height'] = h 173 | 174 | return df 175 | 176 | def loadtxt(fname, fmt=Format.MOT15_2D, **kwargs): 177 | """Load data from any known format.""" 178 | fmt = Format(fmt) 179 | 180 | switcher = { 181 | Format.MOT16: load_motchallenge, 182 | Format.MOT15_2D: load_motchallenge, 183 | Format.VATIC_TXT: load_vatictxt 184 | } 185 | func = switcher.get(fmt) 186 | return func(fname, **kwargs) 187 | 188 | def render_summary(summary, formatters=None, namemap=None, buf=None): 189 | """Render metrics summary to console friendly tabular output. 190 | 191 | Params 192 | ------ 193 | summary : pd.DataFrame 194 | Dataframe containing summaries in rows. 195 | 196 | Kwargs 197 | ------ 198 | buf : StringIO-like, optional 199 | Buffer to write to 200 | formatters : dict, optional 201 | Dicionary defining custom formatters for individual metrics. 202 | I.e `{'mota': '{:.2%}'.format}`. You can get preset formatters 203 | from MetricsHost.formatters 204 | namemap : dict, optional 205 | Dictionary defining new metric names for display. I.e 206 | `{'num_false_positives': 'FP'}`. 
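    A minimal usage sketch (assuming `summary` was produced by a MetricsHost
    instance `mh`, e.g. via `mh.compute_many(...)` as in eval_mot.py, and using
    the preset formatters mentioned above):

        print(render_summary(summary, formatters=mh.formatters, namemap=motchallenge_metric_names))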
207 | 208 | Returns 209 | ------- 210 | string 211 | Formatted string 212 | """ 213 | 214 | if not namemap is None: 215 | summary = summary.rename(columns=namemap) 216 | if not formatters is None: 217 | formatters = dict([(namemap[c], f) if c in namemap else (c, f) for c, f in formatters.items()]) 218 | 219 | output = summary.to_string( 220 | buf=buf, 221 | formatters=formatters, 222 | ) 223 | 224 | return output 225 | 226 | motchallenge_metric_names = { 227 | 'idf1' : 'IDF1', 228 | 'idp' : 'IDP', 229 | 'idr' : 'IDR', 230 | 'recall' : 'Rcll', 231 | 'precision' : 'Prcn', 232 | 'num_unique_objects' : 'GT', 233 | 'mostly_tracked' : 'MT', 234 | 'partially_tracked' : 'PT', 235 | 'mostly_lost': 'ML', 236 | 'num_false_positives' : 'FP', 237 | 'num_misses' : 'FN', 238 | 'num_switches' : 'IDs', 239 | 'num_fragmentations' : 'FM', 240 | 'mota' : 'MOTA', 241 | 'motp' : 'MOTP' 242 | } 243 | """A list mappings for metric names to comply with MOTChallenge.""" 244 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/tools/lap_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | def linear_sum_assignment(costs, solver=None): 5 | """Solve a linear sum assignment problem (LSA). 6 | 7 | For large datasets solving the minimum cost assignment becomes the dominant runtime part. 8 | We therefore support various solvers out of the box (currently lapsolver, scipy, ortools, munkres) 9 | 10 | Params 11 | ------ 12 | costs : np.array 13 | numpy matrix containing costs. Use NaN/Inf values for unassignable 14 | row/column pairs. 15 | 16 | Kwargs 17 | ------ 18 | solver : callable or str, optional 19 | When str: name of solver to use. 20 | When callable: function to invoke 21 | When None: uses first available solver 22 | """ 23 | 24 | solver = solver or default_solver 25 | 26 | if isinstance(solver, str): 27 | # Try resolve from string 28 | solver = solver_map.get(solver, None) 29 | 30 | assert callable(solver), 'Invalid LAP solver.' 31 | return solver(costs) 32 | 33 | def lsa_solve_scipy(costs): 34 | """Solves the LSA problem using the scipy library.""" 35 | 36 | from scipy.optimize import linear_sum_assignment as scipy_solve 37 | 38 | # Note there is an issue in scipy.optimize.linear_sum_assignment where 39 | # it runs forever if an entire row/column is infinite or nan. We therefore 40 | # make a copy of the distance matrix and compute a safe value that indicates 41 | # 'cannot assign'. Also note + 1 is necessary in below inv-dist computation 42 | # to make invdist bigger than max dist in case max dist is zero. 43 | 44 | inv = ~np.isfinite(costs) 45 | if inv.any(): 46 | costs = costs.copy() 47 | valid = costs[~inv] 48 | INVDIST = 2 * valid.max() + 1 if valid.shape[0] > 0 else 1. 
49 | costs[inv] = INVDIST 50 | 51 | return scipy_solve(costs) 52 | 53 | def lsa_solve_lapsolver(costs): 54 | """Solves the LSA problem using the lapsolver library.""" 55 | from lapsolver import solve_dense 56 | return solve_dense(costs) 57 | 58 | def lsa_solve_munkres(costs): 59 | """Solves the LSA problem using the Munkres library.""" 60 | from munkres import Munkres, DISALLOWED 61 | m = Munkres() 62 | 63 | costs = costs.copy() 64 | inv = ~np.isfinite(costs) 65 | if inv.any(): 66 | costs = costs.astype(object) 67 | costs[inv] = DISALLOWED 68 | 69 | indices = np.array(m.compute(costs), dtype=np.int64) 70 | return indices[:,0], indices[:,1] 71 | 72 | 73 | def lsa_solve_ortools(costs): 74 | """Solves the LSA problem using Google's optimization tools.""" 75 | from ortools.graph import pywrapgraph 76 | 77 | 78 | # Google OR tools only support integer costs. Here's our attempt 79 | # to convert from floating point to integer: 80 | # 81 | # We search for the minimum difference between any two costs and 82 | # compute the first non-zero digit after the decimal place. Then 83 | # we compute a factor,f, that scales all costs so that the difference 84 | # is integer representable in the first digit. 85 | # 86 | # Example: min-diff is 0.001, then first non-zero digit place -3, so 87 | # we scale by 1e3. 88 | # 89 | # For small min-diffs and large costs in general there is a change of 90 | # overflowing. 91 | 92 | valid = np.isfinite(costs) 93 | 94 | min_e = -8 95 | unique = np.unique(costs[valid]) 96 | 97 | if unique.shape[0] == 1: 98 | min_diff = unique[0] 99 | elif unique.shape[0] > 1: 100 | min_diff = np.diff(unique).min() 101 | else: 102 | min_diff = 1 103 | 104 | min_diff_e = 0 105 | if min_diff != 0.0: 106 | min_diff_e = int(np.log10(np.abs(min_diff))) 107 | if min_diff_e < 0: 108 | min_diff_e -= 1 109 | 110 | e = min(max(min_e, min_diff_e), 0) 111 | f = 10**abs(e) 112 | 113 | assignment = pywrapgraph.LinearSumAssignment() 114 | for r in range(costs.shape[0]): 115 | for c in range(costs.shape[1]): 116 | if valid[r,c]: 117 | assignment.AddArcWithCost(r, c, int(costs[r,c]*f)) 118 | 119 | if assignment.Solve() != assignment.OPTIMAL: 120 | return linear_sum_assignment(costs, solver='scipy') 121 | 122 | if assignment.NumNodes() == 0: 123 | return np.array([], dtype=np.int64), np.array([], dtype=np.int64) 124 | 125 | pairings = [] 126 | for i in range(assignment.NumNodes()): 127 | pairings.append([i, assignment.RightMate(i)]) 128 | 129 | indices = np.array(pairings, dtype=np.int64) 130 | return indices[:,0], indices[:,1] 131 | 132 | def lsa_solve_lapjv(costs): 133 | from lap import lapjv 134 | 135 | inv = ~np.isfinite(costs) 136 | if inv.any(): 137 | costs = costs.copy() 138 | valid = costs[~inv] 139 | INVDIST = 2 * valid.max() + 1 if valid.shape[0] > 0 else 1. 
140 | costs[inv] = INVDIST 141 | 142 | r = lapjv(costs, return_cost=False, extend_cost=True) 143 | indices = np.array((range(costs.shape[0]), r[0]), dtype=np.int64).T 144 | indices = indices[indices[:, 1] != -1] 145 | return indices[:,0], indices[:,1] 146 | 147 | def init_standard_solvers(): 148 | import importlib 149 | from importlib import util 150 | 151 | global available_solvers, default_solver, solver_map 152 | 153 | solvers = [ 154 | ('lapsolver', lsa_solve_lapsolver), 155 | ('lap', lsa_solve_lapjv), 156 | ('scipy', lsa_solve_scipy), 157 | ('munkres', lsa_solve_munkres), 158 | ('ortools', lsa_solve_ortools), 159 | ] 160 | 161 | solver_map = dict(solvers) 162 | 163 | available_solvers = [s[0] for s in solvers if importlib.util.find_spec(s[0]) is not None] 164 | if len(available_solvers) == 0: 165 | import warnings 166 | default_solver = None 167 | warnings.warn('No standard LAP solvers found. Consider `pip install lapsolver` or `pip install scipy`', category=RuntimeWarning) 168 | else: 169 | default_solver = available_solvers[0] 170 | 171 | init_standard_solvers() 172 | 173 | from contextlib import contextmanager 174 | 175 | @contextmanager 176 | def set_default_solver(newsolver): 177 | '''Change the default solver within context. 178 | 179 | Intended usage 180 | 181 | costs = ... 182 | mysolver = lambda x: ... # solver code that returns pairings 183 | 184 | with lap.set_default_solver(mysolver): 185 | rids, cids = lap.linear_sum_assignment(costs) 186 | 187 | Params 188 | ------ 189 | newsolver : callable or str 190 | new solver function 191 | ''' 192 | 193 | global default_solver 194 | 195 | oldsolver = default_solver 196 | try: 197 | default_solver = newsolver 198 | yield 199 | finally: 200 | default_solver = oldsolver 201 | 202 | 203 | -------------------------------------------------------------------------------- /lib/jrdb_devkit/tracking_eval/tools/utils.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | 3 | Christoph Heindl, 2017 4 | https://github.com/cheind/py-motmetrics 5 | """ 6 | 7 | import pandas as pd 8 | import numpy as np 9 | 10 | from .mot import MOTAccumulator 11 | from .distances import iou_matrix, norm2squared_matrix, iou_matrix_3d 12 | 13 | 14 | def compare_to_groundtruth(gt, dt, dist='iou', distfields=['X', 'Y', 'Width', 'Height'], distth=0.5, _3d = False): 15 | """Compare groundtruth and detector results. 16 | 17 | This method assumes both results are given in terms of DataFrames with at least the following fields 18 | - `FrameId` First level index used for matching ground-truth and test frames. 19 | - `Id` Secondary level index marking available object / hypothesis ids 20 | 21 | Depending on the distance to be used relevant distfields need to be specified. 22 | 23 | Params 24 | ------ 25 | gt : pd.DataFrame 26 | Dataframe for ground-truth 27 | test : pd.DataFrame 28 | Dataframe for detector results 29 | 30 | Kwargs 31 | ------ 32 | dist : str, optional 33 | String identifying distance to be used. Defaults to intersection over union. 34 | distfields: array, optional 35 | Fields relevant for extracting distance information. Defaults to ['X', 'Y', 'Width', 'Height'] 36 | distth: float, optional 37 | Maximum tolerable distance. Pairs exceeding this threshold are marked 'do-not-pair'. 
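    A minimal call sketch (with `gt_df` and `dt_df` as placeholder names for
    DataFrames indexed by ('FrameId', 'Id'), e.g. loaded via io.loadtxt):

        acc = compare_to_groundtruth(gt_df, dt_df, dist='iou', distth=0.5)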
38 | """ 39 | 40 | 41 | def compute_iou(a, b): 42 | return iou_matrix(a, b, max_iou=distth) 43 | 44 | def compute_euc(a, b): 45 | return norm2squared_matrix(a, b, max_d2=distth) 46 | 47 | def compute_3d_iou(a,b): 48 | return iou_matrix_3d(a, b, max_iou = distth) 49 | 50 | if _3d: 51 | compute_dist = compute_3d_iou if dist.upper() == 'IOU' else compute_euc 52 | else: 53 | compute_dist = compute_iou if dist.upper() == 'IOU' else compute_euc 54 | 55 | acc = MOTAccumulator() 56 | 57 | # We need to account for all frames reported either by ground truth or 58 | # detector. In case a frame is missing in GT this will lead to FPs, in 59 | # case a frame is missing in detector results this will lead to FNs. 60 | allframeids = gt.index.union(dt.index).levels[0] 61 | 62 | for fid in allframeids: 63 | oids = np.empty(0) 64 | hids = np.empty(0) 65 | dists = np.empty((0,0)) 66 | 67 | if fid in gt.index: 68 | fgt = gt.loc[fid] 69 | oids = fgt.index.values 70 | 71 | if fid in dt.index: 72 | fdt = dt.loc[fid] 73 | hids = fdt.index.values 74 | 75 | if oids.shape[0] > 0 and hids.shape[0] > 0: 76 | dists = compute_dist(fgt[distfields].values, fdt[distfields].values) 77 | if _3d: 78 | if oids.shape[0] > 0: 79 | gt_dists = np.sqrt(norm2squared_matrix(fgt[distfields], np.zeros((1, fgt[distfields].values.shape[1])))) 80 | else: 81 | gt_dists = None 82 | if hids.shape[0] > 0: 83 | det_dists = np.sqrt(norm2squared_matrix(fdt[distfields], np.zeros((1, fdt[distfields].values.shape[1])))) 84 | else: 85 | det_dists = None 86 | else: 87 | gt_dists = None 88 | det_dists = None 89 | acc.update(oids, hids, dists, frameid=fid, gt_dists=gt_dists, det_dists=det_dists) 90 | 91 | return acc 92 | -------------------------------------------------------------------------------- /lidar_det/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VisualComputingInstitute/Person_MinkUNet/e582bd69ca8fb14268a1414e2534d6c509ac24d2/lidar_det/__init__.py -------------------------------------------------------------------------------- /lidar_det/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import * 2 | from .utils import * 3 | 4 | import numpy as np 5 | from .handles._pypcd import point_cloud_from_fileobj 6 | 7 | 8 | def load_pcb(file): 9 | """Load a pcb file. 
10 | 11 | Returns: 12 | pc (np.ndarray[3, N]): 13 | """ 14 | # pcd_load = 15 | # o3d.io.read_point_cloud(os.path.join(self.data_dir, url), format='pcd') 16 | # return np.asarray(pcd_load.points, dtype=np.float32) 17 | pc = point_cloud_from_fileobj(file).pc_data 18 | # NOTE: redundent copy, ok for now 19 | pc = np.array([pc["x"], pc["y"], pc["z"]], dtype=np.float32) 20 | return pc 21 | -------------------------------------------------------------------------------- /lidar_det/dataset/builder.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | from .dataset_det3d import JRDBDet3D, NuScenesDet3D 3 | 4 | 5 | def get_dataloader(split, batch_size, num_workers, shuffle, dataset_cfg): 6 | if dataset_cfg["name"] == "JRDB": 7 | # from .jrdb_detection_3d import JRDBDet3D 8 | 9 | ds = JRDBDet3D("./data/JRDB", split, dataset_cfg) 10 | elif dataset_cfg["name"] == "KITTI": 11 | # from .jrdb_detection_3d import JRDBDet3D 12 | 13 | ds = JRDBDet3D("./data/KITTI", split, dataset_cfg) 14 | elif dataset_cfg["name"] == "nuScenes": 15 | # from .jrdb_detection_3d import JRDBDet3D 16 | 17 | ds = NuScenesDet3D("./data/nuScenes", split, dataset_cfg) 18 | else: 19 | raise RuntimeError(f"Unknown dataset '{dataset_cfg['name']}'") 20 | 21 | return DataLoader( 22 | ds, 23 | batch_size=batch_size, 24 | pin_memory=True, 25 | num_workers=num_workers, 26 | shuffle=shuffle, 27 | collate_fn=ds.collate_batch, 28 | ) 29 | -------------------------------------------------------------------------------- /lidar_det/dataset/depracted/jrdb_detection_3d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import Dataset 3 | 4 | from torchsparse import SparseTensor 5 | from torchsparse.utils import sparse_quantize 6 | 7 | import lidar_det.utils.jrdb_transforms as jt 8 | import lidar_det.utils.utils_box3d as ub3d 9 | 10 | from ._jrdb_handle import JRDBHandleDet3D 11 | from .utils import get_prediction_target, collate_sparse_tensors 12 | 13 | __all__ = [ 14 | "JRDBDet3D", 15 | ] 16 | 17 | _AVERAGE_BOX_LWH = (0.9, 0.5, 1.7) 18 | 19 | 20 | _JRDB_VAL_SEQUENCES = [ 21 | "clark-center-2019-02-28_1", 22 | "gates-ai-lab-2019-02-08_0", 23 | "huang-2-2019-01-25_0", 24 | "meyer-green-2019-03-16_0", 25 | "nvidia-aud-2019-04-18_0", 26 | "tressider-2019-03-16_1", 27 | "tressider-2019-04-26_2", 28 | ] 29 | 30 | 31 | class JRDBDet3D(Dataset): 32 | def __init__(self, data_dir, split, cfg): 33 | if split == "train": 34 | self.__handle = JRDBHandleDet3D( 35 | data_dir, "train", exclude_sequences=_JRDB_VAL_SEQUENCES 36 | ) 37 | elif split == "val": 38 | self.__handle = JRDBHandleDet3D( 39 | data_dir, "train", sequences=_JRDB_VAL_SEQUENCES 40 | ) 41 | elif split == "train_val": 42 | self.__handle = JRDBHandleDet3D(data_dir, "train") 43 | 44 | elif split == "test": 45 | self.__handle = JRDBHandleDet3D(data_dir, "test") 46 | else: 47 | raise RuntimeError(f"Invalid split: {split}") 48 | 49 | self.__split = split 50 | self._augmentation = cfg["augmentation"] and "train" in split 51 | self._voxel_size = cfg["voxel_size"] 52 | self._num_points = cfg["num_points"] 53 | self._na = cfg["num_anchors"] 54 | self._no = cfg["num_ori_bins"] 55 | self._canonical = cfg["canonical"] 56 | self._target_mode = cfg["target_mode"] 57 | self._dist_min = cfg["dist_min"] 58 | self._dist_max = cfg["dist_max"] 59 | 60 | @property 61 | def split(self): 62 | return self.__split # used by trainer.py 63 | 64 | def __len__(self): 
65 | return len(self.__handle) 66 | 67 | def __getitem__(self, idx): 68 | if self.__split == "test": 69 | return self.getitem_test_set(idx) 70 | 71 | data_dict = self.__handle[idx] 72 | 73 | # point cloud in base frame 74 | pc_upper = data_dict["pc_upper"] 75 | pc_lower = data_dict["pc_lower"] 76 | pc_upper = jt.transform_pts_upper_velodyne_to_base(pc_upper) 77 | pc_lower = jt.transform_pts_lower_velodyne_to_base(pc_lower) 78 | pc = np.concatenate([pc_upper, pc_lower], axis=1) # (3, N) 79 | 80 | # bounding box in base frame 81 | boxes, _ = ub3d.string_to_boxes(data_dict["label_str"]) 82 | 83 | # filter out corrupted annotations with negative dimension 84 | valid_mask = (boxes[:, 3:6] > 0.0).min(axis=1).astype(np.bool) 85 | boxes = boxes[valid_mask] 86 | 87 | # augmentation 88 | if self._augmentation: 89 | # random scale 90 | scale_factor = np.random.uniform(0.95, 1.05) 91 | pc *= scale_factor 92 | boxes[:, :6] *= scale_factor 93 | 94 | # random rotation 95 | theta = np.random.uniform(0, 2 * np.pi) 96 | rot_mat = np.array( 97 | [ 98 | [np.cos(theta), np.sin(theta), 0], 99 | [-np.sin(theta), np.cos(theta), 0], 100 | [0, 0, 1], 101 | ], 102 | dtype=np.float32, 103 | ) 104 | pc = rot_mat @ pc 105 | boxes[:, :3] = boxes[:, :3] @ rot_mat.T 106 | boxes[:, 6] += theta 107 | 108 | # get regression label by assigning bounding box to point cloud 109 | pred_target, target_box_inds, target_boxes = get_prediction_target( 110 | pc, 111 | boxes, 112 | _AVERAGE_BOX_LWH, 113 | num_anchors=self._na, 114 | num_theta_bins=self._no, 115 | canonical=self._canonical, 116 | target_mode=self._target_mode, 117 | dist_min=self._dist_min, 118 | dist_max=self._dist_max, 119 | ) # (N, NA, C) 120 | 121 | # to voxel 122 | pc_voxel = np.round(pc / self._voxel_size) 123 | pc_voxel -= pc_voxel.min(axis=1, keepdims=True) 124 | pc_voxel = pc_voxel.T 125 | 126 | # NOTE all this does is find indices of non-duplicating elements 127 | inds, inverse_map = sparse_quantize( 128 | pc_voxel, feats=pc.T, labels=None, return_index=True, return_invs=True, 129 | ) 130 | 131 | # # # TODO is this needed? 
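    # (If re-enabled, the commented-out block below would randomly subsample the
    #  quantized voxel indices down to at most `num_points` during training;
    #  it is left disabled in this version.)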
132 | # # if "train" in self.split: 133 | # # if len(inds) > self.num_points: 134 | # # inds = np.random.choice(inds, self.num_points, replace=False) 135 | 136 | net_input = SparseTensor(pc.T[inds], pc_voxel[inds]) 137 | net_target = SparseTensor( 138 | pred_target.reshape(pc.shape[1], -1)[inds], pc_voxel[inds] 139 | ) 140 | net_target_boxes = SparseTensor(target_boxes[inds], pc_voxel[inds]) 141 | # target_full = SparseTensor(reg_labels.T, pc_voxel) 142 | # inverse_map = SparseTensor(inverse_map, pc_voxel) 143 | 144 | data_dict.update( 145 | { 146 | "net_input": net_input, 147 | "net_target": net_target, 148 | "net_target_boxes": net_target_boxes, 149 | # "target_full": target_full, 150 | "inverse_map": inverse_map, 151 | "points": pc, # (3, N) 152 | "boxes": boxes, # (B, 7) 153 | "pred_target": pred_target, # (N, NA, 8) 154 | "target_box_inds": target_box_inds, # (N,) 155 | "target_boxes": target_boxes, # (N,) 156 | "ave_lwh": _AVERAGE_BOX_LWH, 157 | "canonical": self._canonical, 158 | } 159 | ) 160 | 161 | return data_dict 162 | 163 | def getitem_test_set(self, idx): 164 | data_dict = self.__handle[idx] 165 | 166 | # point cloud in base frame 167 | pc_upper = data_dict["pc_upper"] 168 | pc_lower = data_dict["pc_lower"] 169 | pc_upper = jt.transform_pts_upper_velodyne_to_base(pc_upper) 170 | pc_lower = jt.transform_pts_lower_velodyne_to_base(pc_lower) 171 | pc = np.concatenate([pc_upper, pc_lower], axis=1) # (3, N) 172 | 173 | # to voxel 174 | pc_voxel = np.round(pc / self._voxel_size) 175 | pc_voxel -= pc_voxel.min(axis=1, keepdims=True) 176 | pc_voxel = pc_voxel.T 177 | 178 | # NOTE all this does is find indices of non-duplicating elements 179 | inds, inverse_map = sparse_quantize( 180 | pc_voxel, feats=pc.T, labels=None, return_index=True, return_invs=True, 181 | ) 182 | 183 | # # # TODO is this needed? 
184 | # # if "train" in self.split: 185 | # # if len(inds) > self.num_points: 186 | # # inds = np.random.choice(inds, self.num_points, replace=False) 187 | 188 | net_input = SparseTensor(pc.T[inds], pc_voxel[inds]) 189 | # inverse_map = SparseTensor(inverse_map, pc_voxel) 190 | 191 | data_dict.update( 192 | { 193 | "net_input": net_input, 194 | "inverse_map": inverse_map, 195 | "points": pc, # (3, N) 196 | "ave_lwh": _AVERAGE_BOX_LWH, 197 | "canonical": self._canonical, 198 | } 199 | ) 200 | 201 | return data_dict 202 | 203 | def collate_batch(self, batch): 204 | rtn_dict = {} 205 | for k, v in batch[0].items(): 206 | if isinstance(v, SparseTensor): 207 | rtn_dict[k] = collate_sparse_tensors([sample[k] for sample in batch]) 208 | elif k in ("ave_lwh", "canonical"): 209 | rtn_dict[k] = v 210 | else: 211 | rtn_dict[k] = [sample[k] for sample in batch] 212 | 213 | return rtn_dict 214 | -------------------------------------------------------------------------------- /lidar_det/dataset/depracted/kitti_detection_3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from torch.utils.data import Dataset 4 | 5 | from torchsparse import SparseTensor 6 | from torchsparse.utils import sparse_quantize 7 | 8 | import lidar_det.utils.jrdb_transforms as jt 9 | import lidar_det.utils.utils_box3d as ub3d 10 | 11 | from ._kitti_handle import KITTIHandleDet3D 12 | from .utils import get_prediction_target, collate_sparse_tensors 13 | 14 | __all__ = [ 15 | "KITTIDet3D", 16 | ] 17 | 18 | _AVERAGE_BOX_LWH = (0.9, 0.5, 1.7) 19 | 20 | 21 | class KITTIDet3D(Dataset): 22 | def __init__(self, data_dir, split, cfg): 23 | if split == "train" or split == "val": 24 | with open(os.path.join(data_dir, f"{split}.txt"), "r") as f: 25 | frames = [x.strip() for x in f.readlines()] 26 | self.__handle = KITTIHandleDet3D(data_dir, "train", frames=frames) 27 | elif split == "train_val" or split == "test": 28 | self.__handle = KITTIHandleDet3D(data_dir, "train") 29 | else: 30 | raise RuntimeError(f"Invalid split: {split}") 31 | 32 | self.__split = split 33 | self._augmentation = cfg["augmentation"] and "train" in split 34 | self._voxel_size = cfg["voxel_size"] 35 | self._num_points = cfg["num_points"] 36 | self._na = cfg["num_anchors"] 37 | self._no = cfg["num_ori_bins"] 38 | self._canonical = cfg["canonical"] 39 | 40 | @property 41 | def split(self): 42 | return self.__split # used by trainer.py 43 | 44 | def __len__(self): 45 | return len(self.__handle) 46 | 47 | def __getitem__(self, idx): 48 | if self.__split == "test": 49 | return self.getitem_test_set(idx) 50 | 51 | data_dict = self.__handle[idx] 52 | 53 | pc = data_dict["pc"][:3] 54 | 55 | # point cloud in base frame 56 | pc_upper = data_dict["pc_upper"] 57 | pc_lower = data_dict["pc_lower"] 58 | pc_upper = jt.transform_pts_upper_velodyne_to_base(pc_upper) 59 | pc_lower = jt.transform_pts_lower_velodyne_to_base(pc_lower) 60 | pc = np.concatenate([pc_upper, pc_lower], axis=1) # (3, N) 61 | 62 | # bounding box in base frame 63 | boxes, _ = ub3d.string_to_boxes(data_dict["label_str"]) 64 | 65 | # augmentation 66 | if self._augmentation: 67 | # random scale 68 | scale_factor = np.random.uniform(0.95, 1.05) 69 | pc *= scale_factor 70 | boxes[:, :6] *= scale_factor 71 | 72 | # random rotation 73 | theta = np.random.uniform(0, 2 * np.pi) 74 | rot_mat = np.array( 75 | [ 76 | [np.cos(theta), np.sin(theta), 0], 77 | [-np.sin(theta), np.cos(theta), 0], 78 | [0, 0, 1], 79 | ], 80 | dtype=np.float32, 81 | ) 82 | pc 
= rot_mat @ pc 83 | boxes[:, :3] = boxes[:, :3] @ rot_mat.T 84 | boxes[:, 6] += theta 85 | 86 | # get regression label by assigning bounding box to point cloud 87 | pred_target, reg_box_inds = get_prediction_target( 88 | pc, 89 | boxes, 90 | _AVERAGE_BOX_LWH, 91 | num_anchors=self._na, 92 | num_theta_bins=self._no, 93 | canonical=self._canonical, 94 | ) # (N, NA, C) 95 | 96 | # to voxel 97 | pc_voxel = np.round(pc / self._voxel_size) 98 | pc_voxel -= pc_voxel.min(axis=1, keepdims=True) 99 | pc_voxel = pc_voxel.T 100 | 101 | # NOTE all this does is find indices of non-duplicating elements 102 | inds, inverse_map = sparse_quantize( 103 | pc_voxel, feats=pc.T, labels=None, return_index=True, return_invs=True, 104 | ) 105 | 106 | # # # TODO is this needed? 107 | # # if "train" in self.split: 108 | # # if len(inds) > self.num_points: 109 | # # inds = np.random.choice(inds, self.num_points, replace=False) 110 | 111 | net_input = SparseTensor(pc.T[inds], pc_voxel[inds]) 112 | net_target = SparseTensor( 113 | pred_target.reshape(pc.shape[1], -1)[inds], pc_voxel[inds] 114 | ) 115 | # target_full = SparseTensor(reg_labels.T, pc_voxel) 116 | # inverse_map = SparseTensor(inverse_map, pc_voxel) 117 | 118 | data_dict.update( 119 | { 120 | "net_input": net_input, 121 | "net_target": net_target, 122 | # "target_full": target_full, 123 | "inverse_map": inverse_map, 124 | "points": pc, # (3, N) 125 | "boxes": boxes, # (B, 7) 126 | "pred_target": pred_target, # (N, NA, 8) 127 | "reg_box_inds": reg_box_inds, # (N,) 128 | "ave_lwh": _AVERAGE_BOX_LWH, 129 | "canonical": self._canonical, 130 | } 131 | ) 132 | 133 | return data_dict 134 | 135 | def getitem_test_set(self, idx): 136 | data_dict = self.__handle[idx] 137 | 138 | # point cloud in base frame 139 | pc_upper = data_dict["pc_upper"] 140 | pc_lower = data_dict["pc_lower"] 141 | pc_upper = jt.transform_pts_upper_velodyne_to_base(pc_upper) 142 | pc_lower = jt.transform_pts_lower_velodyne_to_base(pc_lower) 143 | pc = np.concatenate([pc_upper, pc_lower], axis=1) # (3, N) 144 | 145 | # to voxel 146 | pc_voxel = np.round(pc / self._voxel_size) 147 | pc_voxel -= pc_voxel.min(axis=1, keepdims=True) 148 | pc_voxel = pc_voxel.T 149 | 150 | # NOTE all this does is find indices of non-duplicating elements 151 | inds, inverse_map = sparse_quantize( 152 | pc_voxel, feats=pc.T, labels=None, return_index=True, return_invs=True, 153 | ) 154 | 155 | # # # TODO is this needed? 
156 | # # if "train" in self.split: 157 | # # if len(inds) > self.num_points: 158 | # # inds = np.random.choice(inds, self.num_points, replace=False) 159 | 160 | net_input = SparseTensor(pc.T[inds], pc_voxel[inds]) 161 | # inverse_map = SparseTensor(inverse_map, pc_voxel) 162 | 163 | data_dict.update( 164 | { 165 | "net_input": net_input, 166 | "inverse_map": inverse_map, 167 | "points": pc, # (3, N) 168 | "ave_lwh": _AVERAGE_BOX_LWH, 169 | "canonical": self._canonical, 170 | } 171 | ) 172 | 173 | return data_dict 174 | 175 | def collate_batch(self, batch): 176 | rtn_dict = {} 177 | for k, v in batch[0].items(): 178 | if isinstance(v, SparseTensor): 179 | rtn_dict[k] = collate_sparse_tensors([sample[k] for sample in batch]) 180 | elif k in ("ave_lwh", "canonical"): 181 | rtn_dict[k] = v 182 | else: 183 | rtn_dict[k] = [sample[k] for sample in batch] 184 | 185 | return rtn_dict 186 | -------------------------------------------------------------------------------- /lidar_det/dataset/handles/jrdb_handle.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import os 4 | from ._pypcd import point_cloud_from_path 5 | 6 | # NOTE: Don't use open3d to load point cloud since it spams the console. Setting 7 | # verbosity level does not solve the problem 8 | # https://github.com/intel-isl/Open3D/issues/1921 9 | # https://github.com/intel-isl/Open3D/issues/884 10 | 11 | 12 | __all__ = ["JRDBHandleDet3D"] 13 | 14 | 15 | class JRDBHandleDet3D: 16 | def __init__(self, data_dir, split, sequences=None, exclude_sequences=None): 17 | data_dir = os.path.abspath(os.path.expanduser(data_dir)) 18 | data_dir = ( 19 | os.path.join(data_dir, "train_dataset") 20 | if split == "train" or split == "val" 21 | else os.path.join(data_dir, "test_dataset") 22 | ) 23 | 24 | self.data_dir = data_dir 25 | 26 | sequence_names = ( 27 | os.listdir(os.path.join(data_dir, "timestamps")) 28 | if sequences is None 29 | else sequences 30 | ) 31 | 32 | # NOTE it is important to sort the return of os.listdir, since its order 33 | # changes for different file system. 
34 | sequence_names.sort() 35 | 36 | if exclude_sequences is not None: 37 | sequence_names = [s for s in sequence_names if s not in exclude_sequences] 38 | 39 | self.sequence_names = sequence_names 40 | 41 | self.sequence_handle = [] 42 | self._sequence_beginning_inds = [0] 43 | self.__flat_inds_sequence = [] 44 | self.__flat_inds_frame = [] 45 | for seq_idx, seq_name in enumerate(self.sequence_names): 46 | self.sequence_handle.append(_SequenceHandle(self.data_dir, seq_name)) 47 | 48 | # build a flat index for all sequences and frames 49 | sequence_length = len(self.sequence_handle[-1]) 50 | self.__flat_inds_sequence += sequence_length * [seq_idx] 51 | self.__flat_inds_frame += range(sequence_length) 52 | 53 | self._sequence_beginning_inds.append( 54 | self._sequence_beginning_inds[-1] + sequence_length 55 | ) 56 | 57 | def __len__(self): 58 | return len(self.__flat_inds_frame) 59 | 60 | def __getitem__(self, idx): 61 | idx_sq = self.__flat_inds_sequence[idx] 62 | idx_fr = self.__flat_inds_frame[idx] 63 | 64 | frame_dict = self.sequence_handle[idx_sq][idx_fr] 65 | urls = frame_dict["url"] 66 | 67 | frame_dict.update( 68 | { 69 | "frame_id": int(frame_dict["frame_id"]), 70 | "sequence": self.sequence_handle[idx_sq].sequence, 71 | "first_frame": idx_fr == 0, 72 | "dataset_idx": idx, 73 | "pc_upper": self.load_pointcloud(urls["pc_upper"]), 74 | "pc_lower": self.load_pointcloud(urls["pc_lower"]), 75 | } 76 | ) 77 | 78 | if urls["label"] is not None: 79 | frame_dict["label_str"] = self.load_label(urls["label"]) 80 | 81 | return frame_dict 82 | 83 | @staticmethod 84 | def box_is_on_ground(jrdb_ann_dict): 85 | bottom_h = float(jrdb_ann_dict["box"]["cz"]) - 0.5 * float( 86 | jrdb_ann_dict["box"]["h"] 87 | ) 88 | 89 | return bottom_h < -0.69 # value found by examining dataset 90 | 91 | @property 92 | def sequence_beginning_inds(self): 93 | return copy.deepcopy(self._sequence_beginning_inds) 94 | 95 | def load_pointcloud(self, url): 96 | """Load a point cloud given file url. 
97 | 98 | Returns: 99 | pc (np.ndarray[3, N]): 100 | """ 101 | # pcd_load = 102 | # o3d.io.read_point_cloud(os.path.join(self.data_dir, url), format='pcd') 103 | # return np.asarray(pcd_load.points, dtype=np.float32) 104 | pc = point_cloud_from_path(url).pc_data 105 | # NOTE: redundent copy, ok for now 106 | pc = np.array([pc["x"], pc["y"], pc["z"]], dtype=np.float32) 107 | return pc 108 | 109 | def load_label(self, url): 110 | with open(url, "r") as f: 111 | s = f.read() 112 | return s 113 | 114 | 115 | class _SequenceHandle: 116 | def __init__(self, data_dir, sequence): 117 | self.sequence = sequence 118 | 119 | # pc frames 120 | pc_dir = os.path.join(data_dir, "pointclouds", "upper_velodyne", sequence) 121 | frames = [f.split(".")[0] for f in os.listdir(pc_dir)] 122 | 123 | # labels 124 | label_dir = os.path.join(data_dir, "labels_kitti", sequence) 125 | if os.path.exists(label_dir): 126 | labeled_frames = [f.split(".")[0] for f in os.listdir(label_dir)] 127 | frames = list(set(frames) & set(labeled_frames)) 128 | 129 | self._upper_pc_dir = pc_dir 130 | self._lower_pc_dir = os.path.join( 131 | data_dir, "pointclouds", "lower_velodyne", sequence 132 | ) 133 | self._label_dir = label_dir 134 | self._frames = frames 135 | self._frames.sort() 136 | self._load_labels = os.path.exists(label_dir) 137 | 138 | def __len__(self): 139 | return self._frames.__len__() 140 | 141 | def __getitem__(self, idx): 142 | frame = self._frames[idx] 143 | url_upper_pc = os.path.join(self._upper_pc_dir, frame + ".pcd") 144 | url_lower_pc = os.path.join(self._lower_pc_dir, frame + ".pcd") 145 | url_label = ( 146 | os.path.join(self._label_dir, frame + ".txt") if self._load_labels else None 147 | ) 148 | 149 | return { 150 | "frame_id": frame, 151 | "url": { 152 | "pc_upper": url_upper_pc, 153 | "pc_lower": url_lower_pc, 154 | "label": url_label, 155 | }, 156 | } 157 | -------------------------------------------------------------------------------- /lidar_det/dataset/handles/kitti_handle.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | # Force the dataloader to load only one sample, in which case the network should 6 | # fit perfectly. 
7 | _DEBUG_ONE_SAMPLE = False 8 | 9 | __all__ = ["KITTIHandleDet3D"] 10 | 11 | 12 | class KITTIHandleDet3D: 13 | def __init__(self, data_dir, split="train", frames=None): 14 | if _DEBUG_ONE_SAMPLE: 15 | split = "train" 16 | 17 | if split == "train" or self._split == "val": 18 | root_dir = os.path.join(data_dir, "object/training") 19 | self._label_dir = os.path.join(root_dir, "label_2") 20 | else: 21 | root_dir = os.path.join(data_dir, "object/testing") 22 | self._label_dir = None 23 | 24 | self._split = split 25 | self._image_dir = os.path.join(root_dir, "image_2") 26 | self._lidar_dir = os.path.join(root_dir, "velodyne") 27 | self._calib_dir = os.path.join(root_dir, "calib") 28 | self._plane_dir = os.path.join(root_dir, "planes") 29 | 30 | self.__frame_inds = ( 31 | sorted([x.split(".")[0] for x in os.listdir(self._lidar_dir)]) 32 | if frames is None 33 | else frames 34 | ) 35 | 36 | def __len__(self): 37 | if _DEBUG_ONE_SAMPLE: 38 | return 80 39 | else: 40 | return len(self.__frame_inds) 41 | 42 | def __getitem__(self, idx): 43 | frame_id = self.__frame_inds[idx] 44 | 45 | pc = ( 46 | np.fromfile( 47 | os.path.join(self._lidar_dir, f"{frame_id:06d}.bin"), dtype=np.float32 48 | ) 49 | .reshape(-1, 4) 50 | .T 51 | ) 52 | 53 | im = cv2.imread(os.path.join(self._image_dir, f"{frame_id:06d}.png")) 54 | 55 | frame_dict = { 56 | "frame_id": frame_id, 57 | "pc": pc, # (4, N) 58 | "im": im, # (H, W, 3) 59 | "calib_url": os.path.join(self._calib_dir, f"{frame_id:06d}.txt"), 60 | } 61 | 62 | if self._label_dir is not None: 63 | frame_dict["plane"] = self._get_road_plane(frame_id) 64 | frame_dict["label_url"] = ( 65 | os.path.join(self._label_dir, f"{frame_id:06d}.txt"), 66 | ) 67 | 68 | return frame_dict 69 | 70 | def _get_road_plane(self, idx): 71 | plane_file = os.path.join(self._plane_dir, f"{idx:06d}.txt") 72 | with open(plane_file, "r") as f: 73 | lines = f.readlines() 74 | lines = [float(i) for i in lines[3].split()] 75 | plane = np.asarray(lines) 76 | 77 | # Ensure normal is always facing up, this is in the rectified camera coordinate 78 | if plane[1] > 0: 79 | plane = -plane 80 | 81 | norm = np.linalg.norm(plane[0:3]) 82 | plane = plane / norm 83 | 84 | return plane 85 | -------------------------------------------------------------------------------- /lidar_det/dataset/handles/nuscenes_handle.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | from pyquaternion import Quaternion 5 | 6 | from nuscenes.nuscenes import NuScenes 7 | from nuscenes.utils import splits as nuscenes_splits 8 | from nuscenes.utils.data_classes import LidarPointCloud 9 | 10 | 11 | __all__ = ["NuScenesHandle"] 12 | 13 | 14 | class NuScenesHandle: 15 | def __init__(self, data_dir, split, mini=False, nsweeps=1): 16 | data_dir = os.path.abspath(os.path.expanduser(data_dir)) 17 | self.data_dir = data_dir 18 | 19 | if mini: 20 | nusc_version = "v1.0-mini" 21 | split_scenes = ( 22 | nuscenes_splits.mini_train 23 | if split == "train" 24 | else nuscenes_splits.mini_val 25 | ) 26 | elif split == "test": 27 | nusc_version = "v1.0-test" 28 | split_scenes = nuscenes_splits.test 29 | else: 30 | nusc_version = "v1.0-trainval" 31 | if split == "train": 32 | split_scenes = nuscenes_splits.train 33 | elif split == "val": 34 | split_scenes = nuscenes_splits.val 35 | elif split == "train_val": 36 | split_scenes = nuscenes_splits.train + nuscenes_splits.val 37 | else: 38 | raise RuntimeError(f"Invalid split: {split}") 39 | 40 | self._nusc = 
NuScenes(version=nusc_version, dataroot=data_dir, verbose=False) 41 | self._nsweeps = nsweeps 42 | 43 | # locate scenes for the split 44 | all_scenes = self._nusc.scene 45 | all_scene_names = [s["name"] for s in all_scenes] 46 | 47 | scenes = [all_scenes[all_scene_names.index(s)] for s in split_scenes] 48 | self._scenes = scenes 49 | 50 | # build a flat list of all samples 51 | self.__flat_sample_tokens = [] 52 | for scene_idx, scene in enumerate(scenes): 53 | s_t = scene["first_sample_token"] 54 | while s_t != "": 55 | self.__flat_sample_tokens.append(s_t) 56 | s_t = self._nusc.get("sample", s_t)["next"] 57 | 58 | def __len__(self): 59 | return len(self.__flat_sample_tokens) 60 | 61 | def __getitem__(self, idx): 62 | s_t = self.__flat_sample_tokens[idx] 63 | s_dict = self._nusc.get("sample", s_t) 64 | 65 | pc, dts = self.load_pointcloud(s_dict, self._nsweeps) 66 | 67 | anns = [self._nusc.get("sample_annotation", a_t) for a_t in s_dict["anns"]] 68 | 69 | frame_dict = { 70 | "frame_id": idx, 71 | "scene_token": s_dict["scene_token"], 72 | "sample_token": s_t, 73 | "first_sample": s_dict["prev"] == "", 74 | "pc": pc, 75 | "anns": anns, 76 | } 77 | 78 | if dts is not None: 79 | frame_dict["pc_dt"] = dts 80 | 81 | return frame_dict 82 | 83 | def load_pointcloud(self, sample_dict, nsweeps): 84 | """Load a point cloud given nuScenes sample dict of the point cloud 85 | 86 | Returns: 87 | pc (np.ndarray[3, N]): Point cloud in global frame 88 | """ 89 | pc_dict = self._nusc.get("sample_data", sample_dict["data"]["LIDAR_TOP"]) 90 | if nsweeps == 1: 91 | url = os.path.join(self.data_dir, pc_dict["filename"]) 92 | pc = LidarPointCloud.from_file(url) 93 | dts = None 94 | else: 95 | # Example https://github.com/nutonomy/nuscenes-devkit/blob/5325d1b400950f777cd701bdd5e30a9d57d2eaa8/python-sdk/nuscenes/nuscenes.py#L1155 # noqa 96 | pc, dts = LidarPointCloud.from_file_multisweep( 97 | self._nusc, sample_dict, "LIDAR_TOP", "LIDAR_TOP", nsweeps=nsweeps 98 | ) 99 | dts = dts[0] 100 | 101 | # Transform to global frame 102 | # From https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/nuscenes.py#L749 # noqa 103 | # Points live in the point sensor frame. So they need to be transformed via global to the image plane. # noqa 104 | # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep. # noqa 105 | cs_record = self._nusc.get( 106 | "calibrated_sensor", pc_dict["calibrated_sensor_token"] 107 | ) 108 | pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix) 109 | pc.translate(np.array(cs_record["translation"])) 110 | 111 | # Second step: transform from ego to the global frame. 
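            # (Put differently, with (R_s, t_s) from the calibrated_sensor record
            #  applied above and (R_e, t_e) from the ego_pose record applied below,
            #  each point is mapped as p_global = R_e @ (R_s @ p_sensor + t_s) + t_e.)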
112 | poserecord = self._nusc.get("ego_pose", pc_dict["ego_pose_token"]) 113 | pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix) 114 | pc.translate(np.array(poserecord["translation"])) 115 | 116 | return pc, dts 117 | -------------------------------------------------------------------------------- /lidar_det/detector.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import numpy as np 5 | 6 | from torchsparse import SparseTensor 7 | from torchsparse.utils import sparse_quantize 8 | 9 | from lidar_det.model import get_model 10 | from lidar_det.dataset.utils import target_to_boxes_torch, collate_sparse_tensors 11 | from lidar_det.utils.utils_box3d import nms_3d_dist_gpu 12 | 13 | 14 | class PersonMinkUNet(object): 15 | def __init__(self, ckpt_file, gpu=True): 16 | """A warpper class for end-to-end inference. 17 | 18 | Args: 19 | ckpt_file (str): Path to checkpoint 20 | gpu (bool): True to use GPU. Defaults to True. 21 | """ 22 | self._gpu = gpu 23 | 24 | model_cfg = { 25 | "type": "MinkUNet", 26 | "kwargs": { 27 | "cr": 1.0, 28 | "run_up": True, 29 | "num_anchors": 1, 30 | "num_ori_bins": 12, 31 | "fpn": False, 32 | }, 33 | "target_mode": 2, 34 | "disentangled_loss": False, 35 | } 36 | 37 | self._net = get_model(model_cfg, inference_only=True) 38 | self._num_anchors = model_cfg["kwargs"]["num_anchors"] 39 | self._num_cls = 1 # person is the only class 40 | self._ave_lwh = (0.9, 0.5, 1.7) 41 | 42 | self._voxel_size = np.array([0.05, 0.05, 0.1], dtype=np.float32).reshape(3, 1) 43 | self._voxel_offset = np.array([1e5, 1e5, 1e4], dtype=np.int32).reshape(3, 1) 44 | 45 | self._voxel_size_torch = torch.from_numpy(self._voxel_size).float() 46 | self._voxel_offset_torch = torch.from_numpy(self._voxel_offset).float() 47 | 48 | ckpt = torch.load(ckpt_file) 49 | self._net.load_state_dict(ckpt["model_state"]) 50 | 51 | self._net.eval() 52 | 53 | if gpu: 54 | # NOTE Set to False gives slightly faster speed 55 | torch.backends.cudnn.benchmark = False 56 | self._net = self._net.cuda() 57 | self._voxel_size_torch = self._voxel_size_torch.cuda( 58 | non_blocking=True 59 | ).float() 60 | self._voxel_offset_torch = self._voxel_offset_torch.cuda( 61 | non_blocking=True 62 | ).float() 63 | 64 | print( 65 | "Detector initialized\n", 66 | f"GPU: {gpu}\n", 67 | f"ckpt: {ckpt_file}\n", 68 | f"voxel size: {self._voxel_size}\n", 69 | f"voxel offset: {self._voxel_offset}\n", 70 | ) 71 | 72 | def __call__(self, pc): 73 | """ 74 | Args: 75 | pc (array[3, N]): Point cloud 76 | 77 | Returns: 78 | boxes (array[B, 7]): (x, y, z, l, w, h, rt) 79 | scores (array[B]) 80 | """ 81 | net_input = self._preprocess(pc) 82 | 83 | if self._gpu: 84 | net_input = net_input.cuda() 85 | 86 | # forward 87 | with torch.no_grad(): 88 | net_pred = self._net(net_input) # (N, C) 89 | boxes_nms, scores_nms = self._postprocess(net_pred, net_input) 90 | 91 | return boxes_nms, scores_nms 92 | 93 | def _preprocess(self, pc): 94 | pc_voxel = np.round(pc / self._voxel_size) + self._voxel_offset 95 | pc_voxel = pc_voxel.T 96 | 97 | # NOTE all this does is find indices of non-duplicating elements 98 | inds, inverse_map = sparse_quantize( 99 | pc_voxel, feats=pc.T, labels=None, return_index=True, return_invs=True, 100 | ) 101 | net_input = SparseTensor(pc.T[inds], pc_voxel[inds]) 102 | net_input = collate_sparse_tensors([net_input]) 103 | 104 | return net_input 105 | 106 | def _postprocess(self, net_pred, net_input): 107 | # regression is w.r.t. 
to voxel center 108 | voxel_center = net_input.C[:, :3].clone().float() + 0.5 109 | voxel_center = ( 110 | voxel_center - self._voxel_offset_torch.view(1, 3) 111 | ) * self._voxel_size_torch.view(1, 3) 112 | voxel_center = voxel_center[:, None, None, :] # (N, 1, 3) 113 | 114 | # decode network prediction 115 | net_pred = net_pred.view( 116 | net_pred.shape[0], self._num_anchors, self._num_cls, -1 117 | ) # (N, A, S, C) 118 | cls_pred = torch.sigmoid(net_pred[..., 0]) # (N, A, S) 119 | reg_pred = net_pred[..., 1:] # (N, A, S, C - 1) 120 | reg_pred[..., :3] = reg_pred[..., :3] + voxel_center # offset from voxel center 121 | 122 | # postprocess prediction to get boxes 123 | num_theta_bins = int((reg_pred.shape[-1] - 6) / 2) 124 | boxes = target_to_boxes_torch( 125 | reg_pred[:, :, 0, :], self._ave_lwh, num_theta_bins 126 | ).view( 127 | -1, 7 128 | ) # (N * A, 7) 129 | 130 | # fast NMS based on distance 131 | cls_pred = cls_pred.view(-1) 132 | nms_inds = nms_3d_dist_gpu( 133 | boxes, 134 | cls_pred, 135 | l_ave=self._ave_lwh[0], 136 | w_ave=self._ave_lwh[1], 137 | nms_thresh=0.4, 138 | ) 139 | boxes_nms = boxes[nms_inds].data.cpu().numpy() 140 | scores_nms = cls_pred[nms_inds].data.cpu().numpy() # in descending order 141 | 142 | return boxes_nms, scores_nms 143 | 144 | 145 | class DetectorWithClock(PersonMinkUNet): 146 | def __init__(self, *args, **kwargs): 147 | """For measuring inference speed""" 148 | super(DetectorWithClock, self).__init__(*args, **kwargs) 149 | 150 | self._pc_count = [] 151 | self._voxel_count = [] 152 | self._time_preprocess = [] 153 | self._time_forward = [] 154 | self._time_postprocess = [] 155 | 156 | def __call__(self, pc): 157 | """ 158 | Args: 159 | pc (array[3, N]): Point cloud 160 | 161 | Returns: 162 | boxes (array[B, 7]): (x, y, z, l, w, h, rt) 163 | scores (array[B]) 164 | """ 165 | t0 = time.time() 166 | 167 | net_input = self._preprocess(pc) 168 | 169 | if self._gpu: 170 | net_input = net_input.cuda() 171 | 172 | torch.cuda.synchronize() 173 | t1 = time.time() 174 | 175 | # forward 176 | with torch.no_grad(): 177 | net_pred = self._net(net_input) # (N, C) 178 | torch.cuda.synchronize() 179 | t2 = time.time() 180 | 181 | boxes_nms, scores_nms = self._postprocess(net_pred, net_input) 182 | torch.cuda.synchronize() 183 | t3 = time.time() 184 | 185 | self._time_preprocess.append(t1 - t0) 186 | self._time_forward.append(t2 - t1) 187 | self._time_postprocess.append(t3 - t2) 188 | self._pc_count.append(pc.shape[1]) 189 | self._voxel_count.append(net_input.F.shape[0]) 190 | 191 | return boxes_nms, scores_nms 192 | 193 | def __str__(self): 194 | def val2stat(val_list): 195 | arr = np.array(val_list) 196 | return f"{arr.mean():f} ({arr.std():f})" 197 | 198 | fps = 1.0 / ( 199 | np.array(self._time_forward) 200 | + np.array(self._time_preprocess) 201 | + np.array(self._time_postprocess) 202 | ) 203 | 204 | s = ( 205 | "Summary [ave (std)]\n" 206 | f"Frame count: {len(self._time_preprocess)}\n" 207 | f"Preprocess time: {val2stat(self._time_preprocess)}\n" 208 | f"Forward time: {val2stat(self._time_forward)}\n" 209 | f"Postprocess time: {val2stat(self._time_postprocess)}\n" 210 | f"FPS: {val2stat(fps)}\n" 211 | f"Point per frame: {val2stat(self._pc_count)}\n" 212 | f"Voxel per frame: {val2stat(self._voxel_count)}\n" 213 | ) 214 | return s 215 | 216 | def get_time(self): 217 | return ( 218 | self._time_preprocess, 219 | self._time_forward, 220 | self._time_postprocess, 221 | self._pc_count, 222 | self._voxel_count, 223 | ) 224 | 225 | def reset(self): 226 | 
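        """Clear the accumulated per-frame timing and size statistics."""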
self._pc_count = [] 227 | self._voxel_count = [] 228 | self._time_preprocess = [] 229 | self._time_forward = [] 230 | self._time_postprocess = [] 231 | 232 | 233 | if __name__ == "__main__": 234 | from lidar_det.dataset.handles.jrdb_handle import JRDBHandleDet3D 235 | import lidar_det.utils.jrdb_transforms as tu 236 | from lidar_det.utils.utils_box3d import boxes_to_corners 237 | 238 | from mayavi import mlab 239 | 240 | # some color 241 | gs_blue = (66.0 / 256, 133.0 / 256, 244.0 / 256) 242 | gs_red = (234.0 / 256, 68.0 / 256, 52.0 / 256) 243 | gs_yellow = (251.0 / 256, 188.0 / 256, 4.0 / 256) 244 | gs_green = (52.0 / 256, 168.0 / 256, 83.0 / 256) 245 | gs_orange = (255.0 / 256, 109.0 / 256, 1.0 / 256) 246 | gs_blue_light = (70.0 / 256, 189.0 / 256, 196.0 / 256) 247 | 248 | ckpt_path = "/globalwork/jia/share/JRDB_cvpr21_workshop/logs/unet_bl_voxel_jrdb_0.05_0.1_20210519_232859/ckpt/ckpt_e40.pth" # noqa 249 | detector = DetectorWithClock(ckpt_path) 250 | 251 | loader = JRDBHandleDet3D(data_dir="./data/JRDB", split="train") 252 | 253 | for data_dict in loader: 254 | pc_xyz_upper = tu.transform_pts_upper_velodyne_to_base(data_dict["pc_upper"]) 255 | pc_xyz_lower = tu.transform_pts_lower_velodyne_to_base(data_dict["pc_lower"]) 256 | pc = np.concatenate((pc_xyz_upper, pc_xyz_lower), axis=1) 257 | 258 | boxes, scores = detector(pc) 259 | boxes = boxes[scores > 0.5] 260 | print(boxes.shape) 261 | 262 | fig = mlab.figure( 263 | figure=None, 264 | bgcolor=(1, 1, 1), 265 | fgcolor=(0, 0, 0), 266 | engine=None, 267 | size=(1600, 1000), 268 | ) 269 | 270 | mlab.points3d( 271 | pc[0], pc[1], pc[2], scale_factor=0.05, color=gs_blue, figure=fig, 272 | ) 273 | 274 | corners_xyz, connect_inds = boxes_to_corners(boxes, connect_inds=True) 275 | for corner_xyz in corners_xyz: 276 | for inds in connect_inds: 277 | mlab.plot3d( 278 | corner_xyz[0, inds], 279 | corner_xyz[1, inds], 280 | corner_xyz[2, inds], 281 | tube_radius=None, 282 | line_width=3, 283 | color=gs_yellow, 284 | figure=fig, 285 | ) 286 | 287 | mlab.view(focalpoint=(0, 0, 0)) 288 | mlab.move(190, 0, 5.0) 289 | mlab.pitch(-10) 290 | 291 | mlab.show() 292 | break 293 | 294 | print(detector) 295 | -------------------------------------------------------------------------------- /lidar_det/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import * -------------------------------------------------------------------------------- /lidar_det/model/builder.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from .nets.minkunet import MinkUNet 4 | from .nets.minkunet_pillar import MinkPillarUNet 5 | from .nets.minkresnet import MinkResNet 6 | 7 | __all__ = [ 8 | "get_model", 9 | "MinkUNetDetector", 10 | "MinkPillarUNetDetector", 11 | "MinkResNetDetector", 12 | ] 13 | 14 | 15 | def get_model(model_cfg, inference_only=False): 16 | if model_cfg["type"] == "MinkUNet": 17 | net = MinkUNetDetector(**model_cfg["kwargs"]) 18 | elif model_cfg["type"] == "MinkPillarUNet": 19 | net = MinkPillarUNetDetector(**model_cfg["kwargs"]) 20 | elif model_cfg["type"] == "MinkResNet": 21 | net = MinkResNetDetector(**model_cfg["kwargs"]) 22 | else: 23 | raise RuntimeError(f"Unknown model '{model_cfg['type']}'") 24 | 25 | if not inference_only: 26 | from .model_fn import model_fn, model_eval_fn, model_eval_collate_fn, error_fn 27 | 28 | net.model_fn = partial( 29 | model_fn, 30 | target_mode=model_cfg["target_mode"], 31 | 
disentangled_loss=model_cfg["disentangled_loss"], 32 | ) 33 | net.model_eval_fn = partial(model_eval_fn, nuscenes=model_cfg["nuscenes"]) 34 | net.model_eval_collate_fn = partial( 35 | model_eval_collate_fn, nuscenes=model_cfg["nuscenes"] 36 | ) 37 | net.error_fn = error_fn 38 | 39 | return net 40 | 41 | 42 | class MinkUNetDetector(MinkUNet): 43 | def __init__( 44 | self, 45 | num_anchors, 46 | num_ori_bins, 47 | cr=1.0, 48 | run_up=True, 49 | fpn=False, 50 | num_classes=1, 51 | input_dim=3, 52 | ): 53 | out_dim = _get_num_output_channels(num_ori_bins, num_anchors, num_classes) 54 | super().__init__(cr=cr, run_up=run_up, num_classes=out_dim, input_dim=input_dim) 55 | self._na = num_anchors 56 | self._no = num_ori_bins 57 | self._nc = num_classes 58 | 59 | @property 60 | def num_anchors(self): 61 | return self._na 62 | 63 | @property 64 | def num_classes(self): 65 | return self._nc 66 | 67 | 68 | class MinkPillarUNetDetector(MinkPillarUNet): 69 | def __init__( 70 | self, 71 | num_anchors, 72 | num_ori_bins, 73 | cr=1.0, 74 | run_up=True, 75 | fpn=False, 76 | num_classes=1, 77 | input_dim=3, 78 | ): 79 | out_dim = _get_num_output_channels(num_ori_bins, num_anchors, num_classes) 80 | super().__init__(cr=cr, run_up=run_up, num_classes=out_dim, input_dim=input_dim) 81 | self._na = num_anchors 82 | self._no = num_ori_bins 83 | self._nc = num_classes 84 | 85 | @property 86 | def num_anchors(self): 87 | return self._na 88 | 89 | @property 90 | def num_classes(self): 91 | return self._nc 92 | 93 | 94 | class MinkResNetDetector(MinkResNet): 95 | def __init__( 96 | self, 97 | num_anchors, 98 | num_ori_bins, 99 | cr=1.0, 100 | run_up=True, 101 | fpn=False, 102 | num_classes=1, 103 | input_dim=3, 104 | ): 105 | out_dim = _get_num_output_channels(num_ori_bins, num_anchors, num_classes) 106 | super().__init__(cr=cr, num_classes=out_dim, fpn=fpn, input_dim=input_dim) 107 | self._na = num_anchors 108 | self._no = num_ori_bins 109 | self._nc = num_classes 110 | 111 | @property 112 | def num_anchors(self): 113 | return self._na 114 | 115 | @property 116 | def num_classes(self): 117 | return self._nc 118 | 119 | 120 | def _get_num_output_channels(num_ori_bins, num_anchors, num_classes): 121 | if num_ori_bins > 1: 122 | # out_dim = num_anchors * (num_ori_bins + 8) 123 | out_dim = num_anchors * (2 * num_ori_bins + 7) 124 | else: 125 | out_dim = num_anchors * 8 126 | 127 | out_dim *= num_classes 128 | 129 | return out_dim 130 | -------------------------------------------------------------------------------- /lidar_det/model/nets/minkresnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torchsparse.nn as spnn 5 | 6 | __all__ = ["MinkResNet"] 7 | 8 | 9 | def _ASSERT_EQUAL(arr_a, arr_b, tolerance): 10 | assert (arr_a - arr_b).abs().max() <= tolerance 11 | 12 | 13 | class BasicConvolutionBlock(nn.Module): 14 | def __init__(self, inc, outc, ks=3, stride=1, dilation=1): 15 | super().__init__() 16 | self.net = nn.Sequential( 17 | spnn.Conv3d(inc, outc, kernel_size=ks, dilation=dilation, stride=stride), 18 | spnn.BatchNorm(outc), 19 | spnn.ReLU(True), 20 | ) 21 | 22 | def forward(self, x): 23 | out = self.net(x) 24 | return out 25 | 26 | 27 | class ResidualBlock(nn.Module): 28 | def __init__(self, inc, outc, ks=3, stride=1, dilation=1): 29 | super().__init__() 30 | self.net = nn.Sequential( 31 | spnn.Conv3d(inc, outc, kernel_size=ks, dilation=dilation, stride=stride), 32 | spnn.BatchNorm(outc), 33 | 
spnn.ReLU(True), 34 | spnn.Conv3d(outc, outc, kernel_size=ks, dilation=dilation, stride=1), 35 | spnn.BatchNorm(outc), 36 | ) 37 | 38 | self.downsample = ( 39 | nn.Sequential() 40 | if (inc == outc and stride == 1) 41 | else nn.Sequential( 42 | spnn.Conv3d(inc, outc, kernel_size=1, dilation=1, stride=stride), 43 | spnn.BatchNorm(outc), 44 | ) 45 | ) 46 | 47 | self.relu = spnn.ReLU(True) 48 | 49 | def forward(self, x): 50 | out = self.relu(self.net(x) + self.downsample(x)) 51 | return out 52 | 53 | 54 | class ResNetBlock(nn.Module): 55 | def __init__(self, inc, outc): 56 | super().__init__() 57 | midc = int(inc / 4) 58 | self.net = nn.Sequential( 59 | spnn.Conv3d(inc, midc, kernel_size=1), 60 | spnn.BatchNorm(midc), 61 | spnn.ReLU(True), 62 | spnn.Conv3d(midc, midc, kernel_size=3), 63 | spnn.BatchNorm(midc), 64 | spnn.ReLU(True), 65 | spnn.Conv3d(midc, outc, kernel_size=1), 66 | spnn.BatchNorm(outc), 67 | ) 68 | self.relu = spnn.ReLU(True) 69 | 70 | def forward(self, x): 71 | out = self.relu(self.net(x) + x) 72 | return out 73 | 74 | 75 | class MinkResNet(nn.Module): 76 | def __init__(self, **kwargs): 77 | super().__init__() 78 | 79 | cr = kwargs.get("cr", 1.0) 80 | cs = [64, 128, 256, 512, 512] # channel size 81 | cs = [int(cr * x) for x in cs] 82 | br = [3, 4, 6, 3] # blocks repeat 83 | 84 | input_dim = kwargs.get("input_dim", 3) 85 | 86 | self.stem = nn.Sequential( 87 | spnn.Conv3d(input_dim, cs[0], kernel_size=3, stride=1), 88 | spnn.BatchNorm(cs[0]), 89 | spnn.ReLU(True), 90 | spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1), 91 | spnn.BatchNorm(cs[0]), 92 | spnn.ReLU(True), 93 | ) 94 | 95 | blocks = [] 96 | blocks_ds = [] 97 | for i in range(len(cs) - 1): 98 | blocks_ds.append( 99 | BasicConvolutionBlock(cs[i], cs[i], ks=2, stride=2, dilation=1), 100 | ) 101 | blocks.append( 102 | nn.Sequential( 103 | *[ 104 | ResidualBlock(cs[i], cs[i], ks=3, stride=1, dilation=1) 105 | for _ in range(br[i] - 1) 106 | ], 107 | ResidualBlock(cs[i], cs[i + 1], ks=3, stride=1, dilation=1) 108 | ) 109 | ) 110 | 111 | self.blocks = nn.ModuleList(blocks) 112 | self.blocks_ds = nn.ModuleList(blocks_ds) 113 | 114 | self._fpn = kwargs.get("fpn", False) 115 | if self._fpn: 116 | self.classifier = nn.ModuleList( 117 | [ 118 | nn.Sequential(nn.Linear(cs[i + 1], kwargs["num_classes"])) 119 | for i in range(len(cs) - 1) 120 | ] 121 | ) 122 | else: 123 | self.classifier = nn.Sequential(nn.Linear(cs[-1], kwargs["num_classes"])) 124 | 125 | self.weight_initialization() 126 | self.dropout = nn.Dropout(0.3, True) 127 | 128 | def weight_initialization(self): 129 | for m in self.modules(): 130 | if isinstance(m, nn.BatchNorm1d): 131 | nn.init.constant_(m.weight, 1) 132 | nn.init.constant_(m.bias, 0) 133 | 134 | def forward(self, x): 135 | x0 = self.stem(x) 136 | 137 | inds_maps = [] 138 | voxel_centers = [] 139 | x_fpn = [x0] 140 | for i in range(len(self.blocks)): 141 | x_ds = self.blocks_ds[i](x_fpn[-1]) 142 | x_fpn.append(self.blocks[i](x_ds)) 143 | 144 | # get a mapping from downsampled voxels to the original 145 | # NOTE only downsampling changes the order of voxels 146 | kernel_map = ( 147 | x_ds.kernel_maps["k2_os%d_s2_d1" % 2 ** i][0].data.cpu().numpy() 148 | ) 149 | _, uq_inds = np.unique(kernel_map[:, 1], return_index=True) 150 | inds_map = kernel_map[uq_inds] # NOTE inds_map[:, 1] is sorted by np.unique 151 | if len(inds_maps) > 0: 152 | # kernel map gives mapping to the previous stage, we want mapping 153 | # to the original voxels 154 | inds_map[:, 0] = inds_maps[-1][inds_map[:, 0], 0] 155 | 
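            # after this composition, inds_map[:, 0] indexes the original
            # stride-1 voxels, so the per-stage outputs can later be gathered
            # back onto the input voxels (see the FPN branch below)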
inds_maps.append(inds_map) 156 | 157 | C_center = x_ds.C[:, :3].clone().float() 158 | C_center += 0.5 * x_ds.s 159 | voxel_centers.append(C_center) 160 | 161 | # # for debug 162 | # km = kernel_map[uq_inds] 163 | # _ASSERT_EQUAL(x_fpn[-2].C[km[:, 0]], x_fpn[-1].C, 2 ** i) 164 | # _ASSERT_EQUAL(x.C[inds_maps[-1][:, 0]], x_fpn[-1].C, 2 ** (i + 1) - 1) 165 | # print((x.C[inds_maps[-1][:, 0]] - x_fpn[-1].C).abs().max()) 166 | 167 | if self._fpn: 168 | out = torch.cat( 169 | [ 170 | self.classifier[i](x_fpn[i + 1].F) 171 | for i in range(len(self.classifier)) 172 | ], 173 | dim=0, 174 | ) 175 | 176 | inds_map = np.concatenate([imap[:, 0] for imap in inds_maps]) 177 | voxel_center = torch.cat(voxel_centers, dim=0) 178 | 179 | return out, inds_map, voxel_center 180 | else: 181 | out = self.classifier(x_fpn[-1].F) 182 | inds_map = inds_maps[-1][:, 0] 183 | voxel_center = voxel_centers[-1] 184 | 185 | return out, inds_map, voxel_center 186 | -------------------------------------------------------------------------------- /lidar_det/model/nets/minkunet.py: -------------------------------------------------------------------------------- 1 | """ 2 | From https://github.com/mit-han-lab/e3d/blob/db65d6c968a12d30a4caa2715ef6766ec04d7505/spvnas/core/models/semantic_kitti/minkunet.py 3 | """ 4 | 5 | import time 6 | from collections import OrderedDict 7 | 8 | import torch 9 | import torchsparse 10 | import torch.nn as nn 11 | import torchsparse.nn as spnn 12 | 13 | __all__ = ['MinkUNet'] 14 | 15 | # _ks = 7 16 | _ks = 3 17 | 18 | 19 | class BasicConvolutionBlock(nn.Module): 20 | def __init__(self, inc, outc, ks=3, stride=1, dilation=1): 21 | super().__init__() 22 | if stride == 1 and ks == 3: 23 | ks = _ks 24 | self.net = nn.Sequential( 25 | spnn.Conv3d(inc, 26 | outc, 27 | kernel_size=ks, 28 | dilation=dilation, 29 | stride=stride), spnn.BatchNorm(outc), 30 | spnn.ReLU(True)) 31 | 32 | def forward(self, x): 33 | out = self.net(x) 34 | return out 35 | 36 | 37 | class BasicDeconvolutionBlock(nn.Module): 38 | def __init__(self, inc, outc, ks=3, stride=1): 39 | super().__init__() 40 | if stride == 1 and ks == 3: 41 | ks = _ks 42 | self.net = nn.Sequential( 43 | spnn.Conv3d(inc, 44 | outc, 45 | kernel_size=ks, 46 | stride=stride, 47 | transpose=True), spnn.BatchNorm(outc), 48 | spnn.ReLU(True)) 49 | 50 | def forward(self, x): 51 | return self.net(x) 52 | 53 | 54 | class ResidualBlock(nn.Module): 55 | def __init__(self, inc, outc, ks=3, stride=1, dilation=1): 56 | super().__init__() 57 | if stride == 1 and ks == 3: 58 | ks = _ks 59 | self.net = nn.Sequential( 60 | spnn.Conv3d(inc, 61 | outc, 62 | kernel_size=ks, 63 | dilation=dilation, 64 | stride=stride), spnn.BatchNorm(outc), 65 | spnn.ReLU(True), 66 | spnn.Conv3d(outc, 67 | outc, 68 | kernel_size=ks, 69 | dilation=dilation, 70 | stride=1), spnn.BatchNorm(outc)) 71 | 72 | self.downsample = nn.Sequential() if (inc == outc and stride == 1) else \ 73 | nn.Sequential( 74 | spnn.Conv3d(inc, outc, kernel_size=1, dilation=1, stride=stride), 75 | spnn.BatchNorm(outc) 76 | ) 77 | 78 | self.relu = spnn.ReLU(True) 79 | 80 | def forward(self, x): 81 | out = self.relu(self.net(x) + self.downsample(x)) 82 | return out 83 | 84 | 85 | class MinkUNet(nn.Module): 86 | def __init__(self, **kwargs): 87 | super().__init__() 88 | 89 | cr = kwargs.get('cr', 1.0) 90 | cs = [32, 32, 64, 128, 256, 256, 128, 96, 96] 91 | cs = [int(cr * x) for x in cs] 92 | self.run_up = kwargs.get('run_up', True) 93 | 94 | input_dim = kwargs.get("input_dim", 3) 95 | 96 | self.stem = nn.Sequential( 97 | 
# spnn.Conv3d(3, cs[0], kernel_size=3, stride=1), 98 | spnn.Conv3d(input_dim, cs[0], kernel_size=_ks, stride=1), 99 | spnn.BatchNorm(cs[0]), spnn.ReLU(True), 100 | # spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1), 101 | spnn.Conv3d(cs[0], cs[0], kernel_size=_ks, stride=1), 102 | spnn.BatchNorm(cs[0]), spnn.ReLU(True)) 103 | 104 | self.stage1 = nn.Sequential( 105 | BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1), 106 | ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1), 107 | ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1), 108 | ) 109 | 110 | self.stage2 = nn.Sequential( 111 | BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1), 112 | ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1), 113 | ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1)) 114 | 115 | self.stage3 = nn.Sequential( 116 | BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1), 117 | ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1), 118 | ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1), 119 | ) 120 | 121 | self.stage4 = nn.Sequential( 122 | BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1), 123 | ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1), 124 | ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1), 125 | ) 126 | 127 | self.up1 = nn.ModuleList([ 128 | BasicDeconvolutionBlock(cs[4], cs[5], ks=2, stride=2), 129 | nn.Sequential( 130 | ResidualBlock(cs[5] + cs[3], cs[5], ks=3, stride=1, 131 | dilation=1), 132 | ResidualBlock(cs[5], cs[5], ks=3, stride=1, dilation=1), 133 | ) 134 | ]) 135 | 136 | self.up2 = nn.ModuleList([ 137 | BasicDeconvolutionBlock(cs[5], cs[6], ks=2, stride=2), 138 | nn.Sequential( 139 | ResidualBlock(cs[6] + cs[2], cs[6], ks=3, stride=1, 140 | dilation=1), 141 | ResidualBlock(cs[6], cs[6], ks=3, stride=1, dilation=1), 142 | ) 143 | ]) 144 | 145 | self.up3 = nn.ModuleList([ 146 | BasicDeconvolutionBlock(cs[6], cs[7], ks=2, stride=2), 147 | nn.Sequential( 148 | ResidualBlock(cs[7] + cs[1], cs[7], ks=3, stride=1, 149 | dilation=1), 150 | ResidualBlock(cs[7], cs[7], ks=3, stride=1, dilation=1), 151 | ) 152 | ]) 153 | 154 | self.up4 = nn.ModuleList([ 155 | BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2), 156 | nn.Sequential( 157 | ResidualBlock(cs[8] + cs[0], cs[8], ks=3, stride=1, 158 | dilation=1), 159 | ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1), 160 | ) 161 | ]) 162 | 163 | self.classifier = nn.Sequential(nn.Linear(cs[8], 164 | kwargs['num_classes'])) 165 | 166 | # self.point_transforms = nn.ModuleList([ 167 | # nn.Sequential( 168 | # nn.Linear(cs[0], cs[4]), 169 | # nn.BatchNorm1d(cs[4]), 170 | # nn.ReLU(True), 171 | # ), 172 | # nn.Sequential( 173 | # nn.Linear(cs[4], cs[6]), 174 | # nn.BatchNorm1d(cs[6]), 175 | # nn.ReLU(True), 176 | # ), 177 | # nn.Sequential( 178 | # nn.Linear(cs[6], cs[8]), 179 | # nn.BatchNorm1d(cs[8]), 180 | # nn.ReLU(True), 181 | # ) 182 | # ]) 183 | 184 | self.weight_initialization() 185 | self.dropout = nn.Dropout(0.3, True) 186 | 187 | def weight_initialization(self): 188 | for m in self.modules(): 189 | if isinstance(m, nn.BatchNorm1d): 190 | nn.init.constant_(m.weight, 1) 191 | nn.init.constant_(m.bias, 0) 192 | 193 | def forward(self, x): 194 | x0 = self.stem(x) 195 | x1 = self.stage1(x0) 196 | x2 = self.stage2(x1) 197 | x3 = self.stage3(x2) 198 | x4 = self.stage4(x3) 199 | 200 | y1 = self.up1[0](x4) 201 | y1 = torchsparse.cat([y1, x3]) 202 | y1 = self.up1[1](y1) 203 | 204 | y2 = self.up2[0](y1) 205 | y2 = torchsparse.cat([y2, x2]) 206 | y2 = 
self.up2[1](y2) 207 | 208 | y3 = self.up3[0](y2) 209 | y3 = torchsparse.cat([y3, x1]) 210 | y3 = self.up3[1](y3) 211 | 212 | y4 = self.up4[0](y3) 213 | y4 = torchsparse.cat([y4, x0]) 214 | y4 = self.up4[1](y4) 215 | 216 | out = self.classifier(y4.F) 217 | 218 | return out -------------------------------------------------------------------------------- /lidar_det/model/nets/minkunet_pillar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | import torchsparse 5 | import torchsparse.nn as spnn 6 | import torchsparse.nn.functional as spf 7 | 8 | __all__ = ["MinkPillarUNet"] 9 | 10 | 11 | class BasicConvolutionBlock(nn.Module): 12 | def __init__(self, inc, outc, ks=3, stride=1, dilation=1): 13 | super().__init__() 14 | self.net = nn.Sequential( 15 | spnn.Conv3d(inc, outc, kernel_size=ks, dilation=dilation, stride=stride), 16 | spnn.BatchNorm(outc), 17 | spnn.ReLU(True), 18 | ) 19 | 20 | def forward(self, x): 21 | out = self.net(x) 22 | return out 23 | 24 | 25 | class PillarBlock(nn.Module): 26 | def __init__(self, channel, pdim=2): 27 | super().__init__() 28 | self.pdim = pdim 29 | l = [] 30 | for i in range(2): 31 | l.append( 32 | nn.Sequential( 33 | nn.Linear(channel, channel), nn.BatchNorm1d(channel), nn.ReLU(True), 34 | ), 35 | ) 36 | self.mlp = nn.Sequential(*l) 37 | 38 | def forward(self, x): 39 | # average pooling over all voxels within a pillar 40 | # https://github.com/mit-han-lab/torchsparse/blob/master/torchsparse/nn/modules/conv.py#L89 # noqa 41 | coords, feats = x.C.clone(), x.F 42 | coords[:, self.pdim] = 0 43 | coords -= coords.min(dim=0, keepdim=True)[0] 44 | feats = torch.cat([torch.ones_like(feats[:, :1]), feats], axis=1) 45 | sp_tensor = torch.cuda.sparse.FloatTensor(coords.t().long(), feats).coalesce() 46 | coords_coalesced = sp_tensor.indices().t().int() 47 | feats_coalesced = sp_tensor.values()[:, 1:] / sp_tensor.values()[:, :1].detach() 48 | 49 | feats_coalesced = self.mlp(feats_coalesced) 50 | 51 | # add pillar feature back to each voxel 52 | # https://github.com/mit-han-lab/e3d/blob/d58b12877d73f812b3f0b99ee77b72c4aad7e8da/spvnas/core/models/utils.py#L49 # noqa 53 | hash_coalesced = spf.sphash(coords_coalesced) 54 | hash_raw = spf.sphash(coords) 55 | idx = spf.sphashquery(hash_raw, hash_coalesced) 56 | x.F = x.F + feats_coalesced[idx] 57 | 58 | return x 59 | 60 | 61 | class BasicDeconvolutionBlock(nn.Module): 62 | def __init__(self, inc, outc, ks=3, stride=1): 63 | super().__init__() 64 | self.net = nn.Sequential( 65 | spnn.Conv3d(inc, outc, kernel_size=ks, stride=stride, transpose=True), 66 | spnn.BatchNorm(outc), 67 | spnn.ReLU(True), 68 | ) 69 | 70 | def forward(self, x): 71 | return self.net(x) 72 | 73 | 74 | class ResidualBlock(nn.Module): 75 | def __init__(self, inc, outc, ks=3, stride=1, dilation=1): 76 | super().__init__() 77 | self.net = nn.Sequential( 78 | spnn.Conv3d(inc, outc, kernel_size=ks, dilation=dilation, stride=stride), 79 | spnn.BatchNorm(outc), 80 | spnn.ReLU(True), 81 | spnn.Conv3d(outc, outc, kernel_size=ks, dilation=dilation, stride=1), 82 | spnn.BatchNorm(outc), 83 | ) 84 | 85 | self.downsample = ( 86 | nn.Sequential() 87 | if (inc == outc and stride == 1) 88 | else nn.Sequential( 89 | spnn.Conv3d(inc, outc, kernel_size=1, dilation=1, stride=stride), 90 | spnn.BatchNorm(outc), 91 | ) 92 | ) 93 | 94 | self.relu = spnn.ReLU(True) 95 | 96 | def forward(self, x): 97 | out = self.relu(self.net(x) + self.downsample(x)) 98 | return out 99 | 100 | 101 | class 
MinkPillarUNet(nn.Module): 102 | def __init__(self, **kwargs): 103 | super().__init__() 104 | 105 | cr = kwargs.get("cr", 1.0) 106 | cs = [32, 32, 64, 128, 256, 256, 128, 96, 96] 107 | cs = [int(cr * x) for x in cs] 108 | self.run_up = kwargs.get("run_up", True) 109 | 110 | input_dim = kwargs.get("input_dim", 3) 111 | 112 | self.stem = nn.Sequential( 113 | spnn.Conv3d(input_dim, cs[0], kernel_size=3, stride=1), 114 | spnn.BatchNorm(cs[0]), 115 | spnn.ReLU(True), 116 | # PillarBlock(cs[0]), 117 | spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1), 118 | spnn.BatchNorm(cs[0]), 119 | spnn.ReLU(True), 120 | ) 121 | 122 | self.stage1 = nn.Sequential( 123 | BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1), 124 | PillarBlock(cs[0]), 125 | ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1), 126 | # PillarBlock(cs[1]), 127 | ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1), 128 | ) 129 | 130 | self.stage2 = nn.Sequential( 131 | BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1), 132 | PillarBlock(cs[1]), 133 | ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1), 134 | # PillarBlock(cs[2]), 135 | ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1), 136 | ) 137 | 138 | self.stage3 = nn.Sequential( 139 | BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1), 140 | PillarBlock(cs[2]), 141 | ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1), 142 | # PillarBlock(cs[3]), 143 | ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1), 144 | ) 145 | 146 | self.stage4 = nn.Sequential( 147 | BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1), 148 | PillarBlock(cs[3]), 149 | ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1), 150 | # PillarBlock(cs[4]), 151 | ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1), 152 | ) 153 | 154 | self.up1 = nn.ModuleList( 155 | [ 156 | BasicDeconvolutionBlock(cs[4], cs[5], ks=2, stride=2), 157 | nn.Sequential( 158 | ResidualBlock(cs[5] + cs[3], cs[5], ks=3, stride=1, dilation=1), 159 | ResidualBlock(cs[5], cs[5], ks=3, stride=1, dilation=1), 160 | ), 161 | ] 162 | ) 163 | 164 | self.up2 = nn.ModuleList( 165 | [ 166 | BasicDeconvolutionBlock(cs[5], cs[6], ks=2, stride=2), 167 | nn.Sequential( 168 | ResidualBlock(cs[6] + cs[2], cs[6], ks=3, stride=1, dilation=1), 169 | ResidualBlock(cs[6], cs[6], ks=3, stride=1, dilation=1), 170 | ), 171 | ] 172 | ) 173 | 174 | self.up3 = nn.ModuleList( 175 | [ 176 | BasicDeconvolutionBlock(cs[6], cs[7], ks=2, stride=2), 177 | nn.Sequential( 178 | ResidualBlock(cs[7] + cs[1], cs[7], ks=3, stride=1, dilation=1), 179 | ResidualBlock(cs[7], cs[7], ks=3, stride=1, dilation=1), 180 | ), 181 | ] 182 | ) 183 | 184 | self.up4 = nn.ModuleList( 185 | [ 186 | BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2), 187 | nn.Sequential( 188 | ResidualBlock(cs[8] + cs[0], cs[8], ks=3, stride=1, dilation=1), 189 | ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1), 190 | ), 191 | ] 192 | ) 193 | 194 | self.classifier = nn.Sequential(nn.Linear(cs[8], kwargs["num_classes"])) 195 | 196 | self.weight_initialization() 197 | self.dropout = nn.Dropout(0.3, True) 198 | 199 | def weight_initialization(self): 200 | for m in self.modules(): 201 | if isinstance(m, nn.BatchNorm1d): 202 | nn.init.constant_(m.weight, 1) 203 | nn.init.constant_(m.bias, 0) 204 | 205 | def forward(self, x): 206 | x0 = self.stem(x) 207 | x1 = self.stage1(x0) 208 | x2 = self.stage2(x1) 209 | x3 = self.stage3(x2) 210 | x4 = self.stage4(x3) 211 | 212 | y1 = self.up1[0](x4) 213 | y1 = torchsparse.cat([y1, x3]) 
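        # decoder path: each up-block upsamples, concatenates the matching
        # encoder features along the channel dimension (torchsparse.cat), and
        # refines the result with residual blocks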
214 | y1 = self.up1[1](y1) 215 | 216 | y2 = self.up2[0](y1) 217 | y2 = torchsparse.cat([y2, x2]) 218 | y2 = self.up2[1](y2) 219 | 220 | y3 = self.up3[0](y2) 221 | y3 = torchsparse.cat([y3, x1]) 222 | y3 = self.up3[1](y3) 223 | 224 | y4 = self.up4[0](y3) 225 | y4 = torchsparse.cat([y4, x0]) 226 | y4 = self.up4[1](y4) 227 | 228 | out = self.classifier(y4.F) 229 | 230 | return out 231 | -------------------------------------------------------------------------------- /lidar_det/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import * 2 | from .logger import * 3 | from .optim import * 4 | from .trainer import * -------------------------------------------------------------------------------- /lidar_det/pipeline/bin/clean_log_dir.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | 5 | 6 | def _parse_args(): 7 | parser = argparse.ArgumentParser(description="arg parser") 8 | parser.add_argument( 9 | "--dir", type=str, required=True, help="directory to be cleaned" 10 | ) 11 | parser.add_argument( 12 | "--date", type=int, required=True, help="logs before this date will be removed" 13 | ) 14 | args = parser.parse_args() 15 | 16 | assert args.date >= 20200101 and args.date <= 20201231 17 | 18 | return args 19 | 20 | 21 | def _user_confirms(prompt): 22 | yes = {"yes", "y"} 23 | no = {"no", "n"} 24 | 25 | choice = input(prompt).lower() 26 | if choice in yes: 27 | return True 28 | elif choice in no: 29 | return False 30 | else: 31 | print(f"Invalid input: {choice}") 32 | return False 33 | 34 | 35 | def _log_dates_earlier_than(log_dir, reference_date): 36 | log_date = int(os.path.basename(log_dir).split("_")[0]) 37 | return log_date < reference_date 38 | 39 | 40 | def clean_log_dir(log_dir, latest_date): 41 | logs_list = sorted( 42 | [ 43 | x 44 | for x in os.listdir(log_dir) 45 | if os.path.isdir(os.path.join(log_dir, x)) 46 | and _log_dates_earlier_than(x, latest_date) 47 | ] 48 | ) 49 | 50 | print("Following logs will be removed:") 51 | for x in logs_list: 52 | print(os.path.join(log_dir, x)) 53 | 54 | if not _user_confirms("Continue [y/n]?"): 55 | return 56 | 57 | for x in logs_list: 58 | x = os.path.join(log_dir, x) 59 | print(f"Remove {x}") 60 | shutil.rmtree(x) 61 | 62 | 63 | if __name__ == "__main__": 64 | args = _parse_args() 65 | 66 | clean_log_dir(args.dir, args.date) 67 | -------------------------------------------------------------------------------- /lidar_det/pipeline/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | # import pickle 5 | from shutil import copyfile 6 | import time 7 | 8 | # import json 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from tensorboardX import SummaryWriter 12 | import torch 13 | 14 | 15 | def _create_logger(root_dir, file_name="log.txt"): 16 | log_file = os.path.join(root_dir, file_name) 17 | log_format = "%(asctime)s %(levelname)5s %(message)s" 18 | logging.basicConfig(level=logging.DEBUG, format=log_format, filename=log_file) 19 | console = logging.StreamHandler() 20 | console.setLevel(logging.DEBUG) 21 | console.setFormatter(logging.Formatter(log_format)) 22 | logging.getLogger(__name__).addHandler(console) 23 | return logging.getLogger(__name__) 24 | 25 | 26 | class Logger: 27 | def __init__(self, cfg): 28 | cfg["log_dir"] = os.path.abspath(os.path.expanduser(cfg["log_dir"])) 29 | 30 | # main log 31 
| if "use_timestamp" in cfg.keys() and cfg["use_timestamp"] is False: 32 | dir_name = cfg["tag"] 33 | else: 34 | timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime()) 35 | dir_name = f"{cfg['tag']}_{timestamp}" 36 | self.__log_dir = os.path.join(cfg["log_dir"], dir_name) 37 | os.makedirs(self.__log_dir, exist_ok=True) 38 | 39 | self.__log = _create_logger(self.__log_dir, cfg["log_fname"]) 40 | self.log_debug(f"Log directory: {self.__log_dir}") 41 | 42 | # backup important files (e.g. config.yaml) 43 | self.__backup_dir = os.path.join(self.__log_dir, "backup") 44 | os.makedirs(self.__backup_dir, exist_ok=True) 45 | for file_ in cfg["backup_list"]: 46 | self.log_debug(f"Backup {file_}") 47 | copyfile( 48 | os.path.abspath(file_), 49 | os.path.join(self.__backup_dir, os.path.basename(file_)), 50 | ) 51 | 52 | # for storing results (network output etc.) 53 | self.__output_dir = os.path.join(self.__log_dir, "output") 54 | os.makedirs(self.__output_dir, exist_ok=True) 55 | 56 | # for storing ckpt 57 | self.__ckpt_dir = os.path.join(self.__log_dir, "ckpt") 58 | os.makedirs(self.__ckpt_dir, exist_ok=True) 59 | 60 | # for tensorboard 61 | tb_dir = os.path.join(self.__log_dir, "tb") 62 | os.makedirs(tb_dir, exist_ok=True) 63 | self.__tb = SummaryWriter(log_dir=tb_dir) 64 | 65 | # the sigterm checkpoint 66 | self.__sigterm_ckpt = os.path.join( 67 | cfg["log_dir"], f"sigterm_ckpt_{cfg['tag']}.pth" 68 | ) 69 | 70 | gpu = ( 71 | os.environ["CUDA_VISIBLE_DEVICES"] 72 | if "CUDA_VISIBLE_DEVICES" in os.environ.keys() 73 | else "ALL" 74 | ) 75 | self.log_info(f"CUDA_VISIBLE_DEVICES={gpu}") 76 | 77 | def flush(self): 78 | self.__tb.flush() 79 | 80 | def close(self): 81 | self.__tb.close() 82 | handlers = self.__log.handlers[:] 83 | for handler in handlers: 84 | handler.close() 85 | self.__log.removeHandler(handler) 86 | 87 | """ 88 | Python log 89 | """ 90 | def log_error(self, s): 91 | self.__log.error(s) 92 | 93 | def log_warning(self, s): 94 | self.__log.warning(s) 95 | 96 | def log_info(self, s): 97 | self.__log.info(s) 98 | 99 | def log_debug(self, s): 100 | self.__log.debug(s) 101 | 102 | """ 103 | Add to tensorboard 104 | """ 105 | 106 | def add_scalar(self, key, val, step): 107 | self.__tb.add_scalar(key, val, step) 108 | 109 | def add_fig(self, key, fig, step, close_fig=False): 110 | """Convert a python fig to np.ndarry and add it to tensorboard""" 111 | fig.canvas.draw() 112 | im = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="") 113 | im = im.reshape(fig.canvas.get_width_height()[::-1] + (3,)) 114 | im = im.transpose(2, 0, 1) # (3, H, W) 115 | im = im.astype(np.float32) / 255.0 116 | self.add_im(key, im, step) 117 | 118 | if close_fig: 119 | plt.close(fig) 120 | 121 | def add_im(self, key, im, step): 122 | """Add an image to tensorboard. The image should be as np.ndarray 123 | https://tensorboardx.readthedocs.io/en/latest/tutorial.html#add-image 124 | """ 125 | self.__tb.add_image(key, im, step) 126 | 127 | """ 128 | Save to system 129 | """ 130 | 131 | def get_save_dir(self, epoch, split): 132 | return os.path.join(self.__output_dir, split, f"e{epoch:06d}") 133 | 134 | # def save_dict(self, dict_, file_name, epoch, split): 135 | # """Save the dictionary to a pickle file. Single value items in the dictionary 136 | # are stored in addition as a json file for easy inspection. 
137 | # """ 138 | # json_dict = {} 139 | # for key, val in dict_.items(): 140 | # if not isinstance(val, (np.ndarray, tuple, list, dict)): 141 | # json_dict[key] = str(val) 142 | 143 | # save_dir = self.get_save_dir(epoch, split) 144 | # json_fname = os.path.join(save_dir, f"{file_name}.json") 145 | # os.makedirs(os.path.dirname(json_fname), exist_ok=True) 146 | # with open(json_fname, "w") as fp: 147 | # json.dump(json_dict, fp, sort_keys=True, indent=4) 148 | # self.log_info(f"Dictonary saved to {json_fname}.") 149 | 150 | # pickle_fname = os.path.join(save_dir, f"{file_name}.pkl") 151 | # with open(pickle_fname, "wb") as fp: 152 | # pickle.dump(dict_, fp, protocol=pickle.HIGHEST_PROTOCOL) 153 | # self.log_info(f"Dictonary saved to {pickle_fname}.") 154 | 155 | # def save_fig(self, fig, file_name, epoch, split, close_fig=True): 156 | # fname = os.path.join(self.get_save_dir(epoch, split), f"{file_name}.png") 157 | # os.makedirs(os.path.dirname(fname), exist_ok=True) 158 | # fig.savefig(fname) 159 | # if close_fig: 160 | # plt.close(fig) 161 | 162 | # def save_file(self, file_str, file_name, epoch, split): 163 | # fname = os.path.join(self.get_save_dir(epoch, split), f"{file_name}.txt") 164 | # os.makedirs(os.path.dirname(fname), exist_ok=True) 165 | # with open(fname, "w") as f: 166 | # f.write(file_str) 167 | 168 | """ 169 | Save and load checkpoints 170 | """ 171 | 172 | def save_ckpt(self, fname, model, optimizer, epoch, step): 173 | if not os.path.dirname(fname): 174 | fname = os.path.join(self.__ckpt_dir, fname) 175 | 176 | if model is not None: 177 | if isinstance(model, torch.nn.DataParallel): 178 | model_state = model.module.state_dict() 179 | else: 180 | model_state = model.state_dict() 181 | else: 182 | model_state = None 183 | optim_state = optimizer.state_dict() if optimizer is not None else None 184 | 185 | ckpt_dict = { 186 | "epoch": epoch, 187 | "step": step, 188 | "model_state": model_state, 189 | "optimizer_state": optim_state, 190 | } 191 | torch.save(ckpt_dict, fname) 192 | self.log_info(f"Checkpoint saved to {fname}.") 193 | 194 | def load_ckpt(self, fname, model, optimizer=None): 195 | ckpt = torch.load(fname) 196 | epoch = ckpt["epoch"] if "epoch" in ckpt.keys() else 0 197 | step = ckpt["step"] if "step" in ckpt.keys() else 0 198 | 199 | model.load_state_dict(ckpt["model_state"]) 200 | 201 | if optimizer is not None: 202 | optimizer.load_state_dict(ckpt["optimizer_state"]) 203 | 204 | self.log_info(f"Load checkpoint {fname}: epoch {epoch}, step {step}.") 205 | 206 | return epoch, step 207 | 208 | def save_sigterm_ckpt(self, model, optimizer, epoch, step): 209 | """Save a checkpoint, which another process can use to continue the training, 210 | if the current process is terminated or preempted. This checkpoint should 211 | be saved in a process-agnoistic directory such that it can be located by 212 | both processes. 
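        For example, a restarted job can first check `Pipeline.sigterm_ckpt_exists()`
        and, if it returns True, call `Pipeline.load_sigterm_ckpt(model)` to restore
        the model and optimizer states together with the epoch and step counters.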
213 | """ 214 | self.save_ckpt(self.__sigterm_ckpt, model, optimizer, epoch, step) 215 | 216 | def load_sigterm_ckpt(self, model, optimizer): 217 | return self.load_ckpt(self.__sigterm_ckpt, model, optimizer) 218 | 219 | def sigterm_ckpt_exists(self): 220 | return os.path.isfile(self.__sigterm_ckpt) 221 | -------------------------------------------------------------------------------- /lidar_det/pipeline/loss_lib.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class PartiallyHuberisedBCELoss(nn.Module): 9 | """partially Huberised softmax cross-entrop 10 | https://openreview.net/pdf?id=rklB76EKPr 11 | """ 12 | 13 | def __init__(self, tau=5.0): 14 | super(PartiallyHuberisedBCELoss, self).__init__() 15 | self._tau = tau 16 | self._log_tau = math.log(self._tau) 17 | self._inv_tau = 1.0 / self._tau 18 | 19 | def forward(self, pred, target, reduction="mean"): 20 | pred_logits = torch.sigmoid(pred) 21 | neg_pred_logits = 1.0 - pred_logits 22 | 23 | loss_pos = -self._tau * pred_logits + self._log_tau + 1.0 24 | pos_mask = pred_logits > self._inv_tau 25 | loss_pos[pos_mask] = -torch.log(pred_logits[pos_mask]) 26 | 27 | loss_neg = -self._tau * neg_pred_logits + self._log_tau + 1.0 28 | neg_mask = neg_pred_logits > self._inv_tau 29 | loss_neg[neg_mask] = -torch.log(neg_pred_logits[neg_mask]) 30 | 31 | loss = target * loss_pos + (1.0 - target) * loss_neg 32 | 33 | if reduction == "mean": 34 | return loss.mean() 35 | elif reduction == "sum": 36 | return loss.sum() 37 | elif reduction == "none": 38 | return loss 39 | else: 40 | raise RuntimeError 41 | 42 | 43 | class SelfPacedLearningLoss(nn.Module): 44 | """Self-paced learning loss 45 | https://arxiv.org/abs/1712.05055 46 | https://papers.nips.cc/paper/3923-self-paced-learning-for-latent-variable-models 47 | https://arxiv.org/abs/1511.06049 48 | """ 49 | 50 | def __init__(self, base_loss, lam1=0.4, lam2=0.5, alpha=1e-2): 51 | super(SelfPacedLearningLoss, self).__init__() 52 | self._base_loss = base_loss 53 | self._lam1 = lam1 54 | self._lam2 = lam2 55 | 56 | self._l1 = None 57 | self._l2 = None 58 | self._alpha = alpha 59 | 60 | self._lam1_max = 0.60 61 | self._lam2_max = 0.72 62 | 63 | self._step = -1 64 | self._burn_in = False 65 | self._burn_in_step = int(2629 * 0.5) 66 | self._update_step = int(2629) 67 | self._update_rate = 1.02 68 | 69 | def forward(self, pred, target, reduction="mean"): 70 | self._update() 71 | 72 | if self._burn_in: 73 | return self._base_loss(pred, target, reduction=reduction) 74 | 75 | # raw loss 76 | base_loss = self._base_loss(pred, target, reduction="none") 77 | 78 | # exponential moving average of loss percentile 79 | with torch.no_grad(): 80 | l1_now = self._percentile(base_loss, self._lam1) 81 | self._l1 = ( 82 | self._alpha * l1_now + (1.0 - self._alpha) * self._l1 83 | if self._l1 is not None 84 | else l1_now 85 | ) 86 | 87 | l2_now = self._percentile(base_loss, self._lam2) 88 | self._l2 = ( 89 | self._alpha * l2_now + (1.0 - self._alpha) * self._l2 90 | if self._l2 is not None 91 | else l2_now 92 | ) 93 | 94 | # compute v 95 | v = (1.0 - (base_loss - self._l1) / (self._l2 - self._l1)).clamp( 96 | min=0.0, max=1.0 97 | ) 98 | 99 | # weighted loss 100 | loss = base_loss * v 101 | 102 | # NOTE reweight the loss, it may be better to use a fixed weighting factor 103 | # loss = loss / (v.sum() / v.numel()) 104 | loss = loss / (loss.sum() / base_loss.sum()) 105 | 106 | if 
reduction == "mean": 107 | return loss.mean() 108 | elif reduction == "sum": 109 | return loss.sum() 110 | elif reduction == "none": 111 | return loss 112 | else: 113 | raise RuntimeError 114 | 115 | def _update(self): 116 | self._step += 1 117 | 118 | if self._burn_in: 119 | if self._step >= self._burn_in_step: 120 | self._burn_in = False 121 | self._step = 0 122 | else: 123 | if self._step >= self._update_step: 124 | self._lam1 = min(self._lam1_max, self._lam1 * self._update_rate) 125 | self._lam2 = min(self._lam2_max, self._lam2 * self._update_rate) 126 | self._step = 0 127 | 128 | def _percentile(self, t, q): 129 | """ 130 | From https://gist.github.com/spezold/42a451682422beb42bc43ad0c0967a30 131 | """ 132 | # Note that ``kthvalue()`` works one-based, i.e. the first sorted value 133 | # indeed corresponds to k=1, not k=0! Use float(q) instead of q directly, 134 | # so that ``round()`` returns an integer, even if q is a np.float32. 135 | k = 1 + round(float(q) * (t.numel() - 1)) 136 | result = t.kthvalue(k).values.item() 137 | return result 138 | 139 | 140 | class SymmetricBCELoss(nn.Module): 141 | """Symmetric Cross Entropy loss https://arxiv.org/pdf/1908.06112.pdf 142 | for binary classification 143 | """ 144 | 145 | def __init__(self, alpha=0.1, beta=0.5, A=-6): 146 | assert A < 0.0 147 | super(SymmetricBCELoss, self).__init__() 148 | self._alpha = alpha 149 | self._beta = beta 150 | self._A = A 151 | 152 | def forward(self, pred, target, reduction="mean"): 153 | bce_loss = F.binary_cross_entropy_with_logits(pred, target, reduction="none") 154 | 155 | log_target_pos = torch.log(target + 1e-10).clamp_min(self._A) 156 | log_target_neg = torch.log(1.0 - target + 1e-10).clamp_min(self._A) 157 | pred_logits = torch.sigmoid(pred) 158 | 159 | rbce_loss = -log_target_pos * pred_logits - log_target_neg * (1.0 - pred_logits) 160 | 161 | loss = self._alpha * bce_loss + self._beta * rbce_loss 162 | 163 | if reduction == "mean": 164 | return loss.mean() 165 | elif reduction == "sum": 166 | return loss.sum() 167 | elif reduction == "none": 168 | return loss 169 | else: 170 | raise RuntimeError 171 | 172 | 173 | class FocalLoss(nn.Module): 174 | """From https://github.com/mbsariyildiz/focal-loss.pytorch/blob/master/focalloss.py 175 | """ 176 | 177 | def __init__(self, gamma=0, alpha=None): 178 | super(FocalLoss, self).__init__() 179 | self.gamma = gamma 180 | self.alpha = alpha 181 | if isinstance(alpha, (float, int)): 182 | self.alpha = torch.Tensor([alpha, 1 - alpha]) 183 | if isinstance(alpha, list): 184 | self.alpha = torch.Tensor(alpha) 185 | 186 | def forward(self, input, target, reduction="mean"): 187 | if input.dim() > 2: 188 | input = input.view(input.size(0), input.size(1), -1) # N,C,H,W => N,C,H*W 189 | input = input.transpose(1, 2) # N,C,H*W => N,H*W,C 190 | input = input.contiguous().view(-1, input.size(2)) # N,H*W,C => N*H*W,C 191 | target = target.view(-1, 1) 192 | 193 | logpt = F.log_softmax(input, dim=1) 194 | logpt = logpt.gather(1, target) 195 | logpt = logpt.view(-1) 196 | pt = logpt.exp() 197 | 198 | if self.alpha is not None: 199 | if self.alpha.type() != input.data.type(): 200 | self.alpha = self.alpha.type_as(input.data) 201 | at = self.alpha.gather(0, target.data.view(-1)) 202 | logpt = logpt * at 203 | 204 | loss = -1 * (1 - pt) ** self.gamma * logpt 205 | 206 | if reduction == "mean": 207 | return loss.mean() 208 | elif reduction == "sum": 209 | return loss.sum() 210 | elif reduction == "none": 211 | return loss 212 | else: 213 | raise RuntimeError 214 | 215 | 216 | 
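# A minimal usage sketch for the binary focal loss defined below (illustrative
# values only; nothing in this module calls it this way):
#
#     import torch
#     logits = torch.randn(8)                     # raw, pre-sigmoid scores
#     labels = torch.randint(0, 2, (8,)).float()  # 0/1 targets
#     criterion = BinaryFocalLoss(gamma=2.0, alpha=0.25)
#     loss = criterion(logits, labels)            # scalar (reduction="mean")
#     # or equivalently: binary_focal_loss(logits, labels, gamma=2.0, alpha=0.25)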
class BinaryFocalLoss(nn.Module): 217 | def __init__(self, gamma=2.0, alpha=-1): 218 | super(BinaryFocalLoss, self).__init__() 219 | self.gamma, self.alpha = gamma, alpha 220 | 221 | def forward(self, pred, target, reduction="mean"): 222 | return binary_focal_loss(pred, target, self.gamma, self.alpha, reduction) 223 | 224 | 225 | def binary_focal_loss(pred, target, gamma=2.0, alpha=-1, reduction="mean"): 226 | p = torch.sigmoid(pred) 227 | loss_pos = -target * (1.0 - p) ** gamma * torch.log(p + 1e-9) 228 | loss_neg = -(1.0 - target) * p ** gamma * torch.log(1.0 - p + 1e-9) 229 | 230 | if alpha >= 0.0 and alpha <= 1.0: 231 | loss_pos = loss_pos * alpha 232 | loss_neg = loss_neg * (1.0 - alpha) 233 | 234 | loss = loss_pos + loss_neg 235 | 236 | if reduction == "mean": 237 | return loss.mean() 238 | elif reduction == "sum": 239 | return loss.sum() 240 | elif reduction == "none": 241 | return loss 242 | else: 243 | raise RuntimeError 244 | -------------------------------------------------------------------------------- /lidar_det/pipeline/optim.py: -------------------------------------------------------------------------------- 1 | from torch import optim 2 | 3 | 4 | class Optim: 5 | def __init__(self, model, cfg): 6 | self._optim = optim.Adam(model.parameters(), amsgrad=True) 7 | self._lr_scheduler = _ExpDecayScheduler(**cfg["scheduler_kwargs"]) 8 | 9 | def zero_grad(self): 10 | self._optim.zero_grad() 11 | 12 | def step(self): 13 | self._optim.step() 14 | 15 | def state_dict(self): 16 | return self._optim.state_dict() 17 | 18 | def load_state_dict(self, state_dict): 19 | self._optim.load_state_dict(state_dict) 20 | 21 | def set_lr(self, epoch): 22 | for group in self._optim.param_groups: 23 | group["lr"] = self._lr_scheduler(epoch) 24 | 25 | def get_lr(self): 26 | return self._optim.param_groups[0]["lr"] 27 | 28 | 29 | class _ExpDecayScheduler: 30 | """ 31 | Return `v0` until `e` reaches `e0`, then exponentially decay 32 | to `v1` when `e` reaches `e1` and return `v1` thereafter, until 33 | reaching `eNone`, after which it returns `None`. 
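    As a worked example (illustrative numbers, not necessarily the project's
    defaults): with epoch0=0, lr0=1e-3, epoch1=40 and lr1=1e-6, the rate
    returned at epoch 20 is 1e-3 * (1e-6 / 1e-3) ** (20 / 40), i.e. about 3.2e-5.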
34 | """ 35 | 36 | def __init__(self, epoch0, lr0, epoch1, lr1): 37 | self._epoch0 = epoch0 38 | self._epoch1 = epoch1 39 | self._lr0 = lr0 40 | self._lr1 = lr1 41 | 42 | def __call__(self, epoch): 43 | if epoch < self._epoch0: 44 | return self._lr0 45 | elif epoch > self._epoch1: 46 | return self._lr1 47 | else: 48 | return self._lr0 * (self._lr1 / self._lr0) ** ( 49 | (epoch - self._epoch0) / (self._epoch1 - self._epoch0) 50 | ) 51 | -------------------------------------------------------------------------------- /lidar_det/pipeline/pipeline.py: -------------------------------------------------------------------------------- 1 | from .optim import Optim 2 | from .trainer import Trainer 3 | from .logger import Logger 4 | 5 | 6 | class Pipeline: 7 | def __init__(self, model, cfg): 8 | self.logger = Logger(cfg["Logger"]) 9 | self.optim = Optim(model, cfg["Optim"]) 10 | self.trainer = Trainer(self.logger, self.optim, cfg["Trainer"]) 11 | self.logger.log_debug("Pipeline starts.") 12 | 13 | def close(self): 14 | self.logger.log_debug("Pipeline closes.") 15 | self.logger.close() 16 | 17 | def train(self, model, train_loader, eval_loader=None): 18 | self.logger.log_debug("Training starts.") 19 | status = self.trainer.train(model, train_loader, eval_loader) 20 | self.logger.log_debug(f"Training ends (status {status}).") 21 | return status 22 | 23 | def evaluate(self, model, eval_loader, tb_prefix, rm_files=True): 24 | self.logger.log_debug("Evaluation starts.") 25 | status = self.trainer.evaluate( 26 | model, 27 | eval_loader, 28 | tb_prefix, 29 | full_eval=True, 30 | plotting=False, 31 | rm_files=rm_files, 32 | ) 33 | self.logger.log_debug(f"Evaluation ends (status {status}).") 34 | return status 35 | 36 | def load_ckpt(self, model, ckpt, use_ckpt_epoch=False): 37 | epoch, step = self.logger.load_ckpt(ckpt, model, self.optim) 38 | # When finetuning a pre-trained checkpoint, we don't care the previous 39 | # training schedule, so not setting epoch and step 40 | if use_ckpt_epoch: 41 | self.trainer.set_epoch_step(epoch, step) 42 | 43 | def load_sigterm_ckpt(self, model): 44 | epoch, step = self.logger.load_sigterm_ckpt(model, self.optim) 45 | self.trainer.set_epoch_step(epoch, step) 46 | 47 | def sigterm_ckpt_exists(self): 48 | return self.logger.sigterm_ckpt_exists() 49 | -------------------------------------------------------------------------------- /lidar_det/pipeline/trainer.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import signal 3 | import tqdm 4 | 5 | import torch 6 | from torch.nn.utils import clip_grad_norm_ 7 | 8 | 9 | def _tqdm_str(t_dict): 10 | n = t_dict["total"] 11 | t = t_dict["elapsed"] 12 | r = t / n 13 | return "{}, elapsed={}, rate={:.2f}s/it, total_it={}".format( 14 | t_dict["prefix"], str(datetime.timedelta(seconds=int(t))), r, n, 15 | ) 16 | 17 | 18 | class Trainer(object): 19 | def __init__(self, logger, optimizer, cfg): 20 | self._logger = logger 21 | self._optim = optimizer 22 | self._epoch, self._step = 0, 0 23 | 24 | self._grad_norm_clip = cfg["grad_norm_clip"] 25 | self._ckpt_interval = cfg["ckpt_interval"] 26 | self._eval_interval = cfg["eval_interval"] 27 | self._max_epoch = cfg["epoch"] 28 | 29 | self.__sigterm = False 30 | signal.signal(signal.SIGINT, self._sigterm_cb) 31 | signal.signal(signal.SIGTERM, self._sigterm_cb) 32 | 33 | def set_epoch_step(self, epoch=None, step=None): 34 | if epoch is not None: 35 | self._epoch = epoch 36 | if step is not None: 37 | self._step = step 38 | 39 | def 
evaluate( 40 | self, 41 | model, 42 | eval_loader, 43 | tb_prefix, 44 | full_eval=False, 45 | plotting=False, 46 | rm_files=False, 47 | ): 48 | model.eval() 49 | tb_dict_list = [] 50 | eval_dict_list = [] 51 | split = eval_loader.dataset.split 52 | pbar = tqdm.tqdm( 53 | total=len(eval_loader), 54 | leave=False, 55 | desc=f"eval ({split}) ({self._epoch}/{self._max_epoch})", 56 | ) 57 | 58 | # prevent out-of-memory during evaluation 59 | torch.cuda.empty_cache() 60 | 61 | for b_idx, batch in enumerate(eval_loader): 62 | if self.__sigterm: 63 | pbar.close() 64 | self._logger.log_info(_tqdm_str(pbar.format_dict)) 65 | return 1 66 | 67 | with torch.no_grad(): 68 | tb_dict, eval_dict = self._evaluate_batch( 69 | model, 70 | batch, 71 | full_eval, 72 | plotting, 73 | self._logger.get_save_dir(self._epoch, split), 74 | raise_oom=False, 75 | ) 76 | 77 | # collate tb_dict for epoch evaluation 78 | tb_dict_list.append(tb_dict) 79 | eval_dict_list.append(eval_dict) 80 | 81 | # # save file 82 | # for k, v in file_dict.items(): 83 | # self._logger.save_file(v, k, self._epoch, split) 84 | 85 | # # save figure 86 | # for k, (fig, ax) in fig_dict.items(): 87 | # # self._logger.add_fig(k, fig, self._step) 88 | # self._logger.save_fig(fig, k, self._epoch, split, close_fig=True) 89 | 90 | pbar.update() 91 | 92 | pbar.close() 93 | self._logger.log_info(_tqdm_str(pbar.format_dict)) 94 | 95 | # prevent out-of-memory during evaluation 96 | torch.cuda.empty_cache() 97 | 98 | tb_dict = model.model_eval_collate_fn( 99 | tb_dict_list, 100 | eval_dict_list, 101 | self._logger.get_save_dir(self._epoch, split), 102 | full_eval=full_eval, 103 | rm_files=rm_files, 104 | ) 105 | 106 | for k, v in tb_dict.items(): 107 | self._logger.add_scalar(f"{tb_prefix}_{k}", v, self._step) 108 | 109 | # for k, v in epoch_dict.items(): 110 | # self._logger.save_dict(v, k, self._epoch, split) 111 | 112 | return 0 113 | 114 | def train(self, model, train_loader, eval_loader=None): 115 | # for self._epoch in tqdm.trange(0, self._max_epoch, desc="epochs"): 116 | for self._epoch in range(self._max_epoch): 117 | if self.__sigterm: 118 | self._logger.save_sigterm_ckpt( 119 | model, self._optim, self._epoch, self._step, 120 | ) 121 | return 1 122 | 123 | self._train_epoch(model, train_loader) 124 | 125 | if not self.__sigterm: 126 | do_ckpt = self._is_ckpt_epoch() 127 | do_eval = eval_loader is not None and ( 128 | self._is_evaluation_epoch() or do_ckpt 129 | ) 130 | 131 | if do_ckpt: 132 | self._logger.save_ckpt( 133 | f"ckpt_e{self._epoch}.pth", 134 | model, 135 | self._optim, 136 | self._epoch, 137 | self._step, 138 | ) 139 | 140 | if do_eval: 141 | self.evaluate( 142 | model, 143 | eval_loader, 144 | tb_prefix="VAL", 145 | full_eval=do_ckpt, 146 | plotting=False, 147 | rm_files=True, 148 | ) 149 | 150 | self._logger.flush() 151 | 152 | return 0 153 | 154 | def _is_ckpt_epoch(self): 155 | return self._epoch % self._ckpt_interval == 0 or self._epoch == self._max_epoch 156 | 157 | def _is_evaluation_epoch(self): 158 | return self._epoch % self._eval_interval == 0 or self._epoch == self._max_epoch 159 | 160 | def _sigterm_cb(self, signum, frame): 161 | self.__sigterm = True 162 | self._logger.log_info(f"Received signal {signum} at frame {frame}.") 163 | 164 | def _train_batch(self, model, batch, ratio, raise_oom, reduce_batch): 165 | """Train one batch. `ratio` in between [0, 1) is the progress of training 166 | current epoch. It is used by the scheduler to update learning rate. 
167 | """ 168 | model.train() 169 | self._optim.zero_grad() 170 | self._optim.set_lr(self._epoch + ratio) 171 | 172 | # https://github.com/pytorch/fairseq/blob/50a671f78d0c8de0392f924180db72ac9b41b801/fairseq/trainer.py#L283 # noqa 173 | try: 174 | loss, tb_dict, _ = model.model_fn(model, batch, reduce_batch=reduce_batch) 175 | loss.backward() 176 | except RuntimeError as e: 177 | if "out of memory" in str(e) and not raise_oom: 178 | reduce_batch = True 179 | self._logger.log_warning( 180 | "Trainer: out of memory, retrying batch " 181 | f"(reduce_batch: {reduce_batch})" 182 | ) 183 | torch.cuda.empty_cache() 184 | return self._train_batch( 185 | model, batch, ratio, raise_oom=True, reduce_batch=reduce_batch 186 | ) 187 | else: 188 | self._logger.log_error(model.error_fn(model, batch)) 189 | self._logger.save_ckpt( 190 | f"ckpt_e{self._epoch}_error.pth", 191 | model, 192 | self._optim, 193 | self._epoch, 194 | self._step, 195 | ) 196 | raise e 197 | 198 | if self._grad_norm_clip > 0: 199 | clip_grad_norm_(model.parameters(), self._grad_norm_clip) 200 | 201 | self._optim.step() 202 | 203 | self._logger.add_scalar("TRAIN_lr", self._optim.get_lr(), self._step) 204 | # self._logger.add_scalar("TRAIN_epoch", self._epoch + ratio, self._step) 205 | for key, val in tb_dict.items(): 206 | self._logger.add_scalar(f"TRAIN_{key}", val, self._step) 207 | 208 | return loss.item() 209 | 210 | # # NOTE Dirty fix to use mixup regularization, to be removed 211 | # if model.mixup_alpha <= 0.0: 212 | # return loss.item() 213 | 214 | # original_loss_value = loss.item() 215 | 216 | # self._optim.zero_grad() 217 | # loss, tb_dict, _ = model.model_fn_mixup(model, batch) 218 | # loss.backward() 219 | 220 | # if self._grad_norm_clip > 0: 221 | # clip_grad_norm_(model.parameters(), self._grad_norm_clip) 222 | 223 | # self._optim.step() 224 | 225 | # for key, val in tb_dict.items(): 226 | # self._logger.add_scalar(f"TRAIN_{key}", val, self._step) 227 | 228 | # return original_loss_value 229 | 230 | def _train_epoch(self, model, train_loader): 231 | pbar = tqdm.tqdm( 232 | total=len(train_loader), 233 | leave=False, 234 | desc=f"train ({self._epoch}/{self._max_epoch})", 235 | ) 236 | for ib, batch in enumerate(train_loader): 237 | if self.__sigterm: 238 | pbar.close() 239 | self._logger.log_info(_tqdm_str(pbar.format_dict)) 240 | return 241 | 242 | loss = self._train_batch( 243 | model, 244 | batch, 245 | ratio=(ib / len(train_loader)), 246 | raise_oom=False, 247 | reduce_batch=False, 248 | ) 249 | self._step += 1 250 | pbar.set_postfix({"total_it": self._step, "loss": loss}) 251 | pbar.update() 252 | 253 | self._epoch = self._epoch + 1 254 | pbar.close() 255 | self._logger.log_info(_tqdm_str(pbar.format_dict)) 256 | 257 | def _evaluate_batch(self, model, batch, full_eval, plotting, output_dir, raise_oom): 258 | try: 259 | return model.model_eval_fn( 260 | model, 261 | batch, 262 | full_eval=full_eval, 263 | plotting=plotting, 264 | output_dir=output_dir, 265 | ) 266 | except RuntimeError as e: 267 | if "out of memory" in str(e) and not raise_oom: 268 | self._logger.log_warning("Trainer: out of memory, retrying batch") 269 | torch.cuda.empty_cache() 270 | return self._evaluate_batch( 271 | model, batch, full_eval, plotting, output_dir, raise_oom=True 272 | ) 273 | else: 274 | self._logger.log_error(model.error_fn(model, batch)) 275 | self._logger.save_ckpt( 276 | f"ckpt_e{self._epoch}_error.pth", 277 | model, 278 | self._optim, 279 | self._epoch, 280 | self._step, 281 | ) 282 | raise e 283 | 
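
The `Trainer` above calls `self._optim.set_lr(self._epoch + ratio)` once per batch, so `_ExpDecayScheduler` (defined in `optim.py` above) is evaluated at fractional epochs and the learning rate decays smoothly within an epoch. The standalone sketch below reproduces that schedule to illustrate the resulting curve; the `epoch0`/`lr0`/`epoch1`/`lr1` values are illustrative placeholders, not the values from the shipped config.

```
# Standalone sketch of the exponential decay performed by _ExpDecayScheduler.
# The schedule parameters below are illustrative, not taken from the repository config.
def exp_decay_lr(epoch, epoch0=5, lr0=1e-3, epoch1=40, lr1=1e-6):
    if epoch < epoch0:
        return lr0
    if epoch > epoch1:
        return lr1
    # Geometric interpolation between lr0 and lr1.
    return lr0 * (lr1 / lr0) ** ((epoch - epoch0) / (epoch1 - epoch0))


if __name__ == "__main__":
    # Fractional epochs correspond to Trainer passing `self._epoch + ratio`.
    for e in (0.0, 5.0, 10.5, 22.5, 40.0, 50.0):
        print(f"epoch {e:5.1f} -> lr {exp_decay_lr(e):.2e}")
```
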
-------------------------------------------------------------------------------- /lidar_det/tests_common.py: -------------------------------------------------------------------------------- 1 | # Should only be used by code under ./tests 2 | 3 | 4 | def get_cfgs(): 5 | model_cfg = { 6 | "type": "MinkUNet", 7 | # "type": "MinkResNet", 8 | "kwargs": { 9 | "cr": 1.0, 10 | "run_up": True, 11 | "num_anchors": 1, 12 | "num_ori_bins": 12, 13 | # "fpn": True 14 | "fpn": False, 15 | }, 16 | "target_mode": 2, # 0-inclusion, 1-anchor, 2-dist, 3-dist and reweighting 17 | "disentangled_loss": False, 18 | } 19 | 20 | dataset_cfg = { 21 | "name": "JRDB", 22 | # "name": "nuScenes", 23 | "augmentation": True, 24 | # "voxel_size": [0.05, 0.05, 0.05], 25 | "voxel_size": [0.05, 0.05, 0.20], 26 | "num_points": 1e6, 27 | "canonical": False, 28 | "dist_min": 0.5, 29 | "dist_max": 1.0, 30 | # "included_classes": ["pedestrian"], 31 | "included_classes": [], 32 | "nsweeps": 1, 33 | "additional_features": [] 34 | } 35 | 36 | dataset_cfg["target_mode"] = model_cfg["target_mode"] 37 | dataset_cfg["num_anchors"] = model_cfg["kwargs"]["num_anchors"] 38 | dataset_cfg["num_ori_bins"] = model_cfg["kwargs"]["num_ori_bins"] 39 | if dataset_cfg["name"] == "nuScenes": 40 | nc = len(dataset_cfg["included_classes"]) 41 | model_cfg["kwargs"]["num_classes"] = nc if nc > 0 else 10 42 | model_cfg["nuscenes"] = True 43 | model_cfg["kwargs"]["input_dim"] = 3 + len(dataset_cfg["additional_features"]) 44 | else: 45 | model_cfg["kwargs"]["num_classes"] = 1 46 | model_cfg["nuscenes"] = False 47 | model_cfg["kwargs"]["input_dim"] = 3 48 | 49 | return model_cfg, dataset_cfg 50 | -------------------------------------------------------------------------------- /lidar_det/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VisualComputingInstitute/Person_MinkUNet/e582bd69ca8fb14268a1414e2534d6c509ac24d2/lidar_det/utils/__init__.py -------------------------------------------------------------------------------- /lidar_det/utils/eval_nuscenes.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from nuscenes import NuScenes 4 | from nuscenes.eval.common.config import config_factory 5 | from nuscenes.eval.detection.evaluate import DetectionEval 6 | 7 | 8 | def eval_nuscenes( 9 | result_json, 10 | out_dir, 11 | data_root, 12 | split, 13 | version, 14 | plot_examples=10, 15 | render_curves=True, 16 | verbose=True, 17 | ): 18 | # Ref https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/eval/detection/evaluate.py # noqa 19 | result_json = os.path.expanduser(result_json) 20 | out_dir = os.path.expanduser(out_dir) 21 | data_root = os.path.expanduser(data_root) 22 | cfg = config_factory("detection_cvpr_2019") 23 | 24 | nusc = NuScenes(version=version, verbose=verbose, dataroot=data_root) 25 | nusc_eval = DetectionEval( 26 | nusc, 27 | config=cfg, 28 | result_path=result_json, 29 | eval_set=split, 30 | output_dir=out_dir, 31 | verbose=verbose, 32 | ) 33 | nusc_eval.main(plot_examples=plot_examples, render_curves=render_curves) 34 | -------------------------------------------------------------------------------- /lidar_det/utils/jrdb_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | Transformations. 
Following frames are defined: 5 | 6 | base: main frame where 3D annotations are done in, x-forward, y-left, z-up 7 | upper_lidar: x-forward, y-left, z-up 8 | lower_lidar: x-forward, y-left, z-up 9 | laser: x-forward, y-left, z-up 10 | """ 11 | 12 | 13 | def _get_R_z(rot_z): 14 | cs, ss = np.cos(rot_z), np.sin(rot_z) 15 | return np.array([[cs, -ss, 0], [ss, cs, 0], [0, 0, 1]], dtype=np.float32) 16 | 17 | 18 | # laser to base 19 | _rot_z_laser_to_base = np.pi / 120 20 | _R_laser_to_base = _get_R_z(_rot_z_laser_to_base) 21 | 22 | # upper_lidar to base 23 | _rot_z_upper_lidar_to_base = 0.085 24 | _T_upper_lidar_to_base = np.array([0, 0, 0.33529], dtype=np.float32).reshape(3, 1) 25 | _R_upper_lidar_to_base = _get_R_z(_rot_z_upper_lidar_to_base) 26 | 27 | # lower_lidar to base 28 | _rot_z_lower_lidar_to_base = 0.0 29 | _T_lower_lidar_to_base = np.array([0, 0, -0.13511], dtype=np.float32).reshape(3, 1) 30 | _R_lower_lidar_to_base = np.eye(3, dtype=np.float32) 31 | 32 | 33 | """ 34 | Transformation API 35 | """ 36 | 37 | 38 | def transform_pts_upper_velodyne_to_base(pts): 39 | """Transform points from upper velodyne frame to base frame 40 | 41 | Args: 42 | pts (np.array[3, N]): points (x, y, z) 43 | 44 | Returns: 45 | pts_trans (np.array[3, N]) 46 | """ 47 | return _R_upper_lidar_to_base @ pts + _T_upper_lidar_to_base 48 | 49 | 50 | def transform_pts_lower_velodyne_to_base(pts): 51 | return _R_lower_lidar_to_base @ pts + _T_lower_lidar_to_base 52 | 53 | 54 | def transform_pts_laser_to_base(pts): 55 | return _R_laser_to_base @ pts 56 | 57 | 58 | def transform_pts_base_to_upper_velodyne(pts): 59 | return _R_upper_lidar_to_base.T @ (pts - _T_upper_lidar_to_base) 60 | 61 | 62 | def transform_pts_base_to_lower_velodyne(pts): 63 | return _R_lower_lidar_to_base.T @ (pts - _T_lower_lidar_to_base) 64 | 65 | 66 | def transform_pts_base_to_laser(pts): 67 | return _R_laser_to_base.T @ pts 68 | 69 | 70 | def transform_pts_base_to_stitched_im(pts): 71 | """Project 3D points in base frame to the stitched image 72 | 73 | Args: 74 | pts (np.array[3, N]): points (x, y, z) 75 | 76 | Returns: 77 | pts_im (np.array[2, N]) 78 | inbound_mask (np.array[N]) 79 | """ 80 | im_size = (480, 3760) 81 | 82 | # to image coordinate 83 | pts_rect = pts[[1, 2, 0], :] 84 | pts_rect[:2, :] *= -1 85 | 86 | # to pixel 87 | horizontal_theta = np.arctan2(pts_rect[0], pts_rect[2]) 88 | horizontal_percent = horizontal_theta / (2 * np.pi) + 0.5 89 | x = im_size[1] * horizontal_percent 90 | y = ( 91 | 485.78 * pts_rect[1] / pts_rect[2] * np.cos(horizontal_theta) 92 | + 0.4375 * im_size[0] 93 | ) 94 | # horizontal_theta = np.arctan(pts_rect[0, :] / pts_rect[2, :]) 95 | # horizontal_theta += (pts_rect[2, :] < 0) * np.pi 96 | # horizontal_percent = horizontal_theta / (2 * np.pi) 97 | # x = ((horizontal_percent * im_size[1]) + 1880) % im_size[1] 98 | # y = ( 99 | # 485.78 * (pts_rect[1, :] / ((1 / np.cos(horizontal_theta)) * pts_rect[2, :])) 100 | # ) + (0.4375 * im_size[0]) 101 | 102 | # x is always in bound by cylindrical parametrization 103 | # y is always at the lower half of the image, since laser is lower than the camera 104 | # thus only one boundary needs to be checked 105 | inbound_mask = y < im_size[0] 106 | 107 | return np.stack((x, y), axis=0).astype(np.int32), inbound_mask 108 | 109 | 110 | def transform_pts_laser_to_stitched_im(pts): 111 | pts_base = transform_pts_laser_to_base(pts) 112 | return transform_pts_base_to_stitched_im(pts_base) 113 | -------------------------------------------------------------------------------- 
/lidar_det/utils/kitti_calibration.py: -------------------------------------------------------------------------------- 1 | """ 2 | From https://github.com/sshaoshuai/PointRCNN/blob/master/lib/utils/calibration.py # noqa 3 | """ 4 | 5 | import numpy as np 6 | import os 7 | 8 | 9 | def get_calib_from_file(calib_file): 10 | with open(calib_file) as f: 11 | lines = f.readlines() 12 | 13 | obj = lines[2].strip().split(' ')[1:] 14 | P2 = np.array(obj, dtype=np.float32) 15 | obj = lines[3].strip().split(' ')[1:] 16 | P3 = np.array(obj, dtype=np.float32) 17 | obj = lines[4].strip().split(' ')[1:] 18 | R0 = np.array(obj, dtype=np.float32) 19 | obj = lines[5].strip().split(' ')[1:] 20 | Tr_velo_to_cam = np.array(obj, dtype=np.float32) 21 | 22 | return {'P2': P2.reshape(3, 4), 23 | 'P3': P3.reshape(3, 4), 24 | 'R0': R0.reshape(3, 3), 25 | 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)} 26 | 27 | 28 | class Calibration(object): 29 | def __init__(self, calib_file): 30 | if isinstance(calib_file, str): 31 | calib = get_calib_from_file(calib_file) 32 | else: 33 | calib = calib_file 34 | 35 | self.P2 = calib['P2'] # 3 x 4 36 | self.R0 = calib['R0'] # 3 x 3 37 | self.V2C = calib['Tr_velo2cam'] # 3 x 4 38 | 39 | # Camera intrinsics and extrinsics 40 | self.cu = self.P2[0, 2] 41 | self.cv = self.P2[1, 2] 42 | self.fu = self.P2[0, 0] 43 | self.fv = self.P2[1, 1] 44 | self.tx = self.P2[0, 3] / (-self.fu) 45 | self.ty = self.P2[1, 3] / (-self.fv) 46 | 47 | def cart_to_hom(self, pts): 48 | """ 49 | :param pts: (N, 3 or 2) 50 | :return pts_hom: (N, 4 or 3) 51 | """ 52 | pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32))) 53 | return pts_hom 54 | 55 | def lidar_to_rect(self, pts_lidar): 56 | """ 57 | :param pts_lidar: (N, 3) 58 | :return pts_rect: (N, 3) 59 | """ 60 | pts_lidar_hom = self.cart_to_hom(pts_lidar) 61 | pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T)) 62 | # pts_rect = reduce(np.dot, (pts_lidar_hom, self.V2C.T, self.R0.T)) 63 | return pts_rect 64 | 65 | def rect_to_img(self, pts_rect): 66 | """ 67 | :param pts_rect: (N, 3) 68 | :return pts_img: (N, 2) 69 | """ 70 | pts_rect_hom = self.cart_to_hom(pts_rect) 71 | pts_2d_hom = np.dot(pts_rect_hom, self.P2.T) 72 | pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) 73 | pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord 74 | return pts_img, pts_rect_depth 75 | 76 | def lidar_to_img(self, pts_lidar): 77 | """ 78 | :param pts_lidar: (N, 3) 79 | :return pts_img: (N, 2) 80 | """ 81 | pts_rect = self.lidar_to_rect(pts_lidar) 82 | pts_img, pts_depth = self.rect_to_img(pts_rect) 83 | return pts_img, pts_depth 84 | 85 | def img_to_rect(self, u, v, depth_rect): 86 | """ 87 | :param u: (N) 88 | :param v: (N) 89 | :param depth_rect: (N) 90 | :return: 91 | """ 92 | x = ((u - self.cu) * depth_rect) / self.fu + self.tx 93 | y = ((v - self.cv) * depth_rect) / self.fv + self.ty 94 | pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), depth_rect.reshape(-1, 1)), axis=1) 95 | return pts_rect 96 | 97 | def depthmap_to_rect(self, depth_map): 98 | """ 99 | :param depth_map: (H, W), depth_map 100 | :return: 101 | """ 102 | x_range = np.arange(0, depth_map.shape[1]) 103 | y_range = np.arange(0, depth_map.shape[0]) 104 | x_idxs, y_idxs = np.meshgrid(x_range, y_range) 105 | x_idxs, y_idxs = x_idxs.reshape(-1), y_idxs.reshape(-1) 106 | depth = depth_map[y_idxs, x_idxs] 107 | pts_rect = self.img_to_rect(x_idxs, y_idxs, depth) 108 | return pts_rect, x_idxs, y_idxs 109 | 110 | def 
corners3d_to_img_boxes(self, corners3d): 111 | """ 112 | :param corners3d: (N, 8, 3) corners in rect coordinate 113 | :return: boxes: (None, 4) [x1, y1, x2, y2] in rgb coordinate 114 | :return: boxes_corner: (None, 8) [xi, yi] in rgb coordinate 115 | """ 116 | sample_num = corners3d.shape[0] 117 | corners3d_hom = np.concatenate((corners3d, np.ones((sample_num, 8, 1))), axis=2) # (N, 8, 4) 118 | 119 | img_pts = np.matmul(corners3d_hom, self.P2.T) # (N, 8, 3) 120 | 121 | x, y = img_pts[:, :, 0] / img_pts[:, :, 2], img_pts[:, :, 1] / img_pts[:, :, 2] 122 | x1, y1 = np.min(x, axis=1), np.min(y, axis=1) 123 | x2, y2 = np.max(x, axis=1), np.max(y, axis=1) 124 | 125 | boxes = np.concatenate((x1.reshape(-1, 1), y1.reshape(-1, 1), x2.reshape(-1, 1), y2.reshape(-1, 1)), axis=1) 126 | boxes_corner = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1)), axis=2) 127 | 128 | return boxes, boxes_corner 129 | 130 | def camera_dis_to_rect(self, u, v, d): 131 | """ 132 | Can only process valid u, v, d, which means u, v can not beyond the image shape, reprojection error 0.02 133 | :param u: (N) 134 | :param v: (N) 135 | :param d: (N), the distance between camera and 3d points, d^2 = x^2 + y^2 + z^2 136 | :return: 137 | """ 138 | assert self.fu == self.fv, '%.8f != %.8f' % (self.fu, self.fv) 139 | fd = np.sqrt((u - self.cu)**2 + (v - self.cv)**2 + self.fu**2) 140 | x = ((u - self.cu) * d) / fd + self.tx 141 | y = ((v - self.cv) * d) / fd + self.ty 142 | z = np.sqrt(d**2 - x**2 - y**2) 143 | pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), z.reshape(-1, 1)), axis=1) 144 | return pts_rect -------------------------------------------------------------------------------- /lidar_det/utils/viz_mayavi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mayavi import mlab 3 | 4 | import lidar_det.utils.utils_box3d as ub3d 5 | 6 | 7 | def draw_lidar( 8 | pts=None, 9 | pts_color=None, 10 | pts_scale=0.06, 11 | pts_mode="sphere", 12 | fig=None, 13 | fig_size=(800, 500), 14 | boxes=None, 15 | boxes_color=(0.0, 1.0, 0.0), 16 | scores=None, 17 | score_thresh=0.0, 18 | boxes_gt=None, 19 | boxes_gt_color=(1.0, 0.0, 0.0), 20 | color_bar=False 21 | ): 22 | """Draw LiDAR points 23 | 24 | Args: 25 | pts (array[3, N]): xyz 26 | pts_color (array[N, 3] or tuple(3)) 27 | boxes (array[B, 7]) 28 | scores (array[B]): Used to color code box 29 | score_threh: Box with lower scores are not plotted 30 | boxes_gt (array[B, 7]) 31 | 32 | Returns: 33 | fig 34 | """ 35 | if fig is None: 36 | fig = mlab.figure( 37 | figure=None, 38 | bgcolor=(1, 1, 1), 39 | fgcolor=(0, 0, 0), 40 | engine=None, 41 | size=fig_size, 42 | ) 43 | 44 | if pts is not None: 45 | if pts_color is None: 46 | s = np.hypot(pts[0], pts[1]) 47 | mpt = mlab.points3d( 48 | pts[0], 49 | pts[1], 50 | pts[2], 51 | s, 52 | colormap="blue-red", 53 | mode=pts_mode, 54 | scale_factor=pts_scale, 55 | figure=fig, 56 | ) 57 | mpt.glyph.scale_mode = "scale_by_vector" 58 | if color_bar: 59 | mlab.scalarbar( 60 | object=mpt, nb_labels=8, label_fmt="%.1f", orientation="vertical" 61 | ) 62 | else: 63 | mlab.points3d( 64 | pts[0], 65 | pts[1], 66 | pts[2], 67 | color=pts_color, 68 | mode=pts_mode, 69 | scale_factor=pts_scale, 70 | figure=fig, 71 | ) 72 | 73 | # boxes 74 | if boxes is not None and len(boxes) > 0: 75 | if scores is not None: 76 | boxes = boxes[scores >= score_thresh] 77 | scores = scores[scores >= score_thresh] 78 | # plot low confidence boxes first (on bottom layer) 79 | s_argsort = 
scores.argsort() 80 | boxes = boxes[s_argsort] 81 | scores = scores[s_argsort] 82 | # boxes_color = plt.cm.Greens(scores) # TODO 83 | boxes_color = [(0.0, 1.0, 0.0)] * len(boxes) 84 | else: 85 | boxes_color = [boxes_color] * len(boxes) 86 | 87 | # boxes 88 | corners, connect_inds = ub3d.boxes_to_corners(boxes, connect_inds=True) 89 | for corner, color in zip(corners, boxes_color): 90 | for inds in connect_inds: 91 | mlab.plot3d( 92 | corner[0, inds], 93 | corner[1, inds], 94 | corner[2, inds], 95 | color=color, 96 | tube_radius=None, 97 | line_width=7.0, 98 | figure=fig, 99 | ) 100 | 101 | # gt boxes 102 | if boxes_gt is not None and len(boxes_gt) > 0: 103 | corners, connect_inds = ub3d.boxes_to_corners(boxes_gt, connect_inds=True) 104 | for corner in corners: 105 | for inds in connect_inds: 106 | mlab.plot3d( 107 | corner[0, inds], 108 | corner[1, inds], 109 | corner[2, inds], 110 | color=boxes_gt_color, 111 | tube_radius=None, 112 | line_width=7.0, 113 | figure=fig, 114 | ) 115 | 116 | mlab.view( 117 | azimuth=180, 118 | elevation=70, 119 | focalpoint=[12.0909996, -1.04700089, -2.03249991], 120 | distance=62, 121 | figure=fig, 122 | ) 123 | 124 | return fig 125 | -------------------------------------------------------------------------------- /lidar_det/utils/viz_plt.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | import lidar_det.utils.utils_box3d as ub3d 5 | 6 | 7 | def plot_bev( 8 | pc=None, 9 | pts_color=None, 10 | pts_scale=0.01, 11 | title=None, 12 | fig=None, 13 | ax=None, 14 | xlim=(-10, 10), 15 | ylim=(-10, 10), 16 | boxes=None, 17 | boxes_cls=None, 18 | scores=None, 19 | score_thresh=0.0, 20 | boxes_gt=None, 21 | boxes_gt_cls=None, 22 | ): 23 | """Plot BEV of LiDAR points 24 | 25 | Args: 26 | pc (array[3, N]): xyz 27 | pts_color (array[N, 3] or tuple(3)) 28 | boxes (array[B, 7]) 29 | scores (array[B]): Used to color code box 30 | score_threh: Box with lower scores are not plotted 31 | boxes_gt (array[B, 7]) 32 | 33 | Returns: 34 | fig, ax 35 | """ 36 | if fig is None or ax is None: 37 | fig = plt.figure(figsize=(10, 10)) 38 | ax = fig.add_subplot(111) 39 | 40 | ax.set_xlim(*xlim) 41 | ax.set_ylim(*ylim) 42 | ax.set_xlabel("x [m]") 43 | ax.set_ylabel("y [m]") 44 | ax.set_aspect("equal") 45 | 46 | if title is not None: 47 | ax.set_title(f"{title}") 48 | 49 | # lidar 50 | if pc is not None: 51 | if pts_color is None: 52 | s = np.hypot(pc[0], pc[1]) 53 | pts_color = plt.cm.jet(np.clip(s / 30, 0, 1)) 54 | 55 | ax.scatter(pc[0], pc[1], color=pts_color, s=pts_scale) 56 | 57 | # boxes 58 | if boxes is not None and len(boxes) > 0: 59 | if scores is not None: 60 | boxes = boxes[scores >= score_thresh] 61 | scores = scores[scores >= score_thresh] 62 | # plot low confidence boxes first (on bottom layer) 63 | s_argsort = scores.argsort() 64 | boxes = boxes[s_argsort] 65 | scores = scores[s_argsort] 66 | 67 | # color coded classes 68 | boxes_color = get_boxes_color(boxes, boxes_cls, (0.0, 1.0, 0.0), scores) 69 | corners = ub3d.boxes_to_corners(boxes) 70 | for corner, c in zip(corners, boxes_color): 71 | inds = [0, 3, 2, 1] 72 | ax.plot(corner[0, inds], corner[1, inds], linestyle="-", color=c) 73 | ax.plot(corner[0, :2], corner[1, :2], linestyle="--", color=c) 74 | 75 | if boxes_gt is not None and len(boxes_gt) > 0: 76 | boxes_gt_color = get_boxes_color(boxes_gt, boxes_gt_cls, (1.0, 0.0, 0.0)) 77 | corners = ub3d.boxes_to_corners(boxes_gt) 78 | for corner, c in zip(corners, boxes_gt_color): 
79 | inds = [0, 3, 2, 1] 80 | ax.plot( 81 | corner[0, inds], 82 | corner[1, inds], 83 | linestyle="dotted", 84 | color=c, 85 | linewidth=2.0, 86 | ) 87 | ax.plot( 88 | corner[0, :2], corner[1, :2], linestyle="--", color=c, linewidth=2.0 89 | ) 90 | 91 | return fig, ax 92 | 93 | 94 | def get_boxes_color(boxes, boxes_cls, default_color, alphas=None): 95 | B = len(boxes) 96 | if boxes_cls is not None: 97 | if isinstance(boxes_cls, (int, float)): 98 | boxes_cls = boxes_cls * np.ones(B) 99 | boxes_color = plt.cm.prism(boxes_cls / 10) 100 | if alphas is not None: 101 | boxes_color[:, 3] = alphas 102 | else: 103 | boxes_color = np.tile(np.array(default_color), (B, 1)) 104 | if alphas is not None: 105 | boxes_color = np.concatenate((boxes_color, alphas.reshape(B, 1)), axis=1) 106 | 107 | return boxes_color 108 | -------------------------------------------------------------------------------- /lidar_det/utils/viz_pyviz.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from pyviz3d.visualizer import Visualizer 5 | 6 | 7 | def draw_lidar( 8 | pc=None, 9 | pts_name="points", 10 | pts_color=None, 11 | pts_alpha=None, 12 | pts_show=False, 13 | pts_size=25, 14 | viz=None, 15 | ): 16 | # pc (N, 3), boxes (B, 7) 17 | if viz is None: 18 | viz = Visualizer() 19 | 20 | if pc is not None: 21 | if pts_color is None: 22 | s = np.hypot(pc[:, 0], pc[:, 1]) 23 | pts_color = plt.cm.jet(np.clip(s / 30, 0, 1))[:, :3] 24 | 25 | if isinstance(pts_color, tuple): 26 | pts_color = np.array(pts_color).reshape(1, 3).repeat(pc.shape[0], axis=0) 27 | elif len(pts_color.shape) == 1: 28 | pts_color = plt.cm.jet(pts_color)[:, :3] 29 | 30 | if pts_alpha is not None: 31 | if isinstance(pts_alpha, np.ndarray): 32 | pts_alpha = pts_alpha.reshape(pts_color.shape[0], 1) 33 | pts_color = pts_alpha * pts_color + (1.0 - pts_alpha) * np.ones_like( 34 | pts_color 35 | ) 36 | 37 | if pts_color.max() <= 1.001: 38 | pts_color = (pts_color * 255).astype(np.uint8) 39 | 40 | viz.add_points( 41 | pts_name, pc, colors=pts_color, visible=pts_show, point_size=pts_size, 42 | ) 43 | 44 | # viz.save() 45 | 46 | return viz 47 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": [ 3 | "data", 4 | "logs", 5 | ".git" 6 | ] 7 | } -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | # RUN="unet_bl_voxel_jrdb_0.05_0.1_20210518_220414" 2 | RUN="unet_bl_voxel_jrdb_0.05_0.1_20210519_232859" 3 | 4 | python bin/train.py \ 5 | --cfg /globalwork/jia/archive/JRDB_cvpr21_workshop/logs/$RUN/backup/unet_bl_voxel_jrdb_0.05_0.1.yaml \ 6 | --ckpt /globalwork/jia/archive/JRDB_cvpr21_workshop/logs/$RUN/ckpt/ckpt_e40.pth \ 7 | --evaluation \ 8 | --tmp \ 9 | --bs_one -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="lidar_det", 5 | version="0.0.1", 6 | author="Dan Jia", 7 | author_email="jia@vision.rwth-aachen.de", 8 | packages=find_packages( 9 | include=["lidar_det", "lidar_det.*", "lidar_det.*.*"] 10 | ), 11 | license="LICENSE.txt", 12 | description="Object detection from LiDAR point cloud.", 13 | ) 14 | 
--------------------------------------------------------------------------------
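
As a quick sanity check of `binary_focal_loss` from `lidar_det/pipeline/loss_lib.py`, the snippet below verifies that with `gamma=0` and `alpha` disabled it collapses to ordinary binary cross-entropy with logits. It assumes the package has been installed (`python setup.py develop`) and that PyTorch is available; it is an illustrative sketch, not part of the repository.

```
import torch
import torch.nn.functional as F

# Assumes `python setup.py develop` has been run so that lidar_det is importable.
from lidar_det.pipeline.loss_lib import binary_focal_loss

torch.manual_seed(0)
pred = torch.randn(8)                       # raw logits
target = torch.randint(0, 2, (8,)).float()  # binary labels

# With gamma=0 and alpha<0 the focal term and the class weighting vanish,
# leaving plain BCE (up to the 1e-9 epsilon added inside the logs).
fl = binary_focal_loss(pred, target, gamma=0.0, alpha=-1, reduction="mean")
bce = F.binary_cross_entropy_with_logits(pred, target)
print(torch.allclose(fl, bce, atol=1e-6))   # expected: True

# gamma>0 down-weights well-classified points, which is the intended focal behaviour.
print(binary_focal_loss(pred, target, gamma=2.0, reduction="mean").item())
```
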