├── README.md ├── assets └── pipeline.png ├── configs ├── _base_ │ ├── datasets │ │ ├── coco_instance.py │ │ ├── kitti-3d-3class.py │ │ ├── kitti-3d-car.py │ │ ├── kitti-mono3d.py │ │ ├── lyft-3d.py │ │ ├── nuim_instance.py │ │ ├── nus-3d.py │ │ ├── nus-mono3d.py │ │ ├── range100_lyft-3d.py │ │ ├── s3dis-3d-5class.py │ │ ├── s3dis_seg-3d-13class.py │ │ ├── scannet-3d-18class.py │ │ ├── scannet_seg-3d-20class.py │ │ ├── sunrgbd-3d-10class.py │ │ ├── waymoD5-3d-3class.py │ │ └── waymoD5-3d-car.py │ ├── default_runtime.py │ ├── models │ │ ├── 3dssd.py │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ ├── dgcnn.py │ │ ├── fcaf3d.py │ │ ├── fcos3d.py │ │ ├── groupfree3d.py │ │ ├── h3dnet.py │ │ ├── hv_pointpillars_fpn_lyft.py │ │ ├── hv_pointpillars_fpn_nus.py │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ ├── hv_second_secfpn_kitti.py │ │ ├── hv_second_secfpn_waymo.py │ │ ├── imvotenet_image.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── paconv_cuda_ssg.py │ │ ├── paconv_ssg.py │ │ ├── parta2.py │ │ ├── pgd.py │ │ ├── point_rcnn.py │ │ ├── pointnet2_msg.py │ │ ├── pointnet2_ssg.py │ │ ├── smoke.py │ │ └── votenet.py │ └── schedules │ │ ├── cosine.py │ │ ├── cyclic_20e.py │ │ ├── cyclic_40e.py │ │ ├── mmdet_schedule_1x.py │ │ ├── schedule_2x.py │ │ ├── schedule_3x.py │ │ ├── seg_cosine_100e.py │ │ ├── seg_cosine_150e.py │ │ ├── seg_cosine_200e.py │ │ └── seg_cosine_50e.py └── fusion_occ │ └── fusion_occ.py ├── docs ├── datasets.md └── install.md ├── img_seg ├── gen_segmap.py ├── helper.py └── lidar │ ├── __init__.py │ ├── config │ └── label_mapping │ │ └── nuscenes.yaml │ └── lidar_anno.py ├── mmdet3d ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ └── anchor_3d_generator.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ └── __init__.py │ │ ├── box_np_ops.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── anchor_free_bbox_coder.py │ │ │ ├── centerpoint_bbox_coders.py │ │ │ ├── delta_xyzwhlr_bbox_coder.py │ │ │ ├── fcos3d_bbox_coder.py │ │ │ ├── groupfree3d_bbox_coder.py │ │ │ ├── monoflex_bbox_coder.py │ │ │ ├── partial_bin_based_bbox_coder.py │ │ │ ├── pgd_bbox_coder.py │ │ │ ├── point_xyzwhlr_bbox_coder.py │ │ │ └── smoke_bbox_coder.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── iou3d_calculator.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ └── iou_neg_piecewise_sampler.py │ │ ├── structures │ │ │ ├── __init__.py │ │ │ ├── base_box3d.py │ │ │ ├── box_3d_mode.py │ │ │ ├── cam_box3d.py │ │ │ ├── coord_3d_mode.py │ │ │ ├── depth_box3d.py │ │ │ ├── lidar_box3d.py │ │ │ └── utils.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── indoor_eval.py │ │ ├── instance_seg_eval.py │ │ ├── kitti_utils │ │ │ ├── __init__.py │ │ │ ├── eval.py │ │ │ └── rotate_iou.py │ │ ├── lyft_eval.py │ │ ├── scannet_utils │ │ │ ├── __init__.py │ │ │ ├── evaluate_semantic_instance.py │ │ │ └── util_3d.py │ │ ├── seg_eval.py │ │ └── waymo_utils │ │ │ ├── __init__.py │ │ │ └── prediction_kitti_to_waymo.py │ ├── hook │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── sequentialcontrol.py │ │ ├── syncbncontrol.py │ │ └── utils.py │ ├── points │ │ ├── __init__.py │ │ ├── base_points.py │ │ ├── cam_points.py │ │ ├── depth_points.py │ │ └── lidar_points.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── box3d_nms.py │ │ └── 
merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── array_converter.py │ │ └── gaussian.py │ ├── visualizer │ │ ├── __init__.py │ │ ├── image_vis.py │ │ ├── open3d_vis.py │ │ └── show_result.py │ └── voxel │ │ ├── __init__.py │ │ ├── builder.py │ │ └── voxel_generator.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── custom_3d.py │ ├── dataset_wrappers.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_occ.py │ ├── occ_metrics.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── aug_2d.py │ │ ├── compose.py │ │ ├── data_augment_utils.py │ │ ├── dbsampler.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms_3d.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── lidar_encoder.py │ │ ├── resnet.py │ │ └── swin.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bevdet.py │ │ ├── centerpoint.py │ │ ├── fusion_occ.py │ │ └── mvx_two_stage.py │ └── necks │ │ ├── __init__.py │ │ ├── fusion_view_transformer.py │ │ ├── lss_fpn.py │ │ └── view_transformer.py ├── ops │ ├── __init__.py │ ├── bev_pool_v2 │ │ ├── __init__.py │ │ ├── bev_pool.py │ │ └── src │ │ │ ├── bev_pool.cpp │ │ │ └── bev_pool_cuda.cu │ ├── dgcnn_modules │ │ ├── __init__.py │ │ ├── dgcnn_fa_module.py │ │ ├── dgcnn_fp_module.py │ │ └── dgcnn_gf_module.py │ ├── norm.py │ ├── paconv │ │ ├── __init__.py │ │ ├── paconv.py │ │ └── utils.py │ ├── pointnet_modules │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── paconv_sa_module.py │ │ ├── point_fp_module.py │ │ └── point_sa_module.py │ ├── sparse_block.py │ └── spconv │ │ ├── __init__.py │ │ └── overwrite_spconv │ │ ├── __init__.py │ │ └── write_spconv2.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── compat_cfg.py │ ├── logger.py │ ├── misc.py │ └── setup_env.py └── version.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tools ├── create_data_fusionocc.py ├── data_converter ├── __init__.py ├── create_gt_database.py ├── nuimage_converter.py └── nuscenes_converter.py ├── dist_test.sh ├── dist_train.sh ├── misc ├── browse_dataset.py ├── fuse_conv_bn.py ├── print_config.py └── visualize_results.py ├── test.py └── train.py /README.md: -------------------------------------------------------------------------------- 1 | # FusionOcc 2 | > **FusionOcc: Multi-Modal Fusion for 3D Occupancy Prediction, MM 2024** [[paper](https://dl.acm.org/doi/10.1145/3664647.3681293)] 3 | 4 | ## INTRODUCTION 5 | FusionOcc is a new multi-modal fusion network for 3D occupancy prediction that fuses features of LiDAR point clouds and surround-view images. The model fuses the features of these two modalities in 2D and 3D space, respectively. A semi-supervised method is used to generate dense depth maps, which are integrated with BEV image features via a cross-modal fusion module. Features of the voxelized point cloud are aligned and merged in 3D space with image features lifted by a view transformer. FusionOcc establishes a new baseline for further research on multi-modal fusion for 3D occupancy prediction, while achieving new state-of-the-art results on the Occ3D-nuScenes dataset. 
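
The overall data flow can be summarized by the minimal sketch below. This is a conceptual outline only, not the actual implementation; every module name in it is a hypothetical placeholder for the components described above (see the pipeline figure below for the real architecture).

```python
# Conceptual sketch only -- module names are hypothetical placeholders,
# not the FusionOcc source. It mirrors the description above: a dense depth
# map (trained semi-supervised from sparse LiDAR depth) is fused with image
# features in 2D, the image features are lifted to 3D by a view transformer,
# and then merged with voxelized point-cloud features before predicting
# per-voxel occupancy.
def fusion_occ_forward(imgs, points, *, img_encoder, depth_net,
                       cross_modal_fusion, view_transformer,
                       lidar_encoder, voxel_fusion, occ_head):
    img_feats = img_encoder(imgs)                      # 2D features of surround-view images
    depth = depth_net(img_feats, points)               # dense depth prediction
    img_feats = cross_modal_fusion(img_feats, depth)   # 2D cross-modal fusion
    img_voxels = view_transformer(img_feats, depth)    # lift image features into 3D space
    pts_voxels = lidar_encoder(points)                 # voxelized point-cloud features
    fused = voxel_fusion(img_voxels, pts_voxels)       # align and merge the two modalities in 3D
    return occ_head(fused)                             # 3D semantic occupancy prediction
```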
6 | 7 | ![pipeline](assets/pipeline.png) 8 | 9 | ## Getting Started 10 | 11 | - [Installation](docs/install.md) 12 | ``` 13 | # main prerequisites 14 | Python = 3.8 15 | nuscenes-devkit = 1.1.11 16 | PyTorch = 1.10.0 17 | torch-scatter = 2.0.9 18 | opencv-python = 4.9.0 19 | Pillow = 10.0.1 20 | mmcv-full = 1.5.3 21 | mmdetection = 2.25.1 22 | ``` 23 | 24 | - [Datasets](docs/datasets.md) 25 | 26 | ``` 27 | FusionOcc 28 | ├── data 29 | │ ├── nuscenes 30 | │ │ ├── maps 31 | │ │ ├── samples 32 | │ │ ├── sweeps 33 | │ │ ├── lidarseg 34 | │ │ ├── imgseg 35 | │ │ ├── gts 36 | │ │ ├── v1.0-trainval 37 | │ │ ├── fusionocc-nuscenes_infos_train.pkl 38 | │ │ ├── fusionocc-nuscenes_infos_val.pkl 39 | ``` 40 | 41 | 42 | ## Model Zoo 43 | 44 | | Backbone | Config | Mask | Pretrain | mIoU | Checkpoints | 45 | | :-------: | :---: | :---: | :---: | :---: | :---: | 46 | | Swin-Base | [Base](configs/fusion_occ) | ✖️ | ImageNet, nuImages | 56.62 | [BaseWoMask](https://drive.google.com/file/d/16ELoDLoDkCYheREJUPiBz2905MHhuVHv/view) | 47 | 49 | 51 | 52 | ## Evaluation 53 | 54 | We provide instructions for evaluating our pretrained models. Download the checkpoints above first. 55 | 56 | The config file is [fusion_occ.py](configs/fusion_occ/fusion_occ.py). 57 | 58 | Run: 59 | ```bash 60 | ./tools/dist_test.sh $config $checkpoint num_gpu 61 | ``` 62 | 63 | ## Training 64 | 65 | To load pre-trained weights, modify the `load_from` path at the end of the config file (see the example below), then run: 66 | 67 | ```bash 68 | ./tools/dist_train.sh $config num_gpu 69 | ``` 70 | 71 | To obtain the version that does not use the mask, set the `use_mask` field in the config file to False and train for several epochs. 72 | 73 | You can also obtain pre-trained weights from [BEVDet](https://github.com/HuangJunJie2017/BEVDet/blob/dev3.0/docker/Dockerfile) 74 | to start training from scratch. 
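
For example, the tail of the config file might look like the snippet below after pointing it at downloaded weights. This is only an illustration: the checkpoint filename is a placeholder, and `use_mask` / `load_from` are simply the fields mentioned above.

```python
# Illustrative tail of configs/fusion_occ/fusion_occ.py -- values are placeholders.
use_mask = True                               # set to False to train the mask-free variant
load_from = "ckpts/fusion_occ_swin_base.pth"  # path to the downloaded pre-trained weights
```

Training is then launched as above, e.g. `./tools/dist_train.sh configs/fusion_occ/fusion_occ.py 8` when using 8 GPUs.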
75 | 76 | 77 | 78 | ## Acknowledgement 79 | 80 | Thanks a lot to these excellent open-source projects, our code is based on them: 81 | - [BEVDet](https://github.com/HuangJunJie2017/BEVDet), [BEVFormer](https://github.com/fundamentalvision/BEVFormer), [BEVFusion](https://github.com/mit-han-lab/bevfusion) 82 | - [Occ3d](https://github.com/Tsinghua-MARS-Lab/Occ3D), [CVPR23-Occ-Chanllege](https://github.com/CVPR2023-3D-Occupancy-Prediction) 83 | 84 | Some other related projects for Occ3d prediction: 85 | - [SurroundOcc](https://github.com/weiyithu/SurroundOcc), [TPVFormer](https://github.com/wzzheng/TPVFormer) 86 | - [PanoOcc](https://github.com/Robertwyq/PanoOcc), [RenderOcc](https://github.com/pmj110119/RenderOcc) 87 | 88 | 89 | ## BibTeX 90 | 91 | If this work is helpful for your research, please consider citing the following paper: 92 | 93 | ``` 94 | @inproceedings{ 95 | zhang2024fusionocc, 96 | title={FusionOcc: Multi-Modal Fusion for 3D Occupancy Prediction}, 97 | author={Shuo Zhang and Yupeng Zhai and Jilin Mei and Yu Hu}, 98 | booktitle={ACM Multimedia 2024}, 99 | year={2024}, 100 | url={https://openreview.net/forum?id=xX66hwZJWa} 101 | } 102 | -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuoZhang-code/FusionOcc/83ded3884b98b299d35d636a91e9aa2a92d89221/assets/pipeline.png -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/kitti-mono3d.py: 
-------------------------------------------------------------------------------- 1 | dataset_type = 'KittiMonoDataset' 2 | data_root = 'data/kitti/' 3 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 4 | input_modality = dict(use_lidar=False, use_camera=True) 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFileMono3D'), 9 | dict( 10 | type='LoadAnnotations3D', 11 | with_bbox=True, 12 | with_label=True, 13 | with_attr_label=False, 14 | with_bbox_3d=True, 15 | with_label_3d=True, 16 | with_bbox_depth=True), 17 | dict(type='Resize', img_scale=(1242, 375), keep_ratio=True), 18 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle3D', class_names=class_names), 22 | dict( 23 | type='Collect3D', 24 | keys=[ 25 | 'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d', 26 | 'centers2d', 'depths' 27 | ]), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFileMono3D'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(1242, 375), 34 | flip=False, 35 | transforms=[ 36 | dict(type='RandomFlip3D'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict( 40 | type='DefaultFormatBundle3D', 41 | class_names=class_names, 42 | with_label=False), 43 | dict(type='Collect3D', keys=['img']), 44 | ]) 45 | ] 46 | # construct a pipeline for data and gt loading in show function 47 | # please keep its loading function consistent with test_pipeline (e.g. client) 48 | eval_pipeline = [ 49 | dict(type='LoadImageFromFileMono3D'), 50 | dict( 51 | type='DefaultFormatBundle3D', 52 | class_names=class_names, 53 | with_label=False), 54 | dict(type='Collect3D', keys=['img']) 55 | ] 56 | data = dict( 57 | samples_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | data_root=data_root, 62 | ann_file=data_root + 'kitti_infos_train_mono3d.coco.json', 63 | info_file=data_root + 'kitti_infos_train.pkl', 64 | img_prefix=data_root, 65 | classes=class_names, 66 | pipeline=train_pipeline, 67 | modality=input_modality, 68 | test_mode=False, 69 | box_type_3d='Camera'), 70 | val=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 74 | info_file=data_root + 'kitti_infos_val.pkl', 75 | img_prefix=data_root, 76 | classes=class_names, 77 | pipeline=test_pipeline, 78 | modality=input_modality, 79 | test_mode=True, 80 | box_type_3d='Camera'), 81 | test=dict( 82 | type=dataset_type, 83 | data_root=data_root, 84 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 85 | info_file=data_root + 'kitti_infos_val.pkl', 86 | img_prefix=data_root, 87 | classes=class_names, 88 | pipeline=test_pipeline, 89 | modality=input_modality, 90 | test_mode=True, 91 | box_type_3d='Camera')) 92 | evaluation = dict(interval=2) 93 | -------------------------------------------------------------------------------- /configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 
10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'NuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 
21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', 12 | # path_mapping=dict({ 13 | # './data/sunrgbd/': 14 | # 's3://openmmlab/datasets/detection3d/sunrgbd_processed/', 15 | # 'data/sunrgbd/': 16 | # 's3://openmmlab/datasets/detection3d/sunrgbd_processed/' 17 | # })) 18 | 19 | train_pipeline = [ 20 | dict( 21 | type='LoadPointsFromFile', 22 | coord_type='DEPTH', 23 | shift_height=True, 24 | load_dim=6, 25 | use_dim=[0, 1, 2], 26 | file_client_args=file_client_args), 27 | dict(type='LoadAnnotations3D', file_client_args=file_client_args), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | ), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.523599, 0.523599], 36 | scale_ratio_range=[0.85, 1.15], 37 | shift_height=True), 38 | dict(type='PointSample', num_points=20000), 39 | dict(type='DefaultFormatBundle3D', class_names=class_names), 40 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 41 | ] 42 | test_pipeline = [ 43 | dict( 44 | type='LoadPointsFromFile', 45 | coord_type='DEPTH', 46 | shift_height=True, 47 | load_dim=6, 48 | use_dim=[0, 1, 2], 49 | file_client_args=file_client_args), 50 | dict( 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.5, 65 | ), 66 | dict(type='PointSample', num_points=20000), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | load_dim=6, 82 | use_dim=[0, 1, 2], 83 | file_client_args=file_client_args), 84 | dict( 85 | type='DefaultFormatBundle3D', 86 | class_names=class_names, 87 | with_label=False), 88 | dict(type='Collect3D', keys=['points']) 89 | ] 90 | 91 | data = dict( 92 | samples_per_gpu=16, 93 | workers_per_gpu=4, 94 | train=dict( 95 | type='RepeatDataset', 96 | times=5, 97 | dataset=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 101 | pipeline=train_pipeline, 102 | classes=class_names, 103 | filter_empty_gt=False, 104 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 105 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
106 | box_type_3d='Depth', 107 | file_client_args=file_client_args)), 108 | val=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 112 | pipeline=test_pipeline, 113 | classes=class_names, 114 | test_mode=True, 115 | box_type_3d='Depth', 116 | file_client_args=file_client_args), 117 | test=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 121 | pipeline=test_pipeline, 122 | classes=class_names, 123 | test_mode=True, 124 | box_type_3d='Depth', 125 | file_client_args=file_client_args)) 126 | 127 | evaluation = dict(pipeline=eval_pipeline) 128 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | 20 | # disable opencv multithreading to avoid system being overloaded 21 | opencv_num_threads = 0 22 | # set multi-process start method as `fork` to speed up the training 23 | mp_start_method = 'fork' 24 | -------------------------------------------------------------------------------- /configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', 
eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | 
dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 
| post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/dgcnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='DGCNNBackbone', 6 | in_channels=9, # [xyz, rgb, normal_xyz], modified with dataset 7 | num_samples=(20, 20, 20), 8 | knn_modes=('D-KNN', 'F-KNN', 'F-KNN'), 9 | radius=(None, None, None), 10 | gf_channels=((64, 64), (64, 64), (64, )), 11 | fa_channels=(1024, ), 12 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2)), 13 | decode_head=dict( 14 | type='DGCNNHead', 15 | fp_channels=(1216, 512), 16 | channels=256, 17 | dropout_ratio=0.5, 18 | conv_cfg=dict(type='Conv1d'), 19 | norm_cfg=dict(type='BN1d'), 20 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2), 21 | loss_decode=dict( 22 | type='CrossEntropyLoss', 23 | use_sigmoid=False, 24 | class_weight=None, # modified with dataset 25 | loss_weight=1.0)), 26 | # model training and testing settings 27 | train_cfg=dict(), 28 | test_cfg=dict(mode='slide')) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/fcaf3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MinkSingleStage3DDetector', 3 | voxel_size=.01, 4 | backbone=dict(type='MinkResNet', in_channels=3, depth=34), 5 | head=dict( 6 | type='FCAF3DHead', 7 | in_channels=(64, 128, 256, 512), 8 | out_channels=128, 9 | voxel_size=.01, 10 | pts_prune_threshold=100000, 11 | pts_assign_threshold=27, 12 | pts_center_threshold=18, 13 | n_classes=18, 14 | n_reg_outs=6), 15 | train_cfg=dict(), 16 | test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01)) 17 | -------------------------------------------------------------------------------- /configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | backbone=dict( 4 | type='ResNet', 5 | depth=101, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint='open-mmlab://detectron2/resnet101_caffe')), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5, 22 | relu_before_extra_convs=True), 23 | bbox_head=dict( 24 | type='FCOSMono3DHead', 25 | num_classes=10, 26 | in_channels=256, 27 | stacked_convs=2, 28 | feat_channels=256, 29 | use_direction_classifier=True, 30 | diff_rad_by_sin=True, 31 | pred_attrs=True, 32 | pred_velo=True, 33 | dir_offset=0.7854, # pi/4 34 | dir_limit_offset=0, 35 | strides=[8, 16, 32, 64, 128], 36 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 37 | cls_branch=(256, ), 38 | reg_branch=( 39 | (256, ), # offset 40 | (256, ), # depth 41 | (256, ), # size 42 | (256, ), # rot 43 | () # velo 44 | ), 45 | dir_branch=(256, ), 46 | attr_branch=(256, ), 47 | loss_cls=dict( 48 | type='FocalLoss', 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | loss_weight=1.0), 53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 54 | loss_dir=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_attr=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, 
loss_weight=1.0), 58 | loss_centerness=dict( 59 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 60 | bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9), 61 | norm_on_bbox=True, 62 | centerness_on_reg=True, 63 | center_sampling=True, 64 | conv_bias=True, 65 | dcn_on_last_conv=True), 66 | train_cfg=dict( 67 | allowed_border=0, 68 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 69 | pos_weight=-1, 70 | debug=False), 71 | test_cfg=dict( 72 | use_rotate_nms=True, 73 | nms_across_levels=False, 74 | nms_pre=1000, 75 | nms_thr=0.8, 76 | score_thr=0.05, 77 | min_bbox_size=0, 78 | max_per_img=200)) 79 | -------------------------------------------------------------------------------- /configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in 
the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) 53 | [1.7321, 0.5774, 1.], # 1 / sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=-0.7854, # -pi / 4 63 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 64 | loss_cls=dict( 65 | type='FocalLoss', 66 | use_sigmoid=True, 67 | gamma=2.0, 68 | alpha=0.25, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 71 | loss_dir=dict( 72 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 73 | # model training and testing settings 74 | train_cfg=dict( 75 | pts=dict( 76 | assigner=dict( 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.3, 82 | ignore_iof_thr=-1), 83 | allowed_border=0, 84 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
0.2, 0.2], 85 | pos_weight=-1, 86 | debug=False)), 87 | test_cfg=dict( 88 | pts=dict( 89 | use_rotate_nms=True, 90 | nms_across_levels=False, 91 | nms_pre=1000, 92 | nms_thr=0.2, 93 | score_thr=0.05, 94 | min_bbox_size=0, 95 | max_num=500))) 96 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | assign_per_class=True, 38 | anchor_generator=dict( 39 | type='AlignedAnchor3DRangeGenerator', 40 | ranges=[ 41 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 42 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 43 | [0, -39.68, -1.78, 69.12, 39.68, -1.78], 44 | ], 45 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 46 | rotations=[0, 1.57], 47 | reshape_out=False), 48 | diff_rad_by_sin=True, 49 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 50 | loss_cls=dict( 51 | type='FocalLoss', 52 | use_sigmoid=True, 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 57 | loss_dir=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | assigner=[ 62 | 
dict( # for Pedestrian 63 | type='MaxIoUAssigner', 64 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 65 | pos_iou_thr=0.5, 66 | neg_iou_thr=0.35, 67 | min_pos_iou=0.35, 68 | ignore_iof_thr=-1), 69 | dict( # for Cyclist 70 | type='MaxIoUAssigner', 71 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 72 | pos_iou_thr=0.5, 73 | neg_iou_thr=0.35, 74 | min_pos_iou=0.35, 75 | ignore_iof_thr=-1), 76 | dict( # for Car 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.45, 81 | min_pos_iou=0.45, 82 | ignore_iof_thr=-1), 83 | ], 84 | allowed_border=0, 85 | pos_weight=-1, 86 | debug=False), 87 | test_cfg=dict( 88 | use_rotate_nms=True, 89 | nms_across_levels=False, 90 | nms_thr=0.01, 91 | score_thr=0.1, 92 | min_bbox_size=0, 93 | nms_pre=100, 94 | max_num=50)) 95 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [4.73, 2.08, 1.77], # car 52 | [1.81, 0.84, 1.77], # cyclist 53 | [0.91, 0.84, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=-0.7854, # -pi / 4 59 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 60 | loss_cls=dict( 61 | type='FocalLoss', 62 | use_sigmoid=True, 63 | gamma=2.0, 64 | alpha=0.25, 65 | loss_weight=1.0), 66 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 67 | loss_dir=dict( 68 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | pts=dict( 72 | assigner=[ 73 | dict( # car 74 | type='MaxIoUAssigner', 75 | 
iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.55, 77 | neg_iou_thr=0.4, 78 | min_pos_iou=0.4, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1), 87 | dict( # pedestrian 88 | type='MaxIoUAssigner', 89 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.3, 92 | min_pos_iou=0.3, 93 | ignore_iof_thr=-1), 94 | ], 95 | allowed_border=0, 96 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 97 | pos_weight=-1, 98 | debug=False)), 99 | test_cfg=dict( 100 | pts=dict( 101 | use_rotate_nms=True, 102 | nms_across_levels=False, 103 | nms_pre=4096, 104 | nms_thr=0.25, 105 | score_thr=0.1, 106 | min_bbox_size=0, 107 | max_num=500))) 108 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | 
-------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [4.73, 2.08, 1.77], # car 46 | [0.91, 0.84, 1.74], # pedestrian 47 | [1.81, 0.84, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=-0.7854, # -pi / 4 53 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 54 | loss_cls=dict( 55 | type='FocalLoss', 56 | use_sigmoid=True, 57 | gamma=2.0, 58 | alpha=0.25, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 61 | loss_dir=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | assigner=[ 66 | dict( # car 67 | type='MaxIoUAssigner', 68 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 69 | pos_iou_thr=0.55, 70 | neg_iou_thr=0.4, 71 | min_pos_iou=0.4, 72 | ignore_iof_thr=-1), 73 | dict( # pedestrian 74 | type='MaxIoUAssigner', 75 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.5, 77 | neg_iou_thr=0.3, 78 | min_pos_iou=0.3, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1) 87 | ], 88 | allowed_border=0, 89 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 90 | pos_weight=-1, 91 | debug=False), 92 | test_cfg=dict( 93 | use_rotate_nms=True, 94 | nms_across_levels=False, 95 | nms_pre=4096, 96 | nms_thr=0.25, 97 | score_thr=0.1, 98 | min_bbox_size=0, 99 | max_num=500)) 100 | -------------------------------------------------------------------------------- /configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | 
out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | 
type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_per_img=1000, 93 | nms=dict(type='nms', iou_threshold=0.7), 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_per_img=1000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | 
in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 | paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/pgd.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos3d.py' 2 | # model settings 3 | model = dict( 4 | bbox_head=dict( 5 | _delete_=True, 6 | type='PGDHead', 7 | num_classes=10, 8 | in_channels=256, 9 | stacked_convs=2, 10 | feat_channels=256, 11 | use_direction_classifier=True, 12 | diff_rad_by_sin=True, 13 | pred_attrs=True, 14 | pred_velo=True, 15 | pred_bbox2d=True, 16 | pred_keypoints=False, 17 | dir_offset=0.7854, # pi/4 18 | strides=[8, 16, 32, 64, 128], 19 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 20 | cls_branch=(256, ), 21 | reg_branch=( 22 | (256, ), # offset 23 | (256, ), # depth 24 | (256, ), # size 25 | (256, ), # rot 26 | () # velo 27 | ), 28 | dir_branch=(256, ), 29 | attr_branch=(256, ), 30 | loss_cls=dict( 31 | type='FocalLoss', 32 | use_sigmoid=True, 33 | gamma=2.0, 34 | alpha=0.25, 35 | loss_weight=1.0), 36 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 37 | loss_dir=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_attr=dict( 40 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 41 | loss_centerness=dict( 42 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 43 | norm_on_bbox=True, 44 | centerness_on_reg=True, 45 | center_sampling=True, 46 | conv_bias=True, 47 | dcn_on_last_conv=True, 48 | use_depth_classifier=True, 49 | depth_branch=(256, ), 50 | depth_range=(0, 50), 51 | depth_unit=10, 52 | division='uniform', 53 | depth_bins=6, 54 | bbox_coder=dict(type='PGDBBoxCoder', code_size=9)), 55 | test_cfg=dict(nms_pre=1000, nms_thr=0.8, score_thr=0.01, max_per_img=200)) 56 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_msg.py: 
-------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /configs/_base_/models/smoke.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SMOKEMono3D', 3 | backbone=dict( 4 | type='DLANet', 5 | depth=34, 6 | in_channels=3, 7 | norm_cfg=dict(type='GN', num_groups=32), 8 | init_cfg=dict( 9 | type='Pretrained', 10 | checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth' 11 | )), 12 | neck=dict( 13 | type='DLANeck', 14 | in_channels=[16, 32, 64, 128, 256, 512], 15 | start_level=2, 16 | end_level=5, 17 | norm_cfg=dict(type='GN', num_groups=32)), 18 | bbox_head=dict( 19 | type='SMOKEMono3DHead', 20 | num_classes=3, 21 | in_channels=64, 22 | dim_channel=[3, 4, 5], 23 | ori_channel=[6, 7], 24 | stacked_convs=0, 25 | feat_channels=64, 26 | use_direction_classifier=False, 27 | diff_rad_by_sin=False, 28 | pred_attrs=False, 29 | pred_velo=False, 30 | dir_offset=0, 31 | strides=None, 32 | group_reg_dims=(8, ), 33 | cls_branch=(256, ), 34 | reg_branch=((256, ), ), 35 | num_attrs=0, 36 | bbox_code_size=7, 37 | dir_branch=(), 38 | attr_branch=(), 39 | 
bbox_coder=dict( 40 | type='SMOKECoder', 41 | base_depth=(28.01, 16.32), 42 | base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63, 43 | 1.53)), 44 | code_size=7), 45 | loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0), 46 | loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300), 47 | loss_dir=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_attr=None, 50 | conv_bias=True, 51 | dcn_on_last_conv=False), 52 | train_cfg=None, 53 | test_cfg=dict(topK=100, local_maximum_kernel=3, max_per_img=100)) 54 | -------------------------------------------------------------------------------- /configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | 
policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 20 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch.
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_100e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 4 | optimizer_config = dict(grad_clip=None) 5
| lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | 7 | # runtime settings 8 | runner = dict(type='EpochBasedRunner', max_epochs=100) 9 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /docs/datasets.md: -------------------------------------------------------------------------------- 1 | # Prepare Datasets 2 | Currently supported datasets: Occ3D-nuScenes. 3 | 4 | ## Occ3D-nuScenes 5 | Download nuScenes V1.0 full from [here](https://www.nuscenes.org/download) to `data/nuscenes`, nuScenes-lidarseg from [here](https://www.nuscenes.org/download), GTs of Occ(gts only) from [here](https://github.com/Tsinghua-MARS-Lab/Occ3D). 
\ 6 | Prepare the nuScenes dataset as follows: 7 | 8 | ``` 9 | FusionOcc 10 | ├── data 11 | │   ├── nuscenes 12 | │   │   ├── maps 13 | │   │   ├── samples 14 | │   │   ├── sweeps 15 | │   │   ├── lidarseg 16 | │   │   ├── gts 17 | │   │   ├── v1.0-trainval 18 | ``` 19 | 20 | Create the pkl files: 21 | ```shell 22 | python tools/create_data_fusionocc.py 23 | ``` 24 | Generate the image segmentation labels (this takes a long time) by running: 25 | ```shell 26 | python img_seg/gen_segmap.py data/nuscenes --parallel=32 27 | ``` 28 | 29 | After processing, the data structure is as follows: 30 | ``` 31 | FusionOcc 32 | ├── data 33 | │   ├── nuscenes 34 | │   │   ├── maps 35 | │   │   ├── samples 36 | │   │   ├── sweeps 37 | │   │   ├── lidarseg 38 | │   │   ├── imgseg 39 | │   │   ├── gts 40 | │   │   ├── v1.0-trainval 41 | │   │   ├── fusionocc-nuscenes_infos_train.pkl 42 | │   │   ├── fusionocc-nuscenes_infos_val.pkl 43 | ``` -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | The environment is based on [BEVDet](https://github.com/HuangJunJie2017/BEVDet/blob/dev3.0/docker/Dockerfile). 3 | 4 | **1. Conda Virtual Environment** 5 | ```shell 6 | conda create -n fusionocc python=3.8 -y 7 | conda activate fusionocc 8 | ``` 9 | 10 | **2. PyTorch** 11 | ```shell 12 | pip install torch==1.10.1+cu113 torchvision==0.10.1+cu113 -f https://download.pytorch.org/whl/torch_stable.html 13 | ``` 14 | 15 | **3. MMCV, MMDet, MMSeg** 16 | ```shell 17 | pip install mmcv-full==1.5.3 -f https://download.openmmlab.com/mmcv/dist/cu11.3/torch1.10.0/index.html 18 | pip install mmdet==2.25.1 mmsegmentation==0.25.0 19 | ``` 20 | 21 | **4. Others** 22 | ```shell 23 | pip install -r requirements.txt 24 | ``` 25 | Download torch-scatter 2.0.9 from https://pytorch-geometric.com/whl/ 26 | ```shell 27 | pip install torch_scatter-2.0.9-cp38-cp38-linux_x86_64.whl 28 | ``` 29 | 30 | **5. Install FusionOcc** 31 | 32 | ```shell 33 | git clone https://github.com/ShuoZhang-code/FusionOcc.git 34 | cd FusionOcc 35 | pip install -v -e .
36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /img_seg/gen_segmap.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from multiprocessing import Process 4 | 5 | from nuscenes.nuscenes import NuScenes 6 | 7 | from lidar.lidar_anno import nuScenesLidarSeg 8 | from helper import * 9 | 10 | 11 | def gen_seg_map(start_idx, end_idx, nusc, lidar_seg_nus, down_sample, proj_lidar=False, save_dir=None): 12 | for i, scene in enumerate(nusc.scene[start_idx:end_idx]): 13 | sample = nusc.get('sample', scene['first_sample_token']) 14 | while True: 15 | lidar_seg = lidar_seg_nus.get_lidar_seg(sample["token"]) 16 | process_one_sample(nusc, 17 | sample, 18 | down_sample, 19 | lidar_seg=lidar_seg, 20 | proj_lidar=proj_lidar, 21 | save_dir=save_dir) 22 | if sample['next'] == '': 23 | break 24 | sample = nusc.get('sample', sample['next']) 25 | 26 | 27 | def gen_labels(nusc, lidar_seg_nus, down_sample, parallel=1, proj_lidar=False, visible_level=2, save_dir=None): 28 | total_n = len(nusc.scene) 29 | interval = total_n // parallel 30 | processes = [] 31 | for i in range(parallel + 1): 32 | start_idx = i * interval 33 | end_idx = (i + 1) * interval 34 | p = Process(target=gen_seg_map, 35 | args=(start_idx, end_idx, 36 | nusc, lidar_seg_nus, down_sample, proj_lidar, save_dir 37 | ) 38 | ) 39 | p.start() 40 | processes.append(p) 41 | for p in processes: 42 | p.join() 43 | 44 | 45 | def parse_args(): 46 | parser = argparse.ArgumentParser(description='Generate 2d images seg label') 47 | parser.add_argument('data_root', help='data root of nuscenes') 48 | parser.add_argument('--down_sample', type=int, default=8, help='down sample seg img') 49 | parser.add_argument('--parallel', type=int, default=1, help='parallel processing num') 50 | args = parser.parse_args() 51 | return args 52 | 53 | 54 | if __name__ == '__main__': 55 | args = parse_args() 56 | data_root = args.data_root 57 | version = "v1.0-trainval" 58 | save_dir = os.path.join(args.data_root, "imgseg") 59 | os.makedirs(save_dir, exist_ok=True) 60 | down_sample = args.down_sample 61 | parallel = args.parallel 62 | nusc = NuScenes(version=version, 63 | dataroot=data_root, 64 | verbose=True) 65 | lidar_seg_nus = nuScenesLidarSeg(nusc=nusc, data_path=data_root, version=version) 66 | gen_labels(nusc, lidar_seg_nus, down_sample=down_sample, parallel=parallel, 67 | proj_lidar=True, save_dir=save_dir) 68 | -------------------------------------------------------------------------------- /img_seg/lidar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuoZhang-code/FusionOcc/83ded3884b98b299d35d636a91e9aa2a92d89221/img_seg/lidar/__init__.py -------------------------------------------------------------------------------- /img_seg/lidar/config/label_mapping/nuscenes.yaml: -------------------------------------------------------------------------------- 1 | labels: 2 | 0: 'noise' 3 | 1: 'animal' 4 | 2: 'human.pedestrian.adult' 5 | 3: 'human.pedestrian.child' 6 | 4: 'human.pedestrian.construction_worker' 7 | 5: 'human.pedestrian.personal_mobility' 8 | 6: 'human.pedestrian.police_officer' 9 | 7: 'human.pedestrian.stroller' 10 | 8: 'human.pedestrian.wheelchair' 11 | 9: 'movable_object.barrier' 12 | 10: 'movable_object.debris' 13 | 11: 'movable_object.pushable_pullable' 14 | 12: 'movable_object.trafficcone' 15 | 13: 'static_object.bicycle_rack' 16 | 14: 'vehicle.bicycle' 17 | 
15: 'vehicle.bus.bendy' 18 | 16: 'vehicle.bus.rigid' 19 | 17: 'vehicle.car' 20 | 18: 'vehicle.construction' 21 | 19: 'vehicle.emergency.ambulance' 22 | 20: 'vehicle.emergency.police' 23 | 21: 'vehicle.motorcycle' 24 | 22: 'vehicle.trailer' 25 | 23: 'vehicle.truck' 26 | 24: 'flat.driveable_surface' 27 | 25: 'flat.other' 28 | 26: 'flat.sidewalk' 29 | 27: 'flat.terrain' 30 | 28: 'static.manmade' 31 | 29: 'static.other' 32 | 30: 'static.vegetation' 33 | 31: 'vehicle.ego' 34 | labels_16: 35 | 0: 'noise' 36 | 1: 'barrier' 37 | 2: 'bicycle' 38 | 3: 'bus' 39 | 4: 'car' 40 | 5: 'construction_vehicle' 41 | 6: 'motorcycle' 42 | 7: 'pedestrian' 43 | 8: 'traffic_cone' 44 | 9: 'trailer' 45 | 10: 'truck' 46 | 11: 'driveable_surface' 47 | 12: 'other_flat' 48 | 13: 'sidewalk' 49 | 14: 'terrain' 50 | 15: 'manmade' 51 | 16: 'vegetation' 52 | learning_map: 53 | 1: 0 54 | 5: 0 55 | 7: 0 56 | 8: 0 57 | 10: 0 58 | 11: 0 59 | 13: 0 60 | 19: 0 61 | 20: 0 62 | 0: 0 63 | 29: 0 64 | 31: 0 65 | 9: 1 66 | 14: 2 67 | 15: 3 68 | 16: 3 69 | 17: 4 70 | 18: 5 71 | 21: 6 72 | 2: 7 73 | 3: 7 74 | 4: 7 75 | 6: 7 76 | 12: 8 77 | 22: 9 78 | 23: 10 79 | 24: 11 80 | 25: 12 81 | 26: 13 82 | 27: 14 83 | 28: 15 84 | 30: 16 -------------------------------------------------------------------------------- /mmdet3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | import mmdet 5 | import mmseg 6 | from .version import __version__, short_version 7 | 8 | 9 | def digit_version(version_str): 10 | digit_version = [] 11 | for x in version_str.split('.'): 12 | if x.isdigit(): 13 | digit_version.append(int(x)) 14 | elif x.find('rc') != -1: 15 | patch_version = x.split('rc') 16 | digit_version.append(int(patch_version[0]) - 1) 17 | digit_version.append(int(patch_version[1])) 18 | return digit_version 19 | 20 | 21 | mmcv_minimum_version = '1.5.2' 22 | mmcv_maximum_version = '1.7.0' 23 | mmcv_version = digit_version(mmcv.__version__) 24 | 25 | 26 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 27 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 28 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 29 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 30 | 31 | mmdet_minimum_version = '2.24.0' 32 | mmdet_maximum_version = '3.0.0' 33 | mmdet_version = digit_version(mmdet.__version__) 34 | assert (mmdet_version >= digit_version(mmdet_minimum_version) 35 | and mmdet_version <= digit_version(mmdet_maximum_version)), \ 36 | f'MMDET=={mmdet.__version__} is used but incompatible. ' \ 37 | f'Please install mmdet>={mmdet_minimum_version}, ' \ 38 | f'<={mmdet_maximum_version}.' 39 | 40 | mmseg_minimum_version = '0.20.0' 41 | mmseg_maximum_version = '1.0.0' 42 | mmseg_version = digit_version(mmseg.__version__) 43 | assert (mmseg_version >= digit_version(mmseg_minimum_version) 44 | and mmseg_version <= digit_version(mmseg_maximum_version)), \ 45 | f'MMSEG=={mmseg.__version__} is used but incompatible. ' \ 46 | f'Please install mmseg>={mmseg_minimum_version}, ' \ 47 | f'<={mmseg_maximum_version}.' 48 | 49 | __all__ = ['__version__', 'short_version'] 50 | -------------------------------------------------------------------------------- /mmdet3d/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .inference import (convert_SyncBN, inference_detector, 3 | inference_mono_3d_detector, 4 | inference_multi_modality_detector, inference_segmentor, 5 | init_model, show_result_meshlab) 6 | from .test import single_gpu_test 7 | from .train import init_random_seed, train_model 8 | 9 | __all__ = [ 10 | 'inference_detector', 'init_model', 'single_gpu_test', 11 | 'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN', 12 | 'train_model', 'inference_multi_modality_detector', 'inference_segmentor', 13 | 'init_random_seed' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/apis/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from os import path as osp 3 | 4 | import mmcv 5 | import torch 6 | from mmcv.image import tensor2imgs 7 | 8 | from mmdet3d.models import (Base3DDetector) 9 | 10 | 11 | def single_gpu_test(model, 12 | data_loader, 13 | show=False, 14 | out_dir=None, 15 | show_score_thr=0.3): 16 | """Test model with single gpu. 17 | 18 | This method tests the model with a single GPU and gives the 'show' option. 19 | By setting ``show=True``, it saves the visualization results under 20 | ``out_dir``. 21 | 22 | Args: 23 | model (nn.Module): Model to be tested. 24 | data_loader (DataLoader): Pytorch data loader. 25 | show (bool, optional): Whether to save visualization results. 26 | Default: False. 27 | out_dir (str, optional): The path to save visualization results. 28 | Default: None. 29 | 30 | Returns: 31 | list[dict]: The prediction results. 32 | """ 33 | model.eval() 34 | results = [] 35 | dataset = data_loader.dataset 36 | prog_bar = mmcv.ProgressBar(len(dataset)) 37 | 38 | for i, data in enumerate(data_loader): 39 | with torch.no_grad(): 40 | result = model(return_loss=False, rescale=True, **data) 41 | 42 | if show: 43 | # Visualize the results of MMDetection3D model 44 | # 'show_results' is MMdetection3D visualization API 45 | # (only Base3DDetector is importable in this codebase) 46 | models_3d = (Base3DDetector, ) 47 | if isinstance(model.module, models_3d): 48 | model.module.show_results( 49 | data, 50 | result, 51 | out_dir=out_dir, 52 | show=show, 53 | score_thr=show_score_thr) 54 | # Visualize the results of MMDetection model 55 | # 'show_result' is MMdetection visualization API 56 | else: 57 | batch_size = len(result) 58 | if batch_size == 1 and isinstance(data['img'][0], 59 | torch.Tensor): 60 | img_tensor = data['img'][0] 61 | else: 62 | img_tensor = data['img'][0].data[0] 63 | img_metas = data['img_metas'][0].data[0] 64 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 65 | assert len(imgs) == len(img_metas) 66 | 67 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 68 | h, w, _ = img_meta['img_shape'] 69 | img_show = img[:h, :w, :] 70 | 71 | ori_h, ori_w = img_meta['ori_shape'][:-1] 72 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 73 | 74 | if out_dir: 75 | out_file = osp.join(out_dir, img_meta['ori_filename']) 76 | else: 77 | out_file = None 78 | 79 | model.module.show_result( 80 | img_show, 81 | result[i], 82 | show=show, 83 | out_file=out_file, 84 | score_thr=show_score_thr) 85 | results.extend(result) 86 | 87 | batch_size = len(result) 88 | for _ in range(batch_size): 89 | prog_bar.update() 90 | return results 91 | -------------------------------------------------------------------------------- /mmdet3d/core/__init__.py:
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .anchor import * # noqa: F401, F403 3 | from .bbox import * # noqa: F401, F403 4 | from .evaluation import * # noqa: F401, F403 5 | from .hook import * # noqa: F401, F403 6 | from .points import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | from .visualizer import * # noqa: F401, F403 10 | from .voxel import * # noqa: F401, F403 11 | -------------------------------------------------------------------------------- /mmdet3d/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.anchor import build_prior_generator 3 | from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator, 4 | AlignedAnchor3DRangeGeneratorPerCls, 5 | Anchor3DRangeGenerator) 6 | 7 | __all__ = [ 8 | 'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator', 9 | 'build_prior_generator', 'AlignedAnchor3DRangeGeneratorPerCls' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 3 | from .coders import DeltaXYZWLHRBBoxCoder 4 | # from .bbox_target import bbox_target 5 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 6 | BboxOverlapsNearest3D, 7 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 8 | bbox_overlaps_nearest_3d) 9 | from .samplers import (BaseSampler, CombinedSampler, 10 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 11 | PseudoSampler, RandomSampler, SamplingResult) 12 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, 13 | Coord3DMode, DepthInstance3DBoxes, 14 | LiDARInstance3DBoxes, get_box_type, limit_period, 15 | mono_cam_box2vis, points_cam2img, points_img2cam, 16 | xywhr2xyxyr) 17 | from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back 18 | 19 | __all__ = [ 20 | 'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 21 | 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 22 | 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 23 | 'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 24 | 'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 25 | 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode', 26 | 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi', 27 | 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes', 28 | 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img', 29 | 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis' 30 | ] 31 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner 3 | 4 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox import build_bbox_coder 3 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder 4 | from .centerpoint_bbox_coders import CenterPointBBoxCoder 5 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder 6 | from .fcos3d_bbox_coder import FCOS3DBBoxCoder 7 | from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder 8 | from .monoflex_bbox_coder import MonoFlexCoder 9 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 10 | from .pgd_bbox_coder import PGDBBoxCoder 11 | from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder 12 | from .smoke_bbox_coder import SMOKECoder 13 | 14 | __all__ = [ 15 | 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder', 16 | 'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder', 17 | 'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder', 'PGDBBoxCoder', 'SMOKECoder', 18 | 'MonoFlexCoder' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | 5 | from mmdet.core.bbox.builder import BBOX_CODERS 6 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class AnchorFreeBBoxCoder(PartialBinBasedBBoxCoder): 11 | """Anchor free bbox coder for 3D boxes. 12 | 13 | Args: 14 | num_dir_bins (int): Number of bins to encode direction angle. 15 | with_rot (bool): Whether the bbox is with rotation. 16 | """ 17 | 18 | def __init__(self, num_dir_bins, with_rot=True): 19 | super(AnchorFreeBBoxCoder, self).__init__( 20 | num_dir_bins, 0, [], with_rot=with_rot) 21 | self.num_dir_bins = num_dir_bins 22 | self.with_rot = with_rot 23 | 24 | def encode(self, gt_bboxes_3d, gt_labels_3d): 25 | """Encode ground truth to prediction targets. 26 | 27 | Args: 28 | gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes 29 | with shape (n, 7). 30 | gt_labels_3d (torch.Tensor): Ground truth classes. 31 | 32 | Returns: 33 | tuple: Targets of center, size and direction. 34 | """ 35 | # generate center target 36 | center_target = gt_bboxes_3d.gravity_center 37 | 38 | # generate bbox size target 39 | size_res_target = gt_bboxes_3d.dims / 2 40 | 41 | # generate dir target 42 | box_num = gt_labels_3d.shape[0] 43 | if self.with_rot: 44 | (dir_class_target, 45 | dir_res_target) = self.angle2class(gt_bboxes_3d.yaw) 46 | dir_res_target /= (2 * np.pi / self.num_dir_bins) 47 | else: 48 | dir_class_target = gt_labels_3d.new_zeros(box_num) 49 | dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num) 50 | 51 | return (center_target, size_res_target, dir_class_target, 52 | dir_res_target) 53 | 54 | def decode(self, bbox_out): 55 | """Decode predicted parts to bbox3d. 56 | 57 | Args: 58 | bbox_out (dict): Predictions from model, should contain keys below. 59 | 60 | - center: predicted bottom center of bboxes. 61 | - dir_class: predicted bbox direction class. 62 | - dir_res: predicted bbox direction residual. 
63 | - size: predicted bbox size. 64 | 65 | Returns: 66 | torch.Tensor: Decoded bbox3d with shape (batch, n, 7). 67 | """ 68 | center = bbox_out['center'] 69 | batch_size, num_proposal = center.shape[:2] 70 | 71 | # decode heading angle 72 | if self.with_rot: 73 | dir_class = torch.argmax(bbox_out['dir_class'], -1) 74 | dir_res = torch.gather(bbox_out['dir_res'], 2, 75 | dir_class.unsqueeze(-1)) 76 | dir_res.squeeze_(2) 77 | dir_angle = self.class2angle(dir_class, dir_res).reshape( 78 | batch_size, num_proposal, 1) 79 | else: 80 | dir_angle = center.new_zeros(batch_size, num_proposal, 1) 81 | 82 | # decode bbox size 83 | bbox_size = torch.clamp(bbox_out['size'] * 2, min=0.1) 84 | 85 | bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1) 86 | return bbox3d 87 | 88 | def split_pred(self, cls_preds, reg_preds, base_xyz): 89 | """Split predicted features to specific parts. 90 | 91 | Args: 92 | cls_preds (torch.Tensor): Class predicted features to split. 93 | reg_preds (torch.Tensor): Regression predicted features to split. 94 | base_xyz (torch.Tensor): Coordinates of points. 95 | 96 | Returns: 97 | dict[str, torch.Tensor]: Split results. 98 | """ 99 | results = {} 100 | results['obj_scores'] = cls_preds 101 | 102 | start, end = 0, 0 103 | reg_preds_trans = reg_preds.transpose(2, 1) 104 | 105 | # decode center 106 | end += 3 107 | # (batch_size, num_proposal, 3) 108 | results['center_offset'] = reg_preds_trans[..., start:end] 109 | results['center'] = base_xyz.detach() + reg_preds_trans[..., start:end] 110 | start = end 111 | 112 | # decode center 113 | end += 3 114 | # (batch_size, num_proposal, 3) 115 | results['size'] = reg_preds_trans[..., start:end] 116 | start = end 117 | 118 | # decode direction 119 | end += self.num_dir_bins 120 | results['dir_class'] = reg_preds_trans[..., start:end] 121 | start = end 122 | 123 | end += self.num_dir_bins 124 | dir_res_norm = reg_preds_trans[..., start:end] 125 | start = end 126 | 127 | results['dir_res_norm'] = dir_res_norm 128 | results['dir_res'] = dir_res_norm * (2 * np.pi / self.num_dir_bins) 129 | 130 | return results 131 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from mmdet.core.bbox import BaseBBoxCoder 5 | from mmdet.core.bbox.builder import BBOX_CODERS 6 | 7 | 8 | @BBOX_CODERS.register_module() 9 | class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder): 10 | """Bbox Coder for 3D boxes. 11 | 12 | Args: 13 | code_size (int): The dimension of boxes to be encoded. 14 | """ 15 | 16 | def __init__(self, code_size=7): 17 | super(DeltaXYZWLHRBBoxCoder, self).__init__() 18 | self.code_size = code_size 19 | 20 | @staticmethod 21 | def encode(src_boxes, dst_boxes): 22 | """Get box regression transformation deltas (dx, dy, dz, dx_size, 23 | dy_size, dz_size, dr, dv*) that can be used to transform the 24 | `src_boxes` into the `target_boxes`. 25 | 26 | Args: 27 | src_boxes (torch.Tensor): source boxes, e.g., object proposals. 28 | dst_boxes (torch.Tensor): target of the transformation, e.g., 29 | ground-truth boxes. 30 | 31 | Returns: 32 | torch.Tensor: Box transformation deltas. 
33 | """ 34 | box_ndim = src_boxes.shape[-1] 35 | cas, cgs, cts = [], [], [] 36 | if box_ndim > 7: 37 | xa, ya, za, wa, la, ha, ra, *cas = torch.split( 38 | src_boxes, 1, dim=-1) 39 | xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split( 40 | dst_boxes, 1, dim=-1) 41 | cts = [g - a for g, a in zip(cgs, cas)] 42 | else: 43 | xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1) 44 | xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1) 45 | za = za + ha / 2 46 | zg = zg + hg / 2 47 | diagonal = torch.sqrt(la**2 + wa**2) 48 | xt = (xg - xa) / diagonal 49 | yt = (yg - ya) / diagonal 50 | zt = (zg - za) / ha 51 | lt = torch.log(lg / la) 52 | wt = torch.log(wg / wa) 53 | ht = torch.log(hg / ha) 54 | rt = rg - ra 55 | return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1) 56 | 57 | @staticmethod 58 | def decode(anchors, deltas): 59 | """Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size, 60 | dz_size, dr, dv*) to `boxes`. 61 | 62 | Args: 63 | anchors (torch.Tensor): Parameters of anchors with shape (N, 7). 64 | deltas (torch.Tensor): Encoded boxes with shape 65 | (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*]. 66 | 67 | Returns: 68 | torch.Tensor: Decoded boxes. 69 | """ 70 | cas, cts = [], [] 71 | box_ndim = anchors.shape[-1] 72 | if box_ndim > 7: 73 | xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1) 74 | xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1) 75 | else: 76 | xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) 77 | xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1) 78 | 79 | za = za + ha / 2 80 | diagonal = torch.sqrt(la**2 + wa**2) 81 | xg = xt * diagonal + xa 82 | yg = yt * diagonal + ya 83 | zg = zt * ha + za 84 | 85 | lg = torch.exp(lt) * la 86 | wg = torch.exp(wt) * wa 87 | hg = torch.exp(ht) * ha 88 | rg = rt + ra 89 | zg = zg - hg / 2 90 | cgs = [t + a for t, a in zip(cts, cas)] 91 | return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1) 92 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler, 3 | InstanceBalancedPosSampler, 4 | IoUBalancedNegSampler, OHEMSampler, 5 | PseudoSampler, RandomSampler, 6 | SamplingResult) 7 | from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler 8 | 9 | __all__ = [ 10 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 11 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 12 | 'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_box3d import BaseInstance3DBoxes 3 | from .box_3d_mode import Box3DMode 4 | from .cam_box3d import CameraInstance3DBoxes 5 | from .coord_3d_mode import Coord3DMode 6 | from .depth_box3d import DepthInstance3DBoxes 7 | from .lidar_box3d import LiDARInstance3DBoxes 8 | from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period, 9 | mono_cam_box2vis, points_cam2img, points_img2cam, 10 | rotation_3d_in_axis, xywhr2xyxyr) 11 | 12 | __all__ = [ 13 | 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes', 14 | 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr', 15 | 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img', 16 | 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis', 17 | 'get_proj_mat_by_coord_type' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical): 6 | """Map bboxes from testing scale to original image scale. 7 | 8 | Args: 9 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back. 10 | scale_factor (float): Scale factor. 11 | flip_horizontal (bool): Whether to flip horizontally. 12 | flip_vertical (bool): Whether to flip vertically. 13 | 14 | Returns: 15 | :obj:`BaseInstance3DBoxes`: Boxes mapped back. 16 | """ 17 | new_bboxes = bboxes.clone() 18 | if flip_horizontal: 19 | new_bboxes.flip('horizontal') 20 | if flip_vertical: 21 | new_bboxes.flip('vertical') 22 | new_bboxes.scale(1 / scale_factor) 23 | 24 | return new_bboxes 25 | 26 | 27 | def bbox3d2roi(bbox_list): 28 | """Convert a list of bounding boxes to roi format. 29 | 30 | Args: 31 | bbox_list (list[torch.Tensor]): A list of bounding boxes 32 | corresponding to a batch of images. 33 | 34 | Returns: 35 | torch.Tensor: Region of interests in shape (n, c), where 36 | the channels are in order of [batch_ind, x, y ...]. 37 | """ 38 | rois_list = [] 39 | for img_id, bboxes in enumerate(bbox_list): 40 | if bboxes.size(0) > 0: 41 | img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) 42 | rois = torch.cat([img_inds, bboxes], dim=-1) 43 | else: 44 | rois = torch.zeros_like(bboxes) 45 | rois_list.append(rois) 46 | rois = torch.cat(rois_list, 0) 47 | return rois 48 | 49 | 50 | def bbox3d2result(bboxes, scores, labels, attrs=None): 51 | """Convert detection results to a list of numpy arrays. 52 | 53 | Args: 54 | bboxes (torch.Tensor): Bounding boxes with shape (N, 5). 55 | labels (torch.Tensor): Labels with shape (N, ). 56 | scores (torch.Tensor): Scores with shape (N, ). 
57 | attrs (torch.Tensor, optional): Attributes with shape (N, ). 58 | Defaults to None. 59 | 60 | Returns: 61 | dict[str, torch.Tensor]: Bounding box results in cpu mode. 62 | 63 | - boxes_3d (torch.Tensor): 3D boxes. 64 | - scores (torch.Tensor): Prediction scores. 65 | - labels_3d (torch.Tensor): Box labels. 66 | - attrs_3d (torch.Tensor, optional): Box attributes. 67 | """ 68 | result_dict = dict( 69 | boxes_3d=bboxes.to('cpu'), 70 | scores_3d=scores.cpu(), 71 | labels_3d=labels.cpu()) 72 | 73 | if attrs is not None: 74 | result_dict['attrs_3d'] = attrs.cpu() 75 | 76 | return result_dict 77 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .indoor_eval import indoor_eval 3 | from .instance_seg_eval import instance_seg_eval 4 | from .kitti_utils import kitti_eval, kitti_eval_coco_style 5 | from .lyft_eval import lyft_eval 6 | from .seg_eval import seg_eval 7 | 8 | __all__ = [ 9 | 'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval', 10 | 'seg_eval', 'instance_seg_eval' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/kitti_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .eval import kitti_eval, kitti_eval_coco_style 3 | 4 | __all__ = ['kitti_eval', 'kitti_eval_coco_style'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .evaluate_semantic_instance import evaluate_matches, scannet_eval 3 | 4 | __all__ = ['scannet_eval', 'evaluate_matches'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/util_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/util_3d.py # noqa 3 | import json 4 | 5 | import numpy as np 6 | 7 | 8 | class Instance: 9 | """Single instance for ScanNet evaluator. 10 | 11 | Args: 12 | mesh_vert_instances (np.array): Instance ids for each point. 13 | instance_id: Id of single instance. 
14 | """ 15 | instance_id = 0 16 | label_id = 0 17 | vert_count = 0 18 | med_dist = -1 19 | dist_conf = 0.0 20 | 21 | def __init__(self, mesh_vert_instances, instance_id): 22 | if instance_id == -1: 23 | return 24 | self.instance_id = int(instance_id) 25 | self.label_id = int(self.get_label_id(instance_id)) 26 | self.vert_count = int( 27 | self.get_instance_verts(mesh_vert_instances, instance_id)) 28 | 29 | @staticmethod 30 | def get_label_id(instance_id): 31 | return int(instance_id // 1000) 32 | 33 | @staticmethod 34 | def get_instance_verts(mesh_vert_instances, instance_id): 35 | return (mesh_vert_instances == instance_id).sum() 36 | 37 | def to_json(self): 38 | return json.dumps( 39 | self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 40 | 41 | def to_dict(self): 42 | dict = {} 43 | dict['instance_id'] = self.instance_id 44 | dict['label_id'] = self.label_id 45 | dict['vert_count'] = self.vert_count 46 | dict['med_dist'] = self.med_dist 47 | dict['dist_conf'] = self.dist_conf 48 | return dict 49 | 50 | def from_json(self, data): 51 | self.instance_id = int(data['instance_id']) 52 | self.label_id = int(data['label_id']) 53 | self.vert_count = int(data['vert_count']) 54 | if 'med_dist' in data: 55 | self.med_dist = float(data['med_dist']) 56 | self.dist_conf = float(data['dist_conf']) 57 | 58 | def __str__(self): 59 | return '(' + str(self.instance_id) + ')' 60 | 61 | 62 | def get_instances(ids, class_ids, class_labels, id2label): 63 | """Transform gt instance mask to Instance objects. 64 | 65 | Args: 66 | ids (np.array): Instance ids for each point. 67 | class_ids: (tuple[int]): Ids of valid categories. 68 | class_labels (tuple[str]): Class names. 69 | id2label: (dict[int, str]): Mapping of valid class id to class label. 70 | 71 | Returns: 72 | dict [str, list]: Instance objects grouped by class label. 73 | """ 74 | instances = {} 75 | for label in class_labels: 76 | instances[label] = [] 77 | instance_ids = np.unique(ids) 78 | for id in instance_ids: 79 | if id == 0: 80 | continue 81 | inst = Instance(ids, id) 82 | if inst.label_id in class_ids: 83 | instances[id2label[inst.label_id]].append(inst.to_dict()) 84 | return instances 85 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/seg_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | from mmcv.utils import print_log 4 | from terminaltables import AsciiTable 5 | 6 | 7 | def fast_hist(preds, labels, num_classes): 8 | """Compute the confusion matrix for every batch. 9 | 10 | Args: 11 | preds (np.ndarray): Prediction labels of points with shape of 12 | (num_points, ). 13 | labels (np.ndarray): Ground truth labels of points with shape of 14 | (num_points, ). 15 | num_classes (int): number of classes 16 | 17 | Returns: 18 | np.ndarray: Calculated confusion matrix. 19 | """ 20 | 21 | k = (labels >= 0) & (labels < num_classes) 22 | bin_count = np.bincount( 23 | num_classes * labels[k].astype(int) + preds[k], 24 | minlength=num_classes**2) 25 | return bin_count[:num_classes**2].reshape(num_classes, num_classes) 26 | 27 | 28 | def per_class_iou(hist): 29 | """Compute the per class iou. 30 | 31 | Args: 32 | hist(np.ndarray): Overall confusion martix 33 | (num_classes, num_classes ). 
34 | 35 | Returns: 36 | np.ndarray: Calculated per class iou 37 | """ 38 | 39 | return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 40 | 41 | 42 | def get_acc(hist): 43 | """Compute the overall accuracy. 44 | 45 | Args: 46 | hist(np.ndarray): Overall confusion martix 47 | (num_classes, num_classes ). 48 | 49 | Returns: 50 | float: Calculated overall acc 51 | """ 52 | 53 | return np.diag(hist).sum() / hist.sum() 54 | 55 | 56 | def get_acc_cls(hist): 57 | """Compute the class average accuracy. 58 | 59 | Args: 60 | hist(np.ndarray): Overall confusion martix 61 | (num_classes, num_classes ). 62 | 63 | Returns: 64 | float: Calculated class average acc 65 | """ 66 | 67 | return np.nanmean(np.diag(hist) / hist.sum(axis=1)) 68 | 69 | 70 | def seg_eval(gt_labels, seg_preds, label2cat, ignore_index, logger=None): 71 | """Semantic Segmentation Evaluation. 72 | 73 | Evaluate the result of the Semantic Segmentation. 74 | 75 | Args: 76 | gt_labels (list[torch.Tensor]): Ground truth labels. 77 | seg_preds (list[torch.Tensor]): Predictions. 78 | label2cat (dict): Map from label to category name. 79 | ignore_index (int): Index that will be ignored in evaluation. 80 | logger (logging.Logger | str, optional): The way to print the mAP 81 | summary. See `mmdet.utils.print_log()` for details. Default: None. 82 | 83 | Returns: 84 | dict[str, float]: Dict of results. 85 | """ 86 | assert len(seg_preds) == len(gt_labels) 87 | num_classes = len(label2cat) 88 | 89 | hist_list = [] 90 | for i in range(len(gt_labels)): 91 | gt_seg = gt_labels[i].clone().numpy().astype(np.int) 92 | pred_seg = seg_preds[i].clone().numpy().astype(np.int) 93 | 94 | # filter out ignored points 95 | pred_seg[gt_seg == ignore_index] = -1 96 | gt_seg[gt_seg == ignore_index] = -1 97 | 98 | # calculate one instance result 99 | hist_list.append(fast_hist(pred_seg, gt_seg, num_classes)) 100 | 101 | iou = per_class_iou(sum(hist_list)) 102 | miou = np.nanmean(iou) 103 | acc = get_acc(sum(hist_list)) 104 | acc_cls = get_acc_cls(sum(hist_list)) 105 | 106 | header = ['classes'] 107 | for i in range(len(label2cat)): 108 | header.append(label2cat[i]) 109 | header.extend(['miou', 'acc', 'acc_cls']) 110 | 111 | ret_dict = dict() 112 | table_columns = [['results']] 113 | for i in range(len(label2cat)): 114 | ret_dict[label2cat[i]] = float(iou[i]) 115 | table_columns.append([f'{iou[i]:.4f}']) 116 | ret_dict['miou'] = float(miou) 117 | ret_dict['acc'] = float(acc) 118 | ret_dict['acc_cls'] = float(acc_cls) 119 | 120 | table_columns.append([f'{miou:.4f}']) 121 | table_columns.append([f'{acc:.4f}']) 122 | table_columns.append([f'{acc_cls:.4f}']) 123 | 124 | table_data = [header] 125 | table_rows = list(zip(*table_columns)) 126 | table_data += table_rows 127 | table = AsciiTable(table_data) 128 | table.inner_footing_row_border = True 129 | print_log('\n' + table.table, logger=logger) 130 | 131 | return ret_dict 132 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/waymo_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .prediction_kitti_to_waymo import KITTI2Waymo 3 | 4 | __all__ = ['KITTI2Waymo'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
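# The hooks exported below are registered in mmcv's HOOKS registry. A minimal
# sketch of how they could be enabled from a training config (assumed fragment,
# not copied from this repo's configs):
#
#   custom_hooks = [
#       dict(type='MEGVIIEMAHook', decay=0.999),
#       dict(type='SequentialControlHook', temporal_start_epoch=1),
#       dict(type='SyncbnControlHook', syncbn_start_epoch=1),
#   ]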
2 | from .ema import MEGVIIEMAHook 3 | from .utils import is_parallel 4 | from .sequentialcontrol import SequentialControlHook 5 | from .syncbncontrol import SyncbnControlHook 6 | 7 | __all__ = ['MEGVIIEMAHook', 'is_parallel', 'SequentialControlHook', 8 | 'SyncbnControlHook'] 9 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/ema.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # modified from megvii-bevdepth. 3 | import math 4 | import os 5 | from copy import deepcopy 6 | 7 | import torch 8 | from mmcv.runner import load_state_dict 9 | from mmcv.runner.dist_utils import master_only 10 | from mmcv.runner.hooks import HOOKS, Hook 11 | 12 | from mmdet3d.core.hook.utils import is_parallel 13 | 14 | __all__ = ['ModelEMA'] 15 | 16 | 17 | class ModelEMA: 18 | """Model Exponential Moving Average from https://github.com/rwightman/ 19 | pytorch-image-models Keep a moving average of everything in the model 20 | state_dict (parameters and buffers). 21 | 22 | This is intended to allow functionality like 23 | https://www.tensorflow.org/api_docs/python/tf/train/ 24 | ExponentialMovingAverage 25 | A smoothed version of the weights is necessary for some training 26 | schemes to perform well. 27 | This class is sensitive where it is initialized in the sequence 28 | of model init, GPU assignment and distributed training wrappers. 29 | """ 30 | 31 | def __init__(self, model, decay=0.9999, updates=0): 32 | """ 33 | Args: 34 | model (nn.Module): model to apply EMA. 35 | decay (float): ema decay reate. 36 | updates (int): counter of EMA updates. 37 | """ 38 | # Create EMA(FP32) 39 | self.ema_model = deepcopy(model).eval() 40 | self.ema = self.ema_model.module.module if is_parallel( 41 | self.ema_model.module) else self.ema_model.module 42 | self.updates = updates 43 | # decay exponential ramp (to help early epochs) 44 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) 45 | for p in self.ema.parameters(): 46 | p.requires_grad_(False) 47 | 48 | def update(self, trainer, model): 49 | # Update EMA parameters 50 | with torch.no_grad(): 51 | self.updates += 1 52 | d = self.decay(self.updates) 53 | 54 | msd = model.module.state_dict() if is_parallel( 55 | model) else model.state_dict() # model state_dict 56 | for k, v in self.ema.state_dict().items(): 57 | if v.dtype.is_floating_point: 58 | v *= d 59 | v += (1.0 - d) * msd[k].detach() 60 | 61 | 62 | @HOOKS.register_module() 63 | class MEGVIIEMAHook(Hook): 64 | """EMAHook used in BEVDepth. 65 | 66 | Modified from https://github.com/Megvii-Base 67 | Detection/BEVDepth/blob/main/callbacks/ema.py. 
68 | """ 69 | 70 | def __init__(self, init_updates=0, decay=0.9990, resume=None): 71 | super().__init__() 72 | self.init_updates = init_updates 73 | self.resume = resume 74 | self.decay = decay 75 | 76 | def before_run(self, runner): 77 | from torch.nn.modules.batchnorm import SyncBatchNorm 78 | 79 | bn_model_list = list() 80 | bn_model_dist_group_list = list() 81 | for model_ref in runner.model.modules(): 82 | if isinstance(model_ref, SyncBatchNorm): 83 | bn_model_list.append(model_ref) 84 | bn_model_dist_group_list.append(model_ref.process_group) 85 | model_ref.process_group = None 86 | runner.ema_model = ModelEMA(runner.model, self.decay) 87 | 88 | for bn_model, dist_group in zip(bn_model_list, 89 | bn_model_dist_group_list): 90 | bn_model.process_group = dist_group 91 | runner.ema_model.updates = self.init_updates 92 | 93 | if self.resume is not None: 94 | runner.logger.info(f'resume ema checkpoint from {self.resume}') 95 | cpt = torch.load(self.resume, map_location='cpu') 96 | load_state_dict(runner.ema_model.ema, cpt['state_dict']) 97 | runner.ema_model.updates = cpt['updates'] 98 | 99 | def after_train_iter(self, runner): 100 | runner.ema_model.update(runner, runner.model.module) 101 | 102 | def after_train_epoch(self, runner): 103 | self.save_checkpoint(runner) 104 | 105 | @master_only 106 | def save_checkpoint(self, runner): 107 | state_dict = runner.ema_model.ema.state_dict() 108 | ema_checkpoint = { 109 | 'epoch': runner.epoch, 110 | 'state_dict': state_dict, 111 | 'updates': runner.ema_model.updates 112 | } 113 | save_path = f'epoch_{runner.epoch+1}_ema.pth' 114 | save_path = os.path.join(runner.work_dir, save_path) 115 | torch.save(ema_checkpoint, save_path) 116 | runner.logger.info(f'Saving ema checkpoint at {save_path}') 117 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/sequentialcontrol.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.runner.hooks import HOOKS, Hook 3 | from mmdet3d.core.hook.utils import is_parallel 4 | 5 | __all__ = ['SequentialControlHook'] 6 | 7 | 8 | @HOOKS.register_module() 9 | class SequentialControlHook(Hook): 10 | """ """ 11 | 12 | def __init__(self, temporal_start_epoch=1): 13 | super().__init__() 14 | self.temporal_start_epoch=temporal_start_epoch 15 | 16 | def set_temporal_flag(self, runner, flag): 17 | if is_parallel(runner.model.module): 18 | runner.model.module.module.with_prev=flag 19 | else: 20 | runner.model.module.with_prev = flag 21 | 22 | def before_run(self, runner): 23 | self.set_temporal_flag(runner, False) 24 | 25 | def before_train_epoch(self, runner): 26 | if runner.epoch > self.temporal_start_epoch: 27 | self.set_temporal_flag(runner, True) -------------------------------------------------------------------------------- /mmdet3d/core/hook/syncbncontrol.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
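# Hook that converts the model's BatchNorm layers to SyncBatchNorm in place once
# training reaches `syncbn_start_epoch` (see SyncbnControlHook below).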
2 | from mmcv.runner.hooks import HOOKS, Hook 3 | from mmdet3d.core.hook.utils import is_parallel 4 | from torch.nn import SyncBatchNorm 5 | 6 | __all__ = ['SyncbnControlHook'] 7 | 8 | 9 | @HOOKS.register_module() 10 | class SyncbnControlHook(Hook): 11 | """ """ 12 | 13 | def __init__(self, syncbn_start_epoch=1): 14 | super().__init__() 15 | self.is_syncbn=False 16 | self.syncbn_start_epoch = syncbn_start_epoch 17 | 18 | def cvt_syncbn(self, runner): 19 | if is_parallel(runner.model.module): 20 | runner.model.module.module=\ 21 | SyncBatchNorm.convert_sync_batchnorm(runner.model.module.module, 22 | process_group=None) 23 | else: 24 | runner.model.module=\ 25 | SyncBatchNorm.convert_sync_batchnorm(runner.model.module, 26 | process_group=None) 27 | 28 | def before_train_epoch(self, runner): 29 | if runner.epoch>= self.syncbn_start_epoch and not self.is_syncbn: 30 | print('start use syncbn') 31 | self.cvt_syncbn(runner) 32 | self.is_syncbn=True 33 | 34 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from torch import nn 3 | 4 | __all__ = ['is_parallel'] 5 | 6 | 7 | def is_parallel(model): 8 | """check if model is in parallel mode.""" 9 | parallel_type = ( 10 | nn.parallel.DataParallel, 11 | nn.parallel.DistributedDataParallel, 12 | ) 13 | return isinstance(model, parallel_type) 14 | -------------------------------------------------------------------------------- /mmdet3d/core/points/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | from .cam_points import CameraPoints 4 | from .depth_points import DepthPoints 5 | from .lidar_points import LiDARPoints 6 | 7 | __all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints'] 8 | 9 | 10 | def get_points_type(points_type): 11 | """Get the class of points according to coordinate type. 12 | 13 | Args: 14 | points_type (str): The type of points coordinate. 15 | The valid value are "CAMERA", "LIDAR", or "DEPTH". 16 | 17 | Returns: 18 | class: Points type. 19 | """ 20 | if points_type == 'CAMERA': 21 | points_cls = CameraPoints 22 | elif points_type == 'LIDAR': 23 | points_cls = LiDARPoints 24 | elif points_type == 'DEPTH': 25 | points_cls = DepthPoints 26 | else: 27 | raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"' 28 | f' are supported, got {points_type}') 29 | 30 | return points_cls 31 | -------------------------------------------------------------------------------- /mmdet3d/core/points/cam_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class CameraPoints(BasePoints): 6 | """Points of instances in CAM coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 
19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(CameraPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 1 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 2] = -self.tensor[:, 2] 39 | 40 | @property 41 | def bev(self): 42 | """torch.Tensor: BEV of the points in shape (N, 2).""" 43 | return self.tensor[:, [0, 2]] 44 | 45 | def convert_to(self, dst, rt_mat=None): 46 | """Convert self to ``dst`` mode. 47 | 48 | Args: 49 | dst (:obj:`CoordMode`): The target Point mode. 50 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 51 | translation matrix between different coordinates. 52 | Defaults to None. 53 | The conversion from `src` coordinates to `dst` coordinates 54 | usually comes along the change of sensors, e.g., from camera 55 | to LiDAR. This requires a transformation matrix. 56 | 57 | Returns: 58 | :obj:`BasePoints`: The converted point of the same type 59 | in the `dst` mode. 60 | """ 61 | from mmdet3d.core.bbox import Coord3DMode 62 | return Coord3DMode.convert_point( 63 | point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat) 64 | -------------------------------------------------------------------------------- /mmdet3d/core/points/depth_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class DepthPoints(BasePoints): 6 | """Points of instances in DEPTH coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(DepthPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 1] = -self.tensor[:, 1] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 
45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/points/lidar_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class LiDARPoints(BasePoints): 6 | """Points of instances in LIDAR coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(LiDARPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 1] = -self.tensor[:, 1] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 0] = -self.tensor[:, 0] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
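# Re-exports mmdet's 2D test-time-augmentation merge utilities together with the
# 3D helpers defined in this package: BEV NMS variants (nms_bev, nms_normal_bev),
# multi-class and aligned 3D NMS, circle NMS and merge_aug_bboxes_3d.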
2 | from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores, 4 | multiclass_nms) 5 | from .box3d_nms import (aligned_3d_nms, box3d_multiclass_nms, circle_nms, 6 | nms_bev, nms_normal_bev) 7 | from .merge_augs import merge_aug_bboxes_3d 8 | 9 | __all__ = [ 10 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 11 | 'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms', 12 | 'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms', 'nms_bev', 13 | 'nms_normal_bev' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from mmdet3d.core.post_processing import nms_bev, nms_normal_bev 5 | from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr 6 | 7 | 8 | def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg): 9 | """Merge augmented detection 3D bboxes and scores. 10 | 11 | Args: 12 | aug_results (list[dict]): The dict of detection results. 13 | The dict contains the following keys 14 | 15 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. 16 | - scores_3d (torch.Tensor): Detection scores. 17 | - labels_3d (torch.Tensor): Predicted box labels. 18 | img_metas (list[dict]): Meta information of each sample. 19 | test_cfg (dict): Test config. 20 | 21 | Returns: 22 | dict: Bounding boxes results in cpu mode, containing merged results. 23 | 24 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox. 25 | - scores_3d (torch.Tensor): Merged detection scores. 26 | - labels_3d (torch.Tensor): Merged predicted box labels. 
27 | """ 28 | 29 | assert len(aug_results) == len(img_metas), \ 30 | '"aug_results" should have the same length as "img_metas", got len(' \ 31 | f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}' 32 | 33 | recovered_bboxes = [] 34 | recovered_scores = [] 35 | recovered_labels = [] 36 | 37 | for bboxes, img_info in zip(aug_results, img_metas): 38 | scale_factor = img_info[0]['pcd_scale_factor'] 39 | pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip'] 40 | pcd_vertical_flip = img_info[0]['pcd_vertical_flip'] 41 | recovered_scores.append(bboxes['scores_3d']) 42 | recovered_labels.append(bboxes['labels_3d']) 43 | bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor, 44 | pcd_horizontal_flip, pcd_vertical_flip) 45 | recovered_bboxes.append(bboxes) 46 | 47 | aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes) 48 | aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev) 49 | aug_scores = torch.cat(recovered_scores, dim=0) 50 | aug_labels = torch.cat(recovered_labels, dim=0) 51 | 52 | # TODO: use a more elegent way to deal with nms 53 | if test_cfg.use_rotate_nms: 54 | nms_func = nms_bev 55 | else: 56 | nms_func = nms_normal_bev 57 | 58 | merged_bboxes = [] 59 | merged_scores = [] 60 | merged_labels = [] 61 | 62 | # Apply multi-class nms when merge bboxes 63 | if len(aug_labels) == 0: 64 | return bbox3d2result(aug_bboxes, aug_scores, aug_labels) 65 | 66 | for class_id in range(torch.max(aug_labels).item() + 1): 67 | class_inds = (aug_labels == class_id) 68 | bboxes_i = aug_bboxes[class_inds] 69 | bboxes_nms_i = aug_bboxes_for_nms[class_inds, :] 70 | scores_i = aug_scores[class_inds] 71 | labels_i = aug_labels[class_inds] 72 | if len(bboxes_nms_i) == 0: 73 | continue 74 | selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr) 75 | 76 | merged_bboxes.append(bboxes_i[selected, :]) 77 | merged_scores.append(scores_i[selected]) 78 | merged_labels.append(labels_i[selected]) 79 | 80 | merged_bboxes = merged_bboxes[0].cat(merged_bboxes) 81 | merged_scores = torch.cat(merged_scores, dim=0) 82 | merged_labels = torch.cat(merged_labels, dim=0) 83 | 84 | _, order = merged_scores.sort(0, descending=True) 85 | num = min(test_cfg.max_num, len(aug_bboxes)) 86 | order = order[:num] 87 | 88 | merged_bboxes = merged_bboxes[order] 89 | merged_scores = merged_scores[order] 90 | merged_labels = merged_labels[order] 91 | 92 | return bbox3d2result(merged_bboxes, merged_scores, merged_labels) 93 | -------------------------------------------------------------------------------- /mmdet3d/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .array_converter import ArrayConverter, array_converter 3 | from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d, 4 | gaussian_radius, get_ellip_gaussian_2D) 5 | 6 | __all__ = [ 7 | 'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian', 8 | 'ArrayConverter', 'array_converter', 'ellip_gaussian2D', 9 | 'get_ellip_gaussian_2D' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/core/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
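# Qualitative visualisation helpers: show_result (point cloud with ground-truth
# and predicted boxes), show_seg_result (per-point segmentation colours) and
# show_multi_modality_result (3D boxes projected onto the paired image).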
2 | from .show_result import (show_multi_modality_result, show_result, 3 | show_seg_result) 4 | 5 | __all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result'] 6 | -------------------------------------------------------------------------------- /mmdet3d/core/voxel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_voxel_generator 3 | from .voxel_generator import VoxelGenerator 4 | 5 | __all__ = ['build_voxel_generator', 'VoxelGenerator'] 6 | -------------------------------------------------------------------------------- /mmdet3d/core/voxel/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | from . import voxel_generator 5 | 6 | 7 | def build_voxel_generator(cfg, **kwargs): 8 | """Builder of voxel generator.""" 9 | if isinstance(cfg, voxel_generator.VoxelGenerator): 10 | return cfg 11 | elif isinstance(cfg, dict): 12 | return mmcv.runner.obj_from_dict( 13 | cfg, voxel_generator, default_args=kwargs) 14 | else: 15 | raise TypeError('Invalid type {} for building a sampler'.format( 16 | type(cfg))) 17 | -------------------------------------------------------------------------------- /mmdet3d/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.datasets.builder import build_dataloader 3 | from .builder import DATASETS, PIPELINES, build_dataset 4 | from .custom_3d import Custom3DDataset 5 | from .nuscenes_dataset import NuScenesDataset 6 | from .nuscenes_dataset_occ import NuScenesDatasetOccpancy 7 | # yapf: disable 8 | from .pipelines import (LoadAnnotations3D, LoadPointsFromFile, PointsLidar2Ego, PointsRangeFilter) 9 | # yapf: enable 10 | from .utils import get_loading_pipeline 11 | 12 | __all__ = [ 13 | 'build_dataloader', 'DATASETS', 'build_dataset', 'NuScenesDataset', 14 | 'PointsRangeFilter', 'LoadPointsFromFile', 'LoadAnnotations3D', 'Custom3DDataset', 15 | 'PointsLidar2Ego', 'get_loading_pipeline', 'PIPELINES', 'NuScenesDatasetOccpancy' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet3d/datasets/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
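# build_dataset() below resolves wrapper datasets recursively. A minimal sketch
# of an assumed config fragment (not copied from this repo's configs; the
# ann_file path and `train_pipeline` are placeholders) wrapping the occupancy
# dataset with class-balanced group sampling:
#
#   train=dict(
#       type='CBGSDataset',
#       dataset=dict(
#           type='NuScenesDatasetOccpancy',
#           ann_file=...,          # path to the generated info .pkl
#           pipeline=train_pipeline))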
2 | import platform 3 | 4 | from mmcv.utils import Registry, build_from_cfg 5 | 6 | from mmdet.datasets import DATASETS as MMDET_DATASETS 7 | from mmdet.datasets.builder import _concat_dataset 8 | 9 | if platform.system() != 'Windows': 10 | # https://github.com/pytorch/pytorch/issues/973 11 | import resource 12 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 13 | base_soft_limit = rlimit[0] 14 | hard_limit = rlimit[1] 15 | soft_limit = min(max(4096, base_soft_limit), hard_limit) 16 | resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) 17 | 18 | OBJECTSAMPLERS = Registry('Object sampler') 19 | DATASETS = Registry('dataset') 20 | PIPELINES = Registry('pipeline') 21 | 22 | 23 | def build_dataset(cfg, default_args=None): 24 | from mmdet3d.datasets.dataset_wrappers import CBGSDataset 25 | from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset, 26 | ConcatDataset, RepeatDataset) 27 | if isinstance(cfg, (list, tuple)): 28 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 29 | elif cfg['type'] == 'ConcatDataset': 30 | dataset = ConcatDataset( 31 | [build_dataset(c, default_args) for c in cfg['datasets']], 32 | cfg.get('separate_eval', True)) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif cfg['type'] == 'ClassBalancedDataset': 37 | dataset = ClassBalancedDataset( 38 | build_dataset(cfg['dataset'], default_args), cfg['oversample_thr']) 39 | elif cfg['type'] == 'CBGSDataset': 40 | dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args)) 41 | elif isinstance(cfg.get('ann_file'), (list, tuple)): 42 | dataset = _concat_dataset(cfg, default_args) 43 | elif cfg['type'] in DATASETS._module_dict.keys(): 44 | dataset = build_from_cfg(cfg, DATASETS, default_args) 45 | else: 46 | dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args) 47 | return dataset 48 | -------------------------------------------------------------------------------- /mmdet3d/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | 4 | from .builder import DATASETS 5 | 6 | 7 | @DATASETS.register_module() 8 | class CBGSDataset(object): 9 | """A wrapper of class sampled dataset with ann_file path. Implementation of 10 | paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object 11 | Detection `_. 12 | 13 | Balance the number of scenes under different classes. 14 | 15 | Args: 16 | dataset (:obj:`CustomDataset`): The dataset to be class sampled. 17 | """ 18 | 19 | def __init__(self, dataset): 20 | self.dataset = dataset 21 | self.CLASSES = dataset.CLASSES 22 | self.cat2id = {name: i for i, name in enumerate(self.CLASSES)} 23 | self.sample_indices = self._get_sample_indices() 24 | # self.dataset.data_infos = self.data_infos 25 | if hasattr(self.dataset, 'flag'): 26 | self.flag = np.array( 27 | [self.dataset.flag[ind] for ind in self.sample_indices], 28 | dtype=np.uint8) 29 | 30 | def _get_sample_indices(self): 31 | """Load annotations from ann_file. 32 | 33 | Args: 34 | ann_file (str): Path of the annotation file. 35 | 36 | Returns: 37 | list[dict]: List of annotations after class sampling. 
38 | """ 39 | class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()} 40 | for idx in range(len(self.dataset)): 41 | sample_cat_ids = self.dataset.get_cat_ids(idx) 42 | for cat_id in sample_cat_ids: 43 | class_sample_idxs[cat_id].append(idx) 44 | duplicated_samples = sum( 45 | [len(v) for _, v in class_sample_idxs.items()]) 46 | class_distribution = { 47 | k: len(v) / duplicated_samples 48 | for k, v in class_sample_idxs.items() 49 | } 50 | 51 | sample_indices = [] 52 | 53 | frac = 1.0 / len(self.CLASSES) 54 | ratios = [frac / v for v in class_distribution.values()] 55 | for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios): 56 | sample_indices += np.random.choice(cls_inds, 57 | int(len(cls_inds) * 58 | ratio)).tolist() 59 | return sample_indices 60 | 61 | def __getitem__(self, idx): 62 | """Get item from infos according to the given index. 63 | 64 | Returns: 65 | dict: Data dictionary of the corresponding index. 66 | """ 67 | ori_idx = self.sample_indices[idx] 68 | return self.dataset[ori_idx] 69 | 70 | def __len__(self): 71 | """Return the length of data infos. 72 | 73 | Returns: 74 | int: Length of data infos. 75 | """ 76 | return len(self.sample_indices) 77 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .dbsampler import DataBaseSampler 4 | from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D 5 | from .loading import (LoadAnnotations3D, LoadAnnotationsAll, 6 | LoadPointsFromFile, 7 | PointToMultiViewDepth, LoadOccGTFromFile) 8 | from .test_time_aug import MultiScaleFlipAug3D 9 | # yapf: disable 10 | from .transforms_3d import (PointsRangeFilter, PointsLidar2Ego) 11 | 12 | __all__ = [ 13 | 'PointsRangeFilter', 'Collect3D', 14 | 'Compose', 'LoadPointsFromFile', 15 | 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 16 | 'LoadAnnotations3D', 'MultiScaleFlipAug3D', 'PointsLidar2Ego', 17 | 'LoadAnnotationsAll', 'PointToMultiViewDepth', 18 | 'LoadOccGTFromFile' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/aug_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | from PIL import Image 5 | from PIL import ImageEnhance 6 | 7 | 8 | class Grid(object): 9 | def __init__(self, d1, d2, rotate=1, ratio=0.5, mode=0, prob=1.): 10 | self.d1 = d1 11 | self.d2 = d2 12 | self.rotate = rotate 13 | self.ratio = ratio 14 | self.mode = mode 15 | self.st_prob = self.prob = prob 16 | 17 | def set_prob(self, epoch, max_epoch): 18 | self.prob = self.st_prob * min(1, epoch / max_epoch) 19 | 20 | def __call__(self, img): 21 | if np.random.rand() > self.prob: 22 | return img 23 | h = img.size[1] 24 | w = img.size[0] 25 | 26 | # 1.5 * h, 1.5 * w works fine with the squared images 27 | # But with rectangular input, the mask might not be able to recover back to the input image shape 28 | # A square mask with edge length equal to the diagnoal of the input image 29 | # will be able to cover all the image spot after the rotation. This is also the minimum square. 30 | hh = math.ceil((math.sqrt(h * h + w * w))) 31 | 32 | d = np.random.randint(self.d1, self.d2) 33 | # d = self.d 34 | 35 | # maybe use ceil? 
but i guess no big difference 36 | self.l = math.ceil(d * self.ratio) 37 | 38 | mask = np.ones((hh, hh), np.float32) 39 | st_h = np.random.randint(d) 40 | st_w = np.random.randint(d) 41 | for i in range(-1, hh // d + 1): 42 | s = d * i + st_h 43 | t = s + self.l 44 | s = max(min(s, hh), 0) 45 | t = max(min(t, hh), 0) 46 | mask[s:t, :] *= 0 47 | for i in range(-1, hh // d + 1): 48 | s = d * i + st_w 49 | t = s + self.l 50 | s = max(min(s, hh), 0) 51 | t = max(min(t, hh), 0) 52 | mask[:, s:t] *= 0 53 | r = np.random.randint(self.rotate) 54 | mask = Image.fromarray(np.uint8(mask)) 55 | mask = mask.rotate(r) 56 | mask = np.asarray(mask) 57 | mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (hh - w) // 2:(hh - w) // 2 + w] 58 | 59 | # mask = torch.from_numpy(mask).float() 60 | mask = mask.reshape(mask.shape[0], mask.shape[1], 1) 61 | if self.mode == 1: 62 | mask = 1 - mask 63 | img = img * mask 64 | 65 | return img 66 | 67 | 68 | def get_grid_mask_img(img, d1=2, d2=100, rotate=(1, 120), ratio=(0.05, 0.25)): 69 | if type(rotate) == tuple or type(rotate) == list: 70 | rotate = np.random.randint(rotate[0], rotate[1]) 71 | if type(ratio) == tuple or type(ratio) == list: 72 | ratio = np.random.uniform(ratio[0], ratio[1]) 73 | gd = Grid(d1, d2, rotate=rotate, ratio=ratio) 74 | return Image.fromarray(gd(img)) 75 | 76 | 77 | def get_darker_img(img, ratio=(0.5, 1.5)): 78 | brighter = ImageEnhance.Brightness(img) 79 | if type(ratio) == tuple or type(ratio) == list: 80 | ratio = np.random.uniform(ratio[0], ratio[1]) 81 | return brighter.enhance(ratio) 82 | 83 | 84 | def get_contrast_img(img, ratio=(0.7, 1.3)): 85 | contraster = ImageEnhance.Contrast(img) 86 | if type(ratio) == tuple or type(ratio) == list: 87 | ratio = np.random.uniform(ratio[0], ratio[1]) 88 | return contraster.enhance(ratio) 89 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES 7 | from ..builder import PIPELINES 8 | 9 | 10 | @PIPELINES.register_module() 11 | class Compose: 12 | """Compose multiple transforms sequentially. The pipeline registry of 13 | mmdet3d separates with mmdet, however, sometimes we may need to use mmdet's 14 | pipeline. So the class is rewritten to be able to use pipelines from both 15 | mmdet3d and mmdet. 16 | 17 | Args: 18 | transforms (Sequence[dict | callable]): Sequence of transform object or 19 | config dict to be composed. 20 | """ 21 | 22 | def __init__(self, transforms): 23 | assert isinstance(transforms, collections.abc.Sequence) 24 | self.transforms = [] 25 | for transform in transforms: 26 | if isinstance(transform, dict): 27 | _, key = PIPELINES.split_scope_key(transform['type']) 28 | if key in PIPELINES._module_dict.keys(): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | else: 31 | transform = build_from_cfg(transform, MMDET_PIPELINES) 32 | self.transforms.append(transform) 33 | elif callable(transform): 34 | self.transforms.append(transform) 35 | else: 36 | raise TypeError('transform must be callable or a dict') 37 | 38 | def __call__(self, data): 39 | """Call function to apply transforms sequentially. 40 | 41 | Args: 42 | data (dict): A result dict contains the data to transform. 43 | 44 | Returns: 45 | dict: Transformed data. 
46 | """ 47 | 48 | for t in self.transforms: 49 | data = t(data) 50 | if data is None: 51 | return None 52 | return data 53 | 54 | def __repr__(self): 55 | format_string = self.__class__.__name__ + '(' 56 | for t in self.transforms: 57 | format_string += '\n' 58 | format_string += f' {t}' 59 | format_string += '\n)' 60 | return format_string 61 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/transforms_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from pyquaternion import Quaternion 4 | 5 | from ..builder import PIPELINES 6 | 7 | 8 | @PIPELINES.register_module() 9 | class PointsLidar2Ego(object): 10 | def __call__(self, input_dict): 11 | points = input_dict['points'] 12 | lidar2ego_rots = torch.tensor(Quaternion(input_dict['curr']['lidar2ego_rotation']).rotation_matrix).float() 13 | lidar2ego_trans = torch.tensor(input_dict['curr']['lidar2ego_translation']).float() 14 | points.tensor[:, :3] = ( 15 | points.tensor[:, :3] @ lidar2ego_rots.T 16 | ) 17 | points.tensor[:, :3] += lidar2ego_trans 18 | input_dict['points'] = points 19 | return input_dict 20 | 21 | 22 | @PIPELINES.register_module() 23 | class PointsRangeFilter(object): 24 | """Filter points by the range. 25 | 26 | Args: 27 | point_cloud_range (list[float]): Point cloud range. 28 | """ 29 | 30 | def __init__(self, point_cloud_range): 31 | self.pcd_range = np.array(point_cloud_range, dtype=np.float32) 32 | 33 | def __call__(self, input_dict): 34 | """Call function to filter points by the range. 35 | 36 | Args: 37 | input_dict (dict): Result dict from loading pipeline. 38 | 39 | Returns: 40 | dict: Results after filtering, 'points', 'pts_instance_mask' 41 | and 'pts_semantic_mask' keys are updated in the result dict. 42 | """ 43 | points = input_dict['points'] 44 | eps = 0.001 45 | self.pcd_range = [ 46 | self.pcd_range[0] + eps, self.pcd_range[1] + eps, self.pcd_range[2] + eps, 47 | self.pcd_range[3] - eps, self.pcd_range[4] - eps, self.pcd_range[5] - eps 48 | ] 49 | 50 | points_mask = points.in_range_3d(self.pcd_range) 51 | clean_points = points[points_mask] 52 | input_dict['points'] = clean_points 53 | points_mask = points_mask.numpy() 54 | 55 | pts_instance_mask = input_dict.get('pts_instance_mask', None) 56 | pts_semantic_mask = input_dict.get('pts_semantic_mask', None) 57 | 58 | if pts_instance_mask is not None: 59 | input_dict['pts_instance_mask'] = pts_instance_mask[points_mask] 60 | 61 | if pts_semantic_mask is not None: 62 | input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask] 63 | 64 | return input_dict 65 | 66 | def __repr__(self): 67 | """str: Return a string that describes the module.""" 68 | repr_str = self.__class__.__name__ 69 | repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' 70 | return repr_str 71 | -------------------------------------------------------------------------------- /mmdet3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES, 4 | MIDDLE_ENCODERS, NECKS, ROI_EXTRACTORS, SEGMENTORS, 5 | SHARED_HEADS, VOXEL_ENCODERS, build_backbone, 6 | build_detector, build_fusion_layer, build_head, 7 | build_loss, build_middle_encoder, build_model, 8 | build_neck, build_roi_extractor, build_shared_head, 9 | build_voxel_encoder) 10 | from .detectors import * # noqa: F401,F403 11 | from .necks import * # noqa: F401,F403 12 | 13 | __all__ = [ 14 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 15 | 'DETECTORS', 'SEGMENTORS', 'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 16 | 'FUSION_LAYERS', 'build_backbone', 'build_neck', 'build_roi_extractor', 17 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector', 18 | 'build_fusion_layer', 'build_model', 'build_middle_encoder', 19 | 'build_voxel_encoder' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet3d/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.models.backbones import ResNet 3 | from .resnet import CustomResNet, CustomResNet3D 4 | from .swin import SwinTransformer 5 | 6 | 7 | __all__ = [ 8 | 'ResNet', 'CustomResNet', 'CustomResNet3D', 'SwinTransformer', 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet3d/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | from mmcv.cnn import MODELS as MMCV_MODELS 5 | from mmcv.utils import Registry 6 | 7 | from mmdet.models.builder import BACKBONES as MMDET_BACKBONES 8 | from mmdet.models.builder import DETECTORS as MMDET_DETECTORS 9 | from mmdet.models.builder import HEADS as MMDET_HEADS 10 | from mmdet.models.builder import LOSSES as MMDET_LOSSES 11 | from mmdet.models.builder import NECKS as MMDET_NECKS 12 | from mmdet.models.builder import ROI_EXTRACTORS as MMDET_ROI_EXTRACTORS 13 | from mmdet.models.builder import SHARED_HEADS as MMDET_SHARED_HEADS 14 | from mmseg.models.builder import LOSSES as MMSEG_LOSSES 15 | 16 | MODELS = Registry('models', parent=MMCV_MODELS) 17 | 18 | BACKBONES = MODELS 19 | NECKS = MODELS 20 | ROI_EXTRACTORS = MODELS 21 | SHARED_HEADS = MODELS 22 | HEADS = MODELS 23 | LOSSES = MODELS 24 | DETECTORS = MODELS 25 | VOXEL_ENCODERS = MODELS 26 | MIDDLE_ENCODERS = MODELS 27 | FUSION_LAYERS = MODELS 28 | SEGMENTORS = MODELS 29 | 30 | 31 | def build_backbone(cfg): 32 | """Build backbone.""" 33 | if cfg['type'] in BACKBONES._module_dict.keys(): 34 | return BACKBONES.build(cfg) 35 | else: 36 | return MMDET_BACKBONES.build(cfg) 37 | 38 | 39 | def build_neck(cfg): 40 | """Build neck.""" 41 | if cfg['type'] in NECKS._module_dict.keys(): 42 | return NECKS.build(cfg) 43 | else: 44 | return MMDET_NECKS.build(cfg) 45 | 46 | 47 | def build_roi_extractor(cfg): 48 | """Build RoI feature extractor.""" 49 | if cfg['type'] in ROI_EXTRACTORS._module_dict.keys(): 50 | return ROI_EXTRACTORS.build(cfg) 51 | else: 52 | return MMDET_ROI_EXTRACTORS.build(cfg) 53 | 54 | 55 | def build_shared_head(cfg): 56 | """Build shared head of detector.""" 57 | if cfg['type'] in SHARED_HEADS._module_dict.keys(): 58 | return SHARED_HEADS.build(cfg) 59 | else: 60 | return MMDET_SHARED_HEADS.build(cfg) 61 | 62 | 63 | def build_head(cfg): 64 | """Build head.""" 
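    # Prefer heads registered in mmdet3d's registry; fall back to mmdet's HEADS
    # registry so plain 2D heads can still be built from the same config.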
65 | if cfg['type'] in HEADS._module_dict.keys(): 66 | return HEADS.build(cfg) 67 | else: 68 | return MMDET_HEADS.build(cfg) 69 | 70 | 71 | def build_loss(cfg): 72 | """Build loss function.""" 73 | if cfg['type'] in LOSSES._module_dict.keys(): 74 | return LOSSES.build(cfg) 75 | elif cfg['type'] in MMDET_LOSSES._module_dict.keys(): 76 | return MMDET_LOSSES.build(cfg) 77 | else: 78 | return MMSEG_LOSSES.build(cfg) 79 | 80 | 81 | def build_detector(cfg, train_cfg=None, test_cfg=None): 82 | """Build detector.""" 83 | if train_cfg is not None or test_cfg is not None: 84 | warnings.warn( 85 | 'train_cfg and test_cfg is deprecated, ' 86 | 'please specify them in model', UserWarning) 87 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 88 | 'train_cfg specified in both outer field and model field ' 89 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 90 | 'test_cfg specified in both outer field and model field ' 91 | if cfg['type'] in DETECTORS._module_dict.keys(): 92 | return DETECTORS.build( 93 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 94 | else: 95 | return MMDET_DETECTORS.build( 96 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 97 | 98 | 99 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 100 | """Build segmentor.""" 101 | if train_cfg is not None or test_cfg is not None: 102 | warnings.warn( 103 | 'train_cfg and test_cfg is deprecated, ' 104 | 'please specify them in model', UserWarning) 105 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 106 | 'train_cfg specified in both outer field and model field ' 107 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 108 | 'test_cfg specified in both outer field and model field ' 109 | return SEGMENTORS.build( 110 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 111 | 112 | 113 | def build_model(cfg, train_cfg=None, test_cfg=None): 114 | """A function warpper for building 3D detector or segmentor according to 115 | cfg. 116 | 117 | Should be deprecated in the future. 118 | """ 119 | if cfg.type in ['EncoderDecoder3D']: 120 | return build_segmentor(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 121 | else: 122 | return build_detector(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 123 | 124 | 125 | def build_voxel_encoder(cfg): 126 | """Build voxel encoder.""" 127 | return VOXEL_ENCODERS.build(cfg) 128 | 129 | 130 | def build_middle_encoder(cfg): 131 | """Build middle level encoder.""" 132 | return MIDDLE_ENCODERS.build(cfg) 133 | 134 | 135 | def build_fusion_layer(cfg): 136 | """Build fusion layer.""" 137 | return FUSION_LAYERS.build(cfg) 138 | -------------------------------------------------------------------------------- /mmdet3d/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import Base3DDetector 3 | from .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVDetTRT 4 | from .fusion_occ import FusionOCC, FusionDepthSeg 5 | from .centerpoint import CenterPoint 6 | from .mvx_two_stage import MVXTwoStageDetector 7 | 8 | 9 | __all__ = [ 10 | 'Base3DDetector', 'MVXTwoStageDetector', 11 | 'CenterPoint', 'BEVDet', 'BEVDet4D', 'BEVDepth4D', 12 | 'BEVDetTRT', 'FusionDepthSeg', 'FusionOCC' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | from mmdet.models.necks.fpn import FPN 3 | from .lss_fpn import FPN_LSS 4 | from .view_transformer import LSSViewTransformer, LSSViewTransformerBEVDepth, \ 5 | LSSViewTransformerBEVStereo 6 | from .fusion_view_transformer import CrossModalFusion, CrossModalLSS 7 | 8 | __all__ = [ 9 | 'FPN', 'LSSViewTransformer', 'FPN_LSS', 'LSSViewTransformerBEVDepth', 10 | 'LSSViewTransformerBEVStereo', 11 | 'CrossModalFusion', 'CrossModalLSS' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version, 3 | get_compiling_cuda_version, nms, roi_align, 4 | sigmoid_focal_loss) 5 | from mmcv.ops.assign_score_withk import assign_score_withk 6 | from mmcv.ops.ball_query import ball_query 7 | from mmcv.ops.furthest_point_sample import (furthest_point_sample, 8 | furthest_point_sample_with_dist) 9 | from mmcv.ops.gather_points import gather_points 10 | from mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation 11 | from mmcv.ops.knn import knn 12 | from mmcv.ops.points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, 13 | points_in_boxes_part) 14 | from mmcv.ops.points_sampler import PointsSampler as Points_Sampler 15 | from mmcv.ops.roiaware_pool3d import RoIAwarePool3d 16 | from mmcv.ops.roipoint_pool3d import RoIPointPool3d 17 | from mmcv.ops.scatter_points import DynamicScatter, dynamic_scatter 18 | from mmcv.ops.three_interpolate import three_interpolate 19 | from mmcv.ops.three_nn import three_nn 20 | from mmcv.ops.voxelize import Voxelization, voxelization 21 | 22 | from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule 23 | from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d 24 | from .paconv import PAConv, PAConvCUDA 25 | from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 26 | PAConvSAModule, PAConvSAModuleMSG, 27 | PointFPModule, PointSAModule, PointSAModuleMSG, 28 | build_sa_module) 29 | from .sparse_block import (SparseBasicBlock, SparseBottleneck, 30 | make_sparse_convmodule) 31 | 32 | __all__ = [ 33 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version', 34 | 'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d', 35 | 'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization', 36 | 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss', 37 | 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', 38 | 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', 39 | 'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample', 40 | 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', 41 | 'gather_points', 'grouping_operation', 'GroupAll', 'QueryAndGroup', 42 | 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', 'DGCNNFPModule', 43 | 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all', 44 | 'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version', 45 | 'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA', 46 | 'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule', 47 | 'PAConvCUDASAModuleMSG', 'RoIPointPool3d' 48 | ] 49 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool_v2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
Phigent Robotics. All rights reserved. 2 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool_v2/src/bev_pool.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Phigent Robotics. All rights reserved. 2 | // Reference https://arxiv.org/abs/2211.17111 3 | #include 4 | #include 5 | 6 | // CUDA function declarations 7 | void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, 8 | const int* ranks_depth, const int* ranks_feat, const int* ranks_bev, 9 | const int* interval_starts, const int* interval_lengths, float* out); 10 | 11 | void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad, 12 | const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat, 13 | const int* ranks_bev, const int* interval_starts, const int* interval_lengths, 14 | float* depth_grad, float* feat_grad); 15 | 16 | 17 | /* 18 | Function: pillar pooling (forward, cuda) 19 | Args: 20 | depth : input depth, FloatTensor[n, d, h, w] 21 | feat : input features, FloatTensor[n, h, w, c] 22 | out : output features, FloatTensor[b, c, h_out, w_out] 23 | ranks_depth : depth index of points, IntTensor[n_points] 24 | ranks_feat : feat index of points, IntTensor[n_points] 25 | ranks_bev : output index of points, IntTensor[n_points] 26 | interval_lengths : starting position for pooled point, IntTensor[n_intervals] 27 | interval_starts : how many points in each pooled point, IntTensor[n_intervals] 28 | Return: 29 | */ 30 | void bev_pool_v2_forward( 31 | const at::Tensor _depth, 32 | const at::Tensor _feat, 33 | at::Tensor _out, 34 | const at::Tensor _ranks_depth, 35 | const at::Tensor _ranks_feat, 36 | const at::Tensor _ranks_bev, 37 | const at::Tensor _interval_lengths, 38 | const at::Tensor _interval_starts 39 | ) { 40 | int c = _feat.size(4); 41 | int n_intervals = _interval_lengths.size(0); 42 | const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth)); 43 | const float* depth = _depth.data_ptr(); 44 | const float* feat = _feat.data_ptr(); 45 | const int* ranks_depth = _ranks_depth.data_ptr(); 46 | const int* ranks_feat = _ranks_feat.data_ptr(); 47 | const int* ranks_bev = _ranks_bev.data_ptr(); 48 | 49 | const int* interval_lengths = _interval_lengths.data_ptr(); 50 | const int* interval_starts = _interval_starts.data_ptr(); 51 | 52 | float* out = _out.data_ptr(); 53 | bev_pool_v2( 54 | c, n_intervals, depth, feat, ranks_depth, ranks_feat, 55 | ranks_bev, interval_starts, interval_lengths, out 56 | ); 57 | } 58 | 59 | 60 | /* 61 | Function: pillar pooling (backward, cuda) 62 | Args: 63 | out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out] 64 | depth_grad : grad of input depth, FloatTensor[n, d, h, w] 65 | feat_grad : grad of input feature, FloatTensor[n, h, w, c] 66 | depth : input depth, FloatTensor[n, d, h, w] 67 | feat : input features, FloatTensor[n, h, w, c] 68 | ranks_depth : depth index of points, IntTensor[n_points] 69 | ranks_feat : feat index of points, IntTensor[n_points] 70 | ranks_bev : output index of points, IntTensor[n_points] 71 | interval_lengths : starting position for pooled point, IntTensor[n_intervals] 72 | interval_starts : how many points in each pooled point, IntTensor[n_intervals] 73 | */ 74 | void bev_pool_v2_backward( 75 | const at::Tensor _out_grad, 76 | at::Tensor _depth_grad, 77 | at::Tensor _feat_grad, 78 | const at::Tensor _depth, 79 | const at::Tensor _feat, 80 | const at::Tensor _ranks_depth, 81 | const 
at::Tensor _ranks_feat, 82 | const at::Tensor _ranks_bev, 83 | const at::Tensor _interval_lengths, 84 | const at::Tensor _interval_starts 85 | ) { 86 | int c = _out_grad.size(4); 87 | int n_intervals = _interval_lengths.size(0); 88 | const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad)); 89 | const float* out_grad = _out_grad.data_ptr(); 90 | float* depth_grad = _depth_grad.data_ptr(); 91 | float* feat_grad = _feat_grad.data_ptr(); 92 | const float* depth = _depth.data_ptr(); 93 | const float* feat = _feat.data_ptr(); 94 | const int* ranks_depth = _ranks_depth.data_ptr(); 95 | const int* ranks_feat = _ranks_feat.data_ptr(); 96 | const int* ranks_bev = _ranks_bev.data_ptr(); 97 | const int* interval_lengths = _interval_lengths.data_ptr(); 98 | const int* interval_starts = _interval_starts.data_ptr(); 99 | 100 | bev_pool_v2_grad( 101 | c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat, 102 | ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad 103 | ); 104 | } 105 | 106 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 107 | m.def("bev_pool_v2_forward", &bev_pool_v2_forward, 108 | "bev_pool_v2_forward"); 109 | m.def("bev_pool_v2_backward", &bev_pool_v2_backward, 110 | "bev_pool_v2_backward"); 111 | } 112 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dgcnn_fa_module import DGCNNFAModule 3 | from .dgcnn_fp_module import DGCNNFPModule 4 | from .dgcnn_gf_module import DGCNNGFModule 5 | 6 | __all__ = ['DGCNNFAModule', 'DGCNNFPModule', 'DGCNNGFModule'] 7 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fa_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ConvModule 4 | from mmcv.runner import BaseModule, force_fp32 5 | from torch import nn as nn 6 | 7 | 8 | class DGCNNFAModule(BaseModule): 9 | """Point feature aggregation module used in DGCNN. 10 | 11 | Aggregate all the features of points. 12 | 13 | Args: 14 | mlp_channels (list[int]): List of mlp channels. 15 | norm_cfg (dict, optional): Type of normalization method. 16 | Defaults to dict(type='BN1d'). 17 | act_cfg (dict, optional): Type of activation method. 18 | Defaults to dict(type='ReLU'). 19 | init_cfg (dict, optional): Initialization config. Defaults to None. 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels, 24 | norm_cfg=dict(type='BN1d'), 25 | act_cfg=dict(type='ReLU'), 26 | init_cfg=None): 27 | super().__init__(init_cfg=init_cfg) 28 | self.fp16_enabled = False 29 | self.mlps = nn.Sequential() 30 | for i in range(len(mlp_channels) - 1): 31 | self.mlps.add_module( 32 | f'layer{i}', 33 | ConvModule( 34 | mlp_channels[i], 35 | mlp_channels[i + 1], 36 | kernel_size=(1, ), 37 | stride=(1, ), 38 | conv_cfg=dict(type='Conv1d'), 39 | norm_cfg=norm_cfg, 40 | act_cfg=act_cfg)) 41 | 42 | @force_fp32() 43 | def forward(self, points): 44 | """forward. 45 | 46 | Args: 47 | points (List[Tensor]): tensor of the features to be aggregated. 48 | 49 | Returns: 50 | Tensor: (B, N, M) M = mlp[-1], tensor of the output points. 
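The bev_pool_v2 C++ bindings listed above (mmdet3d/ops/bev_pool_v2/src/bev_pool.cpp) only expose raw forward/backward entry points; on the Python side they are normally wrapped in a `torch.autograd.Function` so the pooling stays differentiable end to end. The sketch below illustrates that wrapping under stated assumptions: the extension module name `bev_pool_v2_ext`, the output layout, and the exact tensor dtypes are illustrative, not the repo's verified API (the real wrapper lives in mmdet3d/ops/bev_pool_v2/bev_pool.py); only the argument order is copied from the bindings above.

```python
import torch

# Hypothetical import: the compiled extension built from the .cpp/.cu sources
# above. The actual repo wraps it in mmdet3d/ops/bev_pool_v2/bev_pool.py.
from mmdet3d.ops.bev_pool_v2 import bev_pool_v2_ext


class BEVPoolV2Function(torch.autograd.Function):
    """Sum camera frustum features into BEV pillars via precomputed ranks."""

    @staticmethod
    def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,
                bev_feat_shape, interval_starts, interval_lengths):
        # The kernels expect int32 index tensors and float32 features.
        ranks_depth = ranks_depth.int()
        ranks_feat = ranks_feat.int()
        ranks_bev = ranks_bev.int()
        interval_starts = interval_starts.int()
        interval_lengths = interval_lengths.int()

        depth = depth.contiguous().float()
        feat = feat.contiguous().float()
        out = feat.new_zeros(bev_feat_shape)  # e.g. (B, Dz, Hy, Wx, C), assumed

        bev_pool_v2_ext.bev_pool_v2_forward(
            depth, feat, out, ranks_depth, ranks_feat, ranks_bev,
            interval_lengths, interval_starts)

        ctx.save_for_backward(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                              interval_starts, interval_lengths)
        return out

    @staticmethod
    def backward(ctx, out_grad):
        (depth, feat, ranks_depth, ranks_feat, ranks_bev,
         interval_starts, interval_lengths) = ctx.saved_tensors
        depth_grad = depth.new_zeros(depth.shape)
        feat_grad = feat.new_zeros(feat.shape)
        bev_pool_v2_ext.bev_pool_v2_backward(
            out_grad.contiguous(), depth_grad, feat_grad, depth, feat,
            ranks_depth, ranks_feat, ranks_bev,
            interval_lengths, interval_starts)
        # One gradient per forward input; index tensors and shapes get None.
        return depth_grad, feat_grad, None, None, None, None, None, None
```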
51 | """ 52 | 53 | if len(points) > 1: 54 | new_points = torch.cat(points[1:], dim=-1) 55 | new_points = new_points.transpose(1, 2).contiguous() # (B, C, N) 56 | new_points_copy = new_points 57 | 58 | new_points = self.mlps(new_points) 59 | 60 | new_fa_points = new_points.max(dim=-1, keepdim=True)[0] 61 | new_fa_points = new_fa_points.repeat(1, 1, new_points.shape[-1]) 62 | 63 | new_points = torch.cat([new_fa_points, new_points_copy], dim=1) 64 | new_points = new_points.transpose(1, 2).contiguous() 65 | else: 66 | new_points = points 67 | 68 | return new_points 69 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule, force_fp32 4 | from torch import nn as nn 5 | 6 | 7 | class DGCNNFPModule(BaseModule): 8 | """Point feature propagation module used in DGCNN. 9 | 10 | Propagate the features from one set to another. 11 | 12 | Args: 13 | mlp_channels (list[int]): List of mlp channels. 14 | norm_cfg (dict, optional): Type of activation method. 15 | Defaults to dict(type='BN1d'). 16 | act_cfg (dict, optional): Type of activation method. 17 | Defaults to dict(type='ReLU'). 18 | init_cfg (dict, optional): Initialization config. Defaults to None. 19 | """ 20 | 21 | def __init__(self, 22 | mlp_channels, 23 | norm_cfg=dict(type='BN1d'), 24 | act_cfg=dict(type='ReLU'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, ), 36 | stride=(1, ), 37 | conv_cfg=dict(type='Conv1d'), 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg)) 40 | 41 | @force_fp32() 42 | def forward(self, points): 43 | """forward. 44 | 45 | Args: 46 | points (Tensor): (B, N, C) tensor of the input points. 47 | 48 | Returns: 49 | Tensor: (B, N, M) M = mlp[-1], tensor of the new points. 50 | """ 51 | 52 | if points is not None: 53 | new_points = points.transpose(1, 2).contiguous() # (B, C, N) 54 | new_points = self.mlps(new_points) 55 | new_points = new_points.transpose(1, 2).contiguous() 56 | else: 57 | new_points = points 58 | 59 | return new_points 60 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .paconv import PAConv, PAConvCUDA 3 | 4 | __all__ = ['PAConv', 'PAConvCUDA'] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def calc_euclidian_dist(xyz1, xyz2): 6 | """Calculate the Euclidean distance between two sets of points. 7 | 8 | Args: 9 | xyz1 (torch.Tensor): (N, 3), the first set of points. 10 | xyz2 (torch.Tensor): (N, 3), the second set of points. 11 | 12 | Returns: 13 | torch.Tensor: (N, ), the Euclidean distance between each point pair. 
14 | """ 15 | assert xyz1.shape[0] == xyz2.shape[0], 'number of points are not the same' 16 | assert xyz1.shape[1] == xyz2.shape[1] == 3, \ 17 | 'points coordinates dimension is not 3' 18 | return torch.norm(xyz1 - xyz2, dim=-1) 19 | 20 | 21 | def assign_score(scores, point_features): 22 | """Perform weighted sum to aggregate output features according to scores. 23 | This function is used in non-CUDA version of PAConv. 24 | 25 | Compared to the cuda op assigh_score_withk, this pytorch implementation 26 | pre-computes output features for the neighbors of all centers, and then 27 | performs aggregation. It consumes more GPU memories. 28 | 29 | Args: 30 | scores (torch.Tensor): (B, npoint, K, M), predicted scores to 31 | aggregate weight matrices in the weight bank. 32 | `npoint` is the number of sampled centers. 33 | `K` is the number of queried neighbors. 34 | `M` is the number of weight matrices in the weight bank. 35 | point_features (torch.Tensor): (B, npoint, K, M, out_dim) 36 | Pre-computed point features to be aggregated. 37 | 38 | Returns: 39 | torch.Tensor: (B, npoint, K, out_dim), the aggregated features. 40 | """ 41 | B, npoint, K, M = scores.size() 42 | scores = scores.view(B, npoint, K, 1, M) 43 | output = torch.matmul(scores, point_features).view(B, npoint, K, -1) 44 | return output 45 | 46 | 47 | def assign_kernel_withoutk(features, kernels, M): 48 | """Pre-compute features with weight matrices in weight bank. This function 49 | is used before cuda op assign_score_withk in CUDA version PAConv. 50 | 51 | Args: 52 | features (torch.Tensor): (B, in_dim, N), input features of all points. 53 | `N` is the number of points in current point cloud. 54 | kernels (torch.Tensor): (2 * in_dim, M * out_dim), weight matrices in 55 | the weight bank, transformed from (M, 2 * in_dim, out_dim). 56 | `2 * in_dim` is because the input features are concatenation of 57 | (point_features - center_features, point_features). 58 | M (int): Number of weight matrices in the weight bank. 59 | 60 | Returns: 61 | Tuple[torch.Tensor]: both of shape (B, N, M, out_dim): 62 | 63 | - point_features: Pre-computed features for points. 64 | - center_features: Pre-computed features for centers. 65 | """ 66 | B, in_dim, N = features.size() 67 | feat_trans = features.permute(0, 2, 1) # [B, N, in_dim] 68 | out_feat_half1 = torch.matmul(feat_trans, kernels[:in_dim]).view( 69 | B, N, M, -1) # [B, N, M, out_dim] 70 | out_feat_half2 = torch.matmul(feat_trans, kernels[in_dim:]).view( 71 | B, N, M, -1) # [B, N, M, out_dim] 72 | 73 | # TODO: why this hard-coded if condition? 74 | # when the network input is only xyz without additional features 75 | # xyz will be used as features, so that features.size(1) == 3 % 2 != 0 76 | # we need to compensate center_features because otherwise 77 | # `point_features - center_features` will result in all zeros? 78 | if features.size(1) % 2 != 0: 79 | out_feat_half_coord = torch.matmul( 80 | feat_trans[:, :, :3], # [B, N, 3] 81 | kernels[in_dim:in_dim + 3]).view(B, N, M, -1) # [B, N, M, out_dim] 82 | else: 83 | out_feat_half_coord = torch.zeros_like(out_feat_half2) 84 | 85 | point_features = out_feat_half1 + out_feat_half2 86 | center_features = out_feat_half1 + out_feat_half_coord 87 | return point_features, center_features 88 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .builder import build_sa_module 3 | from .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 4 | PAConvSAModule, PAConvSAModuleMSG) 5 | from .point_fp_module import PointFPModule 6 | from .point_sa_module import PointSAModule, PointSAModuleMSG 7 | 8 | __all__ = [ 9 | 'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule', 10 | 'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule', 11 | 'PAConvCUDASAModuleMSG' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry 3 | 4 | SA_MODULES = Registry('point_sa_module') 5 | 6 | 7 | def build_sa_module(cfg, *args, **kwargs): 8 | """Build PointNet2 set abstraction (SA) module. 9 | 10 | Args: 11 | cfg (None or dict): The SA module config, which should contain: 12 | - type (str): Module type. 13 | - module args: Args needed to instantiate an SA module. 14 | args (argument list): Arguments passed to the `__init__` 15 | method of the corresponding module. 16 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 17 | method of the corresponding SA module . 18 | 19 | Returns: 20 | nn.Module: Created SA module. 21 | """ 22 | if cfg is None: 23 | cfg_ = dict(type='PointSAModule') 24 | else: 25 | if not isinstance(cfg, dict): 26 | raise TypeError('cfg must be a dict') 27 | if 'type' not in cfg: 28 | raise KeyError('the cfg dict must contain the key "type"') 29 | cfg_ = cfg.copy() 30 | 31 | module_type = cfg_.pop('type') 32 | if module_type not in SA_MODULES: 33 | raise KeyError(f'Unrecognized module type {module_type}') 34 | else: 35 | sa_module = SA_MODULES.get(module_type) 36 | 37 | module = sa_module(*args, **kwargs, **cfg_) 38 | 39 | return module 40 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/point_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List 3 | 4 | import torch 5 | from mmcv.cnn import ConvModule 6 | from mmcv.ops import three_interpolate, three_nn 7 | from mmcv.runner import BaseModule, force_fp32 8 | from torch import nn as nn 9 | 10 | 11 | class PointFPModule(BaseModule): 12 | """Point feature propagation module used in PointNets. 13 | 14 | Propagate the features from one set to another. 15 | 16 | Args: 17 | mlp_channels (list[int]): List of mlp channels. 18 | norm_cfg (dict, optional): Type of normalization method. 19 | Default: dict(type='BN2d'). 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels: List[int], 24 | norm_cfg: dict = dict(type='BN2d'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, 1), 36 | stride=(1, 1), 37 | conv_cfg=dict(type='Conv2d'), 38 | norm_cfg=norm_cfg)) 39 | 40 | @force_fp32() 41 | def forward(self, target: torch.Tensor, source: torch.Tensor, 42 | target_feats: torch.Tensor, 43 | source_feats: torch.Tensor) -> torch.Tensor: 44 | """forward. 45 | 46 | Args: 47 | target (Tensor): (B, n, 3) tensor of the xyz positions of 48 | the target features. 
49 | source (Tensor): (B, m, 3) tensor of the xyz positions of 50 | the source features. 51 | target_feats (Tensor): (B, C1, n) tensor of the features to be 52 | propagated to. 53 | source_feats (Tensor): (B, C2, m) tensor of features 54 | to be propagated. 55 | 56 | Return: 57 | Tensor: (B, M, N) M = mlp[-1], tensor of the target features. 58 | """ 59 | if source is not None: 60 | dist, idx = three_nn(target, source) 61 | dist_reciprocal = 1.0 / (dist + 1e-8) 62 | norm = torch.sum(dist_reciprocal, dim=2, keepdim=True) 63 | weight = dist_reciprocal / norm 64 | 65 | interpolated_feats = three_interpolate(source_feats, idx, weight) 66 | else: 67 | interpolated_feats = source_feats.expand(*source_feats.size()[0:2], 68 | target.size(1)) 69 | 70 | if target_feats is not None: 71 | new_features = torch.cat([interpolated_feats, target_feats], 72 | dim=1) # (B, C2 + C1, n) 73 | else: 74 | new_features = interpolated_feats 75 | 76 | new_features = new_features.unsqueeze(-1) 77 | new_features = self.mlps(new_features) 78 | 79 | return new_features.squeeze(-1) 80 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .overwrite_spconv.write_spconv2 import register_spconv2 3 | 4 | try: 5 | import spconv 6 | except ImportError: 7 | IS_SPCONV2_AVAILABLE = False 8 | else: 9 | if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0': 10 | IS_SPCONV2_AVAILABLE = register_spconv2() 11 | else: 12 | IS_SPCONV2_AVAILABLE = False 13 | 14 | __all__ = ['IS_SPCONV2_AVAILABLE'] 15 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/overwrite_spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .write_spconv2 import register_spconv2 3 | 4 | __all__ = ['register_spconv2'] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/overwrite_spconv/write_spconv2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
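`PointFPModule.forward` above delegates the neighbor search and weighting to mmcv's fused `three_nn`/`three_interpolate` CUDA ops. The plain-PyTorch sketch below reproduces only the weighting rule (inverse-distance over the nearest source points) to make the propagation step explicit; it is an illustration, not the repo's op, and the sizes in the usage line are arbitrary.

```python
import torch


def interpolate_features(target_xyz, source_xyz, source_feats, k=3):
    """Propagate (B, C, m) source features to (B, n, 3) target points.

    Mirrors the inverse-distance weighting used by three_nn/three_interpolate,
    but computed densely on CPU/GPU without the fused kernel.
    """
    dist = torch.cdist(target_xyz, source_xyz)               # (B, n, m)
    dist, idx = dist.topk(k, dim=-1, largest=False)          # k nearest sources
    weight = 1.0 / (dist + 1e-8)
    weight = weight / weight.sum(dim=-1, keepdim=True)       # (B, n, k)

    # Gather the k neighbor features and take the weighted sum.
    B, C, m = source_feats.shape
    idx_exp = idx.unsqueeze(1).expand(B, C, -1, -1)           # (B, C, n, k)
    gathered = source_feats.unsqueeze(2).expand(B, C, idx.shape[1], m) \
        .gather(3, idx_exp)                                   # (B, C, n, k)
    return (gathered * weight.unsqueeze(1)).sum(dim=-1)       # (B, C, n)


feats = interpolate_features(torch.rand(2, 64, 3), torch.rand(2, 16, 3),
                             torch.rand(2, 32, 16))
assert feats.shape == (2, 32, 64)
```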
2 | import itertools 3 | 4 | from mmcv.cnn.bricks.registry import CONV_LAYERS 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | def register_spconv2(): 9 | """This func registers spconv2.0 spconv ops to overwrite the default mmcv 10 | spconv ops.""" 11 | try: 12 | from spconv.pytorch import (SparseConv2d, SparseConv3d, SparseConv4d, 13 | SparseConvTranspose2d, 14 | SparseConvTranspose3d, SparseInverseConv2d, 15 | SparseInverseConv3d, SparseModule, 16 | SubMConv2d, SubMConv3d, SubMConv4d) 17 | except ImportError: 18 | return False 19 | else: 20 | CONV_LAYERS._register_module(SparseConv2d, 'SparseConv2d', force=True) 21 | CONV_LAYERS._register_module(SparseConv3d, 'SparseConv3d', force=True) 22 | CONV_LAYERS._register_module(SparseConv4d, 'SparseConv4d', force=True) 23 | 24 | CONV_LAYERS._register_module( 25 | SparseConvTranspose2d, 'SparseConvTranspose2d', force=True) 26 | CONV_LAYERS._register_module( 27 | SparseConvTranspose3d, 'SparseConvTranspose3d', force=True) 28 | 29 | CONV_LAYERS._register_module( 30 | SparseInverseConv2d, 'SparseInverseConv2d', force=True) 31 | CONV_LAYERS._register_module( 32 | SparseInverseConv3d, 'SparseInverseConv3d', force=True) 33 | 34 | CONV_LAYERS._register_module(SubMConv2d, 'SubMConv2d', force=True) 35 | CONV_LAYERS._register_module(SubMConv3d, 'SubMConv3d', force=True) 36 | CONV_LAYERS._register_module(SubMConv4d, 'SubMConv4d', force=True) 37 | SparseModule._version = 2 38 | SparseModule._load_from_state_dict = _load_from_state_dict 39 | return True 40 | 41 | 42 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, 43 | missing_keys, unexpected_keys, error_msgs): 44 | """Rewrite this func to compat the convolutional kernel weights between 45 | spconv 1.x in MMCV and 2.x in spconv2.x. 46 | 47 | Kernel weights in MMCV spconv has shape in (D,H,W,in_channel,out_channel) , 48 | while those in spcon2.x is in (out_channel,D,H,W,in_channel). 
49 | """ 50 | version = local_metadata.get('version', None) 51 | for hook in self._load_state_dict_pre_hooks.values(): 52 | hook(state_dict, prefix, local_metadata, strict, missing_keys, 53 | unexpected_keys, error_msgs) 54 | 55 | local_name_params = itertools.chain(self._parameters.items(), 56 | self._buffers.items()) 57 | local_state = {k: v.data for k, v in local_name_params if v is not None} 58 | 59 | for name, param in local_state.items(): 60 | key = prefix + name 61 | if key in state_dict: 62 | input_param = state_dict[key] 63 | 64 | # Backward compatibility: loading 1-dim tensor from 65 | # 0.3.* to version 0.4+ 66 | if len(param.shape) == 0 and len(input_param.shape) == 1: 67 | input_param = input_param[0] 68 | if version != 2: 69 | dims = [len(input_param.shape) - 1] + list( 70 | range(len(input_param.shape) - 1)) 71 | input_param = input_param.permute(*dims) 72 | if input_param.shape != param.shape: 73 | # local shape should match the one in checkpoint 74 | error_msgs.append( 75 | f'size mismatch for {key}: copying a param with ' 76 | f'shape {key, input_param.shape} from checkpoint,' 77 | f'the shape in current model is {param.shape}.') 78 | continue 79 | 80 | if isinstance(input_param, Parameter): 81 | # backwards compatibility for serialized parameters 82 | input_param = input_param.data 83 | try: 84 | param.copy_(input_param) 85 | except Exception: 86 | error_msgs.append( 87 | f'While copying the parameter named "{key}", whose ' 88 | f'dimensions in the model are {param.size()} and whose ' 89 | f'dimensions in the checkpoint are {input_param.size()}.') 90 | elif strict: 91 | missing_keys.append(key) 92 | 93 | if strict: 94 | for key, input_param in state_dict.items(): 95 | if key.startswith(prefix): 96 | input_name = key[len(prefix):] 97 | input_name = input_name.split( 98 | '.', 1)[0] # get the name of param/buffer/child 99 | if input_name not in self._modules \ 100 | and input_name not in local_state: 101 | unexpected_keys.append(key) 102 | -------------------------------------------------------------------------------- /mmdet3d/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg, print_log 3 | 4 | from .collect_env import collect_env 5 | from .compat_cfg import compat_cfg 6 | from .logger import get_root_logger 7 | from .misc import find_latest_checkpoint 8 | from .setup_env import setup_multi_processes 9 | 10 | __all__ = [ 11 | 'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env', 12 | 'print_log', 'setup_multi_processes', 'find_latest_checkpoint', 13 | 'compat_cfg' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmdet 6 | import mmdet3d 7 | import mmseg 8 | from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE 9 | 10 | 11 | def collect_env(): 12 | """Collect the information of the running environments.""" 13 | env_info = collect_base_env() 14 | env_info['MMDetection'] = mmdet.__version__ 15 | env_info['MMSegmentation'] = mmseg.__version__ 16 | env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7] 17 | env_info['spconv2.0'] = IS_SPCONV2_AVAILABLE 18 | return env_info 19 | 20 | 21 | if __name__ == '__main__': 22 | for name, val in collect_env().items(): 23 | print(f'{name}: {val}') 24 | -------------------------------------------------------------------------------- /mmdet3d/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | from mmcv.utils import get_logger 5 | 6 | 7 | def get_root_logger(log_file=None, log_level=logging.INFO, name='mmdet3d'): 8 | """Get root logger and add a keyword filter to it. 9 | 10 | The logger will be initialized if it has not been initialized. By default a 11 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 12 | also be added. The name of the root logger is the top-level package name, 13 | e.g., "mmdet3d". 14 | 15 | Args: 16 | log_file (str, optional): File path of log. Defaults to None. 17 | log_level (int, optional): The level of logger. 18 | Defaults to logging.INFO. 19 | name (str, optional): The name of the root logger, also used as a 20 | filter keyword. Defaults to 'mmdet3d'. 21 | 22 | Returns: 23 | :obj:`logging.Logger`: The obtained logger 24 | """ 25 | logger = get_logger(name=name, log_file=log_file, log_level=log_level) 26 | 27 | # add a logging filter 28 | logging_filter = logging.Filter(name) 29 | logging_filter.filter = lambda record: record.find(name) != -1 30 | 31 | return logger 32 | -------------------------------------------------------------------------------- /mmdet3d/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import glob 3 | import os.path as osp 4 | import warnings 5 | 6 | 7 | def find_latest_checkpoint(path, suffix='pth'): 8 | """Find the latest checkpoint from the working directory. This function is 9 | copied from mmdetection. 10 | 11 | Args: 12 | path(str): The path to find checkpoints. 13 | suffix(str): File extension. 14 | Defaults to pth. 15 | 16 | Returns: 17 | latest_path(str | None): File path of the latest checkpoint. 18 | References: 19 | .. 
[1] https://github.com/microsoft/SoftTeacher 20 | /blob/main/ssod/utils/patch.py 21 | """ 22 | if not osp.exists(path): 23 | warnings.warn('The path of checkpoints does not exist.') 24 | return None 25 | if osp.exists(osp.join(path, f'latest.{suffix}')): 26 | return osp.join(path, f'latest.{suffix}') 27 | 28 | checkpoints = glob.glob(osp.join(path, f'*.{suffix}')) 29 | if len(checkpoints) == 0: 30 | warnings.warn('There are no checkpoints in the path.') 31 | return None 32 | latest = -1 33 | latest_path = None 34 | for checkpoint in checkpoints: 35 | count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0]) 36 | if count > latest: 37 | latest = count 38 | latest_path = checkpoint 39 | return latest_path 40 | -------------------------------------------------------------------------------- /mmdet3d/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import platform 4 | import warnings 5 | 6 | import cv2 7 | from torch import multiprocessing as mp 8 | 9 | 10 | def setup_multi_processes(cfg): 11 | """Setup multi-processing environment variables.""" 12 | # set multi-process start method as `fork` to speed up the training 13 | if platform.system() != 'Windows': 14 | mp_start_method = cfg.get('mp_start_method', 'fork') 15 | current_method = mp.get_start_method(allow_none=True) 16 | if current_method is not None and current_method != mp_start_method: 17 | warnings.warn( 18 | f'Multi-processing start method `{mp_start_method}` is ' 19 | f'different from the previous setting `{current_method}`.' 20 | f'It will be force set to `{mp_start_method}`. You can change ' 21 | f'this behavior by changing `mp_start_method` in your config.') 22 | mp.set_start_method(mp_start_method, force=True) 23 | 24 | # disable opencv multithreading to avoid system being overloaded 25 | opencv_num_threads = cfg.get('opencv_num_threads', 0) 26 | cv2.setNumThreads(opencv_num_threads) 27 | 28 | # setup OMP threads 29 | # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa 30 | workers_per_gpu = cfg.data.get('workers_per_gpu', 1) 31 | if 'train_dataloader' in cfg.data: 32 | workers_per_gpu = \ 33 | max(cfg.data.train_dataloader.get('workers_per_gpu', 1), 34 | workers_per_gpu) 35 | 36 | if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 37 | omp_num_threads = 1 38 | warnings.warn( 39 | f'Setting OMP_NUM_THREADS environment variable for each process ' 40 | f'to be {omp_num_threads} in default, to avoid your system being ' 41 | f'overloaded, please further tune the variable for optimal ' 42 | f'performance in your application as needed.') 43 | os.environ['OMP_NUM_THREADS'] = str(omp_num_threads) 44 | 45 | # setup MKL threads 46 | if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 47 | mkl_num_threads = 1 48 | warnings.warn( 49 | f'Setting MKL_NUM_THREADS environment variable for each process ' 50 | f'to be {mkl_num_threads} in default, to avoid your system being ' 51 | f'overloaded, please further tune the variable for optimal ' 52 | f'performance in your application as needed.') 53 | os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads) 54 | -------------------------------------------------------------------------------- /mmdet3d/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
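`find_latest_checkpoint` above first looks for `latest.pth` and otherwise returns the checkpoint with the largest trailing number (numeric, not lexicographic). A small usage sketch with a throwaway directory, assuming mmdet3d and its dependencies are importable; the file names are illustrative.

```python
import os
import tempfile

from mmdet3d.utils import find_latest_checkpoint

with tempfile.TemporaryDirectory() as work_dir:
    # Empty placeholder checkpoints; only the names matter for this check.
    for name in ('epoch_1.pth', 'epoch_2.pth', 'epoch_10.pth'):
        open(os.path.join(work_dir, name), 'w').close()

    latest = find_latest_checkpoint(work_dir)
    assert latest.endswith('epoch_10.pth')   # 10 > 2 numerically, despite sorting
```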
2 | 3 | __version__ = '1.0.0rc4' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pycuda 2 | lyft_dataset_sdk 3 | networkx==2.2 4 | numba==0.53.0 5 | numpy 6 | nuscenes-devkit 7 | plyfile 8 | scikit-image 9 | tensorboard 10 | trimesh==2.35.39 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [yapf] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | 6 | [isort] 7 | line_length = 79 8 | multi_line_output = 0 9 | extra_standard_library = setuptools 10 | known_first_party = mmdet,mmseg,mmdet3d 11 | known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset 12 | no_lines_before = STDLIB,LOCALFOLDER 13 | default_section = THIRDPARTY 14 | 15 | [codespell] 16 | ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD 17 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
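`parse_version_info` in mmdet3d/version.py above splits a version string into integer parts plus an `'rcN'` suffix when present. Two concrete cases, assuming mmdet3d and its dependencies are importable:

```python
from mmdet3d.version import parse_version_info

assert parse_version_info('1.0.0rc4') == (1, 0, 0, 'rc4')   # rc suffix kept as a string
assert parse_version_info('0.17.3') == (0, 17, 3)           # plain releases are all ints
```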
2 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29501} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import torch 5 | from mmcv.runner import save_checkpoint 6 | from torch import nn as nn 7 | 8 | from mmdet3d.apis import init_model 9 | 10 | 11 | def fuse_conv_bn(conv, bn): 12 | """During inference, the functionary of batch norm layers is turned off but 13 | only the mean and var alone channels are used, which exposes the chance to 14 | fuse it with the preceding conv layers to save computations and simplify 15 | network structures.""" 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 18 | bn.running_mean) 19 | 20 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 21 | conv.weight = nn.Parameter(conv_w * 22 | factor.reshape([conv.out_channels, 1, 1, 1])) 23 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 24 | return conv 25 | 26 | 27 | def fuse_module(m): 28 | last_conv = None 29 | last_conv_name = None 30 | 31 | for name, child in m.named_children(): 32 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 33 | if last_conv is None: # only fuse BN that is after Conv 34 | continue 35 | fused_conv = fuse_conv_bn(last_conv, child) 36 | m._modules[last_conv_name] = fused_conv 37 | # To reduce changes, set BN as Identity instead of deleting it. 
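# Swapping in nn.Identity (instead of deleting the BN submodule) keeps the
# attribute in place, so the parent module's forward(), which still calls the
# BN by name, keeps working without any further code changes.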
38 | m._modules[name] = nn.Identity() 39 | last_conv = None 40 | elif isinstance(child, nn.Conv2d): 41 | last_conv = child 42 | last_conv_name = name 43 | else: 44 | fuse_module(child) 45 | return m 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='fuse Conv and BN layers in a model') 51 | parser.add_argument('config', help='config file path') 52 | parser.add_argument('checkpoint', help='checkpoint file path') 53 | parser.add_argument('out', help='output path of the converted model') 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def main(): 59 | args = parse_args() 60 | # build the model from a config file and a checkpoint file 61 | model = init_model(args.config, args.checkpoint) 62 | # fuse conv and bn layers of the model 63 | fused_model = fuse_module(model) 64 | save_checkpoint(fused_model, args.out) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', nargs='+', action=DictAction, help='arguments in dict') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | 20 | cfg = Config.fromfile(args.config) 21 | if args.options is not None: 22 | cfg.merge_from_dict(args.options) 23 | print(f'Config:\n{cfg.pretty_text}') 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
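`fuse_conv_bn` above folds the BatchNorm statistics into the preceding convolution's weights and bias. The numerical sanity check below, with illustrative layer sizes, confirms that in eval mode (where BN uses its running statistics) the fused convolution reproduces conv followed by BN up to floating-point error.

```python
import torch
from torch import nn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
# Pretend the BN has been trained: give it non-trivial statistics and affine.
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 2.0)
bn.weight.data.uniform_(0.5, 1.5)
bn.bias.data.uniform_(-0.5, 0.5)

x = torch.randn(2, 3, 16, 16)
with torch.no_grad():
    reference = bn.eval()(conv(x))

    # Same folding as fuse_conv_bn above.
    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    fused = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=True)
    fused.weight.copy_(conv.weight * factor.reshape(8, 1, 1, 1))
    fused.bias.copy_((0 - bn.running_mean) * factor + bn.bias)  # conv has no bias

    assert torch.allclose(fused(x), reference, atol=1e-5)
```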
2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | 7 | from mmdet3d.datasets import build_dataset 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='MMDet3D visualize the results') 13 | parser.add_argument('config', help='test config file path') 14 | parser.add_argument('--result', help='results file in pickle format') 15 | parser.add_argument( 16 | '--show-dir', help='directory where visualize results will be saved') 17 | args = parser.parse_args() 18 | 19 | return args 20 | 21 | 22 | def main(): 23 | args = parse_args() 24 | 25 | if args.result is not None and \ 26 | not args.result.endswith(('.pkl', '.pickle')): 27 | raise ValueError('The results file must be a pkl file.') 28 | 29 | cfg = Config.fromfile(args.config) 30 | cfg.data.test.test_mode = True 31 | 32 | # build the dataset 33 | dataset = build_dataset(cfg.data.test) 34 | results = mmcv.load(args.result) 35 | 36 | if getattr(dataset, 'show', None) is not None: 37 | # data loading pipeline for showing 38 | eval_pipeline = cfg.get('eval_pipeline', {}) 39 | if eval_pipeline: 40 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 41 | else: 42 | dataset.show(results, args.show_dir) # use default pipeline 43 | else: 44 | raise NotImplementedError( 45 | 'Show is not implemented for dataset {}!'.format( 46 | type(dataset).__name__)) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | --------------------------------------------------------------------------------
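For interactive use, the core of tools/misc/visualize_results.py above can be driven from a notebook as sketched below. The config path and results file are placeholders, and whether a given dataset class implements `show()` (the occupancy dataset used by FusionOcc may not) is an assumption; the script raises `NotImplementedError` in that case.

```python
import mmcv
from mmcv import Config

from mmdet3d.datasets import build_dataset

cfg = Config.fromfile('configs/fusion_occ/fusion_occ.py')    # placeholder config
cfg.data.test.test_mode = True

dataset = build_dataset(cfg.data.test)
results = mmcv.load('work_dirs/results.pkl')                  # e.g. from tools/test.py --out

# Use the dedicated eval pipeline for loading data if the config defines one,
# otherwise fall back to the dataset's default pipeline.
eval_pipeline = cfg.get('eval_pipeline', {})
if eval_pipeline:
    dataset.show(results, 'work_dirs/vis', pipeline=eval_pipeline)
else:
    dataset.show(results, 'work_dirs/vis')
```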