├── README.md ├── assets └── pipeline.png ├── configs ├── _base_ │ ├── datasets │ │ ├── coco_instance.py │ │ ├── kitti-3d-3class.py │ │ ├── kitti-3d-car.py │ │ ├── kitti-mono3d.py │ │ ├── lyft-3d.py │ │ ├── nuim_instance.py │ │ ├── nus-3d.py │ │ ├── nus-mono3d.py │ │ ├── range100_lyft-3d.py │ │ ├── s3dis-3d-5class.py │ │ ├── s3dis_seg-3d-13class.py │ │ ├── scannet-3d-18class.py │ │ ├── scannet_seg-3d-20class.py │ │ ├── sunrgbd-3d-10class.py │ │ ├── waymoD5-3d-3class.py │ │ └── waymoD5-3d-car.py │ ├── default_runtime.py │ ├── models │ │ ├── 3dssd.py │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ ├── dgcnn.py │ │ ├── fcaf3d.py │ │ ├── fcos3d.py │ │ ├── groupfree3d.py │ │ ├── h3dnet.py │ │ ├── hv_pointpillars_fpn_lyft.py │ │ ├── hv_pointpillars_fpn_nus.py │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ ├── hv_second_secfpn_kitti.py │ │ ├── hv_second_secfpn_waymo.py │ │ ├── imvotenet_image.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── paconv_cuda_ssg.py │ │ ├── paconv_ssg.py │ │ ├── parta2.py │ │ ├── pgd.py │ │ ├── point_rcnn.py │ │ ├── pointnet2_msg.py │ │ ├── pointnet2_ssg.py │ │ ├── smoke.py │ │ └── votenet.py │ └── schedules │ │ ├── cosine.py │ │ ├── cyclic_20e.py │ │ ├── cyclic_40e.py │ │ ├── mmdet_schedule_1x.py │ │ ├── schedule_2x.py │ │ ├── schedule_3x.py │ │ ├── seg_cosine_100e.py │ │ ├── seg_cosine_150e.py │ │ ├── seg_cosine_200e.py │ │ └── seg_cosine_50e.py └── fusion_occ │ └── fusion_occ.py ├── docs ├── datasets.md └── install.md ├── img_seg ├── gen_segmap.py ├── helper.py └── lidar │ ├── __init__.py │ ├── config │ └── label_mapping │ │ └── nuscenes.yaml │ └── lidar_anno.py ├── mmdet3d ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ └── anchor_3d_generator.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ └── __init__.py │ │ ├── box_np_ops.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── anchor_free_bbox_coder.py │ │ │ ├── centerpoint_bbox_coders.py │ │ │ ├── delta_xyzwhlr_bbox_coder.py │ │ │ ├── fcos3d_bbox_coder.py │ │ │ ├── groupfree3d_bbox_coder.py │ │ │ ├── monoflex_bbox_coder.py │ │ │ ├── partial_bin_based_bbox_coder.py │ │ │ ├── pgd_bbox_coder.py │ │ │ ├── point_xyzwhlr_bbox_coder.py │ │ │ └── smoke_bbox_coder.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── iou3d_calculator.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ └── iou_neg_piecewise_sampler.py │ │ ├── structures │ │ │ ├── __init__.py │ │ │ ├── base_box3d.py │ │ │ ├── box_3d_mode.py │ │ │ ├── cam_box3d.py │ │ │ ├── coord_3d_mode.py │ │ │ ├── depth_box3d.py │ │ │ ├── lidar_box3d.py │ │ │ └── utils.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── indoor_eval.py │ │ ├── instance_seg_eval.py │ │ ├── kitti_utils │ │ │ ├── __init__.py │ │ │ ├── eval.py │ │ │ └── rotate_iou.py │ │ ├── lyft_eval.py │ │ ├── scannet_utils │ │ │ ├── __init__.py │ │ │ ├── evaluate_semantic_instance.py │ │ │ └── util_3d.py │ │ ├── seg_eval.py │ │ └── waymo_utils │ │ │ ├── __init__.py │ │ │ └── prediction_kitti_to_waymo.py │ ├── hook │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── sequentialcontrol.py │ │ ├── syncbncontrol.py │ │ └── utils.py │ ├── points │ │ ├── __init__.py │ │ ├── base_points.py │ │ ├── cam_points.py │ │ ├── depth_points.py │ │ └── lidar_points.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── box3d_nms.py │ │ └── 
merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── array_converter.py │ │ └── gaussian.py │ ├── visualizer │ │ ├── __init__.py │ │ ├── image_vis.py │ │ ├── open3d_vis.py │ │ └── show_result.py │ └── voxel │ │ ├── __init__.py │ │ ├── builder.py │ │ └── voxel_generator.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── custom_3d.py │ ├── dataset_wrappers.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_occ.py │ ├── occ_metrics.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── aug_2d.py │ │ ├── compose.py │ │ ├── data_augment_utils.py │ │ ├── dbsampler.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms_3d.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── lidar_encoder.py │ │ ├── resnet.py │ │ └── swin.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bevdet.py │ │ ├── centerpoint.py │ │ ├── fusion_occ.py │ │ └── mvx_two_stage.py │ └── necks │ │ ├── __init__.py │ │ ├── fusion_view_transformer.py │ │ ├── lss_fpn.py │ │ └── view_transformer.py ├── ops │ ├── __init__.py │ ├── bev_pool_v2 │ │ ├── __init__.py │ │ ├── bev_pool.py │ │ └── src │ │ │ ├── bev_pool.cpp │ │ │ └── bev_pool_cuda.cu │ ├── dgcnn_modules │ │ ├── __init__.py │ │ ├── dgcnn_fa_module.py │ │ ├── dgcnn_fp_module.py │ │ └── dgcnn_gf_module.py │ ├── norm.py │ ├── paconv │ │ ├── __init__.py │ │ ├── paconv.py │ │ └── utils.py │ ├── pointnet_modules │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── paconv_sa_module.py │ │ ├── point_fp_module.py │ │ └── point_sa_module.py │ ├── sparse_block.py │ └── spconv │ │ ├── __init__.py │ │ └── overwrite_spconv │ │ ├── __init__.py │ │ └── write_spconv2.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── compat_cfg.py │ ├── logger.py │ ├── misc.py │ └── setup_env.py └── version.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tools ├── create_data_fusionocc.py ├── data_converter ├── __init__.py ├── create_gt_database.py ├── nuimage_converter.py └── nuscenes_converter.py ├── dist_test.sh ├── dist_train.sh ├── misc ├── browse_dataset.py ├── fuse_conv_bn.py ├── print_config.py └── visualize_results.py ├── test.py └── train.py /README.md: -------------------------------------------------------------------------------- 1 | # FusionOcc 2 | > **FusionOcc: Multi-Modal Fusion for 3D Occupancy Prediction, MM 2024** [[paper](https://dl.acm.org/doi/10.1145/3664647.3681293)] 3 | 4 | ## INTRODUCTION 5 | FusionOcc is a new multi-modal fusion network for 3D occupancy prediction that fuses features of LiDAR point clouds and surround-view images. The model fuses the features of these two modalities in 2D and 3D space, respectively. A semi-supervised method is used to generate dense depth maps, which are integrated with BEV image features via a cross-modal fusion module. Features of the voxelized point cloud are aligned and merged in 3D space with image features lifted by a view transformer. FusionOcc establishes a new baseline for further research on multi-modal fusion for 3D occupancy prediction, while achieving new state-of-the-art results on the Occ3D-nuScenes dataset. 
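
The overall data flow can be summarized by the minimal sketch below. This is a conceptual outline only, not the actual implementation; every module name in it is a hypothetical placeholder for the components described above (see the pipeline figure below for the real architecture).

```python
# Conceptual sketch only -- module names are hypothetical placeholders,
# not the FusionOcc source. It mirrors the description above: a dense depth
# map (trained semi-supervised from sparse LiDAR depth) is fused with image
# features in 2D, the image features are lifted to 3D by a view transformer,
# and then merged with voxelized point-cloud features before predicting
# per-voxel occupancy.
def fusion_occ_forward(imgs, points, *, img_encoder, depth_net,
                       cross_modal_fusion, view_transformer,
                       lidar_encoder, voxel_fusion, occ_head):
    img_feats = img_encoder(imgs)                      # 2D features of surround-view images
    depth = depth_net(img_feats, points)               # dense depth prediction
    img_feats = cross_modal_fusion(img_feats, depth)   # 2D cross-modal fusion
    img_voxels = view_transformer(img_feats, depth)    # lift image features into 3D space
    pts_voxels = lidar_encoder(points)                 # voxelized point-cloud features
    fused = voxel_fusion(img_voxels, pts_voxels)       # align and merge the two modalities in 3D
    return occ_head(fused)                             # 3D semantic occupancy prediction
```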
6 | 7 | ![pipeline](assets/pipeline.png) 8 | 9 | ## Getting Started 10 | 11 | - [Installation](docs/install.md) 12 | ``` 13 | # main prerequisites 14 | Python = 3.8 15 | nuscenes-devkit = 1.1.11 16 | PyTorch = 1.10.0 17 | torch-scatter = 2.0.9 18 | opencv-python = 4.9.0 19 | Pillow = 10.0.1 20 | mmcv-full = 1.5.3 21 | mmdetection = 2.25.1 22 | ``` 23 | 24 | - [Datasets](docs/datasets.md) 25 | 26 | ``` 27 | FusionOcc 28 | ├── data 29 | │ ├── nuscenes 30 | │ │ ├── maps 31 | │ │ ├── samples 32 | │ │ ├── sweeps 33 | │ │ ├── lidarseg 34 | │ │ ├── imgseg 35 | │ │ ├── gts 36 | │ │ ├── v1.0-trainval 37 | │ │ ├── fusionocc-nuscenes_infos_train.pkl 38 | │ │ ├── fusionocc-nuscenes_infos_val.pkl 39 | ``` 40 | 41 | 42 | ## Model Zoo 43 | 44 | | Backbone | Config | Mask | Pretrain | mIoU | Checkpoints | 45 | | :-------: | :---: | :---: | :---: | :---: | :---: | 46 | | Swin-Base | [Base](configs/fusion_occ) | ✖️ | ImageNet, nuImages | 56.62 | [BaseWoMask](https://drive.google.com/file/d/16ELoDLoDkCYheREJUPiBz2905MHhuVHv/view) | 47 | 49 | 51 | 52 | ## Evaluation 53 | 54 | We provide instructions for evaluating our pretrained models. Download the checkpoints above first. 55 | 56 | The config file is [fusion_occ.py](configs/fusion_occ/fusion_occ.py). 57 | 58 | Run: 59 | ```bash 60 | ./tools/dist_test.sh $config $checkpoint num_gpu 61 | ``` 62 | 63 | ## Training 64 | 65 | To load pre-trained weights, modify the `load_from` path at the end of the config file (see the example below), then run: 66 | 67 | ```bash 68 | ./tools/dist_train.sh $config num_gpu 69 | ``` 70 | 71 | To obtain the version that does not use the mask, set the `use_mask` field in the config file to False and train for several epochs. 72 | 73 | You can also obtain pre-trained weights from [BEVDet](https://github.com/HuangJunJie2017/BEVDet/blob/dev3.0/docker/Dockerfile) 74 | to start training from scratch. 
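
For example, the tail of the config file might look like the snippet below after pointing it at downloaded weights. This is only an illustration: the checkpoint filename is a placeholder, and `use_mask` / `load_from` are simply the fields mentioned above.

```python
# Illustrative tail of configs/fusion_occ/fusion_occ.py -- values are placeholders.
use_mask = True                               # set to False to train the mask-free variant
load_from = "ckpts/fusion_occ_swin_base.pth"  # path to the downloaded pre-trained weights
```

Training is then launched as above, e.g. `./tools/dist_train.sh configs/fusion_occ/fusion_occ.py 8` when using 8 GPUs.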
75 | 76 | 77 | 78 | ## Acknowledgement 79 | 80 | Thanks a lot to these excellent open-source projects, our code is based on them: 81 | - [BEVDet](https://github.com/HuangJunJie2017/BEVDet), [BEVFormer](https://github.com/fundamentalvision/BEVFormer), [BEVFusion](https://github.com/mit-han-lab/bevfusion) 82 | - [Occ3d](https://github.com/Tsinghua-MARS-Lab/Occ3D), [CVPR23-Occ-Chanllege](https://github.com/CVPR2023-3D-Occupancy-Prediction) 83 | 84 | Some other related projects for Occ3d prediction: 85 | - [SurroundOcc](https://github.com/weiyithu/SurroundOcc), [TPVFormer](https://github.com/wzzheng/TPVFormer) 86 | - [PanoOcc](https://github.com/Robertwyq/PanoOcc), [RenderOcc](https://github.com/pmj110119/RenderOcc) 87 | 88 | 89 | ## BibTeX 90 | 91 | If this work is helpful for your research, please consider citing the following paper: 92 | 93 | ``` 94 | @inproceedings{ 95 | zhang2024fusionocc, 96 | title={FusionOcc: Multi-Modal Fusion for 3D Occupancy Prediction}, 97 | author={Shuo Zhang and Yupeng Zhai and Jilin Mei and Yu Hu}, 98 | booktitle={ACM Multimedia 2024}, 99 | year={2024}, 100 | url={https://openreview.net/forum?id=xX66hwZJWa} 101 | } 102 | -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuoZhang-code/FusionOcc/83ded3884b98b299d35d636a91e9aa2a92d89221/assets/pipeline.png -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/kitti-mono3d.py: 
-------------------------------------------------------------------------------- 1 | dataset_type = 'KittiMonoDataset' 2 | data_root = 'data/kitti/' 3 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 4 | input_modality = dict(use_lidar=False, use_camera=True) 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFileMono3D'), 9 | dict( 10 | type='LoadAnnotations3D', 11 | with_bbox=True, 12 | with_label=True, 13 | with_attr_label=False, 14 | with_bbox_3d=True, 15 | with_label_3d=True, 16 | with_bbox_depth=True), 17 | dict(type='Resize', img_scale=(1242, 375), keep_ratio=True), 18 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle3D', class_names=class_names), 22 | dict( 23 | type='Collect3D', 24 | keys=[ 25 | 'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d', 26 | 'centers2d', 'depths' 27 | ]), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFileMono3D'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(1242, 375), 34 | flip=False, 35 | transforms=[ 36 | dict(type='RandomFlip3D'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict( 40 | type='DefaultFormatBundle3D', 41 | class_names=class_names, 42 | with_label=False), 43 | dict(type='Collect3D', keys=['img']), 44 | ]) 45 | ] 46 | # construct a pipeline for data and gt loading in show function 47 | # please keep its loading function consistent with test_pipeline (e.g. client) 48 | eval_pipeline = [ 49 | dict(type='LoadImageFromFileMono3D'), 50 | dict( 51 | type='DefaultFormatBundle3D', 52 | class_names=class_names, 53 | with_label=False), 54 | dict(type='Collect3D', keys=['img']) 55 | ] 56 | data = dict( 57 | samples_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | data_root=data_root, 62 | ann_file=data_root + 'kitti_infos_train_mono3d.coco.json', 63 | info_file=data_root + 'kitti_infos_train.pkl', 64 | img_prefix=data_root, 65 | classes=class_names, 66 | pipeline=train_pipeline, 67 | modality=input_modality, 68 | test_mode=False, 69 | box_type_3d='Camera'), 70 | val=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 74 | info_file=data_root + 'kitti_infos_val.pkl', 75 | img_prefix=data_root, 76 | classes=class_names, 77 | pipeline=test_pipeline, 78 | modality=input_modality, 79 | test_mode=True, 80 | box_type_3d='Camera'), 81 | test=dict( 82 | type=dataset_type, 83 | data_root=data_root, 84 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 85 | info_file=data_root + 'kitti_infos_val.pkl', 86 | img_prefix=data_root, 87 | classes=class_names, 88 | pipeline=test_pipeline, 89 | modality=input_modality, 90 | test_mode=True, 91 | box_type_3d='Camera')) 92 | evaluation = dict(interval=2) 93 | -------------------------------------------------------------------------------- /configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 
10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'NuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 
21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', 12 | # path_mapping=dict({ 13 | # './data/sunrgbd/': 14 | # 's3://openmmlab/datasets/detection3d/sunrgbd_processed/', 15 | # 'data/sunrgbd/': 16 | # 's3://openmmlab/datasets/detection3d/sunrgbd_processed/' 17 | # })) 18 | 19 | train_pipeline = [ 20 | dict( 21 | type='LoadPointsFromFile', 22 | coord_type='DEPTH', 23 | shift_height=True, 24 | load_dim=6, 25 | use_dim=[0, 1, 2], 26 | file_client_args=file_client_args), 27 | dict(type='LoadAnnotations3D', file_client_args=file_client_args), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | ), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.523599, 0.523599], 36 | scale_ratio_range=[0.85, 1.15], 37 | shift_height=True), 38 | dict(type='PointSample', num_points=20000), 39 | dict(type='DefaultFormatBundle3D', class_names=class_names), 40 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 41 | ] 42 | test_pipeline = [ 43 | dict( 44 | type='LoadPointsFromFile', 45 | coord_type='DEPTH', 46 | shift_height=True, 47 | load_dim=6, 48 | use_dim=[0, 1, 2], 49 | file_client_args=file_client_args), 50 | dict( 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.5, 65 | ), 66 | dict(type='PointSample', num_points=20000), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | load_dim=6, 82 | use_dim=[0, 1, 2], 83 | file_client_args=file_client_args), 84 | dict( 85 | type='DefaultFormatBundle3D', 86 | class_names=class_names, 87 | with_label=False), 88 | dict(type='Collect3D', keys=['points']) 89 | ] 90 | 91 | data = dict( 92 | samples_per_gpu=16, 93 | workers_per_gpu=4, 94 | train=dict( 95 | type='RepeatDataset', 96 | times=5, 97 | dataset=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 101 | pipeline=train_pipeline, 102 | classes=class_names, 103 | filter_empty_gt=False, 104 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 105 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
106 | box_type_3d='Depth', 107 | file_client_args=file_client_args)), 108 | val=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 112 | pipeline=test_pipeline, 113 | classes=class_names, 114 | test_mode=True, 115 | box_type_3d='Depth', 116 | file_client_args=file_client_args), 117 | test=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 121 | pipeline=test_pipeline, 122 | classes=class_names, 123 | test_mode=True, 124 | box_type_3d='Depth', 125 | file_client_args=file_client_args)) 126 | 127 | evaluation = dict(pipeline=eval_pipeline) 128 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | 20 | # disable opencv multithreading to avoid system being overloaded 21 | opencv_num_threads = 0 22 | # set multi-process start method as `fork` to speed up the training 23 | mp_start_method = 'fork' 24 | -------------------------------------------------------------------------------- /configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', 
eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | 
dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 
| post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/dgcnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='DGCNNBackbone', 6 | in_channels=9, # [xyz, rgb, normal_xyz], modified with dataset 7 | num_samples=(20, 20, 20), 8 | knn_modes=('D-KNN', 'F-KNN', 'F-KNN'), 9 | radius=(None, None, None), 10 | gf_channels=((64, 64), (64, 64), (64, )), 11 | fa_channels=(1024, ), 12 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2)), 13 | decode_head=dict( 14 | type='DGCNNHead', 15 | fp_channels=(1216, 512), 16 | channels=256, 17 | dropout_ratio=0.5, 18 | conv_cfg=dict(type='Conv1d'), 19 | norm_cfg=dict(type='BN1d'), 20 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2), 21 | loss_decode=dict( 22 | type='CrossEntropyLoss', 23 | use_sigmoid=False, 24 | class_weight=None, # modified with dataset 25 | loss_weight=1.0)), 26 | # model training and testing settings 27 | train_cfg=dict(), 28 | test_cfg=dict(mode='slide')) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/fcaf3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MinkSingleStage3DDetector', 3 | voxel_size=.01, 4 | backbone=dict(type='MinkResNet', in_channels=3, depth=34), 5 | head=dict( 6 | type='FCAF3DHead', 7 | in_channels=(64, 128, 256, 512), 8 | out_channels=128, 9 | voxel_size=.01, 10 | pts_prune_threshold=100000, 11 | pts_assign_threshold=27, 12 | pts_center_threshold=18, 13 | n_classes=18, 14 | n_reg_outs=6), 15 | train_cfg=dict(), 16 | test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01)) 17 | -------------------------------------------------------------------------------- /configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | backbone=dict( 4 | type='ResNet', 5 | depth=101, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint='open-mmlab://detectron2/resnet101_caffe')), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5, 22 | relu_before_extra_convs=True), 23 | bbox_head=dict( 24 | type='FCOSMono3DHead', 25 | num_classes=10, 26 | in_channels=256, 27 | stacked_convs=2, 28 | feat_channels=256, 29 | use_direction_classifier=True, 30 | diff_rad_by_sin=True, 31 | pred_attrs=True, 32 | pred_velo=True, 33 | dir_offset=0.7854, # pi/4 34 | dir_limit_offset=0, 35 | strides=[8, 16, 32, 64, 128], 36 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 37 | cls_branch=(256, ), 38 | reg_branch=( 39 | (256, ), # offset 40 | (256, ), # depth 41 | (256, ), # size 42 | (256, ), # rot 43 | () # velo 44 | ), 45 | dir_branch=(256, ), 46 | attr_branch=(256, ), 47 | loss_cls=dict( 48 | type='FocalLoss', 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | loss_weight=1.0), 53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 54 | loss_dir=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_attr=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, 
loss_weight=1.0), 58 | loss_centerness=dict( 59 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 60 | bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9), 61 | norm_on_bbox=True, 62 | centerness_on_reg=True, 63 | center_sampling=True, 64 | conv_bias=True, 65 | dcn_on_last_conv=True), 66 | train_cfg=dict( 67 | allowed_border=0, 68 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 69 | pos_weight=-1, 70 | debug=False), 71 | test_cfg=dict( 72 | use_rotate_nms=True, 73 | nms_across_levels=False, 74 | nms_pre=1000, 75 | nms_thr=0.8, 76 | score_thr=0.05, 77 | min_bbox_size=0, 78 | max_per_img=200)) 79 | -------------------------------------------------------------------------------- /configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in 
the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) 53 | [1.7321, 0.5774, 1.], # 1 / sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=-0.7854, # -pi / 4 63 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 64 | loss_cls=dict( 65 | type='FocalLoss', 66 | use_sigmoid=True, 67 | gamma=2.0, 68 | alpha=0.25, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 71 | loss_dir=dict( 72 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 73 | # model training and testing settings 74 | train_cfg=dict( 75 | pts=dict( 76 | assigner=dict( 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.3, 82 | ignore_iof_thr=-1), 83 | allowed_border=0, 84 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
0.2, 0.2], 85 | pos_weight=-1, 86 | debug=False)), 87 | test_cfg=dict( 88 | pts=dict( 89 | use_rotate_nms=True, 90 | nms_across_levels=False, 91 | nms_pre=1000, 92 | nms_thr=0.2, 93 | score_thr=0.05, 94 | min_bbox_size=0, 95 | max_num=500))) 96 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | assign_per_class=True, 38 | anchor_generator=dict( 39 | type='AlignedAnchor3DRangeGenerator', 40 | ranges=[ 41 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 42 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 43 | [0, -39.68, -1.78, 69.12, 39.68, -1.78], 44 | ], 45 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 46 | rotations=[0, 1.57], 47 | reshape_out=False), 48 | diff_rad_by_sin=True, 49 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 50 | loss_cls=dict( 51 | type='FocalLoss', 52 | use_sigmoid=True, 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 57 | loss_dir=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | assigner=[ 62 | 
dict( # for Pedestrian 63 | type='MaxIoUAssigner', 64 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 65 | pos_iou_thr=0.5, 66 | neg_iou_thr=0.35, 67 | min_pos_iou=0.35, 68 | ignore_iof_thr=-1), 69 | dict( # for Cyclist 70 | type='MaxIoUAssigner', 71 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 72 | pos_iou_thr=0.5, 73 | neg_iou_thr=0.35, 74 | min_pos_iou=0.35, 75 | ignore_iof_thr=-1), 76 | dict( # for Car 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.45, 81 | min_pos_iou=0.45, 82 | ignore_iof_thr=-1), 83 | ], 84 | allowed_border=0, 85 | pos_weight=-1, 86 | debug=False), 87 | test_cfg=dict( 88 | use_rotate_nms=True, 89 | nms_across_levels=False, 90 | nms_thr=0.01, 91 | score_thr=0.1, 92 | min_bbox_size=0, 93 | nms_pre=100, 94 | max_num=50)) 95 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [4.73, 2.08, 1.77], # car 52 | [1.81, 0.84, 1.77], # cyclist 53 | [0.91, 0.84, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=-0.7854, # -pi / 4 59 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 60 | loss_cls=dict( 61 | type='FocalLoss', 62 | use_sigmoid=True, 63 | gamma=2.0, 64 | alpha=0.25, 65 | loss_weight=1.0), 66 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 67 | loss_dir=dict( 68 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | pts=dict( 72 | assigner=[ 73 | dict( # car 74 | type='MaxIoUAssigner', 75 | 
iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.55, 77 | neg_iou_thr=0.4, 78 | min_pos_iou=0.4, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1), 87 | dict( # pedestrian 88 | type='MaxIoUAssigner', 89 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.3, 92 | min_pos_iou=0.3, 93 | ignore_iof_thr=-1), 94 | ], 95 | allowed_border=0, 96 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 97 | pos_weight=-1, 98 | debug=False)), 99 | test_cfg=dict( 100 | pts=dict( 101 | use_rotate_nms=True, 102 | nms_across_levels=False, 103 | nms_pre=4096, 104 | nms_thr=0.25, 105 | score_thr=0.1, 106 | min_bbox_size=0, 107 | max_num=500))) 108 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | 
-------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [4.73, 2.08, 1.77], # car 46 | [0.91, 0.84, 1.74], # pedestrian 47 | [1.81, 0.84, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=-0.7854, # -pi / 4 53 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 54 | loss_cls=dict( 55 | type='FocalLoss', 56 | use_sigmoid=True, 57 | gamma=2.0, 58 | alpha=0.25, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 61 | loss_dir=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | assigner=[ 66 | dict( # car 67 | type='MaxIoUAssigner', 68 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 69 | pos_iou_thr=0.55, 70 | neg_iou_thr=0.4, 71 | min_pos_iou=0.4, 72 | ignore_iof_thr=-1), 73 | dict( # pedestrian 74 | type='MaxIoUAssigner', 75 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.5, 77 | neg_iou_thr=0.3, 78 | min_pos_iou=0.3, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1) 87 | ], 88 | allowed_border=0, 89 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 90 | pos_weight=-1, 91 | debug=False), 92 | test_cfg=dict( 93 | use_rotate_nms=True, 94 | nms_across_levels=False, 95 | nms_pre=4096, 96 | nms_thr=0.25, 97 | score_thr=0.1, 98 | min_bbox_size=0, 99 | max_num=500)) 100 | -------------------------------------------------------------------------------- /configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | 
out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | 
type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_per_img=1000, 93 | nms=dict(type='nms', iou_threshold=0.7), 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_per_img=1000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | 
in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 | paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/pgd.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos3d.py' 2 | # model settings 3 | model = dict( 4 | bbox_head=dict( 5 | _delete_=True, 6 | type='PGDHead', 7 | num_classes=10, 8 | in_channels=256, 9 | stacked_convs=2, 10 | feat_channels=256, 11 | use_direction_classifier=True, 12 | diff_rad_by_sin=True, 13 | pred_attrs=True, 14 | pred_velo=True, 15 | pred_bbox2d=True, 16 | pred_keypoints=False, 17 | dir_offset=0.7854, # pi/4 18 | strides=[8, 16, 32, 64, 128], 19 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 20 | cls_branch=(256, ), 21 | reg_branch=( 22 | (256, ), # offset 23 | (256, ), # depth 24 | (256, ), # size 25 | (256, ), # rot 26 | () # velo 27 | ), 28 | dir_branch=(256, ), 29 | attr_branch=(256, ), 30 | loss_cls=dict( 31 | type='FocalLoss', 32 | use_sigmoid=True, 33 | gamma=2.0, 34 | alpha=0.25, 35 | loss_weight=1.0), 36 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 37 | loss_dir=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_attr=dict( 40 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 41 | loss_centerness=dict( 42 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 43 | norm_on_bbox=True, 44 | centerness_on_reg=True, 45 | center_sampling=True, 46 | conv_bias=True, 47 | dcn_on_last_conv=True, 48 | use_depth_classifier=True, 49 | depth_branch=(256, ), 50 | depth_range=(0, 50), 51 | depth_unit=10, 52 | division='uniform', 53 | depth_bins=6, 54 | bbox_coder=dict(type='PGDBBoxCoder', code_size=9)), 55 | test_cfg=dict(nms_pre=1000, nms_thr=0.8, score_thr=0.01, max_per_img=200)) 56 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_msg.py: 
-------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /configs/_base_/models/smoke.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SMOKEMono3D', 3 | backbone=dict( 4 | type='DLANet', 5 | depth=34, 6 | in_channels=3, 7 | norm_cfg=dict(type='GN', num_groups=32), 8 | init_cfg=dict( 9 | type='Pretrained', 10 | checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth' 11 | )), 12 | neck=dict( 13 | type='DLANeck', 14 | in_channels=[16, 32, 64, 128, 256, 512], 15 | start_level=2, 16 | end_level=5, 17 | norm_cfg=dict(type='GN', num_groups=32)), 18 | bbox_head=dict( 19 | type='SMOKEMono3DHead', 20 | num_classes=3, 21 | in_channels=64, 22 | dim_channel=[3, 4, 5], 23 | ori_channel=[6, 7], 24 | stacked_convs=0, 25 | feat_channels=64, 26 | use_direction_classifier=False, 27 | diff_rad_by_sin=False, 28 | pred_attrs=False, 29 | pred_velo=False, 30 | dir_offset=0, 31 | strides=None, 32 | group_reg_dims=(8, ), 33 | cls_branch=(256, ), 34 | reg_branch=((256, ), ), 35 | num_attrs=0, 36 | bbox_code_size=7, 37 | dir_branch=(), 38 | attr_branch=(), 39 | 
bbox_coder=dict( 40 | type='SMOKECoder', 41 | base_depth=(28.01, 16.32), 42 | base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63, 43 | 1.53)), 44 | code_size=7), 45 | loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0), 46 | loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300), 47 | loss_dir=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_attr=None, 50 | conv_bias=True, 51 | dcn_on_last_conv=False), 52 | train_cfg=None, 53 | test_cfg=dict(topK=100, local_maximum_kernel=3, max_per_img=100)) 54 | -------------------------------------------------------------------------------- /configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | 
policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 20 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch.
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_100e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 4 | optimizer_config = dict(grad_clip=None) 5
| lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | 7 | # runtime settings 8 | runner = dict(type='EpochBasedRunner', max_epochs=100) 9 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /docs/datasets.md: -------------------------------------------------------------------------------- 1 | # Prepare Datasets 2 | Currently supported datasets: Occ3D-nuScenes. 3 | 4 | ## Occ3D-nuScenes 5 | Download nuScenes V1.0 full from [here](https://www.nuscenes.org/download) to `data/nuscenes`, nuScenes-lidarseg from [here](https://www.nuscenes.org/download), GTs of Occ(gts only) from [here](https://github.com/Tsinghua-MARS-Lab/Occ3D). 
\ 6 | Prepare the nuScenes dataset as follows: 7 | 8 | ``` 9 | FusionOcc 10 | ├── data 11 | │   ├── nuscenes 12 | │   │   ├── maps 13 | │   │   ├── samples 14 | │   │   ├── sweeps 15 | │   │   ├── lidarseg 16 | │   │   ├── gts 17 | │   │   ├── v1.0-trainval 18 | ``` 19 | 20 | Create the pkl files: 21 | ```shell 22 | python tools/create_data_fusionocc.py 23 | ``` 24 | Generate the image segmentation labels (this takes a long time) by running: 25 | ```shell 26 | python img_seg/gen_segmap.py data/nuscenes --parallel=32 27 | ``` 28 | 29 | After processing, the data structure is as follows: 30 | ``` 31 | FusionOcc 32 | ├── data 33 | │   ├── nuscenes 34 | │   │   ├── maps 35 | │   │   ├── samples 36 | │   │   ├── sweeps 37 | │   │   ├── lidarseg 38 | │   │   ├── imgseg 39 | │   │   ├── gts 40 | │   │   ├── v1.0-trainval 41 | │   │   ├── fusionocc-nuscenes_infos_train.pkl 42 | │   │   ├── fusionocc-nuscenes_infos_val.pkl 43 | ``` -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | The environment is based on [BEVDet](https://github.com/HuangJunJie2017/BEVDet/blob/dev3.0/docker/Dockerfile). 3 | 4 | **1. Conda Virtual Environment** 5 | ```shell 6 | conda create -n fusionocc python=3.8 -y 7 | conda activate fusionocc 8 | ``` 9 | 10 | **2. PyTorch** 11 | ```shell 12 | pip install torch==1.10.1+cu113 torchvision==0.10.1+cu113 -f https://download.pytorch.org/whl/torch_stable.html 13 | ``` 14 | 15 | **3. MMCV, MMDet, MMSeg** 16 | ```shell 17 | pip install mmcv-full==1.5.3 -f https://download.openmmlab.com/mmcv/dist/cu11.3/torch1.10.0/index.html 18 | pip install mmdet==2.25.1 mmsegmentation==0.25.0 19 | ``` 20 | 21 | **4. Others** 22 | ```shell 23 | pip install -r requirements.txt 24 | ``` 25 | Download torch-scatter 2.0.9 from https://pytorch-geometric.com/whl/ 26 | ```shell 27 | pip install torch_scatter-2.0.9-cp38-cp38-linux_x86_64.whl 28 | ``` 29 | 30 | **5. Install FusionOcc** 31 | 32 | ```shell 33 | git clone https://github.com/ShuoZhang-code/FusionOcc.git 34 | cd FusionOcc 35 | pip install -v -e .
36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /img_seg/gen_segmap.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from multiprocessing import Process 4 | 5 | from nuscenes.nuscenes import NuScenes 6 | 7 | from lidar.lidar_anno import nuScenesLidarSeg 8 | from helper import * 9 | 10 | 11 | def gen_seg_map(start_idx, end_idx, nusc, lidar_seg_nus, down_sample, proj_lidar=False, save_dir=None): 12 | for i, scene in enumerate(nusc.scene[start_idx:end_idx]): 13 | sample = nusc.get('sample', scene['first_sample_token']) 14 | while True: 15 | lidar_seg = lidar_seg_nus.get_lidar_seg(sample["token"]) 16 | process_one_sample(nusc, 17 | sample, 18 | down_sample, 19 | lidar_seg=lidar_seg, 20 | proj_lidar=proj_lidar, 21 | save_dir=save_dir) 22 | if sample['next'] == '': 23 | break 24 | sample = nusc.get('sample', sample['next']) 25 | 26 | 27 | def gen_labels(nusc, lidar_seg_nus, down_sample, parallel=1, proj_lidar=False, visible_level=2, save_dir=None): 28 | total_n = len(nusc.scene) 29 | interval = total_n // parallel 30 | processes = [] 31 | for i in range(parallel + 1): 32 | start_idx = i * interval 33 | end_idx = (i + 1) * interval 34 | p = Process(target=gen_seg_map, 35 | args=(start_idx, end_idx, 36 | nusc, lidar_seg_nus, down_sample, proj_lidar, save_dir 37 | ) 38 | ) 39 | p.start() 40 | processes.append(p) 41 | for p in processes: 42 | p.join() 43 | 44 | 45 | def parse_args(): 46 | parser = argparse.ArgumentParser(description='Generate 2d images seg label') 47 | parser.add_argument('data_root', help='data root of nuscenes') 48 | parser.add_argument('--down_sample', type=int, default=8, help='down sample seg img') 49 | parser.add_argument('--parallel', type=int, default=1, help='parallel processing num') 50 | args = parser.parse_args() 51 | return args 52 | 53 | 54 | if __name__ == '__main__': 55 | args = parse_args() 56 | data_root = args.data_root 57 | version = "v1.0-trainval" 58 | save_dir = os.path.join(args.data_root, "imgseg") 59 | os.makedirs(save_dir, exist_ok=True) 60 | down_sample = args.down_sample 61 | parallel = args.parallel 62 | nusc = NuScenes(version=version, 63 | dataroot=data_root, 64 | verbose=True) 65 | lidar_seg_nus = nuScenesLidarSeg(nusc=nusc, data_path=data_root, version=version) 66 | gen_labels(nusc, lidar_seg_nus, down_sample=down_sample, parallel=parallel, 67 | proj_lidar=True, save_dir=save_dir) 68 | -------------------------------------------------------------------------------- /img_seg/lidar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuoZhang-code/FusionOcc/83ded3884b98b299d35d636a91e9aa2a92d89221/img_seg/lidar/__init__.py -------------------------------------------------------------------------------- /img_seg/lidar/config/label_mapping/nuscenes.yaml: -------------------------------------------------------------------------------- 1 | labels: 2 | 0: 'noise' 3 | 1: 'animal' 4 | 2: 'human.pedestrian.adult' 5 | 3: 'human.pedestrian.child' 6 | 4: 'human.pedestrian.construction_worker' 7 | 5: 'human.pedestrian.personal_mobility' 8 | 6: 'human.pedestrian.police_officer' 9 | 7: 'human.pedestrian.stroller' 10 | 8: 'human.pedestrian.wheelchair' 11 | 9: 'movable_object.barrier' 12 | 10: 'movable_object.debris' 13 | 11: 'movable_object.pushable_pullable' 14 | 12: 'movable_object.trafficcone' 15 | 13: 'static_object.bicycle_rack' 16 | 14: 'vehicle.bicycle' 17 | 
15: 'vehicle.bus.bendy' 18 | 16: 'vehicle.bus.rigid' 19 | 17: 'vehicle.car' 20 | 18: 'vehicle.construction' 21 | 19: 'vehicle.emergency.ambulance' 22 | 20: 'vehicle.emergency.police' 23 | 21: 'vehicle.motorcycle' 24 | 22: 'vehicle.trailer' 25 | 23: 'vehicle.truck' 26 | 24: 'flat.driveable_surface' 27 | 25: 'flat.other' 28 | 26: 'flat.sidewalk' 29 | 27: 'flat.terrain' 30 | 28: 'static.manmade' 31 | 29: 'static.other' 32 | 30: 'static.vegetation' 33 | 31: 'vehicle.ego' 34 | labels_16: 35 | 0: 'noise' 36 | 1: 'barrier' 37 | 2: 'bicycle' 38 | 3: 'bus' 39 | 4: 'car' 40 | 5: 'construction_vehicle' 41 | 6: 'motorcycle' 42 | 7: 'pedestrian' 43 | 8: 'traffic_cone' 44 | 9: 'trailer' 45 | 10: 'truck' 46 | 11: 'driveable_surface' 47 | 12: 'other_flat' 48 | 13: 'sidewalk' 49 | 14: 'terrain' 50 | 15: 'manmade' 51 | 16: 'vegetation' 52 | learning_map: 53 | 1: 0 54 | 5: 0 55 | 7: 0 56 | 8: 0 57 | 10: 0 58 | 11: 0 59 | 13: 0 60 | 19: 0 61 | 20: 0 62 | 0: 0 63 | 29: 0 64 | 31: 0 65 | 9: 1 66 | 14: 2 67 | 15: 3 68 | 16: 3 69 | 17: 4 70 | 18: 5 71 | 21: 6 72 | 2: 7 73 | 3: 7 74 | 4: 7 75 | 6: 7 76 | 12: 8 77 | 22: 9 78 | 23: 10 79 | 24: 11 80 | 25: 12 81 | 26: 13 82 | 27: 14 83 | 28: 15 84 | 30: 16 -------------------------------------------------------------------------------- /mmdet3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | import mmdet 5 | import mmseg 6 | from .version import __version__, short_version 7 | 8 | 9 | def digit_version(version_str): 10 | digit_version = [] 11 | for x in version_str.split('.'): 12 | if x.isdigit(): 13 | digit_version.append(int(x)) 14 | elif x.find('rc') != -1: 15 | patch_version = x.split('rc') 16 | digit_version.append(int(patch_version[0]) - 1) 17 | digit_version.append(int(patch_version[1])) 18 | return digit_version 19 | 20 | 21 | mmcv_minimum_version = '1.5.2' 22 | mmcv_maximum_version = '1.7.0' 23 | mmcv_version = digit_version(mmcv.__version__) 24 | 25 | 26 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 27 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 28 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 29 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 30 | 31 | mmdet_minimum_version = '2.24.0' 32 | mmdet_maximum_version = '3.0.0' 33 | mmdet_version = digit_version(mmdet.__version__) 34 | assert (mmdet_version >= digit_version(mmdet_minimum_version) 35 | and mmdet_version <= digit_version(mmdet_maximum_version)), \ 36 | f'MMDET=={mmdet.__version__} is used but incompatible. ' \ 37 | f'Please install mmdet>={mmdet_minimum_version}, ' \ 38 | f'<={mmdet_maximum_version}.' 39 | 40 | mmseg_minimum_version = '0.20.0' 41 | mmseg_maximum_version = '1.0.0' 42 | mmseg_version = digit_version(mmseg.__version__) 43 | assert (mmseg_version >= digit_version(mmseg_minimum_version) 44 | and mmseg_version <= digit_version(mmseg_maximum_version)), \ 45 | f'MMSEG=={mmseg.__version__} is used but incompatible. ' \ 46 | f'Please install mmseg>={mmseg_minimum_version}, ' \ 47 | f'<={mmseg_maximum_version}.' 48 | 49 | __all__ = ['__version__', 'short_version'] 50 | -------------------------------------------------------------------------------- /mmdet3d/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .inference import (convert_SyncBN, inference_detector, 3 | inference_mono_3d_detector, 4 | inference_multi_modality_detector, inference_segmentor, 5 | init_model, show_result_meshlab) 6 | from .test import single_gpu_test 7 | from .train import init_random_seed, train_model 8 | 9 | __all__ = [ 10 | 'inference_detector', 'init_model', 'single_gpu_test', 11 | 'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN', 12 | 'train_model', 'inference_multi_modality_detector', 'inference_segmentor', 13 | 'init_random_seed' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/apis/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from os import path as osp 3 | 4 | import mmcv 5 | import torch 6 | from mmcv.image import tensor2imgs 7 | 8 | from mmdet3d.models import (Base3DDetector) 9 | 10 | 11 | def single_gpu_test(model, 12 | data_loader, 13 | show=False, 14 | out_dir=None, 15 | show_score_thr=0.3): 16 | """Test model with single gpu. 17 | 18 | This method tests the model with a single GPU and gives the 'show' option. 19 | By setting ``show=True``, it saves the visualization results under 20 | ``out_dir``. 21 | 22 | Args: 23 | model (nn.Module): Model to be tested. 24 | data_loader (DataLoader): Pytorch data loader. 25 | show (bool, optional): Whether to save visualization results. 26 | Default: False. 27 | out_dir (str, optional): The path to save visualization results. 28 | Default: None. 29 | 30 | Returns: 31 | list[dict]: The prediction results. 32 | """ 33 | model.eval() 34 | results = [] 35 | dataset = data_loader.dataset 36 | prog_bar = mmcv.ProgressBar(len(dataset)) 37 | 38 | for i, data in enumerate(data_loader): 39 | with torch.no_grad(): 40 | result = model(return_loss=False, rescale=True, **data) 41 | 42 | if show: 43 | # Visualize the results of MMDetection3D model 44 | # 'show_results' is MMdetection3D visualization API 45 | # (only Base3DDetector is importable in this codebase) 46 | models_3d = (Base3DDetector, ) 47 | if isinstance(model.module, models_3d): 48 | model.module.show_results( 49 | data, 50 | result, 51 | out_dir=out_dir, 52 | show=show, 53 | score_thr=show_score_thr) 54 | # Visualize the results of MMDetection model 55 | # 'show_result' is MMdetection visualization API 56 | else: 57 | batch_size = len(result) 58 | if batch_size == 1 and isinstance(data['img'][0], 59 | torch.Tensor): 60 | img_tensor = data['img'][0] 61 | else: 62 | img_tensor = data['img'][0].data[0] 63 | img_metas = data['img_metas'][0].data[0] 64 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 65 | assert len(imgs) == len(img_metas) 66 | 67 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 68 | h, w, _ = img_meta['img_shape'] 69 | img_show = img[:h, :w, :] 70 | 71 | ori_h, ori_w = img_meta['ori_shape'][:-1] 72 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 73 | 74 | if out_dir: 75 | out_file = osp.join(out_dir, img_meta['ori_filename']) 76 | else: 77 | out_file = None 78 | 79 | model.module.show_result( 80 | img_show, 81 | result[i], 82 | show=show, 83 | out_file=out_file, 84 | score_thr=show_score_thr) 85 | results.extend(result) 86 | 87 | batch_size = len(result) 88 | for _ in range(batch_size): 89 | prog_bar.update() 90 | return results 91 | -------------------------------------------------------------------------------- /mmdet3d/core/__init__.py:
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .anchor import * # noqa: F401, F403 3 | from .bbox import * # noqa: F401, F403 4 | from .evaluation import * # noqa: F401, F403 5 | from .hook import * # noqa: F401, F403 6 | from .points import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | from .visualizer import * # noqa: F401, F403 10 | from .voxel import * # noqa: F401, F403 11 | -------------------------------------------------------------------------------- /mmdet3d/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.anchor import build_prior_generator 3 | from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator, 4 | AlignedAnchor3DRangeGeneratorPerCls, 5 | Anchor3DRangeGenerator) 6 | 7 | __all__ = [ 8 | 'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator', 9 | 'build_prior_generator', 'AlignedAnchor3DRangeGeneratorPerCls' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 3 | from .coders import DeltaXYZWLHRBBoxCoder 4 | # from .bbox_target import bbox_target 5 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 6 | BboxOverlapsNearest3D, 7 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 8 | bbox_overlaps_nearest_3d) 9 | from .samplers import (BaseSampler, CombinedSampler, 10 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 11 | PseudoSampler, RandomSampler, SamplingResult) 12 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, 13 | Coord3DMode, DepthInstance3DBoxes, 14 | LiDARInstance3DBoxes, get_box_type, limit_period, 15 | mono_cam_box2vis, points_cam2img, points_img2cam, 16 | xywhr2xyxyr) 17 | from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back 18 | 19 | __all__ = [ 20 | 'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 21 | 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 22 | 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 23 | 'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 24 | 'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 25 | 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode', 26 | 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi', 27 | 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes', 28 | 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img', 29 | 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis' 30 | ] 31 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner 3 | 4 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox import build_bbox_coder 3 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder 4 | from .centerpoint_bbox_coders import CenterPointBBoxCoder 5 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder 6 | from .fcos3d_bbox_coder import FCOS3DBBoxCoder 7 | from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder 8 | from .monoflex_bbox_coder import MonoFlexCoder 9 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 10 | from .pgd_bbox_coder import PGDBBoxCoder 11 | from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder 12 | from .smoke_bbox_coder import SMOKECoder 13 | 14 | __all__ = [ 15 | 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder', 16 | 'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder', 17 | 'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder', 'PGDBBoxCoder', 'SMOKECoder', 18 | 'MonoFlexCoder' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | 5 | from mmdet.core.bbox.builder import BBOX_CODERS 6 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class AnchorFreeBBoxCoder(PartialBinBasedBBoxCoder): 11 | """Anchor free bbox coder for 3D boxes. 12 | 13 | Args: 14 | num_dir_bins (int): Number of bins to encode direction angle. 15 | with_rot (bool): Whether the bbox is with rotation. 16 | """ 17 | 18 | def __init__(self, num_dir_bins, with_rot=True): 19 | super(AnchorFreeBBoxCoder, self).__init__( 20 | num_dir_bins, 0, [], with_rot=with_rot) 21 | self.num_dir_bins = num_dir_bins 22 | self.with_rot = with_rot 23 | 24 | def encode(self, gt_bboxes_3d, gt_labels_3d): 25 | """Encode ground truth to prediction targets. 26 | 27 | Args: 28 | gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes 29 | with shape (n, 7). 30 | gt_labels_3d (torch.Tensor): Ground truth classes. 31 | 32 | Returns: 33 | tuple: Targets of center, size and direction. 34 | """ 35 | # generate center target 36 | center_target = gt_bboxes_3d.gravity_center 37 | 38 | # generate bbox size target 39 | size_res_target = gt_bboxes_3d.dims / 2 40 | 41 | # generate dir target 42 | box_num = gt_labels_3d.shape[0] 43 | if self.with_rot: 44 | (dir_class_target, 45 | dir_res_target) = self.angle2class(gt_bboxes_3d.yaw) 46 | dir_res_target /= (2 * np.pi / self.num_dir_bins) 47 | else: 48 | dir_class_target = gt_labels_3d.new_zeros(box_num) 49 | dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num) 50 | 51 | return (center_target, size_res_target, dir_class_target, 52 | dir_res_target) 53 | 54 | def decode(self, bbox_out): 55 | """Decode predicted parts to bbox3d. 56 | 57 | Args: 58 | bbox_out (dict): Predictions from model, should contain keys below. 59 | 60 | - center: predicted bottom center of bboxes. 61 | - dir_class: predicted bbox direction class. 62 | - dir_res: predicted bbox direction residual. 
63 | - size: predicted bbox size. 64 | 65 | Returns: 66 | torch.Tensor: Decoded bbox3d with shape (batch, n, 7). 67 | """ 68 | center = bbox_out['center'] 69 | batch_size, num_proposal = center.shape[:2] 70 | 71 | # decode heading angle 72 | if self.with_rot: 73 | dir_class = torch.argmax(bbox_out['dir_class'], -1) 74 | dir_res = torch.gather(bbox_out['dir_res'], 2, 75 | dir_class.unsqueeze(-1)) 76 | dir_res.squeeze_(2) 77 | dir_angle = self.class2angle(dir_class, dir_res).reshape( 78 | batch_size, num_proposal, 1) 79 | else: 80 | dir_angle = center.new_zeros(batch_size, num_proposal, 1) 81 | 82 | # decode bbox size 83 | bbox_size = torch.clamp(bbox_out['size'] * 2, min=0.1) 84 | 85 | bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1) 86 | return bbox3d 87 | 88 | def split_pred(self, cls_preds, reg_preds, base_xyz): 89 | """Split predicted features to specific parts. 90 | 91 | Args: 92 | cls_preds (torch.Tensor): Class predicted features to split. 93 | reg_preds (torch.Tensor): Regression predicted features to split. 94 | base_xyz (torch.Tensor): Coordinates of points. 95 | 96 | Returns: 97 | dict[str, torch.Tensor]: Split results. 98 | """ 99 | results = {} 100 | results['obj_scores'] = cls_preds 101 | 102 | start, end = 0, 0 103 | reg_preds_trans = reg_preds.transpose(2, 1) 104 | 105 | # decode center 106 | end += 3 107 | # (batch_size, num_proposal, 3) 108 | results['center_offset'] = reg_preds_trans[..., start:end] 109 | results['center'] = base_xyz.detach() + reg_preds_trans[..., start:end] 110 | start = end 111 | 112 | # decode center 113 | end += 3 114 | # (batch_size, num_proposal, 3) 115 | results['size'] = reg_preds_trans[..., start:end] 116 | start = end 117 | 118 | # decode direction 119 | end += self.num_dir_bins 120 | results['dir_class'] = reg_preds_trans[..., start:end] 121 | start = end 122 | 123 | end += self.num_dir_bins 124 | dir_res_norm = reg_preds_trans[..., start:end] 125 | start = end 126 | 127 | results['dir_res_norm'] = dir_res_norm 128 | results['dir_res'] = dir_res_norm * (2 * np.pi / self.num_dir_bins) 129 | 130 | return results 131 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from mmdet.core.bbox import BaseBBoxCoder 5 | from mmdet.core.bbox.builder import BBOX_CODERS 6 | 7 | 8 | @BBOX_CODERS.register_module() 9 | class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder): 10 | """Bbox Coder for 3D boxes. 11 | 12 | Args: 13 | code_size (int): The dimension of boxes to be encoded. 14 | """ 15 | 16 | def __init__(self, code_size=7): 17 | super(DeltaXYZWLHRBBoxCoder, self).__init__() 18 | self.code_size = code_size 19 | 20 | @staticmethod 21 | def encode(src_boxes, dst_boxes): 22 | """Get box regression transformation deltas (dx, dy, dz, dx_size, 23 | dy_size, dz_size, dr, dv*) that can be used to transform the 24 | `src_boxes` into the `target_boxes`. 25 | 26 | Args: 27 | src_boxes (torch.Tensor): source boxes, e.g., object proposals. 28 | dst_boxes (torch.Tensor): target of the transformation, e.g., 29 | ground-truth boxes. 30 | 31 | Returns: 32 | torch.Tensor: Box transformation deltas. 
33 | """ 34 | box_ndim = src_boxes.shape[-1] 35 | cas, cgs, cts = [], [], [] 36 | if box_ndim > 7: 37 | xa, ya, za, wa, la, ha, ra, *cas = torch.split( 38 | src_boxes, 1, dim=-1) 39 | xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split( 40 | dst_boxes, 1, dim=-1) 41 | cts = [g - a for g, a in zip(cgs, cas)] 42 | else: 43 | xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1) 44 | xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1) 45 | za = za + ha / 2 46 | zg = zg + hg / 2 47 | diagonal = torch.sqrt(la**2 + wa**2) 48 | xt = (xg - xa) / diagonal 49 | yt = (yg - ya) / diagonal 50 | zt = (zg - za) / ha 51 | lt = torch.log(lg / la) 52 | wt = torch.log(wg / wa) 53 | ht = torch.log(hg / ha) 54 | rt = rg - ra 55 | return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1) 56 | 57 | @staticmethod 58 | def decode(anchors, deltas): 59 | """Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size, 60 | dz_size, dr, dv*) to `boxes`. 61 | 62 | Args: 63 | anchors (torch.Tensor): Parameters of anchors with shape (N, 7). 64 | deltas (torch.Tensor): Encoded boxes with shape 65 | (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*]. 66 | 67 | Returns: 68 | torch.Tensor: Decoded boxes. 69 | """ 70 | cas, cts = [], [] 71 | box_ndim = anchors.shape[-1] 72 | if box_ndim > 7: 73 | xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1) 74 | xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1) 75 | else: 76 | xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) 77 | xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1) 78 | 79 | za = za + ha / 2 80 | diagonal = torch.sqrt(la**2 + wa**2) 81 | xg = xt * diagonal + xa 82 | yg = yt * diagonal + ya 83 | zg = zt * ha + za 84 | 85 | lg = torch.exp(lt) * la 86 | wg = torch.exp(wt) * wa 87 | hg = torch.exp(ht) * ha 88 | rg = rt + ra 89 | zg = zg - hg / 2 90 | cgs = [t + a for t, a in zip(cts, cas)] 91 | return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1) 92 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler, 3 | InstanceBalancedPosSampler, 4 | IoUBalancedNegSampler, OHEMSampler, 5 | PseudoSampler, RandomSampler, 6 | SamplingResult) 7 | from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler 8 | 9 | __all__ = [ 10 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 11 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 12 | 'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_box3d import BaseInstance3DBoxes 3 | from .box_3d_mode import Box3DMode 4 | from .cam_box3d import CameraInstance3DBoxes 5 | from .coord_3d_mode import Coord3DMode 6 | from .depth_box3d import DepthInstance3DBoxes 7 | from .lidar_box3d import LiDARInstance3DBoxes 8 | from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period, 9 | mono_cam_box2vis, points_cam2img, points_img2cam, 10 | rotation_3d_in_axis, xywhr2xyxyr) 11 | 12 | __all__ = [ 13 | 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes', 14 | 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr', 15 | 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img', 16 | 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis', 17 | 'get_proj_mat_by_coord_type' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical): 6 | """Map bboxes from testing scale to original image scale. 7 | 8 | Args: 9 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back. 10 | scale_factor (float): Scale factor. 11 | flip_horizontal (bool): Whether to flip horizontally. 12 | flip_vertical (bool): Whether to flip vertically. 13 | 14 | Returns: 15 | :obj:`BaseInstance3DBoxes`: Boxes mapped back. 16 | """ 17 | new_bboxes = bboxes.clone() 18 | if flip_horizontal: 19 | new_bboxes.flip('horizontal') 20 | if flip_vertical: 21 | new_bboxes.flip('vertical') 22 | new_bboxes.scale(1 / scale_factor) 23 | 24 | return new_bboxes 25 | 26 | 27 | def bbox3d2roi(bbox_list): 28 | """Convert a list of bounding boxes to roi format. 29 | 30 | Args: 31 | bbox_list (list[torch.Tensor]): A list of bounding boxes 32 | corresponding to a batch of images. 33 | 34 | Returns: 35 | torch.Tensor: Region of interests in shape (n, c), where 36 | the channels are in order of [batch_ind, x, y ...]. 37 | """ 38 | rois_list = [] 39 | for img_id, bboxes in enumerate(bbox_list): 40 | if bboxes.size(0) > 0: 41 | img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) 42 | rois = torch.cat([img_inds, bboxes], dim=-1) 43 | else: 44 | rois = torch.zeros_like(bboxes) 45 | rois_list.append(rois) 46 | rois = torch.cat(rois_list, 0) 47 | return rois 48 | 49 | 50 | def bbox3d2result(bboxes, scores, labels, attrs=None): 51 | """Convert detection results to a list of numpy arrays. 52 | 53 | Args: 54 | bboxes (torch.Tensor): Bounding boxes with shape (N, 5). 55 | labels (torch.Tensor): Labels with shape (N, ). 56 | scores (torch.Tensor): Scores with shape (N, ). 
57 | attrs (torch.Tensor, optional): Attributes with shape (N, ). 58 | Defaults to None. 59 | 60 | Returns: 61 | dict[str, torch.Tensor]: Bounding box results in cpu mode. 62 | 63 | - boxes_3d (torch.Tensor): 3D boxes. 64 | - scores (torch.Tensor): Prediction scores. 65 | - labels_3d (torch.Tensor): Box labels. 66 | - attrs_3d (torch.Tensor, optional): Box attributes. 67 | """ 68 | result_dict = dict( 69 | boxes_3d=bboxes.to('cpu'), 70 | scores_3d=scores.cpu(), 71 | labels_3d=labels.cpu()) 72 | 73 | if attrs is not None: 74 | result_dict['attrs_3d'] = attrs.cpu() 75 | 76 | return result_dict 77 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .indoor_eval import indoor_eval 3 | from .instance_seg_eval import instance_seg_eval 4 | from .kitti_utils import kitti_eval, kitti_eval_coco_style 5 | from .lyft_eval import lyft_eval 6 | from .seg_eval import seg_eval 7 | 8 | __all__ = [ 9 | 'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval', 10 | 'seg_eval', 'instance_seg_eval' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/kitti_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .eval import kitti_eval, kitti_eval_coco_style 3 | 4 | __all__ = ['kitti_eval', 'kitti_eval_coco_style'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .evaluate_semantic_instance import evaluate_matches, scannet_eval 3 | 4 | __all__ = ['scannet_eval', 'evaluate_matches'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/util_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/util_3d.py # noqa 3 | import json 4 | 5 | import numpy as np 6 | 7 | 8 | class Instance: 9 | """Single instance for ScanNet evaluator. 10 | 11 | Args: 12 | mesh_vert_instances (np.array): Instance ids for each point. 13 | instance_id: Id of single instance. 
14 | """ 15 | instance_id = 0 16 | label_id = 0 17 | vert_count = 0 18 | med_dist = -1 19 | dist_conf = 0.0 20 | 21 | def __init__(self, mesh_vert_instances, instance_id): 22 | if instance_id == -1: 23 | return 24 | self.instance_id = int(instance_id) 25 | self.label_id = int(self.get_label_id(instance_id)) 26 | self.vert_count = int( 27 | self.get_instance_verts(mesh_vert_instances, instance_id)) 28 | 29 | @staticmethod 30 | def get_label_id(instance_id): 31 | return int(instance_id // 1000) 32 | 33 | @staticmethod 34 | def get_instance_verts(mesh_vert_instances, instance_id): 35 | return (mesh_vert_instances == instance_id).sum() 36 | 37 | def to_json(self): 38 | return json.dumps( 39 | self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 40 | 41 | def to_dict(self): 42 | dict = {} 43 | dict['instance_id'] = self.instance_id 44 | dict['label_id'] = self.label_id 45 | dict['vert_count'] = self.vert_count 46 | dict['med_dist'] = self.med_dist 47 | dict['dist_conf'] = self.dist_conf 48 | return dict 49 | 50 | def from_json(self, data): 51 | self.instance_id = int(data['instance_id']) 52 | self.label_id = int(data['label_id']) 53 | self.vert_count = int(data['vert_count']) 54 | if 'med_dist' in data: 55 | self.med_dist = float(data['med_dist']) 56 | self.dist_conf = float(data['dist_conf']) 57 | 58 | def __str__(self): 59 | return '(' + str(self.instance_id) + ')' 60 | 61 | 62 | def get_instances(ids, class_ids, class_labels, id2label): 63 | """Transform gt instance mask to Instance objects. 64 | 65 | Args: 66 | ids (np.array): Instance ids for each point. 67 | class_ids: (tuple[int]): Ids of valid categories. 68 | class_labels (tuple[str]): Class names. 69 | id2label: (dict[int, str]): Mapping of valid class id to class label. 70 | 71 | Returns: 72 | dict [str, list]: Instance objects grouped by class label. 73 | """ 74 | instances = {} 75 | for label in class_labels: 76 | instances[label] = [] 77 | instance_ids = np.unique(ids) 78 | for id in instance_ids: 79 | if id == 0: 80 | continue 81 | inst = Instance(ids, id) 82 | if inst.label_id in class_ids: 83 | instances[id2label[inst.label_id]].append(inst.to_dict()) 84 | return instances 85 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/seg_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | from mmcv.utils import print_log 4 | from terminaltables import AsciiTable 5 | 6 | 7 | def fast_hist(preds, labels, num_classes): 8 | """Compute the confusion matrix for every batch. 9 | 10 | Args: 11 | preds (np.ndarray): Prediction labels of points with shape of 12 | (num_points, ). 13 | labels (np.ndarray): Ground truth labels of points with shape of 14 | (num_points, ). 15 | num_classes (int): number of classes 16 | 17 | Returns: 18 | np.ndarray: Calculated confusion matrix. 19 | """ 20 | 21 | k = (labels >= 0) & (labels < num_classes) 22 | bin_count = np.bincount( 23 | num_classes * labels[k].astype(int) + preds[k], 24 | minlength=num_classes**2) 25 | return bin_count[:num_classes**2].reshape(num_classes, num_classes) 26 | 27 | 28 | def per_class_iou(hist): 29 | """Compute the per class iou. 30 | 31 | Args: 32 | hist(np.ndarray): Overall confusion martix 33 | (num_classes, num_classes ). 
34 | 35 | Returns: 36 | np.ndarray: Calculated per class iou 37 | """ 38 | 39 | return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 40 | 41 | 42 | def get_acc(hist): 43 | """Compute the overall accuracy. 44 | 45 | Args: 46 | hist(np.ndarray): Overall confusion martix 47 | (num_classes, num_classes ). 48 | 49 | Returns: 50 | float: Calculated overall acc 51 | """ 52 | 53 | return np.diag(hist).sum() / hist.sum() 54 | 55 | 56 | def get_acc_cls(hist): 57 | """Compute the class average accuracy. 58 | 59 | Args: 60 | hist(np.ndarray): Overall confusion martix 61 | (num_classes, num_classes ). 62 | 63 | Returns: 64 | float: Calculated class average acc 65 | """ 66 | 67 | return np.nanmean(np.diag(hist) / hist.sum(axis=1)) 68 | 69 | 70 | def seg_eval(gt_labels, seg_preds, label2cat, ignore_index, logger=None): 71 | """Semantic Segmentation Evaluation. 72 | 73 | Evaluate the result of the Semantic Segmentation. 74 | 75 | Args: 76 | gt_labels (list[torch.Tensor]): Ground truth labels. 77 | seg_preds (list[torch.Tensor]): Predictions. 78 | label2cat (dict): Map from label to category name. 79 | ignore_index (int): Index that will be ignored in evaluation. 80 | logger (logging.Logger | str, optional): The way to print the mAP 81 | summary. See `mmdet.utils.print_log()` for details. Default: None. 82 | 83 | Returns: 84 | dict[str, float]: Dict of results. 85 | """ 86 | assert len(seg_preds) == len(gt_labels) 87 | num_classes = len(label2cat) 88 | 89 | hist_list = [] 90 | for i in range(len(gt_labels)): 91 | gt_seg = gt_labels[i].clone().numpy().astype(np.int) 92 | pred_seg = seg_preds[i].clone().numpy().astype(np.int) 93 | 94 | # filter out ignored points 95 | pred_seg[gt_seg == ignore_index] = -1 96 | gt_seg[gt_seg == ignore_index] = -1 97 | 98 | # calculate one instance result 99 | hist_list.append(fast_hist(pred_seg, gt_seg, num_classes)) 100 | 101 | iou = per_class_iou(sum(hist_list)) 102 | miou = np.nanmean(iou) 103 | acc = get_acc(sum(hist_list)) 104 | acc_cls = get_acc_cls(sum(hist_list)) 105 | 106 | header = ['classes'] 107 | for i in range(len(label2cat)): 108 | header.append(label2cat[i]) 109 | header.extend(['miou', 'acc', 'acc_cls']) 110 | 111 | ret_dict = dict() 112 | table_columns = [['results']] 113 | for i in range(len(label2cat)): 114 | ret_dict[label2cat[i]] = float(iou[i]) 115 | table_columns.append([f'{iou[i]:.4f}']) 116 | ret_dict['miou'] = float(miou) 117 | ret_dict['acc'] = float(acc) 118 | ret_dict['acc_cls'] = float(acc_cls) 119 | 120 | table_columns.append([f'{miou:.4f}']) 121 | table_columns.append([f'{acc:.4f}']) 122 | table_columns.append([f'{acc_cls:.4f}']) 123 | 124 | table_data = [header] 125 | table_rows = list(zip(*table_columns)) 126 | table_data += table_rows 127 | table = AsciiTable(table_data) 128 | table.inner_footing_row_border = True 129 | print_log('\n' + table.table, logger=logger) 130 | 131 | return ret_dict 132 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/waymo_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .prediction_kitti_to_waymo import KITTI2Waymo 3 | 4 | __all__ = ['KITTI2Waymo'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
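# The hooks exported below are registered in mmcv's HOOKS registry. A minimal
# sketch of how they could be enabled from a training config (assumed fragment,
# not copied from this repo's configs):
#
#   custom_hooks = [
#       dict(type='MEGVIIEMAHook', decay=0.999),
#       dict(type='SequentialControlHook', temporal_start_epoch=1),
#       dict(type='SyncbnControlHook', syncbn_start_epoch=1),
#   ]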
2 | from .ema import MEGVIIEMAHook 3 | from .utils import is_parallel 4 | from .sequentialcontrol import SequentialControlHook 5 | from .syncbncontrol import SyncbnControlHook 6 | 7 | __all__ = ['MEGVIIEMAHook', 'is_parallel', 'SequentialControlHook', 8 | 'SyncbnControlHook'] 9 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/ema.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # modified from megvii-bevdepth. 3 | import math 4 | import os 5 | from copy import deepcopy 6 | 7 | import torch 8 | from mmcv.runner import load_state_dict 9 | from mmcv.runner.dist_utils import master_only 10 | from mmcv.runner.hooks import HOOKS, Hook 11 | 12 | from mmdet3d.core.hook.utils import is_parallel 13 | 14 | __all__ = ['ModelEMA'] 15 | 16 | 17 | class ModelEMA: 18 | """Model Exponential Moving Average from https://github.com/rwightman/ 19 | pytorch-image-models Keep a moving average of everything in the model 20 | state_dict (parameters and buffers). 21 | 22 | This is intended to allow functionality like 23 | https://www.tensorflow.org/api_docs/python/tf/train/ 24 | ExponentialMovingAverage 25 | A smoothed version of the weights is necessary for some training 26 | schemes to perform well. 27 | This class is sensitive where it is initialized in the sequence 28 | of model init, GPU assignment and distributed training wrappers. 29 | """ 30 | 31 | def __init__(self, model, decay=0.9999, updates=0): 32 | """ 33 | Args: 34 | model (nn.Module): model to apply EMA. 35 | decay (float): ema decay reate. 36 | updates (int): counter of EMA updates. 37 | """ 38 | # Create EMA(FP32) 39 | self.ema_model = deepcopy(model).eval() 40 | self.ema = self.ema_model.module.module if is_parallel( 41 | self.ema_model.module) else self.ema_model.module 42 | self.updates = updates 43 | # decay exponential ramp (to help early epochs) 44 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) 45 | for p in self.ema.parameters(): 46 | p.requires_grad_(False) 47 | 48 | def update(self, trainer, model): 49 | # Update EMA parameters 50 | with torch.no_grad(): 51 | self.updates += 1 52 | d = self.decay(self.updates) 53 | 54 | msd = model.module.state_dict() if is_parallel( 55 | model) else model.state_dict() # model state_dict 56 | for k, v in self.ema.state_dict().items(): 57 | if v.dtype.is_floating_point: 58 | v *= d 59 | v += (1.0 - d) * msd[k].detach() 60 | 61 | 62 | @HOOKS.register_module() 63 | class MEGVIIEMAHook(Hook): 64 | """EMAHook used in BEVDepth. 65 | 66 | Modified from https://github.com/Megvii-Base 67 | Detection/BEVDepth/blob/main/callbacks/ema.py. 
68 | """ 69 | 70 | def __init__(self, init_updates=0, decay=0.9990, resume=None): 71 | super().__init__() 72 | self.init_updates = init_updates 73 | self.resume = resume 74 | self.decay = decay 75 | 76 | def before_run(self, runner): 77 | from torch.nn.modules.batchnorm import SyncBatchNorm 78 | 79 | bn_model_list = list() 80 | bn_model_dist_group_list = list() 81 | for model_ref in runner.model.modules(): 82 | if isinstance(model_ref, SyncBatchNorm): 83 | bn_model_list.append(model_ref) 84 | bn_model_dist_group_list.append(model_ref.process_group) 85 | model_ref.process_group = None 86 | runner.ema_model = ModelEMA(runner.model, self.decay) 87 | 88 | for bn_model, dist_group in zip(bn_model_list, 89 | bn_model_dist_group_list): 90 | bn_model.process_group = dist_group 91 | runner.ema_model.updates = self.init_updates 92 | 93 | if self.resume is not None: 94 | runner.logger.info(f'resume ema checkpoint from {self.resume}') 95 | cpt = torch.load(self.resume, map_location='cpu') 96 | load_state_dict(runner.ema_model.ema, cpt['state_dict']) 97 | runner.ema_model.updates = cpt['updates'] 98 | 99 | def after_train_iter(self, runner): 100 | runner.ema_model.update(runner, runner.model.module) 101 | 102 | def after_train_epoch(self, runner): 103 | self.save_checkpoint(runner) 104 | 105 | @master_only 106 | def save_checkpoint(self, runner): 107 | state_dict = runner.ema_model.ema.state_dict() 108 | ema_checkpoint = { 109 | 'epoch': runner.epoch, 110 | 'state_dict': state_dict, 111 | 'updates': runner.ema_model.updates 112 | } 113 | save_path = f'epoch_{runner.epoch+1}_ema.pth' 114 | save_path = os.path.join(runner.work_dir, save_path) 115 | torch.save(ema_checkpoint, save_path) 116 | runner.logger.info(f'Saving ema checkpoint at {save_path}') 117 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/sequentialcontrol.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.runner.hooks import HOOKS, Hook 3 | from mmdet3d.core.hook.utils import is_parallel 4 | 5 | __all__ = ['SequentialControlHook'] 6 | 7 | 8 | @HOOKS.register_module() 9 | class SequentialControlHook(Hook): 10 | """ """ 11 | 12 | def __init__(self, temporal_start_epoch=1): 13 | super().__init__() 14 | self.temporal_start_epoch=temporal_start_epoch 15 | 16 | def set_temporal_flag(self, runner, flag): 17 | if is_parallel(runner.model.module): 18 | runner.model.module.module.with_prev=flag 19 | else: 20 | runner.model.module.with_prev = flag 21 | 22 | def before_run(self, runner): 23 | self.set_temporal_flag(runner, False) 24 | 25 | def before_train_epoch(self, runner): 26 | if runner.epoch > self.temporal_start_epoch: 27 | self.set_temporal_flag(runner, True) -------------------------------------------------------------------------------- /mmdet3d/core/hook/syncbncontrol.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
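# Hook that converts the model's BatchNorm layers to SyncBatchNorm in place once
# training reaches `syncbn_start_epoch` (see SyncbnControlHook below).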
2 | from mmcv.runner.hooks import HOOKS, Hook 3 | from mmdet3d.core.hook.utils import is_parallel 4 | from torch.nn import SyncBatchNorm 5 | 6 | __all__ = ['SyncbnControlHook'] 7 | 8 | 9 | @HOOKS.register_module() 10 | class SyncbnControlHook(Hook): 11 | """ """ 12 | 13 | def __init__(self, syncbn_start_epoch=1): 14 | super().__init__() 15 | self.is_syncbn=False 16 | self.syncbn_start_epoch = syncbn_start_epoch 17 | 18 | def cvt_syncbn(self, runner): 19 | if is_parallel(runner.model.module): 20 | runner.model.module.module=\ 21 | SyncBatchNorm.convert_sync_batchnorm(runner.model.module.module, 22 | process_group=None) 23 | else: 24 | runner.model.module=\ 25 | SyncBatchNorm.convert_sync_batchnorm(runner.model.module, 26 | process_group=None) 27 | 28 | def before_train_epoch(self, runner): 29 | if runner.epoch>= self.syncbn_start_epoch and not self.is_syncbn: 30 | print('start use syncbn') 31 | self.cvt_syncbn(runner) 32 | self.is_syncbn=True 33 | 34 | -------------------------------------------------------------------------------- /mmdet3d/core/hook/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from torch import nn 3 | 4 | __all__ = ['is_parallel'] 5 | 6 | 7 | def is_parallel(model): 8 | """check if model is in parallel mode.""" 9 | parallel_type = ( 10 | nn.parallel.DataParallel, 11 | nn.parallel.DistributedDataParallel, 12 | ) 13 | return isinstance(model, parallel_type) 14 | -------------------------------------------------------------------------------- /mmdet3d/core/points/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | from .cam_points import CameraPoints 4 | from .depth_points import DepthPoints 5 | from .lidar_points import LiDARPoints 6 | 7 | __all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints'] 8 | 9 | 10 | def get_points_type(points_type): 11 | """Get the class of points according to coordinate type. 12 | 13 | Args: 14 | points_type (str): The type of points coordinate. 15 | The valid value are "CAMERA", "LIDAR", or "DEPTH". 16 | 17 | Returns: 18 | class: Points type. 19 | """ 20 | if points_type == 'CAMERA': 21 | points_cls = CameraPoints 22 | elif points_type == 'LIDAR': 23 | points_cls = LiDARPoints 24 | elif points_type == 'DEPTH': 25 | points_cls = DepthPoints 26 | else: 27 | raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"' 28 | f' are supported, got {points_type}') 29 | 30 | return points_cls 31 | -------------------------------------------------------------------------------- /mmdet3d/core/points/cam_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class CameraPoints(BasePoints): 6 | """Points of instances in CAM coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 
19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(CameraPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 1 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 2] = -self.tensor[:, 2] 39 | 40 | @property 41 | def bev(self): 42 | """torch.Tensor: BEV of the points in shape (N, 2).""" 43 | return self.tensor[:, [0, 2]] 44 | 45 | def convert_to(self, dst, rt_mat=None): 46 | """Convert self to ``dst`` mode. 47 | 48 | Args: 49 | dst (:obj:`CoordMode`): The target Point mode. 50 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 51 | translation matrix between different coordinates. 52 | Defaults to None. 53 | The conversion from `src` coordinates to `dst` coordinates 54 | usually comes along the change of sensors, e.g., from camera 55 | to LiDAR. This requires a transformation matrix. 56 | 57 | Returns: 58 | :obj:`BasePoints`: The converted point of the same type 59 | in the `dst` mode. 60 | """ 61 | from mmdet3d.core.bbox import Coord3DMode 62 | return Coord3DMode.convert_point( 63 | point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat) 64 | -------------------------------------------------------------------------------- /mmdet3d/core/points/depth_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class DepthPoints(BasePoints): 6 | """Points of instances in DEPTH coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(DepthPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 1] = -self.tensor[:, 1] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 
45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/points/lidar_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class LiDARPoints(BasePoints): 6 | """Points of instances in LIDAR coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(LiDARPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 1] = -self.tensor[:, 1] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 0] = -self.tensor[:, 0] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
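# Re-exports mmdet's 2D test-time-augmentation merge utilities together with the
# 3D helpers defined in this package: BEV NMS variants (nms_bev, nms_normal_bev),
# multi-class and aligned 3D NMS, circle NMS and merge_aug_bboxes_3d.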
2 | from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores, 4 | multiclass_nms) 5 | from .box3d_nms import (aligned_3d_nms, box3d_multiclass_nms, circle_nms, 6 | nms_bev, nms_normal_bev) 7 | from .merge_augs import merge_aug_bboxes_3d 8 | 9 | __all__ = [ 10 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 11 | 'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms', 12 | 'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms', 'nms_bev', 13 | 'nms_normal_bev' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from mmdet3d.core.post_processing import nms_bev, nms_normal_bev 5 | from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr 6 | 7 | 8 | def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg): 9 | """Merge augmented detection 3D bboxes and scores. 10 | 11 | Args: 12 | aug_results (list[dict]): The dict of detection results. 13 | The dict contains the following keys 14 | 15 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. 16 | - scores_3d (torch.Tensor): Detection scores. 17 | - labels_3d (torch.Tensor): Predicted box labels. 18 | img_metas (list[dict]): Meta information of each sample. 19 | test_cfg (dict): Test config. 20 | 21 | Returns: 22 | dict: Bounding boxes results in cpu mode, containing merged results. 23 | 24 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox. 25 | - scores_3d (torch.Tensor): Merged detection scores. 26 | - labels_3d (torch.Tensor): Merged predicted box labels. 
27 | """ 28 | 29 | assert len(aug_results) == len(img_metas), \ 30 | '"aug_results" should have the same length as "img_metas", got len(' \ 31 | f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}' 32 | 33 | recovered_bboxes = [] 34 | recovered_scores = [] 35 | recovered_labels = [] 36 | 37 | for bboxes, img_info in zip(aug_results, img_metas): 38 | scale_factor = img_info[0]['pcd_scale_factor'] 39 | pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip'] 40 | pcd_vertical_flip = img_info[0]['pcd_vertical_flip'] 41 | recovered_scores.append(bboxes['scores_3d']) 42 | recovered_labels.append(bboxes['labels_3d']) 43 | bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor, 44 | pcd_horizontal_flip, pcd_vertical_flip) 45 | recovered_bboxes.append(bboxes) 46 | 47 | aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes) 48 | aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev) 49 | aug_scores = torch.cat(recovered_scores, dim=0) 50 | aug_labels = torch.cat(recovered_labels, dim=0) 51 | 52 | # TODO: use a more elegent way to deal with nms 53 | if test_cfg.use_rotate_nms: 54 | nms_func = nms_bev 55 | else: 56 | nms_func = nms_normal_bev 57 | 58 | merged_bboxes = [] 59 | merged_scores = [] 60 | merged_labels = [] 61 | 62 | # Apply multi-class nms when merge bboxes 63 | if len(aug_labels) == 0: 64 | return bbox3d2result(aug_bboxes, aug_scores, aug_labels) 65 | 66 | for class_id in range(torch.max(aug_labels).item() + 1): 67 | class_inds = (aug_labels == class_id) 68 | bboxes_i = aug_bboxes[class_inds] 69 | bboxes_nms_i = aug_bboxes_for_nms[class_inds, :] 70 | scores_i = aug_scores[class_inds] 71 | labels_i = aug_labels[class_inds] 72 | if len(bboxes_nms_i) == 0: 73 | continue 74 | selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr) 75 | 76 | merged_bboxes.append(bboxes_i[selected, :]) 77 | merged_scores.append(scores_i[selected]) 78 | merged_labels.append(labels_i[selected]) 79 | 80 | merged_bboxes = merged_bboxes[0].cat(merged_bboxes) 81 | merged_scores = torch.cat(merged_scores, dim=0) 82 | merged_labels = torch.cat(merged_labels, dim=0) 83 | 84 | _, order = merged_scores.sort(0, descending=True) 85 | num = min(test_cfg.max_num, len(aug_bboxes)) 86 | order = order[:num] 87 | 88 | merged_bboxes = merged_bboxes[order] 89 | merged_scores = merged_scores[order] 90 | merged_labels = merged_labels[order] 91 | 92 | return bbox3d2result(merged_bboxes, merged_scores, merged_labels) 93 | -------------------------------------------------------------------------------- /mmdet3d/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .array_converter import ArrayConverter, array_converter 3 | from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d, 4 | gaussian_radius, get_ellip_gaussian_2D) 5 | 6 | __all__ = [ 7 | 'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian', 8 | 'ArrayConverter', 'array_converter', 'ellip_gaussian2D', 9 | 'get_ellip_gaussian_2D' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/core/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
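# Qualitative visualisation helpers: show_result (point cloud with ground-truth
# and predicted boxes), show_seg_result (per-point segmentation colours) and
# show_multi_modality_result (3D boxes projected onto the paired image).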
2 | from .show_result import (show_multi_modality_result, show_result, 3 | show_seg_result) 4 | 5 | __all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result'] 6 | -------------------------------------------------------------------------------- /mmdet3d/core/voxel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_voxel_generator 3 | from .voxel_generator import VoxelGenerator 4 | 5 | __all__ = ['build_voxel_generator', 'VoxelGenerator'] 6 | -------------------------------------------------------------------------------- /mmdet3d/core/voxel/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | from . import voxel_generator 5 | 6 | 7 | def build_voxel_generator(cfg, **kwargs): 8 | """Builder of voxel generator.""" 9 | if isinstance(cfg, voxel_generator.VoxelGenerator): 10 | return cfg 11 | elif isinstance(cfg, dict): 12 | return mmcv.runner.obj_from_dict( 13 | cfg, voxel_generator, default_args=kwargs) 14 | else: 15 | raise TypeError('Invalid type {} for building a sampler'.format( 16 | type(cfg))) 17 | -------------------------------------------------------------------------------- /mmdet3d/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.datasets.builder import build_dataloader 3 | from .builder import DATASETS, PIPELINES, build_dataset 4 | from .custom_3d import Custom3DDataset 5 | from .nuscenes_dataset import NuScenesDataset 6 | from .nuscenes_dataset_occ import NuScenesDatasetOccpancy 7 | # yapf: disable 8 | from .pipelines import (LoadAnnotations3D, LoadPointsFromFile, PointsLidar2Ego, PointsRangeFilter) 9 | # yapf: enable 10 | from .utils import get_loading_pipeline 11 | 12 | __all__ = [ 13 | 'build_dataloader', 'DATASETS', 'build_dataset', 'NuScenesDataset', 14 | 'PointsRangeFilter', 'LoadPointsFromFile', 'LoadAnnotations3D', 'Custom3DDataset', 15 | 'PointsLidar2Ego', 'get_loading_pipeline', 'PIPELINES', 'NuScenesDatasetOccpancy' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet3d/datasets/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
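# build_dataset() below resolves wrapper datasets recursively. A minimal sketch
# of an assumed config fragment (not copied from this repo's configs; the
# ann_file path and `train_pipeline` are placeholders) wrapping the occupancy
# dataset with class-balanced group sampling:
#
#   train=dict(
#       type='CBGSDataset',
#       dataset=dict(
#           type='NuScenesDatasetOccpancy',
#           ann_file=...,          # path to the generated info .pkl
#           pipeline=train_pipeline))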
2 | import platform 3 | 4 | from mmcv.utils import Registry, build_from_cfg 5 | 6 | from mmdet.datasets import DATASETS as MMDET_DATASETS 7 | from mmdet.datasets.builder import _concat_dataset 8 | 9 | if platform.system() != 'Windows': 10 | # https://github.com/pytorch/pytorch/issues/973 11 | import resource 12 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 13 | base_soft_limit = rlimit[0] 14 | hard_limit = rlimit[1] 15 | soft_limit = min(max(4096, base_soft_limit), hard_limit) 16 | resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) 17 | 18 | OBJECTSAMPLERS = Registry('Object sampler') 19 | DATASETS = Registry('dataset') 20 | PIPELINES = Registry('pipeline') 21 | 22 | 23 | def build_dataset(cfg, default_args=None): 24 | from mmdet3d.datasets.dataset_wrappers import CBGSDataset 25 | from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset, 26 | ConcatDataset, RepeatDataset) 27 | if isinstance(cfg, (list, tuple)): 28 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 29 | elif cfg['type'] == 'ConcatDataset': 30 | dataset = ConcatDataset( 31 | [build_dataset(c, default_args) for c in cfg['datasets']], 32 | cfg.get('separate_eval', True)) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif cfg['type'] == 'ClassBalancedDataset': 37 | dataset = ClassBalancedDataset( 38 | build_dataset(cfg['dataset'], default_args), cfg['oversample_thr']) 39 | elif cfg['type'] == 'CBGSDataset': 40 | dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args)) 41 | elif isinstance(cfg.get('ann_file'), (list, tuple)): 42 | dataset = _concat_dataset(cfg, default_args) 43 | elif cfg['type'] in DATASETS._module_dict.keys(): 44 | dataset = build_from_cfg(cfg, DATASETS, default_args) 45 | else: 46 | dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args) 47 | return dataset 48 | -------------------------------------------------------------------------------- /mmdet3d/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | 4 | from .builder import DATASETS 5 | 6 | 7 | @DATASETS.register_module() 8 | class CBGSDataset(object): 9 | """A wrapper of class sampled dataset with ann_file path. Implementation of 10 | paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object 11 | Detection `_. 12 | 13 | Balance the number of scenes under different classes. 14 | 15 | Args: 16 | dataset (:obj:`CustomDataset`): The dataset to be class sampled. 17 | """ 18 | 19 | def __init__(self, dataset): 20 | self.dataset = dataset 21 | self.CLASSES = dataset.CLASSES 22 | self.cat2id = {name: i for i, name in enumerate(self.CLASSES)} 23 | self.sample_indices = self._get_sample_indices() 24 | # self.dataset.data_infos = self.data_infos 25 | if hasattr(self.dataset, 'flag'): 26 | self.flag = np.array( 27 | [self.dataset.flag[ind] for ind in self.sample_indices], 28 | dtype=np.uint8) 29 | 30 | def _get_sample_indices(self): 31 | """Load annotations from ann_file. 32 | 33 | Args: 34 | ann_file (str): Path of the annotation file. 35 | 36 | Returns: 37 | list[dict]: List of annotations after class sampling. 
38 | """ 39 | class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()} 40 | for idx in range(len(self.dataset)): 41 | sample_cat_ids = self.dataset.get_cat_ids(idx) 42 | for cat_id in sample_cat_ids: 43 | class_sample_idxs[cat_id].append(idx) 44 | duplicated_samples = sum( 45 | [len(v) for _, v in class_sample_idxs.items()]) 46 | class_distribution = { 47 | k: len(v) / duplicated_samples 48 | for k, v in class_sample_idxs.items() 49 | } 50 | 51 | sample_indices = [] 52 | 53 | frac = 1.0 / len(self.CLASSES) 54 | ratios = [frac / v for v in class_distribution.values()] 55 | for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios): 56 | sample_indices += np.random.choice(cls_inds, 57 | int(len(cls_inds) * 58 | ratio)).tolist() 59 | return sample_indices 60 | 61 | def __getitem__(self, idx): 62 | """Get item from infos according to the given index. 63 | 64 | Returns: 65 | dict: Data dictionary of the corresponding index. 66 | """ 67 | ori_idx = self.sample_indices[idx] 68 | return self.dataset[ori_idx] 69 | 70 | def __len__(self): 71 | """Return the length of data infos. 72 | 73 | Returns: 74 | int: Length of data infos. 75 | """ 76 | return len(self.sample_indices) 77 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .dbsampler import DataBaseSampler 4 | from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D 5 | from .loading import (LoadAnnotations3D, LoadAnnotationsAll, 6 | LoadPointsFromFile, 7 | PointToMultiViewDepth, LoadOccGTFromFile) 8 | from .test_time_aug import MultiScaleFlipAug3D 9 | # yapf: disable 10 | from .transforms_3d import (PointsRangeFilter, PointsLidar2Ego) 11 | 12 | __all__ = [ 13 | 'PointsRangeFilter', 'Collect3D', 14 | 'Compose', 'LoadPointsFromFile', 15 | 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 16 | 'LoadAnnotations3D', 'MultiScaleFlipAug3D', 'PointsLidar2Ego', 17 | 'LoadAnnotationsAll', 'PointToMultiViewDepth', 18 | 'LoadOccGTFromFile' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/aug_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | from PIL import Image 5 | from PIL import ImageEnhance 6 | 7 | 8 | class Grid(object): 9 | def __init__(self, d1, d2, rotate=1, ratio=0.5, mode=0, prob=1.): 10 | self.d1 = d1 11 | self.d2 = d2 12 | self.rotate = rotate 13 | self.ratio = ratio 14 | self.mode = mode 15 | self.st_prob = self.prob = prob 16 | 17 | def set_prob(self, epoch, max_epoch): 18 | self.prob = self.st_prob * min(1, epoch / max_epoch) 19 | 20 | def __call__(self, img): 21 | if np.random.rand() > self.prob: 22 | return img 23 | h = img.size[1] 24 | w = img.size[0] 25 | 26 | # 1.5 * h, 1.5 * w works fine with the squared images 27 | # But with rectangular input, the mask might not be able to recover back to the input image shape 28 | # A square mask with edge length equal to the diagnoal of the input image 29 | # will be able to cover all the image spot after the rotation. This is also the minimum square. 30 | hh = math.ceil((math.sqrt(h * h + w * w))) 31 | 32 | d = np.random.randint(self.d1, self.d2) 33 | # d = self.d 34 | 35 | # maybe use ceil? 
but i guess no big difference 36 | self.l = math.ceil(d * self.ratio) 37 | 38 | mask = np.ones((hh, hh), np.float32) 39 | st_h = np.random.randint(d) 40 | st_w = np.random.randint(d) 41 | for i in range(-1, hh // d + 1): 42 | s = d * i + st_h 43 | t = s + self.l 44 | s = max(min(s, hh), 0) 45 | t = max(min(t, hh), 0) 46 | mask[s:t, :] *= 0 47 | for i in range(-1, hh // d + 1): 48 | s = d * i + st_w 49 | t = s + self.l 50 | s = max(min(s, hh), 0) 51 | t = max(min(t, hh), 0) 52 | mask[:, s:t] *= 0 53 | r = np.random.randint(self.rotate) 54 | mask = Image.fromarray(np.uint8(mask)) 55 | mask = mask.rotate(r) 56 | mask = np.asarray(mask) 57 | mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (hh - w) // 2:(hh - w) // 2 + w] 58 | 59 | # mask = torch.from_numpy(mask).float() 60 | mask = mask.reshape(mask.shape[0], mask.shape[1], 1) 61 | if self.mode == 1: 62 | mask = 1 - mask 63 | img = img * mask 64 | 65 | return img 66 | 67 | 68 | def get_grid_mask_img(img, d1=2, d2=100, rotate=(1, 120), ratio=(0.05, 0.25)): 69 | if type(rotate) == tuple or type(rotate) == list: 70 | rotate = np.random.randint(rotate[0], rotate[1]) 71 | if type(ratio) == tuple or type(ratio) == list: 72 | ratio = np.random.uniform(ratio[0], ratio[1]) 73 | gd = Grid(d1, d2, rotate=rotate, ratio=ratio) 74 | return Image.fromarray(gd(img)) 75 | 76 | 77 | def get_darker_img(img, ratio=(0.5, 1.5)): 78 | brighter = ImageEnhance.Brightness(img) 79 | if type(ratio) == tuple or type(ratio) == list: 80 | ratio = np.random.uniform(ratio[0], ratio[1]) 81 | return brighter.enhance(ratio) 82 | 83 | 84 | def get_contrast_img(img, ratio=(0.7, 1.3)): 85 | contraster = ImageEnhance.Contrast(img) 86 | if type(ratio) == tuple or type(ratio) == list: 87 | ratio = np.random.uniform(ratio[0], ratio[1]) 88 | return contraster.enhance(ratio) 89 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES 7 | from ..builder import PIPELINES 8 | 9 | 10 | @PIPELINES.register_module() 11 | class Compose: 12 | """Compose multiple transforms sequentially. The pipeline registry of 13 | mmdet3d separates with mmdet, however, sometimes we may need to use mmdet's 14 | pipeline. So the class is rewritten to be able to use pipelines from both 15 | mmdet3d and mmdet. 16 | 17 | Args: 18 | transforms (Sequence[dict | callable]): Sequence of transform object or 19 | config dict to be composed. 20 | """ 21 | 22 | def __init__(self, transforms): 23 | assert isinstance(transforms, collections.abc.Sequence) 24 | self.transforms = [] 25 | for transform in transforms: 26 | if isinstance(transform, dict): 27 | _, key = PIPELINES.split_scope_key(transform['type']) 28 | if key in PIPELINES._module_dict.keys(): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | else: 31 | transform = build_from_cfg(transform, MMDET_PIPELINES) 32 | self.transforms.append(transform) 33 | elif callable(transform): 34 | self.transforms.append(transform) 35 | else: 36 | raise TypeError('transform must be callable or a dict') 37 | 38 | def __call__(self, data): 39 | """Call function to apply transforms sequentially. 40 | 41 | Args: 42 | data (dict): A result dict contains the data to transform. 43 | 44 | Returns: 45 | dict: Transformed data. 
46 | """ 47 | 48 | for t in self.transforms: 49 | data = t(data) 50 | if data is None: 51 | return None 52 | return data 53 | 54 | def __repr__(self): 55 | format_string = self.__class__.__name__ + '(' 56 | for t in self.transforms: 57 | format_string += '\n' 58 | format_string += f' {t}' 59 | format_string += '\n)' 60 | return format_string 61 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/transforms_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from pyquaternion import Quaternion 4 | 5 | from ..builder import PIPELINES 6 | 7 | 8 | @PIPELINES.register_module() 9 | class PointsLidar2Ego(object): 10 | def __call__(self, input_dict): 11 | points = input_dict['points'] 12 | lidar2ego_rots = torch.tensor(Quaternion(input_dict['curr']['lidar2ego_rotation']).rotation_matrix).float() 13 | lidar2ego_trans = torch.tensor(input_dict['curr']['lidar2ego_translation']).float() 14 | points.tensor[:, :3] = ( 15 | points.tensor[:, :3] @ lidar2ego_rots.T 16 | ) 17 | points.tensor[:, :3] += lidar2ego_trans 18 | input_dict['points'] = points 19 | return input_dict 20 | 21 | 22 | @PIPELINES.register_module() 23 | class PointsRangeFilter(object): 24 | """Filter points by the range. 25 | 26 | Args: 27 | point_cloud_range (list[float]): Point cloud range. 28 | """ 29 | 30 | def __init__(self, point_cloud_range): 31 | self.pcd_range = np.array(point_cloud_range, dtype=np.float32) 32 | 33 | def __call__(self, input_dict): 34 | """Call function to filter points by the range. 35 | 36 | Args: 37 | input_dict (dict): Result dict from loading pipeline. 38 | 39 | Returns: 40 | dict: Results after filtering, 'points', 'pts_instance_mask' 41 | and 'pts_semantic_mask' keys are updated in the result dict. 42 | """ 43 | points = input_dict['points'] 44 | eps = 0.001 45 | self.pcd_range = [ 46 | self.pcd_range[0] + eps, self.pcd_range[1] + eps, self.pcd_range[2] + eps, 47 | self.pcd_range[3] - eps, self.pcd_range[4] - eps, self.pcd_range[5] - eps 48 | ] 49 | 50 | points_mask = points.in_range_3d(self.pcd_range) 51 | clean_points = points[points_mask] 52 | input_dict['points'] = clean_points 53 | points_mask = points_mask.numpy() 54 | 55 | pts_instance_mask = input_dict.get('pts_instance_mask', None) 56 | pts_semantic_mask = input_dict.get('pts_semantic_mask', None) 57 | 58 | if pts_instance_mask is not None: 59 | input_dict['pts_instance_mask'] = pts_instance_mask[points_mask] 60 | 61 | if pts_semantic_mask is not None: 62 | input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask] 63 | 64 | return input_dict 65 | 66 | def __repr__(self): 67 | """str: Return a string that describes the module.""" 68 | repr_str = self.__class__.__name__ 69 | repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' 70 | return repr_str 71 | -------------------------------------------------------------------------------- /mmdet3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES, 4 | MIDDLE_ENCODERS, NECKS, ROI_EXTRACTORS, SEGMENTORS, 5 | SHARED_HEADS, VOXEL_ENCODERS, build_backbone, 6 | build_detector, build_fusion_layer, build_head, 7 | build_loss, build_middle_encoder, build_model, 8 | build_neck, build_roi_extractor, build_shared_head, 9 | build_voxel_encoder) 10 | from .detectors import * # noqa: F401,F403 11 | from .necks import * # noqa: F401,F403 12 | 13 | __all__ = [ 14 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 15 | 'DETECTORS', 'SEGMENTORS', 'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 16 | 'FUSION_LAYERS', 'build_backbone', 'build_neck', 'build_roi_extractor', 17 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector', 18 | 'build_fusion_layer', 'build_model', 'build_middle_encoder', 19 | 'build_voxel_encoder' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet3d/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.models.backbones import ResNet 3 | from .resnet import CustomResNet, CustomResNet3D 4 | from .swin import SwinTransformer 5 | 6 | 7 | __all__ = [ 8 | 'ResNet', 'CustomResNet', 'CustomResNet3D', 'SwinTransformer', 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet3d/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | from mmcv.cnn import MODELS as MMCV_MODELS 5 | from mmcv.utils import Registry 6 | 7 | from mmdet.models.builder import BACKBONES as MMDET_BACKBONES 8 | from mmdet.models.builder import DETECTORS as MMDET_DETECTORS 9 | from mmdet.models.builder import HEADS as MMDET_HEADS 10 | from mmdet.models.builder import LOSSES as MMDET_LOSSES 11 | from mmdet.models.builder import NECKS as MMDET_NECKS 12 | from mmdet.models.builder import ROI_EXTRACTORS as MMDET_ROI_EXTRACTORS 13 | from mmdet.models.builder import SHARED_HEADS as MMDET_SHARED_HEADS 14 | from mmseg.models.builder import LOSSES as MMSEG_LOSSES 15 | 16 | MODELS = Registry('models', parent=MMCV_MODELS) 17 | 18 | BACKBONES = MODELS 19 | NECKS = MODELS 20 | ROI_EXTRACTORS = MODELS 21 | SHARED_HEADS = MODELS 22 | HEADS = MODELS 23 | LOSSES = MODELS 24 | DETECTORS = MODELS 25 | VOXEL_ENCODERS = MODELS 26 | MIDDLE_ENCODERS = MODELS 27 | FUSION_LAYERS = MODELS 28 | SEGMENTORS = MODELS 29 | 30 | 31 | def build_backbone(cfg): 32 | """Build backbone.""" 33 | if cfg['type'] in BACKBONES._module_dict.keys(): 34 | return BACKBONES.build(cfg) 35 | else: 36 | return MMDET_BACKBONES.build(cfg) 37 | 38 | 39 | def build_neck(cfg): 40 | """Build neck.""" 41 | if cfg['type'] in NECKS._module_dict.keys(): 42 | return NECKS.build(cfg) 43 | else: 44 | return MMDET_NECKS.build(cfg) 45 | 46 | 47 | def build_roi_extractor(cfg): 48 | """Build RoI feature extractor.""" 49 | if cfg['type'] in ROI_EXTRACTORS._module_dict.keys(): 50 | return ROI_EXTRACTORS.build(cfg) 51 | else: 52 | return MMDET_ROI_EXTRACTORS.build(cfg) 53 | 54 | 55 | def build_shared_head(cfg): 56 | """Build shared head of detector.""" 57 | if cfg['type'] in SHARED_HEADS._module_dict.keys(): 58 | return SHARED_HEADS.build(cfg) 59 | else: 60 | return MMDET_SHARED_HEADS.build(cfg) 61 | 62 | 63 | def build_head(cfg): 64 | """Build head.""" 
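    # Prefer heads registered in mmdet3d's registry; fall back to mmdet's HEADS
    # registry so plain 2D heads can still be built from the same config.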
65 | if cfg['type'] in HEADS._module_dict.keys(): 66 | return HEADS.build(cfg) 67 | else: 68 | return MMDET_HEADS.build(cfg) 69 | 70 | 71 | def build_loss(cfg): 72 | """Build loss function.""" 73 | if cfg['type'] in LOSSES._module_dict.keys(): 74 | return LOSSES.build(cfg) 75 | elif cfg['type'] in MMDET_LOSSES._module_dict.keys(): 76 | return MMDET_LOSSES.build(cfg) 77 | else: 78 | return MMSEG_LOSSES.build(cfg) 79 | 80 | 81 | def build_detector(cfg, train_cfg=None, test_cfg=None): 82 | """Build detector.""" 83 | if train_cfg is not None or test_cfg is not None: 84 | warnings.warn( 85 | 'train_cfg and test_cfg is deprecated, ' 86 | 'please specify them in model', UserWarning) 87 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 88 | 'train_cfg specified in both outer field and model field ' 89 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 90 | 'test_cfg specified in both outer field and model field ' 91 | if cfg['type'] in DETECTORS._module_dict.keys(): 92 | return DETECTORS.build( 93 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 94 | else: 95 | return MMDET_DETECTORS.build( 96 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 97 | 98 | 99 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 100 | """Build segmentor.""" 101 | if train_cfg is not None or test_cfg is not None: 102 | warnings.warn( 103 | 'train_cfg and test_cfg is deprecated, ' 104 | 'please specify them in model', UserWarning) 105 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 106 | 'train_cfg specified in both outer field and model field ' 107 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 108 | 'test_cfg specified in both outer field and model field ' 109 | return SEGMENTORS.build( 110 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 111 | 112 | 113 | def build_model(cfg, train_cfg=None, test_cfg=None): 114 | """A function warpper for building 3D detector or segmentor according to 115 | cfg. 116 | 117 | Should be deprecated in the future. 118 | """ 119 | if cfg.type in ['EncoderDecoder3D']: 120 | return build_segmentor(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 121 | else: 122 | return build_detector(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 123 | 124 | 125 | def build_voxel_encoder(cfg): 126 | """Build voxel encoder.""" 127 | return VOXEL_ENCODERS.build(cfg) 128 | 129 | 130 | def build_middle_encoder(cfg): 131 | """Build middle level encoder.""" 132 | return MIDDLE_ENCODERS.build(cfg) 133 | 134 | 135 | def build_fusion_layer(cfg): 136 | """Build fusion layer.""" 137 | return FUSION_LAYERS.build(cfg) 138 | -------------------------------------------------------------------------------- /mmdet3d/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import Base3DDetector 3 | from .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVDetTRT 4 | from .fusion_occ import FusionOCC, FusionDepthSeg 5 | from .centerpoint import CenterPoint 6 | from .mvx_two_stage import MVXTwoStageDetector 7 | 8 | 9 | __all__ = [ 10 | 'Base3DDetector', 'MVXTwoStageDetector', 11 | 'CenterPoint', 'BEVDet', 'BEVDet4D', 'BEVDepth4D', 12 | 'BEVDetTRT', 'FusionDepthSeg', 'FusionOCC' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | from mmdet.models.necks.fpn import FPN 3 | from .lss_fpn import FPN_LSS 4 | from .view_transformer import LSSViewTransformer, LSSViewTransformerBEVDepth, \ 5 | LSSViewTransformerBEVStereo 6 | from .fusion_view_transformer import CrossModalFusion, CrossModalLSS 7 | 8 | __all__ = [ 9 | 'FPN', 'LSSViewTransformer', 'FPN_LSS', 'LSSViewTransformerBEVDepth', 10 | 'LSSViewTransformerBEVStereo', 11 | 'CrossModalFusion', 'CrossModalLSS' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version, 3 | get_compiling_cuda_version, nms, roi_align, 4 | sigmoid_focal_loss) 5 | from mmcv.ops.assign_score_withk import assign_score_withk 6 | from mmcv.ops.ball_query import ball_query 7 | from mmcv.ops.furthest_point_sample import (furthest_point_sample, 8 | furthest_point_sample_with_dist) 9 | from mmcv.ops.gather_points import gather_points 10 | from mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation 11 | from mmcv.ops.knn import knn 12 | from mmcv.ops.points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, 13 | points_in_boxes_part) 14 | from mmcv.ops.points_sampler import PointsSampler as Points_Sampler 15 | from mmcv.ops.roiaware_pool3d import RoIAwarePool3d 16 | from mmcv.ops.roipoint_pool3d import RoIPointPool3d 17 | from mmcv.ops.scatter_points import DynamicScatter, dynamic_scatter 18 | from mmcv.ops.three_interpolate import three_interpolate 19 | from mmcv.ops.three_nn import three_nn 20 | from mmcv.ops.voxelize import Voxelization, voxelization 21 | 22 | from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule 23 | from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d 24 | from .paconv import PAConv, PAConvCUDA 25 | from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 26 | PAConvSAModule, PAConvSAModuleMSG, 27 | PointFPModule, PointSAModule, PointSAModuleMSG, 28 | build_sa_module) 29 | from .sparse_block import (SparseBasicBlock, SparseBottleneck, 30 | make_sparse_convmodule) 31 | 32 | __all__ = [ 33 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version', 34 | 'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d', 35 | 'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization', 36 | 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss', 37 | 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', 38 | 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', 39 | 'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample', 40 | 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', 41 | 'gather_points', 'grouping_operation', 'GroupAll', 'QueryAndGroup', 42 | 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', 'DGCNNFPModule', 43 | 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all', 44 | 'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version', 45 | 'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA', 46 | 'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule', 47 | 'PAConvCUDASAModuleMSG', 'RoIPointPool3d' 48 | ] 49 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool_v2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
Phigent Robotics. All rights reserved. 2 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool_v2/src/bev_pool.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Phigent Robotics. All rights reserved. 2 | // Reference https://arxiv.org/abs/2211.17111 3 | #include 4 | #include 5 | 6 | // CUDA function declarations 7 | void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, 8 | const int* ranks_depth, const int* ranks_feat, const int* ranks_bev, 9 | const int* interval_starts, const int* interval_lengths, float* out); 10 | 11 | void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad, 12 | const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat, 13 | const int* ranks_bev, const int* interval_starts, const int* interval_lengths, 14 | float* depth_grad, float* feat_grad); 15 | 16 | 17 | /* 18 | Function: pillar pooling (forward, cuda) 19 | Args: 20 | depth : input depth, FloatTensor[n, d, h, w] 21 | feat : input features, FloatTensor[n, h, w, c] 22 | out : output features, FloatTensor[b, c, h_out, w_out] 23 | ranks_depth : depth index of points, IntTensor[n_points] 24 | ranks_feat : feat index of points, IntTensor[n_points] 25 | ranks_bev : output index of points, IntTensor[n_points] 26 | interval_lengths : starting position for pooled point, IntTensor[n_intervals] 27 | interval_starts : how many points in each pooled point, IntTensor[n_intervals] 28 | Return: 29 | */ 30 | void bev_pool_v2_forward( 31 | const at::Tensor _depth, 32 | const at::Tensor _feat, 33 | at::Tensor _out, 34 | const at::Tensor _ranks_depth, 35 | const at::Tensor _ranks_feat, 36 | const at::Tensor _ranks_bev, 37 | const at::Tensor _interval_lengths, 38 | const at::Tensor _interval_starts 39 | ) { 40 | int c = _feat.size(4); 41 | int n_intervals = _interval_lengths.size(0); 42 | const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth)); 43 | const float* depth = _depth.data_ptr(); 44 | const float* feat = _feat.data_ptr(); 45 | const int* ranks_depth = _ranks_depth.data_ptr(); 46 | const int* ranks_feat = _ranks_feat.data_ptr(); 47 | const int* ranks_bev = _ranks_bev.data_ptr(); 48 | 49 | const int* interval_lengths = _interval_lengths.data_ptr(); 50 | const int* interval_starts = _interval_starts.data_ptr(); 51 | 52 | float* out = _out.data_ptr(); 53 | bev_pool_v2( 54 | c, n_intervals, depth, feat, ranks_depth, ranks_feat, 55 | ranks_bev, interval_starts, interval_lengths, out 56 | ); 57 | } 58 | 59 | 60 | /* 61 | Function: pillar pooling (backward, cuda) 62 | Args: 63 | out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out] 64 | depth_grad : grad of input depth, FloatTensor[n, d, h, w] 65 | feat_grad : grad of input feature, FloatTensor[n, h, w, c] 66 | depth : input depth, FloatTensor[n, d, h, w] 67 | feat : input features, FloatTensor[n, h, w, c] 68 | ranks_depth : depth index of points, IntTensor[n_points] 69 | ranks_feat : feat index of points, IntTensor[n_points] 70 | ranks_bev : output index of points, IntTensor[n_points] 71 | interval_lengths : starting position for pooled point, IntTensor[n_intervals] 72 | interval_starts : how many points in each pooled point, IntTensor[n_intervals] 73 | */ 74 | void bev_pool_v2_backward( 75 | const at::Tensor _out_grad, 76 | at::Tensor _depth_grad, 77 | at::Tensor _feat_grad, 78 | const at::Tensor _depth, 79 | const at::Tensor _feat, 80 | const at::Tensor _ranks_depth, 81 | const 
at::Tensor _ranks_feat, 82 | const at::Tensor _ranks_bev, 83 | const at::Tensor _interval_lengths, 84 | const at::Tensor _interval_starts 85 | ) { 86 | int c = _out_grad.size(4); 87 | int n_intervals = _interval_lengths.size(0); 88 | const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad)); 89 | const float* out_grad = _out_grad.data_ptr(); 90 | float* depth_grad = _depth_grad.data_ptr(); 91 | float* feat_grad = _feat_grad.data_ptr(); 92 | const float* depth = _depth.data_ptr(); 93 | const float* feat = _feat.data_ptr(); 94 | const int* ranks_depth = _ranks_depth.data_ptr(); 95 | const int* ranks_feat = _ranks_feat.data_ptr(); 96 | const int* ranks_bev = _ranks_bev.data_ptr(); 97 | const int* interval_lengths = _interval_lengths.data_ptr(); 98 | const int* interval_starts = _interval_starts.data_ptr(); 99 | 100 | bev_pool_v2_grad( 101 | c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat, 102 | ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad 103 | ); 104 | } 105 | 106 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 107 | m.def("bev_pool_v2_forward", &bev_pool_v2_forward, 108 | "bev_pool_v2_forward"); 109 | m.def("bev_pool_v2_backward", &bev_pool_v2_backward, 110 | "bev_pool_v2_backward"); 111 | } 112 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dgcnn_fa_module import DGCNNFAModule 3 | from .dgcnn_fp_module import DGCNNFPModule 4 | from .dgcnn_gf_module import DGCNNGFModule 5 | 6 | __all__ = ['DGCNNFAModule', 'DGCNNFPModule', 'DGCNNGFModule'] 7 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fa_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ConvModule 4 | from mmcv.runner import BaseModule, force_fp32 5 | from torch import nn as nn 6 | 7 | 8 | class DGCNNFAModule(BaseModule): 9 | """Point feature aggregation module used in DGCNN. 10 | 11 | Aggregate all the features of points. 12 | 13 | Args: 14 | mlp_channels (list[int]): List of mlp channels. 15 | norm_cfg (dict, optional): Type of normalization method. 16 | Defaults to dict(type='BN1d'). 17 | act_cfg (dict, optional): Type of activation method. 18 | Defaults to dict(type='ReLU'). 19 | init_cfg (dict, optional): Initialization config. Defaults to None. 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels, 24 | norm_cfg=dict(type='BN1d'), 25 | act_cfg=dict(type='ReLU'), 26 | init_cfg=None): 27 | super().__init__(init_cfg=init_cfg) 28 | self.fp16_enabled = False 29 | self.mlps = nn.Sequential() 30 | for i in range(len(mlp_channels) - 1): 31 | self.mlps.add_module( 32 | f'layer{i}', 33 | ConvModule( 34 | mlp_channels[i], 35 | mlp_channels[i + 1], 36 | kernel_size=(1, ), 37 | stride=(1, ), 38 | conv_cfg=dict(type='Conv1d'), 39 | norm_cfg=norm_cfg, 40 | act_cfg=act_cfg)) 41 | 42 | @force_fp32() 43 | def forward(self, points): 44 | """forward. 45 | 46 | Args: 47 | points (List[Tensor]): tensor of the features to be aggregated. 48 | 49 | Returns: 50 | Tensor: (B, N, M) M = mlp[-1], tensor of the output points. 
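The bev_pool_v2 C++ bindings listed above (mmdet3d/ops/bev_pool_v2/src/bev_pool.cpp) only expose raw forward/backward entry points; on the Python side they are normally wrapped in a `torch.autograd.Function` so the pooling stays differentiable end to end. The sketch below illustrates that wrapping under stated assumptions: the extension module name `bev_pool_v2_ext`, the output layout, and the exact tensor dtypes are illustrative, not the repo's verified API (the real wrapper lives in mmdet3d/ops/bev_pool_v2/bev_pool.py); only the argument order is copied from the bindings above.

```python
import torch

# Hypothetical import: the compiled extension built from the .cpp/.cu sources
# above. The actual repo wraps it in mmdet3d/ops/bev_pool_v2/bev_pool.py.
from mmdet3d.ops.bev_pool_v2 import bev_pool_v2_ext


class BEVPoolV2Function(torch.autograd.Function):
    """Sum camera frustum features into BEV pillars via precomputed ranks."""

    @staticmethod
    def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,
                bev_feat_shape, interval_starts, interval_lengths):
        # The kernels expect int32 index tensors and float32 features.
        ranks_depth = ranks_depth.int()
        ranks_feat = ranks_feat.int()
        ranks_bev = ranks_bev.int()
        interval_starts = interval_starts.int()
        interval_lengths = interval_lengths.int()

        depth = depth.contiguous().float()
        feat = feat.contiguous().float()
        out = feat.new_zeros(bev_feat_shape)  # e.g. (B, Dz, Hy, Wx, C), assumed

        bev_pool_v2_ext.bev_pool_v2_forward(
            depth, feat, out, ranks_depth, ranks_feat, ranks_bev,
            interval_lengths, interval_starts)

        ctx.save_for_backward(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                              interval_starts, interval_lengths)
        return out

    @staticmethod
    def backward(ctx, out_grad):
        (depth, feat, ranks_depth, ranks_feat, ranks_bev,
         interval_starts, interval_lengths) = ctx.saved_tensors
        depth_grad = depth.new_zeros(depth.shape)
        feat_grad = feat.new_zeros(feat.shape)
        bev_pool_v2_ext.bev_pool_v2_backward(
            out_grad.contiguous(), depth_grad, feat_grad, depth, feat,
            ranks_depth, ranks_feat, ranks_bev,
            interval_lengths, interval_starts)
        # One gradient per forward input; index tensors and shapes get None.
        return depth_grad, feat_grad, None, None, None, None, None, None
```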
51 | """ 52 | 53 | if len(points) > 1: 54 | new_points = torch.cat(points[1:], dim=-1) 55 | new_points = new_points.transpose(1, 2).contiguous() # (B, C, N) 56 | new_points_copy = new_points 57 | 58 | new_points = self.mlps(new_points) 59 | 60 | new_fa_points = new_points.max(dim=-1, keepdim=True)[0] 61 | new_fa_points = new_fa_points.repeat(1, 1, new_points.shape[-1]) 62 | 63 | new_points = torch.cat([new_fa_points, new_points_copy], dim=1) 64 | new_points = new_points.transpose(1, 2).contiguous() 65 | else: 66 | new_points = points 67 | 68 | return new_points 69 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule, force_fp32 4 | from torch import nn as nn 5 | 6 | 7 | class DGCNNFPModule(BaseModule): 8 | """Point feature propagation module used in DGCNN. 9 | 10 | Propagate the features from one set to another. 11 | 12 | Args: 13 | mlp_channels (list[int]): List of mlp channels. 14 | norm_cfg (dict, optional): Type of activation method. 15 | Defaults to dict(type='BN1d'). 16 | act_cfg (dict, optional): Type of activation method. 17 | Defaults to dict(type='ReLU'). 18 | init_cfg (dict, optional): Initialization config. Defaults to None. 19 | """ 20 | 21 | def __init__(self, 22 | mlp_channels, 23 | norm_cfg=dict(type='BN1d'), 24 | act_cfg=dict(type='ReLU'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, ), 36 | stride=(1, ), 37 | conv_cfg=dict(type='Conv1d'), 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg)) 40 | 41 | @force_fp32() 42 | def forward(self, points): 43 | """forward. 44 | 45 | Args: 46 | points (Tensor): (B, N, C) tensor of the input points. 47 | 48 | Returns: 49 | Tensor: (B, N, M) M = mlp[-1], tensor of the new points. 50 | """ 51 | 52 | if points is not None: 53 | new_points = points.transpose(1, 2).contiguous() # (B, C, N) 54 | new_points = self.mlps(new_points) 55 | new_points = new_points.transpose(1, 2).contiguous() 56 | else: 57 | new_points = points 58 | 59 | return new_points 60 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .paconv import PAConv, PAConvCUDA 3 | 4 | __all__ = ['PAConv', 'PAConvCUDA'] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def calc_euclidian_dist(xyz1, xyz2): 6 | """Calculate the Euclidean distance between two sets of points. 7 | 8 | Args: 9 | xyz1 (torch.Tensor): (N, 3), the first set of points. 10 | xyz2 (torch.Tensor): (N, 3), the second set of points. 11 | 12 | Returns: 13 | torch.Tensor: (N, ), the Euclidean distance between each point pair. 
14 | """ 15 | assert xyz1.shape[0] == xyz2.shape[0], 'number of points are not the same' 16 | assert xyz1.shape[1] == xyz2.shape[1] == 3, \ 17 | 'points coordinates dimension is not 3' 18 | return torch.norm(xyz1 - xyz2, dim=-1) 19 | 20 | 21 | def assign_score(scores, point_features): 22 | """Perform weighted sum to aggregate output features according to scores. 23 | This function is used in non-CUDA version of PAConv. 24 | 25 | Compared to the cuda op assigh_score_withk, this pytorch implementation 26 | pre-computes output features for the neighbors of all centers, and then 27 | performs aggregation. It consumes more GPU memories. 28 | 29 | Args: 30 | scores (torch.Tensor): (B, npoint, K, M), predicted scores to 31 | aggregate weight matrices in the weight bank. 32 | `npoint` is the number of sampled centers. 33 | `K` is the number of queried neighbors. 34 | `M` is the number of weight matrices in the weight bank. 35 | point_features (torch.Tensor): (B, npoint, K, M, out_dim) 36 | Pre-computed point features to be aggregated. 37 | 38 | Returns: 39 | torch.Tensor: (B, npoint, K, out_dim), the aggregated features. 40 | """ 41 | B, npoint, K, M = scores.size() 42 | scores = scores.view(B, npoint, K, 1, M) 43 | output = torch.matmul(scores, point_features).view(B, npoint, K, -1) 44 | return output 45 | 46 | 47 | def assign_kernel_withoutk(features, kernels, M): 48 | """Pre-compute features with weight matrices in weight bank. This function 49 | is used before cuda op assign_score_withk in CUDA version PAConv. 50 | 51 | Args: 52 | features (torch.Tensor): (B, in_dim, N), input features of all points. 53 | `N` is the number of points in current point cloud. 54 | kernels (torch.Tensor): (2 * in_dim, M * out_dim), weight matrices in 55 | the weight bank, transformed from (M, 2 * in_dim, out_dim). 56 | `2 * in_dim` is because the input features are concatenation of 57 | (point_features - center_features, point_features). 58 | M (int): Number of weight matrices in the weight bank. 59 | 60 | Returns: 61 | Tuple[torch.Tensor]: both of shape (B, N, M, out_dim): 62 | 63 | - point_features: Pre-computed features for points. 64 | - center_features: Pre-computed features for centers. 65 | """ 66 | B, in_dim, N = features.size() 67 | feat_trans = features.permute(0, 2, 1) # [B, N, in_dim] 68 | out_feat_half1 = torch.matmul(feat_trans, kernels[:in_dim]).view( 69 | B, N, M, -1) # [B, N, M, out_dim] 70 | out_feat_half2 = torch.matmul(feat_trans, kernels[in_dim:]).view( 71 | B, N, M, -1) # [B, N, M, out_dim] 72 | 73 | # TODO: why this hard-coded if condition? 74 | # when the network input is only xyz without additional features 75 | # xyz will be used as features, so that features.size(1) == 3 % 2 != 0 76 | # we need to compensate center_features because otherwise 77 | # `point_features - center_features` will result in all zeros? 78 | if features.size(1) % 2 != 0: 79 | out_feat_half_coord = torch.matmul( 80 | feat_trans[:, :, :3], # [B, N, 3] 81 | kernels[in_dim:in_dim + 3]).view(B, N, M, -1) # [B, N, M, out_dim] 82 | else: 83 | out_feat_half_coord = torch.zeros_like(out_feat_half2) 84 | 85 | point_features = out_feat_half1 + out_feat_half2 86 | center_features = out_feat_half1 + out_feat_half_coord 87 | return point_features, center_features 88 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .builder import build_sa_module 3 | from .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 4 | PAConvSAModule, PAConvSAModuleMSG) 5 | from .point_fp_module import PointFPModule 6 | from .point_sa_module import PointSAModule, PointSAModuleMSG 7 | 8 | __all__ = [ 9 | 'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule', 10 | 'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule', 11 | 'PAConvCUDASAModuleMSG' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry 3 | 4 | SA_MODULES = Registry('point_sa_module') 5 | 6 | 7 | def build_sa_module(cfg, *args, **kwargs): 8 | """Build PointNet2 set abstraction (SA) module. 9 | 10 | Args: 11 | cfg (None or dict): The SA module config, which should contain: 12 | - type (str): Module type. 13 | - module args: Args needed to instantiate an SA module. 14 | args (argument list): Arguments passed to the `__init__` 15 | method of the corresponding module. 16 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 17 | method of the corresponding SA module . 18 | 19 | Returns: 20 | nn.Module: Created SA module. 21 | """ 22 | if cfg is None: 23 | cfg_ = dict(type='PointSAModule') 24 | else: 25 | if not isinstance(cfg, dict): 26 | raise TypeError('cfg must be a dict') 27 | if 'type' not in cfg: 28 | raise KeyError('the cfg dict must contain the key "type"') 29 | cfg_ = cfg.copy() 30 | 31 | module_type = cfg_.pop('type') 32 | if module_type not in SA_MODULES: 33 | raise KeyError(f'Unrecognized module type {module_type}') 34 | else: 35 | sa_module = SA_MODULES.get(module_type) 36 | 37 | module = sa_module(*args, **kwargs, **cfg_) 38 | 39 | return module 40 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/point_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List 3 | 4 | import torch 5 | from mmcv.cnn import ConvModule 6 | from mmcv.ops import three_interpolate, three_nn 7 | from mmcv.runner import BaseModule, force_fp32 8 | from torch import nn as nn 9 | 10 | 11 | class PointFPModule(BaseModule): 12 | """Point feature propagation module used in PointNets. 13 | 14 | Propagate the features from one set to another. 15 | 16 | Args: 17 | mlp_channels (list[int]): List of mlp channels. 18 | norm_cfg (dict, optional): Type of normalization method. 19 | Default: dict(type='BN2d'). 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels: List[int], 24 | norm_cfg: dict = dict(type='BN2d'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, 1), 36 | stride=(1, 1), 37 | conv_cfg=dict(type='Conv2d'), 38 | norm_cfg=norm_cfg)) 39 | 40 | @force_fp32() 41 | def forward(self, target: torch.Tensor, source: torch.Tensor, 42 | target_feats: torch.Tensor, 43 | source_feats: torch.Tensor) -> torch.Tensor: 44 | """forward. 45 | 46 | Args: 47 | target (Tensor): (B, n, 3) tensor of the xyz positions of 48 | the target features. 
49 | source (Tensor): (B, m, 3) tensor of the xyz positions of 50 | the source features. 51 | target_feats (Tensor): (B, C1, n) tensor of the features to be 52 | propagated to. 53 | source_feats (Tensor): (B, C2, m) tensor of features 54 | to be propagated. 55 | 56 | Return: 57 | Tensor: (B, M, N) M = mlp[-1], tensor of the target features. 58 | """ 59 | if source is not None: 60 | dist, idx = three_nn(target, source) 61 | dist_reciprocal = 1.0 / (dist + 1e-8) 62 | norm = torch.sum(dist_reciprocal, dim=2, keepdim=True) 63 | weight = dist_reciprocal / norm 64 | 65 | interpolated_feats = three_interpolate(source_feats, idx, weight) 66 | else: 67 | interpolated_feats = source_feats.expand(*source_feats.size()[0:2], 68 | target.size(1)) 69 | 70 | if target_feats is not None: 71 | new_features = torch.cat([interpolated_feats, target_feats], 72 | dim=1) # (B, C2 + C1, n) 73 | else: 74 | new_features = interpolated_feats 75 | 76 | new_features = new_features.unsqueeze(-1) 77 | new_features = self.mlps(new_features) 78 | 79 | return new_features.squeeze(-1) 80 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .overwrite_spconv.write_spconv2 import register_spconv2 3 | 4 | try: 5 | import spconv 6 | except ImportError: 7 | IS_SPCONV2_AVAILABLE = False 8 | else: 9 | if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0': 10 | IS_SPCONV2_AVAILABLE = register_spconv2() 11 | else: 12 | IS_SPCONV2_AVAILABLE = False 13 | 14 | __all__ = ['IS_SPCONV2_AVAILABLE'] 15 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/overwrite_spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .write_spconv2 import register_spconv2 3 | 4 | __all__ = ['register_spconv2'] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/overwrite_spconv/write_spconv2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
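`PointFPModule.forward` above delegates the neighbor search and weighting to mmcv's fused `three_nn`/`three_interpolate` CUDA ops. The plain-PyTorch sketch below reproduces only the weighting rule (inverse-distance over the nearest source points) to make the propagation step explicit; it is an illustration, not the repo's op, and the sizes in the usage line are arbitrary.

```python
import torch


def interpolate_features(target_xyz, source_xyz, source_feats, k=3):
    """Propagate (B, C, m) source features to (B, n, 3) target points.

    Mirrors the inverse-distance weighting used by three_nn/three_interpolate,
    but computed densely on CPU/GPU without the fused kernel.
    """
    dist = torch.cdist(target_xyz, source_xyz)               # (B, n, m)
    dist, idx = dist.topk(k, dim=-1, largest=False)          # k nearest sources
    weight = 1.0 / (dist + 1e-8)
    weight = weight / weight.sum(dim=-1, keepdim=True)       # (B, n, k)

    # Gather the k neighbor features and take the weighted sum.
    B, C, m = source_feats.shape
    idx_exp = idx.unsqueeze(1).expand(B, C, -1, -1)           # (B, C, n, k)
    gathered = source_feats.unsqueeze(2).expand(B, C, idx.shape[1], m) \
        .gather(3, idx_exp)                                   # (B, C, n, k)
    return (gathered * weight.unsqueeze(1)).sum(dim=-1)       # (B, C, n)


feats = interpolate_features(torch.rand(2, 64, 3), torch.rand(2, 16, 3),
                             torch.rand(2, 32, 16))
assert feats.shape == (2, 32, 64)
```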
2 | import itertools 3 | 4 | from mmcv.cnn.bricks.registry import CONV_LAYERS 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | def register_spconv2(): 9 | """This func registers spconv2.0 spconv ops to overwrite the default mmcv 10 | spconv ops.""" 11 | try: 12 | from spconv.pytorch import (SparseConv2d, SparseConv3d, SparseConv4d, 13 | SparseConvTranspose2d, 14 | SparseConvTranspose3d, SparseInverseConv2d, 15 | SparseInverseConv3d, SparseModule, 16 | SubMConv2d, SubMConv3d, SubMConv4d) 17 | except ImportError: 18 | return False 19 | else: 20 | CONV_LAYERS._register_module(SparseConv2d, 'SparseConv2d', force=True) 21 | CONV_LAYERS._register_module(SparseConv3d, 'SparseConv3d', force=True) 22 | CONV_LAYERS._register_module(SparseConv4d, 'SparseConv4d', force=True) 23 | 24 | CONV_LAYERS._register_module( 25 | SparseConvTranspose2d, 'SparseConvTranspose2d', force=True) 26 | CONV_LAYERS._register_module( 27 | SparseConvTranspose3d, 'SparseConvTranspose3d', force=True) 28 | 29 | CONV_LAYERS._register_module( 30 | SparseInverseConv2d, 'SparseInverseConv2d', force=True) 31 | CONV_LAYERS._register_module( 32 | SparseInverseConv3d, 'SparseInverseConv3d', force=True) 33 | 34 | CONV_LAYERS._register_module(SubMConv2d, 'SubMConv2d', force=True) 35 | CONV_LAYERS._register_module(SubMConv3d, 'SubMConv3d', force=True) 36 | CONV_LAYERS._register_module(SubMConv4d, 'SubMConv4d', force=True) 37 | SparseModule._version = 2 38 | SparseModule._load_from_state_dict = _load_from_state_dict 39 | return True 40 | 41 | 42 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, 43 | missing_keys, unexpected_keys, error_msgs): 44 | """Rewrite this func to compat the convolutional kernel weights between 45 | spconv 1.x in MMCV and 2.x in spconv2.x. 46 | 47 | Kernel weights in MMCV spconv has shape in (D,H,W,in_channel,out_channel) , 48 | while those in spcon2.x is in (out_channel,D,H,W,in_channel). 
49 | """ 50 | version = local_metadata.get('version', None) 51 | for hook in self._load_state_dict_pre_hooks.values(): 52 | hook(state_dict, prefix, local_metadata, strict, missing_keys, 53 | unexpected_keys, error_msgs) 54 | 55 | local_name_params = itertools.chain(self._parameters.items(), 56 | self._buffers.items()) 57 | local_state = {k: v.data for k, v in local_name_params if v is not None} 58 | 59 | for name, param in local_state.items(): 60 | key = prefix + name 61 | if key in state_dict: 62 | input_param = state_dict[key] 63 | 64 | # Backward compatibility: loading 1-dim tensor from 65 | # 0.3.* to version 0.4+ 66 | if len(param.shape) == 0 and len(input_param.shape) == 1: 67 | input_param = input_param[0] 68 | if version != 2: 69 | dims = [len(input_param.shape) - 1] + list( 70 | range(len(input_param.shape) - 1)) 71 | input_param = input_param.permute(*dims) 72 | if input_param.shape != param.shape: 73 | # local shape should match the one in checkpoint 74 | error_msgs.append( 75 | f'size mismatch for {key}: copying a param with ' 76 | f'shape {key, input_param.shape} from checkpoint,' 77 | f'the shape in current model is {param.shape}.') 78 | continue 79 | 80 | if isinstance(input_param, Parameter): 81 | # backwards compatibility for serialized parameters 82 | input_param = input_param.data 83 | try: 84 | param.copy_(input_param) 85 | except Exception: 86 | error_msgs.append( 87 | f'While copying the parameter named "{key}", whose ' 88 | f'dimensions in the model are {param.size()} and whose ' 89 | f'dimensions in the checkpoint are {input_param.size()}.') 90 | elif strict: 91 | missing_keys.append(key) 92 | 93 | if strict: 94 | for key, input_param in state_dict.items(): 95 | if key.startswith(prefix): 96 | input_name = key[len(prefix):] 97 | input_name = input_name.split( 98 | '.', 1)[0] # get the name of param/buffer/child 99 | if input_name not in self._modules \ 100 | and input_name not in local_state: 101 | unexpected_keys.append(key) 102 | -------------------------------------------------------------------------------- /mmdet3d/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg, print_log 3 | 4 | from .collect_env import collect_env 5 | from .compat_cfg import compat_cfg 6 | from .logger import get_root_logger 7 | from .misc import find_latest_checkpoint 8 | from .setup_env import setup_multi_processes 9 | 10 | __all__ = [ 11 | 'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env', 12 | 'print_log', 'setup_multi_processes', 'find_latest_checkpoint', 13 | 'compat_cfg' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmdet 6 | import mmdet3d 7 | import mmseg 8 | from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE 9 | 10 | 11 | def collect_env(): 12 | """Collect the information of the running environments.""" 13 | env_info = collect_base_env() 14 | env_info['MMDetection'] = mmdet.__version__ 15 | env_info['MMSegmentation'] = mmseg.__version__ 16 | env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7] 17 | env_info['spconv2.0'] = IS_SPCONV2_AVAILABLE 18 | return env_info 19 | 20 | 21 | if __name__ == '__main__': 22 | for name, val in collect_env().items(): 23 | print(f'{name}: {val}') 24 | -------------------------------------------------------------------------------- /mmdet3d/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | from mmcv.utils import get_logger 5 | 6 | 7 | def get_root_logger(log_file=None, log_level=logging.INFO, name='mmdet3d'): 8 | """Get root logger and add a keyword filter to it. 9 | 10 | The logger will be initialized if it has not been initialized. By default a 11 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 12 | also be added. The name of the root logger is the top-level package name, 13 | e.g., "mmdet3d". 14 | 15 | Args: 16 | log_file (str, optional): File path of log. Defaults to None. 17 | log_level (int, optional): The level of logger. 18 | Defaults to logging.INFO. 19 | name (str, optional): The name of the root logger, also used as a 20 | filter keyword. Defaults to 'mmdet3d'. 21 | 22 | Returns: 23 | :obj:`logging.Logger`: The obtained logger 24 | """ 25 | logger = get_logger(name=name, log_file=log_file, log_level=log_level) 26 | 27 | # add a logging filter 28 | logging_filter = logging.Filter(name) 29 | logging_filter.filter = lambda record: record.find(name) != -1 30 | 31 | return logger 32 | -------------------------------------------------------------------------------- /mmdet3d/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import glob 3 | import os.path as osp 4 | import warnings 5 | 6 | 7 | def find_latest_checkpoint(path, suffix='pth'): 8 | """Find the latest checkpoint from the working directory. This function is 9 | copied from mmdetection. 10 | 11 | Args: 12 | path(str): The path to find checkpoints. 13 | suffix(str): File extension. 14 | Defaults to pth. 15 | 16 | Returns: 17 | latest_path(str | None): File path of the latest checkpoint. 18 | References: 19 | .. 
[1] https://github.com/microsoft/SoftTeacher 20 | /blob/main/ssod/utils/patch.py 21 | """ 22 | if not osp.exists(path): 23 | warnings.warn('The path of checkpoints does not exist.') 24 | return None 25 | if osp.exists(osp.join(path, f'latest.{suffix}')): 26 | return osp.join(path, f'latest.{suffix}') 27 | 28 | checkpoints = glob.glob(osp.join(path, f'*.{suffix}')) 29 | if len(checkpoints) == 0: 30 | warnings.warn('There are no checkpoints in the path.') 31 | return None 32 | latest = -1 33 | latest_path = None 34 | for checkpoint in checkpoints: 35 | count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0]) 36 | if count > latest: 37 | latest = count 38 | latest_path = checkpoint 39 | return latest_path 40 | -------------------------------------------------------------------------------- /mmdet3d/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import platform 4 | import warnings 5 | 6 | import cv2 7 | from torch import multiprocessing as mp 8 | 9 | 10 | def setup_multi_processes(cfg): 11 | """Setup multi-processing environment variables.""" 12 | # set multi-process start method as `fork` to speed up the training 13 | if platform.system() != 'Windows': 14 | mp_start_method = cfg.get('mp_start_method', 'fork') 15 | current_method = mp.get_start_method(allow_none=True) 16 | if current_method is not None and current_method != mp_start_method: 17 | warnings.warn( 18 | f'Multi-processing start method `{mp_start_method}` is ' 19 | f'different from the previous setting `{current_method}`.' 20 | f'It will be force set to `{mp_start_method}`. You can change ' 21 | f'this behavior by changing `mp_start_method` in your config.') 22 | mp.set_start_method(mp_start_method, force=True) 23 | 24 | # disable opencv multithreading to avoid system being overloaded 25 | opencv_num_threads = cfg.get('opencv_num_threads', 0) 26 | cv2.setNumThreads(opencv_num_threads) 27 | 28 | # setup OMP threads 29 | # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa 30 | workers_per_gpu = cfg.data.get('workers_per_gpu', 1) 31 | if 'train_dataloader' in cfg.data: 32 | workers_per_gpu = \ 33 | max(cfg.data.train_dataloader.get('workers_per_gpu', 1), 34 | workers_per_gpu) 35 | 36 | if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 37 | omp_num_threads = 1 38 | warnings.warn( 39 | f'Setting OMP_NUM_THREADS environment variable for each process ' 40 | f'to be {omp_num_threads} in default, to avoid your system being ' 41 | f'overloaded, please further tune the variable for optimal ' 42 | f'performance in your application as needed.') 43 | os.environ['OMP_NUM_THREADS'] = str(omp_num_threads) 44 | 45 | # setup MKL threads 46 | if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 47 | mkl_num_threads = 1 48 | warnings.warn( 49 | f'Setting MKL_NUM_THREADS environment variable for each process ' 50 | f'to be {mkl_num_threads} in default, to avoid your system being ' 51 | f'overloaded, please further tune the variable for optimal ' 52 | f'performance in your application as needed.') 53 | os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads) 54 | -------------------------------------------------------------------------------- /mmdet3d/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
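`find_latest_checkpoint` above first looks for `latest.pth` and otherwise returns the checkpoint with the largest trailing number (numeric, not lexicographic). A small usage sketch with a throwaway directory, assuming mmdet3d and its dependencies are importable; the file names are illustrative.

```python
import os
import tempfile

from mmdet3d.utils import find_latest_checkpoint

with tempfile.TemporaryDirectory() as work_dir:
    # Empty placeholder checkpoints; only the names matter for this check.
    for name in ('epoch_1.pth', 'epoch_2.pth', 'epoch_10.pth'):
        open(os.path.join(work_dir, name), 'w').close()

    latest = find_latest_checkpoint(work_dir)
    assert latest.endswith('epoch_10.pth')   # 10 > 2 numerically, despite sorting
```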
2 | 3 | __version__ = '1.0.0rc4' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pycuda 2 | lyft_dataset_sdk 3 | networkx==2.2 4 | numba==0.53.0 5 | numpy 6 | nuscenes-devkit 7 | plyfile 8 | scikit-image 9 | tensorboard 10 | trimesh==2.35.39 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [yapf] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | 6 | [isort] 7 | line_length = 79 8 | multi_line_output = 0 9 | extra_standard_library = setuptools 10 | known_first_party = mmdet,mmseg,mmdet3d 11 | known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset 12 | no_lines_before = STDLIB,LOCALFOLDER 13 | default_section = THIRDPARTY 14 | 15 | [codespell] 16 | ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD 17 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
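`parse_version_info` in mmdet3d/version.py above splits a version string into integer parts plus an `'rcN'` suffix when present. Two concrete cases, assuming mmdet3d and its dependencies are importable:

```python
from mmdet3d.version import parse_version_info

assert parse_version_info('1.0.0rc4') == (1, 0, 0, 'rc4')   # rc suffix kept as a string
assert parse_version_info('0.17.3') == (0, 17, 3)           # plain releases are all ints
```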
2 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29501} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import torch 5 | from mmcv.runner import save_checkpoint 6 | from torch import nn as nn 7 | 8 | from mmdet3d.apis import init_model 9 | 10 | 11 | def fuse_conv_bn(conv, bn): 12 | """During inference, the functionary of batch norm layers is turned off but 13 | only the mean and var alone channels are used, which exposes the chance to 14 | fuse it with the preceding conv layers to save computations and simplify 15 | network structures.""" 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 18 | bn.running_mean) 19 | 20 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 21 | conv.weight = nn.Parameter(conv_w * 22 | factor.reshape([conv.out_channels, 1, 1, 1])) 23 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 24 | return conv 25 | 26 | 27 | def fuse_module(m): 28 | last_conv = None 29 | last_conv_name = None 30 | 31 | for name, child in m.named_children(): 32 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 33 | if last_conv is None: # only fuse BN that is after Conv 34 | continue 35 | fused_conv = fuse_conv_bn(last_conv, child) 36 | m._modules[last_conv_name] = fused_conv 37 | # To reduce changes, set BN as Identity instead of deleting it. 
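# Swapping in nn.Identity (instead of deleting the BN submodule) keeps the
# attribute in place, so the parent module's forward(), which still calls the
# BN by name, keeps working without any further code changes.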
38 | m._modules[name] = nn.Identity() 39 | last_conv = None 40 | elif isinstance(child, nn.Conv2d): 41 | last_conv = child 42 | last_conv_name = name 43 | else: 44 | fuse_module(child) 45 | return m 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='fuse Conv and BN layers in a model') 51 | parser.add_argument('config', help='config file path') 52 | parser.add_argument('checkpoint', help='checkpoint file path') 53 | parser.add_argument('out', help='output path of the converted model') 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def main(): 59 | args = parse_args() 60 | # build the model from a config file and a checkpoint file 61 | model = init_model(args.config, args.checkpoint) 62 | # fuse conv and bn layers of the model 63 | fused_model = fuse_module(model) 64 | save_checkpoint(fused_model, args.out) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', nargs='+', action=DictAction, help='arguments in dict') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | 20 | cfg = Config.fromfile(args.config) 21 | if args.options is not None: 22 | cfg.merge_from_dict(args.options) 23 | print(f'Config:\n{cfg.pretty_text}') 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
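`fuse_conv_bn` above folds the BatchNorm statistics into the preceding convolution's weights and bias. The numerical sanity check below, with illustrative layer sizes, confirms that in eval mode (where BN uses its running statistics) the fused convolution reproduces conv followed by BN up to floating-point error.

```python
import torch
from torch import nn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
# Pretend the BN has been trained: give it non-trivial statistics and affine.
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 2.0)
bn.weight.data.uniform_(0.5, 1.5)
bn.bias.data.uniform_(-0.5, 0.5)

x = torch.randn(2, 3, 16, 16)
with torch.no_grad():
    reference = bn.eval()(conv(x))

    # Same folding as fuse_conv_bn above.
    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    fused = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=True)
    fused.weight.copy_(conv.weight * factor.reshape(8, 1, 1, 1))
    fused.bias.copy_((0 - bn.running_mean) * factor + bn.bias)  # conv has no bias

    assert torch.allclose(fused(x), reference, atol=1e-5)
```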
2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | 7 | from mmdet3d.datasets import build_dataset 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='MMDet3D visualize the results') 13 | parser.add_argument('config', help='test config file path') 14 | parser.add_argument('--result', help='results file in pickle format') 15 | parser.add_argument( 16 | '--show-dir', help='directory where visualize results will be saved') 17 | args = parser.parse_args() 18 | 19 | return args 20 | 21 | 22 | def main(): 23 | args = parse_args() 24 | 25 | if args.result is not None and \ 26 | not args.result.endswith(('.pkl', '.pickle')): 27 | raise ValueError('The results file must be a pkl file.') 28 | 29 | cfg = Config.fromfile(args.config) 30 | cfg.data.test.test_mode = True 31 | 32 | # build the dataset 33 | dataset = build_dataset(cfg.data.test) 34 | results = mmcv.load(args.result) 35 | 36 | if getattr(dataset, 'show', None) is not None: 37 | # data loading pipeline for showing 38 | eval_pipeline = cfg.get('eval_pipeline', {}) 39 | if eval_pipeline: 40 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 41 | else: 42 | dataset.show(results, args.show_dir) # use default pipeline 43 | else: 44 | raise NotImplementedError( 45 | 'Show is not implemented for dataset {}!'.format( 46 | type(dataset).__name__)) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | --------------------------------------------------------------------------------
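For interactive use, the core of tools/misc/visualize_results.py above can be driven from a notebook as sketched below. The config path and results file are placeholders, and whether a given dataset class implements `show()` (the occupancy dataset used by FusionOcc may not) is an assumption; the script raises `NotImplementedError` in that case.

```python
import mmcv
from mmcv import Config

from mmdet3d.datasets import build_dataset

cfg = Config.fromfile('configs/fusion_occ/fusion_occ.py')    # placeholder config
cfg.data.test.test_mode = True

dataset = build_dataset(cfg.data.test)
results = mmcv.load('work_dirs/results.pkl')                  # e.g. from tools/test.py --out

# Use the dedicated eval pipeline for loading data if the config defines one,
# otherwise fall back to the dataset's default pipeline.
eval_pipeline = cfg.get('eval_pipeline', {})
if eval_pipeline:
    dataset.show(results, 'work_dirs/vis', pipeline=eval_pipeline)
else:
    dataset.show(results, 'work_dirs/vis')
```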