├── requirements
│   ├── build.txt
│   ├── optional.txt
│   ├── mminstall.txt
│   ├── readthedocs.txt
│   ├── docs.txt
│   ├── runtime.txt
│   └── tests.txt
├── fig
│   └── framework.png
├── tools
│   ├── data_converter
│   │   ├── __init__.py
│   │   └── lyft_data_fixer.py
│   ├── dist_train.sh
│   ├── dist_test.sh
│   ├── update_data_coords.sh
│   ├── create_data.sh
│   ├── slurm_test.sh
│   ├── slurm_train.sh
│   ├── convert_fully2single.py
│   ├── misc
│   │   ├── print_config.py
│   │   ├── visualize_results.py
│   │   └── fuse_conv_bn.py
│   ├── model_converters
│   │   ├── publish_model.py
│   │   └── regnet2mmdet.py
│   ├── deployment
│   │   └── test_torchserver.py
│   └── analysis_tools
│       ├── get_flops.py
│       └── benchmark.py
├── data
│   ├── scannet
│   │   ├── meta_data
│   │   │   ├── scannet_means.npz
│   │   │   └── scannetv2_test.txt
│   │   ├── README.md
│   │   └── scannet_utils.py
│   └── sunrgbd
│       ├── matlab
│       │   ├── extract_split.m
│       │   ├── extract_rgbd_data_v1.m
│       │   └── extract_rgbd_data_v2.m
│       └── README.md
├── requirements.txt
├── mmdet3d
│   ├── ops
│   │   ├── paconv
│   │   │   └── __init__.py
│   │   ├── spconv
│   │   │   ├── overwrite_spconv
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── dgcnn_modules
│   │   │   ├── __init__.py
│   │   │   ├── dgcnn_fp_module.py
│   │   │   └── dgcnn_fa_module.py
│   │   ├── pointnet_modules
│   │   │   ├── __init__.py
│   │   │   ├── builder.py
│   │   │   └── point_fp_module.py
│   │   └── __init__.py
│   ├── core
│   │   ├── evaluation
│   │   │   ├── waymo_utils
│   │   │   │   └── __init__.py
│   │   │   ├── kitti_utils
│   │   │   │   └── __init__.py
│   │   │   ├── scannet_utils
│   │   │   │   ├── __init__.py
│   │   │   │   └── util_3d.py
│   │   │   └── __init__.py
│   │   ├── bbox
│   │   │   ├── assigners
│   │   │   │   └── __init__.py
│   │   │   ├── iou_calculators
│   │   │   │   └── __init__.py
│   │   │   ├── samplers
│   │   │   │   └── __init__.py
│   │   │   ├── structures
│   │   │   │   └── __init__.py
│   │   │   ├── coders
│   │   │   │   ├── __init__.py
│   │   │   │   └── delta_xyzwhlr_bbox_coder.py
│   │   │   ├── __init__.py
│   │   │   └── transforms.py
│   │   ├── voxel
│   │   │   ├── __init__.py
│   │   │   └── builder.py
│   │   ├── visualizer
│   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   ├── utils
│   │   │   └── __init__.py
│   │   ├── anchor
│   │   │   └── __init__.py
│   │   ├── post_processing
│   │   │   └── __init__.py
│   │   └── points
│   │       ├── __init__.py
│   │       ├── depth_points.py
│   │       ├── lidar_points.py
│   │       └── cam_points.py
│   ├── models
│   │   ├── segmentors
│   │   │   └── __init__.py
│   │   ├── roi_heads
│   │   │   ├── mask_heads
│   │   │   │   └── __init__.py
│   │   │   ├── roi_extractors
│   │   │   │   ├── __init__.py
│   │   │   │   ├── single_roiaware_extractor.py
│   │   │   │   └── single_roipoint_extractor.py
│   │   │   ├── bbox_heads
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   └── base_3droi_head.py
│   │   ├── decode_heads
│   │   │   ├── __init__.py
│   │   │   ├── dgcnn_head.py
│   │   │   ├── paconv_head.py
│   │   │   └── pointnet2_head.py
│   │   ├── model_utils
│   │   │   ├── __init__.py
│   │   │   └── edge_fusion_module.py
│   │   ├── middle_encoders
│   │   │   └── __init__.py
│   │   ├── voxel_encoders
│   │   │   └── __init__.py
│   │   ├── fusion_layers
│   │   │   └── __init__.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── clip_sigmoid.py
│   │   │   ├── mlp.py
│   │   │   ├── edge_indices.py
│   │   │   └── gen_keypoints.py
│   │   ├── necks
│   │   │   ├── __init__.py
│   │   │   └── pointnet2_fp_neck.py
│   │   ├── detectors
│   │   │   ├── smoke_mono3d.py
│   │   │   ├── ssd3dnet.py
│   │   │   ├── fcos_mono3d.py
│   │   │   ├── __init__.py
│   │   │   ├── two_stage.py
│   │   │   ├── mvx_faster_rcnn.py
│   │   │   └── dynamic_voxelnet.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   ├── base_pointnet.py
│   │   │   ├── lg3d_utils.py
│   │   │   ├── second.py
│   │   │   └── nostem_regnet.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── rotated_iou_loss.py
│   │   │   └── axis_aligned_iou_loss.py
│   │   ├── dense_heads
│   │   │   ├── __init__.py
│   │   │   └── base_mono3d_dense_head.py
│   │   └── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── collect_env.py
│   │   ├── logger.py
│   │   ├── misc.py
│   │   └── setup_env.py
│   ├── version.py
│   ├── apis
│   │   ├── __init__.py
│   │   └── test.py
│   ├── __init__.py
│   └── datasets
│       ├── pipelines
│       │   ├── __init__.py
│       │   └── compose.py
│       ├── builder.py
│       ├── __init__.py
│       └── dataset_wrappers.py
├── configs
│   ├── votenet
│   │   ├── votenet_iouloss_8x8_scannet-3d-18class.py
│   │   ├── votenet_16x8_sunrgbd-3d-10class.py
│   │   ├── votenet_lg3d_sunrgbd.py
│   │   ├── votenet_8x8_scannet-3d-18class.py
│   │   ├── votenet_lg3d_scannet.py
│   │   └── metafile.yml
│   ├── _base_
│   │   ├── schedules
│   │   │   ├── seg_cosine_100e.py
│   │   │   ├── mmdet_schedule_1x.py
│   │   │   ├── seg_cosine_200e.py
│   │   │   ├── seg_cosine_50e.py
│   │   │   ├── seg_cosine_150e.py
│   │   │   ├── schedule_3x.py
│   │   │   ├── schedule_2x.py
│   │   │   ├── cosine.py
│   │   │   ├── cyclic_20e.py
│   │   │   └── cyclic_40e.py
│   │   ├── default_runtime.py
│   │   └── models
│   │       ├── groupfree3d.py
│   │       └── votenet.py
│   └── groupfree3d
│       └── metafile.yml
├── setup.cfg
└── .gitignore
/requirements/build.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/fig/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FabienCode/LG3D/HEAD/fig/framework.png
--------------------------------------------------------------------------------
/requirements/optional.txt:
--------------------------------------------------------------------------------
open3d
spconv
waymo-open-dataset-tf-2-1-0==1.2.0
--------------------------------------------------------------------------------
/tools/data_converter/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
--------------------------------------------------------------------------------
/requirements/mminstall.txt:
--------------------------------------------------------------------------------
mmcv-full>=1.4.8,<=1.6.0
mmdet>=2.24.0,<=3.0.0
mmsegmentation>=0.20.0,<=1.0.0
--------------------------------------------------------------------------------
/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
mmcv>=1.4.8
mmdet>=2.24.0
mmsegmentation>=0.20.1
torch
torchvision
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannet_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FabienCode/LG3D/HEAD/data/scannet/meta_data/scannet_means.npz
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
-r requirements/build.txt
-r requirements/optional.txt
-r requirements/runtime.txt
-r requirements/tests.txt
--------------------------------------------------------------------------------
/mmdet3d/ops/paconv/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .paconv import PAConv, PAConvCUDA

__all__ = ['PAConv', 'PAConvCUDA']
--------------------------------------------------------------------------------
/mmdet3d/ops/spconv/overwrite_spconv/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .write_spconv2 import register_spconv2

__all__ = ['register_spconv2']
--------------------------------------------------------------------------------
/mmdet3d/core/evaluation/waymo_utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .prediction_kitti_to_waymo import KITTI2Waymo

__all__ = ['KITTI2Waymo']
--------------------------------------------------------------------------------
/mmdet3d/core/evaluation/kitti_utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .eval import kitti_eval, kitti_eval_coco_style

__all__ = ['kitti_eval', 'kitti_eval_coco_style']
--------------------------------------------------------------------------------
/mmdet3d/core/evaluation/scannet_utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .evaluate_semantic_instance import evaluate_matches, scannet_eval

__all__ = ['scannet_eval', 'evaluate_matches']
--------------------------------------------------------------------------------
/mmdet3d/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner

__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
--------------------------------------------------------------------------------
/mmdet3d/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .base import Base3DSegmentor
from .encoder_decoder import EncoderDecoder3D

__all__ = ['Base3DSegmentor', 'EncoderDecoder3D']
--------------------------------------------------------------------------------
/mmdet3d/core/voxel/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_voxel_generator
from .voxel_generator import VoxelGenerator

__all__ = ['build_voxel_generator', 'VoxelGenerator']
--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
docutils==0.16.0
m2r
mistune==0.8.4
myst-parser
-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==4.0.2
sphinx-copybutton
sphinx_markdown_tables
--------------------------------------------------------------------------------
/requirements/runtime.txt:
--------------------------------------------------------------------------------
lyft_dataset_sdk
networkx>=2.2,<2.3
numba==0.53.0
numpy
nuscenes-devkit
plyfile
scikit-image
# by default we also use tensorboard to log results
tensorboard
trimesh>=2.35.39,<2.35.40
--------------------------------------------------------------------------------
/mmdet3d/models/roi_heads/mask_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .pointwise_semantic_head import PointwiseSemanticHead
from .primitive_head import PrimitiveHead

__all__ = ['PointwiseSemanticHead', 'PrimitiveHead']
--------------------------------------------------------------------------------
/mmdet3d/models/decode_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .dgcnn_head import DGCNNHead
from .paconv_head import PAConvHead
from .pointnet2_head import PointNet2Head

__all__ = ['PointNet2Head', 'DGCNNHead', 'PAConvHead']
--------------------------------------------------------------------------------
/requirements/tests.txt:
--------------------------------------------------------------------------------
asynctest
codecov
flake8
interrogate
isort
# Note: used for kwarray.group_items, this may be ported to mmcv in the future.
kwarray
pytest
pytest-cov
pytest-runner
ubelt
xdoctest >= 0.10.0
yapf
--------------------------------------------------------------------------------
/mmdet3d/core/visualizer/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .show_result import (show_multi_modality_result, show_result,
                          show_seg_result)

__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
--------------------------------------------------------------------------------
/mmdet3d/models/model_utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .edge_fusion_module import EdgeFusionModule
from .transformer import GroupFree3DMHA
from .vote_module import VoteModule

__all__ = ['VoteModule', 'GroupFree3DMHA', 'EdgeFusionModule']
--------------------------------------------------------------------------------
/mmdet3d/ops/dgcnn_modules/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .dgcnn_fa_module import DGCNNFAModule
from .dgcnn_fp_module import DGCNNFPModule
from .dgcnn_gf_module import DGCNNGFModule

__all__ = ['DGCNNFAModule', 'DGCNNFPModule', 'DGCNNGFModule']
--------------------------------------------------------------------------------
/configs/votenet/votenet_iouloss_8x8_scannet-3d-18class.py:
--------------------------------------------------------------------------------
_base_ = ['./votenet_8x8_scannet-3d-18class.py']

# model settings, add iou loss
model = dict(
    bbox_head=dict(
        iou_loss=dict(
            type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 /
            3.0)))
--------------------------------------------------------------------------------
/mmdet3d/models/middle_encoders/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .pillar_scatter import PointPillarsScatter
from .sparse_encoder import SparseEncoder, SparseEncoderSASSD
from .sparse_unet import SparseUNet

__all__ = [
    'PointPillarsScatter', 'SparseEncoder', 'SparseEncoderSASSD', 'SparseUNet'
]
--------------------------------------------------------------------------------
/mmdet3d/models/voxel_encoders/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .pillar_encoder import DynamicPillarFeatureNet, PillarFeatureNet
from .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE

__all__ = [
    'PillarFeatureNet', 'DynamicPillarFeatureNet', 'HardVFE', 'DynamicVFE',
    'HardSimpleVFE', 'DynamicSimpleVFE'
]
--------------------------------------------------------------------------------
/configs/_base_/schedules/seg_cosine_100e.py:
--------------------------------------------------------------------------------
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
--------------------------------------------------------------------------------
/configs/_base_/schedules/mmdet_schedule_1x.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
--------------------------------------------------------------------------------
/configs/_base_/schedules/seg_cosine_200e.py:
--------------------------------------------------------------------------------
# optimizer
# This schedule is mainly used on ScanNet dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
--------------------------------------------------------------------------------
/configs/_base_/schedules/seg_cosine_50e.py:
--------------------------------------------------------------------------------
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=50)
--------------------------------------------------------------------------------
/mmdet3d/models/roi_heads/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
from .single_roiaware_extractor import Single3DRoIAwareExtractor
from .single_roipoint_extractor import Single3DRoIPointExtractor

__all__ = [
    'SingleRoIExtractor', 'Single3DRoIAwareExtractor',
    'Single3DRoIPointExtractor'
]
--------------------------------------------------------------------------------
/configs/_base_/schedules/seg_cosine_150e.py:
--------------------------------------------------------------------------------
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
momentum_config = None

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=150)
--------------------------------------------------------------------------------
/mmdet3d/models/fusion_layers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .coord_transform import (apply_3d_transformation, bbox_2d_transform,
                              coord_2d_transform)
from .point_fusion import PointFusion
from .vote_fusion import VoteFusion

__all__ = [
    'PointFusion', 'VoteFusion', 'apply_3d_transformation',
    'bbox_2d_transform', 'coord_2d_transform'
]
--------------------------------------------------------------------------------
/mmdet3d/core/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .anchor import *  # noqa: F401, F403
from .bbox import *  # noqa: F401, F403
from .evaluation import *  # noqa: F401, F403
from .points import *  # noqa: F401, F403
from .post_processing import *  # noqa: F401, F403
from .utils import *  # noqa: F401, F403
from .visualizer import *  # noqa: F401, F403
from .voxel import *  # noqa: F401, F403
--------------------------------------------------------------------------------
/mmdet3d/models/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .clip_sigmoid import clip_sigmoid
from .edge_indices import get_edge_indices
from .gen_keypoints import get_keypoints
from .handle_objs import filter_outside_objs, handle_proj_objs
from .mlp import MLP

__all__ = [
    'clip_sigmoid', 'MLP', 'get_edge_indices', 'filter_outside_objs',
    'handle_proj_objs', 'get_keypoints'
]
--------------------------------------------------------------------------------
/mmdet3d/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .indoor_eval import indoor_eval
from .instance_seg_eval import instance_seg_eval
from .kitti_utils import kitti_eval, kitti_eval_coco_style
from .lyft_eval import lyft_eval
from .seg_eval import seg_eval

__all__ = [
    'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval',
    'seg_eval', 'instance_seg_eval'
]
--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_3x.py:
--------------------------------------------------------------------------------
# optimizer
# This schedule is mainly used by models on indoor dataset,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.004  # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36)
--------------------------------------------------------------------------------
/mmdet3d/core/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .array_converter import ArrayConverter, array_converter
from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d,
                       gaussian_radius, get_ellip_gaussian_2D)

__all__ = [
    'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian',
    'ArrayConverter', 'array_converter', 'ellip_gaussian2D',
    'get_ellip_gaussian_2D'
]
--------------------------------------------------------------------------------
/mmdet3d/ops/spconv/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .overwrite_spconv.write_spconv2 import register_spconv2

try:
    import spconv
except ImportError:
    IS_SPCONV2_AVAILABLE = False
else:
    if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0':
        IS_SPCONV2_AVAILABLE = register_spconv2()
    else:
        IS_SPCONV2_AVAILABLE = False

__all__ = ['IS_SPCONV2_AVAILABLE']
--------------------------------------------------------------------------------
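A usage sketch (not a file from the repository): downstream code can branch on IS_SPCONV2_AVAILABLE to pick a sparse-convolution backend, mirroring what mmdet3d's sparse encoders do; the mmcv fallback import is the usual one but should be checked against the installed versions.

    from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE

    if IS_SPCONV2_AVAILABLE:
        # spconv 2.x is installed and was registered above
        from spconv.pytorch import SparseConvTensor, SparseSequential
    else:
        # fall back to the sparse ops bundled with mmcv-full
        from mmcv.ops import SparseConvTensor, SparseSequential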
/mmdet3d/core/anchor/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.anchor import build_prior_generator
from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator,
                                  AlignedAnchor3DRangeGeneratorPerCls,
                                  Anchor3DRangeGenerator)

__all__ = [
    'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator',
    'build_prior_generator', 'AlignedAnchor3DRangeGeneratorPerCls'
]
--------------------------------------------------------------------------------
/mmdet3d/models/necks/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.necks.fpn import FPN
from .dla_neck import DLANeck
from .imvoxel_neck import IndoorImVoxelNeck, OutdoorImVoxelNeck
from .pointnet2_fp_neck import PointNetFPNeck
from .second_fpn import SECONDFPN
from .view_transformer import LSSViewTransformer

__all__ = [
    'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'IndoorImVoxelNeck',
    'PointNetFPNeck', 'DLANeck', 'LSSViewTransformer'
]
--------------------------------------------------------------------------------
/mmdet3d/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry, build_from_cfg, print_log

from .collect_env import collect_env
from .compat_cfg import compat_cfg
from .logger import get_root_logger
from .misc import find_latest_checkpoint
from .setup_env import setup_multi_processes

__all__ = [
    'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env',
    'print_log', 'setup_multi_processes', 'find_latest_checkpoint',
    'compat_cfg'
]
--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
# optimizer
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24)
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/train.py \
    $CONFIG \
    --seed 0 \
    --launcher pytorch ${@:3}
--------------------------------------------------------------------------------
/mmdet3d/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
                               BboxOverlapsNearest3D,
                               axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,
                               bbox_overlaps_nearest_3d)

__all__ = [
    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
    'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',
    'axis_aligned_bbox_overlaps_3d'
]
--------------------------------------------------------------------------------
/mmdet3d/models/utils/clip_sigmoid.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import torch


def clip_sigmoid(x, eps=1e-4):
    """Sigmoid function for input feature.

    Args:
        x (torch.Tensor): Input feature map with the shape of [B, N, H, W].
        eps (float, optional): Lower bound of the range to be clamped to.
            Defaults to 1e-4.

    Returns:
        torch.Tensor: Feature map after sigmoid.
    """
    y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps)
    return y
--------------------------------------------------------------------------------
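A minimal usage sketch for clip_sigmoid (not part of the repository); note that x.sigmoid_() is the in-place variant, so the input tensor itself is overwritten:

    import torch

    from mmdet3d.models.utils import clip_sigmoid

    heatmap = torch.randn(2, 3, 128, 128)  # raw head output
    scores = clip_sigmoid(heatmap)  # values clamped to [1e-4, 1 - 1e-4]
    assert scores.min() >= 1e-4 and scores.max() <= 1 - 1e-4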
/mmdet3d/core/voxel/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv

from . import voxel_generator


def build_voxel_generator(cfg, **kwargs):
    """Builder of voxel generator."""
    if isinstance(cfg, voxel_generator.VoxelGenerator):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, voxel_generator, default_args=kwargs)
    else:
        raise TypeError('Invalid type {} for building a voxel '
                        'generator'.format(type(cfg)))
--------------------------------------------------------------------------------
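Because the builder goes through mmcv.runner.obj_from_dict, the cfg dict must carry a 'type' key naming a class defined in voxel_generator. A hedged sketch with illustrative KITTI-style values; the keyword names follow mmdet3d's VoxelGenerator and are assumptions here:

    from mmdet3d.core.voxel import build_voxel_generator

    voxel_generator = build_voxel_generator(
        dict(
            type='VoxelGenerator',
            voxel_size=[0.05, 0.05, 0.1],
            point_cloud_range=[0, -40, -3, 70.4, 40, 1],
            max_num_points=5))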
/tools/dist_test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/test.py \
    $CONFIG \
    $CHECKPOINT \
    --launcher pytorch \
    ${@:4}
--------------------------------------------------------------------------------
/mmdet3d/ops/pointnet_modules/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_sa_module
from .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
                               PAConvSAModule, PAConvSAModuleMSG)
from .point_fp_module import PointFPModule
from .point_sa_module import PointSAModule, PointSAModuleMSG

__all__ = [
    'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule',
    'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule',
    'PAConvCUDASAModuleMSG'
]
--------------------------------------------------------------------------------
/mmdet3d/version.py:
--------------------------------------------------------------------------------
# Copyright (c) Open-MMLab. All rights reserved.

__version__ = '1.0.0rc6'
short_version = __version__


def parse_version_info(version_str):
    version_info = []
    for x in version_str.split('.'):
        if x.isdigit():
            version_info.append(int(x))
        elif x.find('rc') != -1:
            patch_version = x.split('rc')
            version_info.append(int(patch_version[0]))
            version_info.append(f'rc{patch_version[1]}')
    return tuple(version_info)


version_info = parse_version_info(__version__)
--------------------------------------------------------------------------------
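For reference (not part of the repository), parse_version_info splits on dots and expands an 'rc' suffix into two entries, so the version above parses as:

    >>> parse_version_info('1.0.0rc6')
    (1, 0, 0, 'rc6')
    >>> parse_version_info('1.0.0')
    (1, 0, 0)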
/tools/update_data_coords.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x
export PYTHONPATH=`pwd`:$PYTHONPATH

PARTITION=$1
DATASET=$2
GPUS=${GPUS:-1}
GPUS_PER_NODE=${GPUS_PER_NODE:-1}
SRUN_ARGS=${SRUN_ARGS:-""}
JOB_NAME=update_data_coords

srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/update_data_coords.py ${DATASET} \
        --root-dir ./data/${DATASET} \
        --out-dir ./data/${DATASET}
--------------------------------------------------------------------------------
/configs/_base_/schedules/cosine.py:
--------------------------------------------------------------------------------
# This schedule is mainly used by models with dynamic voxelization
# optimizer
lr = 0.003  # max learning rate
optimizer = dict(
    type='AdamW',
    lr=lr,
    betas=(0.95, 0.99),  # the momentum changes during training
    weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))

lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)

momentum_config = None

runner = dict(type='EpochBasedRunner', max_epochs=40)
--------------------------------------------------------------------------------
/tools/create_data.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x
export PYTHONPATH=`pwd`:$PYTHONPATH

PARTITION=$1
JOB_NAME=$2
DATASET=$3
GPUS=${GPUS:-1}
GPUS_PER_NODE=${GPUS_PER_NODE:-1}
SRUN_ARGS=${SRUN_ARGS:-""}
JOB_NAME=create_data

srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/create_data.py ${DATASET} \
        --root-path ./data/${DATASET} \
        --out-dir ./data/${DATASET} \
        --extra-tag ${DATASET}
--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
WORK_DIR=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${@:5}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
--------------------------------------------------------------------------------
/mmdet3d/apis/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .inference import (convert_SyncBN, inference_detector,
                        inference_mono_3d_detector,
                        inference_multi_modality_detector, inference_segmentor,
                        init_model, show_result_meshlab)
from .test import single_gpu_test
from .train import init_random_seed, train_model

__all__ = [
    'inference_detector', 'init_model', 'single_gpu_test',
    'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN',
    'train_model', 'inference_multi_modality_detector', 'inference_segmentor',
    'init_random_seed'
]
--------------------------------------------------------------------------------
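A minimal inference sketch built on the API surface above (not part of the repository; the config, checkpoint and point-cloud paths are placeholders, and in mmdet3d inference_detector returns both the result and the preprocessed data):

    from mmdet3d.apis import inference_detector, init_model

    model = init_model(
        'configs/votenet/votenet_lg3d_scannet.py',
        'checkpoints/votenet_scannet_final.pth',  # hypothetical path
        device='cuda:0')
    result, data = inference_detector(model, 'demo/scene.bin')  # hypothetical file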
/mmdet3d/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,
                                      InstanceBalancedPosSampler,
                                      IoUBalancedNegSampler, OHEMSampler,
                                      PseudoSampler, RandomSampler,
                                      SamplingResult)
from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler

__all__ = [
    'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler'
]
--------------------------------------------------------------------------------
/tools/convert_fully2single.py:
--------------------------------------------------------------------------------
import torch

# Convert an LG3D "fully" checkpoint (student plus label-guided teacher
# branches) into a plain single-branch checkpoint for inference.
latest = torch.load('***/votenet_scannet_fully.pth',
                    map_location=torch.device('cpu'))  # the best training pth

# Drop all teacher-only parameters: the teacher backbone, the label/annotation
# encoders and the attention-based inducer are not needed at inference time.
for k in list(latest['state_dict'].keys()):
    if ('t_backbone' in k or 'label_encoder' in k or 'anno' in k
            or 'attention' in k):
        latest['state_dict'].pop(k)

# Rename the student backbone keys ('s_backbone.*') back to the plain
# 'backbone.*' names expected by the standard VoteNet detector.
for k in list(latest['state_dict'].keys()):
    if 's_backbone' in k:
        latest['state_dict'][k.replace('s_backbone.', 'backbone.')] = \
            latest['state_dict'].pop(k)

torch.save(latest, '***/votenet_scannet_final.pth')

print('convert finished!')
--------------------------------------------------------------------------------
/mmdet3d/models/roi_heads/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
                                               DoubleConvFCBBoxHead,
                                               Shared2FCBBoxHead,
                                               Shared4Conv1FCBBoxHead)
from .h3d_bbox_head import H3DBboxHead
from .parta2_bbox_head import PartA2BboxHead
from .point_rcnn_bbox_head import PointRCNNBboxHead

__all__ = [
    'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',
    'H3DBboxHead', 'PointRCNNBboxHead'
]
--------------------------------------------------------------------------------
/mmdet3d/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                        merge_aug_proposals, merge_aug_scores,
                                        multiclass_nms)
from .box3d_nms import (aligned_3d_nms, box3d_multiclass_nms, circle_nms,
                        nms_bev, nms_normal_bev)
from .merge_augs import merge_aug_bboxes_3d

__all__ = [
    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
    'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms', 'nms_bev',
    'nms_normal_bev'
]
--------------------------------------------------------------------------------
/tools/misc/print_config.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import argparse

from mmcv import Config, DictAction


def parse_args():
    parser = argparse.ArgumentParser(description='Print the whole config')
    parser.add_argument('config', help='config file path')
    parser.add_argument(
        '--options', nargs='+', action=DictAction, help='arguments in dict')
    args = parser.parse_args()

    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    print(f'Config:\n{cfg.pretty_text}')


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/mmdet3d/models/detectors/smoke_mono3d.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from ..builder import DETECTORS
from .single_stage_mono3d import SingleStageMono3DDetector


@DETECTORS.register_module()
class SMOKEMono3D(SingleStageMono3DDetector):
    r"""`SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
    detection.

    """

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(SMOKEMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,
                                          test_cfg, pretrained)
--------------------------------------------------------------------------------
/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
checkpoint_config = dict(interval=1)
# yapf:disable
# By default we use textlogger hook and tensorboard
# For more loggers see
# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = None
load_from = None
resume_from = None
workflow = [('train', 1)]

# disable opencv multithreading to avoid system being overloaded
opencv_num_threads = 0
# set multi-process start method as `fork` to speed up the training
mp_start_method = 'fork'
--------------------------------------------------------------------------------
/mmdet3d/models/backbones/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
from .dgcnn import DGCNNBackbone
from .dla import DLANet
from .mink_resnet import MinkResNet
from .multi_backbone import MultiBackbone
from .nostem_regnet import NoStemRegNet
from .pointnet2_sa_msg import PointNet2SAMSG
from .pointnet2_sa_ssg import PointNet2SASSG
from .second import SECOND
from .lg3d_utils import inducer_attention, pointnet

__all__ = [
    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet',
    'SECOND', 'DGCNNBackbone', 'PointNet2SASSG', 'PointNet2SAMSG',
    'MultiBackbone', 'DLANet', 'MinkResNet', 'inducer_attention',
    'pointnet'
]
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[yapf]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true

[isort]
line_length = 79
multi_line_output = 0
extra_standard_library = setuptools
known_first_party = mmdet,mmseg,mmdet3d
known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY

[codespell]
ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD,warmup
--------------------------------------------------------------------------------
/mmdet3d/utils/collect_env.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import collect_env as collect_base_env
from mmcv.utils import get_git_hash

import mmdet
import mmdet3d
import mmseg
from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE


def collect_env():
    """Collect the information of the running environments."""
    env_info = collect_base_env()
    env_info['MMDetection'] = mmdet.__version__
    env_info['MMSegmentation'] = mmseg.__version__
    env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7]
    env_info['spconv2.0'] = IS_SPCONV2_AVAILABLE
    return env_info


if __name__ == '__main__':
    for name, val in collect_env().items():
        print(f'{name}: {val}')
--------------------------------------------------------------------------------
/mmdet3d/models/detectors/ssd3dnet.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from ..builder import DETECTORS
from .votenet import VoteNet


@DETECTORS.register_module()
class SSD3DNet(VoteNet):
    """3DSSDNet model.

    https://arxiv.org/abs/2002.10187.pdf
    """

    def __init__(self,
                 backbone,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=None,
                 pretrained=None):
        super(SSD3DNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            init_cfg=init_cfg,
            pretrained=pretrained)
--------------------------------------------------------------------------------
/mmdet3d/models/losses/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy
from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
from .chamfer_distance import ChamferDistance, chamfer_distance
from .multibin_loss import MultiBinLoss
from .paconv_regularization_loss import PAConvRegularizationLoss
from .rotated_iou_loss import RotatedIoU3DLoss
from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss

__all__ = [
    'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
    'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
    'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
    'MultiBinLoss', 'RotatedIoU3DLoss'
]
--------------------------------------------------------------------------------
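These losses are instantiated through the registry, so for example the iou_loss dict from the votenet_iouloss config above can be built directly (a sketch, not part of the repository):

    from mmdet3d.models import build_loss

    iou_loss = build_loss(
        dict(type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 / 3.0))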
/mmdet3d/models/roi_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .base_3droi_head import Base3DRoIHead
from .bbox_heads import H3DBboxHead, PartA2BboxHead, PointRCNNBboxHead
from .h3d_roi_head import H3DRoIHead
from .mask_heads import PointwiseSemanticHead, PrimitiveHead
from .part_aggregation_roi_head import PartAggregationROIHead
from .point_rcnn_roi_head import PointRCNNRoIHead
from .roi_extractors import (Single3DRoIAwareExtractor,
                             Single3DRoIPointExtractor, SingleRoIExtractor)

__all__ = [
    'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
    'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',
    'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead', 'H3DBboxHead',
    'PointRCNNBboxHead', 'Single3DRoIPointExtractor'
]
--------------------------------------------------------------------------------
/mmdet3d/core/bbox/structures/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .base_box3d import BaseInstance3DBoxes
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,
                    mono_cam_box2vis, points_cam2img, points_img2cam,
                    rotation_3d_in_axis, xywhr2xyxyr)

__all__ = [
    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
    'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis',
    'get_proj_mat_by_coord_type'
]
--------------------------------------------------------------------------------
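Each box class wraps an (N, 7) tensor of (x, y, z, dx, dy, dz, yaw) in its own coordinate convention. A small sketch (not part of the repository, using the standard mmdet3d box properties):

    import torch

    from mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

    boxes = LiDARInstance3DBoxes(torch.rand(4, 7))  # 4 boxes in the LiDAR frame
    corners = boxes.corners  # (4, 8, 3) corner coordinates
    volumes = boxes.volume   # (4,) box volumes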
/configs/_base_/schedules/cyclic_20e.py:
--------------------------------------------------------------------------------
# For nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 20. Please change the interval accordingly if you do not
# use a default schedule.
# optimizer
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
--------------------------------------------------------------------------------
/mmdet3d/models/detectors/fcos_mono3d.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from ..builder import DETECTORS
from .single_stage_mono3d import SingleStageMono3DDetector


@DETECTORS.register_module()
class FCOSMono3D(SingleStageMono3DDetector):
    r"""`FCOS3D <https://arxiv.org/abs/2104.10956>`_ for monocular 3D object detection.

    Currently please refer to our entry on the
    `leaderboard <https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Camera>`_.
    """  # noqa: E501

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(FCOSMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,
                                         test_cfg, pretrained)
--------------------------------------------------------------------------------
/configs/votenet/votenet_16x8_sunrgbd-3d-10class.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/datasets/sunrgbd-3d-10class.py', '../_base_/models/votenet.py',
    '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
    bbox_head=dict(
        num_classes=10,
        bbox_coder=dict(
            type='PartialBinBasedBBoxCoder',
            num_sizes=10,
            num_dir_bins=12,
            with_rot=True,
            mean_sizes=[
                [2.114256, 1.620300, 0.927272], [0.791118, 1.279516, 0.718182],
                [0.923508, 1.867419, 0.845495], [0.591958, 0.552978, 0.827272],
                [0.699104, 0.454178, 0.75625], [0.69519, 1.346299, 0.736364],
                [0.528526, 1.002642, 1.172878], [0.500618, 0.632163, 0.683424],
                [0.404671, 1.071108, 1.688889], [0.76584, 1.398258, 0.472728]
            ]),
    ))
--------------------------------------------------------------------------------
/mmdet3d/core/bbox/coders/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox import build_bbox_coder
from .anchor_free_bbox_coder import AnchorFreeBBoxCoder
from .centerpoint_bbox_coders import CenterPointBBoxCoder
from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
from .fcos3d_bbox_coder import FCOS3DBBoxCoder
from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder
from .monoflex_bbox_coder import MonoFlexCoder
from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder
from .pgd_bbox_coder import PGDBBoxCoder
from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder
from .smoke_bbox_coder import SMOKECoder

__all__ = [
    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder',
    'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder',
    'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder', 'PGDBBoxCoder', 'SMOKECoder',
    'MonoFlexCoder'
]
--------------------------------------------------------------------------------
/mmdet3d/core/points/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .base_points import BasePoints
from .cam_points import CameraPoints
from .depth_points import DepthPoints
from .lidar_points import LiDARPoints

__all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']


def get_points_type(points_type):
    """Get the class of points according to coordinate type.

    Args:
        points_type (str): The type of points coordinate.
            The valid values are "CAMERA", "LIDAR", or "DEPTH".

    Returns:
        class: Points type.
    """
    if points_type == 'CAMERA':
        points_cls = CameraPoints
    elif points_type == 'LIDAR':
        points_cls = LiDARPoints
    elif points_type == 'DEPTH':
        points_cls = DepthPoints
    else:
        raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"'
                         f' are supported, got {points_type}')

    return points_cls
--------------------------------------------------------------------------------
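A short usage sketch (not part of the repository): get_points_type returns a class, which is then constructed from a raw tensor; points_dim=4 assumes an extra intensity channel beyond xyz:

    import torch

    from mmdet3d.core.points import get_points_type

    points_class = get_points_type('LIDAR')  # -> LiDARPoints
    points = points_class(torch.rand(100, 4), points_dim=4)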
/mmdet3d/utils/logger.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import logging

from mmcv.utils import get_logger


def get_root_logger(log_file=None, log_level=logging.INFO, name='mmdet3d'):
    """Get root logger and add a keyword filter to it.

    The logger will be initialized if it has not been initialized. By default a
    StreamHandler will be added. If `log_file` is specified, a FileHandler will
    also be added. The name of the root logger is the top-level package name,
    e.g., "mmdet3d".

    Args:
        log_file (str, optional): File path of log. Defaults to None.
        log_level (int, optional): The level of logger.
            Defaults to logging.INFO.
        name (str, optional): The name of the root logger, also used as a
            filter keyword. Defaults to 'mmdet3d'.

    Returns:
        :obj:`logging.Logger`: The obtained logger
    """
    logger = get_logger(name=name, log_file=log_file, log_level=log_level)

    # add a logging filter
    logging_filter = logging.Filter(name)
    logging_filter.filter = lambda record: record.find(name) != -1

    return logger
--------------------------------------------------------------------------------
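Typical usage (a sketch, not part of the repository; the log path is a placeholder):

    import logging

    from mmdet3d.utils import get_root_logger

    logger = get_root_logger(log_file='work_dirs/run.log',
                             log_level=logging.INFO)
    logger.info('training started')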
/tools/model_converters/publish_model.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import subprocess

import torch


def parse_args():
    parser = argparse.ArgumentParser(
        description='Process a checkpoint to be published')
    parser.add_argument('in_file', help='input checkpoint filename')
    parser.add_argument('out_file', help='output checkpoint filename')
    args = parser.parse_args()
    return args


def process_checkpoint(in_file, out_file):
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # strip the '.pth' suffix explicitly; str.rstrip('.pth') would strip any
    # trailing 'p'/'t'/'h'/'.' characters, not just the suffix
    base = out_file[:-4] if out_file.endswith('.pth') else out_file
    final_file = base + '-{}.pth'.format(sha[:8])
    subprocess.Popen(['mv', out_file, final_file])


def main():
    args = parse_args()
    process_checkpoint(args.in_file, args.out_file)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/mmdet3d/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .anchor3d_head import Anchor3DHead
from .anchor_free_mono3d_head import AnchorFreeMono3DHead
from .base_conv_bbox_head import BaseConvBboxHead
from .base_mono3d_dense_head import BaseMono3DDenseHead
from .centerpoint_head import CenterHead
from .fcaf3d_head import FCAF3DHead
from .fcos_mono3d_head import FCOSMono3DHead
from .free_anchor3d_head import FreeAnchor3DHead
from .groupfree3d_head import GroupFree3DHead
from .imvoxel_head import ImVoxelHead
from .monoflex_head import MonoFlexHead
from .parta2_rpn_head import PartA2RPNHead
from .pgd_head import PGDHead
from .point_rpn_head import PointRPNHead
from .shape_aware_head import ShapeAwareHead
from .smoke_mono3d_head import SMOKEMono3DHead
from .ssd_3d_head import SSD3DHead
from .vote_head import VoteHead

__all__ = [
    'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
    'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
    'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
    'MonoFlexHead', 'FCAF3DHead', 'ImVoxelHead'
]
--------------------------------------------------------------------------------
/mmdet3d/models/detectors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .base import Base3DDetector
from .centerpoint import CenterPoint
from .dynamic_voxelnet import DynamicVoxelNet
from .fcos_mono3d import FCOSMono3D
from .groupfree3dnet import GroupFree3DNet
from .h3dnet import H3DNet
from .imvotenet import ImVoteNet
from .imvoxelnet import ImVoxelNet
from .mink_single_stage import MinkSingleStage3DDetector
from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
from .mvx_two_stage import MVXTwoStageDetector
from .parta2 import PartA2
from .point_rcnn import PointRCNN
from .sassd import SASSD
from .single_stage_mono3d import SingleStageMono3DDetector
from .smoke_mono3d import SMOKEMono3D
from .ssd3dnet import SSD3DNet
from .votenet import VoteNet
from .voxelnet import VoxelNet
from .lg3d_votenet import lg3d_VoteNet

__all__ = [
    'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
    'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
    'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D',
    'MinkSingleStage3DDetector', 'SASSD', 'lg3d_VoteNet'
]
--------------------------------------------------------------------------------
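With lg3d_VoteNet registered above, the full detector can be assembled from a config through the standard builder (a sketch, not part of the repository; votenet_lg3d_scannet.py is listed in the tree but its contents are not shown in this dump):

    from mmcv import Config

    from mmdet3d.models import build_model

    cfg = Config.fromfile('configs/votenet/votenet_lg3d_scannet.py')
    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))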
[1] https://github.com/microsoft/SoftTeacher 20 | /blob/main/ssod/utils/patch.py 21 | """ 22 | if not osp.exists(path): 23 | warnings.warn('The path of checkpoints does not exist.') 24 | return None 25 | if osp.exists(osp.join(path, f'latest.{suffix}')): 26 | return osp.join(path, f'latest.{suffix}') 27 | 28 | checkpoints = glob.glob(osp.join(path, f'*.{suffix}')) 29 | if len(checkpoints) == 0: 30 | warnings.warn('There are no checkpoints in the path.') 31 | return None 32 | latest = -1 33 | latest_path = None 34 | for checkpoint in checkpoints: 35 | count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0]) 36 | if count > latest: 37 | latest = count 38 | latest_path = checkpoint 39 | return latest_path 40 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry 3 | 4 | SA_MODULES = Registry('point_sa_module') 5 | 6 | 7 | def build_sa_module(cfg, *args, **kwargs): 8 | """Build PointNet2 set abstraction (SA) module. 9 | 10 | Args: 11 | cfg (None or dict): The SA module config, which should contain: 12 | - type (str): Module type. 13 | - module args: Args needed to instantiate an SA module. 14 | args (argument list): Arguments passed to the `__init__` 15 | method of the corresponding module. 16 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 17 | method of the corresponding SA module. 18 | 19 | Returns: 20 | nn.Module: Created SA module. 21 | """ 22 | if cfg is None: 23 | cfg_ = dict(type='PointSAModule') 24 | else: 25 | if not isinstance(cfg, dict): 26 | raise TypeError('cfg must be a dict') 27 | if 'type' not in cfg: 28 | raise KeyError('the cfg dict must contain the key "type"') 29 | cfg_ = cfg.copy() 30 | 31 | module_type = cfg_.pop('type') 32 | if module_type not in SA_MODULES: 33 | raise KeyError(f'Unrecognized module type {module_type}') 34 | else: 35 | sa_module = SA_MODULES.get(module_type) 36 | 37 | module = sa_module(*args, **kwargs, **cfg_) 38 | 39 | return module 40 | -------------------------------------------------------------------------------- /configs/votenet/votenet_lg3d_sunrgbd.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/sunrgbd-3d-10class.py', '../_base_/models/lg3d_votenet.py', 3 | '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py' 4 | ] 5 | # model settings 6 | model = dict( 7 | anno_descriptor=dict( 8 | type='pointnet', 9 | input_channel=19, # sunrgbd 10 | 11 | ), 12 | bbox_head=dict( 13 | num_classes=10, 14 | bbox_coder=dict( 15 | type='PartialBinBasedBBoxCoder', 16 | num_sizes=10, 17 | num_dir_bins=12, 18 | with_rot=True, 19 | mean_sizes=[ 20 | [2.114256, 1.620300, 0.927272], [0.791118, 1.279516, 0.718182], 21 | [0.923508, 1.867419, 0.845495], [0.591958, 0.552978, 0.827272], 22 | [0.699104, 0.454178, 0.75625], [0.69519, 1.346299, 0.736364], 23 | [0.528526, 1.002642, 1.172878], [0.500618, 0.632163, 0.683424], 24 | [0.404671, 1.071108, 1.688889], [0.76584, 1.398258, 0.472728] 25 | ]), 26 | )) 27 | 28 | # optimizer 29 | # This schedule is mainly used by models on indoor datasets, 30 | # e.g., VoteNet on SUNRGBD and ScanNet 31 | lr = 0.004 # max learning rate 32 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 33 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 34 | lr_config =
dict(policy='step', warmup=None, step=[48, 64]) 35 | # runtime settings 36 | runner = dict(type='EpochBasedRunner', max_epochs=72) 37 | -------------------------------------------------------------------------------- /mmdet3d/models/backbones/base_pointnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | from abc import ABCMeta 4 | 5 | from mmcv.runner import BaseModule 6 | 7 | 8 | class BasePointNet(BaseModule, metaclass=ABCMeta): 9 | """Base class for PointNet.""" 10 | 11 | def __init__(self, init_cfg=None, pretrained=None): 12 | super(BasePointNet, self).__init__(init_cfg) 13 | self.fp16_enabled = False 14 | assert not (init_cfg and pretrained), \ 15 | 'init_cfg and pretrained cannot be set at the same time' 16 | if isinstance(pretrained, str): 17 | warnings.warn('DeprecationWarning: pretrained is deprecated, ' 18 | 'please use "init_cfg" instead') 19 | self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) 20 | 21 | @staticmethod 22 | def _split_point_feats(points): 23 | """Split coordinates and features of input points. 24 | 25 | Args: 26 | points (torch.Tensor): Point coordinates with features, 27 | with shape (B, N, 3 + input_feature_dim). 28 | 29 | Returns: 30 | torch.Tensor: Coordinates of input points. 31 | torch.Tensor: Features of input points. 32 | """ 33 | xyz = points[..., 0:3].contiguous() 34 | if points.size(-1) > 3: 35 | features = points[..., 3:].transpose(1, 2).contiguous() 36 | else: 37 | features = None 38 | 39 | return xyz, features 40 | -------------------------------------------------------------------------------- /mmdet3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES, 4 | MIDDLE_ENCODERS, NECKS, ROI_EXTRACTORS, SEGMENTORS, 5 | SHARED_HEADS, VOXEL_ENCODERS, build_backbone, 6 | build_detector, build_fusion_layer, build_head, 7 | build_loss, build_middle_encoder, build_model, 8 | build_neck, build_roi_extractor, build_shared_head, 9 | build_voxel_encoder) 10 | from .decode_heads import * # noqa: F401,F403 11 | from .dense_heads import * # noqa: F401,F403 12 | from .detectors import * # noqa: F401,F403 13 | from .fusion_layers import * # noqa: F401,F403 14 | from .losses import * # noqa: F401,F403 15 | from .middle_encoders import * # noqa: F401,F403 16 | from .model_utils import * # noqa: F401,F403 17 | from .necks import * # noqa: F401,F403 18 | from .roi_heads import * # noqa: F401,F403 19 | from .segmentors import * # noqa: F401,F403 20 | from .voxel_encoders import * # noqa: F401,F403 21 | 22 | __all__ = [ 23 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 24 | 'DETECTORS', 'SEGMENTORS', 'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 25 | 'FUSION_LAYERS', 'build_backbone', 'build_neck', 'build_roi_extractor', 26 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector', 27 | 'build_fusion_layer', 'build_model', 'build_middle_encoder', 28 | 'build_voxel_encoder' 29 | ] 30 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
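# Background sketch (an assumption inferred from the fix below, not part of
# the original file): Lyft lidar scans are flat float32 binaries that reshape
# to (N, 5) columns, conventionally (x, y, z, intensity, ring index). The
# broken v1.01 file is short by exactly two values, so reshape([-1, 5]) raises
# ValueError; appending two floats restores a valid (N, 5) layout, e.g.:
#
#   import numpy as np
#   pts = np.fromfile('host-a011_lidar1_1233090652702363606.bin',
#                     dtype=np.float32).reshape([-1, 5])  # works after the fix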
2 | import argparse 3 | import os 4 | 5 | import numpy as np 6 | 7 | 8 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 9 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 10 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 11 | root_folder = os.path.join(root_folder, f'{version}-train') 12 | lidar_path = os.path.join(root_folder, lidar_path) 13 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 14 | f'dataset and make sure {lidar_path} is present.' 15 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 16 | try: 17 | points.reshape([-1, 5]) 18 | print(f'This fix is not required for version {version}.') 19 | except ValueError: 20 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 21 | new_points.tofile(lidar_path) 22 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 23 | 24 | 25 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 26 | parser.add_argument( 27 | '--root-folder', 28 | type=str, 29 | default='./data/lyft', 30 | help='specify the root path of Lyft dataset') 31 | parser.add_argument( 32 | '--version', 33 | type=str, 34 | default='v1.01', 35 | help='specify Lyft dataset version') 36 | args = parser.parse_args() 37 | 38 | if __name__ == '__main__': 39 | fix_lyft(root_folder=args.root_folder, version=args.version) 40 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | 7 | from mmdet3d.datasets import build_dataset 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='MMDet3D visualize the results') 13 | parser.add_argument('config', help='test config file path') 14 | parser.add_argument('--result', help='results file in pickle format') 15 | parser.add_argument( 16 | '--show-dir', help='directory where visualize results will be saved') 17 | args = parser.parse_args() 18 | 19 | return args 20 | 21 | 22 | def main(): 23 | args = parse_args() 24 | 25 | if args.result is not None and \ 26 | not args.result.endswith(('.pkl', '.pickle')): 27 | raise ValueError('The results file must be a pkl file.') 28 | 29 | cfg = Config.fromfile(args.config) 30 | cfg.data.test.test_mode = True 31 | 32 | # build the dataset 33 | dataset = build_dataset(cfg.data.test) 34 | results = mmcv.load(args.result) 35 | 36 | if getattr(dataset, 'show', None) is not None: 37 | # data loading pipeline for showing 38 | eval_pipeline = cfg.get('eval_pipeline', {}) 39 | if eval_pipeline: 40 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 41 | else: 42 | dataset.show(results, args.show_dir) # use default pipeline 43 | else: 44 | raise NotImplementedError( 45 | 'Show is not implemented for dataset {}!'.format( 46 | type(dataset).__name__)) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. 
Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then decay to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset (repeat ratio N), thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /configs/votenet/votenet_8x8_scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/scannet-3d-18class.py', '../_base_/models/votenet.py', 3 | '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | bbox_head=dict( 9 | num_classes=18, 10 | bbox_coder=dict( 11 | type='PartialBinBasedBBoxCoder', 12 | num_sizes=18, 13 | num_dir_bins=1, 14 | with_rot=False, 15 | mean_sizes=[[0.76966727, 0.8116021, 0.92573744], 16 | [1.876858, 1.8425595, 1.1931566], 17 | [0.61328, 0.6148609, 0.7182701], 18 | [1.3955007, 1.5121545, 0.83443564], 19 | [0.97949594, 1.0675149, 0.6329687], 20 | [0.531663, 0.5955577, 1.7500148], 21 | [0.9624706, 0.72462326, 1.1481868], 22 | [0.83221924, 1.0490936, 1.6875663], 23 | [0.21132214, 0.4206159, 0.5372846], 24 | [1.4440073, 1.8970833, 0.26985747], 25 | [1.0294262, 1.4040797, 0.87554324], 26 | [1.3766412, 0.65521795, 1.6813129], 27 | [0.6650819, 0.71111923, 1.298853], 28 | [0.41999173, 0.37906948, 1.7513971], 29 | [0.59359556, 0.5912492, 0.73919016], 30 | [0.50867593, 0.50656086, 0.30136237], 31 | [1.1511526, 1.0546296, 0.49706793], 32 | [0.47535285, 0.49249494, 0.5802117]]))) 33 | 34 | # yapf:disable 35 | log_config = dict(interval=30) 36 | # yapf:enable 37 | -------------------------------------------------------------------------------- /data/sunrgbd/matlab/extract_split.m: -------------------------------------------------------------------------------- 1 | % Modified from 2 | % https://github.com/facebookresearch/votenet/blob/master/sunrgbd/matlab/extract_split.m 3 | % Copyright (c) Facebook, Inc. and its affiliates. 4 | % 5 | % This source code is licensed under the MIT license found in the 6 | % LICENSE file in the root directory of this source tree. 7 | 8 | %% Dump train/val split. 9 | % Author: Charles R.
Qi 10 | 11 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 12 | 13 | %% Construct Hash Map 14 | hash_train = java.util.Hashtable; 15 | hash_val = java.util.Hashtable; 16 | 17 | split = load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat'); 18 | 19 | N_train = length(split.alltrain); 20 | N_val = length(split.alltest); 21 | 22 | for i = 1:N_train 23 | folder_path = split.alltrain{i}; 24 | folder_path(1:16) = ''; 25 | hash_train.put(folder_path,0); 26 | end 27 | for i = 1:N_val 28 | folder_path = split.alltest{i}; 29 | folder_path(1:16) = ''; 30 | hash_val.put(folder_path,0); 31 | end 32 | 33 | %% Map data to train or val set. 34 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 35 | if exist('../sunrgbd_trainval','dir')==0 36 | mkdir('../sunrgbd_trainval'); 37 | end 38 | fid_train = fopen('../sunrgbd_trainval/train_data_idx.txt', 'w'); 39 | fid_val = fopen('../sunrgbd_trainval/val_data_idx.txt', 'w'); 40 | 41 | for imageId = 1:10335 42 | data = SUNRGBDMeta(imageId); 43 | depthpath = data.depthpath; 44 | depthpath(1:16) = ''; 45 | [filepath,name,ext] = fileparts(depthpath); 46 | [filepath,name,ext] = fileparts(filepath); 47 | if hash_train.containsKey(filepath) 48 | fprintf(fid_train, '%d\n', imageId); 49 | elseif hash_val.containsKey(filepath) 50 | fprintf(fid_val, '%d\n', imageId); 51 | else 52 | a = 1; 53 | end 54 | end 55 | fclose(fid_train); 56 | fclose(fid_val); 57 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 3 | from .coders import DeltaXYZWLHRBBoxCoder 4 | # from .bbox_target import bbox_target 5 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 6 | BboxOverlapsNearest3D, 7 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 8 | bbox_overlaps_nearest_3d) 9 | from .samplers import (BaseSampler, CombinedSampler, 10 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 11 | PseudoSampler, RandomSampler, SamplingResult) 12 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, 13 | Coord3DMode, DepthInstance3DBoxes, 14 | LiDARInstance3DBoxes, get_box_type, limit_period, 15 | mono_cam_box2vis, points_cam2img, points_img2cam, 16 | xywhr2xyxyr) 17 | from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back 18 | 19 | __all__ = [ 20 | 'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 21 | 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 22 | 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 23 | 'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 24 | 'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 25 | 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode', 26 | 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi', 27 | 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes', 28 | 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img', 29 | 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis' 30 | ] 31 | -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | 
scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /mmdet3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | import mmdet 5 | import mmseg 6 | from .version import __version__, short_version 7 | 8 | 9 | def digit_version(version_str): 10 | digit_version = [] 11 | for x in version_str.split('.'): 12 | if x.isdigit(): 13 | digit_version.append(int(x)) 14 | elif x.find('rc') != -1: 15 | patch_version = x.split('rc') 16 | digit_version.append(int(patch_version[0]) - 1) 17 | digit_version.append(int(patch_version[1])) 18 | return digit_version 19 | 20 | 21 | mmcv_minimum_version = '1.5.2' 22 | mmcv_maximum_version = '1.7.0' 23 | mmcv_version = digit_version(mmcv.__version__) 24 | 25 | 26 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 27 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 28 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 29 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 30 | 31 | mmdet_minimum_version = '2.24.0' 32 | mmdet_maximum_version = '3.0.0' 33 | mmdet_version = digit_version(mmdet.__version__) 34 | assert (mmdet_version >= digit_version(mmdet_minimum_version) 35 | and mmdet_version <= digit_version(mmdet_maximum_version)), \ 36 | f'MMDET=={mmdet.__version__} is used but incompatible. ' \ 37 | f'Please install mmdet>={mmdet_minimum_version}, ' \ 38 | f'<={mmdet_maximum_version}.' 
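# A quick sanity check of digit_version's ordering semantics (illustrative
# sketch only, not part of the original file): release candidates are encoded
# so they sort below the final release when the returned lists are compared:
#   digit_version('1.7.0rc1')  # -> [1, 7, -1, 1]
#   digit_version('1.7.0')     # -> [1, 7, 0]
#   digit_version('1.7.0rc1') < digit_version('1.7.0')  # -> True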
39 | 40 | mmseg_minimum_version = '0.20.0' 41 | mmseg_maximum_version = '1.0.0' 42 | mmseg_version = digit_version(mmseg.__version__) 43 | assert (mmseg_version >= digit_version(mmseg_minimum_version) 44 | and mmseg_version <= digit_version(mmseg_maximum_version)), \ 45 | f'MMSEG=={mmseg.__version__} is used but incompatible. ' \ 46 | f'Please install mmseg>={mmseg_minimum_version}, ' \ 47 | f'<={mmseg_maximum_version}.' 48 | 49 | __all__ = ['__version__', 'short_version'] 50 | -------------------------------------------------------------------------------- /mmdet3d/models/utils/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule 4 | from torch import nn as nn 5 | 6 | 7 | class MLP(BaseModule): 8 | """A simple MLP module. 9 | 10 | Pass features (B, C, N) through an MLP. 11 | 12 | Args: 13 | in_channel (int, optional): Number of channels of input features. 14 | Default: 18. 15 | conv_channels (tuple[int], optional): Out channels of the convolution. 16 | Default: (256, 256). 17 | conv_cfg (dict, optional): Config of convolution. 18 | Default: dict(type='Conv1d'). 19 | norm_cfg (dict, optional): Config of normalization. 20 | Default: dict(type='BN1d'). 21 | act_cfg (dict, optional): Config of activation. 22 | Default: dict(type='ReLU'). 23 | """ 24 | 25 | def __init__(self, 26 | in_channel=18, 27 | conv_channels=(256, 256), 28 | conv_cfg=dict(type='Conv1d'), 29 | norm_cfg=dict(type='BN1d'), 30 | act_cfg=dict(type='ReLU'), 31 | init_cfg=None): 32 | super().__init__(init_cfg=init_cfg) 33 | self.mlp = nn.Sequential() 34 | prev_channels = in_channel 35 | for i, conv_channel in enumerate(conv_channels): 36 | self.mlp.add_module( 37 | f'layer{i}', 38 | ConvModule( 39 | prev_channels, 40 | conv_channels[i], 41 | 1, 42 | padding=0, 43 | conv_cfg=conv_cfg, 44 | norm_cfg=norm_cfg, 45 | act_cfg=act_cfg, 46 | bias=True, 47 | inplace=True)) 48 | prev_channels = conv_channels[i] 49 | 50 | def forward(self, img_features): 51 | return self.mlp(img_features) 52 | -------------------------------------------------------------------------------- /data/sunrgbd/matlab/extract_rgbd_data_v1.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Extract using V1 labels. 11 | % 12 | % Author: Charles R. Qi 13 | % 14 | clear; close all; clc; 15 | addpath(genpath('.')) 16 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 17 | %% V1 2D&3D BB and Seg masks 18 | load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/Metadata/SUNRGBDMeta.mat') 19 | % load('./Metadata/SUNRGBD2Dseg.mat') 20 | 21 | %% Create folders 22 | det_label_folder = '../sunrgbd_trainval/label_v1/'; 23 | mkdir(det_label_folder); 24 | %% Read 25 | for imageId = 1:10335 26 | imageId 27 | try 28 | data = SUNRGBDMeta(imageId); 29 | data.depthpath(1:16) = ''; 30 | data.depthpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.depthpath); 31 | data.rgbpath(1:16) = ''; 32 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.rgbpath); 33 | 34 | % MAT files are 3x smaller than TXT files.
In Python we can use 35 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 36 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 37 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 38 | 39 | % Write 2D and 3D box label 40 | data2d = data; 41 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 42 | for j = 1:length(data.groundtruth3DBB) 43 | centroid = data.groundtruth3DBB(j).centroid; 44 | classname = data.groundtruth3DBB(j).classname; 45 | orientation = data.groundtruth3DBB(j).orientation; 46 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 47 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 48 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 49 | end 50 | fclose(fid); 51 | 52 | catch 53 | end 54 | 55 | end 56 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .dbsampler import DataBaseSampler 4 | from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D 5 | from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D, 6 | LoadMultiViewImageFromFiles, LoadPointsFromDict, 7 | LoadPointsFromFile, LoadPointsFromMultiSweeps, 8 | NormalizePointsColor, PointSegClassMapping) 9 | from .test_time_aug import MultiScaleFlipAug3D 10 | # yapf: disable 11 | from .transforms_3d import (AffineResize, BackgroundPointsFilter, 12 | GlobalAlignment, 13 | GlobalRotScaleTrans, 14 | IndoorPatchPointSample, IndoorPointSample, 15 | MultiViewWrapper, ObjectNameFilter, ObjectNoise, 16 | ObjectRangeFilter, ObjectSample, PointSample, 17 | PointShuffle, PointsRangeFilter, 18 | RandomDropPointsColor, RandomFlip3D, 19 | RandomJitterPoints, RandomRotate, RandomShiftScale, 20 | RangeLimitedRandomCrop, VoxelBasedPointSampler) 21 | 22 | __all__ = [ 23 | 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 24 | 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D', 25 | 'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile', 26 | 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 27 | 'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample', 28 | 'PointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D', 29 | 'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter', 30 | 'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample', 31 | 'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor', 32 | 'RandomJitterPoints', 'AffineResize', 'RandomShiftScale', 33 | 'LoadPointsFromDict', 'MultiViewWrapper', 'RandomRotate', 34 | 'RangeLimitedRandomCrop' 35 | ] 36 | -------------------------------------------------------------------------------- /configs/votenet/votenet_lg3d_scannet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/scannet-3d-18class.py', '../_base_/models/lg3d_votenet.py', 3 | '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | bbox_head=dict( 9 | num_classes=18, 10 | bbox_coder=dict( 11 | type='PartialBinBasedBBoxCoder', 12 | num_sizes=18, 13 | num_dir_bins=1, 14 | with_rot=False, 15 | mean_sizes=[[0.76966727, 0.8116021, 0.92573744], 16 | [1.876858,
1.8425595, 1.1931566], 17 | [0.61328, 0.6148609, 0.7182701], 18 | [1.3955007, 1.5121545, 0.83443564], 19 | [0.97949594, 1.0675149, 0.6329687], 20 | [0.531663, 0.5955577, 1.7500148], 21 | [0.9624706, 0.72462326, 1.1481868], 22 | [0.83221924, 1.0490936, 1.6875663], 23 | [0.21132214, 0.4206159, 0.5372846], 24 | [1.4440073, 1.8970833, 0.26985747], 25 | [1.0294262, 1.4040797, 0.87554324], 26 | [1.3766412, 0.65521795, 1.6813129], 27 | [0.6650819, 0.71111923, 1.298853], 28 | [0.41999173, 0.37906948, 1.7513971], 29 | [0.59359556, 0.5912492, 0.73919016], 30 | [0.50867593, 0.50656086, 0.30136237], 31 | [1.1511526, 1.0546296, 0.49706793], 32 | [0.47535285, 0.49249494, 0.5802117]]))) 33 | 34 | # yapf:disable 35 | log_config = dict(interval=30) 36 | # yapf:enable 37 | 38 | 39 | # optimizer 40 | # This schedule is mainly used by models on indoor datasets, 41 | # e.g., VoteNet on SUNRGBD and ScanNet 42 | lr = 0.004 # max learning rate 43 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 44 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 45 | lr_config = dict(policy='step', warmup=None, step=[48, 64]) 46 | # runtime settings 47 | runner = dict(type='EpochBasedRunner', max_epochs=72) 48 | -------------------------------------------------------------------------------- /mmdet3d/models/detectors/two_stage.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | from mmdet.models import TwoStageDetector 5 | from ..builder import DETECTORS, build_backbone, build_head, build_neck 6 | from .base import Base3DDetector 7 | 8 | 9 | @DETECTORS.register_module() 10 | class TwoStage3DDetector(Base3DDetector, TwoStageDetector): 11 | """Base class of two-stage 3D detectors. 12 | 13 | It inherits the original :class:`TwoStageDetector` and 14 | :class:`Base3DDetector`. This class could serve as a base class for all 15 | two-stage 3D detectors. 16 | """ 17 | 18 | def __init__(self, 19 | backbone, 20 | neck=None, 21 | rpn_head=None, 22 | roi_head=None, 23 | train_cfg=None, 24 | test_cfg=None, 25 | pretrained=None, 26 | init_cfg=None): 27 | super(TwoStageDetector, self).__init__(init_cfg) 28 | if pretrained: 29 | warnings.warn('DeprecationWarning: pretrained is deprecated, ' 30 | 'please use "init_cfg" instead') 31 | backbone.pretrained = pretrained 32 | self.backbone = build_backbone(backbone) 33 | self.train_cfg = train_cfg 34 | self.test_cfg = test_cfg 35 | if neck is not None: 36 | self.neck = build_neck(neck) 37 | 38 | if rpn_head is not None: 39 | rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None 40 | rpn_head_ = rpn_head.copy() 41 | rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn) 42 | self.rpn_head = build_head(rpn_head_) 43 | 44 | if roi_head is not None: 45 | # update train and test cfg here for now 46 | # TODO: refactor assigner & sampler 47 | rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None 48 | roi_head.update(train_cfg=rcnn_train_cfg) 49 | roi_head.update(test_cfg=test_cfg.rcnn) 50 | roi_head.pretrained = pretrained 51 | self.roi_head = build_head(roi_head) 52 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
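# Shape sketch (illustrative only, not part of the original file):
# DGCNNFPModule applies a chain of shared 1x1 Conv1d layers to per-point
# features, mapping (B, N, mlp_channels[0]) to (B, N, mlp_channels[-1]).
# Assuming the channel setup used by DGCNNHead:
#
#   import torch
#   fp = DGCNNFPModule(mlp_channels=[1216, 512])
#   out = fp(torch.rand(2, 1024, 1216))  # out.shape == (2, 1024, 512)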
2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule, force_fp32 4 | from torch import nn as nn 5 | 6 | 7 | class DGCNNFPModule(BaseModule): 8 | """Point feature propagation module used in DGCNN. 9 | 10 | Propagate the features from one set to another. 11 | 12 | Args: 13 | mlp_channels (list[int]): List of mlp channels. 14 | norm_cfg (dict, optional): Type of normalization method. 15 | Defaults to dict(type='BN1d'). 16 | act_cfg (dict, optional): Type of activation method. 17 | Defaults to dict(type='ReLU'). 18 | init_cfg (dict, optional): Initialization config. Defaults to None. 19 | """ 20 | 21 | def __init__(self, 22 | mlp_channels, 23 | norm_cfg=dict(type='BN1d'), 24 | act_cfg=dict(type='ReLU'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, ), 36 | stride=(1, ), 37 | conv_cfg=dict(type='Conv1d'), 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg)) 40 | 41 | @force_fp32() 42 | def forward(self, points): 43 | """forward. 44 | 45 | Args: 46 | points (Tensor): (B, N, C) tensor of the input points. 47 | 48 | Returns: 49 | Tensor: (B, N, M) M = mlp[-1], tensor of the new points. 50 | """ 51 | 52 | if points is not None: 53 | new_points = points.transpose(1, 2).contiguous() # (B, C, N) 54 | new_points = self.mlps(new_points) 55 | new_points = new_points.transpose(1, 2).contiguous() 56 | else: 57 | new_points = points 58 | 59 | return new_points 60 | -------------------------------------------------------------------------------- /tools/deployment/test_torchserver.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import numpy as np 4 | import requests 5 | 6 | from mmdet3d.apis import inference_detector, init_model 7 | 8 | 9 | def parse_args(): 10 | parser = ArgumentParser() 11 | parser.add_argument('pcd', help='Point cloud file') 12 | parser.add_argument('config', help='Config file') 13 | parser.add_argument('checkpoint', help='Checkpoint file') 14 | parser.add_argument('model_name', help='The model name in the server') 15 | parser.add_argument( 16 | '--inference-addr', 17 | default='127.0.0.1:8080', 18 | help='Address and port of the inference server') 19 | parser.add_argument( 20 | '--device', default='cuda:0', help='Device used for inference') 21 | parser.add_argument( 22 | '--score-thr', type=float, default=0.5, help='3d bbox score threshold') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def parse_result(input): 28 | bbox = input[0]['3dbbox'] 29 | result = np.array(bbox) 30 | return result 31 | 32 | 33 | def main(args): 34 | # build the model from a config file and a checkpoint file 35 | model = init_model(args.config, args.checkpoint, device=args.device) 36 | # test a single point cloud file 37 | model_result, _ = inference_detector(model, args.pcd) 38 | # filter out the 3d bboxes whose scores are below the given threshold 39 | if 'pts_bbox' in model_result[0].keys(): 40 | pred_bboxes = model_result[0]['pts_bbox']['boxes_3d'].tensor.numpy() 41 | pred_scores = model_result[0]['pts_bbox']['scores_3d'].numpy() 42 | else: 43 | pred_bboxes = model_result[0]['boxes_3d'].tensor.numpy() 44 | pred_scores = model_result[0]['scores_3d'].numpy() 45 | model_result = pred_bboxes[pred_scores > args.score_thr] 46 | 47 | url = 'http://' + args.inference_addr + '/predictions/' +
args.model_name 48 | with open(args.pcd, 'rb') as points: 49 | response = requests.post(url, points) 50 | server_result = parse_result(response.json()) 51 | assert np.allclose(model_result, server_result) 52 | 53 | 54 | if __name__ == '__main__': 55 | args = parse_args() 56 | main(args) 57 | -------------------------------------------------------------------------------- /mmdet3d/datasets/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import platform 3 | 4 | from mmcv.utils import Registry, build_from_cfg 5 | 6 | from mmdet.datasets import DATASETS as MMDET_DATASETS 7 | from mmdet.datasets.builder import _concat_dataset 8 | 9 | if platform.system() != 'Windows': 10 | # https://github.com/pytorch/pytorch/issues/973 11 | import resource 12 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 13 | base_soft_limit = rlimit[0] 14 | hard_limit = rlimit[1] 15 | soft_limit = min(max(4096, base_soft_limit), hard_limit) 16 | resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) 17 | 18 | OBJECTSAMPLERS = Registry('Object sampler') 19 | DATASETS = Registry('dataset') 20 | PIPELINES = Registry('pipeline') 21 | 22 | 23 | def build_dataset(cfg, default_args=None): 24 | from mmdet3d.datasets.dataset_wrappers import CBGSDataset 25 | from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset, 26 | ConcatDataset, RepeatDataset) 27 | if isinstance(cfg, (list, tuple)): 28 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 29 | elif cfg['type'] == 'ConcatDataset': 30 | dataset = ConcatDataset( 31 | [build_dataset(c, default_args) for c in cfg['datasets']], 32 | cfg.get('separate_eval', True)) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif cfg['type'] == 'ClassBalancedDataset': 37 | dataset = ClassBalancedDataset( 38 | build_dataset(cfg['dataset'], default_args), cfg['oversample_thr']) 39 | elif cfg['type'] == 'CBGSDataset': 40 | dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args)) 41 | elif isinstance(cfg.get('ann_file'), (list, tuple)): 42 | dataset = _concat_dataset(cfg, default_args) 43 | elif cfg['type'] in DATASETS._module_dict.keys(): 44 | dataset = build_from_cfg(cfg, DATASETS, default_args) 45 | else: 46 | dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args) 47 | return dataset 48 | -------------------------------------------------------------------------------- /mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv import ops 4 | from mmcv.runner import BaseModule 5 | 6 | from mmdet3d.models.builder import ROI_EXTRACTORS 7 | 8 | 9 | @ROI_EXTRACTORS.register_module() 10 | class Single3DRoIAwareExtractor(BaseModule): 11 | """Point-wise roi-aware Extractor. 12 | 13 | Extract Point-wise roi features. 14 | 15 | Args: 16 | roi_layer (dict): The config of roi layer. 
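        init_cfg (dict, optional): Initialization config. Defaults to None.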
17 | """ 18 | 19 | def __init__(self, roi_layer=None, init_cfg=None): 20 | super(Single3DRoIAwareExtractor, self).__init__(init_cfg=init_cfg) 21 | self.roi_layer = self.build_roi_layers(roi_layer) 22 | 23 | def build_roi_layers(self, layer_cfg): 24 | """Build roi layers using `layer_cfg`""" 25 | cfg = layer_cfg.copy() 26 | layer_type = cfg.pop('type') 27 | assert hasattr(ops, layer_type) 28 | layer_cls = getattr(ops, layer_type) 29 | roi_layers = layer_cls(**cfg) 30 | return roi_layers 31 | 32 | def forward(self, feats, coordinate, batch_inds, rois): 33 | """Extract point-wise roi features. 34 | 35 | Args: 36 | feats (torch.FloatTensor): Point-wise features with 37 | shape (batch, npoints, channels) for pooling. 38 | coordinate (torch.FloatTensor): Coordinate of each point. 39 | batch_inds (torch.LongTensor): Indicate the batch of each point. 40 | rois (torch.FloatTensor): Roi boxes with batch indices. 41 | 42 | Returns: 43 | torch.FloatTensor: Pooled features 44 | """ 45 | pooled_roi_feats = [] 46 | for batch_idx in range(int(batch_inds.max()) + 1): 47 | roi_inds = (rois[..., 0].int() == batch_idx) 48 | coors_inds = (batch_inds.int() == batch_idx) 49 | pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds], 50 | coordinate[coors_inds], 51 | feats[coors_inds]) 52 | pooled_roi_feats.append(pooled_roi_feat) 53 | pooled_roi_feats = torch.cat(pooled_roi_feats, 0) 54 | return pooled_roi_feats 55 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES 7 | from ..builder import PIPELINES 8 | 9 | 10 | @PIPELINES.register_module() 11 | class Compose: 12 | """Compose multiple transforms sequentially. The pipeline registry of 13 | mmdet3d separates with mmdet, however, sometimes we may need to use mmdet's 14 | pipeline. So the class is rewritten to be able to use pipelines from both 15 | mmdet3d and mmdet. 16 | 17 | Args: 18 | transforms (Sequence[dict | callable]): Sequence of transform object or 19 | config dict to be composed. 20 | """ 21 | 22 | def __init__(self, transforms): 23 | assert isinstance(transforms, collections.abc.Sequence) 24 | self.transforms = [] 25 | for transform in transforms: 26 | if isinstance(transform, dict): 27 | _, key = PIPELINES.split_scope_key(transform['type']) 28 | if key in PIPELINES._module_dict.keys(): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | else: 31 | transform = build_from_cfg(transform, MMDET_PIPELINES) 32 | self.transforms.append(transform) 33 | elif callable(transform): 34 | self.transforms.append(transform) 35 | else: 36 | raise TypeError('transform must be callable or a dict') 37 | 38 | def __call__(self, data): 39 | """Call function to apply transforms sequentially. 40 | 41 | Args: 42 | data (dict): A result dict contains the data to transform. 43 | 44 | Returns: 45 | dict: Transformed data. 
46 | """ 47 | 48 | for t in self.transforms: 49 | data = t(data) 50 | if data is None: 51 | return None 52 | return data 53 | 54 | def __repr__(self): 55 | format_string = self.__class__.__name__ + '(' 56 | for t in self.transforms: 57 | format_string += '\n' 58 | format_string += f' {t}' 59 | format_string += '\n)' 60 | return format_string 61 | -------------------------------------------------------------------------------- /mmdet3d/models/decode_heads/dgcnn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn.bricks import ConvModule 3 | 4 | from mmdet3d.ops import DGCNNFPModule 5 | from ..builder import HEADS 6 | from .decode_head import Base3DDecodeHead 7 | 8 | 9 | @HEADS.register_module() 10 | class DGCNNHead(Base3DDecodeHead): 11 | r"""DGCNN decoder head. 12 | 13 | Decoder head used in `DGCNN `_. 14 | Refer to the 15 | `reimplementation code `_. 16 | 17 | Args: 18 | fp_channels (tuple[int], optional): Tuple of mlp channels in feature 19 | propagation (FP) modules. Defaults to (1216, 512). 20 | """ 21 | 22 | def __init__(self, fp_channels=(1216, 512), **kwargs): 23 | super(DGCNNHead, self).__init__(**kwargs) 24 | 25 | self.FP_module = DGCNNFPModule( 26 | mlp_channels=fp_channels, act_cfg=self.act_cfg) 27 | 28 | # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L40 29 | self.pre_seg_conv = ConvModule( 30 | fp_channels[-1], 31 | self.channels, 32 | kernel_size=1, 33 | bias=False, 34 | conv_cfg=self.conv_cfg, 35 | norm_cfg=self.norm_cfg, 36 | act_cfg=self.act_cfg) 37 | 38 | def _extract_input(self, feat_dict): 39 | """Extract inputs from features dictionary. 40 | 41 | Args: 42 | feat_dict (dict): Feature dict from backbone. 43 | 44 | Returns: 45 | torch.Tensor: points for decoder. 46 | """ 47 | fa_points = feat_dict['fa_points'] 48 | 49 | return fa_points 50 | 51 | def forward(self, feat_dict): 52 | """Forward pass. 53 | 54 | Args: 55 | feat_dict (dict): Feature dict from backbone. 56 | 57 | Returns: 58 | torch.Tensor: Segmentation map of shape [B, num_classes, N]. 59 | """ 60 | fa_points = self._extract_input(feat_dict) 61 | 62 | fp_points = self.FP_module(fa_points) 63 | fp_points = fp_points.transpose(1, 2).contiguous() 64 | output = self.pre_seg_conv(fp_points) 65 | output = self.cls_seg(output) 66 | 67 | return output 68 | -------------------------------------------------------------------------------- /mmdet3d/models/detectors/mvx_faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.runner import force_fp32 4 | from torch.nn import functional as F 5 | 6 | from ..builder import DETECTORS 7 | from .mvx_two_stage import MVXTwoStageDetector 8 | 9 | 10 | @DETECTORS.register_module() 11 | class MVXFasterRCNN(MVXTwoStageDetector): 12 | """Multi-modality VoxelNet using Faster R-CNN.""" 13 | 14 | def __init__(self, **kwargs): 15 | super(MVXFasterRCNN, self).__init__(**kwargs) 16 | 17 | 18 | @DETECTORS.register_module() 19 | class DynamicMVXFasterRCNN(MVXTwoStageDetector): 20 | """Multi-modality VoxelNet using Faster R-CNN and dynamic voxelization.""" 21 | 22 | def __init__(self, **kwargs): 23 | super(DynamicMVXFasterRCNN, self).__init__(**kwargs) 24 | 25 | @torch.no_grad() 26 | @force_fp32() 27 | def voxelize(self, points): 28 | """Apply dynamic voxelization to points. 
29 | 30 | Args: 31 | points (list[torch.Tensor]): Points of each sample. 32 | 33 | Returns: 34 | tuple[torch.Tensor]: Concatenated points and coordinates. 35 | """ 36 | coors = [] 37 | # dynamic voxelization only provide a coors mapping 38 | for res in points: 39 | res_coors = self.pts_voxel_layer(res) 40 | coors.append(res_coors) 41 | points = torch.cat(points, dim=0) 42 | coors_batch = [] 43 | for i, coor in enumerate(coors): 44 | coor_pad = F.pad(coor, (1, 0), mode='constant', value=i) 45 | coors_batch.append(coor_pad) 46 | coors_batch = torch.cat(coors_batch, dim=0) 47 | return points, coors_batch 48 | 49 | def extract_pts_feat(self, points, img_feats, img_metas): 50 | """Extract point features.""" 51 | if not self.with_pts_bbox: 52 | return None 53 | voxels, coors = self.voxelize(points) 54 | voxel_features, feature_coors = self.pts_voxel_encoder( 55 | voxels, coors, points, img_feats, img_metas) 56 | batch_size = coors[-1, 0] + 1 57 | x = self.pts_middle_encoder(voxel_features, feature_coors, batch_size) 58 | x = self.pts_backbone(x) 59 | if self.with_pts_neck: 60 | x = self.pts_neck(x) 61 | return x 62 | -------------------------------------------------------------------------------- /configs/votenet/metafile.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Name: VoteNet 3 | Metadata: 4 | Training Techniques: 5 | - AdamW 6 | Training Resources: 8x V100 GPUs 7 | Architecture: 8 | - PointNet++ 9 | Paper: 10 | URL: https://arxiv.org/abs/1904.09664 11 | Title: 'Deep Hough Voting for 3D Object Detection in Point Clouds' 12 | README: configs/votenet/README.md 13 | Code: 14 | URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/votenet.py#L10 15 | Version: v0.5.0 16 | 17 | Models: 18 | - Name: votenet_16x8_sunrgbd-3d-10class 19 | In Collection: VoteNet 20 | Config: configs/votenet/votenet_16x8_sunrgbd-3d-10class.py 21 | Metadata: 22 | Training Data: SUNRGBD 23 | Training Memory (GB): 8.1 24 | Results: 25 | - Task: 3D Object Detection 26 | Dataset: SUNRGBD 27 | Metrics: 28 | AP@0.25: 59.78 29 | AP@0.5: 35.77 30 | Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_16x8_sunrgbd-3d-10class/votenet_16x8_sunrgbd-3d-10class_20210820_162823-bf11f014.pth 31 | 32 | - Name: votenet_8x8_scannet-3d-18class 33 | In Collection: VoteNet 34 | Config: configs/votenet/votenet_8x8_scannet-3d-18class.py 35 | Metadata: 36 | Training Data: ScanNet 37 | Training Memory (GB): 4.1 38 | Results: 39 | - Task: 3D Object Detection 40 | Dataset: ScanNet 41 | Metrics: 42 | AP@0.25: 62.34 43 | AP@0.5: 40.82 44 | Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20210823_234503-cf8134fa.pth 45 | 46 | - Name: votenet_iouloss_8x8_scannet-3d-18class 47 | In Collection: VoteNet 48 | Config: configs/votenet/votenet_iouloss_8x8_scannet-3d-18class.py 49 | Metadata: 50 | Training Data: ScanNet 51 | Training Memory (GB): 4.1 52 | Architecture: 53 | - IoU Loss 54 | Results: 55 | - Task: 3D Object Detection 56 | Dataset: ScanNet 57 | Metrics: 58 | AP@0.25: 63.81 59 | AP@0.5: 44.21 60 | Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20210823_234503-cf8134fa.pth 61 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.ipynb 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/en/_build/ 69 | docs/zh_cn/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # cython generated cpp 109 | # data 110 | .vscode 111 | .idea 112 | 113 | # custom 114 | *.pkl 115 | *.pkl.json 116 | *.log.json 117 | work_dirs/ 118 | exps/ 119 | *~ 120 | mmdet3d/.mim 121 | 122 | # Pytorch 123 | *.pth 124 | 125 | # demo 126 | *.jpg 127 | # *.png 128 | data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/ 129 | data/scannet/scans/ 130 | data/sunrgbd/OFFICIAL_SUNRGBD/ 131 | *.obj 132 | *.ply 133 | 134 | # Waymo evaluation 135 | mmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main 136 | 137 | log/* -------------------------------------------------------------------------------- /mmdet3d/models/decode_heads/paconv_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn.bricks import ConvModule 3 | 4 | from ..builder import HEADS 5 | from .pointnet2_head import PointNet2Head 6 | 7 | 8 | @HEADS.register_module() 9 | class PAConvHead(PointNet2Head): 10 | r"""PAConv decoder head. 11 | 12 | Decoder head used in `PAConv `_. 13 | Refer to the `official code `_. 14 | 15 | Args: 16 | fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules. 17 | fp_norm_cfg (dict): Config of norm layers used in FP modules. 18 | Default: dict(type='BN2d'). 
19 | """ 20 | 21 | def __init__(self, 22 | fp_channels=((768, 256, 256), (384, 256, 256), 23 | (320, 256, 128), (128 + 6, 128, 128, 128)), 24 | fp_norm_cfg=dict(type='BN2d'), 25 | **kwargs): 26 | super(PAConvHead, self).__init__(fp_channels, fp_norm_cfg, **kwargs) 27 | 28 | # https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/pointnet2/pointnet2_paconv_seg.py#L53 29 | # PointNet++'s decoder conv has bias while PAConv's doesn't have 30 | # so we need to rebuild it here 31 | self.pre_seg_conv = ConvModule( 32 | fp_channels[-1][-1], 33 | self.channels, 34 | kernel_size=1, 35 | bias=False, 36 | conv_cfg=self.conv_cfg, 37 | norm_cfg=self.norm_cfg, 38 | act_cfg=self.act_cfg) 39 | 40 | def forward(self, feat_dict): 41 | """Forward pass. 42 | 43 | Args: 44 | feat_dict (dict): Feature dict from backbone. 45 | 46 | Returns: 47 | torch.Tensor: Segmentation map of shape [B, num_classes, N]. 48 | """ 49 | sa_xyz, sa_features = self._extract_input(feat_dict) 50 | 51 | # PointNet++ doesn't use the first level of `sa_features` as input 52 | # while PAConv inputs it through skip-connection 53 | fp_feature = sa_features[-1] 54 | 55 | for i in range(self.num_fp): 56 | # consume the points in a bottom-up manner 57 | fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)], 58 | sa_features[-(i + 2)], fp_feature) 59 | 60 | output = self.pre_seg_conv(fp_feature) 61 | output = self.cls_seg(output) 62 | 63 | return output 64 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import torch 5 | from mmcv.runner import save_checkpoint 6 | from torch import nn as nn 7 | 8 | from mmdet3d.apis import init_model 9 | 10 | 11 | def fuse_conv_bn(conv, bn): 12 | """During inference, the functionary of batch norm layers is turned off but 13 | only the mean and var alone channels are used, which exposes the chance to 14 | fuse it with the preceding conv layers to save computations and simplify 15 | network structures.""" 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 18 | bn.running_mean) 19 | 20 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 21 | conv.weight = nn.Parameter(conv_w * 22 | factor.reshape([conv.out_channels, 1, 1, 1])) 23 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 24 | return conv 25 | 26 | 27 | def fuse_module(m): 28 | last_conv = None 29 | last_conv_name = None 30 | 31 | for name, child in m.named_children(): 32 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 33 | if last_conv is None: # only fuse BN that is after Conv 34 | continue 35 | fused_conv = fuse_conv_bn(last_conv, child) 36 | m._modules[last_conv_name] = fused_conv 37 | # To reduce changes, set BN as Identity instead of deleting it. 
38 | m._modules[name] = nn.Identity() 39 | last_conv = None 40 | elif isinstance(child, nn.Conv2d): 41 | last_conv = child 42 | last_conv_name = name 43 | else: 44 | fuse_module(child) 45 | return m 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='fuse Conv and BN layers in a model') 51 | parser.add_argument('config', help='config file path') 52 | parser.add_argument('checkpoint', help='checkpoint file path') 53 | parser.add_argument('out', help='output path of the converted model') 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def main(): 59 | args = parse_args() 60 | # build the model from a config file and a checkpoint file 61 | model = init_model(args.config, args.checkpoint) 62 | # fuse conv and bn layers of the model 63 | fused_model = fuse_module(model) 64 | save_checkpoint(fused_model, args.out) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fa_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ConvModule 4 | from mmcv.runner import BaseModule, force_fp32 5 | from torch import nn as nn 6 | 7 | 8 | class DGCNNFAModule(BaseModule): 9 | """Point feature aggregation module used in DGCNN. 10 | 11 | Aggregate all the features of points. 12 | 13 | Args: 14 | mlp_channels (list[int]): List of mlp channels. 15 | norm_cfg (dict, optional): Type of normalization method. 16 | Defaults to dict(type='BN1d'). 17 | act_cfg (dict, optional): Type of activation method. 18 | Defaults to dict(type='ReLU'). 19 | init_cfg (dict, optional): Initialization config. Defaults to None. 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels, 24 | norm_cfg=dict(type='BN1d'), 25 | act_cfg=dict(type='ReLU'), 26 | init_cfg=None): 27 | super().__init__(init_cfg=init_cfg) 28 | self.fp16_enabled = False 29 | self.mlps = nn.Sequential() 30 | for i in range(len(mlp_channels) - 1): 31 | self.mlps.add_module( 32 | f'layer{i}', 33 | ConvModule( 34 | mlp_channels[i], 35 | mlp_channels[i + 1], 36 | kernel_size=(1, ), 37 | stride=(1, ), 38 | conv_cfg=dict(type='Conv1d'), 39 | norm_cfg=norm_cfg, 40 | act_cfg=act_cfg)) 41 | 42 | @force_fp32() 43 | def forward(self, points): 44 | """forward. 45 | 46 | Args: 47 | points (List[Tensor]): tensor of the features to be aggregated. 48 | 49 | Returns: 50 | Tensor: (B, N, M) M = mlp[-1], tensor of the output points. 51 | """ 52 | 53 | if len(points) > 1: 54 | new_points = torch.cat(points[1:], dim=-1) 55 | new_points = new_points.transpose(1, 2).contiguous() # (B, C, N) 56 | new_points_copy = new_points 57 | 58 | new_points = self.mlps(new_points) 59 | 60 | new_fa_points = new_points.max(dim=-1, keepdim=True)[0] 61 | new_fa_points = new_fa_points.repeat(1, 1, new_points.shape[-1]) 62 | 63 | new_points = torch.cat([new_fa_points, new_points_copy], dim=1) 64 | new_points = new_points.transpose(1, 2).contiguous() 65 | else: 66 | new_points = points 67 | 68 | return new_points 69 | -------------------------------------------------------------------------------- /mmdet3d/core/points/depth_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class DepthPoints(BasePoints): 6 | """Points of instances in DEPTH coordinates. 
7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): an N x points_dim matrix. 10 | points_dim (int, optional): Number of dimensions of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (dict): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(DepthPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 1] = -self.tensor[:, 1] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/points/lidar_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class LiDARPoints(BasePoints): 6 | """Points of instances in LIDAR coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): an N x points_dim matrix. 10 | points_dim (int, optional): Number of dimensions of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (dict): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(LiDARPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical).
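In LiDAR coordinates the x axis points forward, the y axis left and the z axis up, so a 'horizontal' BEV flip negates the y coordinates and a 'vertical' flip negates x, as implemented below.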
34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 1] = -self.tensor[:, 1] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 0] = -self.tensor[:, 0] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/models/roi_heads/roi_extractors/single_roipoint_extractor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv import ops 4 | from torch import nn as nn 5 | 6 | from mmdet3d.core.bbox.structures import rotation_3d_in_axis 7 | from mmdet3d.models.builder import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module() 11 | class Single3DRoIPointExtractor(nn.Module): 12 | """Point-wise roi-aware Extractor. 13 | 14 | Extract Point-wise roi features. 15 | 16 | Args: 17 | roi_layer (dict): The config of roi layer. 18 | """ 19 | 20 | def __init__(self, roi_layer=None): 21 | super(Single3DRoIPointExtractor, self).__init__() 22 | self.roi_layer = self.build_roi_layers(roi_layer) 23 | 24 | def build_roi_layers(self, layer_cfg): 25 | """Build roi layers using `layer_cfg`""" 26 | cfg = layer_cfg.copy() 27 | layer_type = cfg.pop('type') 28 | assert hasattr(ops, layer_type) 29 | layer_cls = getattr(ops, layer_type) 30 | roi_layers = layer_cls(**cfg) 31 | return roi_layers 32 | 33 | def forward(self, feats, coordinate, batch_inds, rois): 34 | """Extract point-wise roi features. 35 | 36 | Args: 37 | feats (torch.FloatTensor): Point-wise features with 38 | shape (batch, npoints, channels) for pooling. 39 | coordinate (torch.FloatTensor): Coordinate of each point. 40 | batch_inds (torch.LongTensor): Indicate the batch of each point. 41 | rois (torch.FloatTensor): Roi boxes with batch indices. 42 | 43 | Returns: 44 | torch.FloatTensor: Pooled features 45 | """ 46 | rois = rois[..., 1:] 47 | rois = rois.view(batch_inds, -1, rois.shape[-1]) 48 | with torch.no_grad(): 49 | pooled_roi_feat, pooled_empty_flag = self.roi_layer( 50 | coordinate, feats, rois) 51 | 52 | # canonical transformation 53 | roi_center = rois[:, :, 0:3] 54 | pooled_roi_feat[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2) 55 | pooled_roi_feat = pooled_roi_feat.view(-1, 56 | pooled_roi_feat.shape[-2], 57 | pooled_roi_feat.shape[-1]) 58 | pooled_roi_feat[:, :, 0:3] = rotation_3d_in_axis( 59 | pooled_roi_feat[:, :, 0:3], 60 | -(rois.view(-1, rois.shape[-1])[:, 6]), 61 | axis=2) 62 | pooled_roi_feat[pooled_empty_flag.view(-1) > 0] = 0 63 | 64 | return pooled_roi_feat 65 | -------------------------------------------------------------------------------- /mmdet3d/models/detectors/dynamic_voxelnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | from mmcv.runner import force_fp32 4 | from torch.nn import functional as F 5 | 6 | from ..builder import DETECTORS 7 | from .voxelnet import VoxelNet 8 | 9 | 10 | @DETECTORS.register_module() 11 | class DynamicVoxelNet(VoxelNet): 12 | r"""VoxelNet using `dynamic voxelization 13 | <https://arxiv.org/abs/1910.06528>`_. 14 | """ 15 | 16 | def __init__(self, 17 | voxel_layer, 18 | voxel_encoder, 19 | middle_encoder, 20 | backbone, 21 | neck=None, 22 | bbox_head=None, 23 | train_cfg=None, 24 | test_cfg=None, 25 | pretrained=None, 26 | init_cfg=None): 27 | super(DynamicVoxelNet, self).__init__( 28 | voxel_layer=voxel_layer, 29 | voxel_encoder=voxel_encoder, 30 | middle_encoder=middle_encoder, 31 | backbone=backbone, 32 | neck=neck, 33 | bbox_head=bbox_head, 34 | train_cfg=train_cfg, 35 | test_cfg=test_cfg, 36 | pretrained=pretrained, 37 | init_cfg=init_cfg) 38 | 39 | def extract_feat(self, points, img_metas): 40 | """Extract features from points.""" 41 | voxels, coors = self.voxelize(points) 42 | voxel_features, feature_coors = self.voxel_encoder(voxels, coors) 43 | batch_size = coors[-1, 0].item() + 1 44 | x = self.middle_encoder(voxel_features, feature_coors, batch_size) 45 | x = self.backbone(x) 46 | if self.with_neck: 47 | x = self.neck(x) 48 | return x 49 | 50 | @torch.no_grad() 51 | @force_fp32() 52 | def voxelize(self, points): 53 | """Apply dynamic voxelization to points. 54 | 55 | Args: 56 | points (list[torch.Tensor]): Points of each sample. 57 | 58 | Returns: 59 | tuple[torch.Tensor]: Concatenated points and coordinates. 60 | """ 61 | coors = [] 62 | # dynamic voxelization only provides a coors mapping 63 | for res in points: 64 | res_coors = self.voxel_layer(res) 65 | coors.append(res_coors) 66 | points = torch.cat(points, dim=0) 67 | coors_batch = [] 68 | for i, coor in enumerate(coors): 69 | coor_pad = F.pad(coor, (1, 0), mode='constant', value=i) 70 | coors_batch.append(coor_pad) 71 | coors_batch = torch.cat(coors_batch, dim=0) 72 | return points, coors_batch 73 | -------------------------------------------------------------------------------- /mmdet3d/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import platform 4 | import warnings 5 | 6 | import cv2 7 | from torch import multiprocessing as mp 8 | 9 | 10 | def setup_multi_processes(cfg): 11 | """Setup multi-processing environment variables.""" 12 | # set multi-process start method as `fork` to speed up the training 13 | if platform.system() != 'Windows': 14 | mp_start_method = cfg.get('mp_start_method', 'fork') 15 | current_method = mp.get_start_method(allow_none=True) 16 | if current_method is not None and current_method != mp_start_method: 17 | warnings.warn( 18 | f'Multi-processing start method `{mp_start_method}` is ' 19 | f'different from the previous setting `{current_method}`. ' 20 | f'It will be forcibly set to `{mp_start_method}`.
You can change ' 21 | f'this behavior by changing `mp_start_method` in your config.') 22 | mp.set_start_method(mp_start_method, force=True) 23 | 24 | # disable opencv multithreading to avoid system being overloaded 25 | opencv_num_threads = cfg.get('opencv_num_threads', 0) 26 | cv2.setNumThreads(opencv_num_threads) 27 | 28 | # setup OMP threads 29 | # This code is adapted from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa 30 | workers_per_gpu = cfg.data.get('workers_per_gpu', 1) 31 | if 'train_dataloader' in cfg.data: 32 | workers_per_gpu = \ 33 | max(cfg.data.train_dataloader.get('workers_per_gpu', 1), 34 | workers_per_gpu) 35 | 36 | if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 37 | omp_num_threads = 1 38 | warnings.warn( 39 | f'Setting OMP_NUM_THREADS environment variable for each process ' 40 | f'to {omp_num_threads} by default, to avoid your system being ' 41 | f'overloaded. Please further tune the variable for optimal ' 42 | f'performance in your application as needed.') 43 | os.environ['OMP_NUM_THREADS'] = str(omp_num_threads) 44 | 45 | # setup MKL threads 46 | if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 47 | mkl_num_threads = 1 48 | warnings.warn( 49 | f'Setting MKL_NUM_THREADS environment variable for each process ' 50 | f'to {mkl_num_threads} by default, to avoid your system being ' 51 | f'overloaded. Please further tune the variable for optimal ' 52 | f'performance in your application as needed.') 53 | os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads) 54 | -------------------------------------------------------------------------------- /data/scannet/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare ScanNet Data for Indoor Detection or Segmentation Task 2 | 3 | We follow the procedure in [votenet](https://github.com/facebookresearch/votenet/). 4 | 5 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Link or move the 'scans' folder into this directory. If you are performing segmentation tasks and want to upload the results to the official [benchmark](http://kaldir.vc.in.tum.de/scannet_benchmark/), please also link or move the 'scans_test' folder into this directory. 6 | 7 | 2. In this directory, extract point clouds and annotations by running `python batch_load_scannet_data.py`. Add the `--max_num_point 50000` flag if you only use the ScanNet data for the detection task; it will downsample the scenes to fewer points. 8 | 9 | 3. In this directory, extract RGB images with poses by running `python extract_posed_images.py`. This step is optional; skip it if you don't plan to use multi-view RGB images. Add `--max-images-per-scene -1` to disable the per-scene image limit. ScanNet scenes contain up to 5000+ frames each: after extraction, all the .jpg images require about 2 TB of disk space, while the recommended 300 images per scene require less than 100 GB. For example, the multi-view 3D detector ImVoxelNet samples 50 images per training scene and 100 per test scene. 10 | 11 | 4. Enter the project root directory and generate training data by running 12 | 13 | ```bash 14 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet 15 | ``` 16 | 17 | The overall process can be achieved with the following script 18 | 19 | ```bash 20 | python batch_load_scannet_data.py 21 | python extract_posed_images.py 22 | cd ../..
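# now back at the project root; the command below assumes the default ./data/scannet layout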
23 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet 24 | ``` 25 | 26 | The directory structure after pre-processing should be as follows 27 | 28 | ``` 29 | scannet 30 | ├── meta_data 31 | ├── batch_load_scannet_data.py 32 | ├── load_scannet_data.py 33 | ├── scannet_utils.py 34 | ├── README.md 35 | ├── scans 36 | ├── scans_test 37 | ├── scannet_instance_data 38 | ├── points 39 | │ ├── xxxxx.bin 40 | ├── instance_mask 41 | │ ├── xxxxx.bin 42 | ├── semantic_mask 43 | │ ├── xxxxx.bin 44 | ├── seg_info 45 | │ ├── train_label_weight.npy 46 | │ ├── train_resampled_scene_idxs.npy 47 | │ ├── val_label_weight.npy 48 | │ ├── val_resampled_scene_idxs.npy 49 | ├── posed_images 50 | │ ├── scenexxxx_xx 51 | │ │ ├── xxxxxx.txt 52 | │ │ ├── xxxxxx.jpg 53 | │ │ ├── intrinsic.txt 54 | ├── scannet_infos_train.pkl 55 | ├── scannet_infos_val.pkl 56 | ├── scannet_infos_test.pkl 57 | 58 | ``` 59 | -------------------------------------------------------------------------------- /mmdet3d/models/backbones/lg3d_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from mmdet.models import BACKBONES 5 | from torch import nn as nn 6 | from torch.nn import functional as F 7 | 8 | 9 | @BACKBONES.register_module() 10 | class pointnet(nn.Module): 11 | def __init__(self, input_channel, init_cfg=None): 12 | super(pointnet, self).__init__() 13 | self.conv1 = torch.nn.Conv1d(input_channel, 64, 1) 14 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 15 | self.conv3 = torch.nn.Conv1d(128, 256, 1) 16 | self.bn1 = nn.BatchNorm1d(64) 17 | self.bn2 = nn.BatchNorm1d(128) 18 | self.bn3 = nn.BatchNorm1d(256) 19 | 20 | def forward(self, x): 21 | # shared per-point MLP: (B, N, C) -> (B, 256, N) 22 | x_trans = x.transpose(2, 1) 23 | x = F.relu(self.bn1(self.conv1(x_trans))) 24 | x = F.relu(self.bn2(self.conv2(x))) 25 | x = self.bn3(self.conv3(x)) 26 | return x 27 | 28 | 29 | @BACKBONES.register_module() 30 | class inducer_attention(nn.Module): 31 | def __init__(self, input_channel, ratio=8): 32 | super(inducer_attention, self).__init__() 33 | self.conv1 = nn.Conv1d(in_channels=input_channel, out_channels=input_channel // ratio, kernel_size=1, 34 | bias=False) 35 | self.attention_bn1 = nn.BatchNorm1d(input_channel // ratio) 36 | 37 | self.conv2 = nn.Conv1d(in_channels=input_channel, out_channels=input_channel // ratio, kernel_size=1, 38 | bias=False) 39 | self.attention_bn2 = nn.BatchNorm1d(input_channel // ratio) 40 | 41 | self.conv3 = nn.Conv1d( 42 | in_channels=input_channel, out_channels=input_channel, kernel_size=1, bias=False) 43 | self.attention_bn3 = nn.BatchNorm1d(input_channel) 44 | self.softmax = nn.Softmax(dim=-1) 45 | 46 | def forward(self, q, k, v): 47 | _, c, _ = q.shape 48 | key_feats = F.relu(self.attention_bn1(self.conv1(k))).permute(0, 2, 1)  # (b, n, c/ratio) 49 | query_feats = F.relu(self.attention_bn2(self.conv2(q)))  # (b, c/ratio, n) 50 | attn = self.softmax(torch.bmm(key_feats, query_feats)) / math.sqrt(c)  # (b, n, n) 51 | values = F.relu(self.attention_bn3(self.conv3(v)))  # (b, c, n) 52 | out = q + torch.bmm(values, attn.permute(0, 2, 1)) 53 |
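# residual connection: the attended values are added back onto the query features, so the inducer refines rather than replaces its input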
return out 54 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical): 6 | """Map bboxes from testing scale to original image scale. 7 | 8 | Args: 9 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back. 10 | scale_factor (float): Scale factor. 11 | flip_horizontal (bool): Whether to flip horizontally. 12 | flip_vertical (bool): Whether to flip vertically. 13 | 14 | Returns: 15 | :obj:`BaseInstance3DBoxes`: Boxes mapped back. 16 | """ 17 | new_bboxes = bboxes.clone() 18 | if flip_horizontal: 19 | new_bboxes.flip('horizontal') 20 | if flip_vertical: 21 | new_bboxes.flip('vertical') 22 | new_bboxes.scale(1 / scale_factor) 23 | 24 | return new_bboxes 25 | 26 | 27 | def bbox3d2roi(bbox_list): 28 | """Convert a list of bounding boxes to RoI format. 29 | 30 | Args: 31 | bbox_list (list[torch.Tensor]): A list of bounding boxes 32 | corresponding to a batch of images. 33 | 34 | Returns: 35 | torch.Tensor: Region of interests in shape (n, c), where 36 | the channels are in order of [batch_ind, x, y ...]. 37 | """ 38 | rois_list = [] 39 | for img_id, bboxes in enumerate(bbox_list): 40 | if bboxes.size(0) > 0: 41 | img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) 42 | rois = torch.cat([img_inds, bboxes], dim=-1) 43 | else: 44 | rois = torch.zeros_like(bboxes) 45 | rois_list.append(rois) 46 | rois = torch.cat(rois_list, 0) 47 | return rois 48 | 49 | 50 | def bbox3d2result(bboxes, scores, labels, attrs=None): 51 | """Convert detection results to a dict of CPU tensors. 52 | 53 | Args: 54 | bboxes (torch.Tensor): Bounding boxes with shape (N, 5). 55 | scores (torch.Tensor): Scores with shape (N, ). 56 | labels (torch.Tensor): Labels with shape (N, ). 57 | attrs (torch.Tensor, optional): Attributes with shape (N, ). 58 | Defaults to None. 59 | 60 | Returns: 61 | dict[str, torch.Tensor]: Bounding box results in cpu mode. 62 | 63 | - boxes_3d (torch.Tensor): 3D boxes. 64 | - scores (torch.Tensor): Prediction scores. 65 | - labels_3d (torch.Tensor): Box labels. 66 | - attrs_3d (torch.Tensor, optional): Box attributes. 67 | """ 68 | result_dict = dict( 69 | boxes_3d=bboxes.to('cpu'), 70 | scores_3d=scores.cpu(), 71 | labels_3d=labels.cpu()) 72 | 73 | if attrs is not None: 74 | result_dict['attrs_3d'] = attrs.cpu() 75 | 76 | return result_dict 77 | -------------------------------------------------------------------------------- /mmdet3d/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmdet.datasets.builder import build_dataloader 3 | from .builder import DATASETS, PIPELINES, build_dataset 4 | from .custom_3d import Custom3DDataset 5 | from .custom_3d_seg import Custom3DSegDataset 6 | from .kitti_dataset import KittiDataset 7 | from .kitti_mono_dataset import KittiMonoDataset 8 | from .lyft_dataset import LyftDataset 9 | from .nuscenes_dataset import NuScenesDataset 10 | from .nuscenes_mono_dataset import NuScenesMonoDataset 11 | # yapf: disable 12 | from .pipelines import (AffineResize, BackgroundPointsFilter, GlobalAlignment, 13 | GlobalRotScaleTrans, IndoorPatchPointSample, 14 | IndoorPointSample, LoadAnnotations3D, 15 | LoadPointsFromDict, LoadPointsFromFile, 16 | LoadPointsFromMultiSweeps, MultiViewWrapper, 17 | NormalizePointsColor, ObjectNameFilter, ObjectNoise, 18 | ObjectRangeFilter, ObjectSample, PointSample, 19 | PointShuffle, PointsRangeFilter, RandomDropPointsColor, 20 | RandomFlip3D, RandomJitterPoints, RandomRotate, 21 | RandomShiftScale, RangeLimitedRandomCrop, VoxelBasedPointSampler) 22 | # yapf: enable 23 | from .s3dis_dataset import S3DISDataset, S3DISSegDataset 24 | from .scannet_dataset import (ScanNetDataset, ScanNetInstanceSegDataset, 25 | ScanNetSegDataset) 26 | from .semantickitti_dataset import SemanticKITTIDataset 27 | from .sunrgbd_dataset import SUNRGBDDataset 28 | from .utils import get_loading_pipeline 29 | from .waymo_dataset import WaymoDataset 30 | 31 | __all__ = [ 32 | 'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS', 33 | 'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset', 34 | 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 35 | 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 36 | 'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset', 37 | 'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample', 38 | 'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset', 39 | 'ScanNetDataset', 'ScanNetSegDataset', 'ScanNetInstanceSegDataset', 40 | 'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset', 41 | 'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter', 42 | 'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor', 43 | 'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize', 44 | 'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES', 45 | 'RangeLimitedRandomCrop', 'RandomRotate', 'MultiViewWrapper' 46 | ] 47 | -------------------------------------------------------------------------------- /mmdet3d/core/points/cam_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class CameraPoints(BasePoints): 6 | """Points of instances in CAM coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): an N x points_dim matrix. 10 | points_dim (int, optional): Number of dimensions of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (dict): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation.
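In camera coordinates the x axis points right, the y axis down and the z axis forward; the gravity axis is y (hence ``rotation_axis = 1`` below), and the BEV projection uses the (x, z) coordinates.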
22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(CameraPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 1 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 2] = -self.tensor[:, 2] 39 | 40 | @property 41 | def bev(self): 42 | """torch.Tensor: BEV of the points in shape (N, 2).""" 43 | return self.tensor[:, [0, 2]] 44 | 45 | def convert_to(self, dst, rt_mat=None): 46 | """Convert self to ``dst`` mode. 47 | 48 | Args: 49 | dst (:obj:`CoordMode`): The target Point mode. 50 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 51 | translation matrix between different coordinates. 52 | Defaults to None. 53 | The conversion from `src` coordinates to `dst` coordinates 54 | usually comes along the change of sensors, e.g., from camera 55 | to LiDAR. This requires a transformation matrix. 56 | 57 | Returns: 58 | :obj:`BasePoints`: The converted point of the same type 59 | in the `dst` mode. 60 | """ 61 | from mmdet3d.core.bbox import Coord3DMode 62 | return Coord3DMode.convert_point( 63 | point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat) 64 | -------------------------------------------------------------------------------- /configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | 
semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /mmdet3d/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.ops import (RoIAlign, SigmoidFocalLoss, batched_nms, 3 | get_compiler_version, get_compiling_cuda_version, nms, 4 | roi_align, sigmoid_focal_loss, soft_nms) 5 | from mmcv.ops.assign_score_withk import assign_score_withk 6 | from mmcv.ops.ball_query import ball_query 7 | from mmcv.ops.furthest_point_sample import (furthest_point_sample, 8 | furthest_point_sample_with_dist) 9 | from mmcv.ops.gather_points import gather_points 10 | from mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation 11 | from mmcv.ops.knn import knn 12 | from mmcv.ops.points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, 13 | points_in_boxes_part) 14 | from mmcv.ops.points_sampler import PointsSampler as Points_Sampler 15 | from mmcv.ops.roiaware_pool3d import RoIAwarePool3d 16 | from mmcv.ops.roipoint_pool3d import RoIPointPool3d 17 | from mmcv.ops.scatter_points import DynamicScatter, dynamic_scatter 18 | from mmcv.ops.three_interpolate import three_interpolate 19 | from mmcv.ops.three_nn import three_nn 20 | from mmcv.ops.voxelize import Voxelization, voxelization 21 | 22 | from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule 23 | from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d 24 | from .paconv import PAConv, PAConvCUDA 25 | from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 26 | PAConvSAModule, PAConvSAModuleMSG, 27 | PointFPModule, PointSAModule, PointSAModuleMSG, 28 | build_sa_module) 29 | from .sparse_block import (SparseBasicBlock, SparseBottleneck, 30 | make_sparse_convmodule) 31 | 32 | __all__ = [ 33 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version', 34 | 'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d', 35 | 'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization', 36 | 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss', 37 | 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', 38 | 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', 39 | 'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample', 40 | 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', 41 | 'gather_points', 'grouping_operation', 'GroupAll', 'QueryAndGroup', 42 | 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', 'DGCNNFPModule', 43 | 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all', 44 | 'assign_score_withk', 45 | 'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA', 46 | 'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule', 47 | 'PAConvCUDASAModuleMSG', 'RoIPointPool3d' 48 | ] 49 | -------------------------------------------------------------------------------- /data/sunrgbd/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare SUN RGB-D Data 2 | 3 | We follow the procedure in [votenet](https://github.com/facebookresearch/votenet/). 4 | 5 | 1. 
Download SUNRGBD data [HERE](http://rgbd.cs.princeton.edu/data/). Then move SUNRGBD.zip, SUNRGBDMeta2DBB_v2.mat, SUNRGBDMeta3DBB_v2.mat and SUNRGBDtoolbox.zip to the OFFICIAL_SUNRGBD folder and unzip the zip files there. 6 | 7 | 2. Enter the `matlab` folder and extract point clouds and annotations by running `extract_split.m`, `extract_rgbd_data_v2.m` and `extract_rgbd_data_v1.m`. 8 | 9 | 3. Enter the project root directory and generate training data by running 10 | 11 | ```bash 12 | python tools/create_data.py sunrgbd --root-path ./data/sunrgbd --out-dir ./data/sunrgbd --extra-tag sunrgbd 13 | ``` 14 | 15 | The overall process can be achieved with the following script 16 | 17 | ```bash 18 | cd matlab 19 | matlab -nosplash -nodesktop -r 'extract_split;quit;' 20 | matlab -nosplash -nodesktop -r 'extract_rgbd_data_v2;quit;' 21 | matlab -nosplash -nodesktop -r 'extract_rgbd_data_v1;quit;' 22 | cd ../../.. 23 | python tools/create_data.py sunrgbd --root-path ./data/sunrgbd --out-dir ./data/sunrgbd --extra-tag sunrgbd 24 | ``` 25 | 26 | NOTE: SUNRGBDtoolbox.zip should have MD5 hash `18d22e1761d36352f37232cba102f91f` (you can check the hash with `md5 SUNRGBDtoolbox.zip` on Mac OS or `md5sum SUNRGBDtoolbox.zip` on Linux). 27 | 28 | NOTE: If you would like to play around with [ImVoteNet](../../configs/imvotenet/README.md), the image data (`./data/sunrgbd/sunrgbd_trainval/image`) are required. If you pre-processed the data before mmdet3d version 0.12.0, please pre-process the data again due to some updates in data pre-processing. 29 | 30 | NOTE: Before mmdet3d version 1.0.0 we sampled 50000 points following VoteNet preprocessing. During training and evaluation we then used `PointSample` to sample the number of points needed by each detector, e.g. 20000 for VoteNet and Group-Free. However, modern voxel-based detectors (e.g. FCAF3D) use 100000 points and are able to benefit from all of them. Since version 1.0.0 we therefore do not limit the maximum number of points during preprocessing, giving users more flexibility with `PointSample`. If you have a reason to keep only 50000 points, please pass `--num-points=50000` to `create_data.py`. 31 | 32 | The directory structure after pre-processing should be as follows 33 | 34 | ``` 35 | sunrgbd 36 | ├── README.md 37 | ├── matlab 38 | │ ├── extract_rgbd_data_v1.m 39 | │ ├── extract_rgbd_data_v2.m 40 | │ ├── extract_split.m 41 | ├── OFFICIAL_SUNRGBD 42 | │ ├── SUNRGBD 43 | │ ├── SUNRGBDMeta2DBB_v2.mat 44 | │ ├── SUNRGBDMeta3DBB_v2.mat 45 | │ ├── SUNRGBDtoolbox 46 | ├── sunrgbd_trainval 47 | │ ├── calib 48 | │ ├── depth 49 | │ ├── image 50 | │ ├── label 51 | │ ├── label_v1 52 | │ ├── seg_label 53 | │ ├── train_data_idx.txt 54 | │ ├── val_data_idx.txt 55 | ├── points 56 | ├── sunrgbd_infos_train.pkl 57 | ├── sunrgbd_infos_val.pkl 58 | 59 | ``` 60 | -------------------------------------------------------------------------------- /mmdet3d/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | 4 | from .builder import DATASETS 5 | 6 | 7 | @DATASETS.register_module() 8 | class CBGSDataset(object): 9 | """A wrapper for class-balanced sampling of a dataset. Implementation of 10 | paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object 11 | Detection <https://arxiv.org/abs/1908.09492>`_. 12 | 13 | Balance the number of scenes under different classes. 14 | 15 | Args: 16 | dataset (:obj:`CustomDataset`): The dataset to be class sampled.
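Sampling duplicates scenes that contain rare classes so that every class contributes roughly a ``1 / len(CLASSES)`` share of the samples (see ``_get_sample_indices``).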
17 | """ 18 | 19 | def __init__(self, dataset): 20 | self.dataset = dataset 21 | self.CLASSES = dataset.CLASSES 22 | self.cat2id = {name: i for i, name in enumerate(self.CLASSES)} 23 | self.sample_indices = self._get_sample_indices() 24 | # self.dataset.data_infos = self.data_infos 25 | if hasattr(self.dataset, 'flag'): 26 | self.flag = np.array( 27 | [self.dataset.flag[ind] for ind in self.sample_indices], 28 | dtype=np.uint8) 29 | 30 | def _get_sample_indices(self): 31 | """Load annotations from ann_file. 32 | 33 | Args: 34 | ann_file (str): Path of the annotation file. 35 | 36 | Returns: 37 | list[dict]: List of annotations after class sampling. 38 | """ 39 | class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()} 40 | for idx in range(len(self.dataset)): 41 | sample_cat_ids = self.dataset.get_cat_ids(idx) 42 | for cat_id in sample_cat_ids: 43 | class_sample_idxs[cat_id].append(idx) 44 | duplicated_samples = sum( 45 | [len(v) for _, v in class_sample_idxs.items()]) 46 | class_distribution = { 47 | k: len(v) / duplicated_samples 48 | for k, v in class_sample_idxs.items() 49 | } 50 | 51 | sample_indices = [] 52 | 53 | frac = 1.0 / len(self.CLASSES) 54 | ratios = [frac / v for v in class_distribution.values()] 55 | for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios): 56 | sample_indices += np.random.choice(cls_inds, 57 | int(len(cls_inds) * 58 | ratio)).tolist() 59 | return sample_indices 60 | 61 | def __getitem__(self, idx): 62 | """Get item from infos according to the given index. 63 | 64 | Returns: 65 | dict: Data dictionary of the corresponding index. 66 | """ 67 | ori_idx = self.sample_indices[idx] 68 | return self.dataset[ori_idx] 69 | 70 | def __len__(self): 71 | """Return the length of data infos. 72 | 73 | Returns: 74 | int: Length of data infos. 75 | """ 76 | return len(self.sample_indices) 77 | -------------------------------------------------------------------------------- /data/scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/scannet_utils.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | """Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts 8 | """ 9 | 10 | import csv 11 | import os 12 | 13 | import numpy as np 14 | from plyfile import PlyData 15 | 16 | 17 | def represents_int(s): 18 | """Judge whether string s represents an int. 19 | 20 | Args: 21 | s(str): The input string to be judged. 22 | 23 | Returns: 24 | bool: Whether s represents int or not. 25 | """ 26 | try: 27 | int(s) 28 | return True 29 | except ValueError: 30 | return False 31 | 32 | 33 | def read_label_mapping(filename, 34 | label_from='raw_category', 35 | label_to='nyu40id'): 36 | assert os.path.isfile(filename) 37 | mapping = dict() 38 | with open(filename) as csvfile: 39 | reader = csv.DictReader(csvfile, delimiter='\t') 40 | for row in reader: 41 | mapping[row[label_from]] = int(row[label_to]) 42 | if represents_int(list(mapping.keys())[0]): 43 | mapping = {int(k): v for k, v in mapping.items()} 44 | return mapping 45 | 46 | 47 | def read_mesh_vertices(filename): 48 | """Read XYZ for each vertex. 49 | 50 | Args: 51 | filename(str): The name of the mesh vertices file. 52 | 53 | Returns: 54 | ndarray: Vertices. 
55 | """ 56 | assert os.path.isfile(filename) 57 | with open(filename, 'rb') as f: 58 | plydata = PlyData.read(f) 59 | num_verts = plydata['vertex'].count 60 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 61 | vertices[:, 0] = plydata['vertex'].data['x'] 62 | vertices[:, 1] = plydata['vertex'].data['y'] 63 | vertices[:, 2] = plydata['vertex'].data['z'] 64 | return vertices 65 | 66 | 67 | def read_mesh_vertices_rgb(filename): 68 | """Read XYZ and RGB for each vertex. 69 | 70 | Args: 71 | filename(str): The name of the mesh vertices file. 72 | 73 | Returns: 74 | Vertices. Note that RGB values are in 0-255. 75 | """ 76 | assert os.path.isfile(filename) 77 | with open(filename, 'rb') as f: 78 | plydata = PlyData.read(f) 79 | num_verts = plydata['vertex'].count 80 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 81 | vertices[:, 0] = plydata['vertex'].data['x'] 82 | vertices[:, 1] = plydata['vertex'].data['y'] 83 | vertices[:, 2] = plydata['vertex'].data['z'] 84 | vertices[:, 3] = plydata['vertex'].data['red'] 85 | vertices[:, 4] = plydata['vertex'].data['green'] 86 | vertices[:, 5] = plydata['vertex'].data['blue'] 87 | return vertices 88 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/util_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/util_3d.py # noqa 3 | import json 4 | 5 | import numpy as np 6 | 7 | 8 | class Instance: 9 | """Single instance for ScanNet evaluator. 10 | 11 | Args: 12 | mesh_vert_instances (np.array): Instance ids for each point. 13 | instance_id: Id of single instance. 14 | """ 15 | instance_id = 0 16 | label_id = 0 17 | vert_count = 0 18 | med_dist = -1 19 | dist_conf = 0.0 20 | 21 | def __init__(self, mesh_vert_instances, instance_id): 22 | if instance_id == -1: 23 | return 24 | self.instance_id = int(instance_id) 25 | self.label_id = int(self.get_label_id(instance_id)) 26 | self.vert_count = int( 27 | self.get_instance_verts(mesh_vert_instances, instance_id)) 28 | 29 | @staticmethod 30 | def get_label_id(instance_id): 31 | return int(instance_id // 1000) 32 | 33 | @staticmethod 34 | def get_instance_verts(mesh_vert_instances, instance_id): 35 | return (mesh_vert_instances == instance_id).sum() 36 | 37 | def to_json(self): 38 | return json.dumps( 39 | self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 40 | 41 | def to_dict(self): 42 | dict = {} 43 | dict['instance_id'] = self.instance_id 44 | dict['label_id'] = self.label_id 45 | dict['vert_count'] = self.vert_count 46 | dict['med_dist'] = self.med_dist 47 | dict['dist_conf'] = self.dist_conf 48 | return dict 49 | 50 | def from_json(self, data): 51 | self.instance_id = int(data['instance_id']) 52 | self.label_id = int(data['label_id']) 53 | self.vert_count = int(data['vert_count']) 54 | if 'med_dist' in data: 55 | self.med_dist = float(data['med_dist']) 56 | self.dist_conf = float(data['dist_conf']) 57 | 58 | def __str__(self): 59 | return '(' + str(self.instance_id) + ')' 60 | 61 | 62 | def get_instances(ids, class_ids, class_labels, id2label): 63 | """Transform gt instance mask to Instance objects. 64 | 65 | Args: 66 | ids (np.array): Instance ids for each point. 67 | class_ids: (tuple[int]): Ids of valid categories. 68 | class_labels (tuple[str]): Class names. 69 | id2label: (dict[int, str]): Mapping of valid class id to class label. 
70 | 71 | Returns: 72 | dict [str, list]: Instance objects grouped by class label. 73 | """ 74 | instances = {} 75 | for label in class_labels: 76 | instances[label] = [] 77 | instance_ids = np.unique(ids) 78 | for id in instance_ids: 79 | if id == 0: 80 | continue 81 | inst = Instance(ids, id) 82 | if inst.label_id in class_ids: 83 | instances[id2label[inst.label_id]].append(inst.to_dict()) 84 | return instances 85 | -------------------------------------------------------------------------------- /configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | # sa_channels=((64*2, 64*2, 128*2), (128*2, 128*2, 256*2), (128*2, 128*2, 256*2), 13 | # (128*2, 128*2, 256*2)), 14 | # fp_channels=((256*2, 256*2), (256*2, 256)), 15 | norm_cfg=dict(type='BN2d'), 16 | sa_cfg=dict( 17 | type='PointSAModule', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=True)), 21 | bbox_head=dict( 22 | type='VoteHead', 23 | vote_module_cfg=dict( 24 | in_channels=256, 25 | vote_per_seed=1, 26 | gt_per_seed=3, 27 | conv_channels=(256, 256), 28 | conv_cfg=dict(type='Conv1d'), 29 | norm_cfg=dict(type='BN1d'), 30 | norm_feats=True, 31 | vote_loss=dict( 32 | type='ChamferDistance', 33 | mode='l1', 34 | reduction='none', 35 | loss_dst_weight=10.0)), 36 | vote_aggregation_cfg=dict( 37 | type='PointSAModule', 38 | num_point=256, 39 | radius=0.3, 40 | num_sample=16, 41 | mlp_channels=[256, 128, 128, 128], 42 | use_xyz=True, 43 | normalize_xyz=True), 44 | pred_layer_cfg=dict( 45 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 46 | conv_cfg=dict(type='Conv1d'), 47 | norm_cfg=dict(type='BN1d'), 48 | objectness_loss=dict( 49 | type='CrossEntropyLoss', 50 | class_weight=[0.2, 0.8], 51 | reduction='sum', 52 | loss_weight=5.0), 53 | center_loss=dict( 54 | type='ChamferDistance', 55 | mode='l2', 56 | reduction='sum', 57 | loss_src_weight=10.0, 58 | loss_dst_weight=10.0), 59 | dir_class_loss=dict( 60 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 61 | dir_res_loss=dict( 62 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 63 | size_class_loss=dict( 64 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 65 | size_res_loss=dict( 66 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 67 | semantic_loss=dict( 68 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 72 | test_cfg=dict( 73 | sample_mod='seed', 74 | nms_thr=0.25, 75 | score_thr=0.05, 76 | per_class_proposal=True)) 77 | -------------------------------------------------------------------------------- /configs/groupfree3d/metafile.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Name: Group-Free-3D 3 | Metadata: 4 | Training Techniques: 5 | - AdamW 6 | Training Resources: 4x V100 GPUs 7 | Architecture: 8 | - PointNet++ 9 | Paper: 10 | URL: https://arxiv.org/abs/2104.00678 11 | Title: 'Group-Free 3D Object Detection via Transformers' 12 | README: configs/groupfree3d/README.md 13 | Code: 14 | URL: 
https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/groupfree3dnet.py#L10 15 | Version: v0.15.0 16 | 17 | Models: 18 | - Name: groupfree3d_8x4_scannet-3d-18class-L6-O256.py 19 | In Collection: Group-Free-3D 20 | Config: configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py 21 | Metadata: 22 | Training Data: ScanNet 23 | Training Memory (GB): 6.7 24 | Results: 25 | - Task: 3D Object Detection 26 | Dataset: ScanNet 27 | Metrics: 28 | AP@0.25: 66.32 29 | AP@0.5: 47.82 30 | Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256/groupfree3d_8x4_scannet-3d-18class-L6-O256_20210702_145347-3499eb55.pth 31 | 32 | - Name: groupfree3d_8x4_scannet-3d-18class-L12-O256.py 33 | In Collection: Group-Free-3D 34 | Config: configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256.py 35 | Metadata: 36 | Training Data: ScanNet 37 | Training Memory (GB): 9.4 38 | Results: 39 | - Task: 3D Object Detection 40 | Dataset: ScanNet 41 | Metrics: 42 | AP@0.25: 66.57 43 | AP@0.5: 48.21 44 | Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L12-O256/groupfree3d_8x4_scannet-3d-18class-L12-O256_20210702_150907-1c5551ad.pth 45 | 46 | - Name: groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py 47 | In Collection: Group-Free-3D 48 | Config: configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256.py 49 | Metadata: 50 | Training Data: ScanNet 51 | Training Memory (GB): 13.3 52 | Results: 53 | - Task: 3D Object Detection 54 | Dataset: ScanNet 55 | Metrics: 56 | AP@0.25: 68.20 57 | AP@0.5: 51.02 58 | Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O256_20210702_200301-944f0ac0.pth 59 | 60 | - Name: groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py 61 | In Collection: Group-Free-3D 62 | Config: configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512.py 63 | Metadata: 64 | Training Data: ScanNet 65 | Training Memory (GB): 18.8 66 | Results: 67 | - Task: 3D Object Detection 68 | Dataset: ScanNet 69 | Metrics: 70 | AP@0.25: 68.22 71 | AP@0.5: 52.61 72 | Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/groupfree3d/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512/groupfree3d_8x4_scannet-3d-18class-w2x-L12-O512_20210702_220204-187b71c7.pth 73 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/point_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List 3 | 4 | import torch 5 | from mmcv.cnn import ConvModule 6 | from mmcv.ops import three_interpolate, three_nn 7 | from mmcv.runner import BaseModule, force_fp32 8 | from torch import nn as nn 9 | 10 | 11 | class PointFPModule(BaseModule): 12 | """Point feature propagation module used in PointNets. 13 | 14 | Propagate the features from one set to another. 15 | 16 | Args: 17 | mlp_channels (list[int]): List of mlp channels. 18 | norm_cfg (dict, optional): Type of normalization method. 19 | Default: dict(type='BN2d'). 
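init_cfg (dict, optional): Initialization config. Defaults to None.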
20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels: List[int], 24 | norm_cfg: dict = dict(type='BN2d'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, 1), 36 | stride=(1, 1), 37 | conv_cfg=dict(type='Conv2d'), 38 | norm_cfg=norm_cfg)) 39 | 40 | @force_fp32() 41 | def forward(self, target: torch.Tensor, source: torch.Tensor, 42 | target_feats: torch.Tensor, 43 | source_feats: torch.Tensor) -> torch.Tensor: 44 | """forward. 45 | 46 | Args: 47 | target (Tensor): (B, n, 3) tensor of the xyz positions of 48 | the target features. 49 | source (Tensor): (B, m, 3) tensor of the xyz positions of 50 | the source features. 51 | target_feats (Tensor): (B, C1, n) tensor of the features to be 52 | propagated to. 53 | source_feats (Tensor): (B, C2, m) tensor of features 54 | to be propagated. 55 | 56 | Return: 57 | Tensor: (B, M, N) M = mlp[-1], tensor of the target features. 58 | """ 59 | if source is not None: 60 | dist, idx = three_nn(target, source) 61 | dist_reciprocal = 1.0 / (dist + 1e-8) 62 | norm = torch.sum(dist_reciprocal, dim=2, keepdim=True) 63 | weight = dist_reciprocal / norm 64 | 65 | interpolated_feats = three_interpolate(source_feats, idx, weight) 66 | else: 67 | interpolated_feats = source_feats.expand(*source_feats.size()[0:2], 68 | target.size(1)) 69 | 70 | if target_feats is not None: 71 | new_features = torch.cat([interpolated_feats, target_feats], 72 | dim=1) # (B, C2 + C1, n) 73 | else: 74 | new_features = interpolated_feats 75 | 76 | new_features = new_features.unsqueeze(-1) 77 | new_features = self.mlps(new_features) 78 | 79 | return new_features.squeeze(-1) 80 | -------------------------------------------------------------------------------- /mmdet3d/models/model_utils/edge_fusion_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule 4 | from torch import nn as nn 5 | from torch.nn import functional as F 6 | 7 | 8 | class EdgeFusionModule(BaseModule): 9 | """Edge Fusion Module for feature map. 10 | 11 | Args: 12 | out_channels (int): The number of output channels. 13 | feat_channels (int): The number of channels in feature map 14 | during edge feature fusion. 15 | kernel_size (int, optional): Kernel size of convolution. 16 | Default: 3. 17 | act_cfg (dict, optional): Config of activation. 18 | Default: dict(type='ReLU'). 19 | norm_cfg (dict, optional): Config of normalization. 20 | Default: dict(type='BN1d')). 21 | """ 22 | 23 | def __init__(self, 24 | out_channels, 25 | feat_channels, 26 | kernel_size=3, 27 | act_cfg=dict(type='ReLU'), 28 | norm_cfg=dict(type='BN1d')): 29 | super().__init__() 30 | self.edge_convs = nn.Sequential( 31 | ConvModule( 32 | feat_channels, 33 | feat_channels, 34 | kernel_size=kernel_size, 35 | padding=kernel_size // 2, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=norm_cfg, 38 | act_cfg=act_cfg), 39 | nn.Conv1d(feat_channels, out_channels, kernel_size=1)) 40 | self.feat_channels = feat_channels 41 | 42 | def forward(self, features, fused_features, edge_indices, edge_lens, 43 | output_h, output_w): 44 | """Forward pass. 45 | 46 | Args: 47 | features (torch.Tensor): Different representative features 48 | for fusion. 
49 | fused_features (torch.Tensor): Different representative 50 | features to be fused. 51 | edge_indices (torch.Tensor): Batch image edge indices. 52 | edge_lens (list[int]): List of edge length of each image. 53 | output_h (int): Height of output feature map. 54 | output_w (int): Width of output feature map. 55 | 56 | Returns: 57 | torch.Tensor: Fused feature maps. 58 | """ 59 | batch_size = features.shape[0] 60 | # normalize 61 | grid_edge_indices = edge_indices.view(batch_size, -1, 1, 2).float() 62 | grid_edge_indices[..., 0] = \ 63 | grid_edge_indices[..., 0] / (output_w - 1) * 2 - 1 64 | grid_edge_indices[..., 1] = \ 65 | grid_edge_indices[..., 1] / (output_h - 1) * 2 - 1 66 | 67 | # apply edge fusion 68 | edge_features = F.grid_sample( 69 | features, grid_edge_indices, align_corners=True).squeeze(-1) 70 | edge_output = self.edge_convs(edge_features) 71 | 72 | for k in range(batch_size): 73 | edge_indice_k = edge_indices[k, :edge_lens[k]] 74 | fused_features[k, :, edge_indice_k[:, 1], 75 | edge_indice_k[:, 0]] += edge_output[ 76 | k, :, :edge_lens[k]] 77 | 78 | return fused_features 79 | -------------------------------------------------------------------------------- /mmdet3d/models/decode_heads/pointnet2_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn.bricks import ConvModule 3 | from torch import nn as nn 4 | 5 | from mmdet3d.ops import PointFPModule 6 | from ..builder import HEADS 7 | from .decode_head import Base3DDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class PointNet2Head(Base3DDecodeHead): 12 | r"""PointNet2 decoder head. 13 | 14 | Decoder head used in `PointNet++ <https://arxiv.org/abs/1706.02413>`_. 15 | Refer to the `official code <https://github.com/charlesq34/pointnet2>`_. 16 | 17 | Args: 18 | fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules. 19 | fp_norm_cfg (dict): Config of norm layers used in FP modules. 20 | Default: dict(type='BN2d'). 21 | """ 22 | 23 | def __init__(self, 24 | fp_channels=((768, 256, 256), (384, 256, 256), 25 | (320, 256, 128), (128, 128, 128, 128)), 26 | fp_norm_cfg=dict(type='BN2d'), 27 | **kwargs): 28 | super(PointNet2Head, self).__init__(**kwargs) 29 | 30 | self.num_fp = len(fp_channels) 31 | self.FP_modules = nn.ModuleList() 32 | for cur_fp_mlps in fp_channels: 33 | self.FP_modules.append( 34 | PointFPModule(mlp_channels=cur_fp_mlps, norm_cfg=fp_norm_cfg)) 35 | 36 | # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L40 37 | self.pre_seg_conv = ConvModule( 38 | fp_channels[-1][-1], 39 | self.channels, 40 | kernel_size=1, 41 | bias=True, 42 | conv_cfg=self.conv_cfg, 43 | norm_cfg=self.norm_cfg, 44 | act_cfg=self.act_cfg) 45 | 46 | def _extract_input(self, feat_dict): 47 | """Extract inputs from features dictionary. 48 | 49 | Args: 50 | feat_dict (dict): Feature dict from backbone. 51 | 52 | Returns: 53 | list[torch.Tensor]: Coordinates of multiple levels of points. 54 | list[torch.Tensor]: Features of multiple levels of points. 55 | """ 56 | sa_xyz = feat_dict['sa_xyz'] 57 | sa_features = feat_dict['sa_features'] 58 | assert len(sa_xyz) == len(sa_features) 59 | 60 | return sa_xyz, sa_features 61 | 62 | def forward(self, feat_dict): 63 | """Forward pass. 64 | 65 | Args: 66 | feat_dict (dict): Feature dict from backbone. 67 | 68 | Returns: 69 | torch.Tensor: Segmentation map of shape [B, num_classes, N].
70 | """ 71 | sa_xyz, sa_features = self._extract_input(feat_dict) 72 | 73 | # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L24 74 | sa_features[0] = None 75 | 76 | fp_feature = sa_features[-1] 77 | 78 | for i in range(self.num_fp): 79 | # consume the points in a bottom-up manner 80 | fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)], 81 | sa_features[-(i + 2)], fp_feature) 82 | output = self.pre_seg_conv(fp_feature) 83 | output = self.cls_seg(output) 84 | 85 | return output 86 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/rotated_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.ops import diff_iou_rotated_3d 4 | from torch import nn as nn 5 | 6 | from mmdet.models.losses.utils import weighted_loss 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def rotated_iou_3d_loss(pred, target): 12 | """Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes. 13 | Note that predictions and targets are one-to-one corresponded. 14 | 15 | Args: 16 | pred (torch.Tensor): Bbox predictions with shape [N, 7] 17 | (x, y, z, w, l, h, alpha). 18 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7] 19 | (x, y, z, w, l, h, alpha). 20 | 21 | Returns: 22 | torch.Tensor: IoU loss between predictions and targets. 23 | """ 24 | iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0), 25 | target.unsqueeze(0))[0] 26 | return iou_loss 27 | 28 | 29 | @LOSSES.register_module() 30 | class RotatedIoU3DLoss(nn.Module): 31 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. 32 | 33 | Args: 34 | reduction (str): Method to reduce losses. 35 | The valid reduction method are none, sum or mean. 36 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 37 | """ 38 | 39 | def __init__(self, reduction='mean', loss_weight=1.0): 40 | super().__init__() 41 | self.reduction = reduction 42 | self.loss_weight = loss_weight 43 | 44 | def forward(self, 45 | pred, 46 | target, 47 | weight=None, 48 | avg_factor=None, 49 | reduction_override=None, 50 | **kwargs): 51 | """Forward function of loss calculation. 52 | 53 | Args: 54 | pred (torch.Tensor): Bbox predictions with shape [..., 7] 55 | (x, y, z, w, l, h, alpha). 56 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7] 57 | (x, y, z, w, l, h, alpha). 58 | weight (torch.Tensor | float, optional): Weight of loss. 59 | Defaults to None. 60 | avg_factor (int, optional): Average factor that is used to average 61 | the loss. Defaults to None. 62 | reduction_override (str, optional): Method to reduce losses. 63 | The valid reduction method are 'none', 'sum' or 'mean'. 64 | Defaults to None. 65 | 66 | Returns: 67 | torch.Tensor: IoU loss between predictions and targets. 
68 | """ 69 | if weight is not None and not torch.any(weight > 0): 70 | return pred.sum() * weight.sum() # 0 71 | assert reduction_override in (None, 'none', 'mean', 'sum') 72 | reduction = ( 73 | reduction_override if reduction_override else self.reduction) 74 | if weight is not None and weight.dim() > 1: 75 | weight = weight.mean(-1) 76 | loss = self.loss_weight * rotated_iou_3d_loss( 77 | pred, 78 | target, 79 | weight, 80 | reduction=reduction, 81 | avg_factor=avg_factor, 82 | **kwargs) 83 | 84 | return loss 85 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/axis_aligned_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch import nn as nn 4 | 5 | from mmdet.models.losses.utils import weighted_loss 6 | from ...core.bbox import AxisAlignedBboxOverlaps3D 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def axis_aligned_iou_loss(pred, target): 12 | """Calculate the IoU loss (1-IoU) of two sets of axis aligned bounding 13 | boxes. Note that predictions and targets are one-to-one corresponded. 14 | 15 | Args: 16 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 17 | (x1, y1, z1, x2, y2, z2). 18 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 19 | (x1, y1, z1, x2, y2, z2). 20 | 21 | Returns: 22 | torch.Tensor: IoU loss between predictions and targets. 23 | """ 24 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()( 25 | pred, target, is_aligned=True) 26 | iou_loss = 1 - axis_aligned_iou 27 | return iou_loss 28 | 29 | 30 | @LOSSES.register_module() 31 | class AxisAlignedIoULoss(nn.Module): 32 | """Calculate the IoU loss (1-IoU) of axis aligned bounding boxes. 33 | 34 | Args: 35 | reduction (str): Method to reduce losses. 36 | The valid reduction method are none, sum or mean. 37 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 38 | """ 39 | 40 | def __init__(self, reduction='mean', loss_weight=1.0): 41 | super(AxisAlignedIoULoss, self).__init__() 42 | assert reduction in ['none', 'sum', 'mean'] 43 | self.reduction = reduction 44 | self.loss_weight = loss_weight 45 | 46 | def forward(self, 47 | pred, 48 | target, 49 | weight=None, 50 | avg_factor=None, 51 | reduction_override=None, 52 | **kwargs): 53 | """Forward function of loss calculation. 54 | 55 | Args: 56 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 57 | (x1, y1, z1, x2, y2, z2). 58 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 59 | (x1, y1, z1, x2, y2, z2). 60 | weight (torch.Tensor | float, optional): Weight of loss. 61 | Defaults to None. 62 | avg_factor (int, optional): Average factor that is used to average 63 | the loss. Defaults to None. 64 | reduction_override (str, optional): Method to reduce losses. 65 | The valid reduction method are 'none', 'sum' or 'mean'. 66 | Defaults to None. 67 | 68 | Returns: 69 | torch.Tensor: IoU loss between predictions and targets. 
70 | """ 71 | assert reduction_override in (None, 'none', 'mean', 'sum') 72 | reduction = ( 73 | reduction_override if reduction_override else self.reduction) 74 | if (weight is not None) and (not torch.any(weight > 0)) and ( 75 | reduction != 'none'): 76 | return (pred * weight).sum() 77 | return axis_aligned_iou_loss( 78 | pred, 79 | target, 80 | weight=weight, 81 | avg_factor=avg_factor, 82 | reduction=reduction) * self.loss_weight 83 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import torch 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet3d.models import build_model 8 | 9 | try: 10 | from mmcv.cnn import get_model_complexity_info 11 | except ImportError: 12 | raise ImportError('Please upgrade mmcv to >0.6.2') 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a detector') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument( 19 | '--shape', 20 | type=int, 21 | nargs='+', 22 | default=[40000, 4], 23 | help='input point cloud size') 24 | parser.add_argument( 25 | '--modality', 26 | type=str, 27 | default='point', 28 | choices=['point', 'image', 'multi'], 29 | help='input data modality') 30 | parser.add_argument( 31 | '--cfg-options', 32 | nargs='+', 33 | action=DictAction, 34 | help='override some settings in the used config, the key-value pair ' 35 | 'in xxx=yyy format will be merged into config file. If the value to ' 36 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 37 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 38 | 'Note that the quotation marks are necessary and that no white space ' 39 | 'is allowed.') 40 | args = parser.parse_args() 41 | return args 42 | 43 | 44 | def main(): 45 | 46 | args = parse_args() 47 | 48 | if args.modality == 'point': 49 | assert len(args.shape) == 2, 'invalid input shape' 50 | input_shape = tuple(args.shape) 51 | elif args.modality == 'image': 52 | if len(args.shape) == 1: 53 | input_shape = (3, args.shape[0], args.shape[0]) 54 | elif len(args.shape) == 2: 55 | input_shape = (3, ) + tuple(args.shape) 56 | else: 57 | raise ValueError('invalid input shape') 58 | elif args.modality == 'multi': 59 | raise NotImplementedError( 60 | 'FLOPs counter is currently not supported for models with ' 61 | 'multi-modality input') 62 | 63 | cfg = Config.fromfile(args.config) 64 | if args.cfg_options is not None: 65 | cfg.merge_from_dict(args.cfg_options) 66 | 67 | model = build_model( 68 | cfg.model, 69 | train_cfg=cfg.get('train_cfg'), 70 | test_cfg=cfg.get('test_cfg')) 71 | if torch.cuda.is_available(): 72 | model.cuda() 73 | model.eval() 74 | 75 | if hasattr(model, 'forward_dummy'): 76 | model.forward = model.forward_dummy 77 | else: 78 | raise NotImplementedError( 79 | 'FLOPs counter is currently not supported for {}'.format( 80 | model.__class__.__name__)) 81 | 82 | flops, params = get_model_complexity_info(model, input_shape) 83 | split_line = '=' * 30 84 | print(f'{split_line}\nInput shape: {input_shape}\n' 85 | f'Flops: {flops}\nParams: {params}\n{split_line}') 86 | print('!!!Please be cautious if you use the results in papers. 
--------------------------------------------------------------------------------
/tools/analysis_tools/get_flops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | 
4 | import torch
5 | from mmcv import Config, DictAction
6 | 
7 | from mmdet3d.models import build_model
8 | 
9 | try:
10 |     from mmcv.cnn import get_model_complexity_info
11 | except ImportError:
12 |     raise ImportError('Please upgrade mmcv to >0.6.2')
13 | 
14 | 
15 | def parse_args():
16 |     parser = argparse.ArgumentParser(description='Compute FLOPs of a detector')
17 |     parser.add_argument('config', help='train config file path')
18 |     parser.add_argument(
19 |         '--shape',
20 |         type=int,
21 |         nargs='+',
22 |         default=[40000, 4],
23 |         help='input point cloud size')
24 |     parser.add_argument(
25 |         '--modality',
26 |         type=str,
27 |         default='point',
28 |         choices=['point', 'image', 'multi'],
29 |         help='input data modality')
30 |     parser.add_argument(
31 |         '--cfg-options',
32 |         nargs='+',
33 |         action=DictAction,
34 |         help='override some settings in the used config, the key-value pair '
35 |         'in xxx=yyy format will be merged into config file. If the value to '
36 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
37 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
38 |         'Note that the quotation marks are necessary and that no white space '
39 |         'is allowed.')
40 |     args = parser.parse_args()
41 |     return args
42 | 
43 | 
44 | def main():
45 | 
46 |     args = parse_args()
47 | 
48 |     if args.modality == 'point':
49 |         assert len(args.shape) == 2, 'invalid input shape'
50 |         input_shape = tuple(args.shape)
51 |     elif args.modality == 'image':
52 |         if len(args.shape) == 1:
53 |             input_shape = (3, args.shape[0], args.shape[0])
54 |         elif len(args.shape) == 2:
55 |             input_shape = (3, ) + tuple(args.shape)
56 |         else:
57 |             raise ValueError('invalid input shape')
58 |     elif args.modality == 'multi':
59 |         raise NotImplementedError(
60 |             'FLOPs counter is currently not supported for models with '
61 |             'multi-modality input')
62 | 
63 |     cfg = Config.fromfile(args.config)
64 |     if args.cfg_options is not None:
65 |         cfg.merge_from_dict(args.cfg_options)
66 | 
67 |     model = build_model(
68 |         cfg.model,
69 |         train_cfg=cfg.get('train_cfg'),
70 |         test_cfg=cfg.get('test_cfg'))
71 |     if torch.cuda.is_available():
72 |         model.cuda()
73 |     model.eval()
74 | 
75 |     if hasattr(model, 'forward_dummy'):
76 |         model.forward = model.forward_dummy
77 |     else:
78 |         raise NotImplementedError(
79 |             'FLOPs counter is currently not supported for {}'.format(
80 |                 model.__class__.__name__))
81 | 
82 |     flops, params = get_model_complexity_info(model, input_shape)
83 |     split_line = '=' * 30
84 |     print(f'{split_line}\nInput shape: {input_shape}\n'
85 |           f'Flops: {flops}\nParams: {params}\n{split_line}')
86 |     print('!!!Please be cautious if you use the results in papers. '
87 |           'You may need to check if all ops are supported and verify that the '
88 |           'flops computation is correct.')
89 | 
90 | 
91 | if __name__ == '__main__':
92 |     main()
93 | 
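The script relies entirely on mmcv's complexity counter, which can be exercised on any ordinary module outside the mmdet3d pipeline. A toy sketch (model and input shape are arbitrary, not from this repo):

import torch.nn as nn
from mmcv.cnn import get_model_complexity_info

model = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 10))
flops, params = get_model_complexity_info(model, (3, 224, 224))
print(flops, params)  # human-readable strings by default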
--------------------------------------------------------------------------------
/data/sunrgbd/matlab/extract_rgbd_data_v2.m:
--------------------------------------------------------------------------------
1 | % Copyright (c) Facebook, Inc. and its affiliates.
2 | % 
3 | % This source code is licensed under the MIT license found in the
4 | % LICENSE file in the root directory of this source tree.
5 | 
6 | %% Dump SUNRGBD data to our format
7 | % for each sample, we have RGB image, 2d boxes,
8 | % point cloud (in camera coordinate), calibration and 3d boxes.
9 | %
10 | % Compared to extract_rgbd_data.m in frustum_pointnets, use v2 2D and 3D
11 | % bboxes.
12 | %
13 | % Author: Charles R. Qi
14 | %
15 | clear; close all; clc;
16 | addpath(genpath('.'))
17 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/readData')
18 | %% V1 2D&3D BB and Seg masks
19 | % load('./Metadata/SUNRGBDMeta.mat')
20 | % load('./Metadata/SUNRGBD2Dseg.mat')
21 | 
22 | %% V2 3DBB annotations (overwrites SUNRGBDMeta)
23 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat');
24 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta2DBB_v2.mat');
25 | %% Create folders
26 | depth_folder = '../sunrgbd_trainval/depth/';
27 | image_folder = '../sunrgbd_trainval/image/';
28 | calib_folder = '../sunrgbd_trainval/calib/';
29 | det_label_folder = '../sunrgbd_trainval/label/';
30 | seg_label_folder = '../sunrgbd_trainval/seg_label/';
31 | mkdir(depth_folder);
32 | mkdir(image_folder);
33 | mkdir(calib_folder);
34 | mkdir(det_label_folder);
35 | mkdir(seg_label_folder);
36 | %% Read
37 | parfor imageId = 1:10335
38 |     imageId
39 |     try
40 |         data = SUNRGBDMeta(imageId);
41 |         data.depthpath(1:16) = '';
42 |         data.depthpath = strcat('../OFFICIAL_SUNRGBD', data.depthpath);
43 |         data.rgbpath(1:16) = '';
44 |         data.rgbpath = strcat('../OFFICIAL_SUNRGBD', data.rgbpath);
45 | 
46 |         % Write point cloud in depth map
47 |         [rgb,points3d,depthInpaint,imsize]=read3dPoints(data);
48 |         rgb(isnan(points3d(:,1)),:) = [];
49 |         points3d(isnan(points3d(:,1)),:) = [];
50 |         points3d_rgb = [points3d, rgb];
51 | 
52 |         % MAT files are 3x smaller than TXT files. In Python we can use
53 |         % scipy.io.loadmat('xxx.mat')['instance'] to load the data.
54 |         mat_filename = strcat(num2str(imageId,'%06d'), '.mat');
55 |         txt_filename = strcat(num2str(imageId,'%06d'), '.txt');
56 |         parsave(strcat(depth_folder, mat_filename), points3d_rgb);
57 | 
58 |         % Write images
59 |         copyfile(data.rgbpath, sprintf('%s/%06d.jpg', image_folder, imageId));
60 | 
61 |         % Write calibration
62 |         dlmwrite(strcat(calib_folder, txt_filename), data.Rtilt(:)', 'delimiter', ' ');
63 |         dlmwrite(strcat(calib_folder, txt_filename), data.K(:)', 'delimiter', ' ', '-append');
64 | 
65 |         % Write 2D and 3D box label
66 |         data2d = SUNRGBDMeta2DBB(imageId);
67 |         fid = fopen(strcat(det_label_folder, txt_filename), 'w');
68 |         for j = 1:length(data.groundtruth3DBB)
69 |             centroid = data.groundtruth3DBB(j).centroid;
70 |             classname = data.groundtruth3DBB(j).classname;
71 |             orientation = data.groundtruth3DBB(j).orientation;
72 |             coeffs = abs(data.groundtruth3DBB(j).coeffs);
73 |             box2d = data2d.groundtruth2DBB(j).gtBb2D;
74 |             assert(strcmp(data2d.groundtruth2DBB(j).classname, classname));
75 |             fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2));
76 |         end
77 |         fclose(fid);
78 | 
79 |     catch
80 |     end
81 | 
82 | end
83 | 
84 | function parsave(filename, instance)
85 |     save(filename, 'instance');
86 | end
87 | 
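On the Python side, the dumped sample can be read back roughly as follows; the paths mirror the folders created above, and the variable name comes from parsave(), which stores the array under 'instance':

import numpy as np
import scipy.io as sio

idx = 1
pc = sio.loadmat(f'../sunrgbd_trainval/depth/{idx:06d}.mat')['instance']
xyz, rgb = pc[:, :3], pc[:, 3:]            # (N, 3) points, (N, 3) colors

calib = np.loadtxt(f'../sunrgbd_trainval/calib/{idx:06d}.txt')
Rtilt = calib[0].reshape(3, 3, order='F')  # MATLAB flattens column-major
K = calib[1].reshape(3, 3, order='F')      # camera intrinsics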
--------------------------------------------------------------------------------
/tools/analysis_tools/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import time
4 | 
5 | import torch
6 | from mmcv import Config
7 | from mmcv.parallel import MMDataParallel
8 | from mmcv.runner import load_checkpoint, wrap_fp16_model
9 | 
10 | from mmdet3d.datasets import build_dataloader, build_dataset
11 | from mmdet3d.models import build_detector
12 | from tools.misc.fuse_conv_bn import fuse_module
13 | 
14 | 
15 | def parse_args():
16 |     parser = argparse.ArgumentParser(description='MMDet benchmark a model')
17 |     parser.add_argument('config', help='test config file path')
18 |     parser.add_argument('checkpoint', help='checkpoint file')
19 |     parser.add_argument('--samples', type=int, default=2000, help='samples to benchmark')
20 |     parser.add_argument(
21 |         '--log-interval', type=int, default=50, help='interval of logging')
22 |     parser.add_argument(
23 |         '--fuse-conv-bn',
24 |         action='store_true',
25 |         help='Whether to fuse conv and bn, this will slightly increase '
26 |         'the inference speed')
27 |     args = parser.parse_args()
28 |     return args
29 | 
30 | 
31 | def main():
32 |     args = parse_args()
33 | 
34 |     cfg = Config.fromfile(args.config)
35 |     # set cudnn_benchmark
36 |     if cfg.get('cudnn_benchmark', False):
37 |         torch.backends.cudnn.benchmark = True
38 |     cfg.model.pretrained = None
39 |     cfg.data.test.test_mode = True
40 | 
41 |     # build the dataloader
42 |     # TODO: support multiple images per gpu (only minor changes are needed)
43 |     dataset = build_dataset(cfg.data.test)
44 |     data_loader = build_dataloader(
45 |         dataset,
46 |         samples_per_gpu=1,
47 |         workers_per_gpu=cfg.data.workers_per_gpu,
48 |         dist=False,
49 |         shuffle=False)
50 | 
51 |     # build the model and load checkpoint
52 |     cfg.model.train_cfg = None
53 |     model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
54 |     fp16_cfg = cfg.get('fp16', None)
55 |     if fp16_cfg is not None:
56 |         wrap_fp16_model(model)
57 |     load_checkpoint(model, args.checkpoint, map_location='cpu')
58 |     if args.fuse_conv_bn:
59 |         model = fuse_module(model)
60 | 
61 |     model = MMDataParallel(model, device_ids=[0])
62 | 
63 |     model.eval()
64 | 
65 |     # the first several iterations may be very slow so skip them
66 |     num_warmup = 5
67 |     pure_inf_time = 0
68 | 
69 |     # benchmark with several samples and take the average
70 |     for i, data in enumerate(data_loader):
71 | 
72 |         torch.cuda.synchronize()
73 |         start_time = time.perf_counter()
74 | 
75 |         with torch.no_grad():
76 |             model(return_loss=False, rescale=True, **data)
77 | 
78 |         torch.cuda.synchronize()
79 |         elapsed = time.perf_counter() - start_time
80 | 
81 |         if i >= num_warmup:
82 |             pure_inf_time += elapsed
83 |             if (i + 1) % args.log_interval == 0:
84 |                 fps = (i + 1 - num_warmup) / pure_inf_time
85 |                 print(f'Done image [{i + 1:<3}/ {args.samples}], '
86 |                       f'fps: {fps:.1f} img / s')
87 | 
88 |         if (i + 1) == args.samples:
89 |             # pure_inf_time already includes this iteration (added above)
90 |             fps = (i + 1 - num_warmup) / pure_inf_time
91 |             print(f'Overall fps: {fps:.1f} img / s')
92 |             break
93 | 
94 | 
95 | if __name__ == '__main__':
96 |     main()
97 | 
79 | """ 80 | pass 81 | 82 | def simple_test(self, 83 | x, 84 | proposal_list, 85 | img_metas, 86 | proposals=None, 87 | rescale=False, 88 | **kwargs): 89 | """Test without augmentation.""" 90 | pass 91 | 92 | def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs): 93 | """Test with augmentations. 94 | 95 | If rescale is False, then returned bboxes and masks will fit the scale 96 | of imgs[0]. 97 | """ 98 | pass 99 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert_stem(model_key, model_weight, state_dict, converted_names): 9 | new_key = model_key.replace('stem.conv', 'conv1') 10 | new_key = new_key.replace('stem.bn', 'bn1') 11 | state_dict[new_key] = model_weight 12 | converted_names.add(model_key) 13 | print(f'Convert {model_key} to {new_key}') 14 | 15 | 16 | def convert_head(model_key, model_weight, state_dict, converted_names): 17 | new_key = model_key.replace('head.fc', 'fc') 18 | state_dict[new_key] = model_weight 19 | converted_names.add(model_key) 20 | print(f'Convert {model_key} to {new_key}') 21 | 22 | 23 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 24 | split_keys = model_key.split('.') 25 | layer, block, module = split_keys[:3] 26 | block_id = int(block[1:]) 27 | layer_name = f'layer{int(layer[1:])}' 28 | block_name = f'{block_id - 1}' 29 | 30 | if block_id == 1 and module == 'bn': 31 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 32 | elif block_id == 1 and module == 'proj': 33 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 34 | elif module == 'f': 35 | if split_keys[3] == 'a_bn': 36 | module_name = 'bn1' 37 | elif split_keys[3] == 'b_bn': 38 | module_name = 'bn2' 39 | elif split_keys[3] == 'c_bn': 40 | module_name = 'bn3' 41 | elif split_keys[3] == 'a': 42 | module_name = 'conv1' 43 | elif split_keys[3] == 'b': 44 | module_name = 'conv2' 45 | elif split_keys[3] == 'c': 46 | module_name = 'conv3' 47 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 48 | else: 49 | raise ValueError(f'Unsupported conversion of key {model_key}') 50 | print(f'Convert {model_key} to {new_key}') 51 | state_dict[new_key] = model_weight 52 | converted_names.add(model_key) 53 | 54 | 55 | def convert(src, dst): 56 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 57 | # load caffe model 58 | regnet_model = torch.load(src) 59 | blobs = regnet_model['model_state'] 60 | # convert to pytorch style 61 | state_dict = OrderedDict() 62 | converted_names = set() 63 | for key, weight in blobs.items(): 64 | if 'stem' in key: 65 | convert_stem(key, weight, state_dict, converted_names) 66 | elif 'head' in key: 67 | convert_head(key, weight, state_dict, converted_names) 68 | elif key.startswith('s'): 69 | convert_reslayer(key, weight, state_dict, converted_names) 70 | 71 | # check if all layers are converted 72 | for key in blobs: 73 | if key not in converted_names: 74 | print(f'not converted: {key}') 75 | # save checkpoint 76 | checkpoint = dict() 77 | checkpoint['state_dict'] = state_dict 78 | torch.save(checkpoint, dst) 79 | 80 | 81 | def main(): 82 | parser = argparse.ArgumentParser(description='Convert model keys') 83 | parser.add_argument('src', help='src detectron 
--------------------------------------------------------------------------------
/tools/model_converters/regnet2mmdet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | from collections import OrderedDict
4 | 
5 | import torch
6 | 
7 | 
8 | def convert_stem(model_key, model_weight, state_dict, converted_names):
9 |     new_key = model_key.replace('stem.conv', 'conv1')
10 |     new_key = new_key.replace('stem.bn', 'bn1')
11 |     state_dict[new_key] = model_weight
12 |     converted_names.add(model_key)
13 |     print(f'Convert {model_key} to {new_key}')
14 | 
15 | 
16 | def convert_head(model_key, model_weight, state_dict, converted_names):
17 |     new_key = model_key.replace('head.fc', 'fc')
18 |     state_dict[new_key] = model_weight
19 |     converted_names.add(model_key)
20 |     print(f'Convert {model_key} to {new_key}')
21 | 
22 | 
23 | def convert_reslayer(model_key, model_weight, state_dict, converted_names):
24 |     split_keys = model_key.split('.')
25 |     layer, block, module = split_keys[:3]
26 |     block_id = int(block[1:])
27 |     layer_name = f'layer{int(layer[1:])}'
28 |     block_name = f'{block_id - 1}'
29 | 
30 |     if block_id == 1 and module == 'bn':
31 |         new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'
32 |     elif block_id == 1 and module == 'proj':
33 |         new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'
34 |     elif module == 'f':
35 |         if split_keys[3] == 'a_bn':
36 |             module_name = 'bn1'
37 |         elif split_keys[3] == 'b_bn':
38 |             module_name = 'bn2'
39 |         elif split_keys[3] == 'c_bn':
40 |             module_name = 'bn3'
41 |         elif split_keys[3] == 'a':
42 |             module_name = 'conv1'
43 |         elif split_keys[3] == 'b':
44 |             module_name = 'conv2'
45 |         elif split_keys[3] == 'c':
46 |             module_name = 'conv3'
47 |         new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'
48 |     else:
49 |         raise ValueError(f'Unsupported conversion of key {model_key}')
50 |     print(f'Convert {model_key} to {new_key}')
51 |     state_dict[new_key] = model_weight
52 |     converted_names.add(model_key)
53 | 
54 | 
55 | def convert(src, dst):
56 |     """Convert keys in pycls pretrained RegNet models to mmdet style."""
57 |     # load pycls model
58 |     regnet_model = torch.load(src)
59 |     blobs = regnet_model['model_state']
60 |     # convert to pytorch style
61 |     state_dict = OrderedDict()
62 |     converted_names = set()
63 |     for key, weight in blobs.items():
64 |         if 'stem' in key:
65 |             convert_stem(key, weight, state_dict, converted_names)
66 |         elif 'head' in key:
67 |             convert_head(key, weight, state_dict, converted_names)
68 |         elif key.startswith('s'):
69 |             convert_reslayer(key, weight, state_dict, converted_names)
70 | 
71 |     # check if all layers are converted
72 |     for key in blobs:
73 |         if key not in converted_names:
74 |             print(f'not converted: {key}')
75 |     # save checkpoint
76 |     checkpoint = dict()
77 |     checkpoint['state_dict'] = state_dict
78 |     torch.save(checkpoint, dst)
79 | 
80 | 
81 | def main():
82 |     parser = argparse.ArgumentParser(description='Convert model keys')
83 |     parser.add_argument('src', help='src pycls model path')
84 |     parser.add_argument('dst', help='save path')
85 |     args = parser.parse_args()
86 |     convert(args.src, args.dst)
87 | 
88 | 
89 | if __name__ == '__main__':
90 |     main()
91 | 
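The renaming rules above follow a fixed pattern; a quick sanity check on a few representative pycls keys (dummy weights, run with the functions defined in this script):

import torch
from collections import OrderedDict

state_dict, converted = OrderedDict(), set()
for key in ('s1.b1.proj.weight', 's1.b1.bn.running_mean', 's2.b3.f.b.weight'):
    convert_reslayer(key, torch.zeros(1), state_dict, converted)
# Convert s1.b1.proj.weight to layer1.0.downsample.0.weight
# Convert s1.b1.bn.running_mean to layer1.0.downsample.1.running_mean
# Convert s2.b3.f.b.weight to layer2.2.conv2.weight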
--------------------------------------------------------------------------------
/mmdet3d/models/utils/edge_indices.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import numpy as np
3 | import torch
4 | 
5 | 
6 | def get_edge_indices(img_metas,
7 |                      downsample_ratio,
8 |                      step=1,
9 |                      pad_mode='default',
10 |                      dtype=np.float32,
11 |                      device='cpu'):
12 |     """Generate the (x, y) indices of the feature-map border of each image.
13 |     The edge_indices are generated using numpy on cpu rather
14 |     than on CUDA due to the latency issue. When batch size = 8,
15 |     this function with numpy array is ~8 times faster than that
16 |     with CUDA tensor (0.09s and 0.72s in 100 runs).
17 | 
18 |     Args:
19 |         img_metas (list[dict]): Meta information of each image, e.g.,
20 |             image size, scaling factor, etc.
21 |         downsample_ratio (int): Downsample ratio of output feature.
22 |         step (int, optional): Step size used for generating
23 |             edge indices. Default: 1.
24 |         pad_mode (str, optional): Padding mode during data pipeline.
25 |             Default: 'default'.
26 |         dtype (np.dtype, optional): Dtype of the intermediate edge indices
27 |             array. Default: np.float32.
28 |         device (str, optional): Device of edge indices tensor.
29 |             Default: 'cpu'.
30 | 
31 |     Returns:
32 |         list[Tensor]: Edge indices for each image in batch data.
33 |     """
34 |     edge_indices_list = []
35 |     for i in range(len(img_metas)):
36 |         img_shape = img_metas[i]['img_shape']
37 |         pad_shape = img_metas[i]['pad_shape']
38 |         h, w = img_shape[:2]
39 |         pad_h, pad_w = pad_shape
40 |         edge_indices = []
41 | 
42 |         if pad_mode == 'default':
43 |             x_min = 0
44 |             y_min = 0
45 |             x_max = (w - 1) // downsample_ratio
46 |             y_max = (h - 1) // downsample_ratio
47 |         elif pad_mode == 'center':
48 |             x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)
49 |             y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)
50 |             x_max = x_min + w // downsample_ratio
51 |             y_max = y_min + h // downsample_ratio
52 |         else:
53 |             raise NotImplementedError
54 | 
55 |         # left
56 |         y = np.arange(y_min, y_max, step, dtype=dtype)
57 |         x = np.ones(len(y)) * x_min
58 | 
59 |         edge_indices_edge = np.stack((x, y), axis=1)
60 |         edge_indices.append(edge_indices_edge)
61 | 
62 |         # bottom
63 |         x = np.arange(x_min, x_max, step, dtype=dtype)
64 |         y = np.ones(len(x)) * y_max
65 | 
66 |         edge_indices_edge = np.stack((x, y), axis=1)
67 |         edge_indices.append(edge_indices_edge)
68 | 
69 |         # right
70 |         y = np.arange(y_max, y_min, -step, dtype=dtype)
71 |         x = np.ones(len(y)) * x_max
72 | 
73 |         edge_indices_edge = np.stack((x, y), axis=1)
74 |         edge_indices.append(edge_indices_edge)
75 | 
76 |         # top
77 |         x = np.arange(x_max, x_min, -step, dtype=dtype)
78 |         y = np.ones(len(x)) * y_min
79 | 
80 |         edge_indices_edge = np.stack((x, y), axis=1)
81 |         edge_indices.append(edge_indices_edge)
82 | 
83 |         edge_indices = \
84 |             np.concatenate([index for index in edge_indices], axis=0)
85 |         edge_indices = torch.from_numpy(edge_indices).to(device).long()
86 |         edge_indices_list.append(edge_indices)
87 | 
88 |     return edge_indices_list
89 | 
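A usage sketch with hypothetical metas (a 370x1224 image padded to 384x1248, feature map downsampled 4x); the function walks the border left, bottom, right, top:

img_metas = [dict(img_shape=(370, 1224, 3), pad_shape=(384, 1248))]
edge_indices = get_edge_indices(img_metas, downsample_ratio=4)
print(edge_indices[0].shape, edge_indices[0].dtype)
# torch.Size([794, 2]) torch.int64 -- (x, y) pairs tracing the border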
--------------------------------------------------------------------------------
/mmdet3d/models/utils/gen_keypoints.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | 
4 | from mmdet3d.core.bbox import points_cam2img
5 | 
6 | 
7 | def get_keypoints(gt_bboxes_3d_list,
8 |                   centers2d_list,
9 |                   img_metas,
10 |                   use_local_coords=True):
11 |     """Project 3D GT boxes to 2D keypoints (corners plus top/bottom centers).
12 | 
13 |     Args:
14 |         gt_bboxes_3d_list (list[:obj:`BaseInstance3DBoxes`]): 3D ground truth
15 |             bboxes of each image.
16 |         centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
17 |             shape (num_gt, 2).
18 |         img_metas (list[dict]): Meta information of each image, e.g.,
19 |             image size, scaling factor, etc.
20 |         use_local_coords (bool, optional): Whether to use local coordinates
21 |             for keypoints. Default: True.
22 | 
23 |     Returns:
24 |         tuple[list[Tensor]]: It contains two elements, the first is the
25 |             keypoints for each projected 2D bbox in batch data. The second is
26 |             the visible mask of depth calculated by keypoints.
27 |     """
28 | 
29 |     assert len(gt_bboxes_3d_list) == len(centers2d_list)
30 |     bs = len(gt_bboxes_3d_list)
31 |     keypoints2d_list = []
32 |     keypoints_depth_mask_list = []
33 | 
34 |     for i in range(bs):
35 |         gt_bboxes_3d = gt_bboxes_3d_list[i]
36 |         centers2d = centers2d_list[i]
37 |         img_shape = img_metas[i]['img_shape']
38 |         cam2img = img_metas[i]['cam2img']
39 |         h, w = img_shape[:2]
40 |         # (N, 8, 3)
41 |         corners3d = gt_bboxes_3d.corners
42 |         top_centers3d = torch.mean(corners3d[:, [0, 1, 4, 5], :], dim=1)
43 |         bot_centers3d = torch.mean(corners3d[:, [2, 3, 6, 7], :], dim=1)
44 |         # (N, 2, 3)
45 |         top_bot_centers3d = torch.stack((top_centers3d, bot_centers3d), dim=1)
46 |         keypoints3d = torch.cat((corners3d, top_bot_centers3d), dim=1)
47 |         # (N, 10, 2)
48 |         keypoints2d = points_cam2img(keypoints3d, cam2img)
49 | 
50 |         # keypoints mask: keypoints must be inside
51 |         # the image and in front of the camera
52 |         keypoints_x_visible = (keypoints2d[..., 0] >= 0) & (
53 |             keypoints2d[..., 0] <= w - 1)
54 |         keypoints_y_visible = (keypoints2d[..., 1] >= 0) & (
55 |             keypoints2d[..., 1] <= h - 1)
56 |         keypoints_z_visible = (keypoints3d[..., -1] > 0)
57 | 
58 |         # (N, 10)
59 |         keypoints_visible = keypoints_x_visible & \
60 |             keypoints_y_visible & keypoints_z_visible
61 |         # center, diag-02, diag-13
62 |         keypoints_depth_valid = torch.stack(
63 |             (keypoints_visible[:, [8, 9]].all(dim=1),
64 |              keypoints_visible[:, [0, 3, 5, 6]].all(dim=1),
65 |              keypoints_visible[:, [1, 2, 4, 7]].all(dim=1)),
66 |             dim=1)
67 |         keypoints_visible = keypoints_visible.float()
68 | 
69 |         if use_local_coords:
70 |             keypoints2d = torch.cat((keypoints2d - centers2d.unsqueeze(1),
71 |                                      keypoints_visible.unsqueeze(-1)),
72 |                                     dim=2)
73 |         else:
74 |             keypoints2d = torch.cat(
75 |                 (keypoints2d, keypoints_visible.unsqueeze(-1)), dim=2)
76 | 
77 |         keypoints2d_list.append(keypoints2d)
78 |         keypoints_depth_mask_list.append(keypoints_depth_valid)
79 | 
80 |     return (keypoints2d_list, keypoints_depth_mask_list)
81 | 
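The visibility logic is plain tensor masking; a dependency-free sketch on fake projected keypoints (hypothetical image size and depths, not from this module):

import torch

h, w = 375, 1242
kpts2d = torch.rand(5, 10, 2) * 1500 - 100  # some fall outside the image
depth = torch.rand(5, 10) * 80 - 5          # some fall behind the camera

vis = ((kpts2d[..., 0] >= 0) & (kpts2d[..., 0] <= w - 1) &
       (kpts2d[..., 1] >= 0) & (kpts2d[..., 1] <= h - 1) & (depth > 0))

# depth is trusted only when a whole keypoint group is visible:
# top/bottom centers (8, 9), diagonal corners (0, 3, 5, 6) and (1, 2, 4, 7)
depth_valid = torch.stack((vis[:, [8, 9]].all(dim=1),
                           vis[:, [0, 3, 5, 6]].all(dim=1),
                           vis[:, [1, 2, 4, 7]].all(dim=1)), dim=1)  # (5, 3)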
--------------------------------------------------------------------------------
/mmdet3d/models/dense_heads/base_mono3d_dense_head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from abc import ABCMeta, abstractmethod
3 | 
4 | from mmcv.runner import BaseModule
5 | 
6 | 
7 | class BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):
8 |     """Base class for Monocular 3D DenseHeads."""
9 | 
10 |     def __init__(self, init_cfg=None):
11 |         super(BaseMono3DDenseHead, self).__init__(init_cfg=init_cfg)
12 | 
13 |     @abstractmethod
14 |     def loss(self, **kwargs):
15 |         """Compute losses of the head."""
16 |         pass
17 | 
18 |     @abstractmethod
19 |     def get_bboxes(self, **kwargs):
20 |         """Transform network output for a batch into bbox predictions."""
21 |         pass
22 | 
23 |     def forward_train(self,
24 |                       x,
25 |                       img_metas,
26 |                       gt_bboxes,
27 |                       gt_labels=None,
28 |                       gt_bboxes_3d=None,
29 |                       gt_labels_3d=None,
30 |                       centers2d=None,
31 |                       depths=None,
32 |                       attr_labels=None,
33 |                       gt_bboxes_ignore=None,
34 |                       proposal_cfg=None,
35 |                       **kwargs):
36 |         """
37 |         Args:
38 |             x (list[Tensor]): Features from FPN.
39 |             img_metas (list[dict]): Meta information of each image, e.g.,
40 |                 image size, scaling factor, etc.
41 |             gt_bboxes (list[Tensor]): Ground truth bboxes of the image,
42 |                 shape (num_gts, 4).
43 |             gt_labels (list[Tensor]): Ground truth labels of each box,
44 |                 shape (num_gts,).
45 |             gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,
46 |                 shape (num_gts, self.bbox_code_size).
47 |             gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,
48 |                 shape (num_gts,).
49 |             centers2d (list[Tensor]): Projected 3D center of each box,
50 |                 shape (num_gts, 2).
51 |             depths (list[Tensor]): Depth of projected 3D center of each box,
52 |                 shape (num_gts,).
53 |             attr_labels (list[Tensor]): Attribute labels of each box,
54 |                 shape (num_gts,).
55 |             gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be
56 |                 ignored, shape (num_ignored_gts, 4).
57 |             proposal_cfg (mmcv.Config): Test / postprocessing configuration;
58 |                 if None, test_cfg is used.
59 | 
60 |         Returns:
61 |             tuple:
62 |                 losses (dict[str, Tensor]): A dictionary of loss components.
63 |                 proposal_list (list[Tensor]): Proposals of each image.
64 |         """
65 |         outs = self(x)
66 |         if gt_labels is None:
67 |             loss_inputs = outs + (gt_bboxes, gt_bboxes_3d, centers2d, depths,
68 |                                   attr_labels, img_metas)
69 |         else:
70 |             loss_inputs = outs + (gt_bboxes, gt_labels, gt_bboxes_3d,
71 |                                   gt_labels_3d, centers2d, depths, attr_labels,
72 |                                   img_metas)
73 |         losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
74 |         if proposal_cfg is None:
75 |             return losses
76 |         else:
77 |             proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg)
78 |             return losses, proposal_list
79 | 
33 | """ 34 | box_ndim = src_boxes.shape[-1] 35 | cas, cgs, cts = [], [], [] 36 | if box_ndim > 7: 37 | xa, ya, za, wa, la, ha, ra, *cas = torch.split( 38 | src_boxes, 1, dim=-1) 39 | xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split( 40 | dst_boxes, 1, dim=-1) 41 | cts = [g - a for g, a in zip(cgs, cas)] 42 | else: 43 | xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1) 44 | xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1) 45 | za = za + ha / 2 46 | zg = zg + hg / 2 47 | diagonal = torch.sqrt(la**2 + wa**2) 48 | xt = (xg - xa) / diagonal 49 | yt = (yg - ya) / diagonal 50 | zt = (zg - za) / ha 51 | lt = torch.log(lg / la) 52 | wt = torch.log(wg / wa) 53 | ht = torch.log(hg / ha) 54 | rt = rg - ra 55 | return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1) 56 | 57 | @staticmethod 58 | def decode(anchors, deltas): 59 | """Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size, 60 | dz_size, dr, dv*) to `boxes`. 61 | 62 | Args: 63 | anchors (torch.Tensor): Parameters of anchors with shape (N, 7). 64 | deltas (torch.Tensor): Encoded boxes with shape 65 | (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*]. 66 | 67 | Returns: 68 | torch.Tensor: Decoded boxes. 69 | """ 70 | cas, cts = [], [] 71 | box_ndim = anchors.shape[-1] 72 | if box_ndim > 7: 73 | xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1) 74 | xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1) 75 | else: 76 | xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) 77 | xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1) 78 | 79 | za = za + ha / 2 80 | diagonal = torch.sqrt(la**2 + wa**2) 81 | xg = xt * diagonal + xa 82 | yg = yt * diagonal + ya 83 | zg = zt * ha + za 84 | 85 | lg = torch.exp(lt) * la 86 | wg = torch.exp(wt) * wa 87 | hg = torch.exp(ht) * ha 88 | rg = rt + ra 89 | zg = zg - hg / 2 90 | cgs = [t + a for t, a in zip(cts, cas)] 91 | return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1) 92 | -------------------------------------------------------------------------------- /mmdet3d/apis/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from os import path as osp 3 | 4 | import mmcv 5 | import torch 6 | from mmcv.image import tensor2imgs 7 | 8 | from mmdet3d.models import (Base3DDetector, Base3DSegmentor, 9 | SingleStageMono3DDetector) 10 | 11 | 12 | def single_gpu_test(model, 13 | data_loader, 14 | show=False, 15 | out_dir=None, 16 | show_score_thr=0.3): 17 | """Test model with single gpu. 18 | 19 | This method tests model with single gpu and gives the 'show' option. 20 | By setting ``show=True``, it saves the visualization results under 21 | ``out_dir``. 22 | 23 | Args: 24 | model (nn.Module): Model to be tested. 25 | data_loader (nn.Dataloader): Pytorch data loader. 26 | show (bool, optional): Whether to save viualization results. 27 | Default: True. 28 | out_dir (str, optional): The path to save visualization results. 29 | Default: None. 30 | 31 | Returns: 32 | list[dict]: The prediction results. 
33 | """ 34 | model.eval() 35 | results = [] 36 | dataset = data_loader.dataset 37 | prog_bar = mmcv.ProgressBar(len(dataset)) 38 | for i, data in enumerate(data_loader): 39 | with torch.no_grad(): 40 | result = model(return_loss=False, rescale=True, **data) 41 | 42 | if show: 43 | # Visualize the results of MMDetection3D model 44 | # 'show_results' is MMdetection3D visualization API 45 | models_3d = (Base3DDetector, Base3DSegmentor, 46 | SingleStageMono3DDetector) 47 | if isinstance(model.module, models_3d): 48 | model.module.show_results( 49 | data, 50 | result, 51 | out_dir=out_dir, 52 | show=show, 53 | score_thr=show_score_thr) 54 | # Visualize the results of MMDetection model 55 | # 'show_result' is MMdetection visualization API 56 | else: 57 | batch_size = len(result) 58 | if batch_size == 1 and isinstance(data['img'][0], 59 | torch.Tensor): 60 | img_tensor = data['img'][0] 61 | else: 62 | img_tensor = data['img'][0].data[0] 63 | img_metas = data['img_metas'][0].data[0] 64 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 65 | assert len(imgs) == len(img_metas) 66 | 67 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 68 | h, w, _ = img_meta['img_shape'] 69 | img_show = img[:h, :w, :] 70 | 71 | ori_h, ori_w = img_meta['ori_shape'][:-1] 72 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 73 | 74 | if out_dir: 75 | out_file = osp.join(out_dir, img_meta['ori_filename']) 76 | else: 77 | out_file = None 78 | 79 | model.module.show_result( 80 | img_show, 81 | result[i], 82 | show=show, 83 | out_file=out_file, 84 | score_thr=show_score_thr) 85 | results.extend(result) 86 | 87 | batch_size = len(result) 88 | for _ in range(batch_size): 89 | prog_bar.update() 90 | return results 91 | -------------------------------------------------------------------------------- /mmdet3d/models/backbones/second.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | from mmcv.cnn import build_conv_layer, build_norm_layer 5 | from mmcv.runner import BaseModule 6 | from torch import nn as nn 7 | 8 | from ..builder import BACKBONES 9 | 10 | 11 | @BACKBONES.register_module() 12 | class SECOND(BaseModule): 13 | """Backbone network for SECOND/PointPillars/PartA2/MVXNet. 14 | 15 | Args: 16 | in_channels (int): Input channels. 17 | out_channels (list[int]): Output channels for multi-scale feature maps. 18 | layer_nums (list[int]): Number of layers in each stage. 19 | layer_strides (list[int]): Strides of each stage. 20 | norm_cfg (dict): Config dict of normalization layers. 21 | conv_cfg (dict): Config dict of convolutional layers. 22 | """ 23 | 24 | def __init__(self, 25 | in_channels=128, 26 | out_channels=[128, 128, 256], 27 | layer_nums=[3, 5, 5], 28 | layer_strides=[2, 2, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | conv_cfg=dict(type='Conv2d', bias=False), 31 | init_cfg=None, 32 | pretrained=None): 33 | super(SECOND, self).__init__(init_cfg=init_cfg) 34 | assert len(layer_strides) == len(layer_nums) 35 | assert len(out_channels) == len(layer_nums) 36 | 37 | in_filters = [in_channels, *out_channels[:-1]] 38 | # note that when stride > 1, conv2d with same padding isn't 39 | # equal to pad-conv2d. we should use pad-conv2d. 
--------------------------------------------------------------------------------
/mmdet3d/apis/test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from os import path as osp
3 | 
4 | import mmcv
5 | import torch
6 | from mmcv.image import tensor2imgs
7 | 
8 | from mmdet3d.models import (Base3DDetector, Base3DSegmentor,
9 |                             SingleStageMono3DDetector)
10 | 
11 | 
12 | def single_gpu_test(model,
13 |                     data_loader,
14 |                     show=False,
15 |                     out_dir=None,
16 |                     show_score_thr=0.3):
17 |     """Test model with single gpu.
18 | 
19 |     This method tests model with single gpu and gives the 'show' option.
20 |     By setting ``show=True``, it saves the visualization results under
21 |     ``out_dir``.
22 | 
23 |     Args:
24 |         model (nn.Module): Model to be tested.
25 |         data_loader (nn.Dataloader): Pytorch data loader.
26 |         show (bool, optional): Whether to save visualization results.
27 |             Default: False.
28 |         out_dir (str, optional): The path to save visualization results.
29 |             Default: None.
30 | 
31 |     Returns:
32 |         list[dict]: The prediction results.
33 |     """
34 |     model.eval()
35 |     results = []
36 |     dataset = data_loader.dataset
37 |     prog_bar = mmcv.ProgressBar(len(dataset))
38 |     for i, data in enumerate(data_loader):
39 |         with torch.no_grad():
40 |             result = model(return_loss=False, rescale=True, **data)
41 | 
42 |         if show:
43 |             # Visualize the results of MMDetection3D models
44 |             # 'show_results' is the MMDetection3D visualization API
45 |             models_3d = (Base3DDetector, Base3DSegmentor,
46 |                          SingleStageMono3DDetector)
47 |             if isinstance(model.module, models_3d):
48 |                 model.module.show_results(
49 |                     data,
50 |                     result,
51 |                     out_dir=out_dir,
52 |                     show=show,
53 |                     score_thr=show_score_thr)
54 |             # Visualize the results of MMDetection models
55 |             # 'show_result' is the MMDetection visualization API
56 |             else:
57 |                 batch_size = len(result)
58 |                 if batch_size == 1 and isinstance(data['img'][0],
59 |                                                   torch.Tensor):
60 |                     img_tensor = data['img'][0]
61 |                 else:
62 |                     img_tensor = data['img'][0].data[0]
63 |                 img_metas = data['img_metas'][0].data[0]
64 |                 imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
65 |                 assert len(imgs) == len(img_metas)
66 | 
67 |                 for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
68 |                     h, w, _ = img_meta['img_shape']
69 |                     img_show = img[:h, :w, :]
70 | 
71 |                     ori_h, ori_w = img_meta['ori_shape'][:-1]
72 |                     img_show = mmcv.imresize(img_show, (ori_w, ori_h))
73 | 
74 |                     if out_dir:
75 |                         out_file = osp.join(out_dir, img_meta['ori_filename'])
76 |                     else:
77 |                         out_file = None
78 | 
79 |                     model.module.show_result(
80 |                         img_show,
81 |                         result[i],
82 |                         show=show,
83 |                         out_file=out_file,
84 |                         score_thr=show_score_thr)
85 |         results.extend(result)
86 | 
87 |         batch_size = len(result)
88 |         for _ in range(batch_size):
89 |             prog_bar.update()
90 |     return results
91 | 
--------------------------------------------------------------------------------
/mmdet3d/models/backbones/second.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import warnings
3 | 
4 | from mmcv.cnn import build_conv_layer, build_norm_layer
5 | from mmcv.runner import BaseModule
6 | from torch import nn as nn
7 | 
8 | from ..builder import BACKBONES
9 | 
10 | 
11 | @BACKBONES.register_module()
12 | class SECOND(BaseModule):
13 |     """Backbone network for SECOND/PointPillars/PartA2/MVXNet.
14 | 
15 |     Args:
16 |         in_channels (int): Input channels.
17 |         out_channels (list[int]): Output channels for multi-scale feature maps.
18 |         layer_nums (list[int]): Number of layers in each stage.
19 |         layer_strides (list[int]): Strides of each stage.
20 |         norm_cfg (dict): Config dict of normalization layers.
21 |         conv_cfg (dict): Config dict of convolutional layers.
22 |     """
23 | 
24 |     def __init__(self,
25 |                  in_channels=128,
26 |                  out_channels=[128, 128, 256],
27 |                  layer_nums=[3, 5, 5],
28 |                  layer_strides=[2, 2, 2],
29 |                  norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
30 |                  conv_cfg=dict(type='Conv2d', bias=False),
31 |                  init_cfg=None,
32 |                  pretrained=None):
33 |         super(SECOND, self).__init__(init_cfg=init_cfg)
34 |         assert len(layer_strides) == len(layer_nums)
35 |         assert len(out_channels) == len(layer_nums)
36 | 
37 |         in_filters = [in_channels, *out_channels[:-1]]
38 |         # Note that when stride > 1, conv2d with same padding isn't
39 |         # equal to pad-conv2d, so pad-conv2d is used here.
40 |         blocks = []
41 |         for i, layer_num in enumerate(layer_nums):
42 |             block = [
43 |                 build_conv_layer(
44 |                     conv_cfg,
45 |                     in_filters[i],
46 |                     out_channels[i],
47 |                     3,
48 |                     stride=layer_strides[i],
49 |                     padding=1),
50 |                 build_norm_layer(norm_cfg, out_channels[i])[1],
51 |                 nn.ReLU(inplace=True),
52 |             ]
53 |             for j in range(layer_num):
54 |                 block.append(
55 |                     build_conv_layer(
56 |                         conv_cfg,
57 |                         out_channels[i],
58 |                         out_channels[i],
59 |                         3,
60 |                         padding=1))
61 |                 block.append(build_norm_layer(norm_cfg, out_channels[i])[1])
62 |                 block.append(nn.ReLU(inplace=True))
63 | 
64 |             block = nn.Sequential(*block)
65 |             blocks.append(block)
66 | 
67 |         self.blocks = nn.ModuleList(blocks)
68 | 
69 |         assert not (init_cfg and pretrained), \
70 |             'init_cfg and pretrained cannot be set at the same time'
71 |         if isinstance(pretrained, str):
72 |             warnings.warn('DeprecationWarning: pretrained is deprecated, '
73 |                           'please use "init_cfg" instead')
74 |             self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
75 |         else:
76 |             self.init_cfg = dict(type='Kaiming', layer='Conv2d')
77 | 
78 |     def forward(self, x):
79 |         """Forward function.
80 | 
81 |         Args:
82 |             x (torch.Tensor): Input with shape (N, C, H, W).
83 | 
84 |         Returns:
85 |             tuple[torch.Tensor]: Multi-scale features.
86 |         """
87 |         outs = []
88 |         for i in range(len(self.blocks)):
89 |             x = self.blocks[i](x)
90 |             outs.append(x)
91 |         return tuple(outs)
92 | 
--------------------------------------------------------------------------------
/mmdet3d/models/backbones/nostem_regnet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmdet.models.backbones import RegNet
3 | from ..builder import BACKBONES
4 | 
5 | 
6 | @BACKBONES.register_module()
7 | class NoStemRegNet(RegNet):
8 |     """RegNet backbone without Stem for 3D detection.
9 | 
10 |     More details can be found in `paper <https://arxiv.org/abs/2003.13678>`_ .
11 | 
12 |     Args:
13 |         arch (dict): The parameter of RegNets.
14 |             - w0 (int): Initial width.
15 |             - wa (float): Slope of width.
16 |             - wm (float): Quantization parameter to quantize the width.
17 |             - depth (int): Depth of the backbone.
18 |             - group_w (int): Width of group.
19 |             - bot_mul (float): Bottleneck ratio, i.e. expansion of bottleneck.
20 |         strides (Sequence[int]): Strides of the first block of each stage.
21 |         base_channels (int): Base channels after stem layer.
22 |         in_channels (int): Number of input image channels. Normally 3.
23 |         dilations (Sequence[int]): Dilation of each stage.
24 |         out_indices (Sequence[int]): Output from which stages.
25 |         style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
26 |             layer is the 3x3 conv layer, otherwise the stride-two layer is
27 |             the first 1x1 conv layer.
28 |         frozen_stages (int): Stages to be frozen (all param fixed). -1 means
29 |             not freezing any parameters.
30 |         norm_cfg (dict): Dictionary to construct and config norm layer.
31 |         norm_eval (bool): Whether to set norm layers to eval mode, namely,
32 |             freeze running stats (mean and var). Note: Effect on Batch Norm
33 |             and its variants only.
34 |         with_cp (bool): Use checkpoint or not. Using checkpoint will save some
35 |             memory while slowing down the training speed.
36 |         zero_init_residual (bool): Whether to use zero init for last norm layer
37 |             in resblocks to let them behave as identity.
38 | 
39 |     Example:
40 |         >>> from mmdet3d.models import NoStemRegNet
41 |         >>> import torch
42 |         >>> self = NoStemRegNet(
43 |                 arch=dict(
44 |                     w0=88,
45 |                     wa=26.31,
46 |                     wm=2.25,
47 |                     group_w=48,
48 |                     depth=25,
49 |                     bot_mul=1.0))
50 |         >>> self.eval()
51 |         >>> inputs = torch.rand(1, 64, 16, 16)
52 |         >>> level_outputs = self.forward(inputs)
53 |         >>> for level_out in level_outputs:
54 |         ...     print(tuple(level_out.shape))
55 |         (1, 96, 8, 8)
56 |         (1, 192, 4, 4)
57 |         (1, 432, 2, 2)
58 |         (1, 1008, 1, 1)
59 |     """
60 | 
61 |     def __init__(self, arch, init_cfg=None, **kwargs):
62 |         super(NoStemRegNet, self).__init__(arch, init_cfg=init_cfg, **kwargs)
63 | 
64 |     def _make_stem_layer(self, in_channels, base_channels):
65 |         """Override the original function so that no stem layer is built,
66 |         since a 3D detector's voxel encoder works like a stem layer."""
67 |         return
68 | 
69 |     def forward(self, x):
70 |         """Forward function of backbone.
71 | 
72 |         Args:
73 |             x (torch.Tensor): Features in shape (N, C, H, W).
74 | 
75 |         Returns:
76 |             tuple[torch.Tensor]: Multi-scale features.
77 |         """
78 |         outs = []
79 |         for i, layer_name in enumerate(self.res_layers):
80 |             res_layer = getattr(self, layer_name)
81 |             x = res_layer(x)
82 |             if i in self.out_indices:
83 |                 outs.append(x)
84 |         return tuple(outs)
85 | 
--------------------------------------------------------------------------------