├── data └── .gitkeep ├── mmdet3d ├── __init__.py ├── models │ ├── utils │ │ └── __init__.py │ ├── heads │ │ ├── segm │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── bbox │ │ │ └── __init__.py │ ├── temporal │ │ └── __init__.py │ ├── vtransforms │ │ ├── __init__.py │ │ └── lss.py │ ├── fusers │ │ ├── __init__.py │ │ ├── concat.py │ │ ├── conv.py │ │ └── add.py │ ├── fusion_models │ │ └── __init__.py │ ├── necks │ │ ├── __init__.py │ │ └── lss.py │ ├── losses │ │ └── __init__.py │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ └── resnet.py │ └── builder.py ├── ops │ ├── bev_pool │ │ ├── __init__.py │ │ └── bev_pool.py │ ├── knn │ │ ├── __init__.py │ │ ├── src │ │ │ └── knn.cpp │ │ └── knn.py │ ├── ball_query │ │ ├── __init__.py │ │ ├── ball_query.py │ │ └── src │ │ │ ├── ball_query.cpp │ │ │ └── ball_query_cuda.cu │ ├── gather_points │ │ ├── __init__.py │ │ ├── gather_points.py │ │ └── src │ │ │ └── gather_points.cpp │ ├── iou3d │ │ ├── __init__.py │ │ └── iou3d_utils.py │ ├── interpolate │ │ ├── __init__.py │ │ ├── three_nn.py │ │ ├── three_interpolate.py │ │ └── src │ │ │ └── three_nn_cuda.cu │ ├── group_points │ │ ├── __init__.py │ │ └── src │ │ │ └── group_points.cpp │ ├── paconv │ │ ├── __init__.py │ │ └── src │ │ │ └── assign_score_withk.cpp │ ├── voxel │ │ ├── __init__.py │ │ └── src │ │ │ └── voxelization.cpp │ ├── furthest_point_sample │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── furthest_point_sample.py │ │ └── src │ │ │ └── furthest_point_sample.cpp │ ├── roiaware_pool3d │ │ ├── __init__.py │ │ └── src │ │ │ └── points_in_boxes_cpu.cpp │ ├── spconv │ │ └── __init__.py │ ├── pointnet_modules │ │ ├── __init__.py │ │ ├── builder.py │ │ └── point_fp_module.py │ └── __init__.py ├── apis │ ├── __init__.py │ └── test.py ├── core │ ├── utils │ │ ├── __init__.py │ │ └── gaussian.py │ ├── voxel │ │ ├── __init__.py │ │ └── builder.py │ ├── bbox │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ ├── assigners │ │ │ └── __init__.py │ │ 
├── iou_calculators │ │ │ └── __init__.py │ │ ├── samplers │ │ │ └── __init__.py │ │ ├── coders │ │ │ └── __init__.py │ │ ├── structures │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── util.py │ ├── __init__.py │ ├── anchor │ │ └── __init__.py │ ├── post_processing │ │ └── __init__.py │ └── points │ │ ├── __init__.py │ │ ├── cam_points.py │ │ ├── depth_points.py │ │ └── lidar_points.py ├── runner │ ├── __init__.py │ └── epoch_based_runner.py ├── datasets │ ├── pipelines │ │ └── __init__.py │ ├── __init__.py │ ├── utils.py │ └── builder.py └── utils │ ├── __init__.py │ ├── syncbn.py │ ├── config.py │ └── logger.py ├── tools ├── data_converter │ └── __init__.py ├── preprocessing │ └── __init__.py ├── download_pretrained.sh ├── visualization │ ├── utils │ │ └── __init__.py │ ├── __init__.py │ └── create_video.py ├── convert_checkpoints_to_torchsparse.py ├── create_swint_checkpoint.py ├── create_combined_checkpoint.py └── debug_train.py ├── pyproject.toml ├── requirements-visual.txt ├── requirements-dev.txt ├── configs ├── osdar23 │ ├── baseline │ │ ├── centerhead │ │ │ ├── camera │ │ │ │ └── default.yaml │ │ │ └── default.yaml │ │ ├── transfusion │ │ │ ├── lidar │ │ │ │ ├── default.yaml │ │ │ │ ├── voxelnet-1600g-0xy16-0z4.yaml │ │ │ │ └── voxelnet-1600g-0xy16-0z4-gtp15.yaml │ │ │ └── default.yaml │ │ └── default.yaml │ ├── temporal │ │ ├── transfusion │ │ │ └── lidar │ │ │ │ ├── default.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml │ │ ├── centerhead │ │ │ ├── camera │ │ │ │ └── default.yaml │ │ │ └── default.yaml │ │ └── default.yaml │ └── temporal-gru │ │ ├── transfusion │ │ ├── lidar │ │ │ ├── default.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml │ │ │ ├── 
voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml │ │ └── default.yaml │ │ ├── centerhead │ │ ├── camera │ │ │ └── default.yaml │ │ └── default.yaml │ │ └── default.yaml ├── tumtraf-i │ ├── baseline │ │ ├── centerhead │ │ │ ├── camera │ │ │ │ └── default.yaml │ │ │ └── default.yaml │ │ ├── transfusion │ │ │ └── lidar │ │ │ │ ├── default.yaml │ │ │ │ ├── voxelnet-1600g-0xy1-0z20.yaml │ │ │ │ └── voxelnet-1600g-0xy1-0z20-gtp15.yaml │ │ └── default.yaml │ ├── temporal │ │ ├── transfusion │ │ │ └── lidar │ │ │ │ ├── default.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml │ │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml │ │ │ │ └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml │ │ ├── centerhead │ │ │ ├── camera │ │ │ │ └── default.yaml 
│ │ │ └── default.yaml │ │ └── default.yaml │ └── temporal-gru │ │ ├── transfusion │ │ └── lidar │ │ │ ├── default.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml │ │ │ ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml │ │ │ └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml │ │ └── default.yaml └── default.yaml ├── docs └── figures │ └── teaser.jpg ├── setup.cfg ├── requirements.txt ├── Makefile ├── Dockerfile.dev └── Dockerfile.prod /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet3d/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | -------------------------------------------------------------------------------- /mmdet3d/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import * 2 | -------------------------------------------------------------------------------- /mmdet3d/models/heads/segm/__init__.py: -------------------------------------------------------------------------------- 1 | from .vanilla import * 2 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .bev_pool import bev_pool 2 | -------------------------------------------------------------------------------- /mmdet3d/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .test import * 2 | from .train import * 3 | -------------------------------------------------------------------------------- /requirements-visual.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | pytransform3d 3 | opencv-python -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | mypy 2 | flake8 3 | black 4 | isort 5 | pydocstyle 6 | 
-------------------------------------------------------------------------------- /mmdet3d/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox import * 2 | from .segm import * 3 | -------------------------------------------------------------------------------- /mmdet3d/models/temporal/__init__.py: -------------------------------------------------------------------------------- 1 | from .gru import * 2 | from .lstm import * 3 | -------------------------------------------------------------------------------- /mmdet3d/ops/knn/__init__.py: -------------------------------------------------------------------------------- 1 | from .knn import knn 2 | 3 | __all__ = ["knn"] 4 | -------------------------------------------------------------------------------- /mmdet3d/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .gaussian import * 2 | from .visualize import * 3 | -------------------------------------------------------------------------------- /mmdet3d/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import CustomEpochBasedRunner 2 | -------------------------------------------------------------------------------- /mmdet3d/models/vtransforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .lss import * 2 | from .depth_lss import * 3 | -------------------------------------------------------------------------------- /configs/osdar23/baseline/centerhead/camera/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | lidar: null 4 | -------------------------------------------------------------------------------- /configs/osdar23/baseline/transfusion/lidar/default.yaml: 
-------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | camera: null 4 | -------------------------------------------------------------------------------- /configs/osdar23/temporal/transfusion/lidar/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | camera: null 4 | -------------------------------------------------------------------------------- /mmdet3d/models/heads/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .centerpoint import * 2 | from .transfusion import * 3 | -------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/transfusion/lidar/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | camera: null 4 | -------------------------------------------------------------------------------- /configs/tumtraf-i/baseline/centerhead/camera/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | lidar: null 4 | -------------------------------------------------------------------------------- /configs/tumtraf-i/baseline/transfusion/lidar/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | camera: null 4 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | camera: null 4 | -------------------------------------------------------------------------------- /mmdet3d/models/fusers/__init__.py: -------------------------------------------------------------------------------- 1 | from .add import * 2 | from .concat import * 3 | from .conv 
import * 4 | -------------------------------------------------------------------------------- /mmdet3d/ops/ball_query/__init__.py: -------------------------------------------------------------------------------- 1 | from .ball_query import ball_query 2 | 3 | __all__ = ["ball_query"] 4 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | camera: null 4 | -------------------------------------------------------------------------------- /docs/figures/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/egemenkopuz/temporal-bevfusion/HEAD/docs/figures/teaser.jpg -------------------------------------------------------------------------------- /mmdet3d/models/fusion_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .bevfusion import * 3 | from .tbevfusion import * 4 | -------------------------------------------------------------------------------- /mmdet3d/ops/gather_points/__init__.py: -------------------------------------------------------------------------------- 1 | from .gather_points import gather_points 2 | 3 | __all__ = ["gather_points"] 4 | -------------------------------------------------------------------------------- /mmdet3d/ops/iou3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .iou3d_utils import boxes_iou_bev, nms_gpu, nms_normal_gpu 2 | 3 | __all__ = ["boxes_iou_bev", "nms_gpu", "nms_normal_gpu"] 4 | -------------------------------------------------------------------------------- /tools/download_pretrained.sh: -------------------------------------------------------------------------------- 1 | mkdir pretrained && 2 | cd pretrained && 3 | wget 
https://bevfusion.mit.edu/files/pretrained_updated/swint-nuimages-pretrained.pth 4 | -------------------------------------------------------------------------------- /configs/osdar23/temporal/centerhead/camera/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | lidar: null 4 | temporal: 5 | in_channels: 80 6 | hidden_channels: [80] 7 | -------------------------------------------------------------------------------- /tools/visualization/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .osdar23_meta import OSDAR23Meta 2 | from .tumtraf_meta import TUMTrafMeta 3 | 4 | __all__ = ["TUMTrafMeta", "OSDAR23Meta"] 5 | -------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/centerhead/camera/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | lidar: null 4 | temporal: 5 | in_channels: 80 6 | hidden_channels: [80] 7 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/centerhead/camera/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | encoders: 3 | lidar: null 4 | temporal: 5 | in_channels: 80 6 | hidden_channels: [80] 7 | -------------------------------------------------------------------------------- /mmdet3d/ops/interpolate/__init__.py: -------------------------------------------------------------------------------- 1 | from .three_interpolate import three_interpolate 2 | from .three_nn import three_nn 3 | 4 | __all__ = ["three_nn", "three_interpolate"] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/group_points/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_points import GroupAll, 
QueryAndGroup, grouping_operation 2 | 3 | __all__ = ["QueryAndGroup", "GroupAll", "grouping_operation"] 4 | -------------------------------------------------------------------------------- /mmdet3d/core/voxel/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_voxel_generator 2 | from .voxel_generator import VoxelGenerator 3 | 4 | __all__ = ["build_voxel_generator", "VoxelGenerator"] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/__init__.py: -------------------------------------------------------------------------------- 1 | from .assign_score import assign_score_withk 2 | from .paconv import PAConv, PAConvCUDA 3 | 4 | __all__ = ["assign_score_withk", "PAConv", "PAConvCUDA"] 5 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.necks.fpn import FPN 2 | 3 | from .lss import * 4 | from .second import * 5 | from .generalized_lss import * 6 | from .detectron_fpn import * -------------------------------------------------------------------------------- /mmdet3d/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ["build_match_cost", "BBox3DL1Cost"] 5 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.pipelines import Compose 2 | 3 | from .dbsampler import * 4 | from .formating import * 5 | from .loading import * 6 | from .transforms_3d import * 7 | 
-------------------------------------------------------------------------------- /mmdet3d/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy 2 | 3 | __all__ = [ 4 | "FocalLoss", 5 | "SmoothL1Loss", 6 | "binary_cross_entropy", 7 | ] 8 | -------------------------------------------------------------------------------- /configs/osdar23/baseline/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: BEVFusion 3 | encoders: null 4 | fuser: null 5 | heads: 6 | map: null 7 | 8 | temporal_mode: false 9 | val_online_mode: false 10 | test_online_mode: false 11 | -------------------------------------------------------------------------------- /configs/tumtraf-i/baseline/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: BEVFusion 3 | encoders: null 4 | fuser: null 5 | heads: 6 | map: null 7 | 8 | temporal_mode: false 9 | val_online_mode: false 10 | test_online_mode: false 11 | -------------------------------------------------------------------------------- /mmdet3d/ops/voxel/__init__.py: -------------------------------------------------------------------------------- 1 | from .scatter_points import DynamicScatter, dynamic_scatter 2 | from .voxelize import Voxelization, voxelization 3 | 4 | __all__ = ["Voxelization", "voxelization", "dynamic_scatter", "DynamicScatter"] 5 | -------------------------------------------------------------------------------- /mmdet3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .builder import * 3 | from .fusers import * 4 | from .fusion_models import * 5 | from .heads import * 6 | from .losses import * 7 | from .necks import * 8 | from .temporal import * 9 | from .vtransforms import * 10 | 
-------------------------------------------------------------------------------- /mmdet3d/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt 2 | 3 | from .resnet import * 4 | from .second import * 5 | from .sparse_encoder import * 6 | from .pillar_encoder import * 7 | from .vovnet import * 8 | from .dla import * -------------------------------------------------------------------------------- /mmdet3d/ops/furthest_point_sample/__init__.py: -------------------------------------------------------------------------------- 1 | from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist 2 | from .points_sampler import Points_Sampler 3 | 4 | __all__ = ["furthest_point_sample", "furthest_point_sample_with_dist", "Points_Sampler"] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/roiaware_pool3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .points_in_boxes import points_in_boxes_batch, points_in_boxes_cpu, points_in_boxes_gpu 2 | from .roiaware_pool3d import RoIAwarePool3d 3 | 4 | __all__ = ["RoIAwarePool3d", "points_in_boxes_gpu", "points_in_boxes_cpu", "points_in_boxes_batch"] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .points import * # noqa: F401, F403 4 | from .post_processing import * # noqa: F401, F403 5 | from .utils import * # noqa: F401, F403 6 | from .voxel import * # noqa: F401, F403 7 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/assigners/__init__.py: 
-------------------------------------------------------------------------------- 1 | from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .hungarian_assigner import HungarianAssigner3D, HeuristicAssigner3D 3 | 4 | __all__ = ["BaseAssigner", "MaxIoUAssigner", "AssignResult", "HungarianAssigner3D", "HeuristicAssigner3D"] 5 | -------------------------------------------------------------------------------- /mmdet3d/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import build_dataloader 2 | 3 | from .builder import * 4 | from .custom_3d import * 5 | from .nuscenes_dataset import * 6 | from .osdar23_dataset import * 7 | from .pipelines import * 8 | from .tumtraf_intersection_dataset import * 9 | from .utils import * 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | profile = black 3 | 4 | [flake8] 5 | max-line-length = 100 6 | ignore = E203, E402, W503, W504, F821, E501 7 | exclude = venv,demo 8 | 9 | [mypy] 10 | ignore_missing_imports = True 11 | disallow_untyped_defs = True 12 | exclude = venv|docs|demo 13 | 14 | [pydocstyle] 15 | ignore = D100 16 | -------------------------------------------------------------------------------- /mmdet3d/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg, print_log 2 | 3 | from .logger import get_root_logger 4 | from .syncbn import convert_sync_batchnorm 5 | from .config import recursive_eval 6 | 7 | __all__ = ["Registry", "build_from_cfg", "get_root_logger", "print_log", "convert_sync_batchnorm", "recursive_eval"] 8 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | Pillow==8.4.0 2 | numpy==1.19.5 3 | tqdm 4 | torchpack 5 | mmcv==1.4.0 6 | mmcv-full==1.4.0 7 | mmdet==2.20.0 8 | nuscenes-devkit 9 | mpi4py==3.0.3 10 | numba==0.48.0 11 | git+https://github.com/DanielPollithy/pypcd.git 12 | git+https://github.com/facebookresearch/pytorch3d.git@stable 13 | optuna==3.3.0 14 | pandas==1.3.4 15 | plotly==5.17.0 16 | kaleido==0.2.1 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: dev 4 | 5 | dev: install-pkgs install-dev 6 | prod: install-pkgs install-prod 7 | 8 | install-pkgs: 9 | pip install --extra-index-url http://24.199.104.228/simple --trusted-host 24.199.104.228 torchsparse==2.1.0+torch110cu113 --force-reinstall 10 | pip install -r requirements.txt 11 | 12 | install-dev: 13 | python setup.py develop 14 | 15 | install-prod: 16 | python setup.py install -------------------------------------------------------------------------------- /mmdet3d/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.anchor import build_prior_generator 2 | from .anchor_3d_generator import ( 3 | AlignedAnchor3DRangeGenerator, 4 | AlignedAnchor3DRangeGeneratorPerCls, 5 | Anchor3DRangeGenerator, 6 | ) 7 | 8 | __all__ = [ 9 | "AlignedAnchor3DRangeGenerator", 10 | "Anchor3DRangeGenerator", 11 | "build_prior_generator", 12 | "AlignedAnchor3DRangeGeneratorPerCls", 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/models/fusers/concat.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | 5 | from mmdet3d.models.builder import FUSERS 6 | 7 | __all__ = ["ConcatFuser"] 8 | 9 | 10 | @FUSERS.register_module() 11 | class 
ConcatFuser: 12 | def __init__(self, dim: int = 1) -> None: 13 | self.dim = dim 14 | 15 | def __call__(self, inputs: List[torch.Tensor]) -> torch.Tensor: 16 | return torch.cat(inputs, dim=self.dim) 17 | -------------------------------------------------------------------------------- /mmdet3d/core/voxel/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . import voxel_generator 4 | 5 | 6 | def build_voxel_generator(cfg, **kwargs): 7 | """Builder of voxel generator.""" 8 | if isinstance(cfg, voxel_generator.VoxelGenerator): 9 | return cfg 10 | elif isinstance(cfg, dict): 11 | return mmcv.runner.obj_from_dict(cfg, voxel_generator, default_args=kwargs) 12 | else: 13 | raise TypeError("Invalid type {} for building a sampler".format(type(cfg))) 14 | -------------------------------------------------------------------------------- /mmdet3d/utils/syncbn.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from collections import deque 4 | 5 | 6 | __all__ = ["convert_sync_batchnorm"] 7 | 8 | 9 | def convert_sync_batchnorm(input_model, exclude=[]): 10 | for name, module in input_model._modules.items(): 11 | skip = sum([ex in name for ex in exclude]) 12 | if skip: 13 | continue 14 | input_model._modules[name] = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module) 15 | return input_model 16 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | from .iou3d_calculator import ( 2 | AxisAlignedBboxOverlaps3D, 3 | BboxOverlaps3D, 4 | BboxOverlapsNearest3D, 5 | axis_aligned_bbox_overlaps_3d, 6 | bbox_overlaps_3d, 7 | bbox_overlaps_nearest_3d, 8 | ) 9 | 10 | __all__ = [ 11 | "BboxOverlapsNearest3D", 12 | "BboxOverlaps3D", 13 | "bbox_overlaps_nearest_3d", 14 | "bbox_overlaps_3d", 15 | 
"AxisAlignedBboxOverlaps3D", 16 | "axis_aligned_bbox_overlaps_3d", 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/__init__.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from mmcv.cnn.bricks.registry import CONV_LAYERS, NORM_LAYERS 4 | from torch.nn.parameter import Parameter 5 | 6 | 7 | def register_torchsparse(): 8 | """This func registers torchsparse ops.""" 9 | from torchsparse.nn import BatchNorm, Conv3d 10 | 11 | CONV_LAYERS._register_module(Conv3d, "TorchSparseConv3d", force=True) 12 | NORM_LAYERS._register_module(BatchNorm, "TorchSparseBatchNorm", force=True) 13 | 14 | 15 | register_torchsparse() 16 | -------------------------------------------------------------------------------- /mmdet3d/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import EpochBasedRunner 2 | from mmcv.runner.builder import RUNNERS 3 | 4 | @RUNNERS.register_module() 5 | class CustomEpochBasedRunner(EpochBasedRunner): 6 | def set_dataset(self, dataset): 7 | self._dataset = dataset 8 | 9 | 10 | def train(self, data_loader, **kwargs): 11 | # update the schedule for data augmentation 12 | for dataset in self._dataset: 13 | dataset.set_epoch(self.epoch) 14 | super().train(data_loader, **kwargs) 15 | -------------------------------------------------------------------------------- /mmdet3d/apis/test.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import torch 3 | 4 | 5 | def single_gpu_test(model, data_loader): 6 | model.eval() 7 | results = [] 8 | dataset = data_loader.dataset 9 | prog_bar = mmcv.ProgressBar(len(dataset)) 10 | for data in data_loader: 11 | with torch.no_grad(): 12 | result = model(return_loss=False, rescale=True, **data) 13 | results.extend(result) 14 | 15 | batch_size = len(result) 16 | for _ in 
range(batch_size): 17 | prog_bar.update() 18 | return results 19 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks, 2 | merge_aug_proposals, merge_aug_scores, 3 | multiclass_nms) 4 | 5 | from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms, circle_nms 6 | 7 | __all__ = [ 8 | "multiclass_nms", 9 | "merge_aug_proposals", 10 | "merge_aug_bboxes", 11 | "merge_aug_scores", 12 | "merge_aug_masks", 13 | "box3d_multiclass_nms", 14 | "aligned_3d_nms", 15 | "circle_nms", 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_sa_module 2 | from .paconv_sa_module import ( 3 | PAConvCUDASAModule, 4 | PAConvCUDASAModuleMSG, 5 | PAConvSAModule, 6 | PAConvSAModuleMSG, 7 | ) 8 | from .point_fp_module import PointFPModule 9 | from .point_sa_module import PointSAModule, PointSAModuleMSG 10 | 11 | __all__ = [ 12 | "build_sa_module", 13 | "PointSAModuleMSG", 14 | "PointSAModule", 15 | "PointFPModule", 16 | "PAConvSAModule", 17 | "PAConvSAModuleMSG", 18 | "PAConvCUDASAModule", 19 | "PAConvCUDASAModuleMSG", 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet3d/ops/voxel/src/voxelization.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "voxelization.h" 3 | 4 | namespace voxelization { 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 7 | m.def("hard_voxelize", &hard_voxelize, "hard voxelize"); 8 | m.def("dynamic_voxelize", &dynamic_voxelize, "dynamic voxelization"); 9 | m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward, "dynamic 
def recursive_eval(obj, globals=None):
    """Resolve ``${...}`` placeholders inside a nested config structure.

    Dicts and lists are walked recursively and updated in place. A string
    that starts with ``${`` and ends with ``}`` is replaced by the result of
    evaluating the enclosed Python expression; the result is then resolved
    again, so placeholders may refer to other placeholders.

    Args:
        obj: The config node to resolve (dict, list, str or any other value).
        globals: Namespace used for evaluating the expressions. When omitted,
            a deep copy of ``obj`` is used, so top-level keys of the config
            are available as variables.

    Returns:
        The resolved node. Dicts and lists are the same (mutated) objects
        that were passed in.
    """
    if globals is None:
        globals = copy.deepcopy(obj)

    if isinstance(obj, dict):
        for name, value in obj.items():
            # Only existing keys are reassigned, so iterating is safe.
            obj[name] = recursive_eval(value, globals)
        return obj

    if isinstance(obj, list):
        for position, item in enumerate(obj):
            obj[position] = recursive_eval(item, globals)
        return obj

    is_placeholder = (
        isinstance(obj, str) and obj.startswith("${") and obj.endswith("}")
    )
    if is_placeholder:
        # NOTE(review): eval() executes arbitrary code; config files passed
        # through this function must come from a trusted source.
        resolved = eval(obj[2:-1], globals)
        return recursive_eval(resolved, globals)

    return obj
nn.BatchNorm2d(out_channels), 19 | nn.ReLU(True), 20 | ) 21 | 22 | def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor: 23 | return super().forward(torch.cat(inputs, dim=1)) 24 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox import build_bbox_coder 2 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder 3 | from .centerpoint_bbox_coders import CenterPointBBoxCoder 4 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder 5 | from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder 6 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 7 | from .nms_free_coder import NMSFreeCoder 8 | from .transfusion_bbox_coder import TransFusionBBoxCoder 9 | 10 | __all__ = [ 11 | "build_bbox_coder", 12 | "DeltaXYZWLHRBBoxCoder", 13 | "PartialBinBasedBBoxCoder", 14 | "CenterPointBBoxCoder", 15 | "AnchorFreeBBoxCoder", 16 | "GroupFree3DBBoxCoder", 17 | "NMSFreeCoder", 18 | "TransFusionBBoxCoder" 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_box3d import BaseInstance3DBoxes 2 | from .box_3d_mode import Box3DMode 3 | from .cam_box3d import CameraInstance3DBoxes 4 | from .coord_3d_mode import Coord3DMode 5 | from .depth_box3d import DepthInstance3DBoxes 6 | from .lidar_box3d import LiDARInstance3DBoxes 7 | from .utils import ( 8 | get_box_type, 9 | get_proj_mat_by_coord_type, 10 | limit_period, 11 | mono_cam_box2vis, 12 | points_cam2img, 13 | rotation_3d_in_axis, 14 | xywhr2xyxyr, 15 | ) 16 | 17 | __all__ = [ 18 | "Box3DMode", 19 | "BaseInstance3DBoxes", 20 | "LiDARInstance3DBoxes", 21 | "CameraInstance3DBoxes", 22 | "DepthInstance3DBoxes", 23 | "xywhr2xyxyr", 24 | "get_box_type", 25 | 
"rotation_3d_in_axis", 26 | "limit_period", 27 | "points_cam2img", 28 | "Coord3DMode", 29 | "mono_cam_box2vis", 30 | "get_proj_mat_by_coord_type", 31 | ] 32 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .coders import DeltaXYZWLHRBBoxCoder 3 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 4 | BboxOverlapsNearest3D, 5 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 6 | bbox_overlaps_nearest_3d) 7 | from .match_costs import BBox3DL1Cost 8 | from .samplers import (BaseSampler, CombinedSampler, 9 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 10 | PseudoSampler, RandomSampler, SamplingResult) 11 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, 12 | Coord3DMode, DepthInstance3DBoxes, 13 | LiDARInstance3DBoxes, get_box_type, limit_period, 14 | mono_cam_box2vis, points_cam2img, xywhr2xyxyr) 15 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost: 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.0): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 19 | (cx, cy, w, h), which are all in range [0, 1]. Shape 20 | [num_query, 4]. 21 | gt_bboxes (Tensor): Ground truth boxes with normalized 22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 
def get_points_type(points_type):
    """Look up the points class that belongs to a coordinate type.

    Args:
        points_type (str): Name of the coordinate system. Must be one of
            "CAMERA", "LIDAR" or "DEPTH".

    Returns:
        class: ``CameraPoints``, ``LiDARPoints`` or ``DepthPoints``.

    Raises:
        ValueError: If ``points_type`` is none of the supported names.
    """
    if points_type == "CAMERA":
        return CameraPoints
    if points_type == "LIDAR":
        return LiDARPoints
    if points_type == "DEPTH":
        return DepthPoints

    raise ValueError(
        'Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"'
        f" are supported, got {points_type}"
    )
# Registries owned by this package; the remaining ones are re-used from mmdet.
FUSIONMODELS = Registry("fusion_models")
VTRANSFORMS = Registry("vtransforms")
FUSERS = Registry("fusers")
TEMPORAL = Registry("temporal")


def build_backbone(cfg):
    """Build a backbone from ``cfg`` via the ``BACKBONES`` registry."""
    return BACKBONES.build(cfg)


def build_neck(cfg):
    """Build a neck from ``cfg`` via the ``NECKS`` registry."""
    return NECKS.build(cfg)


def build_vtransform(cfg):
    """Build a view transform from ``cfg`` via the ``VTRANSFORMS`` registry."""
    return VTRANSFORMS.build(cfg)


def build_fuser(cfg):
    """Build a fuser from ``cfg`` via the ``FUSERS`` registry."""
    return FUSERS.build(cfg)


def build_temporal(cfg):
    """Build a temporal module from ``cfg`` via the ``TEMPORAL`` registry."""
    return TEMPORAL.build(cfg)


def build_head(cfg):
    """Build a head from ``cfg`` via the ``HEADS`` registry."""
    return HEADS.build(cfg)


def build_loss(cfg):
    """Build a loss from ``cfg`` via the ``LOSSES`` registry."""
    return LOSSES.build(cfg)


def build_fusion_model(cfg, train_cfg=None, test_cfg=None):
    """Build a fusion model from ``cfg`` via the ``FUSIONMODELS`` registry.

    Args:
        cfg: Model config handed to the registry.
        train_cfg: Supplied to the registry as a default argument.
        test_cfg: Supplied to the registry as a default argument.
    """
    defaults = dict(train_cfg=train_cfg, test_cfg=test_cfg)
    return FUSIONMODELS.build(cfg, default_args=defaults)


def build_model(cfg, train_cfg=None, test_cfg=None):
    """Build a model; currently an alias of :func:`build_fusion_model`."""
    return build_fusion_model(cfg, train_cfg=train_cfg, test_cfg=test_cfg)
update \ 7 | && apt-get install -y \ 8 | wget curl build-essential g++ gcc ffmpeg ninja-build git \ 9 | libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl \ 10 | libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ 11 | libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 \ 12 | openmpi-bin openmpi-common libopenmpi-dev libgtk2.0-dev \ 13 | && apt-get clean \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv 17 | ENV PYENV_ROOT="${HOME}/.pyenv" 18 | ENV PATH="${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:$HOME/.local/bin:${PATH}" 19 | 20 | RUN pyenv install ${PYTHON_VERSION} 21 | RUN pyenv global ${PYTHON_VERSION} 22 | 23 | RUN pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html 24 | RUN pip3 install setuptools==59.5.0 25 | 26 | WORKDIR /root/mmdet3d 27 | ENTRYPOINT ["tail", "-f", "/dev/null"] 28 | -------------------------------------------------------------------------------- /mmdet3d/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from mmcv.utils import get_logger 3 | 4 | 5 | def get_root_logger(log_file=None, log_level=logging.INFO, name="mmdet3d"): 6 | """Get root logger and add a keyword filter to it. 7 | 8 | The logger will be initialized if it has not been initialized. By default a 9 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 10 | also be added. The name of the root logger is the top-level package name, 11 | e.g., "mmdet3d". 12 | 13 | Args: 14 | log_file (str, optional): File path of log. Defaults to None. 15 | log_level (int, optional): The level of logger. 16 | Defaults to logging.INFO. 17 | name (str, optional): The name of the root logger, also used as a 18 | filter keyword. Defaults to 'mmdet3d'. 
def calc_square_dist(point_feat_a, point_feat_b, norm=True):
    """Calculating square distance between a and b.

    The distance is computed with the expansion
    ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a . b``.

    Args:
        point_feat_a (Tensor): (B, N, C) Feature vector of each point.
        point_feat_b (Tensor): (B, M, C) Feature vector of each point.
        norm (Bool): Whether to normalize the distance. When True the
            result is ``sqrt(squared_distance) / C``; when False the raw
            squared distance is returned.
            Default: True.

    Returns:
        Tensor: (B, N, M) Distance between each pair points.
    """
    num_channel = point_feat_a.shape[-1]

    # Squared norms, shaped so that they broadcast to (B, N, M) when added.
    a_square = point_feat_a.pow(2).sum(dim=-1).unsqueeze(2)  # [bs, n, 1]
    b_square = point_feat_b.pow(2).sum(dim=-1).unsqueeze(1)  # [bs, 1, m]

    coor = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))

    # Floating-point cancellation can push the expansion slightly below zero
    # for (nearly) identical points; a squared distance is never negative and
    # a negative value would turn into NaN under sqrt, so clamp at zero.
    dist = (a_square + b_square - 2 * coor).clamp(min=0)
    if norm:
        dist = torch.sqrt(dist) / num_channel
    return dist
@BACKBONES.register_module()
class GeneralizedResNet(nn.ModuleList):
    """Stack of residual stages that returns the output of every stage.

    Args:
        in_channels: Number of channels of the input feature map.
        blocks: One ``(num_blocks, out_channels, stride)`` tuple per stage.
    """

    def __init__(
        self,
        in_channels: int,
        blocks: List[Tuple[int, int, int]],
    ) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.blocks = blocks

        # Each stage consumes the channel count produced by the previous one.
        stage_in_channels = in_channels
        for depth, stage_out_channels, stage_stride in self.blocks:
            stage = make_res_layer(
                BasicBlock,
                stage_in_channels,
                stage_out_channels,
                depth,
                stride=stage_stride,
                dilation=1,
            )
            self.append(stage)
            stage_in_channels = stage_out_channels

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Run the stages in order and collect every intermediate output.

        Args:
            x: Input feature map.

        Returns:
            List with one tensor per stage, in stage order.
        """
        stage_outputs = []
        feature = x
        for stage in self:
            feature = stage(feature)
            stage_outputs.append(feature)
        return stage_outputs
m.def("assign_score_withk_backward_wrapper", 34 | &assign_score_withk_backward_wrapper, 35 | "Assign score kernel backward (GPU), save memory version"); 36 | } 37 | -------------------------------------------------------------------------------- /Dockerfile.prod: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.3.1-devel-ubuntu20.04 2 | 3 | ENV PYTHON_VERSION=3.8 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6+PTX" \ 7 | TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ 8 | FORCE_CUDA="1" 9 | 10 | RUN apt-get update \ 11 | && apt-get install -y \ 12 | wget curl build-essential g++ gcc ffmpeg ninja-build git \ 13 | libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl \ 14 | libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ 15 | libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 \ 16 | openmpi-bin openmpi-common libopenmpi-dev libgtk2.0-dev \ 17 | && apt-get clean \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv 21 | ENV PYENV_ROOT="${HOME}/.pyenv" 22 | ENV PATH="${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:$HOME/.local/bin:${PATH}" 23 | 24 | RUN pyenv install ${PYTHON_VERSION} 25 | RUN pyenv global ${PYTHON_VERSION} 26 | 27 | RUN pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html 28 | RUN pip3 install setuptools==59.5.0 29 | 30 | WORKDIR /root/mmdet3d 31 | 32 | COPY . . 
@FUSERS.register_module()
class AddFuser(nn.Module):
    """Fuse several feature maps by projecting each one and averaging.

    Every input stream gets its own 3x3 conv + batch norm + ReLU projection
    to ``out_channels``. During training one stream may be dropped at random
    (its weight is set to 0) so the model does not rely on a single stream.

    Args:
        in_channels: Channel count of each input stream, in input order.
        out_channels: Channel count of the fused output.
        dropout: Probability, per forward pass in training mode, of dropping
            one randomly chosen stream.
    """

    def __init__(self, in_channels: List[int], out_channels: int, dropout: float = 0) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.dropout = dropout

        self.transforms = nn.ModuleList(
            nn.Sequential(
                nn.Conv2d(channels, out_channels, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(True),
            )
            for channels in in_channels
        )

    def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor:
        """Project every input and return their weighted mean.

        Args:
            inputs: One feature map per stream, in the order of
                ``in_channels``.

        Returns:
            The mean of the projected feature maps that were not dropped.
        """
        features = [
            transform(feature)
            for transform, feature in zip(self.transforms, inputs)
        ]

        weights = [1] * len(inputs)
        # Dropping the only stream would leave a weight sum of 0 and turn the
        # output into NaN, so stream dropout needs at least two inputs.
        if self.training and len(inputs) > 1 and random.random() < self.dropout:
            index = random.randint(0, len(inputs) - 1)
            weights[index] = 0

        return sum(w * f for w, f in zip(weights, features)) / sum(weights)
def create_video(source_folder_dir: str, target_path: str) -> None:
    """Assemble the images of a folder into a video file.

    The ``.png``/``.jpg``/``.jpeg`` files in ``source_folder_dir`` are sorted
    by file name and written as frames at 10 frames per second. The frame
    size is taken from the first image. Nothing is written when the folder
    contains no images.

    Args:
        source_folder_dir: Folder that holds the frame images.
        target_path: Path of the video file to create. Missing parent
            folders are created.

    Raises:
        ValueError: If the first image cannot be decoded.
    """
    images = sorted(
        name
        for name in os.listdir(source_folder_dir)
        if name.lower().endswith((".png", ".jpg", ".jpeg"))
    )

    if not images:
        return

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True, mode=0o777)

    first_path = os.path.join(source_folder_dir, images[0])
    first_frame = cv2.imread(first_path)
    if first_frame is None:
        raise ValueError(f"Could not read image: {first_path}")
    height, width, _ = first_frame.shape

    # 0x7634706D is the little-endian FourCC code for "mp4v".
    video = cv2.VideoWriter(target_path, 0x7634706D, 10, (width, height))
    try:
        video.write(first_frame)
        for image in images[1:]:
            video.write(cv2.imread(os.path.join(source_folder_dir, image)))
    finally:
        # Always finalize the container, even if a frame fails to write.
        video.release()
| 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | voxel_max_points: 15 8 | voxel_max_voxels: [200000, 200000] 9 | out_size_factor: 8 10 | 11 | samples_per_gpu: 6 12 | workers_per_gpu: 6 13 | 14 | max_epochs: 20 15 | 16 | optimizer: 17 | type: AdamW 18 | lr: 6.6e-05 19 | weight_decay: 0.01 20 | 21 | momentum_config: 22 | policy: cyclic 23 | cyclic_times: 1 24 | step_ratio_up: 0.4 25 | 26 | lr_config: 27 | cyclic_times: 1 28 | policy: cyclic 29 | step_ratio_up: 0.4 30 | 31 | optimizer_config: 32 | grad_clip: 33 | max_norm: 25 34 | norm_type: 2 35 | 36 | model: 37 | encoders: 38 | lidar: 39 | voxelize: 40 | max_num_points: ${voxel_max_points} 41 | point_cloud_range: ${point_cloud_range} 42 | voxel_size: ${voxel_size} 43 | max_voxels: ${voxel_max_voxels} 44 | backbone: 45 | type: SparseEncoder 46 | in_channels: ${use_dim} 47 | sparse_shape: ${grid_size} 48 | output_channels: 128 49 | order: 50 | - conv 51 | - norm 52 | - act 53 | encoder_channels: 54 | - [16, 16, 32] 55 | - [32, 32, 64] 56 | - [64, 64, 128] 57 | - [128, 128] 58 | encoder_paddings: 59 | - [0, 0, 1] 60 | - [0, 0, 1] 61 | - [0, 0, [1, 1, 0]] 62 | - [0, 0] 63 | block_type: basicblock 64 | -------------------------------------------------------------------------------- /configs/tumtraf-i/baseline/transfusion/lidar/voxelnet-1600g-0xy1-0z20.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | voxel_max_points: 10 8 | voxel_max_voxels: [120000, 120000] 9 | 10 | deterministic: True 11 | 12 | samples_per_gpu: 6 13 | workers_per_gpu: 6 14 | 15 | max_epochs: 20 16 | 17 | optimizer: 18 | type: AdamW 19 | lr: 6.6e-05 20 | weight_decay: 0.01 21 | 22 | momentum_config: 23 | policy: cyclic 24 | cyclic_times: 1 25 | step_ratio_up: 0.4 26 | 27 | lr_config: 28 | 
cyclic_times: 1 29 | policy: cyclic 30 | step_ratio_up: 0.4 31 | 32 | optimizer_config: 33 | grad_clip: 34 | max_norm: 25 35 | norm_type: 2 36 | 37 | model: 38 | encoders: 39 | lidar: 40 | voxelize: 41 | max_num_points: ${voxel_max_points} 42 | point_cloud_range: ${point_cloud_range} 43 | voxel_size: ${voxel_size} 44 | max_voxels: ${voxel_max_voxels} 45 | backbone: 46 | type: SparseEncoder 47 | in_channels: ${use_dim} 48 | sparse_shape: ${grid_size} 49 | output_channels: 128 50 | order: 51 | - conv 52 | - norm 53 | - act 54 | encoder_channels: 55 | - [16, 16, 32] 56 | - [32, 32, 64] 57 | - [64, 64, 128] 58 | - [128, 128] 59 | encoder_paddings: 60 | - [0, 0, 1] 61 | - [0, 0, 1] 62 | - [0, 0, [1, 1, 0]] 63 | - [0, 0] 64 | block_type: basicblock 65 | -------------------------------------------------------------------------------- /mmdet3d/ops/gather_points/gather_points.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from . import gather_points_ext 5 | 6 | 7 | class GatherPoints(Function): 8 | """Gather Points. 9 | 10 | Gather points with given index. 11 | """ 12 | 13 | @staticmethod 14 | def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: 15 | """forward. 16 | 17 | Args: 18 | features (Tensor): (B, C, N) features to gather. 19 | indices (Tensor): (B, M) where M is the number of points. 20 | 21 | Returns: 22 | Tensor: (B, C, M) where M is the number of points. 
23 | """ 24 | assert features.is_contiguous() 25 | assert indices.is_contiguous() 26 | 27 | B, npoint = indices.size() 28 | _, C, N = features.size() 29 | output = torch.cuda.FloatTensor(B, C, npoint) 30 | 31 | gather_points_ext.gather_points_wrapper(B, C, N, npoint, features, indices, output) 32 | 33 | ctx.for_backwards = (indices, C, N) 34 | ctx.mark_non_differentiable(indices) 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_out): 39 | idx, C, N = ctx.for_backwards 40 | B, npoint = idx.size() 41 | 42 | grad_features = torch.cuda.FloatTensor(B, C, N).zero_() 43 | grad_out_data = grad_out.data.contiguous() 44 | gather_points_ext.gather_points_grad_wrapper( 45 | B, C, N, npoint, grad_out_data, idx, grad_features.data 46 | ) 47 | return grad_features, None 48 | 49 | 50 | gather_points = GatherPoints.apply 51 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1) 18 | else: 19 | normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1) 20 | return normalized_bboxes 21 | 22 | 23 | def denormalize_bbox(normalized_bboxes, pc_range): 24 | # rotation 25 | rot_sine = normalized_bboxes[..., 6:7] 26 | 27 | rot_cosine = normalized_bboxes[..., 7:8] 28 | rot = torch.atan2(rot_sine, rot_cosine) 29 | 30 | # center in the bev 31 | cx = normalized_bboxes[..., 0:1] 32 | cy = normalized_bboxes[..., 1:2] 33 | cz = normalized_bboxes[..., 4:5] 34 | 35 | # 
size 36 | w = normalized_bboxes[..., 2:3] 37 | l = normalized_bboxes[..., 3:4] 38 | h = normalized_bboxes[..., 5:6] 39 | 40 | w = w.exp() 41 | l = l.exp() 42 | h = h.exp() 43 | if normalized_bboxes.size(-1) > 8: 44 | # velocity 45 | vx = normalized_bboxes[..., 8:9] 46 | vy = normalized_bboxes[..., 9:10] 47 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 48 | else: 49 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 50 | return denormalized_bboxes 51 | -------------------------------------------------------------------------------- /configs/osdar23/baseline/transfusion/lidar/voxelnet-1600g-0xy16-0z4-gtp15.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | voxel_max_points: 15 8 | voxel_max_voxels: [200000, 200000] 9 | out_size_factor: 8 10 | 11 | samples_per_gpu: 6 12 | workers_per_gpu: 6 13 | 14 | max_epochs: 20 15 | 16 | augment_gt_paste: 17 | max_epoch: 15 18 | 19 | optimizer: 20 | type: AdamW 21 | lr: 6.6e-05 22 | weight_decay: 0.01 23 | 24 | momentum_config: 25 | policy: cyclic 26 | cyclic_times: 1 27 | step_ratio_up: 0.4 28 | 29 | lr_config: 30 | cyclic_times: 1 31 | policy: cyclic 32 | step_ratio_up: 0.4 33 | 34 | optimizer_config: 35 | grad_clip: 36 | max_norm: 25 37 | norm_type: 2 38 | 39 | model: 40 | encoders: 41 | lidar: 42 | voxelize: 43 | max_num_points: ${voxel_max_points} 44 | point_cloud_range: ${point_cloud_range} 45 | voxel_size: ${voxel_size} 46 | max_voxels: ${voxel_max_voxels} 47 | backbone: 48 | type: SparseEncoder 49 | in_channels: ${use_dim} 50 | sparse_shape: ${grid_size} 51 | output_channels: 128 52 | order: 53 | - conv 54 | - norm 55 | - act 56 | encoder_channels: 57 | - [16, 16, 32] 58 | - [32, 32, 64] 59 | - [64, 64, 128] 60 | - [128, 128] 61 | 
encoder_paddings: 62 | - [0, 0, 1] 63 | - [0, 0, 1] 64 | - [0, 0, [1, 1, 0]] 65 | - [0, 0] 66 | block_type: basicblock 67 | -------------------------------------------------------------------------------- /configs/tumtraf-i/baseline/transfusion/lidar/voxelnet-1600g-0xy1-0z20-gtp15.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | voxel_max_points: 10 8 | voxel_max_voxels: [120000, 120000] 9 | 10 | deterministic: True 11 | 12 | samples_per_gpu: 6 13 | workers_per_gpu: 6 14 | 15 | max_epochs: 20 16 | 17 | augment_gt_paste: 18 | max_epoch: 15 19 | 20 | optimizer: 21 | type: AdamW 22 | lr: 6.6e-05 23 | weight_decay: 0.01 24 | 25 | momentum_config: 26 | policy: cyclic 27 | cyclic_times: 1 28 | step_ratio_up: 0.4 29 | 30 | lr_config: 31 | cyclic_times: 1 32 | policy: cyclic 33 | step_ratio_up: 0.4 34 | 35 | optimizer_config: 36 | grad_clip: 37 | max_norm: 25 38 | norm_type: 2 39 | 40 | model: 41 | encoders: 42 | lidar: 43 | voxelize: 44 | max_num_points: ${voxel_max_points} 45 | point_cloud_range: ${point_cloud_range} 46 | voxel_size: ${voxel_size} 47 | max_voxels: ${voxel_max_voxels} 48 | backbone: 49 | type: SparseEncoder 50 | in_channels: ${use_dim} 51 | sparse_shape: ${grid_size} 52 | output_channels: 128 53 | order: 54 | - conv 55 | - norm 56 | - act 57 | encoder_channels: 58 | - [16, 16, 32] 59 | - [32, 32, 64] 60 | - [64, 64, 128] 61 | - [128, 128] 62 | encoder_paddings: 63 | - [0, 0, 1] 64 | - [0, 0, 1] 65 | - [0, 0, [1, 1, 0]] 66 | - [0, 0] 67 | block_type: basicblock 68 | -------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: TBEVFusion 3 | max_queue_length: 
${temporal_cache_length} 4 | encoders: null 5 | fuser: null 6 | temporal: 7 | type: ConvGRU 8 | in_channels: 256 9 | hidden_channels: [256] 10 | kernel_size: [3, 3] 11 | bias: true 12 | heads: 13 | map: null 14 | 15 | temporal_mode: true 16 | val_online_mode: true 17 | test_online_mode: true 18 | 19 | temporal_aware_gt_paste: true 20 | apply_same_aug_to_seq: true 21 | 22 | augment_gt_paste: # default 23 | apply_same_aug_to_seq: true 24 | apply_collision_check: true 25 | apply_temporal_forward: true 26 | cls_rot_lim: 27 | lidar__cuboid__buffer_stop: 28 | - normal 29 | - 0.0 30 | - 0.0 31 | lidar__cuboid__catenary_pole: 32 | - normal 33 | - 0.0 34 | - 0.16490484576995193 35 | lidar__cuboid__person: 36 | - normal 37 | - 0.0 38 | - 0.16723854104003127 39 | lidar__cuboid__road_vehicle: 40 | - normal 41 | - 0.0 42 | - 0.07090241143317916 43 | lidar__cuboid__signal_pole: 44 | - normal 45 | - 0.0 46 | - 0.06715749315684862 47 | cls_trans_lim: 48 | lidar__cuboid__buffer_stop: 49 | - uniform 50 | - 0.0 51 | - 0.0 52 | lidar__cuboid__catenary_pole: 53 | - uniform 54 | - 0.0 55 | - 0.837096823556553 56 | lidar__cuboid__person: 57 | - uniform 58 | - 0.0 59 | - 2.3655256268321976 60 | lidar__cuboid__road_vehicle: 61 | - uniform 62 | - 0.0 63 | - 0.8333046350100968 64 | lidar__cuboid__signal_pole: 65 | - uniform 66 | - 0.0 67 | - 3.3953008285651545 68 | -------------------------------------------------------------------------------- /configs/osdar23/temporal/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: TBEVFusion 3 | max_queue_length: ${temporal_cache_length} 4 | encoders: null 5 | fuser: null 6 | temporal: 7 | type: ConvLSTM 8 | in_channels: 256 9 | hidden_channels: [256] 10 | kernel_size: [3, 3] 11 | bias: true 12 | heads: 13 | map: null 14 | 15 | temporal_mode: true 16 | val_online_mode: true 17 | test_online_mode: true 18 | 19 | apply_same_aug_to_seq: true 20 | 21 | temporal_aware_gt_paste: true 22 | 
apply_same_aug_to_seq: true 23 | 24 | augment_gt_paste: # default 25 | apply_same_aug_to_seq: true 26 | apply_collision_check: true 27 | apply_temporal_forward: true 28 | cls_rot_lim: 29 | lidar__cuboid__buffer_stop: 30 | - normal 31 | - 0.0 32 | - 0.0 33 | lidar__cuboid__catenary_pole: 34 | - normal 35 | - 0.0 36 | - 0.16490484576995193 37 | lidar__cuboid__person: 38 | - normal 39 | - 0.0 40 | - 0.16723854104003127 41 | lidar__cuboid__road_vehicle: 42 | - normal 43 | - 0.0 44 | - 0.07090241143317916 45 | lidar__cuboid__signal_pole: 46 | - normal 47 | - 0.0 48 | - 0.06715749315684862 49 | cls_trans_lim: 50 | lidar__cuboid__buffer_stop: 51 | - uniform 52 | - 0.0 53 | - 0.0 54 | lidar__cuboid__catenary_pole: 55 | - uniform 56 | - 0.0 57 | - 0.837096823556553 58 | lidar__cuboid__person: 59 | - uniform 60 | - 0.0 61 | - 2.3655256268321976 62 | lidar__cuboid__road_vehicle: 63 | - uniform 64 | - 0.0 65 | - 0.8333046350100968 66 | lidar__cuboid__signal_pole: 67 | - uniform 68 | - 0.0 69 | - 3.3953008285651545 70 | -------------------------------------------------------------------------------- /mmdet3d/ops/ball_query/ball_query.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from . import ball_query_ext 5 | 6 | 7 | class BallQuery(Function): 8 | """Ball Query. 9 | 10 | Find nearby points in spherical space. 11 | """ 12 | 13 | @staticmethod 14 | def forward( 15 | ctx, 16 | min_radius: float, 17 | max_radius: float, 18 | sample_num: int, 19 | xyz: torch.Tensor, 20 | center_xyz: torch.Tensor, 21 | ) -> torch.Tensor: 22 | """forward. 23 | 24 | Args: 25 | min_radius (float): minimum radius of the balls. 26 | max_radius (float): maximum radius of the balls. 27 | sample_num (int): maximum number of features in the balls. 28 | xyz (Tensor): (B, N, 3) xyz coordinates of the features. 29 | center_xyz (Tensor): (B, npoint, 3) centers of the ball query. 
30 | 31 | Returns: 32 | Tensor: (B, npoint, nsample) tensor with the indices of 33 | the features that form the query balls. 34 | """ 35 | assert center_xyz.is_contiguous() 36 | assert xyz.is_contiguous() 37 | assert min_radius < max_radius 38 | 39 | B, N, _ = xyz.size() 40 | npoint = center_xyz.size(1) 41 | idx = torch.cuda.IntTensor(B, npoint, sample_num).zero_() 42 | 43 | ball_query_ext.ball_query_wrapper( 44 | B, N, npoint, min_radius, max_radius, sample_num, center_xyz, xyz, idx 45 | ) 46 | ctx.mark_non_differentiable(idx) 47 | return idx 48 | 49 | @staticmethod 50 | def backward(ctx, a=None): 51 | return None, None, None, None, None 52 | 53 | 54 | ball_query = BallQuery.apply 55 | -------------------------------------------------------------------------------- /configs/osdar23/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 15 14 | voxel_max_voxels: [200000, 200000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | optimizer: 22 | type: AdamW 23 | lr: 6.6e-05 24 | weight_decay: 0.01 25 | paramwise_cfg: 26 | custom_keys: 27 | encoders.lidar.backbone: 28 | lr_mult: 0.0 29 | 30 | momentum_config: 31 | policy: cyclic 32 | cyclic_times: 1 33 | step_ratio_up: 0.4 34 | 35 | lr_config: 36 | policy: CosineAnnealing 37 | warmup: linear 38 | warmup_iters: 500 39 | warmup_ratio: 0.33333333 40 | min_lr_ratio: 1.0e-3 41 | 42 | optimizer_config: 43 | grad_clip: 44 | max_norm: 25 45 | norm_type: 2 46 | 47 | model: 48 | encoders: 49 | lidar: 50 | voxelize: 51 | max_num_points: 
${voxel_max_points} 52 | point_cloud_range: ${point_cloud_range} 53 | voxel_size: ${voxel_size} 54 | max_voxels: ${voxel_max_voxels} 55 | backbone: 56 | type: SparseEncoder 57 | in_channels: ${use_dim} 58 | sparse_shape: ${grid_size} 59 | output_channels: 128 60 | order: 61 | - conv 62 | - norm 63 | - act 64 | encoder_channels: 65 | - [16, 16, 32] 66 | - [32, 32, 64] 67 | - [64, 64, 128] 68 | - [128, 128] 69 | encoder_paddings: 70 | - [0, 0, 1] 71 | - [0, 0, 1] 72 | - [0, 0, [1, 1, 0]] 73 | - [0, 0] 74 | block_type: basicblock 75 | -------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 15 14 | voxel_max_voxels: [200000, 200000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | optimizer: 22 | type: AdamW 23 | lr: 6.6e-05 24 | weight_decay: 0.01 25 | paramwise_cfg: 26 | custom_keys: 27 | encoders.lidar.backbone: 28 | lr_mult: 0.0 29 | 30 | momentum_config: 31 | policy: cyclic 32 | cyclic_times: 1 33 | step_ratio_up: 0.4 34 | 35 | lr_config: 36 | policy: CosineAnnealing 37 | warmup: linear 38 | warmup_iters: 500 39 | warmup_ratio: 0.33333333 40 | min_lr_ratio: 1.0e-3 41 | 42 | optimizer_config: 43 | grad_clip: 44 | max_norm: 25 45 | norm_type: 2 46 | 47 | model: 48 | encoders: 49 | lidar: 50 | voxelize: 51 | max_num_points: ${voxel_max_points} 52 | point_cloud_range: ${point_cloud_range} 53 | voxel_size: ${voxel_size} 54 | max_voxels: ${voxel_max_voxels} 55 | backbone: 56 | type: 
SparseEncoder 57 | in_channels: ${use_dim} 58 | sparse_shape: ${grid_size} 59 | output_channels: 128 60 | order: 61 | - conv 62 | - norm 63 | - act 64 | encoder_channels: 65 | - [16, 16, 32] 66 | - [32, 32, 64] 67 | - [64, 64, 128] 68 | - [128, 128] 69 | encoder_paddings: 70 | - [0, 0, 1] 71 | - [0, 0, 1] 72 | - [0, 0, [1, 1, 0]] 73 | - [0, 0] 74 | block_type: basicblock 75 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: TBEVFusion 3 | max_queue_length: ${temporal_cache_length} 4 | encoders: null 5 | fuser: null 6 | temporal: 7 | type: ConvGRU 8 | in_channels: 256 9 | hidden_channels: [256] 10 | kernel_size: [3, 3] 11 | bias: true 12 | heads: 13 | map: null 14 | 15 | temporal_mode: true 16 | val_online_mode: true 17 | test_online_mode: true 18 | 19 | apply_same_aug_to_seq: true 20 | 21 | augment_gt_paste: # default 22 | apply_same_aug_to_seq: true 23 | apply_collision_check: true 24 | apply_temporal_forward: true 25 | sampler: 26 | cls_trans_lim: 27 | CAR: ["uniform", 0.0, 0.21142457549557347] 28 | TRAILER: ["uniform", 0.0, 2.061603454258997] 29 | TRUCK: ["uniform", 0.0, 0.19225818659676847] 30 | VAN: ["uniform", 0.0, 0.12440957907943972] 31 | # PEDESTRIAN: ["uniform", 0.0, 1.9631158717540234] WE DO NOT SAMPLE PEDESTRIANS 32 | PEDESTRIAN: ["uniform", 0.0, 0.0] 33 | BUS: ["uniform", 0.0, 1.1107448011494194] 34 | # MOTORCYCLE: ["uniform", 0.0, 0.25374656183458383] WE DO NOT SAMPLE MOTORCYCLES 35 | MOTORCYCLE: ["uniform", 0.0, 0.0] 36 | BICYCLE: ["uniform", 0.0, 0.5918484046343995] 37 | EMERGENCY_VEHICLE: ["uniform", 0.0, 0.6620038588093282] 38 | cls_rot_lim: 39 | CAR: ["normal", 0.0, 0.08516856382385488] 40 | TRAILER: ["normal", 0.0, 0.19199153770261218] 41 | TRUCK: ["normal", 0.0, 0.12290075954655998] 42 | VAN: ["normal", 0.0, 0.18803376690181833] 43 | # PEDESTRIAN: ["normal", 0.0, 
0.2227474538090619] WE DO NOT SAMPLE PEDESTRIANS 44 | PEDESTRIAN: ["normal", 0.0, 0.0] 45 | BUS: ["normal", 0.0, 0.19959521881110118] 46 | # MOTORCYCLE: ["normal", 0.0, 0.018643425075835468] WE DO NOT SAMPLE MOTORCYCLES 47 | MOTORCYCLE: ["normal", 0.0, 0.0] 48 | BICYCLE: ["normal", 0.0, 0.21636682539331192] 49 | EMERGENCY_VEHICLE: ["normal", 0.0, 0.1328186144435953] 50 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: TBEVFusion 3 | max_queue_length: ${temporal_cache_length} 4 | encoders: null 5 | fuser: null 6 | temporal: 7 | type: ConvLSTM 8 | in_channels: 256 9 | hidden_channels: [256] 10 | kernel_size: [3, 3] 11 | bias: true 12 | heads: 13 | map: null 14 | 15 | temporal_mode: true 16 | val_online_mode: true 17 | test_online_mode: true 18 | 19 | apply_same_aug_to_seq: true 20 | 21 | augment_gt_paste: # default 22 | apply_same_aug_to_seq: true 23 | apply_collision_check: true 24 | apply_temporal_forward: true 25 | sampler: 26 | cls_trans_lim: 27 | CAR: ["uniform", 0.0, 0.21142457549557347] 28 | TRAILER: ["uniform", 0.0, 2.061603454258997] 29 | TRUCK: ["uniform", 0.0, 0.19225818659676847] 30 | VAN: ["uniform", 0.0, 0.12440957907943972] 31 | # PEDESTRIAN: ["uniform", 0.0, 1.9631158717540234] WE DO NOT SAMPLE PEDESTRIANS 32 | PEDESTRIAN: ["uniform", 0.0, 0.0] 33 | BUS: ["uniform", 0.0, 1.1107448011494194] 34 | # MOTORCYCLE: ["uniform", 0.0, 0.25374656183458383] WE DO NOT SAMPLE MOTORCYCLES 35 | MOTORCYCLE: ["uniform", 0.0, 0.0] 36 | BICYCLE: ["uniform", 0.0, 0.5918484046343995] 37 | EMERGENCY_VEHICLE: ["uniform", 0.0, 0.6620038588093282] 38 | cls_rot_lim: 39 | CAR: ["normal", 0.0, 0.08516856382385488] 40 | TRAILER: ["normal", 0.0, 0.19199153770261218] 41 | TRUCK: ["normal", 0.0, 0.12290075954655998] 42 | VAN: ["normal", 0.0, 0.18803376690181833] 43 | # PEDESTRIAN: ["normal", 0.0, 
0.2227474538090619] WE DO NOT SAMPLE PEDESTRIANS 44 | PEDESTRIAN: ["normal", 0.0, 0.0] 45 | BUS: ["normal", 0.0, 0.19959521881110118] 46 | # MOTORCYCLE: ["normal", 0.0, 0.018643425075835468] WE DO NOT SAMPLE MOTORCYCLES 47 | MOTORCYCLE: ["normal", 0.0, 0.0] 48 | BICYCLE: ["normal", 0.0, 0.21636682539331192] 49 | EMERGENCY_VEHICLE: ["normal", 0.0, 0.1328186144435953] 50 | -------------------------------------------------------------------------------- /mmdet3d/ops/ball_query/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Modified from 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | extern THCState *state; 13 | 14 | #define CHECK_CUDA(x) \ 15 | TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 16 | #define CHECK_CONTIGUOUS(x) \ 17 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 18 | #define CHECK_INPUT(x) \ 19 | CHECK_CUDA(x); \ 20 | CHECK_CONTIGUOUS(x) 21 | 22 | int ball_query_wrapper(int b, int n, int m, float min_radius, float max_radius, int nsample, 23 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, 24 | at::Tensor idx_tensor); 25 | 26 | void ball_query_kernel_launcher(int b, int n, int m, float min_radius, float max_radius, 27 | int nsample, const float *xyz, const float *new_xyz, 28 | int *idx, cudaStream_t stream); 29 | 30 | int ball_query_wrapper(int b, int n, int m, float min_radius, float max_radius, int nsample, 31 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, 32 | at::Tensor idx_tensor) { 33 | CHECK_INPUT(new_xyz_tensor); 34 | CHECK_INPUT(xyz_tensor); 35 | const float *new_xyz = new_xyz_tensor.data_ptr<float>(); 36 | const float *xyz = xyz_tensor.data_ptr<float>(); 37 | int *idx = idx_tensor.data_ptr<int>(); 38 | 39 | cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); 40 | ball_query_kernel_launcher(b, n, m, min_radius, 
max_radius, 41 | nsample, new_xyz, xyz, idx, stream); 42 | return 1; 43 | } 44 | 45 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 46 | m.def("ball_query_wrapper", &ball_query_wrapper, "ball_query_wrapper"); 47 | } 48 | -------------------------------------------------------------------------------- /configs/osdar23/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 15 14 | voxel_max_voxels: [200000, 200000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 
64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 2 8 | temporal_cache_length: 2 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 
75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 0 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | 
block_type: basicblock 79 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 2 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 4 8 | temporal_cache_length: 4 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 15 14 | voxel_max_voxels: [200000, 200000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 2 8 | temporal_cache_length: 2 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 0 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 2 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 4 8 | temporal_cache_length: 4 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | 25 | optimizer: 26 | type: AdamW 27 | lr: 6.6e-05 28 | weight_decay: 0.01 29 | paramwise_cfg: 30 | custom_keys: 31 | encoders.lidar.backbone: 32 | lr_mult: 0.0 33 | 34 | momentum_config: 35 | policy: cyclic 36 | cyclic_times: 1 37 | step_ratio_up: 0.4 38 | 39 | lr_config: 40 | policy: CosineAnnealing 41 | warmup: linear 42 | warmup_iters: 500 43 | warmup_ratio: 0.33333333 44 | min_lr_ratio: 1.0e-3 45 | 46 | optimizer_config: 47 | grad_clip: 48 | max_norm: 25 49 | norm_type: 2 50 | 51 | model: 52 | encoders: 53 | lidar: 54 | voxelize: 55 | max_num_points: ${voxel_max_points} 56 | point_cloud_range: ${point_cloud_range} 57 | voxel_size: ${voxel_size} 58 | max_voxels: ${voxel_max_voxels} 59 | backbone: 60 | type: SparseEncoder 61 | in_channels: ${use_dim} 62 | sparse_shape: ${grid_size} 63 | output_channels: 128 64 | order: 65 | - conv 66 | - norm 67 | - act 68 | encoder_channels: 69 | - [16, 16, 32] 70 | - [32, 32, 64] 71 | - [64, 64, 128] 72 | - [128, 128] 73 | encoder_paddings: 74 | - [0, 0, 1] 75 | - [0, 0, 1] 76 | - [0, 0, [1, 1, 0]] 77 | - [0, 0] 78 | block_type: basicblock 79 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | apply_same_aug_to_seq: false 22 | 23 | augment_gt_paste: 24 | max_epoch: 3 25 | apply_same_aug_to_seq: true 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | apply_same_aug_to_seq: false 22 | 23 | augment_gt_paste: 24 | max_epoch: 3 25 | apply_same_aug_to_seq: true 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | reduce_beams: 16 5 | 6 | voxel_size: [0.1, 0.1, 0.2] 7 | grid_size: [1600, 1600, 51] 8 | 9 | queue_length: 3 10 | temporal_cache_length: 3 11 | queue_range_threshold: 1 12 | 13 | deterministic: True 14 | 15 | voxel_max_points: 10 16 | voxel_max_voxels: [120000, 120000] 17 | 18 | samples_per_gpu: 6 19 | workers_per_gpu: 6 20 | 21 | max_epochs: 4 22 | 23 | augment_gt_paste: 24 | max_epoch: 3 25 | apply_same_aug_to_seq: true 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | reduce_beams: 4 5 | 6 | voxel_size: [0.1, 0.1, 0.2] 7 | grid_size: [1600, 1600, 51] 8 | 9 | queue_length: 3 10 | temporal_cache_length: 3 11 | queue_range_threshold: 1 12 | 13 | deterministic: True 14 | 15 | voxel_max_points: 10 16 | voxel_max_voxels: [120000, 120000] 17 | 18 | samples_per_gpu: 6 19 | workers_per_gpu: 6 20 | 21 | max_epochs: 4 22 | 23 | augment_gt_paste: 24 | max_epoch: 3 25 | apply_same_aug_to_seq: true 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | reduce_beams: 16 5 | 6 | voxel_size: [0.1, 0.1, 0.2] 7 | grid_size: [1600, 1600, 51] 8 | 9 | queue_length: 3 10 | temporal_cache_length: 3 11 | queue_range_threshold: 1 12 | 13 | deterministic: True 14 | 15 | voxel_max_points: 10 16 | voxel_max_voxels: [120000, 120000] 17 | 18 | samples_per_gpu: 6 19 | workers_per_gpu: 6 20 | 21 | max_epochs: 4 22 | 23 | augment_gt_paste: 24 | max_epoch: 3 25 | apply_same_aug_to_seq: true 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | reduce_beams: 4 5 | 6 | voxel_size: [0.1, 0.1, 0.2] 7 | grid_size: [1600, 1600, 51] 8 | 9 | queue_length: 3 10 | temporal_cache_length: 3 11 | queue_range_threshold: 1 12 | 13 | deterministic: True 14 | 15 | voxel_max_points: 10 16 | voxel_max_voxels: [120000, 120000] 17 | 18 | samples_per_gpu: 6 19 | workers_per_gpu: 6 20 | 21 | max_epochs: 4 22 | 23 | augment_gt_paste: 24 | max_epoch: 3 25 | apply_same_aug_to_seq: true 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | cls_rot_lim: null 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | cls_rot_lim: null 26 | 27 | optimizer: 28 | type: AdamW 29 | lr: 6.6e-05 30 | weight_decay: 0.01 31 | paramwise_cfg: 32 | custom_keys: 33 | encoders.lidar.backbone: 34 | lr_mult: 0.0 35 | 36 | momentum_config: 37 | policy: cyclic 38 | cyclic_times: 1 39 | step_ratio_up: 0.4 40 | 41 | lr_config: 42 | policy: CosineAnnealing 43 | warmup: linear 44 | warmup_iters: 500 45 | warmup_ratio: 0.33333333 46 | min_lr_ratio: 1.0e-3 47 | 48 | optimizer_config: 49 | grad_clip: 50 | max_norm: 25 51 | norm_type: 2 52 | 53 | model: 54 | encoders: 55 | lidar: 56 | voxelize: 57 | max_num_points: ${voxel_max_points} 58 | point_cloud_range: ${point_cloud_range} 59 | voxel_size: ${voxel_size} 60 | max_voxels: ${voxel_max_voxels} 61 | backbone: 62 | type: SparseEncoder 63 | in_channels: ${use_dim} 64 | sparse_shape: ${grid_size} 65 | output_channels: 128 66 | order: 67 | - conv 68 | - norm 69 | - act 70 | encoder_channels: 71 | - [16, 16, 32] 72 | - [32, 32, 64] 73 | - [64, 64, 128] 74 | - [128, 128] 75 | encoder_paddings: 76 | - [0, 0, 1] 77 | - [0, 0, 1] 78 | - [0, 0, [1, 1, 0]] 79 | - [0, 0] 80 | block_type: basicblock 81 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: false 24 | sampler: 25 | cls_trans_lim: null 26 | cls_rot_lim: null 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | block_type: basicblock 82 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: false 24 | sampler: 25 | cls_trans_lim: null 26 | cls_rot_lim: null 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | block_type: basicblock 82 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | cls_trans_lim: null 26 | cls_rot_lim: null 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | block_type: basicblock 82 | 
-------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | cls_trans_lim: null 26 | cls_rot_lim: null 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | block_type: basicblock 82 | 
-------------------------------------------------------------------------------- /configs/osdar23/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 15 14 | voxel_max_voxels: [200000, 200000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | reduce_points_by_distance: 26 | prob: 0.5 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | block_type: basicblock 
82 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 0 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | reduce_points_by_distance: 26 | prob: 0.5 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | block_type: 
basicblock 82 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | reduce_points_by_distance: 26 | prob: 0.5 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 0] 81 | 
block_type: basicblock 82 | -------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0] 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0] 3 | 4 | voxel_size: [0.16, 0.16, 0.4] 5 | grid_size: [1600, 1600, 41] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 15 14 | voxel_max_voxels: [200000, 200000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | reduce_points_by_distance: 26 | prob: 0.5 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | - [0, 
0] 81 | block_type: basicblock 82 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 0 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | reduce_points_by_distance: 26 | prob: 0.5 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 80 | 
- [0, 0] 81 | block_type: basicblock 82 | -------------------------------------------------------------------------------- /configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml: -------------------------------------------------------------------------------- 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0] 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0] 3 | 4 | voxel_size: [0.1, 0.1, 0.2] 5 | grid_size: [1600, 1600, 51] 6 | 7 | queue_length: 3 8 | temporal_cache_length: 3 9 | queue_range_threshold: 1 10 | 11 | deterministic: True 12 | 13 | voxel_max_points: 10 14 | voxel_max_voxels: [120000, 120000] 15 | 16 | samples_per_gpu: 6 17 | workers_per_gpu: 6 18 | 19 | max_epochs: 4 20 | 21 | augment_gt_paste: 22 | max_epoch: 3 23 | apply_same_aug_to_seq: true 24 | sampler: 25 | reduce_points_by_distance: 26 | prob: 0.5 27 | 28 | optimizer: 29 | type: AdamW 30 | lr: 6.6e-05 31 | weight_decay: 0.01 32 | paramwise_cfg: 33 | custom_keys: 34 | encoders.lidar.backbone: 35 | lr_mult: 0.0 36 | 37 | momentum_config: 38 | policy: cyclic 39 | cyclic_times: 1 40 | step_ratio_up: 0.4 41 | 42 | lr_config: 43 | policy: CosineAnnealing 44 | warmup: linear 45 | warmup_iters: 500 46 | warmup_ratio: 0.33333333 47 | min_lr_ratio: 1.0e-3 48 | 49 | optimizer_config: 50 | grad_clip: 51 | max_norm: 25 52 | norm_type: 2 53 | 54 | model: 55 | encoders: 56 | lidar: 57 | voxelize: 58 | max_num_points: ${voxel_max_points} 59 | point_cloud_range: ${point_cloud_range} 60 | voxel_size: ${voxel_size} 61 | max_voxels: ${voxel_max_voxels} 62 | backbone: 63 | type: SparseEncoder 64 | in_channels: ${use_dim} 65 | sparse_shape: ${grid_size} 66 | output_channels: 128 67 | order: 68 | - conv 69 | - norm 70 | - act 71 | encoder_channels: 72 | - [16, 16, 32] 73 | - [32, 32, 64] 74 | - [64, 64, 128] 75 | - [128, 128] 76 | encoder_paddings: 77 | - [0, 0, 1] 78 | - [0, 0, 1] 79 | - [0, 0, [1, 1, 0]] 
def _convert_backbone_weight(weights):
    """Flatten one 5-D sparse-conv weight into the ``kernel`` layout.

    Args:
        weights (torch.Tensor): weight of shape (kx, ky, kz, ic, oc).

    Returns:
        torch.Tensor: (ic, oc) for a single-offset kernel, otherwise
        (kx * ky * kz, ic, oc). Kernels with 27 offsets are additionally
        reordered so the first spatial axis varies fastest.
    """
    kx, ky, kz, ic, oc = weights.shape
    converted_weights = weights.reshape(-1, ic, oc)
    if converted_weights.shape[0] == 1:
        # 1x1x1 kernel: drop the offset axis entirely.
        converted_weights = converted_weights[0]
    elif converted_weights.shape[0] == 27:
        kykx = ky * kx
        # New position (z, y, x) reads the old flat entry x * kykx + y * kx + z.
        offsets = [
            x * kykx + y * kx + z
            for z in range(kz)
            for y in range(ky)
            for x in range(kx)
        ]
        offsets = torch.tensor(
            offsets, dtype=torch.int64, device=converted_weights.device
        )
        converted_weights = converted_weights[offsets]
    return converted_weights


def main():
    """Rewrite the lidar-backbone conv weights of a checkpoint and save it.

    Command line: ``ckpt_before ckpt_after``. Every entry of
    ``state_dict`` whose key starts with ``encoders.lidar.backbone``, does
    not contain ``.bn.`` and holds a 5-D tensor is renamed from ``.weight``
    to ``.kernel`` and converted by ``_convert_backbone_weight``. All other
    entries are copied unchanged, as is everything else in the checkpoint.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("ckpt_before", metavar="FILE", help="Original checkpoint.")
    parser.add_argument("ckpt_after", metavar="FILE", help="Converted checkpoint.")
    # parse_known_args keeps tolerating unrelated extra arguments, as before.
    args, _ = parser.parse_known_args()

    cp_old = torch.load(args.ckpt_before, map_location="cpu")
    model = cp_old["state_dict"]
    new_model = {}

    for key, value in model.items():
        # Only 5-D tensors can be unpacked as (kx, ky, kz, ic, oc); any other
        # backbone tensor is passed through instead of crashing the unpack.
        is_sparseconv_weight = (
            key.startswith("encoders.lidar.backbone")
            and ".bn." not in key
            and len(value.shape) == 5
        )
        if is_sparseconv_weight:
            new_model[key.replace(".weight", ".kernel")] = _convert_backbone_weight(value)
        else:
            new_model[key] = value

    cp_old["state_dict"] = new_model
    torch.save(cp_old, args.ckpt_after)
def build_dataset(cfg, default_args=None):
    """Build a dataset, or a dataset wrapper, from a config.

    Args:
        cfg: a single config mapping, or a list/tuple of them. A sequence is
            built element by element and concatenated.
        default_args: forwarded unchanged to every nested build.

    Returns:
        The constructed dataset. ``cfg["type"]`` selects one of the wrapper
        datasets handled below; a config whose ``ann_file`` is a list/tuple
        goes through ``_concat_dataset``; anything else is built from the
        ``DATASETS`` registry.
    """
    from mmdet.datasets.dataset_wrappers import (
        ClassBalancedDataset,
        ConcatDataset,
        RepeatDataset,
    )

    from mmdet3d.datasets.dataset_wrappers import CBGSDataset

    if isinstance(cfg, (list, tuple)):
        return ConcatDataset([build_dataset(item, default_args) for item in cfg])

    dataset_type = cfg["type"]

    if dataset_type == "ConcatDataset":
        members = [build_dataset(item, default_args) for item in cfg["datasets"]]
        return ConcatDataset(members, cfg.get("separate_eval", True))

    if dataset_type == "RepeatDataset":
        inner = build_dataset(cfg["dataset"], default_args)
        return RepeatDataset(inner, cfg["times"])

    if dataset_type == "ClassBalancedDataset":
        inner = build_dataset(cfg["dataset"], default_args)
        return ClassBalancedDataset(inner, cfg["oversample_thr"])

    if dataset_type == "CBGSDataset":
        # The second positional argument is passed only when the config
        # carries a "temporal" entry.
        has_temporal = "temporal" in cfg
        inner = build_dataset(cfg["dataset"], default_args)
        if has_temporal:
            return CBGSDataset(inner, cfg["temporal"])
        return CBGSDataset(inner)

    if isinstance(cfg.get("ann_file"), (list, tuple)):
        return _concat_dataset(cfg, default_args)

    return build_from_cfg(cfg, DATASETS, default_args)
-------------------------------------------------------------------------------- /mmdet3d/ops/interpolate/three_interpolate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from typing import Tuple 4 | 5 | from . import interpolate_ext 6 | 7 | 8 | class ThreeInterpolate(Function): 9 | @staticmethod 10 | def forward( 11 | ctx, features: torch.Tensor, indices: torch.Tensor, weight: torch.Tensor 12 | ) -> torch.Tensor: 13 | """Performs weighted linear interpolation on 3 features. 14 | 15 | Args: 16 | features (Tensor): (B, C, M) Features descriptors to be 17 | interpolated from 18 | indices (Tensor): (B, n, 3) index three nearest neighbors 19 | of the target features in features 20 | weight (Tensor): (B, n, 3) weights of interpolation 21 | 22 | Returns: 23 | Tensor: (B, C, N) tensor of the interpolated features 24 | """ 25 | assert features.is_contiguous() 26 | assert indices.is_contiguous() 27 | assert weight.is_contiguous() 28 | 29 | B, c, m = features.size() 30 | n = indices.size(1) 31 | ctx.three_interpolate_for_backward = (indices, weight, m) 32 | output = torch.cuda.FloatTensor(B, c, n) 33 | 34 | interpolate_ext.three_interpolate_wrapper(B, c, m, n, features, indices, weight, output) 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 39 | """Backward of three interpolate. 
def convert_to_swint_pth(
    pretrained_swint_path: str,
    source_model_path: str,
    target_save_path: str,
    prefix: str = "encoders.camera.backbone",
) -> None:
    """Extract the sub-module ``prefix`` of a model into a standalone checkpoint.

    Loads both checkpoints on CPU, takes every ``state_dict`` entry of the
    source model that lives under ``prefix``, strips ``"<prefix>."`` from its
    key, and stores the result as the ``state_dict`` of the pretrained
    checkpoint, which is then saved to ``target_save_path``. All other
    entries of the pretrained checkpoint are kept.

    Args:
        pretrained_swint_path: checkpoint that provides the reference key
            set and the container that is saved.
        source_model_path: checkpoint to take the weights from.
        target_save_path: output path.
        prefix: dotted module path to extract. A trailing dot is accepted.

    Raises:
        AssertionError: if a stripped key is missing from the pretrained
            ``state_dict``.
    """
    pretrained_swint = torch.load(pretrained_swint_path, map_location=torch.device("cpu"))
    source_model = torch.load(source_model_path, map_location=torch.device("cpu"))
    pretrained_state = pretrained_swint["state_dict"]
    source_state = source_model["state_dict"]

    print("total keys in pretrained swint", len(pretrained_state.keys()))
    print("total keys in source model", len(source_state.keys()))

    # Match on "<prefix>." so that a sibling module whose name merely starts
    # with the prefix (e.g. "<prefix>_aux") is not pulled in and mangled.
    dotted_prefix = prefix if prefix.endswith(".") else prefix + "."

    common_keys = []
    other_keys = []
    for key in source_state:
        if key.startswith(dotted_prefix):
            common_keys.append(key)
        else:
            other_keys.append(key)

    print("total common keys", len(common_keys))
    print("total other keys", len(other_keys))

    # create a new state dict with the prefix removed from every key
    new_state_dict = {key[len(dotted_prefix):]: source_state[key] for key in common_keys}

    # Every extracted key must exist in the pretrained checkpoint. Raised
    # explicitly (still as AssertionError) so the check survives `python -O`.
    for key in new_state_dict:
        if key not in pretrained_state:
            raise AssertionError("key not found in pretrained swint")

    print("total keys in new state dict", len(new_state_dict.keys()))

    # save the new state dict
    pretrained_swint["state_dict"] = new_state_dict
    torch.save(pretrained_swint, target_save_path)
class KNN(Function):
    r"""K-nearest-neighbour search (CUDA) via the compiled ``knn_ext`` op.

    Modified from PAConv.

    For every query centre, returns the indices of its ``k`` nearest points.
    """

    @staticmethod
    def forward(
        ctx, k: int, xyz: torch.Tensor, center_xyz: torch.Tensor = None, transposed: bool = False
    ) -> torch.Tensor:
        """Run the search.

        Args:
            k (int): number of nearest neighbours; must be positive.
            xyz (Tensor): (B, N, 3) if ``transposed`` is False, else
                (B, 3, N). Coordinates of the candidate points.
            center_xyz (Tensor): (B, npoint, 3) if ``transposed`` is False,
                else (B, 3, npoint). Query centres; ``xyz`` is used when
                this is None.
            transposed (bool): whether the inputs are channel-first.
                Defaults to False. ``KNN.apply`` takes positional arguments
                only, so pass this as the fourth argument.

        Returns:
            Tensor: (B, k, npoint) int tensor with the indices of the
            k nearest neighbours; marked non-differentiable.
        """
        assert k > 0

        queries = xyz if center_xyz is None else center_xyz

        if transposed:
            # Bring both inputs to the point-major (B, *, 3) layout.
            xyz = xyz.transpose(2, 1).contiguous()
            queries = queries.transpose(2, 1).contiguous()

        assert xyz.is_contiguous()  # [B, N, 3]
        assert queries.is_contiguous()  # [B, npoint, 3]

        query_device = queries.get_device()
        assert (
            query_device == xyz.get_device()
        ), "center_xyz and xyz should be put on the same device"
        # Switch the current CUDA device to the one holding the inputs.
        if torch.cuda.current_device() != query_device:
            torch.cuda.set_device(query_device)

        B, npoint, _ = queries.shape
        N = xyz.shape[1]

        # Output buffers handed to the extension.
        idx = queries.new_zeros((B, npoint, k)).int()
        dist2 = queries.new_zeros((B, npoint, k)).float()
        knn_ext.knn_wrapper(B, N, npoint, k, xyz, queries, idx, dist2)

        # (B, npoint, k) -> (B, k, npoint)
        idx = idx.transpose(2, 1).contiguous()
        ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        """Return no gradients; the index output is non-differentiable."""
        return None, None, None
@VTRANSFORMS.register_module()
class LSSTransform(BaseTransform):
    """View transform with a 1x1 depth/context head and optional downsampling.

    Geometry arguments are handed unchanged to ``BaseTransform``. ``self.D``
    and ``self.C`` are read here but defined by the base class (not visible
    in this file).
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        image_size: Tuple[int, int],
        feature_size: Tuple[int, int],
        xbound: Tuple[float, float, float],
        ybound: Tuple[float, float, float],
        zbound: Tuple[float, float, float],
        dbound: Tuple[float, float, float],
        downsample: int = 1,
    ) -> None:
        """Create the depth head and the output downsampling stack.

        Args:
            in_channels: channels of the incoming camera features.
            out_channels: channels of the transformed output.
            image_size, feature_size, xbound, ybound, zbound, dbound:
                forwarded to ``BaseTransform``.
            downsample: 1 leaves the output untouched; 2 is the only other
                supported value and adds a stride-2 conv stack.
        """
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            image_size=image_size,
            feature_size=feature_size,
            xbound=xbound,
            ybound=ybound,
            zbound=zbound,
            dbound=dbound,
        )
        # One 1x1 conv yields D depth logits followed by C context channels.
        self.depthnet = nn.Conv2d(in_channels, self.D + self.C, 1)

        if downsample > 1:
            assert downsample == 2, downsample
            # Three conv-bn-relu stages; only the middle one is strided.
            stages = []
            for stride in (1, downsample, 1):
                stages.append(
                    nn.Conv2d(
                        out_channels,
                        out_channels,
                        3,
                        stride=stride,
                        padding=1,
                        bias=False,
                    )
                )
                stages.append(nn.BatchNorm2d(out_channels))
                stages.append(nn.ReLU(True))
            self.downsample = nn.Sequential(*stages)
        else:
            self.downsample = nn.Identity()

    @force_fp32()
    def get_cam_feats(self, x):
        """Lift camera features into a depth-weighted feature volume.

        Args:
            x: tensor of shape (B, N, C, fH, fW).

        Returns:
            Tensor of shape (B, N, D, fH, fW, C): the softmax over the first
            ``D`` head channels multiplied with the following ``C`` channels.
        """
        B, N, C, fH, fW = x.shape

        head_out = self.depthnet(x.view(B * N, C, fH, fW))
        depth = head_out[:, : self.D].softmax(dim=1)
        context = head_out[:, self.D : (self.D + self.C)]

        # (BN, 1, D, fH, fW) * (BN, C, 1, fH, fW) -> (BN, C, D, fH, fW)
        volume = depth.unsqueeze(1) * context.unsqueeze(2)
        volume = volume.view(B, N, self.C, self.D, fH, fW)
        return volume.permute(0, 1, 3, 4, 5, 2)

    def forward(self, *args, **kwargs):
        """Run the base transform, then apply the downsampling stack."""
        return self.downsample(super().forward(*args, **kwargs))
class FurthestPointSampling(Function):
    """Furthest Point Sampling.

    Uses iterative furthest point sampling to select a set of features whose
    corresponding points have the furthest distance.
    """

    @staticmethod
    def forward(ctx, points_xyz: torch.Tensor, num_points: int) -> torch.Tensor:
        """Sample ``num_points`` indices per batch element.

        Args:
            points_xyz (Tensor): (B, N, 3) where N > num_points. Must be
                contiguous.
            num_points (int): Number of points in the sampled set.

        Returns:
            Tensor: (B, num_points) int32 indices of the sampled points,
            marked non-differentiable.
        """
        assert points_xyz.is_contiguous()

        B, N = points_xyz.size()[:2]
        # Allocate the work buffers from the input tensor so they live on the
        # same device as the points (the legacy torch.cuda.*Tensor
        # constructors use the *current* device instead). dtypes are kept at
        # int32 / float32 as before.
        output = points_xyz.new_zeros([B, num_points], dtype=torch.int32)
        temp = points_xyz.new_zeros([B, N], dtype=torch.float32).fill_(1e10)

        furthest_point_sample_ext.furthest_point_sampling_wrapper(
            B, N, num_points, points_xyz, temp, output
        )
        ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        """Return no gradients for either forward input.

        The first parameter receives the autograd context; its name is kept
        for interface compatibility.
        """
        return None, None
# Public op namespace: re-exports selected mmcv ops together with the ops
# defined in this package's sub-modules.
from mmcv.ops import (
    RoIAlign,
    SigmoidFocalLoss,
    batched_nms,
    get_compiler_version,
    get_compiling_cuda_version,
    nms,
    roi_align,
    sigmoid_focal_loss,
    soft_nms,
)

from .ball_query import ball_query
from .furthest_point_sample import (
    Points_Sampler,
    furthest_point_sample,
    furthest_point_sample_with_dist,
)
from .gather_points import gather_points
from .group_points import GroupAll, QueryAndGroup, group_points, grouping_operation
from .interpolate import three_interpolate, three_nn
from .knn import knn
from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
from .paconv import PAConv, PAConvCUDA, assign_score_withk
from .bev_pool import *
from .pointnet_modules import (
    PAConvCUDASAModule,
    PAConvCUDASAModuleMSG,
    PAConvSAModule,
    PAConvSAModuleMSG,
    PointFPModule,
    PointSAModule,
    PointSAModuleMSG,
    build_sa_module,
)
from .roiaware_pool3d import (
    RoIAwarePool3d,
    points_in_boxes_batch,
    points_in_boxes_cpu,
    points_in_boxes_gpu,
)
# from .sparse_block import SparseBasicBlock, SparseBottleneck, make_sparse_convmodule
from .sparse_block import SparseBasicBlock, make_sparse_convmodule
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
from .spconv import *

# Every name listed here must be bound above: `from mmdet3d.ops import *`
# raises AttributeError for an exported name that does not exist. That is why
# `soft_nms` and `batched_nms` are imported from mmcv.ops explicitly.
__all__ = [
    "nms",
    "soft_nms",
    "RoIAlign",
    "roi_align",
    "get_compiler_version",
    "get_compiling_cuda_version",
    "NaiveSyncBatchNorm1d",
    "NaiveSyncBatchNorm2d",
    "batched_nms",
    "Voxelization",
    "voxelization",
    "dynamic_scatter",
    "DynamicScatter",
    "sigmoid_focal_loss",
    "SigmoidFocalLoss",
    "SparseBasicBlock",
    # "SparseBottleneck",
    "RoIAwarePool3d",
    "points_in_boxes_gpu",
    "points_in_boxes_cpu",
    "make_sparse_convmodule",
    "ball_query",
    "knn",
    "furthest_point_sample",
    "furthest_point_sample_with_dist",
    "three_interpolate",
    "three_nn",
    "gather_points",
    "grouping_operation",
    "group_points",
    "GroupAll",
    "QueryAndGroup",
    "PointSAModule",
    "PointSAModuleMSG",
    "PointFPModule",
    "points_in_boxes_batch",
    "assign_score_withk",
    "Points_Sampler",
    "build_sa_module",
    "PAConv",
    "PAConvCUDA",
    "PAConvSAModuleMSG",
    "PAConvSAModule",
    "PAConvCUDASAModule",
    "PAConvCUDASAModuleMSG",
    "bev_pool",
]
def gaussian_radius(det_size, min_overlap=0.5):
    """Get radius of gaussian.

    Three quadratic equations are built from the box height/width and
    ``min_overlap``; one candidate radius is derived from each and the
    smallest candidate is returned.

    Args:
        det_size (tuple[torch.Tensor]): Size of the detection result,
            unpacked as ``(height, width)``.
        min_overlap (float): Gaussian_overlap. Defaults to 0.5.

    Returns:
        torch.Tensor: Computed radius.
    """

    def _candidate(a, b, c):
        # NOTE: the sum is divided by 2 regardless of ``a`` (not by 2 * a);
        # this is kept exactly as-is so existing results do not change.
        discriminant = b**2 - 4 * a * c
        return (b + torch.sqrt(discriminant)) / 2

    height, width = det_size
    size_sum = height + width

    candidates = [
        _candidate(
            1,
            size_sum,
            width * height * (1 - min_overlap) / (1 + min_overlap),
        ),
        _candidate(
            4,
            2 * size_sum,
            (1 - min_overlap) * width * height,
        ),
        _candidate(
            4 * min_overlap,
            -2 * min_overlap * size_sum,
            (min_overlap - 1) * width * height,
        ),
    ]
    return min(candidates)
float *grad_points = grad_points_tensor.data_ptr(); 35 | const int *idx = idx_tensor.data_ptr(); 36 | const float *grad_out = grad_out_tensor.data_ptr(); 37 | 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); 39 | 40 | group_points_grad_kernel_launcher(b, c, n, npoints, nsample, grad_out, idx, 41 | grad_points, stream); 42 | return 1; 43 | } 44 | 45 | int group_points_wrapper(int b, int c, int n, int npoints, int nsample, 46 | at::Tensor points_tensor, at::Tensor idx_tensor, 47 | at::Tensor out_tensor) { 48 | const float *points = points_tensor.data_ptr(); 49 | const int *idx = idx_tensor.data_ptr(); 50 | float *out = out_tensor.data_ptr(); 51 | 52 | cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); 53 | 54 | group_points_kernel_launcher(b, c, n, npoints, nsample, points, idx, out, 55 | stream); 56 | return 1; 57 | } 58 | 59 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 60 | m.def("forward", &group_points_wrapper, "group_points_wrapper"); 61 | m.def("backward", &group_points_grad_wrapper, "group_points_grad_wrapper"); 62 | } 63 | -------------------------------------------------------------------------------- /mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Modified from 2 | // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu 3 | // Written by Shaoshuai Shi 4 | // All Rights Reserved 2019. 
5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define CHECK_CONTIGUOUS(x) \ 13 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 14 | // #define DEBUG 15 | 16 | inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz, 17 | float &local_x, float &local_y) { 18 | // should rotate pi/2 + alpha to translate LiDAR to local 19 | float rot_angle = rz + M_PI / 2; 20 | float cosa = cos(rot_angle), sina = sin(rot_angle); 21 | local_x = shift_x * cosa + shift_y * (-sina); 22 | local_y = shift_x * sina + shift_y * cosa; 23 | } 24 | 25 | inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, 26 | float &local_x, float &local_y) { 27 | // param pt: (x, y, z) 28 | // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the 29 | // bottom center 30 | float x = pt[0], y = pt[1], z = pt[2]; 31 | float cx = box3d[0], cy = box3d[1], cz = box3d[2]; 32 | float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6]; 33 | cz += h / 2.0; // shift to the center since cz in box3d is the bottom center 34 | 35 | if (fabsf(z - cz) > h / 2.0) return 0; 36 | lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y); 37 | float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & 38 | (local_y > -w / 2.0) & (local_y < w / 2.0); 39 | return in_flag; 40 | } 41 | 42 | int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, 43 | at::Tensor pts_indices_tensor) { 44 | // params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the 45 | // bottom center, each box DO NOT overlaps params pts: (npoints, 3) [x, y, z] 46 | // in LiDAR coordinate params pts_indices: (N, npoints) 47 | 48 | CHECK_CONTIGUOUS(boxes_tensor); 49 | CHECK_CONTIGUOUS(pts_tensor); 50 | CHECK_CONTIGUOUS(pts_indices_tensor); 51 | 52 | int boxes_num = boxes_tensor.size(0); 53 | int pts_num = pts_tensor.size(0); 54 | 55 | const float *boxes = boxes_tensor.data_ptr(); 56 | const float *pts = 
def convert_to_combined_pth(
    lidar_model_path: str,
    camera_model_path: str,
    target_save_path: str,
    camera_prefixes: List[str] = [
        "encoders.camera.backbone",
        "encoders.camera.vtransform",
        "encoders.camera.neck",
    ],
    blacklist_prefixes: List[str] = [
        "temporal_fuser",
    ],
) -> None:
    """Merge camera-branch weights into a lidar checkpoint and save it.

    The lidar checkpoint is the base: all of its ``state_dict`` entries are
    kept except those whose key starts with a blacklisted prefix. Entries of
    the camera checkpoint whose key starts with one of ``camera_prefixes``
    (and with no blacklisted prefix) are then copied on top, overriding lidar
    entries with the same key. Everything else stored in the lidar checkpoint
    is saved unchanged.

    Args:
        lidar_model_path: Path of the lidar checkpoint (must contain a
            ``"state_dict"`` entry).
        camera_model_path: Path of the camera checkpoint (must contain a
            ``"state_dict"`` entry).
        target_save_path: Where the combined checkpoint is written.
        camera_prefixes: Key prefixes selecting the camera weights to copy.
        blacklist_prefixes: Key prefixes to drop from both checkpoints.
    """
    # str.startswith accepts a tuple of prefixes (an empty tuple never
    # matches). Converting here also means the list defaults are only read.
    camera_prefix_tuple = tuple(camera_prefixes)
    blacklist_tuple = tuple(blacklist_prefixes)

    lidar_model = torch.load(lidar_model_path, map_location=torch.device("cpu"))
    camera_model = torch.load(camera_model_path, map_location=torch.device("cpu"))

    lidar_state = lidar_model["state_dict"]
    camera_state = camera_model["state_dict"]
    print("total keys in lidar model", len(lidar_state))
    print("total keys in camera model", len(camera_state))

    # Start from the lidar weights minus the blacklisted entries.
    new_state_dict = {
        key: value
        for key, value in lidar_state.items()
        if not key.startswith(blacklist_tuple)
    }

    # Overlay the camera weights. The blacklist is a list of *prefixes*, so it
    # is applied with startswith here as well; an exact-membership test would
    # let blacklisted camera keys through.
    for key, value in camera_state.items():
        if key.startswith(camera_prefix_tuple) and not key.startswith(blacklist_tuple):
            new_state_dict[key] = value

    print("total keys in new state dict", len(new_state_dict))
    for key in new_state_dict:
        print(key)

    # Reuse the lidar checkpoint container so its other entries are preserved.
    lidar_model["state_dict"] = new_state_dict
    torch.save(lidar_model, target_save_path)
// Host-side launcher for ball_query_kernel.
//
// Launches one thread per query point: the x grid dimension covers the m
// query points in chunks of THREADS_PER_BLOCK, the y grid dimension covers
// the b batch elements. The kernel is enqueued on `stream`. If the launch
// reports an error, the message is printed to stderr and the process exits.
void ball_query_kernel_launcher(int b, int n, int m, float min_radius, float max_radius,
                                int nsample, const float *new_xyz, const float *xyz,
                                int *idx, cudaStream_t stream) {
  // new_xyz: (B, M, 3)
  // xyz: (B, N, 3)
  // output:
  //      idx: (B, M, nsample)

  cudaError_t err;

  dim3 blocks(DIVUP(m, THREADS_PER_BLOCK),
              b);  // blockIdx.x(col), blockIdx.y(row)
  dim3 threads(THREADS_PER_BLOCK);

  // Execution configuration: grid, block, no dynamic shared memory, and the
  // caller's stream.
  ball_query_kernel<<<blocks, threads, 0, stream>>>(b, n, m, min_radius, max_radius,
                                                    nsample, new_xyz, xyz, idx);
  // cudaDeviceSynchronize();  // for using printf in kernel function
  err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
}
160, 2] 8 | - [2, 320, 2] 9 | - [2, 640, 1] 10 | neck: 11 | type: LSSFPN 12 | in_indices: [-1, 0] 13 | in_channels: [640, 160] 14 | out_channels: 256 15 | scale_factor: 2 16 | heads: 17 | object: 18 | type: CenterHead 19 | in_channels: 256 20 | train_cfg: 21 | point_cloud_range: ${point_cloud_range} 22 | grid_size: ${grid_size} 23 | voxel_size: ${voxel_size} 24 | out_size_factor: ${out_size_factor} 25 | dense_reg: 1 26 | gaussian_overlap: 0.1 27 | max_objs: 500 28 | min_radius: 2 29 | code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 30 | test_cfg: 31 | post_center_limit_range: ${post_center_range} 32 | max_per_img: 500 33 | max_pool_nms: false 34 | min_radius: [4, 12, 10, 1, 0.85, 0.175] 35 | score_threshold: ${score_threshold} 36 | out_size_factor: ${out_size_factor} 37 | voxel_size: ${voxel_size[:2]} 38 | pre_max_size: 1000 39 | post_max_size: 83 40 | nms_thr: ${nms_threshold} 41 | nms_type: 42 | - rotate # lidar__cuboid__person 43 | - circle # lidar__cuboid__catenary_pole 44 | - circle # lidar__cuboid__signal_pole 45 | - circle # lidar__cuboid__road_vehicle 46 | - rotate # lidar__cuboid__buffer_stop 47 | nms_scale: 48 | - [0.4] # lidar__cuboid__person 49 | - [1.0] # lidar__cuboid__catenary_pole 50 | - [1.0] # lidar__cuboid__signal_pole 51 | - [1.0] # lidar__cuboid__road_vehicle 52 | - [1.0] # lidar__cuboid__buffer_stop 53 | tasks: 54 | - ["lidar__cuboid__person"] 55 | - ["lidar__cuboid__catenary_pole"] 56 | - ["lidar__cuboid__signal_pole"] 57 | - ["lidar__cuboid__road_vehicle"] 58 | - ["lidar__cuboid__buffer_stop"] 59 | common_heads: 60 | reg: [2, 2] 61 | height: [1, 2] 62 | dim: [3, 2] 63 | rot: [2, 2] 64 | share_conv_channel: 64 65 | bbox_coder: 66 | type: CenterPointBBoxCoder 67 | pc_range: ${point_cloud_range} 68 | post_center_range: ${post_center_range} 69 | max_num: 500 70 | score_threshold: ${score_threshold} 71 | out_size_factor: 8 72 | voxel_size: ${voxel_size[:2]} 73 | code_size: 7 74 | separate_head: 75 | type: SeparateHead 76 | init_bias: -2.19 
# CenterHead decoder/head defaults.
# Every ${...} value is an interpolation of a variable that is not defined in
# this file; it has to be supplied by another config.
model:
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 336
      blocks:
        - [2, 160, 2]
        - [2, 320, 2]
        - [2, 640, 1]
    neck:
      type: LSSFPN
      in_indices: [-1, 0]
      # 640 and 160 are the channel counts of the last and first backbone
      # block listed above, matching in_indices [-1, 0].
      in_channels: [640, 160]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      type: CenterHead
      in_channels: 256  # equals neck.out_channels
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        dense_reg: 1
        gaussian_overlap: 0.1
        max_objs: 500
        min_radius: 2
        # NOTE(review): 8 weights here while bbox_coder.code_size below is 7 -- confirm.
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
      test_cfg:
        post_center_limit_range: ${post_center_range}
        max_per_img: 500
        max_pool_nms: false
        # NOTE(review): 6 values here while 7 tasks are defined below -- confirm.
        min_radius: [4, 12, 10, 1, 0.85, 0.175]
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        pre_max_size: 1000
        post_max_size: 83
        nms_thr: ${nms_threshold}
        # One entry per task, listed in the same order as `tasks` below.
        nms_type:
          - circle # CAR
          - rotate # TRUCK
          - rotate # VAN
          - rotate # BUS, TRAILER
          - rotate # MOTORCYCLE, BICYCLE
          - rotate # PEDESTRIAN
          - rotate # EMERGENCY_VEHICLE
        # One list per task with one value per class of that task.
        nms_scale:
          - [1.0] # CAR
          - [1.0] # TRUCK
          - [1.0] # VAN
          - [1.0, 1.0] # BUS, TRAILER
          - [1.0, 1.0] # MOTORCYCLE, BICYCLE
          - [1.0] # PEDESTRIAN
          - [1.0] # EMERGENCY_VEHICLE
      # Each inner list is one task; a task may group several classes.
      tasks:
        - ["CAR"]
        - ["TRUCK"]
        - ["VAN"]
        - ["BUS", "TRAILER"]
        - ["MOTORCYCLE", "BICYCLE"]
        - ["PEDESTRIAN"]
        - ["EMERGENCY_VEHICLE"]
      common_heads:
        reg: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      share_conv_channel: 64
      bbox_coder:
        type: CenterPointBBoxCoder
        pc_range: ${point_cloud_range}
        post_center_range: ${post_center_range}
        max_num: 500
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 7
      separate_head:
        type: SeparateHead
        init_bias: -2.19
        final_kernel: 3
      loss_cls:
        type: GaussianFocalLoss
        reduction: mean
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      norm_bbox: true
# CenterHead decoder/head defaults.
# Every ${...} value is an interpolation of a variable that is not defined in
# this file; it has to be supplied by another config.
model:
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 336
      blocks:
        - [2, 160, 2]
        - [2, 320, 2]
        - [2, 640, 1]
    neck:
      type: LSSFPN
      in_indices: [-1, 0]
      # 640 and 160 are the channel counts of the last and first backbone
      # block listed above, matching in_indices [-1, 0].
      in_channels: [640, 160]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      type: CenterHead
      in_channels: 256  # equals neck.out_channels
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        dense_reg: 1
        gaussian_overlap: 0.1
        max_objs: 500
        min_radius: 2
        # NOTE(review): 8 weights here while bbox_coder.code_size below is 7 -- confirm.
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
      test_cfg:
        post_center_limit_range: ${post_center_range}
        max_per_img: 500
        max_pool_nms: false
        # NOTE(review): 6 values here while 7 tasks are defined below -- confirm.
        min_radius: [4, 12, 10, 1, 0.85, 0.175]
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        pre_max_size: 1000
        post_max_size: 83
        nms_thr: ${nms_threshold}
        # One entry per task, listed in the same order as `tasks` below.
        nms_type:
          - circle # CAR
          - rotate # TRUCK
          - rotate # VAN
          - rotate # BUS, TRAILER
          - rotate # MOTORCYCLE, BICYCLE
          - rotate # PEDESTRIAN
          - rotate # EMERGENCY_VEHICLE
        # One list per task with one value per class of that task.
        nms_scale:
          - [1.0] # CAR
          - [1.0] # TRUCK
          - [1.0] # VAN
          - [1.0, 1.0] # BUS, TRAILER
          - [1.0, 1.0] # MOTORCYCLE, BICYCLE
          - [1.0] # PEDESTRIAN
          - [1.0] # EMERGENCY_VEHICLE
      # Each inner list is one task; a task may group several classes.
      # NOTE(review): this file lives under configs/osdar23/, yet the task
      # names below (CAR, TRUCK, ...) are not the lidar__cuboid__* names used
      # by configs/osdar23/temporal/centerhead/default.yaml. Confirm whether
      # these tasks and the nms_type / nms_scale lists above are intended for
      # this dataset.
      tasks:
        - ["CAR"]
        - ["TRUCK"]
        - ["VAN"]
        - ["BUS", "TRAILER"]
        - ["MOTORCYCLE", "BICYCLE"]
        - ["PEDESTRIAN"]
        - ["EMERGENCY_VEHICLE"]
      common_heads:
        reg: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      share_conv_channel: 64
      bbox_coder:
        type: CenterPointBBoxCoder
        pc_range: ${point_cloud_range}
        post_center_range: ${post_center_range}
        max_num: 500
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 7
      separate_head:
        type: SeparateHead
        init_bias: -2.19
        final_kernel: 3
      loss_cls:
        type: GaussianFocalLoss
        reduction: mean
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      norm_bbox: true
in_channels: [640, 160] 14 | out_channels: 256 15 | scale_factor: 2 16 | heads: 17 | object: 18 | type: CenterHead 19 | in_channels: 256 20 | train_cfg: 21 | point_cloud_range: ${point_cloud_range} 22 | grid_size: ${grid_size} 23 | voxel_size: ${voxel_size} 24 | out_size_factor: ${out_size_factor} 25 | dense_reg: 1 26 | gaussian_overlap: 0.1 27 | max_objs: 500 28 | min_radius: 2 29 | code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 30 | test_cfg: 31 | post_center_limit_range: ${post_center_range} 32 | max_per_img: 500 33 | max_pool_nms: false 34 | min_radius: [4, 12, 10, 1, 0.85, 0.175] 35 | score_threshold: ${score_threshold} 36 | out_size_factor: ${out_size_factor} 37 | voxel_size: ${voxel_size[:2]} 38 | pre_max_size: 1000 39 | post_max_size: 83 40 | nms_thr: ${nms_threshold} 41 | nms_type: 42 | - rotate # lidar__cuboid__person 43 | - circle # lidar__cuboid__catenary_pole 44 | - circle # lidar__cuboid__signal_pole 45 | - circle # lidar__cuboid__road_vehicle 46 | - rotate # lidar__cuboid__buffer_stop 47 | nms_scale: 48 | - [0.4] # lidar__cuboid__person 49 | - [1.0] # lidar__cuboid__catenary_pole 50 | - [1.0] # lidar__cuboid__signal_pole 51 | - [1.0] # lidar__cuboid__road_vehicle 52 | - [1.0] # lidar__cuboid__buffer_stop 53 | tasks: 54 | - ["lidar__cuboid__person"] 55 | - ["lidar__cuboid__catenary_pole"] 56 | - ["lidar__cuboid__signal_pole"] 57 | - ["lidar__cuboid__road_vehicle"] 58 | - ["lidar__cuboid__buffer_stop"] 59 | common_heads: 60 | reg: [2, 2] 61 | height: [1, 2] 62 | dim: [3, 2] 63 | rot: [2, 2] 64 | share_conv_channel: 64 65 | bbox_coder: 66 | type: CenterPointBBoxCoder 67 | pc_range: ${point_cloud_range} 68 | post_center_range: ${post_center_range} 69 | max_num: 500 70 | score_threshold: ${score_threshold} 71 | out_size_factor: ${out_size_factor} 72 | voxel_size: ${voxel_size[:2]} 73 | code_size: 7 74 | separate_head: 75 | type: SeparateHead 76 | init_bias: -2.19 77 | final_kernel: 3 78 | loss_cls: 79 | type: GaussianFocalLoss 80 | reduction: mean 
81 | loss_bbox: 82 | type: L1Loss 83 | reduction: mean 84 | loss_weight: 0.25 85 | norm_bbox: true 86 | -------------------------------------------------------------------------------- /mmdet3d/ops/interpolate/src/three_nn_cuda.cu: -------------------------------------------------------------------------------- 1 | // Modified from 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | __global__ void three_nn_kernel(int b, int n, int m, 12 | const float *__restrict__ unknown, 13 | const float *__restrict__ known, 14 | float *__restrict__ dist2, 15 | int *__restrict__ idx) { 16 | // unknown: (B, N, 3) 17 | // known: (B, M, 3) 18 | // output: 19 | // dist2: (B, N, 3) 20 | // idx: (B, N, 3) 21 | 22 | int bs_idx = blockIdx.y; 23 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (bs_idx >= b || pt_idx >= n) return; 25 | 26 | unknown += bs_idx * n * 3 + pt_idx * 3; 27 | known += bs_idx * m * 3; 28 | dist2 += bs_idx * n * 3 + pt_idx * 3; 29 | idx += bs_idx * n * 3 + pt_idx * 3; 30 | 31 | float ux = unknown[0]; 32 | float uy = unknown[1]; 33 | float uz = unknown[2]; 34 | 35 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 36 | int besti1 = 0, besti2 = 0, besti3 = 0; 37 | for (int k = 0; k < m; ++k) { 38 | float x = known[k * 3 + 0]; 39 | float y = known[k * 3 + 1]; 40 | float z = known[k * 3 + 2]; 41 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 42 | if (d < best1) { 43 | best3 = best2; 44 | besti3 = besti2; 45 | best2 = best1; 46 | besti2 = besti1; 47 | best1 = d; 48 | besti1 = k; 49 | } else if (d < best2) { 50 | best3 = best2; 51 | besti3 = besti2; 52 | best2 = d; 53 | besti2 = k; 54 | } else if (d < best3) { 55 | best3 = d; 56 | besti3 = k; 57 | } 58 | } 59 | dist2[0] = best1; 60 | dist2[1] = best2; 61 | dist2[2] = best3; 62 | idx[0] = 
// Host-side launcher for three_nn_kernel.
//
// Launches one thread per "unknown" point: the x grid dimension covers the n
// unknown points in chunks of THREADS_PER_BLOCK, the y grid dimension covers
// the b batch elements. The kernel is enqueued on `stream`. If the launch
// reports an error, the message is printed to stderr and the process exits.
void three_nn_kernel_launcher(int b, int n, int m, const float *unknown,
                              const float *known, float *dist2, int *idx,
                              cudaStream_t stream) {
  // unknown: (B, N, 3)
  // known: (B, M, 3)
  // output:
  //      dist2: (B, N, 3)
  //      idx: (B, N, 3)

  cudaError_t err;
  dim3 blocks(DIVUP(n, THREADS_PER_BLOCK),
              b);  // blockIdx.x(col), blockIdx.y(row)
  dim3 threads(THREADS_PER_BLOCK);

  // Execution configuration: grid, block, no dynamic shared memory, and the
  // caller's stream.
  three_nn_kernel<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known,
                                                  dist2, idx);

  err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
}
19 | """ 20 | 21 | def __init__(self, mlp_channels: List[int], norm_cfg: dict = dict(type="BN2d"), init_cfg=None): 22 | super().__init__(init_cfg=init_cfg) 23 | self.fp16_enabled = False 24 | self.mlps = nn.Sequential() 25 | for i in range(len(mlp_channels) - 1): 26 | self.mlps.add_module( 27 | f"layer{i}", 28 | ConvModule( 29 | mlp_channels[i], 30 | mlp_channels[i + 1], 31 | kernel_size=(1, 1), 32 | stride=(1, 1), 33 | conv_cfg=dict(type="Conv2d"), 34 | norm_cfg=norm_cfg, 35 | ), 36 | ) 37 | 38 | @force_fp32() 39 | def forward( 40 | self, 41 | target: torch.Tensor, 42 | source: torch.Tensor, 43 | target_feats: torch.Tensor, 44 | source_feats: torch.Tensor, 45 | ) -> torch.Tensor: 46 | """forward. 47 | 48 | Args: 49 | target (Tensor): (B, n, 3) tensor of the xyz positions of 50 | the target features. 51 | source (Tensor): (B, m, 3) tensor of the xyz positions of 52 | the source features. 53 | target_feats (Tensor): (B, C1, n) tensor of the features to be 54 | propagated to. 55 | source_feats (Tensor): (B, C2, m) tensor of features 56 | to be propagated. 57 | 58 | Return: 59 | Tensor: (B, M, N) M = mlp[-1], tensor of the target features. 
60 | """ 61 | if source is not None: 62 | dist, idx = three_nn(target, source) 63 | dist_reciprocal = 1.0 / (dist + 1e-8) 64 | norm = torch.sum(dist_reciprocal, dim=2, keepdim=True) 65 | weight = dist_reciprocal / norm 66 | 67 | interpolated_feats = three_interpolate(source_feats, idx, weight) 68 | else: 69 | interpolated_feats = source_feats.expand(*source_feats.size()[0:2], target.size(1)) 70 | 71 | if target_feats is not None: 72 | new_features = torch.cat([interpolated_feats, target_feats], dim=1) # (B, C2 + C1, n) 73 | else: 74 | new_features = interpolated_feats 75 | 76 | new_features = new_features.unsqueeze(-1) 77 | new_features = self.mlps(new_features) 78 | 79 | return new_features.squeeze(-1) 80 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool/bev_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from . import bev_pool_ext 4 | 5 | __all__ = ["bev_pool"] 6 | 7 | 8 | class QuickCumsum(torch.autograd.Function): 9 | @staticmethod 10 | def forward(ctx, x, geom_feats, ranks): 11 | x = x.cumsum(0) 12 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) 13 | kept[:-1] = ranks[1:] != ranks[:-1] 14 | 15 | x, geom_feats = x[kept], geom_feats[kept] 16 | x = torch.cat((x[:1], x[1:] - x[:-1])) 17 | 18 | # save kept for backward 19 | ctx.save_for_backward(kept) 20 | 21 | # no gradient for geom_feats 22 | ctx.mark_non_differentiable(geom_feats) 23 | 24 | return x, geom_feats 25 | 26 | @staticmethod 27 | def backward(ctx, gradx, gradgeom): 28 | (kept,) = ctx.saved_tensors 29 | back = torch.cumsum(kept, 0) 30 | back[kept] -= 1 31 | 32 | val = gradx[back] 33 | 34 | return val, None, None 35 | 36 | 37 | class QuickCumsumCuda(torch.autograd.Function): 38 | @staticmethod 39 | def forward(ctx, x, geom_feats, ranks, B, D, H, W): 40 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) 41 | kept[1:] = ranks[1:] != ranks[:-1] 42 | 
def bev_pool(feats, coords, B, D, H, W):
    """Pool per-row features into a grid through ``QuickCumsumCuda``.

    A scalar rank is computed for every row from the first four columns of
    ``coords``; rows are then sorted by that rank so rows with equal rank are
    adjacent before being handed to ``QuickCumsumCuda``.

    Args:
        feats: Feature tensor whose first dimension indexes rows.
        coords: Coordinate tensor with the same number of rows as ``feats``
            and at least four columns.
            NOTE(review): presumably (x, y, z, batch) indices - confirm
            against the caller.
        B, D, H, W: Grid extents, forwarded unchanged to the pooling kernel.

    Returns:
        The kernel output with its axes permuted by ``(0, 4, 1, 2, 3)`` and
        made contiguous.
    """
    assert feats.shape[0] == coords.shape[0]

    # Multipliers of the first three coordinate columns in the rank.
    stride_col0 = W * D * B
    stride_col1 = D * B
    stride_col2 = B
    ranks = (
        coords[:, 0] * stride_col0
        + coords[:, 1] * stride_col1
        + coords[:, 2] * stride_col2
        + coords[:, 3]
    )

    # Sort all three tensors with the same permutation.
    order = ranks.argsort()
    sorted_feats = feats[order]
    sorted_coords = coords[order]
    sorted_ranks = ranks[order]

    pooled = QuickCumsumCuda.apply(sorted_feats, sorted_coords, sorted_ranks, B, D, H, W)
    return pooled.permute(0, 4, 1, 2, 3).contiguous()
points_tensor, 15 | at::Tensor temp_tensor, 16 | at::Tensor idx_tensor); 17 | 18 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 19 | const float *dataset, float *temp, 20 | int *idxs, cudaStream_t stream); 21 | 22 | int furthest_point_sampling_with_dist_wrapper(int b, int n, int m, 23 | at::Tensor points_tensor, 24 | at::Tensor temp_tensor, 25 | at::Tensor idx_tensor); 26 | 27 | void furthest_point_sampling_with_dist_kernel_launcher(int b, int n, int m, 28 | const float *dataset, 29 | float *temp, int *idxs, 30 | cudaStream_t stream); 31 | 32 | int furthest_point_sampling_wrapper(int b, int n, int m, 33 | at::Tensor points_tensor, 34 | at::Tensor temp_tensor, 35 | at::Tensor idx_tensor) { 36 | const float *points = points_tensor.data_ptr(); 37 | float *temp = temp_tensor.data_ptr(); 38 | int *idx = idx_tensor.data_ptr(); 39 | 40 | cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); 41 | furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); 42 | return 1; 43 | } 44 | 45 | int furthest_point_sampling_with_dist_wrapper(int b, int n, int m, 46 | at::Tensor points_tensor, 47 | at::Tensor temp_tensor, 48 | at::Tensor idx_tensor) { 49 | 50 | const float *points = points_tensor.data(); 51 | float *temp = temp_tensor.data(); 52 | int *idx = idx_tensor.data(); 53 | 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); 55 | furthest_point_sampling_with_dist_kernel_launcher(b, n, m, points, temp, idx, stream); 56 | return 1; 57 | } 58 | 59 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 60 | m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, 61 | "furthest_point_sampling_wrapper"); 62 | m.def("furthest_point_sampling_with_dist_wrapper", 63 | &furthest_point_sampling_with_dist_wrapper, 64 | "furthest_point_sampling_with_dist_wrapper"); 65 | } 66 | -------------------------------------------------------------------------------- /mmdet3d/core/points/cam_points.py: 
class CameraPoints(BasePoints):
    """Point set expressed in CAM coordinates.

    Args:
        tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
        points_dim (int): Number of values per point; each row starts with
            (x, y, z). Default to 3.
        attribute_dims (dict): Mapping that names the extra dimensions.
            Default to None.

    Attributes:
        tensor (torch.Tensor): Float matrix of N x points_dim.
        points_dim (int): Number of values per point; each row is
            (x, y, z, ...).
        attribute_dims (dict): Mapping that names the extra dimensions.
            Default to None.
        rotation_axis (int): Default axis for point rotation (1 here).
    """

    def __init__(self, tensor, points_dim=3, attribute_dims=None):
        super().__init__(tensor, points_dim=points_dim, attribute_dims=attribute_dims)
        self.rotation_axis = 1

    def flip(self, bev_direction="horizontal"):
        """Flip the points in place along the given BEV direction.

        ``"horizontal"`` negates column 0 and ``"vertical"`` negates column 2;
        any other value leaves the points untouched.
        """
        if bev_direction == "horizontal":
            flipped_column = 0
        elif bev_direction == "vertical":
            flipped_column = 2
        else:
            return
        self.tensor[:, flipped_column] = -self.tensor[:, flipped_column]

    def in_range_bev(self, point_range):
        """Test which points lie strictly inside a BEV rectangle.

        Columns 0 and 2 of ``self.tensor`` are the two BEV axes here.

        Args:
            point_range (list | torch.Tensor): Bounds ordered as
                (min of column 0, min of column 2,
                 max of column 0, max of column 2).

        Returns:
            torch.Tensor: Boolean flag per point, True when the point is
            inside the range (bounds excluded).
        """
        first_axis = self.tensor[:, 0]
        second_axis = self.tensor[:, 2]
        above_lower = (first_axis > point_range[0]) & (second_axis > point_range[1])
        below_upper = (first_axis < point_range[2]) & (second_axis < point_range[3])
        return above_lower & below_upper

    def convert_to(self, dst, rt_mat=None):
        """Convert these points from CAM mode to ``dst`` mode.

        Args:
            dst (:obj:`CoordMode`): The target point mode.
            rt_mat (np.ndarray | torch.Tensor): Rotation and translation
                matrix between the two coordinate systems. Defaults to None.
                Changing coordinates usually goes along with a change of
                sensor (e.g. camera to LiDAR), which needs such a matrix.

        Returns:
            :obj:`BasePoints`: The converted points, as produced by
            ``Coord3DMode.convert_point``.
        """
        # Imported here rather than at module level, as in the original code.
        from mmdet3d.core.bbox import Coord3DMode

        source_mode = Coord3DMode.CAM
        return Coord3DMode.convert_point(point=self, src=source_mode, dst=dst, rt_mat=rt_mat)
def in_range_bev(self, point_range):
    """Test which points lie strictly inside a BEV rectangle.

    Columns 0 and 1 of ``self.tensor`` are compared against the bounds.

    Args:
        point_range (list | torch.Tensor): Bounds ordered as
            (x_min, y_min, x_max, y_max).

    Returns:
        torch.Tensor: Boolean flag per point, True when the point is
        inside the range (bounds excluded).
    """
    xs = self.tensor[:, 0]
    ys = self.tensor[:, 1]
    above_lower = (xs > point_range[0]) & (ys > point_range[1])
    below_upper = (xs < point_range[2]) & (ys < point_range[3])
    return above_lower & below_upper
class LiDARPoints(BasePoints):
    """Point set expressed in LIDAR coordinates.

    Args:
        tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
        points_dim (int): Number of values per point; each row starts with
            (x, y, z). Default to 3.
        attribute_dims (dict): Mapping that names the extra dimensions.
            Default to None.

    Attributes:
        tensor (torch.Tensor): Float matrix of N x points_dim.
        points_dim (int): Number of values per point; each row is
            (x, y, z, ...).
        attribute_dims (dict): Mapping that names the extra dimensions.
            Default to None.
        rotation_axis (int): Default axis for point rotation (2 here).
    """

    def __init__(self, tensor, points_dim=3, attribute_dims=None):
        super().__init__(tensor, points_dim=points_dim, attribute_dims=attribute_dims)
        self.rotation_axis = 2

    def flip(self, bev_direction="horizontal"):
        """Flip the points in place along the given BEV direction.

        ``"horizontal"`` negates column 1 and ``"vertical"`` negates column 0;
        any other value leaves the points untouched.
        """
        if bev_direction == "horizontal":
            flipped_column = 1
        elif bev_direction == "vertical":
            flipped_column = 0
        else:
            return
        self.tensor[:, flipped_column] = -self.tensor[:, flipped_column]

    def in_range_bev(self, point_range):
        """Test which points lie strictly inside a BEV rectangle.

        Columns 0 and 1 of ``self.tensor`` are compared against the bounds.

        Args:
            point_range (list | torch.Tensor): Bounds ordered as
                (x_min, y_min, x_max, y_max).

        Returns:
            torch.Tensor: Boolean flag per point, True when the point is
            inside the range (bounds excluded).
        """
        xs = self.tensor[:, 0]
        ys = self.tensor[:, 1]
        above_lower = (xs > point_range[0]) & (ys > point_range[1])
        below_upper = (xs < point_range[2]) & (ys < point_range[3])
        return above_lower & below_upper

    def convert_to(self, dst, rt_mat=None):
        """Convert these points from LIDAR mode to ``dst`` mode.

        Args:
            dst (:obj:`CoordMode`): The target point mode.
            rt_mat (np.ndarray | torch.Tensor): Rotation and translation
                matrix between the two coordinate systems. Defaults to None.
                Changing coordinates usually goes along with a change of
                sensor (e.g. camera to LiDAR), which needs such a matrix.

        Returns:
            :obj:`BasePoints`: The converted points, as produced by
            ``Coord3DMode.convert_point``.
        """
        # Imported here rather than at module level, as in the original code.
        from mmdet3d.core.bbox import Coord3DMode

        source_mode = Coord3DMode.LIDAR
        return Coord3DMode.convert_point(point=self, src=source_mode, dst=dst, rt_mat=rt_mat)
def main():
    """Debug entry point: run one training job described by a config file.

    Steps, in order: initialise the distributed backend on a free local
    port, parse the command line, load and evaluate the config, pick the run
    directory, dump the config, set up logging and seeding, build the
    training dataset and model, and call ``train_model`` with validation
    enabled.
    """
    # The distributed backend is initialised before anything else.
    dist.init(master_host=f"localhost:{get_free_tcp_port()}")

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config", metavar="FILE", help="config file")
    arg_parser.add_argument("--run-dir", metavar="DIR", help="run directory")
    # Unrecognised arguments are treated as config overrides.
    args, opts = arg_parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)
    cfg = Config(recursive_eval(configs), filename=args.config)

    torch.backends.cudnn.benchmark = cfg.cudnn_benchmark
    torch.cuda.set_device(dist.local_rank())

    # Use the requested run directory, or let torchpack choose one.
    run_dir = args.run_dir
    if run_dir is not None:
        set_run_dir(run_dir)
    else:
        run_dir = auto_set_run_dir()
    cfg.run_dir = run_dir

    # dump config
    config_dump_path = os.path.join(cfg.run_dir, "configs.yaml")
    cfg.dump(config_dump_path)

    # init the logger before other steps
    timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    logger = get_root_logger(log_file=os.path.join(cfg.run_dir, f"{timestamp}.log"))

    # log some basic info
    logger.info(f"Config:\n{cfg.pretty_text}")

    # set random seeds
    if cfg.seed is not None:
        logger.info(f"Set random seed to {cfg.seed}, deterministic mode: {cfg.deterministic}")
        for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
            seed_fn(cfg.seed)
        if cfg.deterministic:
            # Deterministic cuDNN excludes the autotuning benchmark mode.
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False

    datasets = [build_dataset(cfg.data.train)]

    model = build_model(cfg.model)
    model.init_weights()
    if cfg.get("sync_bn", None):
        # A truthy non-dict value means "convert everything".
        if not isinstance(cfg["sync_bn"], dict):
            cfg["sync_bn"] = dict(exclude=[])
        excluded = cfg["sync_bn"]["exclude"]
        model = convert_sync_batchnorm(model, exclude=excluded)

    logger.info(f"Model:\n{model}")
    train_model(
        model,
        datasets,
        cfg,
        distributed=True,
        validate=True,
        timestamp=timestamp,
    )
__name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /configs/osdar23/baseline/transfusion/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | decoder: 3 | backbone: 4 | type: SECOND 5 | in_channels: 256 6 | out_channels: [128, 256] 7 | layer_nums: [5, 5] 8 | layer_strides: [1, 2] 9 | norm_cfg: 10 | type: BN 11 | eps: 1.0e-3 12 | momentum: 0.01 13 | conv_cfg: 14 | type: Conv2d 15 | bias: false 16 | neck: 17 | type: SECONDFPN 18 | in_channels: [128, 256] 19 | out_channels: [256, 256] 20 | upsample_strides: [1, 2] 21 | norm_cfg: 22 | type: BN 23 | eps: 1.0e-3 24 | momentum: 0.01 25 | upsample_cfg: 26 | type: deconv 27 | bias: false 28 | use_conv_for_no_stride: true 29 | heads: 30 | object: 31 | type: TransFusionHead 32 | num_proposals: 200 33 | auxiliary: true 34 | in_channels: 512 35 | hidden_channel: 128 36 | num_classes: ${no_classes} 37 | num_decoder_layers: 1 38 | num_heads: 8 39 | nms_kernel_size: 3 40 | ffn_channel: 256 41 | dropout: 0.1 42 | bn_momentum: 0.1 43 | activation: relu 44 | train_cfg: 45 | dataset: OSDAR23 46 | point_cloud_range: ${point_cloud_range} 47 | grid_size: ${grid_size} 48 | voxel_size: ${voxel_size} 49 | out_size_factor: ${out_size_factor} 50 | gaussian_overlap: 0.1 51 | min_radius: 2 52 | pos_weight: -1 53 | code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 54 | assigner: 55 | type: HungarianAssigner3D 56 | iou_calculator: 57 | type: BboxOverlaps3D 58 | coordinate: lidar 59 | cls_cost: 60 | type: FocalLossCost 61 | gamma: 2.0 62 | alpha: 0.25 63 | weight: 0.15 64 | reg_cost: 65 | type: BBoxBEVL1Cost 66 | weight: 0.25 67 | iou_cost: 68 | type: IoU3DCost 69 | weight: 0.25 70 | test_cfg: 71 | dataset: OSDAR23 72 | grid_size: ${grid_size} 73 | out_size_factor: ${out_size_factor} 74 | voxel_size: ${voxel_size[:2]} 75 | pc_range: ${point_cloud_range[:2]} 76 | nms_type: null 77 | common_heads: 78 | center: [2, 2] 
79 | height: [1, 2] 80 | dim: [3, 2] 81 | rot: [2, 2] 82 | bbox_coder: 83 | type: TransFusionBBoxCoder 84 | pc_range: ${point_cloud_range[:2]} 85 | post_center_range: ${post_center_range} 86 | score_threshold: ${score_threshold} 87 | out_size_factor: ${out_size_factor} 88 | voxel_size: ${voxel_size[:2]} 89 | code_size: 8 90 | loss_cls: 91 | type: FocalLoss 92 | use_sigmoid: true 93 | gamma: 2.0 94 | alpha: 0.25 95 | reduction: mean 96 | loss_weight: 1.0 97 | loss_heatmap: 98 | type: GaussianFocalLoss 99 | reduction: mean 100 | loss_weight: 1.0 101 | loss_bbox: 102 | type: L1Loss 103 | reduction: mean 104 | loss_weight: 0.25 105 | -------------------------------------------------------------------------------- /configs/osdar23/temporal-gru/transfusion/default.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | decoder: 3 | backbone: 4 | type: SECOND 5 | in_channels: 256 6 | out_channels: [128, 256] 7 | layer_nums: [5, 5] 8 | layer_strides: [1, 2] 9 | norm_cfg: 10 | type: BN 11 | eps: 1.0e-3 12 | momentum: 0.01 13 | conv_cfg: 14 | type: Conv2d 15 | bias: false 16 | neck: 17 | type: SECONDFPN 18 | in_channels: [128, 256] 19 | out_channels: [256, 256] 20 | upsample_strides: [1, 2] 21 | norm_cfg: 22 | type: BN 23 | eps: 1.0e-3 24 | momentum: 0.01 25 | upsample_cfg: 26 | type: deconv 27 | bias: false 28 | use_conv_for_no_stride: true 29 | heads: 30 | object: 31 | type: TransFusionHead 32 | num_proposals: 200 33 | auxiliary: true 34 | in_channels: 512 35 | hidden_channel: 128 36 | num_classes: ${no_classes} 37 | num_decoder_layers: 1 38 | num_heads: 8 39 | nms_kernel_size: 3 40 | ffn_channel: 256 41 | dropout: 0.1 42 | bn_momentum: 0.1 43 | activation: relu 44 | train_cfg: 45 | dataset: OSDAR23 46 | point_cloud_range: ${point_cloud_range} 47 | grid_size: ${grid_size} 48 | voxel_size: ${voxel_size} 49 | out_size_factor: ${out_size_factor} 50 | gaussian_overlap: 0.1 51 | min_radius: 2 52 | pos_weight: -1 53 | code_weights: 
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 54 | assigner: 55 | type: HungarianAssigner3D 56 | iou_calculator: 57 | type: BboxOverlaps3D 58 | coordinate: lidar 59 | cls_cost: 60 | type: FocalLossCost 61 | gamma: 2.0 62 | alpha: 0.25 63 | weight: 0.15 64 | reg_cost: 65 | type: BBoxBEVL1Cost 66 | weight: 0.25 67 | iou_cost: 68 | type: IoU3DCost 69 | weight: 0.25 70 | test_cfg: 71 | dataset: OSDAR23 72 | grid_size: ${grid_size} 73 | out_size_factor: ${out_size_factor} 74 | voxel_size: ${voxel_size[:2]} 75 | pc_range: ${point_cloud_range[:2]} 76 | nms_type: null 77 | common_heads: 78 | center: [2, 2] 79 | height: [1, 2] 80 | dim: [3, 2] 81 | rot: [2, 2] 82 | bbox_coder: 83 | type: TransFusionBBoxCoder 84 | pc_range: ${point_cloud_range[:2]} 85 | post_center_range: ${post_center_range} 86 | score_threshold: ${score_threshold} 87 | out_size_factor: ${out_size_factor} 88 | voxel_size: ${voxel_size[:2]} 89 | code_size: 8 90 | loss_cls: 91 | type: FocalLoss 92 | use_sigmoid: true 93 | gamma: 2.0 94 | alpha: 0.25 95 | reduction: mean 96 | loss_weight: 1.0 97 | loss_heatmap: 98 | type: GaussianFocalLoss 99 | reduction: mean 100 | loss_weight: 1.0 101 | loss_bbox: 102 | type: L1Loss 103 | reduction: mean 104 | loss_weight: 0.25 105 | --------------------------------------------------------------------------------