├── data
    └── .gitkeep
├── mmdet3d
    ├── __init__.py
    ├── models
    │   ├── utils
    │   │   └── __init__.py
    │   ├── heads
    │   │   ├── segm
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   └── bbox
    │   │   │   └── __init__.py
    │   ├── temporal
    │   │   └── __init__.py
    │   ├── vtransforms
    │   │   ├── __init__.py
    │   │   └── lss.py
    │   ├── fusers
    │   │   ├── __init__.py
    │   │   ├── concat.py
    │   │   ├── conv.py
    │   │   └── add.py
    │   ├── fusion_models
    │   │   └── __init__.py
    │   ├── necks
    │   │   ├── __init__.py
    │   │   └── lss.py
    │   ├── losses
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   └── resnet.py
    │   └── builder.py
    ├── ops
    │   ├── bev_pool
    │   │   ├── __init__.py
    │   │   └── bev_pool.py
    │   ├── knn
    │   │   ├── __init__.py
    │   │   ├── src
    │   │   │   └── knn.cpp
    │   │   └── knn.py
    │   ├── ball_query
    │   │   ├── __init__.py
    │   │   ├── ball_query.py
    │   │   └── src
    │   │   │   ├── ball_query.cpp
    │   │   │   └── ball_query_cuda.cu
    │   ├── gather_points
    │   │   ├── __init__.py
    │   │   ├── gather_points.py
    │   │   └── src
    │   │   │   └── gather_points.cpp
    │   ├── iou3d
    │   │   ├── __init__.py
    │   │   └── iou3d_utils.py
    │   ├── interpolate
    │   │   ├── __init__.py
    │   │   ├── three_nn.py
    │   │   ├── three_interpolate.py
    │   │   └── src
    │   │   │   └── three_nn_cuda.cu
    │   ├── group_points
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   └── group_points.cpp
    │   ├── paconv
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   └── assign_score_withk.cpp
    │   ├── voxel
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   └── voxelization.cpp
    │   ├── furthest_point_sample
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── furthest_point_sample.py
    │   │   └── src
    │   │   │   └── furthest_point_sample.cpp
    │   ├── roiaware_pool3d
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   └── points_in_boxes_cpu.cpp
    │   ├── spconv
    │   │   └── __init__.py
    │   ├── pointnet_modules
    │   │   ├── __init__.py
    │   │   ├── builder.py
    │   │   └── point_fp_module.py
    │   └── __init__.py
    ├── apis
    │   ├── __init__.py
    │   └── test.py
    ├── core
    │   ├── utils
    │   │   ├── __init__.py
    │   │   └── gaussian.py
    │   ├── voxel
    │   │   ├── __init__.py
    │   │   └── builder.py
    │   ├── bbox
    │   │   ├── match_costs
    │   │   │   ├── __init__.py
    │   │   │   └── match_cost.py
    │   │   ├── assigners
    │   │   │   └── __init__.py
    │   │   ├── iou_calculators
    │   │   │   └── __init__.py
    │   │   ├── samplers
    │   │   │   └── __init__.py
    │   │   ├── coders
    │   │   │   └── __init__.py
    │   │   ├── structures
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   └── util.py
    │   ├── __init__.py
    │   ├── anchor
    │   │   └── __init__.py
    │   ├── post_processing
    │   │   └── __init__.py
    │   └── points
    │   │   ├── __init__.py
    │   │   ├── cam_points.py
    │   │   ├── depth_points.py
    │   │   └── lidar_points.py
    ├── runner
    │   ├── __init__.py
    │   └── epoch_based_runner.py
    ├── datasets
    │   ├── pipelines
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── utils.py
    │   └── builder.py
    └── utils
    │   ├── __init__.py
    │   ├── syncbn.py
    │   ├── config.py
    │   └── logger.py
├── tools
    ├── data_converter
    │   └── __init__.py
    ├── preprocessing
    │   └── __init__.py
    ├── download_pretrained.sh
    ├── visualization
    │   ├── utils
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── create_video.py
    ├── convert_checkpoints_to_torchsparse.py
    ├── create_swint_checkpoint.py
    ├── create_combined_checkpoint.py
    └── debug_train.py
├── pyproject.toml
├── requirements-visual.txt
├── requirements-dev.txt
├── configs
    ├── osdar23
    │   ├── baseline
    │   │   ├── centerhead
    │   │   │   ├── camera
    │   │   │   │   └── default.yaml
    │   │   │   └── default.yaml
    │   │   ├── transfusion
    │   │   │   ├── lidar
    │   │   │   │   ├── default.yaml
    │   │   │   │   ├── voxelnet-1600g-0xy16-0z4.yaml
    │   │   │   │   └── voxelnet-1600g-0xy16-0z4-gtp15.yaml
    │   │   │   └── default.yaml
    │   │   └── default.yaml
    │   ├── temporal
    │   │   ├── transfusion
    │   │   │   └── lidar
    │   │   │   │   ├── default.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml
    │   │   ├── centerhead
    │   │   │   ├── camera
    │   │   │   │   └── default.yaml
    │   │   │   └── default.yaml
    │   │   └── default.yaml
    │   └── temporal-gru
    │   │   ├── transfusion
    │   │       ├── lidar
    │   │       │   ├── default.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml
    │   │       └── default.yaml
    │   │   ├── centerhead
    │   │       ├── camera
    │   │       │   └── default.yaml
    │   │       └── default.yaml
    │   │   └── default.yaml
    ├── tumtraf-i
    │   ├── baseline
    │   │   ├── centerhead
    │   │   │   ├── camera
    │   │   │   │   └── default.yaml
    │   │   │   └── default.yaml
    │   │   ├── transfusion
    │   │   │   └── lidar
    │   │   │   │   ├── default.yaml
    │   │   │   │   ├── voxelnet-1600g-0xy1-0z20.yaml
    │   │   │   │   └── voxelnet-1600g-0xy1-0z20-gtp15.yaml
    │   │   └── default.yaml
    │   ├── temporal
    │   │   ├── transfusion
    │   │   │   └── lidar
    │   │   │   │   ├── default.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml
    │   │   │   │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml
    │   │   │   │   └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml
    │   │   ├── centerhead
    │   │   │   ├── camera
    │   │   │   │   └── default.yaml
    │   │   │   └── default.yaml
    │   │   └── default.yaml
    │   └── temporal-gru
    │   │   ├── transfusion
    │   │       └── lidar
    │   │       │   ├── default.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml
    │   │       │   ├── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml
    │   │       │   └── voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml
    │   │   └── default.yaml
    └── default.yaml
├── docs
    └── figures
    │   └── teaser.jpg
├── setup.cfg
├── requirements.txt
├── Makefile
├── Dockerfile.dev
└── Dockerfile.prod


/data/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mmdet3d/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/data_converter/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 100
3 | 


--------------------------------------------------------------------------------
/mmdet3d/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .transformer import *
2 | 


--------------------------------------------------------------------------------
/mmdet3d/models/heads/segm/__init__.py:
--------------------------------------------------------------------------------
1 | from .vanilla import *
2 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/bev_pool/__init__.py:
--------------------------------------------------------------------------------
1 | from .bev_pool import bev_pool
2 | 


--------------------------------------------------------------------------------
/mmdet3d/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .test import *
2 | from .train import *
3 | 


--------------------------------------------------------------------------------
/requirements-visual.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | pytransform3d
3 | opencv-python


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | mypy
2 | flake8
3 | black
4 | isort
5 | pydocstyle
6 | 


--------------------------------------------------------------------------------
/mmdet3d/models/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox import *
2 | from .segm import *
3 | 


--------------------------------------------------------------------------------
/mmdet3d/models/temporal/__init__.py:
--------------------------------------------------------------------------------
1 | from .gru import *
2 | from .lstm import *
3 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/knn/__init__.py:
--------------------------------------------------------------------------------
1 | from .knn import knn
2 | 
3 | __all__ = ["knn"]
4 | 


--------------------------------------------------------------------------------
/mmdet3d/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .gaussian import *
2 | from .visualize import *
3 | 


--------------------------------------------------------------------------------
/mmdet3d/runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .epoch_based_runner import CustomEpochBasedRunner
2 | 


--------------------------------------------------------------------------------
/mmdet3d/models/vtransforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .lss import *
2 | from .depth_lss import *
3 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/centerhead/camera/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     lidar: null
4 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/transfusion/lidar/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     camera: null
4 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/transfusion/lidar/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     camera: null
4 | 


--------------------------------------------------------------------------------
/mmdet3d/models/heads/bbox/__init__.py:
--------------------------------------------------------------------------------
1 | from .centerpoint import *
2 | from .transfusion import *
3 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/transfusion/lidar/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     camera: null
4 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/baseline/centerhead/camera/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     lidar: null
4 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/baseline/transfusion/lidar/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     camera: null
4 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     camera: null
4 | 


--------------------------------------------------------------------------------
/mmdet3d/models/fusers/__init__.py:
--------------------------------------------------------------------------------
1 | from .add import *
2 | from .concat import *
3 | from .conv import *
4 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/ball_query/__init__.py:
--------------------------------------------------------------------------------
1 | from .ball_query import ball_query
2 | 
3 | __all__ = ["ball_query"]
4 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     camera: null
4 | 


--------------------------------------------------------------------------------
/docs/figures/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/egemenkopuz/temporal-bevfusion/HEAD/docs/figures/teaser.jpg


--------------------------------------------------------------------------------
/mmdet3d/models/fusion_models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import *
2 | from .bevfusion import *
3 | from .tbevfusion import *
4 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/gather_points/__init__.py:
--------------------------------------------------------------------------------
1 | from .gather_points import gather_points
2 | 
3 | __all__ = ["gather_points"]
4 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/iou3d/__init__.py:
--------------------------------------------------------------------------------
1 | from .iou3d_utils import boxes_iou_bev, nms_gpu, nms_normal_gpu
2 | 
3 | __all__ = ["boxes_iou_bev", "nms_gpu", "nms_normal_gpu"]
4 | 


--------------------------------------------------------------------------------
/tools/download_pretrained.sh:
--------------------------------------------------------------------------------
1 | mkdir pretrained &&
2 | cd pretrained &&
3 | wget https://bevfusion.mit.edu/files/pretrained_updated/swint-nuimages-pretrained.pth
4 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/centerhead/camera/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     lidar: null
4 |   temporal:
5 |     in_channels: 80
6 |     hidden_channels: [80]
7 | 


--------------------------------------------------------------------------------
/tools/visualization/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .osdar23_meta import OSDAR23Meta
2 | from .tumtraf_meta import TUMTrafMeta
3 | 
4 | __all__ = ["TUMTrafMeta", "OSDAR23Meta"]
5 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/centerhead/camera/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     lidar: null
4 |   temporal:
5 |     in_channels: 80
6 |     hidden_channels: [80]
7 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/centerhead/camera/default.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   encoders:
3 |     lidar: null
4 |   temporal:
5 |     in_channels: 80
6 |     hidden_channels: [80]
7 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/interpolate/__init__.py:
--------------------------------------------------------------------------------
1 | from .three_interpolate import three_interpolate
2 | from .three_nn import three_nn
3 | 
4 | __all__ = ["three_nn", "three_interpolate"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/group_points/__init__.py:
--------------------------------------------------------------------------------
1 | from .group_points import GroupAll, QueryAndGroup, grouping_operation
2 | 
3 | __all__ = ["QueryAndGroup", "GroupAll", "grouping_operation"]
4 | 


--------------------------------------------------------------------------------
/mmdet3d/core/voxel/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_voxel_generator
2 | from .voxel_generator import VoxelGenerator
3 | 
4 | __all__ = ["build_voxel_generator", "VoxelGenerator"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/paconv/__init__.py:
--------------------------------------------------------------------------------
1 | from .assign_score import assign_score_withk
2 | from .paconv import PAConv, PAConvCUDA
3 | 
4 | __all__ = ["assign_score_withk", "PAConv", "PAConvCUDA"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.models.necks.fpn import FPN
2 | 
3 | from .lss import *
4 | from .second import *
5 | from .generalized_lss import *
6 | from .detectron_fpn import *


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.core.bbox.match_costs import build_match_cost
2 | from .match_cost import BBox3DL1Cost
3 | 
4 | __all__ = ["build_match_cost", "BBox3DL1Cost"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets.pipelines import Compose
2 | 
3 | from .dbsampler import *
4 | from .formating import *
5 | from .loading import *
6 | from .transforms_3d import *
7 | 


--------------------------------------------------------------------------------
/mmdet3d/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy
2 | 
3 | __all__ = [
4 |     "FocalLoss",
5 |     "SmoothL1Loss",
6 |     "binary_cross_entropy",
7 | ]
8 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   type: BEVFusion
 3 |   encoders: null
 4 |   fuser: null
 5 |   heads:
 6 |     map: null
 7 | 
 8 | temporal_mode: false
 9 | val_online_mode: false
10 | test_online_mode: false
11 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/baseline/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   type: BEVFusion
 3 |   encoders: null
 4 |   fuser: null
 5 |   heads:
 6 |     map: null
 7 | 
 8 | temporal_mode: false
 9 | val_online_mode: false
10 | test_online_mode: false
11 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/voxel/__init__.py:
--------------------------------------------------------------------------------
1 | from .scatter_points import DynamicScatter, dynamic_scatter
2 | from .voxelize import Voxelization, voxelization
3 | 
4 | __all__ = ["Voxelization", "voxelization", "dynamic_scatter", "DynamicScatter"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .backbones import *
 2 | from .builder import *
 3 | from .fusers import *
 4 | from .fusion_models import *
 5 | from .heads import *
 6 | from .losses import *
 7 | from .necks import *
 8 | from .temporal import *
 9 | from .vtransforms import *
10 | 


--------------------------------------------------------------------------------
/mmdet3d/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
2 | 
3 | from .resnet import *
4 | from .second import *
5 | from .sparse_encoder import *
6 | from .pillar_encoder import *
7 | from .vovnet import *
8 | from .dla import *


--------------------------------------------------------------------------------
/mmdet3d/ops/furthest_point_sample/__init__.py:
--------------------------------------------------------------------------------
1 | from .furthest_point_sample import furthest_point_sample, furthest_point_sample_with_dist
2 | from .points_sampler import Points_Sampler
3 | 
4 | __all__ = ["furthest_point_sample", "furthest_point_sample_with_dist", "Points_Sampler"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/roiaware_pool3d/__init__.py:
--------------------------------------------------------------------------------
1 | from .points_in_boxes import points_in_boxes_batch, points_in_boxes_cpu, points_in_boxes_gpu
2 | from .roiaware_pool3d import RoIAwarePool3d
3 | 
4 | __all__ = ["RoIAwarePool3d", "points_in_boxes_gpu", "points_in_boxes_cpu", "points_in_boxes_batch"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .points import *  # noqa: F401, F403
4 | from .post_processing import *  # noqa: F401, F403
5 | from .utils import *  # noqa: F401, F403
6 | from .voxel import *  # noqa: F401, F403
7 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner
2 | from .hungarian_assigner import HungarianAssigner3D, HeuristicAssigner3D 
3 | 
4 | __all__ = ["BaseAssigner", "MaxIoUAssigner", "AssignResult", "HungarianAssigner3D", "HeuristicAssigner3D"]
5 | 


--------------------------------------------------------------------------------
/mmdet3d/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from mmdet.datasets.builder import build_dataloader
 2 | 
 3 | from .builder import *
 4 | from .custom_3d import *
 5 | from .nuscenes_dataset import *
 6 | from .osdar23_dataset import *
 7 | from .pipelines import *
 8 | from .tumtraf_intersection_dataset import *
 9 | from .utils import *
10 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [isort]
 2 | profile = black
 3 | 
 4 | [flake8]
 5 | max-line-length = 100
 6 | ignore = E203, E402, W503, W504, F821, E501
 7 | exclude = venv,demo
 8 | 
 9 | [mypy]
10 | ignore_missing_imports = True
11 | disallow_untyped_defs = True
12 | exclude = venv|docs|demo
13 | 
14 | [pydocstyle]
15 | ignore = D100
16 | 


--------------------------------------------------------------------------------
/mmdet3d/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg, print_log
2 | 
3 | from .logger import get_root_logger
4 | from .syncbn import convert_sync_batchnorm
5 | from .config import recursive_eval
6 | 
7 | __all__ = ["Registry", "build_from_cfg", "get_root_logger", "print_log", "convert_sync_batchnorm", "recursive_eval"]
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Pillow==8.4.0
 2 | numpy==1.19.5
 3 | tqdm
 4 | torchpack
 5 | mmcv==1.4.0
 6 | mmcv-full==1.4.0
 7 | mmdet==2.20.0
 8 | nuscenes-devkit
 9 | mpi4py==3.0.3
10 | numba==0.48.0
11 | git+https://github.com/DanielPollithy/pypcd.git
12 | git+https://github.com/facebookresearch/pytorch3d.git@stable
13 | optuna==3.3.0
14 | pandas==1.3.4
15 | plotly==5.17.0
16 | kaleido==0.2.1
17 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all
 2 | 
 3 | all: dev
 4 | 
 5 | dev: install-pkgs install-dev
 6 | prod: install-pkgs install-prod
 7 | 
 8 | install-pkgs:
 9 | 	pip install --extra-index-url http://24.199.104.228/simple --trusted-host 24.199.104.228 torchsparse==2.1.0+torch110cu113 --force-reinstall
10 | 	pip install -r requirements.txt
11 | 
12 | install-dev:
13 | 	python setup.py develop
14 | 
15 | install-prod:
16 | 	python setup.py install


--------------------------------------------------------------------------------
/mmdet3d/core/anchor/__init__.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core.anchor import build_prior_generator
 2 | from .anchor_3d_generator import (
 3 |     AlignedAnchor3DRangeGenerator,
 4 |     AlignedAnchor3DRangeGeneratorPerCls,
 5 |     Anchor3DRangeGenerator,
 6 | )
 7 | 
 8 | __all__ = [
 9 |     "AlignedAnchor3DRangeGenerator",
10 |     "Anchor3DRangeGenerator",
11 |     "build_prior_generator",
12 |     "AlignedAnchor3DRangeGeneratorPerCls",
13 | ]
14 | 


--------------------------------------------------------------------------------
/mmdet3d/models/fusers/concat.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | import torch
 4 | 
 5 | from mmdet3d.models.builder import FUSERS
 6 | 
 7 | __all__ = ["ConcatFuser"]
 8 | 
 9 | 
@FUSERS.register_module()
class ConcatFuser:
    """Fuser that joins a list of tensors with ``torch.cat``.

    The class is registered with the ``FUSERS`` registry.

    Args:
        dim: Dimension along which the inputs are concatenated. Defaults to 1.
    """

    def __init__(self, dim: int = 1) -> None:
        # Kept on the instance; read again on every call.
        self.dim = dim

    def __call__(self, inputs: List[torch.Tensor]) -> torch.Tensor:
        """Concatenate ``inputs`` along ``self.dim`` and return the result."""
        axis = self.dim
        fused = torch.cat(inputs, dim=axis)
        return fused
17 | 


--------------------------------------------------------------------------------
/mmdet3d/core/voxel/builder.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from . import voxel_generator
 4 | 
 5 | 
 6 | def build_voxel_generator(cfg, **kwargs):
 7 |     """Builder of voxel generator."""
 8 |     if isinstance(cfg, voxel_generator.VoxelGenerator):
 9 |         return cfg
10 |     elif isinstance(cfg, dict):
11 |         return mmcv.runner.obj_from_dict(cfg, voxel_generator, default_args=kwargs)
12 |     else:
13 |         raise TypeError("Invalid type {} for building a sampler".format(type(cfg)))
14 | 


--------------------------------------------------------------------------------
/mmdet3d/utils/syncbn.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | import torch
 3 | from collections import deque
 4 | 
 5 | 
 6 | __all__ = ["convert_sync_batchnorm"]
 7 | 
 8 | 
 9 | def convert_sync_batchnorm(input_model, exclude=[]):
10 |     for name, module in input_model._modules.items():
11 |         skip = sum([ex in name for ex in exclude])
12 |         if skip:
13 |             continue
14 |         input_model._modules[name] = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module)
15 |     return input_model
16 |     


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
 1 | from .iou3d_calculator import (
 2 |     AxisAlignedBboxOverlaps3D,
 3 |     BboxOverlaps3D,
 4 |     BboxOverlapsNearest3D,
 5 |     axis_aligned_bbox_overlaps_3d,
 6 |     bbox_overlaps_3d,
 7 |     bbox_overlaps_nearest_3d,
 8 | )
 9 | 
10 | __all__ = [
11 |     "BboxOverlapsNearest3D",
12 |     "BboxOverlaps3D",
13 |     "bbox_overlaps_nearest_3d",
14 |     "bbox_overlaps_3d",
15 |     "AxisAlignedBboxOverlaps3D",
16 |     "axis_aligned_bbox_overlaps_3d",
17 | ]
18 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/spconv/__init__.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | 
 3 | from mmcv.cnn.bricks.registry import CONV_LAYERS, NORM_LAYERS
 4 | from torch.nn.parameter import Parameter
 5 | 
 6 | 
 7 | def register_torchsparse():
 8 |     """This func registers torchsparse ops."""
 9 |     from torchsparse.nn import BatchNorm, Conv3d
10 | 
11 |     CONV_LAYERS._register_module(Conv3d, "TorchSparseConv3d", force=True)
12 |     NORM_LAYERS._register_module(BatchNorm, "TorchSparseBatchNorm", force=True)
13 | 
14 | 
15 | register_torchsparse()
16 | 


--------------------------------------------------------------------------------
/mmdet3d/runner/epoch_based_runner.py:
--------------------------------------------------------------------------------
 1 | from mmcv.runner import EpochBasedRunner
 2 | from mmcv.runner.builder import RUNNERS
 3 | 
 4 | @RUNNERS.register_module()
 5 | class CustomEpochBasedRunner(EpochBasedRunner):
 6 |     def set_dataset(self, dataset):
 7 |         self._dataset = dataset
 8 | 
 9 | 
10 |     def train(self, data_loader, **kwargs):
11 |         # update the schedule for data augmentation
12 |         for dataset in self._dataset:
13 |             dataset.set_epoch(self.epoch)
14 |         super().train(data_loader, **kwargs)
15 | 


--------------------------------------------------------------------------------
/mmdet3d/apis/test.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | import torch
 3 | 
 4 | 
 5 | def single_gpu_test(model, data_loader):
 6 |     model.eval()
 7 |     results = []
 8 |     dataset = data_loader.dataset
 9 |     prog_bar = mmcv.ProgressBar(len(dataset))
10 |     for data in data_loader:
11 |         with torch.no_grad():
12 |             result = model(return_loss=False, rescale=True, **data)
13 |         results.extend(result)
14 | 
15 |         batch_size = len(result)
16 |         for _ in range(batch_size):
17 |             prog_bar.update()
18 |     return results
19 | 


--------------------------------------------------------------------------------
/mmdet3d/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
 2 |                                         merge_aug_proposals, merge_aug_scores,
 3 |                                         multiclass_nms)
 4 | 
 5 | from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms, circle_nms
 6 | 
 7 | __all__ = [
 8 |     "multiclass_nms",
 9 |     "merge_aug_proposals",
10 |     "merge_aug_bboxes",
11 |     "merge_aug_scores",
12 |     "merge_aug_masks",
13 |     "box3d_multiclass_nms",
14 |     "aligned_3d_nms",
15 |     "circle_nms",
16 | ]
17 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/pointnet_modules/__init__.py:
--------------------------------------------------------------------------------
 1 | from .builder import build_sa_module
 2 | from .paconv_sa_module import (
 3 |     PAConvCUDASAModule,
 4 |     PAConvCUDASAModuleMSG,
 5 |     PAConvSAModule,
 6 |     PAConvSAModuleMSG,
 7 | )
 8 | from .point_fp_module import PointFPModule
 9 | from .point_sa_module import PointSAModule, PointSAModuleMSG
10 | 
11 | __all__ = [
12 |     "build_sa_module",
13 |     "PointSAModuleMSG",
14 |     "PointSAModule",
15 |     "PointFPModule",
16 |     "PAConvSAModule",
17 |     "PAConvSAModuleMSG",
18 |     "PAConvCUDASAModule",
19 |     "PAConvCUDASAModuleMSG",
20 | ]
21 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/voxel/src/voxelization.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | #include "voxelization.h"
 3 | 
// Python bindings for the voxelization extension.
// NOTE(review): the four bound functions are not defined in this file; they
// are presumably declared in "voxelization.h" — confirm.
namespace voxelization {

// Exposes each C++ entry point to Python under the same name, with a short
// docstring. TORCH_EXTENSION_NAME is supplied by the extension build.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("hard_voxelize", &hard_voxelize, "hard voxelize");
  m.def("dynamic_voxelize", &dynamic_voxelize, "dynamic voxelization");
  m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward, "dynamic point to voxel forward");
  m.def("dynamic_point_to_voxel_backward", &dynamic_point_to_voxel_backward, "dynamic point to voxel backward");
}

} // namespace voxelization
14 | 


--------------------------------------------------------------------------------
/configs/default.yaml:
--------------------------------------------------------------------------------
# Default run settings.
# NOTE(review): values written as ${name} are placeholders. They match the
# "${...}" strings that mmdet3d/utils/config.py:recursive_eval evaluates
# against the top-level config; confirm this file is loaded through it.
seed: 1337
deterministic: false

# Checkpointing: save interval and how many checkpoints to keep.
# NOTE(review): interval is presumably counted in epochs, given the
# epoch-based runner below — confirm.
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5

# Logging: reporting interval and the logger hooks to enable.
log_config:
  interval: 50
  hooks:
    - type: TextLoggerHook
    - type: TensorboardLoggerHook

# No checkpoint is loaded or resumed by default.
load_from: null
resume_from: null

cudnn_benchmark: false
# NOTE(review): presumably mixed-precision loss-scaler settings — confirm
# against the consumer of this key.
fp16:
  loss_scale:
    growth_interval: 2000

# samples_per_gpu / workers_per_gpu are not defined in this file, so the
# placeholders must be supplied by whichever config is combined with it.
data:
  samples_per_gpu: ${samples_per_gpu}
  workers_per_gpu: ${workers_per_gpu}

# max_epochs is defined once here and referenced by runner.max_epochs below.
max_epochs: 20
runner:
  type: CustomEpochBasedRunner
  max_epochs: ${max_epochs}
30 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core.bbox.samplers import (
 2 |     BaseSampler,
 3 |     CombinedSampler,
 4 |     InstanceBalancedPosSampler,
 5 |     IoUBalancedNegSampler,
 6 |     OHEMSampler,
 7 |     PseudoSampler,
 8 |     RandomSampler,
 9 |     SamplingResult,
10 | )
11 | from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler
12 | 
13 | __all__ = [
14 |     "BaseSampler",
15 |     "PseudoSampler",
16 |     "RandomSampler",
17 |     "InstanceBalancedPosSampler",
18 |     "IoUBalancedNegSampler",
19 |     "CombinedSampler",
20 |     "OHEMSampler",
21 |     "SamplingResult",
22 |     "IoUNegPiecewiseSampler",
23 | ]
24 | 


--------------------------------------------------------------------------------
/mmdet3d/utils/config.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | 
 3 | __all__ = ["recursive_eval"]
 4 | 
 5 | 
 6 | def recursive_eval(obj, globals=None):
 7 |     if globals is None:
 8 |         globals = copy.deepcopy(obj)
 9 | 
10 |     if isinstance(obj, dict):
11 |         for key in obj:
12 |             obj[key] = recursive_eval(obj[key], globals)
13 |     elif isinstance(obj, list):
14 |         for k, val in enumerate(obj):
15 |             obj[k] = recursive_eval(val, globals)
16 |     elif isinstance(obj, str) and obj.startswith("${") and obj.endswith("}"):
17 |         obj = eval(obj[2:-1], globals)
18 |         obj = recursive_eval(obj, globals)
19 | 
20 |     return obj
21 | 


--------------------------------------------------------------------------------
/tools/visualization/__init__.py:
--------------------------------------------------------------------------------
 1 | from .osdar23_plot_3d_boxes import osdar23_plot_3d_boxes
 2 | from .osdar23_plot_image_w_labels import osdar23_plot_image_w_labels
 3 | from .osdar23_plot_image_w_lidar_points import osdar23_plot_image_w_lidar_points
 4 | from .tumtraf_plot_3d_boxes import tumtraf_plot_3d_boxes
 5 | from .tumtraf_plot_image_w_labels import tumtraf_plot_image_w_labels
 6 | from .tumtraf_plot_image_w_lidar_points import tumtraf_plot_image_w_lidar_points
 7 | 
 8 | __all__ = [
 9 |     "tumtraf_plot_3d_boxes",
10 |     "tumtraf_plot_image_w_labels",
11 |     "tumtraf_plot_image_w_lidar_points",
12 |     "osdar23_plot_3d_boxes",
13 |     "osdar23_plot_image_w_labels",
14 |     "osdar23_plot_image_w_lidar_points",
15 | ]
16 | 


--------------------------------------------------------------------------------
/mmdet3d/models/fusers/conv.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | import torch
 4 | from torch import nn
 5 | 
 6 | from mmdet3d.models.builder import FUSERS
 7 | 
 8 | __all__ = ["ConvFuser"]
 9 | 
10 | 
@FUSERS.register_module()
class ConvFuser(nn.Sequential):
    """Fuse feature maps by channel concatenation followed by conv-BN-ReLU.

    Args:
        in_channels (list[int]): Channel count of each input tensor; the
            convolution takes ``sum(in_channels)`` input channels.
        out_channels (int): Channel count of the fused output.
    """

    def __init__(self, in_channels: List[int], out_channels: int) -> None:
        # Initialise the nn.Module machinery before assigning attributes on
        # self, as PyTorch requires for subclasses.
        super().__init__(
            nn.Conv2d(sum(in_channels), out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )
        self.in_channels = in_channels
        self.out_channels = out_channels

    def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor:
        """Concatenate ``inputs`` along dim 1 and apply the conv stack."""
        return super().forward(torch.cat(inputs, dim=1))
24 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/coders/__init__.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core.bbox import build_bbox_coder
 2 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder
 3 | from .centerpoint_bbox_coders import CenterPointBBoxCoder
 4 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
 5 | from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder
 6 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder
 7 | from .nms_free_coder import NMSFreeCoder
 8 | from .transfusion_bbox_coder import TransFusionBBoxCoder
 9 | 
10 | __all__ = [
11 |     "build_bbox_coder",
12 |     "DeltaXYZWLHRBBoxCoder",
13 |     "PartialBinBasedBBoxCoder",
14 |     "CenterPointBBoxCoder",
15 |     "AnchorFreeBBoxCoder",
16 |     "GroupFree3DBBoxCoder",
17 |     "NMSFreeCoder",
18 |     "TransFusionBBoxCoder"
19 | ]
20 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/structures/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_box3d import BaseInstance3DBoxes
 2 | from .box_3d_mode import Box3DMode
 3 | from .cam_box3d import CameraInstance3DBoxes
 4 | from .coord_3d_mode import Coord3DMode
 5 | from .depth_box3d import DepthInstance3DBoxes
 6 | from .lidar_box3d import LiDARInstance3DBoxes
 7 | from .utils import (
 8 |     get_box_type,
 9 |     get_proj_mat_by_coord_type,
10 |     limit_period,
11 |     mono_cam_box2vis,
12 |     points_cam2img,
13 |     rotation_3d_in_axis,
14 |     xywhr2xyxyr,
15 | )
16 | 
17 | __all__ = [
18 |     "Box3DMode",
19 |     "BaseInstance3DBoxes",
20 |     "LiDARInstance3DBoxes",
21 |     "CameraInstance3DBoxes",
22 |     "DepthInstance3DBoxes",
23 |     "xywhr2xyxyr",
24 |     "get_box_type",
25 |     "rotation_3d_in_axis",
26 |     "limit_period",
27 |     "points_cam2img",
28 |     "Coord3DMode",
29 |     "mono_cam_box2vis",
30 |     "get_proj_mat_by_coord_type",
31 | ]
32 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/__init__.py:
--------------------------------------------------------------------------------
 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
 2 | from .coders import DeltaXYZWLHRBBoxCoder
 3 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
 4 |                               BboxOverlapsNearest3D,
 5 |                               axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,
 6 |                               bbox_overlaps_nearest_3d)
 7 | from .match_costs import BBox3DL1Cost
 8 | from .samplers import (BaseSampler, CombinedSampler,
 9 |                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
10 |                        PseudoSampler, RandomSampler, SamplingResult)
11 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
12 |                          Coord3DMode, DepthInstance3DBoxes,
13 |                          LiDARInstance3DBoxes, get_box_type, limit_period,
14 |                          mono_cam_box2vis, points_cam2img, xywhr2xyxyr)
15 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/match_costs/match_cost.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST
 3 | 
 4 | 
 5 | @MATCH_COST.register_module()
 6 | class BBox3DL1Cost:
 7 |     """BBox3DL1Cost.
 8 |     Args:
 9 |         weight (int | float, optional): loss_weight
10 |     """
11 | 
12 |     def __init__(self, weight=1.0):
13 |         self.weight = weight
14 | 
15 |     def __call__(self, bbox_pred, gt_bboxes):
16 |         """
17 |         Args:
18 |             bbox_pred (Tensor): Predicted boxes with normalized coordinates
19 |                 (cx, cy, w, h), which are all in range [0, 1]. Shape
20 |                 [num_query, 4].
21 |             gt_bboxes (Tensor): Ground truth boxes with normalized
22 |                 coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
23 |         Returns:
24 |             torch.Tensor: bbox_cost value with weight
25 |         """
26 |         bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
27 |         return bbox_cost * self.weight
28 | 


--------------------------------------------------------------------------------
/mmdet3d/core/points/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_points import BasePoints
 2 | from .cam_points import CameraPoints
 3 | from .depth_points import DepthPoints
 4 | from .lidar_points import LiDARPoints
 5 | 
 6 | __all__ = ["BasePoints", "CameraPoints", "DepthPoints", "LiDARPoints"]
 7 | 
 8 | 
 9 | def get_points_type(points_type):
10 |     """Get the class of points according to coordinate type.
11 | 
12 |     Args:
13 |         points_type (str): The type of points coordinate.
14 |             The valid value are "CAMERA", "LIDAR", or "DEPTH".
15 | 
16 |     Returns:
17 |         class: Points type.
18 |     """
19 |     if points_type == "CAMERA":
20 |         points_cls = CameraPoints
21 |     elif points_type == "LIDAR":
22 |         points_cls = LiDARPoints
23 |     elif points_type == "DEPTH":
24 |         points_cls = DepthPoints
25 |     else:
26 |         raise ValueError(
27 |             'Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"'
28 |             f" are supported, got {points_type}"
29 |         )
30 | 
31 |     return points_cls
32 | 


--------------------------------------------------------------------------------
/mmdet3d/datasets/utils.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | 
 4 | def extract_result_dict(results, key):
 5 |     """Extract and return the data corresponding to key in result dict.
 6 | 
 7 |     ``results`` is a dict output from `pipeline(input_dict)`, which is the
 8 |         loaded data from ``Dataset`` class.
 9 |     The data terms inside may be wrapped in list, tuple and DataContainer, so
10 |         this function essentially extracts data from these wrappers.
11 | 
12 |     Args:
13 |         results (dict): Data loaded using pipeline.
14 |         key (str): Key of the desired data.
15 | 
16 |     Returns:
17 |         np.ndarray | torch.Tensor | None: Data term.
18 |     """
19 |     if key not in results.keys():
20 |         return None
21 |     # results[key] may be data or list[data] or tuple[data]
22 |     # data may be wrapped inside DataContainer
23 |     data = results[key]
24 |     if isinstance(data, (list, tuple)):
25 |         data = data[0]
26 |     if isinstance(data, mmcv.parallel.DataContainer):
27 |         data = data._data
28 |     return data
29 | 


--------------------------------------------------------------------------------
/mmdet3d/models/builder.py:
--------------------------------------------------------------------------------
 1 | from mmcv.utils import Registry
 2 | from mmdet.models.builder import BACKBONES, HEADS, LOSSES, NECKS
 3 | 
# Registries specific to this package; backbones, necks, heads and losses
# reuse the registries imported from mmdet above.
FUSIONMODELS = Registry("fusion_models")
VTRANSFORMS = Registry("vtransforms")
FUSERS = Registry("fusers")
TEMPORAL = Registry("temporal")


def build_backbone(cfg):
    """Build a backbone from ``cfg`` via the ``BACKBONES`` registry."""
    return BACKBONES.build(cfg)


def build_neck(cfg):
    """Build a neck from ``cfg`` via the ``NECKS`` registry."""
    return NECKS.build(cfg)


def build_vtransform(cfg):
    """Build a view transform from ``cfg`` via the ``VTRANSFORMS`` registry."""
    return VTRANSFORMS.build(cfg)


def build_fuser(cfg):
    """Build a fuser from ``cfg`` via the ``FUSERS`` registry."""
    return FUSERS.build(cfg)


def build_temporal(cfg):
    """Build a temporal module from ``cfg`` via the ``TEMPORAL`` registry."""
    return TEMPORAL.build(cfg)


def build_head(cfg):
    """Build a head from ``cfg`` via the ``HEADS`` registry."""
    return HEADS.build(cfg)


def build_loss(cfg):
    """Build a loss from ``cfg`` via the ``LOSSES`` registry."""
    return LOSSES.build(cfg)


def build_fusion_model(cfg, train_cfg=None, test_cfg=None):
    """Build a fusion model from ``cfg`` via the ``FUSIONMODELS`` registry.

    ``train_cfg`` and ``test_cfg`` are passed to the registry as default
    arguments.
    """
    return FUSIONMODELS.build(cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))


def build_model(cfg, train_cfg=None, test_cfg=None):
    """Alias of ``build_fusion_model`` with the same arguments."""
    return build_fusion_model(cfg, train_cfg=train_cfg, test_cfg=test_cfg)
44 | 


--------------------------------------------------------------------------------
/Dockerfile.dev:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.3.1-devel-ubuntu20.04
 2 | 
 3 | ENV PYTHON_VERSION=3.8
 4 | ENV DEBIAN_FRONTEND noninteractive
 5 | 
 6 | RUN apt-get update \
 7 |     && apt-get install -y \
 8 |     wget curl build-essential g++ gcc ffmpeg ninja-build git \
 9 |     libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl \
10 |     libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \
11 |     libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 \
12 |     openmpi-bin openmpi-common libopenmpi-dev libgtk2.0-dev \
13 |     && apt-get clean \
14 |     && rm -rf /var/lib/apt/lists/*
15 | 
16 | RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
17 | ENV PYENV_ROOT="${HOME}/.pyenv"
18 | ENV PATH="${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:$HOME/.local/bin:${PATH}"
19 | 
20 | RUN pyenv install ${PYTHON_VERSION}
21 | RUN pyenv global ${PYTHON_VERSION}
22 | 
23 | RUN pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
24 | RUN pip3 install setuptools==59.5.0
25 | 
26 | WORKDIR /root/mmdet3d
27 | ENTRYPOINT ["tail", "-f", "/dev/null"]
28 | 


--------------------------------------------------------------------------------
/mmdet3d/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from mmcv.utils import get_logger
 3 | 
 4 | 
 5 | def get_root_logger(log_file=None, log_level=logging.INFO, name="mmdet3d"):
 6 |     """Get root logger and add a keyword filter to it.
 7 | 
 8 |     The logger will be initialized if it has not been initialized. By default a
 9 |     StreamHandler will be added. If `log_file` is specified, a FileHandler will
10 |     also be added. The name of the root logger is the top-level package name,
11 |     e.g., "mmdet3d".
12 | 
13 |     Args:
14 |         log_file (str, optional): File path of log. Defaults to None.
15 |         log_level (int, optional): The level of logger.
16 |             Defaults to logging.INFO.
17 |         name (str, optional): The name of the root logger, also used as a
18 |             filter keyword. Defaults to 'mmdet3d'.
19 | 
20 |     Returns:
21 |         :obj:`logging.Logger`: The obtained logger
22 |     """
23 |     logger = get_logger(name=name, log_file=log_file, log_level=log_level)
24 | 
25 |     # add a logging filter
26 |     logging_filter = logging.Filter(name)
27 |     logging_filter.filter = lambda record: record.find(name) != -1
28 | 
29 |     return logger
30 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/furthest_point_sample/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def calc_square_dist(point_feat_a, point_feat_b, norm=True):
 5 |     """Calculating square distance between a and b.
 6 | 
 7 |     Args:
 8 |         point_feat_a (Tensor): (B, N, C) Feature vector of each point.
 9 |         point_feat_b (Tensor): (B, M, C) Feature vector of each point.
10 |         norm (Bool): Whether to normalize the distance.
11 |             Default: True.
12 | 
13 |     Returns:
14 |         Tensor: (B, N, M) Distance between each pair points.
15 |     """
16 |     length_a = point_feat_a.shape[1]
17 |     length_b = point_feat_b.shape[1]
18 |     num_channel = point_feat_a.shape[-1]
19 |     # [bs, n, 1]
20 |     a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1)
21 |     # [bs, 1, m]
22 |     b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1)
23 |     a_square = a_square.repeat((1, 1, length_b))  # [bs, n, m]
24 |     b_square = b_square.repeat((1, length_a, 1))  # [bs, n, m]
25 | 
26 |     coor = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))
27 | 
28 |     dist = a_square + b_square - 2 * coor
29 |     if norm:
30 |         dist = torch.sqrt(dist) / num_channel
31 |     return dist
32 | 


--------------------------------------------------------------------------------
/mmdet3d/models/backbones/resnet.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | import torch
 4 | from mmcv.cnn.resnet import BasicBlock, make_res_layer
 5 | from torch import nn
 6 | 
 7 | from mmdet.models import BACKBONES
 8 | 
 9 | __all__ = ["GeneralizedResNet"]
10 | 
11 | 
@BACKBONES.register_module()
class GeneralizedResNet(nn.ModuleList):
    """Stack of residual stages that returns every stage's output.

    Args:
        in_channels (int): Channel count of the input feature map.
        blocks (list[tuple[int, int, int]]): One
            ``(num_blocks, out_channels, stride)`` tuple per stage. Each
            stage is built with ``make_res_layer`` from ``BasicBlock`` and
            takes the previous stage's ``out_channels`` as its input width.
    """

    def __init__(
        self,
        in_channels: int,
        blocks: List[Tuple[int, int, int]],
    ) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.blocks = blocks

        stage_in = in_channels
        for depth, stage_out, stride in self.blocks:
            stage = make_res_layer(
                BasicBlock,
                stage_in,
                stage_out,
                depth,
                stride=stride,
                dilation=1,
            )
            self.append(stage)
            # The next stage consumes what this one produces.
            stage_in = stage_out

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Apply the stages in order and return the output of each one."""
        stage_outputs = []
        for stage in self:
            x = stage(x)
            stage_outputs.append(x)
        return stage_outputs
41 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/paconv/src/assign_score_withk.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
 2 | 
 3 | #include <torch/torch.h>
 4 | #include <torch/extension.h>
 5 | 
// Forward pass of the assign-score-with-k operator.
// Declared here without a body; the definition lives in another translation
// unit. Takes seven integer arguments, four read-only input tensors
// (points, centers, scores, knn_idx) and one tensor, `output`, passed by
// non-const reference.
// NOTE(review): `output` is presumably written in place; the meaning of the
// integer arguments is not visible from this file — confirm against the
// kernel source.
void assign_score_withk_forward_wrapper(
  int B, int N0, int N1, int M,
  int K, int O, int aggregate,
  const at::Tensor& points,
  const at::Tensor& centers,
  const at::Tensor& scores,
  const at::Tensor& knn_idx,
  at::Tensor& output
  );

// Backward pass of the assign-score-with-k operator.
// Same integer arguments and read-only inputs as the forward declaration,
// plus `grad_out`; the three grad_* tensors are passed by non-const
// reference.
// NOTE(review): the grad_* tensors are presumably written in place — confirm.
void assign_score_withk_backward_wrapper(
  int B, int N0, int N1, int M,
  int K, int O, int aggregate,
  const at::Tensor& grad_out,
  const at::Tensor& points,
  const at::Tensor& centers,
  const at::Tensor& scores,
  const at::Tensor& knn_idx,
  at::Tensor& grad_points,
  at::Tensor& grad_centers,
  at::Tensor& grad_scores
  );

// Expose both wrappers to Python under their C++ names.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("assign_score_withk_forward_wrapper",
        &assign_score_withk_forward_wrapper,
        "Assign score kernel forward (GPU), save memory version");
  m.def("assign_score_withk_backward_wrapper",
        &assign_score_withk_backward_wrapper,
        "Assign score kernel backward (GPU), save memory version");
}
37 | 


--------------------------------------------------------------------------------
/Dockerfile.prod:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.3.1-devel-ubuntu20.04
 2 | 
 3 | ENV PYTHON_VERSION=3.8
 4 | ENV DEBIAN_FRONTEND noninteractive
 5 | 
 6 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6+PTX" \
 7 |     TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
 8 |     FORCE_CUDA="1"
 9 | 
10 | RUN apt-get update \
11 |     && apt-get install -y \
12 |     wget curl build-essential g++ gcc ffmpeg ninja-build git \
13 |     libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl \
14 |     libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \
15 |     libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 \
16 |     openmpi-bin openmpi-common libopenmpi-dev libgtk2.0-dev \
17 |     && apt-get clean \
18 |     && rm -rf /var/lib/apt/lists/*
19 | 
20 | RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
21 | ENV PYENV_ROOT="${HOME}/.pyenv"
22 | ENV PATH="${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:$HOME/.local/bin:${PATH}"
23 | 
24 | RUN pyenv install ${PYTHON_VERSION}
25 | RUN pyenv global ${PYTHON_VERSION}
26 | 
27 | RUN pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
28 | RUN pip3 install setuptools==59.5.0
29 | 
30 | WORKDIR /root/mmdet3d
31 | 
32 | COPY . .
33 | RUN make prod
34 | 
35 | ENTRYPOINT ["tail", "-f", "/dev/null"]
36 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/pointnet_modules/builder.py:
--------------------------------------------------------------------------------
 1 | from mmcv.utils import Registry
 2 | 
 3 | SA_MODULES = Registry("point_sa_module")
 4 | 
 5 | 
 6 | def build_sa_module(cfg, *args, **kwargs):
 7 |     """Build PointNet2 set abstraction (SA) module.
 8 | 
 9 |     Args:
10 |         cfg (None or dict): The SA module config, which should contain:
11 |             - type (str): Module type.
12 |             - module args: Args needed to instantiate an SA module.
13 |         args (argument list): Arguments passed to the `__init__`
14 |             method of the corresponding module.
15 |         kwargs (keyword arguments): Keyword arguments passed to the `__init__`
16 |             method of the corresponding SA module .
17 | 
18 |     Returns:
19 |         nn.Module: Created SA module.
20 |     """
21 |     if cfg is None:
22 |         cfg_ = dict(type="PointSAModule")
23 |     else:
24 |         if not isinstance(cfg, dict):
25 |             raise TypeError("cfg must be a dict")
26 |         if "type" not in cfg:
27 |             raise KeyError('the cfg dict must contain the key "type"')
28 |         cfg_ = cfg.copy()
29 | 
30 |     module_type = cfg_.pop("type")
31 |     if module_type not in SA_MODULES:
32 |         raise KeyError(f"Unrecognized module type {module_type}")
33 |     else:
34 |         sa_module = SA_MODULES.get(module_type)
35 | 
36 |     module = sa_module(*args, **kwargs, **cfg_)
37 | 
38 |     return module
39 | 


--------------------------------------------------------------------------------
/mmdet3d/models/fusers/add.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from typing import List
 3 | 
 4 | import torch
 5 | from torch import nn
 6 | 
 7 | from mmdet3d.models.builder import FUSERS
 8 | 
 9 | __all__ = ["AddFuser"]
10 | 
11 | 
@FUSERS.register_module()
class AddFuser(nn.Module):
    """Fuse feature maps by projecting each to a common width and averaging.

    Every input gets its own 3x3 conv + BatchNorm + ReLU projection to
    ``out_channels``; the projected maps are then averaged. During training,
    with probability ``dropout`` one randomly chosen input is left out of
    the average.

    Args:
        in_channels (list[int]): Channel count of each input tensor, in the
            order the inputs are passed to ``forward``.
        out_channels (int): Channel count of the fused output.
        dropout (float): Probability of dropping one input per training
            forward pass. Defaults to 0.
    """

    def __init__(self, in_channels: List[int], out_channels: int, dropout: float = 0) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.dropout = dropout

        self.transforms = nn.ModuleList(
            [
                nn.Sequential(
                    nn.Conv2d(channels, out_channels, 3, padding=1, bias=False),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(True),
                )
                for channels in in_channels
            ]
        )

    def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor:
        """Project each input and return the (possibly dropped-out) average.

        Args:
            inputs (list[Tensor]): One feature map per entry of
                ``in_channels``.

        Returns:
            Tensor: Weighted mean of the projected feature maps.
        """
        features = [transform(x) for transform, x in zip(self.transforms, inputs)]

        weights = [1] * len(inputs)
        # Only drop an input when another one remains; with a single input,
        # zeroing its weight would make the normaliser below zero.
        if self.training and len(inputs) > 1 and random.random() < self.dropout:
            weights[random.randint(0, len(inputs) - 1)] = 0

        return sum(w * f for w, f in zip(weights, features)) / sum(weights)
41 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/interpolate/three_nn.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from typing import Tuple
 4 | 
 5 | from . import interpolate_ext
 6 | 
 7 | 
class ThreeNN(Function):
    """Autograd function wrapping the ``three_nn_wrapper`` extension call.

    The operation is treated as non-differentiable: ``backward`` returns
    ``None`` for both inputs.
    """

    @staticmethod
    def forward(
        ctx, target: torch.Tensor, source: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Find the top-3 nearest neighbors of the target set from the source
        set.

        Args:
            target (Tensor): shape (B, N, 3), points set that needs to
                find the nearest neighbors. Must be contiguous.
            source (Tensor): shape (B, M, 3), points set that is used
                to find the nearest neighbors of points in target set.
                Must be contiguous.

        Returns:
            Tuple[Tensor, Tensor]:
                - dist: shape (B, N, 3), L2 distance of each point in target
                  set to their corresponding nearest neighbors (the square
                  root of the ``dist2`` buffer).
                - idx: shape (B, N, 3), int tensor, marked non-differentiable.
                  NOTE(review): presumably the indices of those neighbors in
                  ``source`` — confirm against the extension.
        """
        assert target.is_contiguous()
        assert source.is_contiguous()

        B, N, _ = target.size()
        m = source.size(1)
        # Output buffers allocated on the GPU.
        # NOTE(review): presumably filled in place by the extension call
        # below — confirm.
        dist2 = torch.cuda.FloatTensor(B, N, 3)
        idx = torch.cuda.IntTensor(B, N, 3)

        interpolate_ext.three_nn_wrapper(B, N, m, target, source, dist2, idx)

        ctx.mark_non_differentiable(idx)

        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        """Return no gradient for either ``target`` or ``source``."""
        return None, None


# Functional entry point: call as ``three_nn(target, source)``.
three_nn = ThreeNN.apply
46 | 


--------------------------------------------------------------------------------
/tools/visualization/create_video.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from argparse import ArgumentParser, Namespace
 3 | 
 4 | import cv2
 5 | 
 6 | 
 7 | def get_args() -> Namespace:
 8 |     """
 9 |     Parse given arguments for create_video function.
10 | 
11 |     Returns:
12 |         Namespace: parsed arguments
13 |     """
14 |     parser = ArgumentParser()
15 | 
16 |     parser.add_argument("-s", "--source-folder-dir", type=str, required=True)
17 |     parser.add_argument("-t", "--target-path", type=str, required=True)
18 | 
19 |     return parser.parse_args()
20 | 
21 | 
def create_video(source_folder_dir: str, target_path: str) -> None:
    """Assemble the images of a folder into a 10 fps video file.

    Files ending in .png, .jpg or .jpeg (case-insensitive) are written in
    sorted filename order. The frame size is taken from the first image.
    Nothing is created when the folder holds no such files.

    Args:
        source_folder_dir (str): Folder containing the frame images.
        target_path (str): Path of the video file to write. Its parent
            directory is created if needed.
    """
    images = sorted(
        name
        for name in os.listdir(source_folder_dir)
        if name.lower().endswith((".png", ".jpg", ".jpeg"))
    )
    if not images:
        return

    # A bare filename has an empty dirname, and os.makedirs("") raises, so
    # only create the directory when there is one.
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True, mode=0o777)

    frame = cv2.imread(os.path.join(source_folder_dir, images[0]))
    height, width, _ = frame.shape

    # 0x7634706D is the characters "mp4v" packed low byte first.
    fourcc = 0x7634706D
    video = cv2.VideoWriter(target_path, fourcc, 10, (width, height))
    try:
        for image in images:
            video.write(cv2.imread(os.path.join(source_folder_dir, image)))
    finally:
        # Release the writer even if reading or writing a frame fails, so
        # the output file is finalised.
        video.release()
        cv2.destroyAllWindows()
47 | 
48 | 
49 | if __name__ == "__main__":  # script entry point: parse the CLI, then encode the video
50 |     args = get_args()
51 |     create_video(args.source_folder_dir, args.target_path)
52 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/knn/src/knn.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap
 2 | 
 3 | #include <torch/serialize/tensor.h>
 4 | #include <torch/extension.h>
 5 | #include <vector>
 6 | #include <THC/THC.h>
 7 | #include <ATen/cuda/CUDAContext.h>
 8 | 
 9 | extern THCState *state;
10 | 
11 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
12 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
13 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
14 | 
15 | // Launcher for the KNN kernel; presumably defined in this extension's CUDA source - confirm.
16 | void knn_kernel_launcher(
17 |     int b,
18 |     int n,
19 |     int m,
20 |     int nsample,
21 |     const float *xyz,
22 |     const float *new_xyz,
23 |     int *idx,
24 |     float *dist2,
25 |     cudaStream_t stream
26 |     );
27 | 
28 | void knn_wrapper(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
29 | {
30 |     // Python-facing entry: forwards raw tensor pointers to knn_kernel_launcher
31 |     // on the current CUDA stream. Outputs are validated like the inputs.
32 |     CHECK_INPUT(new_xyz_tensor);
33 |     CHECK_INPUT(xyz_tensor);
34 |     CHECK_INPUT(idx_tensor);
35 |     CHECK_INPUT(dist2_tensor);
36 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
37 |     const float *xyz = xyz_tensor.data_ptr<float>();
38 |     int *idx = idx_tensor.data_ptr<int>();
39 |     float *dist2 = dist2_tensor.data_ptr<float>();
40 |     knn_kernel_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, at::cuda::getCurrentCUDAStream());
41 | }
42 | 
43 | 
44 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {  // registers knn_wrapper under the same name
45 |     m.def("knn_wrapper", &knn_wrapper, "knn_wrapper");
46 | }
47 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/transfusion/lidar/voxelnet-1600g-0xy16-0z4.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 256 x 256 x 16
 4 | voxel_size: [0.16, 0.16, 0.4]
 5 | grid_size: [1600, 1600, 41]  # 256 / 0.16 = 1600; 16 / 0.4 = 40, listed with one extra z cell
 6 | 
 7 | voxel_max_points: 15
 8 | voxel_max_voxels: [200000, 200000]
 9 | out_size_factor: 8
10 | 
11 | samples_per_gpu: 6
12 | workers_per_gpu: 6
13 | 
14 | max_epochs: 20
15 | 
16 | optimizer:
17 |   type: AdamW
18 |   lr: 6.6e-05
19 |   weight_decay: 0.01
20 | 
21 | momentum_config:
22 |   policy: cyclic
23 |   cyclic_times: 1
24 |   step_ratio_up: 0.4
25 | 
26 | lr_config:
27 |   cyclic_times: 1
28 |   policy: cyclic
29 |   step_ratio_up: 0.4
30 | 
31 | optimizer_config:
32 |   grad_clip:
33 |     max_norm: 25
34 |     norm_type: 2
35 | 
36 | model:
37 |   encoders:
38 |     lidar:
39 |       voxelize:
40 |         max_num_points: ${voxel_max_points}
41 |         point_cloud_range: ${point_cloud_range}
42 |         voxel_size: ${voxel_size}
43 |         max_voxels: ${voxel_max_voxels}
44 |       backbone:
45 |         type: SparseEncoder
46 |         in_channels: ${use_dim}
47 |         sparse_shape: ${grid_size}
48 |         output_channels: 128
49 |         order:
50 |           - conv
51 |           - norm
52 |           - act
53 |         encoder_channels:
54 |           - [16, 16, 32]
55 |           - [32, 32, 64]
56 |           - [64, 64, 128]
57 |           - [128, 128]
58 |         encoder_paddings:
59 |           - [0, 0, 1]
60 |           - [0, 0, 1]
61 |           - [0, 0, [1, 1, 0]]
62 |           - [0, 0]
63 |         block_type: basicblock
64 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/baseline/transfusion/lidar/voxelnet-1600g-0xy1-0z20.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 160 x 160 x 10
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600; 10 / 0.2 = 50, listed with one extra z cell
 6 | 
 7 | voxel_max_points: 10
 8 | voxel_max_voxels: [120000, 120000]
 9 | 
10 | deterministic: true
11 | 
12 | samples_per_gpu: 6
13 | workers_per_gpu: 6
14 | 
15 | max_epochs: 20
16 | 
17 | optimizer:
18 |   type: AdamW
19 |   lr: 6.6e-05
20 |   weight_decay: 0.01
21 | 
22 | momentum_config:
23 |   policy: cyclic
24 |   cyclic_times: 1
25 |   step_ratio_up: 0.4
26 | 
27 | lr_config:
28 |   cyclic_times: 1
29 |   policy: cyclic
30 |   step_ratio_up: 0.4
31 | 
32 | optimizer_config:
33 |   grad_clip:
34 |     max_norm: 25
35 |     norm_type: 2
36 | 
37 | model:
38 |   encoders:
39 |     lidar:
40 |       voxelize:
41 |         max_num_points: ${voxel_max_points}
42 |         point_cloud_range: ${point_cloud_range}
43 |         voxel_size: ${voxel_size}
44 |         max_voxels: ${voxel_max_voxels}
45 |       backbone:
46 |         type: SparseEncoder
47 |         in_channels: ${use_dim}
48 |         sparse_shape: ${grid_size}
49 |         output_channels: 128
50 |         order:
51 |           - conv
52 |           - norm
53 |           - act
54 |         encoder_channels:
55 |           - [16, 16, 32]
56 |           - [32, 32, 64]
57 |           - [64, 64, 128]
58 |           - [128, 128]
59 |         encoder_paddings:
60 |           - [0, 0, 1]
61 |           - [0, 0, 1]
62 |           - [0, 0, [1, 1, 0]]
63 |           - [0, 0]
64 |         block_type: basicblock
65 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/gather_points/gather_points.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from . import gather_points_ext
 5 | 
 6 | 
 7 | class GatherPoints(Function):
 8 |     """Gather Points.
 9 | 
10 |     Gather points with given index.
11 |     """
12 | 
13 |     @staticmethod
14 |     def forward(ctx, features: torch.Tensor, indices: torch.Tensor) -> torch.Tensor:
15 |         """forward.
16 | 
17 |         Args:
18 |             features (Tensor): (B, C, N) features to gather.
19 |             indices (Tensor): (B, M) where M is the number of points.
20 | 
21 |         Returns:
22 |             Tensor: (B, C, M) where M is the number of points.
23 |         """
24 |         assert features.is_contiguous()
25 |         assert indices.is_contiguous()
26 | 
27 |         B, npoint = indices.size()
28 |         _, C, N = features.size()
29 |         output = torch.cuda.FloatTensor(B, C, npoint)
30 | 
31 |         gather_points_ext.gather_points_wrapper(B, C, N, npoint, features, indices, output)
32 | 
33 |         ctx.for_backwards = (indices, C, N)
34 |         ctx.mark_non_differentiable(indices)
35 |         return output
36 | 
37 |     @staticmethod
38 |     def backward(ctx, grad_out):
39 |         idx, C, N = ctx.for_backwards
40 |         B, npoint = idx.size()
41 | 
42 |         grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
43 |         grad_out_data = grad_out.data.contiguous()
44 |         gather_points_ext.gather_points_grad_wrapper(
45 |             B, C, N, npoint, grad_out_data, idx, grad_features.data
46 |         )
47 |         return grad_features, None
48 | 
49 | 
50 | gather_points = GatherPoints.apply
51 | 


--------------------------------------------------------------------------------
/mmdet3d/core/bbox/util.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def normalize_bbox(bboxes, pc_range):
 5 | 
 6 |     cx = bboxes[..., 0:1]
 7 |     cy = bboxes[..., 1:2]
 8 |     cz = bboxes[..., 2:3]
 9 |     w = bboxes[..., 3:4].log()
10 |     l = bboxes[..., 4:5].log()
11 |     h = bboxes[..., 5:6].log()
12 | 
13 |     rot = bboxes[..., 6:7]
14 |     if bboxes.size(-1) > 7:
15 |         vx = bboxes[..., 7:8]
16 |         vy = bboxes[..., 8:9]
17 |         normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1)
18 |     else:
19 |         normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1)
20 |     return normalized_bboxes
21 | 
22 | 
23 | def denormalize_bbox(normalized_bboxes, pc_range):
24 |     # rotation
25 |     rot_sine = normalized_bboxes[..., 6:7]
26 | 
27 |     rot_cosine = normalized_bboxes[..., 7:8]
28 |     rot = torch.atan2(rot_sine, rot_cosine)
29 | 
30 |     # center in the bev
31 |     cx = normalized_bboxes[..., 0:1]
32 |     cy = normalized_bboxes[..., 1:2]
33 |     cz = normalized_bboxes[..., 4:5]
34 | 
35 |     # size
36 |     w = normalized_bboxes[..., 2:3]
37 |     l = normalized_bboxes[..., 3:4]
38 |     h = normalized_bboxes[..., 5:6]
39 | 
40 |     w = w.exp()
41 |     l = l.exp()
42 |     h = h.exp()
43 |     if normalized_bboxes.size(-1) > 8:
44 |         # velocity
45 |         vx = normalized_bboxes[:, 8:9]
46 |         vy = normalized_bboxes[:, 9:10]
47 |         denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)
48 |     else:
49 |         denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)
50 |     return denormalized_bboxes
51 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/transfusion/lidar/voxelnet-1600g-0xy16-0z4-gtp15.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 256 x 256 x 16
 4 | voxel_size: [0.16, 0.16, 0.4]
 5 | grid_size: [1600, 1600, 41]  # 256 / 0.16 = 1600; 16 / 0.4 = 40, listed with one extra z cell
 6 | 
 7 | voxel_max_points: 15
 8 | voxel_max_voxels: [200000, 200000]
 9 | out_size_factor: 8
10 | 
11 | samples_per_gpu: 6
12 | workers_per_gpu: 6
13 | 
14 | max_epochs: 20
15 | 
16 | augment_gt_paste:
17 |   max_epoch: 15
18 | 
19 | optimizer:
20 |   type: AdamW
21 |   lr: 6.6e-05
22 |   weight_decay: 0.01
23 | 
24 | momentum_config:
25 |   policy: cyclic
26 |   cyclic_times: 1
27 |   step_ratio_up: 0.4
28 | 
29 | lr_config:
30 |   cyclic_times: 1
31 |   policy: cyclic
32 |   step_ratio_up: 0.4
33 | 
34 | optimizer_config:
35 |   grad_clip:
36 |     max_norm: 25
37 |     norm_type: 2
38 | 
39 | model:
40 |   encoders:
41 |     lidar:
42 |       voxelize:
43 |         max_num_points: ${voxel_max_points}
44 |         point_cloud_range: ${point_cloud_range}
45 |         voxel_size: ${voxel_size}
46 |         max_voxels: ${voxel_max_voxels}
47 |       backbone:
48 |         type: SparseEncoder
49 |         in_channels: ${use_dim}
50 |         sparse_shape: ${grid_size}
51 |         output_channels: 128
52 |         order:
53 |           - conv
54 |           - norm
55 |           - act
56 |         encoder_channels:
57 |           - [16, 16, 32]
58 |           - [32, 32, 64]
59 |           - [64, 64, 128]
60 |           - [128, 128]
61 |         encoder_paddings:
62 |           - [0, 0, 1]
63 |           - [0, 0, 1]
64 |           - [0, 0, [1, 1, 0]]
65 |           - [0, 0]
66 |         block_type: basicblock
67 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/baseline/transfusion/lidar/voxelnet-1600g-0xy1-0z20-gtp15.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 160 x 160 x 10
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600; 10 / 0.2 = 50, listed with one extra z cell
 6 | 
 7 | voxel_max_points: 10
 8 | voxel_max_voxels: [120000, 120000]
 9 | 
10 | deterministic: true
11 | 
12 | samples_per_gpu: 6
13 | workers_per_gpu: 6
14 | 
15 | max_epochs: 20
16 | 
17 | augment_gt_paste:
18 |   max_epoch: 15
19 | 
20 | optimizer:
21 |   type: AdamW
22 |   lr: 6.6e-05
23 |   weight_decay: 0.01
24 | 
25 | momentum_config:
26 |   policy: cyclic
27 |   cyclic_times: 1
28 |   step_ratio_up: 0.4
29 | 
30 | lr_config:
31 |   cyclic_times: 1
32 |   policy: cyclic
33 |   step_ratio_up: 0.4
34 | 
35 | optimizer_config:
36 |   grad_clip:
37 |     max_norm: 25
38 |     norm_type: 2
39 | 
40 | model:
41 |   encoders:
42 |     lidar:
43 |       voxelize:
44 |         max_num_points: ${voxel_max_points}
45 |         point_cloud_range: ${point_cloud_range}
46 |         voxel_size: ${voxel_size}
47 |         max_voxels: ${voxel_max_voxels}
48 |       backbone:
49 |         type: SparseEncoder
50 |         in_channels: ${use_dim}
51 |         sparse_shape: ${grid_size}
52 |         output_channels: 128
53 |         order:
54 |           - conv
55 |           - norm
56 |           - act
57 |         encoder_channels:
58 |           - [16, 16, 32]
59 |           - [32, 32, 64]
60 |           - [64, 64, 128]
61 |           - [128, 128]
62 |         encoder_paddings:
63 |           - [0, 0, 1]
64 |           - [0, 0, 1]
65 |           - [0, 0, [1, 1, 0]]
66 |           - [0, 0]
67 |         block_type: basicblock
68 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   type: TBEVFusion
 3 |   max_queue_length: ${temporal_cache_length}
 4 |   encoders: null
 5 |   fuser: null
 6 |   temporal:
 7 |     type: ConvGRU
 8 |     in_channels: 256
 9 |     hidden_channels: [256]
10 |     kernel_size: [3, 3]
11 |     bias: true
12 |   heads:
13 |     map: null
14 | 
15 | temporal_mode: true
16 | val_online_mode: true
17 | test_online_mode: true
18 | 
19 | temporal_aware_gt_paste: true
20 | apply_same_aug_to_seq: true
21 | 
22 | augment_gt_paste: # default
23 |   apply_same_aug_to_seq: true
24 |   apply_collision_check: true
25 |   apply_temporal_forward: true
26 |   cls_rot_lim:
27 |     lidar__cuboid__buffer_stop:
28 |       - normal
29 |       - 0.0
30 |       - 0.0
31 |     lidar__cuboid__catenary_pole:
32 |       - normal
33 |       - 0.0
34 |       - 0.16490484576995193
35 |     lidar__cuboid__person:
36 |       - normal
37 |       - 0.0
38 |       - 0.16723854104003127
39 |     lidar__cuboid__road_vehicle:
40 |       - normal
41 |       - 0.0
42 |       - 0.07090241143317916
43 |     lidar__cuboid__signal_pole:
44 |       - normal
45 |       - 0.0
46 |       - 0.06715749315684862
47 |   cls_trans_lim:
48 |     lidar__cuboid__buffer_stop:
49 |       - uniform
50 |       - 0.0
51 |       - 0.0
52 |     lidar__cuboid__catenary_pole:
53 |       - uniform
54 |       - 0.0
55 |       - 0.837096823556553
56 |     lidar__cuboid__person:
57 |       - uniform
58 |       - 0.0
59 |       - 2.3655256268321976
60 |     lidar__cuboid__road_vehicle:
61 |       - uniform
62 |       - 0.0
63 |       - 0.8333046350100968
64 |     lidar__cuboid__signal_pole:
65 |       - uniform
66 |       - 0.0
67 |       - 3.3953008285651545
68 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   type: TBEVFusion
 3 |   max_queue_length: ${temporal_cache_length}
 4 |   encoders: null
 5 |   fuser: null
 6 |   temporal:
 7 |     type: ConvLSTM
 8 |     in_channels: 256
 9 |     hidden_channels: [256]
10 |     kernel_size: [3, 3]
11 |     bias: true
12 |   heads:
13 |     map: null
14 | 
15 | temporal_mode: true
16 | val_online_mode: true
17 | test_online_mode: true
18 | 
19 | # Keys below are declared exactly once: a repeated top-level key is invalid
20 | # YAML, and loaders that accept it silently keep only the last occurrence.
21 | temporal_aware_gt_paste: true
22 | apply_same_aug_to_seq: true
23 | 
24 | augment_gt_paste: # default
25 |   apply_same_aug_to_seq: true
26 |   apply_collision_check: true
27 |   apply_temporal_forward: true
28 |   cls_rot_lim:
29 |     lidar__cuboid__buffer_stop:
30 |       - normal
31 |       - 0.0
32 |       - 0.0
33 |     lidar__cuboid__catenary_pole:
34 |       - normal
35 |       - 0.0
36 |       - 0.16490484576995193
37 |     lidar__cuboid__person:
38 |       - normal
39 |       - 0.0
40 |       - 0.16723854104003127
41 |     lidar__cuboid__road_vehicle:
42 |       - normal
43 |       - 0.0
44 |       - 0.07090241143317916
45 |     lidar__cuboid__signal_pole:
46 |       - normal
47 |       - 0.0
48 |       - 0.06715749315684862
49 |   cls_trans_lim:
50 |     lidar__cuboid__buffer_stop:
51 |       - uniform
52 |       - 0.0
53 |       - 0.0
54 |     lidar__cuboid__catenary_pole:
55 |       - uniform
56 |       - 0.0
57 |       - 0.837096823556553
58 |     lidar__cuboid__person:
59 |       - uniform
60 |       - 0.0
61 |       - 2.3655256268321976
62 |     lidar__cuboid__road_vehicle:
63 |       - uniform
64 |       - 0.0
65 |       - 0.8333046350100968
66 |     lidar__cuboid__signal_pole:
67 |       - uniform
68 |       - 0.0
69 |       - 3.3953008285651545
70 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/ball_query/ball_query.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from . import ball_query_ext
 5 | 
 6 | 
 7 | class BallQuery(Function):
 8 |     """Ball Query.
 9 | 
10 |     Find nearby points in spherical space.
11 |     """
12 | 
13 |     @staticmethod
14 |     def forward(
15 |         ctx,
16 |         min_radius: float,
17 |         max_radius: float,
18 |         sample_num: int,
19 |         xyz: torch.Tensor,
20 |         center_xyz: torch.Tensor,
21 |     ) -> torch.Tensor:
22 |         """forward.
23 | 
24 |         Args:
25 |             min_radius (float): minimum radius of the balls.
26 |             max_radius (float): maximum radius of the balls.
27 |             sample_num (int): maximum number of features in the balls.
28 |             xyz (Tensor): (B, N, 3) xyz coordinates of the features.
29 |             center_xyz (Tensor): (B, npoint, 3) centers of the ball query.
30 | 
31 |         Returns:
32 |             Tensor: (B, npoint, nsample) tensor with the indicies of
33 |                 the features that form the query balls.
34 |         """
35 |         assert center_xyz.is_contiguous()
36 |         assert xyz.is_contiguous()
37 |         assert min_radius < max_radius
38 | 
39 |         B, N, _ = xyz.size()
40 |         npoint = center_xyz.size(1)
41 |         idx = torch.cuda.IntTensor(B, npoint, sample_num).zero_()
42 | 
43 |         ball_query_ext.ball_query_wrapper(
44 |             B, N, npoint, min_radius, max_radius, sample_num, center_xyz, xyz, idx
45 |         )
46 |         ctx.mark_non_differentiable(idx)
47 |         return idx
48 | 
49 |     @staticmethod
50 |     def backward(ctx, a=None):
51 |         return None, None, None, None
52 | 
53 | 
54 | ball_query = BallQuery.apply
55 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 256 x 256 x 16
 4 | voxel_size: [0.16, 0.16, 0.4]
 5 | grid_size: [1600, 1600, 41]  # 256 / 0.16 = 1600; 16 / 0.4 = 40, listed with one extra z cell
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 15
14 | voxel_max_voxels: [200000, 200000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | optimizer:
22 |   type: AdamW
23 |   lr: 6.6e-05
24 |   weight_decay: 0.01
25 |   paramwise_cfg:
26 |     custom_keys:
27 |       encoders.lidar.backbone:
28 |         lr_mult: 0.0  # presumably zeroes the lidar backbone's lr ("-lfrz" in the file name) - confirm
29 | 
30 | momentum_config:
31 |   policy: cyclic
32 |   cyclic_times: 1
33 |   step_ratio_up: 0.4
34 | 
35 | lr_config:
36 |   policy: CosineAnnealing
37 |   warmup: linear
38 |   warmup_iters: 500
39 |   warmup_ratio: 0.33333333
40 |   min_lr_ratio: 1.0e-3
41 | 
42 | optimizer_config:
43 |   grad_clip:
44 |     max_norm: 25
45 |     norm_type: 2
46 | 
47 | model:
48 |   encoders:
49 |     lidar:
50 |       voxelize:
51 |         max_num_points: ${voxel_max_points}
52 |         point_cloud_range: ${point_cloud_range}
53 |         voxel_size: ${voxel_size}
54 |         max_voxels: ${voxel_max_voxels}
55 |       backbone:
56 |         type: SparseEncoder
57 |         in_channels: ${use_dim}
58 |         sparse_shape: ${grid_size}
59 |         output_channels: 128
60 |         order:
61 |           - conv
62 |           - norm
63 |           - act
64 |         encoder_channels:
65 |           - [16, 16, 32]
66 |           - [32, 32, 64]
67 |           - [64, 64, 128]
68 |           - [128, 128]
69 |         encoder_paddings:
70 |           - [0, 0, 1]
71 |           - [0, 0, 1]
72 |           - [0, 0, [1, 1, 0]]
73 |           - [0, 0]
74 |         block_type: basicblock
75 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 256 x 256 x 16
 4 | voxel_size: [0.16, 0.16, 0.4]
 5 | grid_size: [1600, 1600, 41]  # 256 / 0.16 = 1600; 16 / 0.4 = 40, listed with one extra z cell
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 15
14 | voxel_max_voxels: [200000, 200000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | optimizer:
22 |   type: AdamW
23 |   lr: 6.6e-05
24 |   weight_decay: 0.01
25 |   paramwise_cfg:
26 |     custom_keys:
27 |       encoders.lidar.backbone:
28 |         lr_mult: 0.0  # presumably zeroes the lidar backbone's lr ("-lfrz" in the file name) - confirm
29 | 
30 | momentum_config:
31 |   policy: cyclic
32 |   cyclic_times: 1
33 |   step_ratio_up: 0.4
34 | 
35 | lr_config:
36 |   policy: CosineAnnealing
37 |   warmup: linear
38 |   warmup_iters: 500
39 |   warmup_ratio: 0.33333333
40 |   min_lr_ratio: 1.0e-3
41 | 
42 | optimizer_config:
43 |   grad_clip:
44 |     max_norm: 25
45 |     norm_type: 2
46 | 
47 | model:
48 |   encoders:
49 |     lidar:
50 |       voxelize:
51 |         max_num_points: ${voxel_max_points}
52 |         point_cloud_range: ${point_cloud_range}
53 |         voxel_size: ${voxel_size}
54 |         max_voxels: ${voxel_max_voxels}
55 |       backbone:
56 |         type: SparseEncoder
57 |         in_channels: ${use_dim}
58 |         sparse_shape: ${grid_size}
59 |         output_channels: 128
60 |         order:
61 |           - conv
62 |           - norm
63 |           - act
64 |         encoder_channels:
65 |           - [16, 16, 32]
66 |           - [32, 32, 64]
67 |           - [64, 64, 128]
68 |           - [128, 128]
69 |         encoder_paddings:
70 |           - [0, 0, 1]
71 |           - [0, 0, 1]
72 |           - [0, 0, [1, 1, 0]]
73 |           - [0, 0]
74 |         block_type: basicblock
75 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   type: TBEVFusion
 3 |   max_queue_length: ${temporal_cache_length}
 4 |   encoders: null
 5 |   fuser: null
 6 |   temporal:
 7 |     type: ConvGRU
 8 |     in_channels: 256
 9 |     hidden_channels: [256]
10 |     kernel_size: [3, 3]
11 |     bias: true
12 |   heads:
13 |     map: null
14 | 
15 | temporal_mode: true
16 | val_online_mode: true
17 | test_online_mode: true
18 | 
19 | apply_same_aug_to_seq: true
20 | 
21 | augment_gt_paste: # default
22 |   apply_same_aug_to_seq: true
23 |   apply_collision_check: true
24 |   apply_temporal_forward: true
25 |   sampler:
26 |     cls_trans_lim:
27 |       CAR: ["uniform", 0.0, 0.21142457549557347]
28 |       TRAILER: ["uniform", 0.0, 2.061603454258997]
29 |       TRUCK: ["uniform", 0.0, 0.19225818659676847]
30 |       VAN: ["uniform", 0.0, 0.12440957907943972]
31 |       # PEDESTRIAN: ["uniform", 0.0, 1.9631158717540234] WE DO NOT SAMPLE PEDESTRIANS
32 |       PEDESTRIAN: ["uniform", 0.0, 0.0]
33 |       BUS: ["uniform", 0.0, 1.1107448011494194]
34 |       # MOTORCYCLE: ["uniform", 0.0, 0.25374656183458383] WE DO NOT SAMPLE MOTORCYCLES
35 |       MOTORCYCLE: ["uniform", 0.0, 0.0]
36 |       BICYCLE: ["uniform", 0.0, 0.5918484046343995]
37 |       EMERGENCY_VEHICLE: ["uniform", 0.0, 0.6620038588093282]
38 |     cls_rot_lim:
39 |       CAR: ["normal", 0.0, 0.08516856382385488]
40 |       TRAILER: ["normal", 0.0, 0.19199153770261218]
41 |       TRUCK: ["normal", 0.0, 0.12290075954655998]
42 |       VAN: ["normal", 0.0, 0.18803376690181833]
43 |       # PEDESTRIAN: ["normal", 0.0, 0.2227474538090619] WE DO NOT SAMPLE PEDESTRIANS
44 |       PEDESTRIAN: ["normal", 0.0, 0.0]
45 |       BUS: ["normal", 0.0, 0.19959521881110118]
46 |       # MOTORCYCLE: ["normal", 0.0, 0.018643425075835468] WE DO NOT SAMPLE MOTORCYCLES
47 |       MOTORCYCLE: ["normal", 0.0, 0.0]
48 |       BICYCLE: ["normal", 0.0, 0.21636682539331192]
49 |       EMERGENCY_VEHICLE: ["normal", 0.0, 0.1328186144435953]
50 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   type: TBEVFusion
 3 |   max_queue_length: ${temporal_cache_length}
 4 |   encoders: null
 5 |   fuser: null
 6 |   temporal:
 7 |     type: ConvLSTM
 8 |     in_channels: 256
 9 |     hidden_channels: [256]
10 |     kernel_size: [3, 3]
11 |     bias: true
12 |   heads:
13 |     map: null
14 | 
15 | temporal_mode: true
16 | val_online_mode: true
17 | test_online_mode: true
18 | 
19 | apply_same_aug_to_seq: true
20 | 
21 | augment_gt_paste: # default
22 |   apply_same_aug_to_seq: true
23 |   apply_collision_check: true
24 |   apply_temporal_forward: true
25 |   sampler:
26 |     cls_trans_lim:
27 |       CAR: ["uniform", 0.0, 0.21142457549557347]
28 |       TRAILER: ["uniform", 0.0, 2.061603454258997]
29 |       TRUCK: ["uniform", 0.0, 0.19225818659676847]
30 |       VAN: ["uniform", 0.0, 0.12440957907943972]
31 |       # PEDESTRIAN: ["uniform", 0.0, 1.9631158717540234] WE DO NOT SAMPLE PEDESTRIANS
32 |       PEDESTRIAN: ["uniform", 0.0, 0.0]
33 |       BUS: ["uniform", 0.0, 1.1107448011494194]
34 |       # MOTORCYCLE: ["uniform", 0.0, 0.25374656183458383] WE DO NOT SAMPLE MOTORCYCLES
35 |       MOTORCYCLE: ["uniform", 0.0, 0.0]
36 |       BICYCLE: ["uniform", 0.0, 0.5918484046343995]
37 |       EMERGENCY_VEHICLE: ["uniform", 0.0, 0.6620038588093282]
38 |     cls_rot_lim:
39 |       CAR: ["normal", 0.0, 0.08516856382385488]
40 |       TRAILER: ["normal", 0.0, 0.19199153770261218]
41 |       TRUCK: ["normal", 0.0, 0.12290075954655998]
42 |       VAN: ["normal", 0.0, 0.18803376690181833]
43 |       # PEDESTRIAN: ["normal", 0.0, 0.2227474538090619] WE DO NOT SAMPLE PEDESTRIANS
44 |       PEDESTRIAN: ["normal", 0.0, 0.0]
45 |       BUS: ["normal", 0.0, 0.19959521881110118]
46 |       # MOTORCYCLE: ["normal", 0.0, 0.018643425075835468] WE DO NOT SAMPLE MOTORCYCLES
47 |       MOTORCYCLE: ["normal", 0.0, 0.0]
48 |       BICYCLE: ["normal", 0.0, 0.21636682539331192]
49 |       EMERGENCY_VEHICLE: ["normal", 0.0, 0.1328186144435953]
50 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/ball_query/src/ball_query.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from
 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp
 3 | 
 4 | #include <THC/THC.h>
 5 | #include <cuda.h>
 6 | #include <cuda_runtime_api.h>
 7 | #include <torch/extension.h>
 8 | #include <torch/serialize/tensor.h>
 9 | 
10 | #include <vector>
11 | 
12 | extern THCState *state;
13 | 
14 | #define CHECK_CUDA(x) \
15 |   TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
16 | #define CHECK_CONTIGUOUS(x) \
17 |   TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
18 | #define CHECK_INPUT(x) \
19 |   CHECK_CUDA(x);       \
20 |   CHECK_CONTIGUOUS(x)
21 | 
22 | int ball_query_wrapper(int b, int n, int m, float min_radius, float max_radius, int nsample,
23 |                        at::Tensor new_xyz_tensor, at::Tensor xyz_tensor,
24 |                        at::Tensor idx_tensor);
25 | // Launcher for the ball-query kernel; presumably defined in ball_query_cuda.cu - confirm.
26 | void ball_query_kernel_launcher(int b, int n, int m, float min_radius, float max_radius,
27 |                                 int nsample, const float *xyz, const float *new_xyz,
28 |                                 int *idx, cudaStream_t stream);
29 | 
30 | int ball_query_wrapper(int b, int n, int m, float min_radius, float max_radius, int nsample,
31 |                        at::Tensor new_xyz_tensor, at::Tensor xyz_tensor,
32 |                        at::Tensor idx_tensor) {
33 |   // Validate every tensor whose raw pointer reaches the CUDA launcher,
34 |   // including the idx output buffer; always returns 1.
35 |   CHECK_INPUT(new_xyz_tensor);
36 |   CHECK_INPUT(xyz_tensor);
37 |   CHECK_INPUT(idx_tensor);
38 |   // NOTE(review): the launcher prototype names these two slots (xyz, new_xyz); confirm vs the .cu
39 |   ball_query_kernel_launcher(b, n, m, min_radius, max_radius, nsample,
40 |                              new_xyz_tensor.data_ptr<float>(), xyz_tensor.data_ptr<float>(),
41 |                              idx_tensor.data_ptr<int>(), at::cuda::getCurrentCUDAStream().stream());
42 |   return 1;
43 | }
44 | 
45 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {  // registers ball_query_wrapper under the same name
46 |   m.def("ball_query_wrapper", &ball_query_wrapper, "ball_query_wrapper");
47 | }
48 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | # read as [x_min, y_min, z_min, x_max, y_max, z_max], the range spans 256 x 256 x 16
 4 | voxel_size: [0.16, 0.16, 0.4]
 5 | grid_size: [1600, 1600, 41]  # 256 / 0.16 = 1600; 16 / 0.4 = 40, listed with one extra z cell
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 15
14 | voxel_max_voxels: [200000, 200000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # presumably zeroes the lidar backbone's lr ("-lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 2
 8 | temporal_cache_length: 2
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 0
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 2
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 4
 8 | temporal_cache_length: 4
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]  # point_cloud_range widened by 5 / 4 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.16, 0.16, 0.4]
 5 | grid_size: [1600, 1600, 41]  # 256 / 0.16 = 1600 on the first two axes; 16 / 0.4 = 40 on the third, stored as 41 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 15
14 | voxel_max_voxels: [200000, 200000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql2-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 2
 8 | temporal_cache_length: 2
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 0
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt2-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 2
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql4-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 4
 8 | temporal_cache_length: 4
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one below the top-level max_epochs (4)
23 |   apply_same_aug_to_seq: true
24 | 
25 | optimizer:
26 |   type: AdamW
27 |   lr: 6.6e-05
28 |   weight_decay: 0.01
29 |   paramwise_cfg:
30 |     custom_keys:
31 |       encoders.lidar.backbone:
32 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
33 | 
34 | momentum_config:
35 |   policy: cyclic
36 |   cyclic_times: 1
37 |   step_ratio_up: 0.4
38 | 
39 | lr_config:
40 |   policy: CosineAnnealing
41 |   warmup: linear
42 |   warmup_iters: 500
43 |   warmup_ratio: 0.33333333
44 |   min_lr_ratio: 1.0e-3
45 | 
46 | optimizer_config:
47 |   grad_clip:
48 |     max_norm: 25
49 |     norm_type: 2
50 | 
51 | model:
52 |   encoders:
53 |     lidar:
54 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
55 |         max_num_points: ${voxel_max_points}
56 |         point_cloud_range: ${point_cloud_range}
57 |         voxel_size: ${voxel_size}
58 |         max_voxels: ${voxel_max_voxels}
59 |       backbone:
60 |         type: SparseEncoder
61 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
62 |         sparse_shape: ${grid_size}
63 |         output_channels: 128
64 |         order:
65 |           - conv
66 |           - norm
67 |           - act
68 |         encoder_channels:
69 |           - [16, 16, 32]
70 |           - [32, 32, 64]
71 |           - [64, 64, 128]
72 |           - [128, 128]
73 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
74 |           - [0, 0, 1]
75 |           - [0, 0, 1]
76 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
77 |           - [0, 0]
78 |         block_type: basicblock
79 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | apply_same_aug_to_seq: false  # top-level flag only; augment_gt_paste carries its own apply_same_aug_to_seq: true
22 | 
23 | augment_gt_paste:
24 |   max_epoch: 3  # one below the top-level max_epochs (4)
25 |   apply_same_aug_to_seq: true  # true here although the top-level apply_same_aug_to_seq is false
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
57 |         max_num_points: ${voxel_max_points}
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | apply_same_aug_to_seq: false  # top-level flag only; augment_gt_paste carries its own apply_same_aug_to_seq: true
22 | 
23 | augment_gt_paste:
24 |   max_epoch: 3  # one below the top-level max_epochs (4)
25 |   apply_same_aug_to_seq: true  # true here although the top-level apply_same_aug_to_seq is false
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
57 |         max_num_points: ${voxel_max_points}
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] - confirm with the consumer
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]  # point_cloud_range widened by 5 / 5 / 2 along the three axes
 3 | 
 4 | reduce_beams: 16
 5 | 
 6 | voxel_size: [0.1, 0.1, 0.2]
 7 | grid_size: [1600, 1600, 51]  # 160 / 0.1 = 1600 on the first two axes; 10 / 0.2 = 50 on the third, stored as 51 (extra cell presumably intentional - confirm)
 8 | 
 9 | queue_length: 3
10 | temporal_cache_length: 3
11 | queue_range_threshold: 1
12 | 
13 | deterministic: true
14 | 
15 | voxel_max_points: 10
16 | voxel_max_voxels: [120000, 120000]
17 | 
18 | samples_per_gpu: 6
19 | workers_per_gpu: 6
20 | 
21 | max_epochs: 4
22 | 
23 | augment_gt_paste:
24 |   max_epoch: 3  # one below the top-level max_epochs (4)
25 |   apply_same_aug_to_seq: true
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the lidar backbone; presumably freezes it ("lfrz" in the file name) - confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:  # the ${...} values name top-level keys of this file; resolution is done by the config loader (not shown)
57 |         max_num_points: ${voxel_max_points}
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file; presumably supplied by a parent config - confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:  # same nesting as encoder_channels: one padding per channel entry
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]  # last padding is a three-element list instead of a scalar
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | reduce_beams: 4
 5 | 
 6 | voxel_size: [0.1, 0.1, 0.2]
 7 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 8 | 
 9 | queue_length: 3
10 | temporal_cache_length: 3
11 | queue_range_threshold: 1
12 | 
13 | deterministic: true
14 | 
15 | voxel_max_points: 10
16 | voxel_max_voxels: [120000, 120000]
17 | 
18 | samples_per_gpu: 6
19 | workers_per_gpu: 6
20 | 
21 | max_epochs: 4
22 | 
23 | augment_gt_paste:
24 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
25 |   apply_same_aug_to_seq: true
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:
57 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb16-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | reduce_beams: 16
 5 | 
 6 | voxel_size: [0.1, 0.1, 0.2]
 7 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 8 | 
 9 | queue_length: 3
10 | temporal_cache_length: 3
11 | queue_range_threshold: 1
12 | 
13 | deterministic: true
14 | 
15 | voxel_max_points: 10
16 | voxel_max_voxels: [120000, 120000]
17 | 
18 | samples_per_gpu: 6
19 | workers_per_gpu: 6
20 | 
21 | max_epochs: 4
22 | 
23 | augment_gt_paste:
24 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
25 |   apply_same_aug_to_seq: true
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:
57 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-rb4-sameaugall-ql3-qrt1-gtp3-sameaug-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | reduce_beams: 4
 5 | 
 6 | voxel_size: [0.1, 0.1, 0.2]
 7 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 8 | 
 9 | queue_length: 3
10 | temporal_cache_length: 3
11 | queue_range_threshold: 1
12 | 
13 | deterministic: true
14 | 
15 | voxel_max_points: 10
16 | voxel_max_voxels: [120000, 120000]
17 | 
18 | samples_per_gpu: 6
19 | workers_per_gpu: 6
20 | 
21 | max_epochs: 4
22 | 
23 | augment_gt_paste:
24 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
25 |   apply_same_aug_to_seq: true
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:
57 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     cls_rot_lim: null
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:
57 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-trans-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     cls_rot_lim: null
26 | 
27 | optimizer:
28 |   type: AdamW
29 |   lr: 6.6e-05
30 |   weight_decay: 0.01
31 |   paramwise_cfg:
32 |     custom_keys:
33 |       encoders.lidar.backbone:
34 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
35 | 
36 | momentum_config:
37 |   policy: cyclic
38 |   cyclic_times: 1
39 |   step_ratio_up: 0.4
40 | 
41 | lr_config:
42 |   policy: CosineAnnealing
43 |   warmup: linear
44 |   warmup_iters: 500
45 |   warmup_ratio: 0.33333333
46 |   min_lr_ratio: 1.0e-3
47 | 
48 | optimizer_config:
49 |   grad_clip:
50 |     max_norm: 25
51 |     norm_type: 2
52 | 
53 | model:
54 |   encoders:
55 |     lidar:
56 |       voxelize:
57 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
58 |         point_cloud_range: ${point_cloud_range}
59 |         voxel_size: ${voxel_size}
60 |         max_voxels: ${voxel_max_voxels}
61 |       backbone:
62 |         type: SparseEncoder
63 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
64 |         sparse_shape: ${grid_size}
65 |         output_channels: 128
66 |         order:
67 |           - conv
68 |           - norm
69 |           - act
70 |         encoder_channels:
71 |           - [16, 16, 32]
72 |           - [32, 32, 64]
73 |           - [64, 64, 128]
74 |           - [128, 128]
75 |         encoder_paddings:
76 |           - [0, 0, 1]
77 |           - [0, 0, 1]
78 |           - [0, 0, [1, 1, 0]]
79 |           - [0, 0]
80 |         block_type: basicblock
81 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: false
24 |   sampler:
25 |     cls_trans_lim: null
26 |     cls_rot_lim: null
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: false
24 |   sampler:
25 |     cls_trans_lim: null
26 |     cls_rot_lim: null
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     cls_trans_lim: null
26 |     cls_rot_lim: null
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     cls_trans_lim: null
26 |     cls_rot_lim: null
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 256, 256, 16) — confirm axis order
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | 
 4 | voxel_size: [0.16, 0.16, 0.4]  # NOTE(review): the file name carries "0xy1-0z20" although the sizes here are 0.16 / 0.16 / 0.4 — presumably a name kept from the tumtraf-i variant; confirm
 5 | grid_size: [1600, 1600, 41]  # extent / voxel_size = 1600, 1600, 40; z is configured as 40 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 15
14 | voxel_max_voxels: [200000, 200000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     reduce_points_by_distance:
26 |       prob: 0.5
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 0
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     reduce_points_by_distance:
26 |       prob: 0.5
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     reduce_points_by_distance:
26 |       prob: 0.5
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-6.0, -128.0, -3.0, 250.0, 128.0, 13.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 256, 256, 16) — confirm axis order
 2 | post_center_range: [-11.0, -132.0, -5.0, 255.0, 132.0, 15.0]
 3 | 
 4 | voxel_size: [0.16, 0.16, 0.4]  # NOTE(review): the file name carries "0xy1-0z20" although the sizes here are 0.16 / 0.16 / 0.4 — presumably a name kept from the tumtraf-i variant; confirm
 5 | grid_size: [1600, 1600, 41]  # extent / voxel_size = 1600, 1600, 40; z is configured as 40 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 1
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 15
14 | voxel_max_voxels: [200000, 200000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     reduce_points_by_distance:
26 |       prob: 0.5
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt0-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
 1 | point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] (extents 160, 160, 10) — confirm axis order
 2 | post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]
 3 | 
 4 | voxel_size: [0.1, 0.1, 0.2]
 5 | grid_size: [1600, 1600, 51]  # extent / voxel_size = 1600, 1600, 50; z is configured as 50 + 1 — NOTE(review): confirm the extra z cell is what the consumer of sparse_shape expects
 6 | 
 7 | queue_length: 3
 8 | temporal_cache_length: 3
 9 | queue_range_threshold: 0
10 | 
11 | deterministic: true
12 | 
13 | voxel_max_points: 10
14 | voxel_max_voxels: [120000, 120000]
15 | 
16 | samples_per_gpu: 6
17 | workers_per_gpu: 6
18 | 
19 | max_epochs: 4
20 | 
21 | augment_gt_paste:
22 |   max_epoch: 3  # one less than the top-level max_epochs (4) — NOTE(review): presumably gt-paste stops before the final epoch; confirm
23 |   apply_same_aug_to_seq: true
24 |   sampler:
25 |     reduce_points_by_distance:
26 |       prob: 0.5
27 | 
28 | optimizer:
29 |   type: AdamW
30 |   lr: 6.6e-05
31 |   weight_decay: 0.01
32 |   paramwise_cfg:
33 |     custom_keys:
34 |       encoders.lidar.backbone:
35 |         lr_mult: 0.0  # zero LR multiplier for the encoders.lidar.backbone key — presumably freezes the lidar backbone ("lfrz" in the file name); confirm
36 | 
37 | momentum_config:
38 |   policy: cyclic
39 |   cyclic_times: 1
40 |   step_ratio_up: 0.4
41 | 
42 | lr_config:
43 |   policy: CosineAnnealing
44 |   warmup: linear
45 |   warmup_iters: 500
46 |   warmup_ratio: 0.33333333
47 |   min_lr_ratio: 1.0e-3
48 | 
49 | optimizer_config:
50 |   grad_clip:
51 |     max_norm: 25
52 |     norm_type: 2
53 | 
54 | model:
55 |   encoders:
56 |     lidar:
57 |       voxelize:
58 |         max_num_points: ${voxel_max_points}  # ${...} placeholders below name top-level keys of this file
59 |         point_cloud_range: ${point_cloud_range}
60 |         voxel_size: ${voxel_size}
61 |         max_voxels: ${voxel_max_voxels}
62 |       backbone:
63 |         type: SparseEncoder
64 |         in_channels: ${use_dim}  # use_dim is not defined in this file — presumably supplied by a base config; confirm
65 |         sparse_shape: ${grid_size}
66 |         output_channels: 128
67 |         order:
68 |           - conv
69 |           - norm
70 |           - act
71 |         encoder_channels:
72 |           - [16, 16, 32]
73 |           - [32, 32, 64]
74 |           - [64, 64, 128]
75 |           - [128, 128]
76 |         encoder_paddings:
77 |           - [0, 0, 1]
78 |           - [0, 0, 1]
79 |           - [0, 0, [1, 1, 0]]
80 |           - [0, 0]
81 |         block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal-gru/transfusion/lidar/voxelnet-convlstm-1600g-0xy1-0z20-sameaugall-ql3-qrt1-gtp3-sameaug-rpd0p5-trans-rot-lfrz.yaml:
--------------------------------------------------------------------------------
# Temporal LiDAR-only detector config. Judging by the file name this is the
# "ql3 / qrt1 / gtp3 / rpd0p5 / lfrz" variant, which matches the values below:
# queue_length 3, queue_range_threshold 1, GT-paste max_epoch 3,
# reduce_points_by_distance prob 0.5 and lr_mult 0.0 on the lidar backbone.
# Values written as ${name} are presumably resolved by the config loader from
# the key of that name -- TODO confirm against the loader.

# NOTE(review): presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- confirm
# ordering and units against the dataset config.
point_cloud_range: [-20.0, -80.0, -10.0, 140.0, 80.0, 0.0]
# Slightly larger than point_cloud_range on every side.
post_center_range: [-25.0, -85.0, -12.0, 145.0, 85.0, 2.0]

voxel_size: [0.1, 0.1, 0.2]
# Range / voxel_size gives 1600 x 1600 x 50 cells; the third entry here is 51.
# NOTE(review): presumably the extra cell is what the sparse encoder expects --
# confirm before changing the range or the voxel size.
grid_size: [1600, 1600, 51]

# Temporal settings (semantics are defined by the consuming dataset/model code).
queue_length: 3
temporal_cache_length: 3
queue_range_threshold: 1

deterministic: True

voxel_max_points: 10
# Two entries; presumably the train and test budgets -- TODO confirm.
voxel_max_voxels: [120000, 120000]

samples_per_gpu: 6
workers_per_gpu: 6

max_epochs: 4

augment_gt_paste:
  # One less than max_epochs above.
  max_epoch: 3
  apply_same_aug_to_seq: true
  sampler:
    reduce_points_by_distance:
      prob: 0.5

optimizer:
  type: AdamW
  lr: 6.6e-05
  weight_decay: 0.01
  paramwise_cfg:
    custom_keys:
      # A multiplier of 0.0 presumably freezes the lidar backbone ("lfrz" in
      # the file name) -- verify against the optimizer constructor.
      encoders.lidar.backbone:
        lr_mult: 0.0

momentum_config:
  policy: cyclic
  cyclic_times: 1
  step_ratio_up: 0.4

lr_config:
  policy: CosineAnnealing
  warmup: linear
  warmup_iters: 500
  warmup_ratio: 0.33333333
  min_lr_ratio: 1.0e-3

optimizer_config:
  grad_clip:
    max_norm: 25
    norm_type: 2

model:
  encoders:
    lidar:
      # Voxelization parameters all reference the top-level keys above.
      voxelize:
        max_num_points: ${voxel_max_points}
        point_cloud_range: ${point_cloud_range}
        voxel_size: ${voxel_size}
        max_voxels: ${voxel_max_voxels}
      backbone:
        type: SparseEncoder
        # use_dim is not defined in this file; presumably inherited from a
        # parent/default config -- TODO confirm.
        in_channels: ${use_dim}
        sparse_shape: ${grid_size}
        output_channels: 128
        order:
          - conv
          - norm
          - act
        # Four stages; encoder_paddings below has one entry per channel entry.
        encoder_channels:
          - [16, 16, 32]
          - [32, 32, 64]
          - [64, 64, 128]
          - [128, 128]
        encoder_paddings:
          - [0, 0, 1]
          - [0, 0, 1]
          - [0, 0, [1, 1, 0]]
          - [0, 0]
        block_type: basicblock
82 | 


--------------------------------------------------------------------------------
/tools/convert_checkpoints_to_torchsparse.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import torch
 3 | 
 4 | 
 5 | def main():
 6 |     parser = argparse.ArgumentParser()
 7 |     parser.add_argument("ckpt_before", metavar="FILE", help="Original checkpoint.")
 8 |     parser.add_argument("ckpt_after", metavar="FILE", help="Converted checkpoint.")
 9 |     args, opts = parser.parse_known_args()
10 | 
11 |     ckpt_before = args.ckpt_before
12 |     ckpt_after = args.ckpt_after
13 | 
14 |     cp_old = torch.load(ckpt_before, map_location="cpu")
15 |     model = cp_old["state_dict"]
16 |     new_model = dict()
17 | 
18 |     for key in model:
19 |         if key.startswith("encoders.lidar.backbone") and ".bn." not in key:
20 |             is_sparseconv_weight = len(model[key].shape) > 1
21 |         else:
22 |             is_sparseconv_weight = False
23 |         if is_sparseconv_weight:
24 |             new_key = key.replace(".weight", ".kernel")
25 |             weights = model[key]
26 | 
27 |             kx, ky, kz, ic, oc = weights.shape
28 |             converted_weights = weights.reshape(-1, ic, oc)
29 |             if converted_weights.shape[0] == 1:
30 |                 converted_weights = converted_weights[0]
31 | 
32 |             elif converted_weights.shape[0] == 27:
33 |                 offsets = [list(range(kz)), list(range(ky)), list(range(kx))]
34 |                 kykx = ky * kx
35 |                 offsets = [
36 |                     (x * kykx + y * kx + z)
37 |                     for z in offsets[0]
38 |                     for y in offsets[1]
39 |                     for x in offsets[2]
40 |                 ]
41 |                 offsets = torch.tensor(
42 |                     offsets, dtype=torch.int64, device=converted_weights.device
43 |                 )
44 |                 converted_weights = converted_weights[offsets]
45 | 
46 |         else:
47 |             new_key = key
48 |             converted_weights = model[key]
49 |         new_model[new_key] = converted_weights
50 | 
51 |     cp_old["state_dict"] = new_model
52 |     torch.save(cp_old, ckpt_after)
53 | 
54 | 
# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()
57 | 


--------------------------------------------------------------------------------
/mmdet3d/datasets/builder.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | 
 3 | from mmcv.utils import Registry, build_from_cfg
 4 | from mmdet.datasets import DATASETS
 5 | from mmdet.datasets.builder import _concat_dataset
 6 | 
# Import-time side effect: on non-Windows platforms, raise the soft limit on
# open file descriptors for this process (see the linked PyTorch issue).
if platform.system() != "Windows":
    # https://github.com/pytorch/pytorch/issues/973
    import resource

    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    # Target at least 4096 (or the current soft limit if already higher),
    # capped by min() against the hard limit.
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    # The hard limit is passed back unchanged; only the soft limit moves.
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
16 | 
17 | OBJECTSAMPLERS = Registry("Object sampler")
18 | 
19 | 
def build_dataset(cfg, default_args=None):
    """Build a dataset, recursively unwrapping dataset-wrapper configs.

    Args:
        cfg (dict | list | tuple): A dataset config, or a sequence of configs
            that is built into a single ``ConcatDataset``.
        default_args (dict, optional): Forwarded unchanged to every nested
            build call.

    Returns:
        The constructed dataset or dataset wrapper. The wrapper types
        ``ConcatDataset``, ``RepeatDataset``, ``ClassBalancedDataset`` and
        ``CBGSDataset`` are dispatched on ``cfg["type"]``; a config whose
        ``ann_file`` is a list/tuple goes through ``_concat_dataset``; anything
        else is built from the ``DATASETS`` registry.
    """
    from mmdet.datasets.dataset_wrappers import (
        ClassBalancedDataset,
        ConcatDataset,
        RepeatDataset,
    )

    from mmdet3d.datasets.dataset_wrappers import CBGSDataset

    # A bare sequence of configs is shorthand for concatenation.
    if isinstance(cfg, (list, tuple)):
        return ConcatDataset([build_dataset(item, default_args) for item in cfg])

    kind = cfg["type"]

    if kind == "ConcatDataset":
        members = [build_dataset(item, default_args) for item in cfg["datasets"]]
        return ConcatDataset(members, cfg.get("separate_eval", True))

    if kind == "RepeatDataset":
        return RepeatDataset(build_dataset(cfg["dataset"], default_args), cfg["times"])

    if kind == "ClassBalancedDataset":
        return ClassBalancedDataset(
            build_dataset(cfg["dataset"], default_args), cfg["oversample_thr"]
        )

    if kind == "CBGSDataset":
        # The second positional argument is only passed when configured.
        extra = (cfg["temporal"],) if "temporal" in cfg else ()
        return CBGSDataset(build_dataset(cfg["dataset"], default_args), *extra)

    if isinstance(cfg.get("ann_file"), (list, tuple)):
        return _concat_dataset(cfg, default_args)

    return build_from_cfg(cfg, DATASETS, default_args)
53 | 


--------------------------------------------------------------------------------
/mmdet3d/models/necks/lss.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | import torch
 4 | from torch import nn
 5 | from torch.nn import functional as F
 6 | 
 7 | from mmdet.models import NECKS
 8 | 
 9 | __all__ = ["LSSFPN"]
10 | 
11 | 
@NECKS.register_module()
class LSSFPN(nn.Module):
    """Neck that fuses two feature maps picked from a list of inputs.

    The first selected map is bilinearly resized to the spatial size of the
    second, both are concatenated along the channel axis and passed through a
    1x1 conv + 3x3 conv stack (each followed by BatchNorm and ReLU). When
    ``scale_factor > 1`` the fused map is additionally upsampled and refined
    by one more 3x3 conv block.

    Args:
        in_indices (Tuple[int, int]): Positions of the two maps in the input list.
        in_channels (Tuple[int, int]): Channel counts expected for those maps.
        out_channels (int): Channel count of the fused output.
        scale_factor (int): Upsampling factor applied after fusion; values
            of 1 or less disable the upsampling branch.
    """

    def __init__(
        self,
        in_indices: Tuple[int, int],
        in_channels: Tuple[int, int],
        out_channels: int,
        scale_factor: int = 1,
    ) -> None:
        super().__init__()
        self.in_indices = in_indices
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.scale_factor = scale_factor

        concat_channels = in_channels[0] + in_channels[1]
        self.fuse = nn.Sequential(
            nn.Conv2d(concat_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )
        # The upsample branch only exists when it is actually needed.
        if scale_factor > 1:
            resize = nn.Upsample(
                scale_factor=scale_factor,
                mode="bilinear",
                align_corners=True,
            )
            self.upsample = nn.Sequential(
                resize,
                nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(True),
            )

    def forward(self, x: List[torch.Tensor]) -> torch.Tensor:
        """Fuse the two configured feature maps from ``x`` into one tensor."""
        first = x[self.in_indices[0]]
        assert first.shape[1] == self.in_channels[0]

        second = x[self.in_indices[1]]
        assert second.shape[1] == self.in_channels[1]

        # Bring the first map onto the second map's spatial grid.
        resized = F.interpolate(
            first,
            size=second.shape[-2:],
            mode="bilinear",
            align_corners=True,
        )
        fused = self.fuse(torch.cat([resized, second], dim=1))
        return self.upsample(fused) if self.scale_factor > 1 else fused
66 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/interpolate/three_interpolate.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from typing import Tuple
 4 | 
 5 | from . import interpolate_ext
 6 | 
 7 | 
 8 | class ThreeInterpolate(Function):
 9 |     @staticmethod
10 |     def forward(
11 |         ctx, features: torch.Tensor, indices: torch.Tensor, weight: torch.Tensor
12 |     ) -> torch.Tensor:
13 |         """Performs weighted linear interpolation on 3 features.
14 | 
15 |         Args:
16 |             features (Tensor): (B, C, M) Features descriptors to be
17 |                 interpolated from
18 |             indices (Tensor): (B, n, 3) index three nearest neighbors
19 |                 of the target features in features
20 |             weight (Tensor): (B, n, 3) weights of interpolation
21 | 
22 |         Returns:
23 |             Tensor: (B, C, N) tensor of the interpolated features
24 |         """
25 |         assert features.is_contiguous()
26 |         assert indices.is_contiguous()
27 |         assert weight.is_contiguous()
28 | 
29 |         B, c, m = features.size()
30 |         n = indices.size(1)
31 |         ctx.three_interpolate_for_backward = (indices, weight, m)
32 |         output = torch.cuda.FloatTensor(B, c, n)
33 | 
34 |         interpolate_ext.three_interpolate_wrapper(B, c, m, n, features, indices, weight, output)
35 |         return output
36 | 
37 |     @staticmethod
38 |     def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
39 |         """Backward of three interpolate.
40 | 
41 |         Args:
42 |             grad_out (Tensor): (B, C, N) tensor with gradients of outputs
43 | 
44 |         Returns:
45 |             Tensor: (B, C, M) tensor with gradients of features
46 |         """
47 |         idx, weight, m = ctx.three_interpolate_for_backward
48 |         B, c, n = grad_out.size()
49 | 
50 |         grad_features = torch.cuda.FloatTensor(B, c, m).zero_()
51 |         grad_out_data = grad_out.data.contiguous()
52 | 
53 |         interpolate_ext.three_interpolate_grad_wrapper(
54 |             B, c, n, m, grad_out_data, idx, weight, grad_features.data
55 |         )
56 |         return grad_features, None, None
57 | 
58 | 
59 | three_interpolate = ThreeInterpolate.apply
60 | 


--------------------------------------------------------------------------------
/tools/create_swint_checkpoint.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser, Namespace
 2 | 
 3 | import torch
 4 | 
 5 | 
 6 | def get_args() -> Namespace:
 7 |     """
 8 |     Parse given arguments for create_swint_checkpoint function.
 9 | 
10 |     Returns:
11 |         Namespace: parsed arguments
12 |     """
13 |     parser = ArgumentParser()
14 | 
15 |     parser.add_argument("-m", type=str, required=True, help="path to pretrained swint")
16 |     parser.add_argument("-s", type=str, required=True, help="path to source model")
17 |     parser.add_argument("-t", type=str, required=True, help="path to save target model")
18 | 
19 |     return parser.parse_args()
20 | 
21 | 
def convert_to_swint_pth(
    pretrained_swint_path: str,
    source_model_path: str,
    target_save_path: str,
    prefix: str = "encoders.camera.backbone",
) -> None:
    """Extract the prefixed backbone weights of a model into a swint-style checkpoint.

    Loads both checkpoints on CPU, takes every ``state_dict`` entry of the
    source model whose key starts with ``prefix``, strips the prefix plus the
    following separator character, checks that each resulting key exists in
    the pretrained checkpoint, and saves the pretrained checkpoint with its
    ``state_dict`` replaced by the extracted weights.

    Args:
        pretrained_swint_path (str): Checkpoint providing the expected key set
            and every non-``state_dict`` entry of the output.
        source_model_path (str): Checkpoint the weights are taken from.
        target_save_path (str): Where the resulting checkpoint is written.
        prefix (str): Key prefix identifying the weights to extract.

    Raises:
        AssertionError: If an extracted key is missing from the pretrained
            checkpoint.
    """
    cpu = torch.device("cpu")
    pretrained_swint = torch.load(pretrained_swint_path, map_location=cpu)
    source_model = torch.load(source_model_path, map_location=cpu)

    expected_weights = pretrained_swint["state_dict"]
    source_weights = source_model["state_dict"]

    print("total keys in pretrained swint", len(expected_weights.keys()))
    print("total keys in source model", len(source_weights.keys()))

    # Partition the source keys by whether they carry the prefix.
    common_keys = [name for name in source_weights.keys() if name.startswith(prefix)]
    other_keys = [name for name in source_weights.keys() if not name.startswith(prefix)]

    print("total common keys", len(common_keys))
    print("total other keys", len(other_keys))

    # Drop the prefix and the single separator character that follows it.
    cut = len(prefix) + 1
    new_state_dict = {name[cut:]: source_weights[name] for name in common_keys}

    # Every extracted key must already exist in the pretrained checkpoint.
    for name in new_state_dict.keys():
        assert name in expected_weights.keys(), "key not found in pretrained swint"

    print("total keys in new state dict", len(new_state_dict.keys()))

    pretrained_swint["state_dict"] = new_state_dict
    torch.save(pretrained_swint, target_save_path)
61 | 
62 | 
# Script entry point: map the -m / -s / -t flags onto the conversion arguments.
if __name__ == "__main__":
    args = get_args()
    convert_to_swint_pth(
        pretrained_swint_path=args.m,
        source_model_path=args.s,
        target_save_path=args.t,
    )
70 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/iou3d/iou3d_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from . import iou3d_cuda
 4 | 
 5 | 
 6 | def boxes_iou_bev(boxes_a, boxes_b):
 7 |     """Calculate boxes IoU in the bird view.
 8 | 
 9 |     Args:
10 |         boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
11 |         boxes_b (torch.Tensor): Input boxes b with shape (N, 5).
12 | 
13 |     Returns:
14 |         ans_iou (torch.Tensor): IoU result with shape (M, N).
15 |     """
16 |     ans_iou = boxes_a.new_zeros(torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
17 | 
18 |     iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou)
19 | 
20 |     return ans_iou
21 | 
22 | 
def nms_gpu(boxes, scores, thresh, pre_maxsize=None, post_max_size=None):
    """Run the GPU non-maximum suppression of the ``iou3d_cuda`` extension.

    Args:
        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
            ([x1, y1, x2, y2, ry]).
        scores (torch.Tensor): Scores of boxes with the shape of [N].
        thresh (float): Threshold forwarded unchanged to the extension
            (presumably an overlap threshold -- confirm against the kernel).
        pre_maxsize (int): Keep only this many top-scoring boxes before nms.
            Default: None.
        post_max_size (int): Keep only this many boxes after nms.
            Default: None.

    Returns:
        torch.Tensor: Indexes after nms, referring to the original ``boxes``.
    """
    # Process boxes from highest to lowest score.
    _, order = scores.sort(0, descending=True)

    if pre_maxsize is not None:
        order = order[:pre_maxsize]
    boxes = boxes[order].contiguous()

    # The extension writes kept positions into this CPU buffer and returns
    # how many entries are valid.
    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh, boxes.device.index)
    kept_positions = keep[:num_out].cuda(boxes.device)
    # Map positions in the sorted list back to original box indexes.
    keep = order[kept_positions].contiguous()
    return keep if post_max_size is None else keep[:post_max_size]
49 | 
50 | 
def nms_normal_gpu(boxes, scores, thresh):
    """Run the "normal" GPU non-maximum suppression of the ``iou3d_cuda`` extension.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 5).
        scores (torch.Tensor): Scores of predicted boxes with shape (N).
        thresh: Threshold of non maximum suppression, forwarded unchanged to
            the extension.

    Returns:
        torch.Tensor: Remaining indices with scores in descending order.
    """
    # Process boxes from highest to lowest score.
    _, order = scores.sort(0, descending=True)
    sorted_boxes = boxes[order].contiguous()

    # CPU buffer filled by the extension; the first num_out entries are valid.
    keep = torch.zeros(sorted_boxes.size(0), dtype=torch.long)
    num_out = iou3d_cuda.nms_normal_gpu(sorted_boxes, keep, thresh, sorted_boxes.device.index)
    kept_positions = keep[:num_out].cuda(sorted_boxes.device)
    return order[kept_positions].contiguous()
69 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/knn/knn.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from . import knn_ext
 5 | 
 6 | 
 7 | class KNN(Function):
 8 |     r"""KNN (CUDA) based on heap data structure.
 9 |     Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
10 |     scene_seg/lib/pointops/src/knnquery_heap>`_.
11 | 
12 |     Find k-nearest points.
13 |     """
14 | 
15 |     @staticmethod
16 |     def forward(
17 |         ctx, k: int, xyz: torch.Tensor, center_xyz: torch.Tensor = None, transposed: bool = False
18 |     ) -> torch.Tensor:
19 |         """Forward.
20 | 
21 |         Args:
22 |             k (int): number of nearest neighbors.
23 |             xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
24 |                 xyz coordinates of the features.
25 |             center_xyz (Tensor): (B, npoint, 3) if transposed == False,
26 |                 else (B, 3, npoint). centers of the knn query.
27 |             transposed (bool): whether the input tensors are transposed.
28 |                 defaults to False. Should not expicitly use this keyword
29 |                 when calling knn (=KNN.apply), just add the fourth param.
30 | 
31 |         Returns:
32 |             Tensor: (B, k, npoint) tensor with the indicies of
33 |                 the features that form k-nearest neighbours.
34 |         """
35 |         assert k > 0
36 | 
37 |         if center_xyz is None:
38 |             center_xyz = xyz
39 | 
40 |         if transposed:
41 |             xyz = xyz.transpose(2, 1).contiguous()
42 |             center_xyz = center_xyz.transpose(2, 1).contiguous()
43 | 
44 |         assert xyz.is_contiguous()  # [B, N, 3]
45 |         assert center_xyz.is_contiguous()  # [B, npoint, 3]
46 | 
47 |         center_xyz_device = center_xyz.get_device()
48 |         assert (
49 |             center_xyz_device == xyz.get_device()
50 |         ), "center_xyz and xyz should be put on the same device"
51 |         if torch.cuda.current_device() != center_xyz_device:
52 |             torch.cuda.set_device(center_xyz_device)
53 | 
54 |         B, npoint, _ = center_xyz.shape
55 |         N = xyz.shape[1]
56 | 
57 |         idx = center_xyz.new_zeros((B, npoint, k)).int()
58 |         dist2 = center_xyz.new_zeros((B, npoint, k)).float()
59 | 
60 |         knn_ext.knn_wrapper(B, N, npoint, k, xyz, center_xyz, idx, dist2)
61 |         # idx shape to [B, k, npoint]
62 |         idx = idx.transpose(2, 1).contiguous()
63 |         ctx.mark_non_differentiable(idx)
64 |         return idx
65 | 
66 |     @staticmethod
67 |     def backward(ctx, a=None):
68 |         return None, None, None
69 | 
70 | 
71 | knn = KNN.apply
72 | 


--------------------------------------------------------------------------------
/mmdet3d/models/vtransforms/lss.py:
--------------------------------------------------------------------------------
 1 | from typing import Tuple
 2 | 
 3 | from mmcv.runner import force_fp32
 4 | from torch import nn
 5 | 
 6 | from mmdet3d.models.builder import VTRANSFORMS
 7 | 
 8 | from .base import BaseTransform
 9 | 
10 | __all__ = ["LSSTransform"]
11 | 
12 | 
@VTRANSFORMS.register_module()
class LSSTransform(BaseTransform):
    """View transform that predicts a per-pixel depth distribution.

    A 1x1 conv (``depthnet``) produces ``self.D`` depth logits and ``self.C``
    context channels per pixel; their outer product forms the camera feature
    volume. ``self.D`` and ``self.C`` are not set in this class -- presumably
    they come from ``BaseTransform``. The output of the base ``forward`` is
    optionally downsampled by a stride-2 conv stack.

    Args:
        in_channels (int): Channels of the incoming camera features.
        out_channels (int): Channels forwarded to the base class and used by
            the downsample stack.
        image_size, feature_size, xbound, ybound, zbound, dbound: Forwarded
            unchanged to ``BaseTransform``.
        downsample (int): 1 (or less) for no downsampling; otherwise must be 2.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        image_size: Tuple[int, int],
        feature_size: Tuple[int, int],
        xbound: Tuple[float, float, float],
        ybound: Tuple[float, float, float],
        zbound: Tuple[float, float, float],
        dbound: Tuple[float, float, float],
        downsample: int = 1,
    ) -> None:
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            image_size=image_size,
            feature_size=feature_size,
            xbound=xbound,
            ybound=ybound,
            zbound=zbound,
            dbound=dbound,
        )
        self.depthnet = nn.Conv2d(in_channels, self.D + self.C, 1)

        if downsample <= 1:
            self.downsample = nn.Identity()
            return

        assert downsample == 2, downsample

        def conv_bn_relu(stride: int = 1):
            # One 3x3 conv -> BatchNorm -> ReLU group at constant width.
            return [
                nn.Conv2d(
                    out_channels,
                    out_channels,
                    3,
                    stride=stride,
                    padding=1,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(True),
            ]

        # Only the middle group is strided; the outer two keep resolution.
        self.downsample = nn.Sequential(
            *conv_bn_relu(),
            *conv_bn_relu(stride=downsample),
            *conv_bn_relu(),
        )

    @force_fp32()
    def get_cam_feats(self, x):
        """Lift (B, N, C, fH, fW) features to a (B, N, D, fH, fW, C) volume."""
        B, N, C, fH, fW = x.shape

        # Fold the second axis into the batch for the 2-D conv.
        feats = self.depthnet(x.view(B * N, C, fH, fW))

        # First D channels: depth logits, normalised over the depth axis.
        depth = feats[:, : self.D].softmax(dim=1)
        # Next C channels: context features.
        context = feats[:, self.D : (self.D + self.C)]
        # Outer product over (context channel, depth bin).
        volume = depth.unsqueeze(1) * context.unsqueeze(2)

        volume = volume.view(B, N, self.C, self.D, fH, fW)
        return volume.permute(0, 1, 3, 4, 5, 2)

    def forward(self, *args, **kwargs):
        """Run the base transform, then the optional downsample stack."""
        return self.downsample(super().forward(*args, **kwargs))
79 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/gather_points/src/gather_points.cpp:
--------------------------------------------------------------------------------
 1 | #include <ATen/cuda/CUDAContext.h>
 2 | #include <THC/THC.h>
 3 | #include <torch/extension.h>
 4 | #include <torch/serialize/tensor.h>
 5 | 
 6 | #include <vector>
 7 | 
 8 | extern THCState *state;
 9 | 
// Forward declarations.
// The two *_wrapper functions are defined later in this file; the two
// *_kernel_launcher functions are only declared here and take raw pointers
// plus a CUDA stream (presumably implemented in the accompanying .cu file).

// Tensor-level entry point for the forward gather.
int gather_points_wrapper(int b, int c, int n, int npoints,
                          at::Tensor points_tensor, at::Tensor idx_tensor,
                          at::Tensor out_tensor);

// Launches the forward kernel on raw float/int buffers.
void gather_points_kernel_launcher(int b, int c, int n, int npoints,
                                   const float *points, const int *idx,
                                   float *out, cudaStream_t stream);

// Tensor-level entry point for the backward (gradient) pass.
int gather_points_grad_wrapper(int b, int c, int n, int npoints,
                               at::Tensor grad_out_tensor,
                               at::Tensor idx_tensor,
                               at::Tensor grad_points_tensor);

// Launches the backward kernel on raw float/int buffers.
void gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,
                                        const float *grad_out, const int *idx,
                                        float *grad_points,
                                        cudaStream_t stream);
27 | 
28 | int gather_points_wrapper(int b, int c, int n, int npoints,
29 |                           at::Tensor points_tensor, at::Tensor idx_tensor,
30 |                           at::Tensor out_tensor) {
31 |   const float *points = points_tensor.data_ptr<float>();
32 |   const int *idx = idx_tensor.data_ptr<int>();
33 |   float *out = out_tensor.data_ptr<float>();
34 | 
35 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
36 |   gather_points_kernel_launcher(b, c, n, npoints, points, idx, out, stream);
37 |   return 1;
38 | }
39 | 
40 | int gather_points_grad_wrapper(int b, int c, int n, int npoints,
41 |                                at::Tensor grad_out_tensor,
42 |                                at::Tensor idx_tensor,
43 |                                at::Tensor grad_points_tensor) {
44 |   const float *grad_out = grad_out_tensor.data_ptr<float>();
45 |   const int *idx = idx_tensor.data_ptr<int>();
46 |   float *grad_points = grad_points_tensor.data_ptr<float>();
47 | 
48 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
49 |   gather_points_grad_kernel_launcher(b, c, n, npoints, grad_out, idx,
50 |                                      grad_points, stream);
51 |   return 1;
52 | }
53 | 
// Python bindings: exposes both wrappers under their C++ names.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("gather_points_wrapper", &gather_points_wrapper,
        "gather_points_wrapper");
  m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper,
        "gather_points_grad_wrapper");
}
60 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/furthest_point_sample/furthest_point_sample.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from . import furthest_point_sample_ext
 5 | 
 6 | 
 7 | class FurthestPointSampling(Function):
 8 |     """Furthest Point Sampling.
 9 | 
10 |     Uses iterative furthest point sampling to select a set of features whose
11 |     corresponding points have the furthest distance.
12 |     """
13 | 
14 |     @staticmethod
15 |     def forward(ctx, points_xyz: torch.Tensor, num_points: int) -> torch.Tensor:
16 |         """forward.
17 | 
18 |         Args:
19 |             points_xyz (Tensor): (B, N, 3) where N > num_points.
20 |             num_points (int): Number of points in the sampled set.
21 | 
22 |         Returns:
23 |              Tensor: (B, num_points) indices of the sampled points.
24 |         """
25 |         assert points_xyz.is_contiguous()
26 | 
27 |         B, N = points_xyz.size()[:2]
28 |         output = torch.cuda.IntTensor(B, num_points)
29 |         temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
30 | 
31 |         furthest_point_sample_ext.furthest_point_sampling_wrapper(
32 |             B, N, num_points, points_xyz, temp, output
33 |         )
34 |         ctx.mark_non_differentiable(output)
35 |         return output
36 | 
37 |     @staticmethod
38 |     def backward(xyz, a=None):
39 |         return None, None
40 | 
41 | 
class FurthestPointSamplingWithDist(Function):
    """Furthest point sampling driven by a precomputed distance matrix.

    Same iterative selection as ``FurthestPointSampling``, but the extension
    is given pairwise distances instead of coordinates.
    """

    @staticmethod
    def forward(ctx, points_dist: torch.Tensor, num_points: int) -> torch.Tensor:
        """forward.

        Args:
            points_dist (Tensor): (B, N, N) Distance between each point pair.
            num_points (int): Number of points in the sampled set.

        Returns:
             Tensor: (B, num_points) int32 indices of the sampled points.
        """
        assert points_dist.is_contiguous()

        batch, total, _ = points_dist.size()
        # Both buffers inherit the device of the input.
        selected = points_dist.new_zeros([batch, num_points], dtype=torch.int32)
        # Scratch buffer handed to the extension, initialised to a large value.
        scratch = points_dist.new_zeros([batch, total]).fill_(1e10)

        furthest_point_sample_ext.furthest_point_sampling_with_dist_wrapper(
            batch, total, num_points, points_dist, scratch, selected
        )
        # Indices carry no gradient.
        ctx.mark_non_differentiable(selected)
        return selected

    @staticmethod
    def backward(xyz, a=None):
        """No gradients: one ``None`` per forward input."""
        return None, None
75 | 
76 | 
77 | furthest_point_sample = FurthestPointSampling.apply
78 | furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply
79 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/__init__.py:
--------------------------------------------------------------------------------
 1 | from mmcv.ops import (
 2 |     RoIAlign,
 3 |     SigmoidFocalLoss,
 4 |     get_compiler_version,
 5 |     get_compiling_cuda_version,
 6 |     nms,
 7 |     roi_align,
 8 |     sigmoid_focal_loss,
 9 | )
10 | 
11 | from .ball_query import ball_query
12 | from .furthest_point_sample import (
13 |     Points_Sampler,
14 |     furthest_point_sample,
15 |     furthest_point_sample_with_dist,
16 | )
17 | from .gather_points import gather_points
18 | from .group_points import GroupAll, QueryAndGroup, group_points, grouping_operation
19 | from .interpolate import three_interpolate, three_nn
20 | from .knn import knn
21 | from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
22 | from .paconv import PAConv, PAConvCUDA, assign_score_withk
23 | from .bev_pool import *
24 | from .pointnet_modules import (
25 |     PAConvCUDASAModule,
26 |     PAConvCUDASAModuleMSG,
27 |     PAConvSAModule,
28 |     PAConvSAModuleMSG,
29 |     PointFPModule,
30 |     PointSAModule,
31 |     PointSAModuleMSG,
32 |     build_sa_module,
33 | )
34 | from .roiaware_pool3d import (
35 |     RoIAwarePool3d,
36 |     points_in_boxes_batch,
37 |     points_in_boxes_cpu,
38 |     points_in_boxes_gpu,
39 | )
40 | # from .sparse_block import SparseBasicBlock, SparseBottleneck, make_sparse_convmodule
41 | from .sparse_block import SparseBasicBlock, make_sparse_convmodule
42 | from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
43 | from .spconv import *
44 | 
# Public names re-exported by ``from mmdet3d.ops import *``.
# Every entry must exist in this module, otherwise the star-import raises
# AttributeError. "soft_nms" and "batched_nms" were listed here without being
# imported by any explicit import in this file, so they are dropped; the
# repeated "get_compiler_version" / "get_compiling_cuda_version" entries are
# listed once.
# NOTE(review): if soft_nms / batched_nms are still wanted, import them
# explicitly at the top of this file and add them back here.
__all__ = [
    "nms",
    "RoIAlign",
    "roi_align",
    "get_compiler_version",
    "get_compiling_cuda_version",
    "NaiveSyncBatchNorm1d",
    "NaiveSyncBatchNorm2d",
    "Voxelization",
    "voxelization",
    "dynamic_scatter",
    "DynamicScatter",
    "sigmoid_focal_loss",
    "SigmoidFocalLoss",
    "SparseBasicBlock",
    # "SparseBottleneck",
    "RoIAwarePool3d",
    "points_in_boxes_gpu",
    "points_in_boxes_cpu",
    "make_sparse_convmodule",
    "ball_query",
    "knn",
    "furthest_point_sample",
    "furthest_point_sample_with_dist",
    "three_interpolate",
    "three_nn",
    "gather_points",
    "grouping_operation",
    "group_points",
    "GroupAll",
    "QueryAndGroup",
    "PointSAModule",
    "PointSAModuleMSG",
    "PointFPModule",
    "points_in_boxes_batch",
    "assign_score_withk",
    "Points_Sampler",
    "build_sa_module",
    "PAConv",
    "PAConvCUDA",
    "PAConvSAModuleMSG",
    "PAConvSAModule",
    "PAConvCUDASAModule",
    "PAConvCUDASAModuleMSG",
    "bev_pool",
]
95 | 


--------------------------------------------------------------------------------
/mmdet3d/core/utils/gaussian.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | 
 5 | def gaussian_2d(shape, sigma=1):
 6 |     """Generate gaussian map.
 7 | 
 8 |     Args:
 9 |         shape (list[int]): Shape of the map.
10 |         sigma (float): Sigma to generate gaussian map.
11 |             Defaults to 1.
12 | 
13 |     Returns:
14 |         np.ndarray: Generated gaussian map.
15 |     """
16 |     m, n = [(ss - 1.0) / 2.0 for ss in shape]
17 |     y, x = np.ogrid[-m : m + 1, -n : n + 1]
18 | 
19 |     h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
20 |     h[h < np.finfo(h.dtype).eps * h.max()] = 0
21 |     return h
22 | 
23 | 
def draw_heatmap_gaussian(heatmap, center, radius, k=1):
    """Stamp a Gaussian peak onto a heatmap, keeping the element-wise maximum.

    Args:
        heatmap (torch.Tensor): Heatmap to draw on. It is modified in place.
        center (torch.Tensor): (x, y) position of the peak; each coordinate
            is converted with ``int()``.
        radius (int): Radius of the Gaussian. The kernel has
            ``2 * radius + 1`` cells per side.
        k (int): Factor the Gaussian is multiplied by. Defaults to 1.

    Returns:
        torch.Tensor: The same ``heatmap`` object that was passed in.
    """
    kernel_size = 2 * radius + 1
    kernel = gaussian_2d((kernel_size, kernel_size), sigma=kernel_size / 6)

    cx = int(center[0])
    cy = int(center[1])
    rows, cols = heatmap.shape[0:2]

    # Clip the kernel extent where it would cross the heatmap border.
    left = min(cx, radius)
    right = min(cols - cx, radius + 1)
    top = min(cy, radius)
    bottom = min(rows - cy, radius + 1)

    heatmap_patch = heatmap[cy - top : cy + bottom, cx - left : cx + right]
    kernel_patch = kernel[radius - top : radius + bottom, radius - left : radius + right]
    kernel_patch = torch.from_numpy(kernel_patch).to(heatmap.device, torch.float32)

    # Nothing to draw when either patch is empty.
    if min(kernel_patch.shape) == 0 or min(heatmap_patch.shape) == 0:
        return heatmap

    # heatmap_patch is a view, so writing into it updates ``heatmap``.
    torch.max(heatmap_patch, kernel_patch * k, out=heatmap_patch)
    return heatmap
53 | 
54 | 
def gaussian_radius(det_size, min_overlap=0.5):
    """Compute the Gaussian radius for a box of the given size.

    Three candidate radii are derived from three quadratic equations in the
    box height and width, and the smallest one is returned.

    NOTE(review): r2 and r3 are divided by 2 rather than by ``2 * a2`` /
    ``2 * a3`` as the quadratic formula would give. This is left untouched
    because changing it alters the returned radii; confirm it is intended.

    Args:
        det_size (tuple[torch.Tensor]): (height, width) of the detection.
        min_overlap (float): Gaussian overlap. Defaults to 0.5.

    Returns:
        torch.Tensor: The smallest of the three candidate radii.
    """
    height, width = det_size
    size_sum = height + width

    # First candidate.
    a1 = 1
    b1 = size_sum
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    disc1 = b1**2 - 4 * a1 * c1
    r1 = (b1 + torch.sqrt(disc1)) / 2

    # Second candidate.
    a2 = 4
    b2 = 2 * size_sum
    c2 = (1 - min_overlap) * width * height
    disc2 = b2**2 - 4 * a2 * c2
    r2 = (b2 + torch.sqrt(disc2)) / 2

    # Third candidate.
    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * size_sum
    c3 = (min_overlap - 1) * width * height
    disc3 = b3**2 - 4 * a3 * c3
    r3 = (b3 + torch.sqrt(disc3)) / 2

    return min(r1, r2, r3)
85 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/group_points/src/group_points.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from
 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp
 3 | 
 4 | #include <THC/THC.h>
 5 | #include <cuda.h>
 6 | #include <cuda_runtime_api.h>
 7 | #include <torch/extension.h>
 8 | #include <torch/serialize/tensor.h>
 9 | 
10 | #include <vector>
11 | 
12 | extern THCState *state;
13 | 
// Forward pass entry point exposed to Python (defined below).
int group_points_wrapper(int b, int c, int n, int npoints, int nsample,
                         at::Tensor points_tensor, at::Tensor idx_tensor,
                         at::Tensor out_tensor);

// Launcher for the forward kernel; it is declared here and not defined in
// this file.
void group_points_kernel_launcher(int b, int c, int n, int npoints, int nsample,
                                  const float *points, const int *idx,
                                  float *out, cudaStream_t stream);

// Backward pass entry point exposed to Python (defined below).
int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample,
                              at::Tensor grad_out_tensor, at::Tensor idx_tensor,
                              at::Tensor grad_points_tensor);

// Launcher for the backward kernel; it is declared here and not defined in
// this file.
void group_points_grad_kernel_launcher(int b, int c, int n, int npoints,
                                       int nsample, const float *grad_out,
                                       const int *idx, float *grad_points,
                                       cudaStream_t stream);
30 | 
31 | int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample,
32 |                               at::Tensor grad_out_tensor, at::Tensor idx_tensor,
33 |                               at::Tensor grad_points_tensor) {
34 |   float *grad_points = grad_points_tensor.data_ptr<float>();
35 |   const int *idx = idx_tensor.data_ptr<int>();
36 |   const float *grad_out = grad_out_tensor.data_ptr<float>();
37 | 
38 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
39 | 
40 |   group_points_grad_kernel_launcher(b, c, n, npoints, nsample, grad_out, idx,
41 |                                     grad_points, stream);
42 |   return 1;
43 | }
44 | 
45 | int group_points_wrapper(int b, int c, int n, int npoints, int nsample,
46 |                          at::Tensor points_tensor, at::Tensor idx_tensor,
47 |                          at::Tensor out_tensor) {
48 |   const float *points = points_tensor.data_ptr<float>();
49 |   const int *idx = idx_tensor.data_ptr<int>();
50 |   float *out = out_tensor.data_ptr<float>();
51 | 
52 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
53 | 
54 |   group_points_kernel_launcher(b, c, n, npoints, nsample, points, idx, out,
55 |                                stream);
56 |   return 1;
57 | }
58 | 
// Python bindings: "forward" maps to group_points_wrapper and "backward" to
// group_points_grad_wrapper. The third argument of each m.def is the
// docstring shown on the Python side.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &group_points_wrapper, "group_points_wrapper");
  m.def("backward", &group_points_grad_wrapper, "group_points_grad_wrapper");
}
63 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from
 2 | // https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
 3 | // Written by Shaoshuai Shi
 4 | // All Rights Reserved 2019.
 5 | 
 6 | #include <assert.h>
 7 | #include <math.h>
 8 | #include <stdio.h>
 9 | #include <torch/extension.h>
10 | #include <torch/serialize/tensor.h>
11 | 
12 | #define CHECK_CONTIGUOUS(x) \
13 |   TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
14 | // #define DEBUG
15 | 
inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,
                                      float &local_x, float &local_y) {
  // Rotates the offset (shift_x, shift_y) by rz + pi/2 to express it in the
  // box-local frame; the result is written to local_x / local_y.
  const float angle = rz + M_PI / 2;
  const float cos_a = cos(angle);
  const float sin_a = sin(angle);
  local_x = shift_x * cos_a + shift_y * (-sin_a);
  local_y = shift_x * sin_a + shift_y * cos_a;
}
24 | 
25 | inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,
26 |                                  float &local_x, float &local_y) {
27 |   // param pt: (x, y, z)
28 |   // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
29 |   // bottom center
30 |   float x = pt[0], y = pt[1], z = pt[2];
31 |   float cx = box3d[0], cy = box3d[1], cz = box3d[2];
32 |   float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
33 |   cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center
34 | 
35 |   if (fabsf(z - cz) > h / 2.0) return 0;
36 |   lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);
37 |   float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
38 |                   (local_y > -w / 2.0) & (local_y < w / 2.0);
39 |   return in_flag;
40 | }
41 | 
42 | int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
43 |                         at::Tensor pts_indices_tensor) {
44 |   // params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the
45 |   // bottom center, each box DO NOT overlaps params pts: (npoints, 3) [x, y, z]
46 |   // in LiDAR coordinate params pts_indices: (N, npoints)
47 | 
48 |   CHECK_CONTIGUOUS(boxes_tensor);
49 |   CHECK_CONTIGUOUS(pts_tensor);
50 |   CHECK_CONTIGUOUS(pts_indices_tensor);
51 | 
52 |   int boxes_num = boxes_tensor.size(0);
53 |   int pts_num = pts_tensor.size(0);
54 | 
55 |   const float *boxes = boxes_tensor.data_ptr<float>();
56 |   const float *pts = pts_tensor.data_ptr<float>();
57 |   int *pts_indices = pts_indices_tensor.data_ptr<int>();
58 | 
59 |   float local_x = 0, local_y = 0;
60 |   for (int i = 0; i < boxes_num; i++) {
61 |     for (int j = 0; j < pts_num; j++) {
62 |       int cur_in_flag =
63 |           check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);
64 |       pts_indices[i * pts_num + j] = cur_in_flag;
65 |     }
66 |   }
67 | 
68 |   return 1;
69 | }
70 | 


--------------------------------------------------------------------------------
/tools/create_combined_checkpoint.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser, Namespace
 2 | from typing import List
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | def get_args() -> Namespace:
 8 |     """
 9 |     Parse given arguments for create_swint_checkpoint function.
10 | 
11 |     Returns:
12 |         Namespace: parsed arguments
13 |     """
14 |     parser = ArgumentParser()
15 | 
16 |     parser.add_argument("-l", type=str, required=True, help="path to lidar model")
17 |     parser.add_argument("-c", type=str, required=True, help="path to camera model")
18 |     parser.add_argument("-t", type=str, required=True, help="path to save target model")
19 |     parser.add_argument("--full", action="store_true", help="whether to save full model")
20 | 
21 |     return parser.parse_args()
22 | 
23 | 
def convert_to_combined_pth(
    lidar_model_path: str,
    camera_model_path: str,
    target_save_path: str,
    camera_prefixes: List[str] = [
        "encoders.camera.backbone",
        "encoders.camera.vtransform",
        "encoders.camera.neck",
    ],
    blacklist_prefixes: List[str] = [
        "temporal_fuser",
    ],
) -> None:
    """
    Merge a lidar checkpoint and a camera checkpoint into one checkpoint.

    The result is the lidar checkpoint with its ``state_dict`` replaced by:
    every lidar entry whose key does not start with a blacklisted prefix, plus
    every camera entry whose key starts with one of ``camera_prefixes`` and
    with none of ``blacklist_prefixes``. Camera entries overwrite lidar entries
    that share a key.

    Args:
        lidar_model_path (str): checkpoint providing the base weights; must
            contain a ``"state_dict"`` entry
        camera_model_path (str): checkpoint providing the camera weights; must
            contain a ``"state_dict"`` entry
        target_save_path (str): where the combined checkpoint is written
        camera_prefixes (List[str]): key prefixes to copy from the camera model
        blacklist_prefixes (List[str]): key prefixes excluded from the result

    Returns:
        None
    """
    # The list defaults are never mutated; they are only converted to tuples
    # because str.startswith accepts a tuple of prefixes (an empty tuple
    # matches nothing).
    camera_prefix_tuple = tuple(camera_prefixes)
    blacklist_tuple = tuple(blacklist_prefixes)

    lidar_model = torch.load(lidar_model_path, map_location=torch.device("cpu"))
    camera_model = torch.load(camera_model_path, map_location=torch.device("cpu"))

    print("total keys in lidar model", len(lidar_model["state_dict"].keys()))
    print("total keys in camera model", len(camera_model["state_dict"].keys()))

    # create a new state dict from the non-blacklisted lidar weights
    new_state_dict = {
        key: value
        for key, value in lidar_model["state_dict"].items()
        if not key.startswith(blacklist_tuple)
    }

    # Add the camera weights. The blacklist is applied by prefix here as well;
    # the previous exact-match test (`key not in blacklist_prefixes`) let
    # blacklisted camera weights through.
    for key, value in camera_model["state_dict"].items():
        if key.startswith(camera_prefix_tuple) and not key.startswith(blacklist_tuple):
            new_state_dict[key] = value

    print("total keys in new state dict", len(new_state_dict.keys()))
    for x in new_state_dict:
        print(x)

    # save the new state dict
    lidar_model["state_dict"] = new_state_dict
    torch.save(lidar_model, target_save_path)
72 | 
73 | 
if __name__ == "__main__":
    args = get_args()
    # With --full nothing is blacklisted; otherwise temporal_fuser weights
    # are left out of the combined checkpoint.
    excluded_prefixes = [] if args.full else ["temporal_fuser"]
    convert_to_combined_pth(
        lidar_model_path=args.l,
        camera_model_path=args.c,
        target_save_path=args.t,
        blacklist_prefixes=excluded_prefixes,
    )
82 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/ball_query/src/ball_query_cuda.cu:
--------------------------------------------------------------------------------
 1 | // Modified from
 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu
 3 | 
 4 | #include <math.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | 
 8 | #define THREADS_PER_BLOCK 256
 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 | 
// For every query point in new_xyz, collects up to nsample indices of points
// in xyz whose squared distance d2 to the query satisfies
// d2 == 0 or min_radius^2 <= d2 < max_radius^2.
// One thread handles one query point (pt_idx) of one batch element (bs_idx).
__global__ void ball_query_kernel(int b, int n, int m,
                                  float min_radius,
                                  float max_radius,
                                  int nsample,
                                  const float *__restrict__ new_xyz,
                                  const float *__restrict__ xyz,
                                  int *__restrict__ idx) {
  // new_xyz: (B, M, 3)
  // xyz: (B, N, 3)
  // output:
  //      idx: (B, M, nsample)
  int bs_idx = blockIdx.y;
  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
  // Threads past the end of the batch or of the query list do nothing.
  if (bs_idx >= b || pt_idx >= m) return;

  // Advance the pointers to this thread's query point, this batch element's
  // point set, and this query's output row.
  new_xyz += bs_idx * m * 3 + pt_idx * 3;
  xyz += bs_idx * n * 3;
  idx += bs_idx * m * nsample + pt_idx * nsample;

  // Distances are compared in squared form.
  float max_radius2 = max_radius * max_radius;
  float min_radius2 = min_radius * min_radius;
  float new_x = new_xyz[0];
  float new_y = new_xyz[1];
  float new_z = new_xyz[2];

  int cnt = 0;
  for (int k = 0; k < n; ++k) {
    float x = xyz[k * 3 + 0];
    float y = xyz[k * 3 + 1];
    float z = xyz[k * 3 + 2];
    float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
               (new_z - z) * (new_z - z);
    // A point at distance 0 is always accepted, even when min_radius > 0.
    if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) {
      // On the first hit, fill every slot with that index so slots that are
      // never overwritten repeat the first neighbour.
      if (cnt == 0) {
        for (int l = 0; l < nsample; ++l) {
          idx[l] = k;
        }
      }
      idx[cnt] = k;
      ++cnt;
      if (cnt >= nsample) break;
    }
  }
  // If no point matched, this query's idx row is left as the caller
  // provided it.
}
55 | 
// Launches ball_query_kernel on `stream` with one thread per query point and
// one grid row per batch element, then checks for a launch error.
void ball_query_kernel_launcher(int b, int n, int m, float min_radius, float max_radius,
                                int nsample, const float *new_xyz, const float *xyz,
                                int *idx, cudaStream_t stream) {
  // new_xyz: (B, M, 3)
  // xyz: (B, N, 3)
  // output:
  //      idx: (B, M, nsample)

  cudaError_t err;

  // Enough blocks of THREADS_PER_BLOCK threads to cover the m query points.
  dim3 blocks(DIVUP(m, THREADS_PER_BLOCK),
              b);  // blockIdx.x(col), blockIdx.y(row)
  dim3 threads(THREADS_PER_BLOCK);

  ball_query_kernel<<<blocks, threads, 0, stream>>>(b, n, m, min_radius, max_radius,
                                                    nsample, new_xyz, xyz, idx);
  // cudaDeviceSynchronize();  // for using printf in kernel function
  // The error is queried right after the launch, without synchronizing.
  err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
    // NOTE: this terminates the whole process instead of reporting the
    // failure to the caller.
    exit(-1);
  }
}
79 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/centerhead/default.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   decoder:
 3 |     backbone:
 4 |       type: GeneralizedResNet
 5 |       in_channels: 336
 6 |       blocks:
 7 |         - [2, 160, 2]
 8 |         - [2, 320, 2]
 9 |         - [2, 640, 1]
10 |     neck:
11 |       type: LSSFPN
12 |       in_indices: [-1, 0]
13 |       in_channels: [640, 160]
14 |       out_channels: 256
15 |       scale_factor: 2
16 |   heads:
17 |     object:
18 |       type: CenterHead
19 |       in_channels: 256
20 |       train_cfg:
21 |         point_cloud_range: ${point_cloud_range}
22 |         grid_size: ${grid_size}
23 |         voxel_size: ${voxel_size}
24 |         out_size_factor: ${out_size_factor}
25 |         dense_reg: 1
26 |         gaussian_overlap: 0.1
27 |         max_objs: 500
28 |         min_radius: 2
29 |         code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
30 |       test_cfg:
31 |         post_center_limit_range: ${post_center_range}
32 |         max_per_img: 500
33 |         max_pool_nms: false
34 |         min_radius: [4, 12, 10, 1, 0.85, 0.175]
35 |         score_threshold: ${score_threshold}
36 |         out_size_factor: ${out_size_factor}
37 |         voxel_size: ${voxel_size[:2]}
38 |         pre_max_size: 1000
39 |         post_max_size: 83
40 |         nms_thr: ${nms_threshold}
41 |         nms_type:
42 |           - rotate # lidar__cuboid__person
43 |           - circle # lidar__cuboid__catenary_pole
44 |           - circle # lidar__cuboid__signal_pole
45 |           - circle # lidar__cuboid__road_vehicle
46 |           - rotate # lidar__cuboid__buffer_stop
47 |         nms_scale:
48 |           - [0.4] # lidar__cuboid__person
49 |           - [1.0] # lidar__cuboid__catenary_pole
50 |           - [1.0] # lidar__cuboid__signal_pole
51 |           - [1.0] # lidar__cuboid__road_vehicle
52 |           - [1.0] # lidar__cuboid__buffer_stop
53 |       tasks:
54 |         - ["lidar__cuboid__person"]
55 |         - ["lidar__cuboid__catenary_pole"]
56 |         - ["lidar__cuboid__signal_pole"]
57 |         - ["lidar__cuboid__road_vehicle"]
58 |         - ["lidar__cuboid__buffer_stop"]
59 |       common_heads:
60 |         reg: [2, 2]
61 |         height: [1, 2]
62 |         dim: [3, 2]
63 |         rot: [2, 2]
64 |       share_conv_channel: 64
65 |       bbox_coder:
66 |         type: CenterPointBBoxCoder
67 |         pc_range: ${point_cloud_range}
68 |         post_center_range: ${post_center_range}
69 |         max_num: 500
70 |         score_threshold: ${score_threshold}
71 |         out_size_factor: 8
72 |         voxel_size: ${voxel_size[:2]}
73 |         code_size: 7
74 |       separate_head:
75 |         type: SeparateHead
76 |         init_bias: -2.19
77 |         final_kernel: 3
78 |       loss_cls:
79 |         type: GaussianFocalLoss
80 |         reduction: mean
81 |       loss_bbox:
82 |         type: L1Loss
83 |         reduction: mean
84 |         loss_weight: 0.25
85 |       norm_bbox: true
86 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/baseline/centerhead/default.yaml:
--------------------------------------------------------------------------------
# CenterHead object-detection settings: decoder (backbone + neck) and the
# object head. Values written as ${...} are filled in from keys that are not
# defined in this file.
model:
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 336
      blocks:
        - [2, 160, 2]
        - [2, 320, 2]
        - [2, 640, 1]
    neck:
      type: LSSFPN
      # in_channels (640, 160) equal the middle values of the last and first
      # `blocks` rows above, matching in_indices [-1, 0].
      in_indices: [-1, 0]
      in_channels: [640, 160]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      type: CenterHead
      # Same value as decoder.neck.out_channels.
      in_channels: 256
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        dense_reg: 1
        gaussian_overlap: 0.1
        max_objs: 500
        min_radius: 2
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
      test_cfg:
        post_center_limit_range: ${post_center_range}
        max_per_img: 500
        max_pool_nms: false
        # NOTE(review): 6 values, while `tasks` below has 7 entries; confirm
        # how the consumer indexes this list.
        min_radius: [4, 12, 10, 1, 0.85, 0.175]
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        pre_max_size: 1000
        post_max_size: 83
        nms_thr: ${nms_threshold}
        # One entry per task, in the same order as `tasks`.
        nms_type:
          - circle # CAR
          - rotate # TRUCK
          - rotate # VAN
          - rotate # BUS, TRAILER
          - rotate # MOTORCYCLE, BICYCLE
          - rotate # PEDESTRIAN
          - rotate # EMERGENCY_VEHICLE
        # One list per task, with one value per class of that task.
        nms_scale:
          - [1.0] # CAR
          - [1.0] # TRUCK
          - [1.0] # VAN
          - [1.0, 1.0] # BUS, TRAILER
          - [1.0, 1.0] # MOTORCYCLE, BICYCLE
          - [1.0] # PEDESTRIAN
          - [1.0] # EMERGENCY_VEHICLE
      # Each inner list is one task and names the classes it covers.
      tasks:
        - ["CAR"]
        - ["TRUCK"]
        - ["VAN"]
        - ["BUS", "TRAILER"]
        - ["MOTORCYCLE", "BICYCLE"]
        - ["PEDESTRIAN"]
        - ["EMERGENCY_VEHICLE"]
      common_heads:
        reg: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      share_conv_channel: 64
      bbox_coder:
        type: CenterPointBBoxCoder
        pc_range: ${point_cloud_range}
        post_center_range: ${post_center_range}
        max_num: 500
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 7
      separate_head:
        type: SeparateHead
        init_bias: -2.19
        final_kernel: 3
      loss_cls:
        type: GaussianFocalLoss
        reduction: mean
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      norm_bbox: true
92 | 


--------------------------------------------------------------------------------
/configs/tumtraf-i/temporal/centerhead/default.yaml:
--------------------------------------------------------------------------------
# CenterHead object-detection settings: decoder (backbone + neck) and the
# object head. Values written as ${...} are filled in from keys that are not
# defined in this file.
model:
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 336
      blocks:
        - [2, 160, 2]
        - [2, 320, 2]
        - [2, 640, 1]
    neck:
      type: LSSFPN
      # in_channels (640, 160) equal the middle values of the last and first
      # `blocks` rows above, matching in_indices [-1, 0].
      in_indices: [-1, 0]
      in_channels: [640, 160]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      type: CenterHead
      # Same value as decoder.neck.out_channels.
      in_channels: 256
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        dense_reg: 1
        gaussian_overlap: 0.1
        max_objs: 500
        min_radius: 2
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
      test_cfg:
        post_center_limit_range: ${post_center_range}
        max_per_img: 500
        max_pool_nms: false
        # NOTE(review): 6 values, while `tasks` below has 7 entries; confirm
        # how the consumer indexes this list.
        min_radius: [4, 12, 10, 1, 0.85, 0.175]
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        pre_max_size: 1000
        post_max_size: 83
        nms_thr: ${nms_threshold}
        # One entry per task, in the same order as `tasks`.
        nms_type:
          - circle # CAR
          - rotate # TRUCK
          - rotate # VAN
          - rotate # BUS, TRAILER
          - rotate # MOTORCYCLE, BICYCLE
          - rotate # PEDESTRIAN
          - rotate # EMERGENCY_VEHICLE
        # One list per task, with one value per class of that task.
        nms_scale:
          - [1.0] # CAR
          - [1.0] # TRUCK
          - [1.0] # VAN
          - [1.0, 1.0] # BUS, TRAILER
          - [1.0, 1.0] # MOTORCYCLE, BICYCLE
          - [1.0] # PEDESTRIAN
          - [1.0] # EMERGENCY_VEHICLE
      # Each inner list is one task and names the classes it covers.
      tasks:
        - ["CAR"]
        - ["TRUCK"]
        - ["VAN"]
        - ["BUS", "TRAILER"]
        - ["MOTORCYCLE", "BICYCLE"]
        - ["PEDESTRIAN"]
        - ["EMERGENCY_VEHICLE"]
      common_heads:
        reg: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      share_conv_channel: 64
      bbox_coder:
        type: CenterPointBBoxCoder
        pc_range: ${point_cloud_range}
        post_center_range: ${post_center_range}
        max_num: 500
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 7
      separate_head:
        type: SeparateHead
        init_bias: -2.19
        final_kernel: 3
      loss_cls:
        type: GaussianFocalLoss
        reduction: mean
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      norm_bbox: true
92 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/centerhead/default.yaml:
--------------------------------------------------------------------------------
# CenterHead object-detection settings: decoder (backbone + neck) and the
# object head. Values written as ${...} are filled in from keys that are not
# defined in this file.
model:
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 336
      blocks:
        - [2, 160, 2]
        - [2, 320, 2]
        - [2, 640, 1]
    neck:
      type: LSSFPN
      # in_channels (640, 160) equal the middle values of the last and first
      # `blocks` rows above, matching in_indices [-1, 0].
      in_indices: [-1, 0]
      in_channels: [640, 160]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      type: CenterHead
      # Same value as decoder.neck.out_channels.
      in_channels: 256
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        dense_reg: 1
        gaussian_overlap: 0.1
        max_objs: 500
        min_radius: 2
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
      test_cfg:
        post_center_limit_range: ${post_center_range}
        max_per_img: 500
        max_pool_nms: false
        # NOTE(review): 6 values, while `tasks` below has 7 entries; confirm
        # how the consumer indexes this list.
        min_radius: [4, 12, 10, 1, 0.85, 0.175]
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        pre_max_size: 1000
        post_max_size: 83
        nms_thr: ${nms_threshold}
        # One entry per task, in the same order as `tasks`.
        nms_type:
          - circle # CAR
          - rotate # TRUCK
          - rotate # VAN
          - rotate # BUS, TRAILER
          - rotate # MOTORCYCLE, BICYCLE
          - rotate # PEDESTRIAN
          - rotate # EMERGENCY_VEHICLE
        # One list per task, with one value per class of that task.
        nms_scale:
          - [1.0] # CAR
          - [1.0] # TRUCK
          - [1.0] # VAN
          - [1.0, 1.0] # BUS, TRAILER
          - [1.0, 1.0] # MOTORCYCLE, BICYCLE
          - [1.0] # PEDESTRIAN
          - [1.0] # EMERGENCY_VEHICLE
      # Each inner list is one task and names the classes it covers.
      # NOTE(review): this file lives under configs/osdar23, yet the class
      # names here (and in nms_type / nms_scale above) are CAR, TRUCK, ...
      # rather than lidar__cuboid__* names; confirm this is intended and not
      # left over from a copy of another dataset's config.
      tasks:
        - ["CAR"]
        - ["TRUCK"]
        - ["VAN"]
        - ["BUS", "TRAILER"]
        - ["MOTORCYCLE", "BICYCLE"]
        - ["PEDESTRIAN"]
        - ["EMERGENCY_VEHICLE"]
      common_heads:
        reg: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      share_conv_channel: 64
      bbox_coder:
        type: CenterPointBBoxCoder
        pc_range: ${point_cloud_range}
        post_center_range: ${post_center_range}
        max_num: 500
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 7
      separate_head:
        type: SeparateHead
        init_bias: -2.19
        final_kernel: 3
      loss_cls:
        type: GaussianFocalLoss
        reduction: mean
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      norm_bbox: true
92 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal/centerhead/default.yaml:
--------------------------------------------------------------------------------
# CenterHead object-detection settings: decoder (backbone + neck) and the
# object head. Values written as ${...} are filled in from keys that are not
# defined in this file.
model:
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 336
      blocks:
        - [2, 160, 2]
        - [2, 320, 2]
        - [2, 640, 1]
    neck:
      type: LSSFPN
      # in_channels (640, 160) equal the middle values of the last and first
      # `blocks` rows above, matching in_indices [-1, 0].
      in_indices: [-1, 0]
      in_channels: [640, 160]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      type: CenterHead
      # Same value as decoder.neck.out_channels.
      in_channels: 256
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        dense_reg: 1
        gaussian_overlap: 0.1
        max_objs: 500
        min_radius: 2
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
      test_cfg:
        post_center_limit_range: ${post_center_range}
        max_per_img: 500
        max_pool_nms: false
        # NOTE(review): 6 values, while `tasks` below has 5 entries; confirm
        # how the consumer indexes this list.
        min_radius: [4, 12, 10, 1, 0.85, 0.175]
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        pre_max_size: 1000
        post_max_size: 83
        nms_thr: ${nms_threshold}
        # One entry per task, in the same order as `tasks`.
        nms_type:
          - rotate # lidar__cuboid__person
          - circle # lidar__cuboid__catenary_pole
          - circle # lidar__cuboid__signal_pole
          - circle # lidar__cuboid__road_vehicle
          - rotate # lidar__cuboid__buffer_stop
        # One list per task, with one value per class of that task.
        nms_scale:
          - [0.4] # lidar__cuboid__person
          - [1.0] # lidar__cuboid__catenary_pole
          - [1.0] # lidar__cuboid__signal_pole
          - [1.0] # lidar__cuboid__road_vehicle
          - [1.0] # lidar__cuboid__buffer_stop
      # Each inner list is one task and names the classes it covers.
      tasks:
        - ["lidar__cuboid__person"]
        - ["lidar__cuboid__catenary_pole"]
        - ["lidar__cuboid__signal_pole"]
        - ["lidar__cuboid__road_vehicle"]
        - ["lidar__cuboid__buffer_stop"]
      common_heads:
        reg: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      share_conv_channel: 64
      bbox_coder:
        type: CenterPointBBoxCoder
        pc_range: ${point_cloud_range}
        post_center_range: ${post_center_range}
        max_num: 500
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 7
      separate_head:
        type: SeparateHead
        init_bias: -2.19
        final_kernel: 3
      loss_cls:
        type: GaussianFocalLoss
        reduction: mean
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      norm_bbox: true
86 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/interpolate/src/three_nn_cuda.cu:
--------------------------------------------------------------------------------
 1 | // Modified from
 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu
 3 | 
 4 | #include <math.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | 
 8 | #define THREADS_PER_BLOCK 256
 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 | 
// For every query point in `unknown`, scan all points of the same batch in
// `known` and record the three smallest squared Euclidean distances together
// with the indices of the points that produced them.
//
//   unknown: (B, N, 3) query coordinates
//   known:   (B, M, 3) reference coordinates
//   dist2:   (B, N, 3) output, squared distances in ascending order
//   idx:     (B, N, 3) output, indices into the M axis of `known`
//
// One thread handles one query point: blockIdx.y selects the batch and the
// flattened x index selects the point inside that batch.
__global__ void three_nn_kernel(int b, int n, int m,
                                const float *__restrict__ unknown,
                                const float *__restrict__ known,
                                float *__restrict__ dist2,
                                int *__restrict__ idx) {
  const int batch = blockIdx.y;
  const int point = blockIdx.x * blockDim.x + threadIdx.x;
  // Threads past the end of the batch / point range have nothing to do.
  if (batch >= b || point >= n) return;

  // The query row and both output rows share the same (batch, point) offset.
  const int row_offset = batch * n * 3 + point * 3;
  const float *query = unknown + row_offset;
  const float *refs = known + batch * m * 3;
  float *out_dist = dist2 + row_offset;
  int *out_idx = idx + row_offset;

  const float qx = query[0];
  const float qy = query[1];
  const float qz = query[2];

  // Running top-3, kept sorted so that first <= second <= third.
  double first = 1e40, second = 1e40, third = 1e40;
  int first_i = 0, second_i = 0, third_i = 0;

  for (int k = 0; k < m; ++k) {
    const float dx = qx - refs[k * 3 + 0];
    const float dy = qy - refs[k * 3 + 1];
    const float dz = qz - refs[k * 3 + 2];
    const float d = dx * dx + dy * dy + dz * dz;

    // Not strictly better than the current third place: no update needed.
    if (!(d < third)) continue;

    if (d < first) {
      // New best: shift the previous first and second down one slot.
      third = second;
      third_i = second_i;
      second = first;
      second_i = first_i;
      first = d;
      first_i = k;
    } else if (d < second) {
      // New runner-up: only the previous second moves down.
      third = second;
      third_i = second_i;
      second = d;
      second_i = k;
    } else {
      third = d;
      third_i = k;
    }
  }

  out_dist[0] = first;
  out_dist[1] = second;
  out_dist[2] = third;
  out_idx[0] = first_i;
  out_idx[1] = second_i;
  out_idx[2] = third_i;
}
66 | 
// Host-side entry point that launches three_nn_kernel on `stream`.
//
//   unknown: (B, N, 3)
//   known:   (B, M, 3)
//   output:
//        dist2: (B, N, 3)
//        idx:   (B, N, 3)
//
// If the launch reports an error, the message is printed to stderr and the
// process exits with status -1.
void three_nn_kernel_launcher(int b, int n, int m, const float *unknown,
                              const float *known, float *dist2, int *idx,
                              cudaStream_t stream) {
  // x dimension covers the N query points in chunks of THREADS_PER_BLOCK,
  // y dimension covers the batch.
  const dim3 threads(THREADS_PER_BLOCK);
  const dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b);

  three_nn_kernel<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known,
                                                  dist2, idx);

  const cudaError_t launch_status = cudaGetLastError();
  if (launch_status == cudaSuccess) return;

  fprintf(stderr, "CUDA kernel failed : %s\n",
          cudaGetErrorString(launch_status));
  exit(-1);
}
90 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/pointnet_modules/point_fp_module.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from mmcv.cnn import ConvModule
 3 | from mmcv.runner import BaseModule, force_fp32
 4 | from torch import nn as nn
 5 | from typing import List
 6 | 
 7 | from mmdet3d.ops import three_interpolate, three_nn
 8 | 
 9 | 
class PointFPModule(BaseModule):
    """Feature propagation (FP) layer used in PointNet-style networks.

    Features attached to a ``source`` point set are interpolated onto a
    ``target`` point set, optionally concatenated with the target's own
    features, and passed through a stack of 1x1 ``Conv2d`` modules.

    Args:
        mlp_channels (list[int]): Channel sizes of the MLP; consecutive
            entries form the (in, out) channels of each layer.
        norm_cfg (dict): Normalization config handed to every ConvModule.
            Default: dict(type='BN2d').
        init_cfg (dict, optional): Forwarded to ``BaseModule``.
    """

    def __init__(self, mlp_channels: List[int], norm_cfg: dict = dict(type="BN2d"), init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.fp16_enabled = False
        self.mlps = nn.Sequential()
        # One 1x1 conv block per consecutive (in, out) channel pair.
        channel_pairs = zip(mlp_channels[:-1], mlp_channels[1:])
        for layer_idx, (in_channels, out_channels) in enumerate(channel_pairs):
            conv_block = ConvModule(
                in_channels,
                out_channels,
                kernel_size=(1, 1),
                stride=(1, 1),
                conv_cfg=dict(type="Conv2d"),
                norm_cfg=norm_cfg,
            )
            self.mlps.add_module(f"layer{layer_idx}", conv_block)

    @force_fp32()
    def forward(
        self,
        target: torch.Tensor,
        source: torch.Tensor,
        target_feats: torch.Tensor,
        source_feats: torch.Tensor,
    ) -> torch.Tensor:
        """Propagate ``source_feats`` onto the ``target`` points.

        Args:
            target (Tensor): (B, n, 3) xyz positions of the points that
                receive features.
            source (Tensor): (B, m, 3) xyz positions of the points that
                carry ``source_feats``. When None, ``source_feats`` is
                broadcast to all n target points instead of interpolated.
            target_feats (Tensor): (B, C1, n) features already present on
                the target points; may be None.
            source_feats (Tensor): (B, C2, m) features to be propagated.

        Return:
            Tensor: (B, M, N) M = mlp[-1], tensor of the target features.
        """
        if source is None:
            # No source coordinates: broadcast the features along the point axis.
            interpolated = source_feats.expand(*source_feats.shape[:2], target.size(1))
        else:
            # Inverse-distance weights over the three nearest source points,
            # normalised so they sum to one for every target point.
            dist, idx = three_nn(target, source)
            inv_dist = 1.0 / (dist + 1e-8)
            weight = inv_dist / torch.sum(inv_dist, dim=2, keepdim=True)
            interpolated = three_interpolate(source_feats, idx, weight)

        if target_feats is None:
            fused = interpolated
        else:
            fused = torch.cat([interpolated, target_feats], dim=1)  # (B, C2 + C1, n)

        # Conv2d expects a 4-D input: add a trailing singleton axis for the
        # MLP and drop it again afterwards.
        fused = self.mlps(fused.unsqueeze(-1))
        return fused.squeeze(-1)
80 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/bev_pool/bev_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from . import bev_pool_ext
 4 | 
 5 | __all__ = ["bev_pool"]
 6 | 
 7 | 
 8 | class QuickCumsum(torch.autograd.Function):
 9 |     @staticmethod
10 |     def forward(ctx, x, geom_feats, ranks):
11 |         x = x.cumsum(0)
12 |         kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool)
13 |         kept[:-1] = ranks[1:] != ranks[:-1]
14 | 
15 |         x, geom_feats = x[kept], geom_feats[kept]
16 |         x = torch.cat((x[:1], x[1:] - x[:-1]))
17 | 
18 |         # save kept for backward
19 |         ctx.save_for_backward(kept)
20 | 
21 |         # no gradient for geom_feats
22 |         ctx.mark_non_differentiable(geom_feats)
23 | 
24 |         return x, geom_feats
25 | 
26 |     @staticmethod
27 |     def backward(ctx, gradx, gradgeom):
28 |         (kept,) = ctx.saved_tensors
29 |         back = torch.cumsum(kept, 0)
30 |         back[kept] -= 1
31 | 
32 |         val = gradx[back]
33 | 
34 |         return val, None, None
35 | 
36 | 
class QuickCumsumCuda(torch.autograd.Function):
    """Autograd wrapper around the compiled ``bev_pool_ext`` forward/backward ops.

    Rows of ``x`` are grouped into runs of equal, adjacent ``ranks``; the
    start index and length of every run are handed to the extension.
    """

    @staticmethod
    def forward(ctx, x, geom_feats, ranks, B, D, H, W):
        """Compute run boundaries from ``ranks`` and call ``bev_pool_forward``."""
        num_rows = x.shape[0]

        # True on the first row of every run of equal ranks (row 0 always
        # opens a run).
        run_start = torch.ones(num_rows, device=x.device, dtype=torch.bool)
        run_start[1:] = ranks[1:] != ranks[:-1]

        interval_starts = torch.where(run_start)[0].int()
        # Each run spans up to the next run's start; the last run extends to
        # the end of the input.
        interval_lengths = torch.zeros_like(interval_starts)
        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
        interval_lengths[-1] = num_rows - interval_starts[-1]

        geom_feats = geom_feats.int()

        pooled = bev_pool_ext.bev_pool_forward(
            x, geom_feats, interval_lengths, interval_starts, B, D, H, W
        )

        ctx.save_for_backward(interval_starts, interval_lengths, geom_feats)
        ctx.saved_shapes = B, D, H, W
        return pooled

    @staticmethod
    def backward(ctx, out_grad):
        """Call ``bev_pool_backward`` with the intervals saved in forward."""
        interval_starts, interval_lengths, geom_feats = ctx.saved_tensors
        B, D, H, W = ctx.saved_shapes

        x_grad = bev_pool_ext.bev_pool_backward(
            out_grad.contiguous(),
            geom_feats,
            interval_lengths,
            interval_starts,
            B,
            D,
            H,
            W,
        )

        # One slot per forward input; only ``x`` receives a gradient.
        return x_grad, None, None, None, None, None, None
81 | 
82 | 
def bev_pool(feats, coords, B, D, H, W):
    """Pool rows of ``feats`` that share the same 4-column coordinate.

    Args:
        feats (Tensor): Features, one row per entry of ``coords``.
        coords (Tensor): Integer coordinates with (at least) four columns,
            combined into a single rank per row.
        B, D, H, W (int): Grid extents forwarded to the CUDA op; ``W``, ``D``
            and ``B`` also serve as the strides of the rank.

    Returns:
        Tensor: Output of the CUDA op with its last axis moved to position 1,
        made contiguous.
    """
    assert feats.shape[0] == coords.shape[0]

    # Flatten the four coordinate columns into one sortable key per row.
    stride_col0 = W * D * B
    stride_col1 = D * B
    stride_col2 = B
    ranks = (
        coords[:, 0] * stride_col0
        + coords[:, 1] * stride_col1
        + coords[:, 2] * stride_col2
        + coords[:, 3]
    )

    # Sorting brings rows with identical coordinates next to each other,
    # which is what the interval-based pooling relies on.
    order = ranks.argsort()
    feats = feats[order]
    coords = coords[order]
    ranks = ranks[order]

    pooled = QuickCumsumCuda.apply(feats, coords, ranks, B, D, H, W)
    return pooled.permute(0, 4, 1, 2, 3).contiguous()
98 | 


--------------------------------------------------------------------------------
/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from
 2 | // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp
 3 | 
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include <THC/THC.h>
 6 | #include <torch/extension.h>
 7 | #include <torch/serialize/tensor.h>
 8 | 
 9 | #include <vector>
10 | 
11 | extern THCState *state;
12 | 
// Python-facing entry point (defined later in this file): unpacks the raw
// tensor pointers and forwards them to
// furthest_point_sampling_kernel_launcher on the current CUDA stream.
int furthest_point_sampling_wrapper(int b, int n, int m,
                                    at::Tensor points_tensor,
                                    at::Tensor temp_tensor,
                                    at::Tensor idx_tensor);

// Launcher for the sampling kernel; not defined in this file (presumably
// implemented in the accompanying CUDA source — confirm).
void furthest_point_sampling_kernel_launcher(int b, int n, int m,
                                             const float *dataset, float *temp,
                                             int *idxs, cudaStream_t stream);

// Python-facing entry point (defined later in this file) for the "with_dist"
// variant; forwards to furthest_point_sampling_with_dist_kernel_launcher.
int furthest_point_sampling_with_dist_wrapper(int b, int n, int m,
                                              at::Tensor points_tensor,
                                              at::Tensor temp_tensor,
                                              at::Tensor idx_tensor);

// Launcher for the "with_dist" sampling kernel; not defined in this file
// (presumably implemented in the accompanying CUDA source — confirm).
void furthest_point_sampling_with_dist_kernel_launcher(int b, int n, int m,
                                                       const float *dataset,
                                                       float *temp, int *idxs,
                                                       cudaStream_t stream);
31 | 
32 | int furthest_point_sampling_wrapper(int b, int n, int m,
33 |                                     at::Tensor points_tensor,
34 |                                     at::Tensor temp_tensor,
35 |                                     at::Tensor idx_tensor) {
36 |   const float *points = points_tensor.data_ptr<float>();
37 |   float *temp = temp_tensor.data_ptr<float>();
38 |   int *idx = idx_tensor.data_ptr<int>();
39 | 
40 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
41 |   furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);
42 |   return 1;
43 | }
44 | 
45 | int furthest_point_sampling_with_dist_wrapper(int b, int n, int m,
46 |                                               at::Tensor points_tensor,
47 |                                               at::Tensor temp_tensor,
48 |                                               at::Tensor idx_tensor) {
49 | 
50 |   const float *points = points_tensor.data<float>();
51 |   float *temp = temp_tensor.data<float>();
52 |   int *idx = idx_tensor.data<int>();
53 | 
54 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
55 |   furthest_point_sampling_with_dist_kernel_launcher(b, n, m, points, temp, idx, stream);
56 |   return 1;
57 | }
58 | 
// Python bindings: both wrappers are exported under their C++ names, and the
// same name is reused as the docstring of each binding.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper,
        "furthest_point_sampling_wrapper");
  m.def("furthest_point_sampling_with_dist_wrapper",
        &furthest_point_sampling_with_dist_wrapper,
        "furthest_point_sampling_with_dist_wrapper");
}
66 | 


--------------------------------------------------------------------------------
/mmdet3d/core/points/cam_points.py:
--------------------------------------------------------------------------------
 1 | from .base_points import BasePoints
 2 | 
 3 | 
 4 | class CameraPoints(BasePoints):
 5 |     """Points of instances in CAM coordinates.
 6 | 
 7 |     Args:
 8 |         tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
 9 |         points_dim (int): Number of the dimension of a point.
10 |             Each row is (x, y, z). Default to 3.
11 |         attribute_dims (dict): Dictionary to indicate the meaning of extra
12 |             dimension. Default to None.
13 | 
14 |     Attributes:
15 |         tensor (torch.Tensor): Float matrix of N x points_dim.
16 |         points_dim (int): Integer indicating the dimension of a point.
17 |             Each row is (x, y, z, ...).
18 |         attribute_dims (bool): Dictionary to indicate the meaning of extra
19 |             dimension. Default to None.
20 |         rotation_axis (int): Default rotation axis for points rotation.
21 |     """
22 | 
23 |     def __init__(self, tensor, points_dim=3, attribute_dims=None):
24 |         super(CameraPoints, self).__init__(
25 |             tensor, points_dim=points_dim, attribute_dims=attribute_dims
26 |         )
27 |         self.rotation_axis = 1
28 | 
29 |     def flip(self, bev_direction="horizontal"):
30 |         """Flip the boxes in BEV along given BEV direction."""
31 |         if bev_direction == "horizontal":
32 |             self.tensor[:, 0] = -self.tensor[:, 0]
33 |         elif bev_direction == "vertical":
34 |             self.tensor[:, 2] = -self.tensor[:, 2]
35 | 
36 |     def in_range_bev(self, point_range):
37 |         """Check whether the points are in the given range.
38 | 
39 |         Args:
40 |             point_range (list | torch.Tensor): The range of point
41 |                 in order of (x_min, y_min, x_max, y_max).
42 | 
43 |         Returns:
44 |             torch.Tensor: Indicating whether each point is inside \
45 |                 the reference range.
46 |         """
47 |         in_range_flags = (
48 |             (self.tensor[:, 0] > point_range[0])
49 |             & (self.tensor[:, 2] > point_range[1])
50 |             & (self.tensor[:, 0] < point_range[2])
51 |             & (self.tensor[:, 2] < point_range[3])
52 |         )
53 |         return in_range_flags
54 | 
55 |     def convert_to(self, dst, rt_mat=None):
56 |         """Convert self to ``dst`` mode.
57 | 
58 |         Args:
59 |             dst (:obj:`CoordMode`): The target Point mode.
60 |             rt_mat (np.ndarray | torch.Tensor): The rotation and translation
61 |                 matrix between different coordinates. Defaults to None.
62 |                 The conversion from `src` coordinates to `dst` coordinates
63 |                 usually comes along the change of sensors, e.g., from camera
64 |                 to LiDAR. This requires a transformation matrix.
65 | 
66 |         Returns:
67 |             :obj:`BasePoints`: The converted point of the same type \
68 |                 in the `dst` mode.
69 |         """
70 |         from mmdet3d.core.bbox import Coord3DMode
71 | 
72 |         return Coord3DMode.convert_point(point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)
73 | 


--------------------------------------------------------------------------------
/mmdet3d/core/points/depth_points.py:
--------------------------------------------------------------------------------
 1 | from .base_points import BasePoints
 2 | 
 3 | 
 4 | class DepthPoints(BasePoints):
 5 |     """Points of instances in DEPTH coordinates.
 6 | 
 7 |     Args:
 8 |         tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
 9 |         points_dim (int): Number of the dimension of a point.
10 |             Each row is (x, y, z). Default to 3.
11 |         attribute_dims (dict): Dictionary to indicate the meaning of extra
12 |             dimension. Default to None.
13 | 
14 |     Attributes:
15 |         tensor (torch.Tensor): Float matrix of N x points_dim.
16 |         points_dim (int): Integer indicating the dimension of a point.
17 |             Each row is (x, y, z, ...).
18 |         attribute_dims (bool): Dictionary to indicate the meaning of extra
19 |             dimension. Default to None.
20 |         rotation_axis (int): Default rotation axis for points rotation.
21 |     """
22 | 
23 |     def __init__(self, tensor, points_dim=3, attribute_dims=None):
24 |         super(DepthPoints, self).__init__(
25 |             tensor, points_dim=points_dim, attribute_dims=attribute_dims
26 |         )
27 |         self.rotation_axis = 2
28 | 
29 |     def flip(self, bev_direction="horizontal"):
30 |         """Flip the boxes in BEV along given BEV direction."""
31 |         if bev_direction == "horizontal":
32 |             self.tensor[:, 0] = -self.tensor[:, 0]
33 |         elif bev_direction == "vertical":
34 |             self.tensor[:, 1] = -self.tensor[:, 1]
35 | 
36 |     def in_range_bev(self, point_range):
37 |         """Check whether the points are in the given range.
38 | 
39 |         Args:
40 |             point_range (list | torch.Tensor): The range of point
41 |                 in order of (x_min, y_min, x_max, y_max).
42 | 
43 |         Returns:
44 |             torch.Tensor: Indicating whether each point is inside \
45 |                 the reference range.
46 |         """
47 |         in_range_flags = (
48 |             (self.tensor[:, 0] > point_range[0])
49 |             & (self.tensor[:, 1] > point_range[1])
50 |             & (self.tensor[:, 0] < point_range[2])
51 |             & (self.tensor[:, 1] < point_range[3])
52 |         )
53 |         return in_range_flags
54 | 
55 |     def convert_to(self, dst, rt_mat=None):
56 |         """Convert self to ``dst`` mode.
57 | 
58 |         Args:
59 |             dst (:obj:`CoordMode`): The target Point mode.
60 |             rt_mat (np.ndarray | torch.Tensor): The rotation and translation
61 |                 matrix between different coordinates. Defaults to None.
62 |                 The conversion from `src` coordinates to `dst` coordinates
63 |                 usually comes along the change of sensors, e.g., from camera
64 |                 to LiDAR. This requires a transformation matrix.
65 | 
66 |         Returns:
67 |             :obj:`BasePoints`: The converted point of the same type \
68 |                 in the `dst` mode.
69 |         """
70 |         from mmdet3d.core.bbox import Coord3DMode
71 | 
72 |         return Coord3DMode.convert_point(point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
73 | 


--------------------------------------------------------------------------------
/mmdet3d/core/points/lidar_points.py:
--------------------------------------------------------------------------------
 1 | from .base_points import BasePoints
 2 | 
 3 | 
 4 | class LiDARPoints(BasePoints):
 5 |     """Points of instances in LIDAR coordinates.
 6 | 
 7 |     Args:
 8 |         tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
 9 |         points_dim (int): Number of the dimension of a point.
10 |             Each row is (x, y, z). Default to 3.
11 |         attribute_dims (dict): Dictionary to indicate the meaning of extra
12 |             dimension. Default to None.
13 | 
14 |     Attributes:
15 |         tensor (torch.Tensor): Float matrix of N x points_dim.
16 |         points_dim (int): Integer indicating the dimension of a point.
17 |             Each row is (x, y, z, ...).
18 |         attribute_dims (bool): Dictionary to indicate the meaning of extra
19 |             dimension. Default to None.
20 |         rotation_axis (int): Default rotation axis for points rotation.
21 |     """
22 | 
23 |     def __init__(self, tensor, points_dim=3, attribute_dims=None):
24 |         super(LiDARPoints, self).__init__(
25 |             tensor, points_dim=points_dim, attribute_dims=attribute_dims
26 |         )
27 |         self.rotation_axis = 2
28 | 
29 |     def flip(self, bev_direction="horizontal"):
30 |         """Flip the boxes in BEV along given BEV direction."""
31 |         if bev_direction == "horizontal":
32 |             self.tensor[:, 1] = -self.tensor[:, 1]
33 |         elif bev_direction == "vertical":
34 |             self.tensor[:, 0] = -self.tensor[:, 0]
35 | 
36 |     def in_range_bev(self, point_range):
37 |         """Check whether the points are in the given range.
38 | 
39 |         Args:
40 |             point_range (list | torch.Tensor): The range of point
41 |                 in order of (x_min, y_min, x_max, y_max).
42 | 
43 |         Returns:
44 |             torch.Tensor: Indicating whether each point is inside \
45 |                 the reference range.
46 |         """
47 |         in_range_flags = (
48 |             (self.tensor[:, 0] > point_range[0])
49 |             & (self.tensor[:, 1] > point_range[1])
50 |             & (self.tensor[:, 0] < point_range[2])
51 |             & (self.tensor[:, 1] < point_range[3])
52 |         )
53 |         return in_range_flags
54 | 
55 |     def convert_to(self, dst, rt_mat=None):
56 |         """Convert self to ``dst`` mode.
57 | 
58 |         Args:
59 |             dst (:obj:`CoordMode`): The target Point mode.
60 |             rt_mat (np.ndarray | torch.Tensor): The rotation and translation
61 |                 matrix between different coordinates. Defaults to None.
62 |                 The conversion from `src` coordinates to `dst` coordinates
63 |                 usually comes along the change of sensors, e.g., from camera
64 |                 to LiDAR. This requires a transformation matrix.
65 | 
66 |         Returns:
67 |             :obj:`BasePoints`: The converted point of the same type \
68 |                 in the `dst` mode.
69 |         """
70 |         from mmdet3d.core.bbox import Coord3DMode
71 | 
72 |         return Coord3DMode.convert_point(point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
73 | 


--------------------------------------------------------------------------------
/tools/debug_train.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import copy
 3 | import os
 4 | import random
 5 | import time
 6 | 
 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "2"
 8 | 
 9 | import socket
10 | 
11 | import numpy as np
12 | import torch
13 | from mmcv import Config
14 | from torchpack import distributed as dist
15 | from torchpack.environ import auto_set_run_dir, set_run_dir
16 | from torchpack.utils.config import configs
17 | 
18 | from mmdet3d.apis import train_model
19 | from mmdet3d.datasets import build_dataset
20 | from mmdet3d.models import build_model
21 | from mmdet3d.utils import convert_sync_batchnorm, get_root_logger, recursive_eval
22 | 
23 | 
def get_free_tcp_port() -> int:
    """Return a TCP port number that the OS reported as free.

    A throw-away socket is bound to port 0 so the OS assigns a port; the
    socket is closed before returning, so the port is only known to have been
    free at the time of the call.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("0.0.0.0", 0))
        free_port = probe.getsockname()[1]
    finally:
        probe.close()
    return free_port
29 | 
30 | 
def main():
    """Debug entry point: build the dataset and model from a config and train.

    Steps, in order: initialise torchpack's distributed backend on a free
    local port, parse the command line, load and evaluate the config, set up
    the run directory and logger, seed the RNGs, build dataset and model, and
    call ``train_model`` with validation enabled.
    """
    dist.init(master_host=f"localhost:{get_free_tcp_port()}")

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config", metavar="FILE", help="config file")
    arg_parser.add_argument("--run-dir", metavar="DIR", help="run directory")
    # Unrecognised arguments are kept and applied as config overrides.
    args, overrides = arg_parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(overrides)

    cfg = Config(recursive_eval(configs), filename=args.config)

    torch.backends.cudnn.benchmark = cfg.cudnn_benchmark
    torch.cuda.set_device(dist.local_rank())

    # Use the requested run directory, or let torchpack pick one.
    if args.run_dir is not None:
        set_run_dir(args.run_dir)
    else:
        args.run_dir = auto_set_run_dir()
    cfg.run_dir = args.run_dir

    # Keep a copy of the resolved config next to the run outputs.
    cfg.dump(os.path.join(cfg.run_dir, "configs.yaml"))

    # The logger is created before any other step so later stages can log.
    timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    logger = get_root_logger(log_file=os.path.join(cfg.run_dir, f"{timestamp}.log"))

    logger.info(f"Config:\n{cfg.pretty_text}")

    # Seed Python, NumPy and PyTorch when a seed is configured.
    if cfg.seed is not None:
        logger.info(f"Set random seed to {cfg.seed}, deterministic mode: {cfg.deterministic}")
        random.seed(cfg.seed)
        np.random.seed(cfg.seed)
        torch.manual_seed(cfg.seed)
        if cfg.deterministic:
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False

    datasets = [build_dataset(cfg.data.train)]

    model = build_model(cfg.model)
    model.init_weights()

    if cfg.get("sync_bn", None):
        # A truthy non-dict value (e.g. `true`) means "convert everything".
        if not isinstance(cfg["sync_bn"], dict):
            cfg["sync_bn"] = dict(exclude=[])
        excluded = cfg["sync_bn"]["exclude"]
        model = convert_sync_batchnorm(model, exclude=excluded)

    logger.info(f"Model:\n{model}")
    train_model(
        model,
        datasets,
        cfg,
        distributed=True,
        validate=True,
        timestamp=timestamp,
    )
94 | 
95 | if __name__ == "__main__":
96 |     main()
97 | 


--------------------------------------------------------------------------------
/configs/osdar23/baseline/transfusion/default.yaml:
--------------------------------------------------------------------------------
# Default model section for the OSDAR23 baseline with a TransFusion head.
# Every ${...} value (point_cloud_range, voxel_size, grid_size,
# out_size_factor, no_classes, post_center_range, score_threshold) refers to a
# name that is not defined in this file — presumably supplied by a parent
# config and resolved by the config loader; verify against the loader.
model:
  decoder:
    # Two-stage SECOND backbone: 256 input channels, stages of 128 and 256
    # output channels with strides 1 and 2, five layers per stage.
    backbone:
      type: SECOND
      in_channels: 256
      out_channels: [128, 256]
      layer_nums: [5, 5]
      layer_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      conv_cfg:
        type: Conv2d
        bias: false
    # in_channels mirrors the backbone's out_channels. The two 256-channel
    # outputs add up to the 512 in_channels of the head below (presumably
    # concatenated by the neck — confirm).
    neck:
      type: SECONDFPN
      in_channels: [128, 256]
      out_channels: [256, 256]
      upsample_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      upsample_cfg:
        type: deconv
        bias: false
      use_conv_for_no_stride: true
  heads:
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      hidden_channel: 128
      num_classes: ${no_classes}
      num_decoder_layers: 1
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      train_cfg:
        dataset: OSDAR23
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        # Eight weights, one per entry of the 8-value box code
        # (see bbox_coder.code_size below).
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      test_cfg:
        dataset: OSDAR23
        grid_size: ${grid_size}
        out_size_factor: ${out_size_factor}
        # Only the first two entries of voxel_size / point_cloud_range are
        # passed on at test time.
        voxel_size: ${voxel_size[:2]}
        pc_range: ${point_cloud_range[:2]}
        # NOTE(review): null presumably disables NMS in the head — confirm.
        nms_type: null
      # Regression branches. The first number of each pair sums to
      # 2 + 1 + 3 + 2 = 8, matching code_size; the second number is
      # presumably the number of conv layers per branch — confirm.
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        post_center_range: ${post_center_range}
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 8
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_heatmap:
        type: GaussianFocalLoss
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
105 | 


--------------------------------------------------------------------------------
/configs/osdar23/temporal-gru/transfusion/default.yaml:
--------------------------------------------------------------------------------
# Default model section for the OSDAR23 temporal-gru setup with a TransFusion
# head. Every ${...} value (point_cloud_range, voxel_size, grid_size,
# out_size_factor, no_classes, post_center_range, score_threshold) refers to a
# name that is not defined in this file — presumably supplied by a parent
# config and resolved by the config loader; verify against the loader.
model:
  decoder:
    # Two-stage SECOND backbone: 256 input channels, stages of 128 and 256
    # output channels with strides 1 and 2, five layers per stage.
    backbone:
      type: SECOND
      in_channels: 256
      out_channels: [128, 256]
      layer_nums: [5, 5]
      layer_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      conv_cfg:
        type: Conv2d
        bias: false
    # in_channels mirrors the backbone's out_channels. The two 256-channel
    # outputs add up to the 512 in_channels of the head below (presumably
    # concatenated by the neck — confirm).
    neck:
      type: SECONDFPN
      in_channels: [128, 256]
      out_channels: [256, 256]
      upsample_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      upsample_cfg:
        type: deconv
        bias: false
      use_conv_for_no_stride: true
  heads:
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      hidden_channel: 128
      num_classes: ${no_classes}
      num_decoder_layers: 1
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      train_cfg:
        dataset: OSDAR23
        point_cloud_range: ${point_cloud_range}
        grid_size: ${grid_size}
        voxel_size: ${voxel_size}
        out_size_factor: ${out_size_factor}
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        # Eight weights, one per entry of the 8-value box code
        # (see bbox_coder.code_size below).
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      test_cfg:
        dataset: OSDAR23
        grid_size: ${grid_size}
        out_size_factor: ${out_size_factor}
        # Only the first two entries of voxel_size / point_cloud_range are
        # passed on at test time.
        voxel_size: ${voxel_size[:2]}
        pc_range: ${point_cloud_range[:2]}
        # NOTE(review): null presumably disables NMS in the head — confirm.
        nms_type: null
      # Regression branches. The first number of each pair sums to
      # 2 + 1 + 3 + 2 = 8, matching code_size; the second number is
      # presumably the number of conv layers per branch — confirm.
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        post_center_range: ${post_center_range}
        score_threshold: ${score_threshold}
        out_size_factor: ${out_size_factor}
        voxel_size: ${voxel_size[:2]}
        code_size: 8
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_heatmap:
        type: GaussianFocalLoss
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
105 | 


--------------------------------------------------------------------------------