├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── ovuni3detr.png └── uni3detr.png ├── extra_tools ├── analysis_tools │ └── eval_metric.py ├── create_data.py ├── create_data.sh ├── data_converter │ ├── create_unified_gt_database.py │ └── nuscenes_converter.py ├── dist_test.sh ├── dist_train.sh ├── eval_metric.py ├── get_flops.py ├── test.py └── train.py ├── model-index.yml ├── projects ├── __init__.py ├── configs │ ├── ov_uni3detr │ │ ├── ov_uni3detr_sunrgbd_mm.py │ │ ├── ov_uni3detr_sunrgbd_pc.py │ │ └── ov_uni3detr_sunrgbd_rgb.py │ └── uni3detr │ │ ├── uni3detr_kitti_3classes.py │ │ ├── uni3detr_kitti_car.py │ │ ├── uni3detr_nuscenes.py │ │ ├── uni3detr_scannet.py │ │ ├── uni3detr_scannet_large.py │ │ └── uni3detr_sunrgbd.py └── mmdet3d_plugin │ ├── __init__.py │ ├── core │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ └── hungarian_assigner_3d.py │ │ ├── bbox_merging.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ └── util.py │ ├── indoor_eval.py │ └── merge_all_augs.py │ ├── datasets │ ├── __init__.py │ ├── nuscenes_dataset.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── dbsampler.py │ │ ├── formatting.py │ │ ├── loading_3d.py │ │ ├── test_time_aug.py │ │ └── transform_3d.py │ └── sunrgbd_dataset_ov.py │ └── models │ ├── backbones │ ├── __init__.py │ ├── second_3d.py │ └── vovnet.py │ ├── dense_heads │ ├── __init__.py │ ├── uni3detr_head.py │ └── uni3detr_head_clip.py │ ├── detectors │ ├── __init__.py │ ├── ov_uni3detr.py │ └── uni3detr.py │ ├── losses │ ├── __init__.py │ └── rdiouloss.py │ ├── necks │ ├── __init__.py │ └── second3d_fpn.py │ ├── pts_encoder │ ├── __init__.py │ └── sparse_encoder_hd.py │ └── utils │ ├── __init__.py │ ├── grid_mask.py │ ├── uni3d_viewtrans.py │ └── uni3detr_transformer.py ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.ipynb 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # Environments 83 | .env 84 | .venv 85 | env/ 86 | venv/ 87 | ENV/ 88 | env.bak/ 89 | venv.bak/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # cython generated cpp 105 | data 106 | .vscode 107 | .idea 108 | 109 | # custom 110 | *.pkl 111 | *.pkl.json 112 | *.log.json 113 | work_dirs/ 114 | exps/ 115 | *~ 116 | mmdet3d/.mim 117 | 118 | # Pytorch 119 | *.pth 120 | 121 | # demo 122 | data/ 123 | *.obj 124 | *.ply 125 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include mmdet3d/.mim/model-index.yml 2 | include requirements/*.txt 3 | recursive-include mmdet3d/.mim/ops *.cpp *.cu *.h *.cc 4 | recursive-include mmdet3d/.mim/configs *.py *.yml 5 | recursive-include mmdet3d/.mim/tools *.sh *.py 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Uni3DETR & OV-Uni3DETR 3 | 4 | This includes code for: 5 | our NeurIPS 2023 paper 6 | [**Uni3DETR: Unified 3D Detection Transformer**](https://arxiv.org/pdf/2310.05699) 7 | 8 |
 9 | ![Uni3DETR](docs/uni3detr.png) 10 | 
11 | 12 | our ECCV 2024 paper 13 | [**OV-Uni3DETR: Towards Unified Open-Vocabulary 3D Object Detection via Cycle-Modality Propagation**](https://arxiv.org/pdf/2403.19580) 14 | 15 |
 16 | ![OV-Uni3DETR](docs/ovuni3detr.png) 17 | 
18 | 19 | Uni3DETR provides a unified structure for both indoor and outdoor 3D object detection. 20 | Building on this architecture, OV-Uni3DETR further introduces multi-modal learning and open-vocabulary learning, unifying both modalities and categories within a single structure. 21 | 22 | ## Preparation 23 | This project is based on [MMDetection3D](https://github.com/open-mmlab/mmdetection3d) and can be set up as follows. 24 | * Install MMDetection3D [v1.0.0rc5](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0rc5) following [the instructions](https://github.com/open-mmlab/mmdetection3d/blob/v1.0.0rc5/docs/getting_started.md). 25 | * Copy our project and related files into the installed MMDetection3D directory: 26 | ```bash 27 | cp -r projects mmdetection3d/ 28 | cp -r extra_tools mmdetection3d/ 29 | ``` 30 | * Prepare the datasets following the [MMDetection3D dataset instructions](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0rc5/docs/en/datasets). 31 | * Uni3DETR dataset preparation: 32 | 33 | SUN RGB-D dataset: 34 | The directory structure after processing should be as follows: 35 | ``` 36 | sunrgbd 37 | ├── README.md 38 | ├── matlab 39 | │   ├── ... 40 | ├── OFFICIAL_SUNRGBD 41 | │   ├── ... 42 | ├── sunrgbd_trainval 43 | │   ├── ... 44 | ├── points 45 | ├── sunrgbd_infos_train.pkl 46 | ├── sunrgbd_infos_val.pkl 47 | ``` 48 | ScanNet dataset: 49 | 50 | After downloading the dataset following MMDetection3D, run ``python scripts/scannet_globalallign.py`` to perform global alignment in advance. Note that this operation modifies the data files in place; if you have any concerns, back them up first. 51 | 52 | The directory structure should be as follows: 53 | 54 | ``` 55 | scannet 56 | ├── meta_data 57 | ├── batch_load_scannet_data.py 58 | ├── load_scannet_data.py 59 | ├── scannet_utils.py 60 | ├── README.md 61 | ├── scans 62 | ├── scans_test 63 | ├── scannet_instance_data 64 | ├── points 65 | │   ├── xxxxx.bin 66 | ├── instance_mask 67 | │   ├── xxxxx.bin 68 | ├── semantic_mask 69 | │   ├── xxxxx.bin 70 | ├── seg_info 71 | │   ├── train_label_weight.npy 72 | │   ├── train_resampled_scene_idxs.npy 73 | │   ├── val_label_weight.npy 74 | │   ├── val_resampled_scene_idxs.npy 75 | ├── posed_images 76 | │   ├── scenexxxx_xx 77 | │   │   ├── xxxxxx.txt 78 | │   │   ├── xxxxxx.jpg 79 | │   │   ├── intrinsic.txt 80 | ├── scannet_infos_train.pkl 81 | ├── scannet_infos_val.pkl 82 | ├── scannet_infos_test.pkl 83 | ``` 84 | 85 | Preparation steps for the outdoor KITTI and nuScenes datasets are exactly the same as in MMDetection3D. 86 | 87 | * OV-Uni3DETR dataset preparation: 88 | 89 | SUN RGB-D dataset: 90 | 91 | The SUN RGB-D preparation steps are the same as the Uni3DETR steps above; the only difference is the annotation file, which can be downloaded directly from [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing). We will release the code for generating the training annotation files soon. 92 | 93 | 94 | ## Training 95 | ```bash 96 | bash extra_tools/dist_train.sh ${CFG_FILE} ${NUM_GPUS} 97 | ``` 98 | 99 | ## Evaluation 100 | ```bash 101 | bash extra_tools/dist_test.sh ${CFG_FILE} ${CKPT} ${NUM_GPUS} --eval=bbox 102 | ``` 103 | 104 | ## Uni3DETR models 105 | We provide results and pretrained models on SUN RGB-D, ScanNet, KITTI and nuScenes (corresponding to Tab. 1, Tab. 2 and Tab. 3 of our paper). 
106 | | Dataset | mAP (%) | download | 107 | |---------------------------------------------|:-------:|:-------:| 108 | | **indoor** | 109 | | [SUN RGB-D](projects/configs/uni3detr/uni3detr_sunrgbd.py) | 67.0 | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) | 110 | | [ScanNet](projects/configs/uni3detr/uni3detr_scannet_large.py) | 71.7 | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) | 111 | | **outdoor** | 112 | | [KITTI (3 classes)](projects/configs/uni3detr/uni3detr_kitti_3classes.py) | 86.57 (moderate car) | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) | 113 | | [KITTI (car)](projects/configs/uni3detr/uni3detr_kitti_car.py) | 86.74 (moderate car) | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) | 114 | | [nuScenes](projects/configs/uni3detr/uni3detr_nuscenes.py) | 61.7 | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) | 115 | 116 | -------------------------------------------------------------------------------- /docs/ovuni3detr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhenyuw16/Uni3DETR/15cb08a7ddfc2e4f0ae5a5a7b9ec6a7be8175399/docs/ovuni3detr.png -------------------------------------------------------------------------------- /docs/uni3detr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhenyuw16/Uni3DETR/15cb08a7ddfc2e4f0ae5a5a7b9ec6a7be8175399/docs/uni3detr.png -------------------------------------------------------------------------------- /extra_tools/analysis_tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import mmcv 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet3d.datasets import build_dataset 8 | from mmdet.utils import update_data_root 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 13 | 'results saved in pkl format') 14 | parser.add_argument('config', help='Config of the model') 15 | parser.add_argument('pkl_results', help='Results in pickle format') 16 | parser.add_argument( 17 | '--format-only', 18 | action='store_true', 19 | help='Format the output results without performing evaluation. It is ' 20 | 'useful when you want to format the result to a specific format and ' 21 | 'submit it to the test server') 22 | parser.add_argument( 23 | '--eval', 24 | type=str, 25 | nargs='+', 26 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 27 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 28 | parser.add_argument( 29 | '--cfg-options', 30 | nargs='+', 31 | action=DictAction, 32 | help='override some settings in the used config, the key-value pair ' 33 | 'in xxx=yyy format will be merged into config file. If the value to ' 34 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 35 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 36 | 'Note that the quotation marks are necessary and that no white space ' 37 | 'is allowed.') 38 | parser.add_argument( 39 | '--eval-options', 40 | nargs='+', 41 | action=DictAction, 42 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 43 | 'format will be kwargs for dataset.evaluate() function') 44 | args = parser.parse_args() 45 | return args 46 | 47 | 48 | def main(): 49 | args = parse_args() 50 | 51 | cfg = Config.fromfile(args.config) 52 | 53 | # update data root according to MMDET_DATASETS 54 | update_data_root(cfg) 55 | 56 | # import modules from plguin/xx, registry will be updated 57 | if hasattr(cfg, 'plugin'): 58 | if cfg.plugin: 59 | import importlib 60 | if hasattr(cfg, 'plugin_dir'): 61 | plugin_dir = cfg.plugin_dir 62 | _module_dir = os.path.dirname(plugin_dir) 63 | _module_dir = _module_dir.split('/') 64 | _module_path = _module_dir[0] 65 | 66 | for m in _module_dir[1:]: 67 | _module_path = _module_path + '.' + m 68 | print(_module_path) 69 | plg_lib = importlib.import_module(_module_path) 70 | else: 71 | # import dir is the dirpath for the config file 72 | _module_dir = os.path.dirname(args.config) 73 | _module_dir = _module_dir.split('/') 74 | _module_path = _module_dir[0] 75 | for m in _module_dir[1:]: 76 | _module_path = _module_path + '.' + m 77 | print(_module_path) 78 | plg_lib = importlib.import_module(_module_path) 79 | 80 | assert args.eval or args.format_only, ( 81 | 'Please specify at least one operation (eval/format the results) with ' 82 | 'the argument "--eval", "--format-only"') 83 | if args.eval and args.format_only: 84 | raise ValueError('--eval and --format_only cannot be both specified') 85 | 86 | if args.cfg_options is not None: 87 | cfg.merge_from_dict(args.cfg_options) 88 | cfg.data.test.test_mode = True 89 | 90 | dataset = build_dataset(cfg.data.test) 91 | outputs = mmcv.load(args.pkl_results) 92 | 93 | kwargs = {} if args.eval_options is None else args.eval_options 94 | if args.format_only: 95 | dataset.format_results(outputs, **kwargs) 96 | if args.eval: 97 | eval_kwargs = cfg.get('evaluation', {}).copy() 98 | # hard-code way to remove EvalHook args 99 | for key in [ 100 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 101 | 'rule' 102 | ]: 103 | eval_kwargs.pop(key, None) 104 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 105 | print(dataset.evaluate(outputs, **eval_kwargs)) 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /extra_tools/create_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from os import path as osp 4 | 5 | from data_converter import nuscenes_converter as nuscenes_converter 6 | from data_converter.create_unified_gt_database import create_groundtruth_database 7 | 8 | 9 | def nuscenes_data_prep(root_path, 10 | info_prefix, 11 | version, 12 | dataset_name, 13 | out_dir, 14 | max_sweeps=10): 15 | """Prepare data related to nuScenes dataset. 16 | 17 | Related data consists of '.pkl' files recording basic infos, 18 | 2D annotations and groundtruth database. 19 | 20 | Args: 21 | root_path (str): Path of dataset root. 22 | info_prefix (str): The prefix of info filenames. 23 | version (str): Dataset version. 24 | dataset_name (str): The dataset class name. 25 | out_dir (str): Output directory of the groundtruth database info. 
26 | max_sweeps (int): Number of input consecutive frames. Default: 10 27 | """ 28 | #nuscenes_converter.create_nuscenes_infos( 29 | # root_path, info_prefix, version=version, max_sweeps=max_sweeps) 30 | 31 | if version == 'v1.0-test': 32 | # info_test_path = osp.join(root_path, f'{info_prefix}_infos_test.pkl') 33 | # nuscenes_converter.export_2d_annotation( 34 | # root_path, info_test_path, version=version) 35 | return 36 | 37 | # info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl') 38 | # info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl') 39 | # nuscenes_converter.export_2d_annotation( 40 | # root_path, info_train_path, version=version) 41 | # nuscenes_converter.export_2d_annotation( 42 | # root_path, info_val_path, version=version) 43 | create_groundtruth_database(dataset_name, root_path, info_prefix, 44 | f'{out_dir}/{info_prefix}_infos_train.pkl') 45 | 46 | 47 | parser = argparse.ArgumentParser(description='Data converter arg parser') 48 | parser.add_argument('dataset', metavar='nuscenes', help='name of the dataset') 49 | parser.add_argument( 50 | '--root-path', 51 | type=str, 52 | default='./data/nuscenes', 53 | help='specify the root path of dataset') 54 | parser.add_argument( 55 | '--version', 56 | type=str, 57 | default='v1.0', 58 | required=False, 59 | help='specify the dataset version, no need for nuscenes') 60 | parser.add_argument( 61 | '--max-sweeps', 62 | type=int, 63 | default=10, 64 | required=False, 65 | help='specify sweeps of lidar per example') 66 | parser.add_argument( 67 | '--out-dir', 68 | type=str, 69 | default='./data/nuscenes', 70 | required='False', 71 | help='name of info pkl') 72 | parser.add_argument('--extra-tag', type=str, default='nuscenes') 73 | parser.add_argument( 74 | '--workers', type=int, default=4, help='number of threads to be used') 75 | args = parser.parse_args() 76 | 77 | if __name__ == '__main__': 78 | if args.dataset == 'nuscenes' and args.version != 'v1.0-mini': 79 | train_version = f'{args.version}-trainval' 80 | nuscenes_data_prep( 81 | root_path=args.root_path, 82 | info_prefix=args.extra_tag, 83 | version=train_version, 84 | dataset_name='NuScenesSweepDataset', 85 | out_dir=args.out_dir, 86 | max_sweeps=args.max_sweeps) 87 | test_version = f'{args.version}-test' 88 | nuscenes_data_prep( 89 | root_path=args.root_path, 90 | info_prefix=args.extra_tag, 91 | version=test_version, 92 | dataset_name='NuScenesSweepDataset', 93 | out_dir=args.out_dir, 94 | max_sweeps=args.max_sweeps) 95 | elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini': 96 | train_version = f'{args.version}' 97 | nuscenes_data_prep( 98 | root_path=args.root_path, 99 | info_prefix=args.extra_tag, 100 | version=train_version, 101 | dataset_name='NuScenesSweepDataset', 102 | out_dir=args.out_dir, 103 | max_sweeps=args.max_sweeps) 104 | -------------------------------------------------------------------------------- /extra_tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-1} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | JOB_NAME=create_data 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python3 -u tools/create_data.py kitti 
\ 23 | --root-path ./data/kitti \ 24 | --out-dir ./data/kitti \ 25 | --extra-tag kitti 26 | -------------------------------------------------------------------------------- /extra_tools/data_converter/create_unified_gt_database.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import pickle 5 | import argparse 6 | import os 7 | import importlib 8 | 9 | from mmcv import track_iter_progress 10 | from os import path as osp 11 | 12 | from mmdet3d.core.bbox import box_np_ops as box_np_ops 13 | from mmdet3d.datasets import build_dataset 14 | from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps 15 | 16 | 17 | def create_groundtruth_database(dataset_class_name, 18 | data_path, 19 | info_prefix, 20 | info_path=None, 21 | used_classes=None, 22 | database_save_path=None, 23 | db_info_save_path=None, 24 | with_mask=False): 25 | """Given the raw data, generate the ground truth database. 26 | 27 | Args: 28 | dataset_class_name (str): Name of the input dataset. 29 | data_path (str): Path of the data. 30 | info_prefix (str): Prefix of the info file. 31 | info_path (str): Path of the info file. 32 | Default: None. 33 | mask_anno_path (str): Path of the mask_anno. 34 | Default: None. 35 | used_classes (list[str]): Classes have been used. 36 | Default: None. 37 | database_save_path (str): Path to save database. 38 | Default: None. 39 | db_info_save_path (str): Path to save db_info. 40 | Default: None. 41 | relative_path (bool): Whether to use relative path. 42 | Default: True. 43 | with_mask (bool): Whether to use mask. 44 | Default: False. 45 | """ 46 | print(f'Create GT Database of {dataset_class_name}') 47 | dataset_cfg = dict( 48 | type=dataset_class_name, data_root=data_path, ann_file=info_path, return_gt_info=True) 49 | if dataset_class_name == 'NuScenesSweepDataset': 50 | dataset_cfg.update( 51 | use_valid_flag=True, 52 | pipeline=[ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=5, 57 | use_dim=5), 58 | dict( 59 | type='LoadPointsFromMultiSweeps', 60 | sweeps_num=10, 61 | use_dim=[0, 1, 2, 3, 4], 62 | pad_empty_sweeps=True, 63 | remove_close=True), 64 | dict( 65 | type='LoadAnnotations3D', 66 | with_bbox_3d=True, 67 | with_label_3d=True) 68 | ]) 69 | 70 | dataset = build_dataset(dataset_cfg) 71 | 72 | if database_save_path is None: 73 | database_save_path = osp.join(data_path, f'{info_prefix}_gt_database') 74 | if db_info_save_path is None: 75 | db_info_save_path = osp.join(data_path, 76 | f'{info_prefix}_dbinfos_train.pkl') 77 | database_pts_path = osp.join(database_save_path, 'pts_dir') 78 | database_img_path = osp.join(database_save_path, 'img_dir') 79 | mmcv.mkdir_or_exist(database_save_path) 80 | mmcv.mkdir_or_exist(database_pts_path) 81 | mmcv.mkdir_or_exist(database_img_path) 82 | all_db_infos = dict() 83 | 84 | group_counter = 0 85 | for j in track_iter_progress(list(range(len(dataset)))): 86 | 87 | input_dict = dataset.get_data_info(j) 88 | dataset.pre_pipeline(input_dict) 89 | example = dataset.pipeline(input_dict) 90 | annos = example['ann_info'] 91 | image_idx = example['sample_idx'] 92 | points = example['points'].tensor.numpy() 93 | gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy() 94 | names = annos['gt_names'] 95 | group_dict = dict() 96 | if 'group_ids' in annos: 97 | group_ids = annos['group_ids'] 98 | else: 99 | group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64) 100 | difficulty = 
np.zeros(gt_boxes_3d.shape[0], dtype=np.int32) 101 | if 'difficulty' in annos: 102 | difficulty = annos['difficulty'] 103 | 104 | num_obj = gt_boxes_3d.shape[0] 105 | point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d) 106 | 107 | # load multi-view image 108 | input_img = {} 109 | input_info = {} 110 | for _cam in example['info']['cams']: 111 | cam_info = example['info']['cams'][_cam] 112 | _path = cam_info['data_path'] 113 | _img = mmcv.imread(_path, 'unchanged') 114 | input_img[_cam] = _img 115 | 116 | # obtain lidar to image transformation matrix 117 | lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation']) 118 | lidar2cam_t = cam_info[ 119 | 'sensor2lidar_translation'] @ lidar2cam_r.T 120 | lidar2cam_rt = np.eye(4) 121 | lidar2cam_rt[:3, :3] = lidar2cam_r.T 122 | lidar2cam_rt[3, :3] = -lidar2cam_t 123 | intrinsic = cam_info['cam_intrinsic'] 124 | viewpad = np.eye(4) 125 | viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic 126 | lidar2img_rt = (viewpad @ lidar2cam_rt.T) 127 | 128 | input_info[_cam]={ 129 | 'lidar2img': lidar2img_rt, 130 | 'lidar2cam': lidar2cam_rt, 131 | 'cam_intrinsic': viewpad} 132 | 133 | for i in range(num_obj): 134 | pts_filename = f'{image_idx}_{names[i]}_{i}.bin' 135 | img_filename = f'{image_idx}_{names[i]}_{i}.png' 136 | abs_filepath = osp.join(database_pts_path, pts_filename) 137 | abs_img_filepath = osp.join(database_img_path, img_filename) 138 | rel_filepath = osp.join(f'{info_prefix}_gt_database', 'pts_dir', pts_filename) 139 | rel_img_filepath = osp.join(f'{info_prefix}_gt_database', 'img_dir', img_filename) 140 | 141 | # save point clouds and image patches for each object 142 | gt_points = points[point_indices[:, i]] 143 | gt_points[:, :3] -= gt_boxes_3d[i, :3] 144 | 145 | # with open(abs_filepath, 'w') as f: 146 | # gt_points.tofile(f) 147 | 148 | img_crop, crop_key, crop_depth = find_img_crop(annos['gt_bboxes_3d'][i].corners.numpy(), input_img, input_info, points[point_indices[:, i]]) 149 | if img_crop is not None: 150 | mmcv.imwrite(img_crop, abs_img_filepath) 151 | 152 | if (used_classes is None) or names[i] in used_classes: 153 | db_info = { 154 | 'name': names[i], 155 | 'path': rel_filepath, 156 | 'image_idx': image_idx, 157 | 'image_path': rel_img_filepath if img_crop is not None else '', 158 | 'image_crop_key': crop_key if img_crop is not None else '', 159 | 'image_crop_depth': crop_depth, 160 | 'gt_idx': i, 161 | 'box3d_lidar': gt_boxes_3d[i], 162 | 'num_points_in_gt': gt_points.shape[0], 163 | 'difficulty': difficulty[i], 164 | } 165 | local_group_id = group_ids[i] 166 | # if local_group_id >= 0: 167 | if local_group_id not in group_dict: 168 | group_dict[local_group_id] = group_counter 169 | group_counter += 1 170 | db_info['group_id'] = group_dict[local_group_id] 171 | if 'score' in annos: 172 | db_info['score'] = annos['score'][i] 173 | if names[i] in all_db_infos: 174 | all_db_infos[names[i]].append(db_info) 175 | else: 176 | all_db_infos[names[i]] = [db_info] 177 | 178 | for k, v in all_db_infos.items(): 179 | print(f'load {len(v)} {k} database infos') 180 | 181 | with open(db_info_save_path, 'wb') as f: 182 | pickle.dump(all_db_infos, f) 183 | 184 | 185 | def find_img_crop(gt_boxes_3d, input_img, input_info, points): 186 | coord_3d = np.concatenate([gt_boxes_3d, np.ones_like(gt_boxes_3d[..., :1])], -1) 187 | coord_3d = coord_3d.squeeze(0) 188 | max_crop, crop_key = None, None 189 | crop_area, crop_depth = 0, 0 190 | 191 | for _key in input_img: 192 | lidar2img = np.array(input_info[_key]['lidar2img']) 193 | 
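        # Project the 8 homogeneous box corners (rows of coord_3d, shape (8, 4)) into this
        # camera view: with the row-vector convention built above, each projected row is
        # (u*d, v*d, d, 1), so dividing the first two columns by the depth d below yields
        # pixel coordinates, and column 2 keeps the per-corner depth used for crop_depth.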
coord_img = coord_3d @ lidar2img.T 194 | coord_img[:,:2] /= coord_img[:,2,None] 195 | image_shape = input_img[_key].shape 196 | if (coord_img[2] <= 0).any(): 197 | continue 198 | 199 | avg_depth = coord_img[:,2].mean() 200 | minxy = np.min(coord_img[:,:2], axis=-2) 201 | maxxy = np.max(coord_img[:,:2], axis=-2) 202 | bbox = np.concatenate([minxy, maxxy], axis=-1) 203 | bbox[0::2] = np.clip(bbox[0::2], a_min=0, a_max=image_shape[1]-1) 204 | bbox[1::2] = np.clip(bbox[1::2], a_min=0, a_max=image_shape[0]-1) 205 | bbox = bbox.astype(int) 206 | if ((bbox[2:]-bbox[:2]) <= 10).any(): 207 | continue 208 | 209 | img_crop = input_img[_key][bbox[1]:bbox[3],bbox[0]:bbox[2]] 210 | if img_crop.shape[0] * img_crop.shape[1] > crop_area: 211 | max_crop = img_crop 212 | crop_key = _key 213 | crop_depth = avg_depth 214 | 215 | return max_crop, crop_key, crop_depth 216 | 217 | 218 | if __name__ == '__main__': 219 | parser = argparse.ArgumentParser(description='Data converter arg parser') 220 | parser.add_argument( 221 | '--dataset', 222 | type=str, 223 | default='NuScenesSweepDataset', 224 | required=False, 225 | help='specify dataset name') 226 | parser.add_argument( 227 | '--root-path', 228 | type=str, 229 | default='./data/nuscenes', 230 | help='specify the root path of dataset') 231 | parser.add_argument( 232 | '--version', 233 | type=str, 234 | default='v1.0', 235 | required=False, 236 | help='specify the dataset version, no need for kitti') 237 | parser.add_argument( 238 | '--out-dir', 239 | type=str, 240 | default='./data/nuscenes', 241 | required=False, 242 | help='output data dir') 243 | parser.add_argument( 244 | '--info-path', 245 | type=str, 246 | default='./data/nuscenes/nuscenes_img_pro_infos_train.pkl', 247 | required=False, 248 | help='name of info pkl') 249 | parser.add_argument('--extra-tag', type=str, default='nuscenes_unified') 250 | args = parser.parse_args() 251 | 252 | plugin_dir = 'projects/mmdet3d_plugin/' 253 | _module_dir = os.path.dirname(plugin_dir) 254 | _module_dir = _module_dir.split('/') 255 | _module_path = _module_dir[0] 256 | 257 | for m in _module_dir[1:]: 258 | _module_path = _module_path + '.' + m 259 | print(_module_path) 260 | plg_lib = importlib.import_module(_module_path) 261 | 262 | create_groundtruth_database(args.dataset, args.root_path, args.extra_tag, 263 | args.info_path) -------------------------------------------------------------------------------- /extra_tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /extra_tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29501} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /extra_tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | import argparse 3 | import os 4 | import mmcv 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet3d.datasets import build_dataset 8 | from mmdet.utils import update_data_root 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 13 | 'results saved in pkl format') 14 | parser.add_argument('config', help='Config of the model') 15 | parser.add_argument('pkl_results', help='Results in pickle format') 16 | parser.add_argument( 17 | '--format-only', 18 | action='store_true', 19 | help='Format the output results without perform evaluation. It is' 20 | 'useful when you want to format the result to a specific format and ' 21 | 'submit it to the test server') 22 | parser.add_argument( 23 | '--eval', 24 | type=str, 25 | nargs='+', 26 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 27 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 28 | parser.add_argument( 29 | '--cfg-options', 30 | nargs='+', 31 | action=DictAction, 32 | help='override some settings in the used config, the key-value pair ' 33 | 'in xxx=yyy format will be merged into config file. If the value to ' 34 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 35 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 36 | 'Note that the quotation marks are necessary and that no white space ' 37 | 'is allowed.') 38 | parser.add_argument( 39 | '--eval-options', 40 | nargs='+', 41 | action=DictAction, 42 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 43 | 'format will be kwargs for dataset.evaluate() function') 44 | args = parser.parse_args() 45 | return args 46 | 47 | 48 | def main(): 49 | args = parse_args() 50 | 51 | cfg = Config.fromfile(args.config) 52 | 53 | # update data root according to MMDET_DATASETS 54 | update_data_root(cfg) 55 | 56 | import importlib 57 | if hasattr(cfg, 'plugin_dir'): 58 | plugin_dir = cfg.plugin_dir 59 | _module_dir = os.path.dirname(plugin_dir) 60 | _module_dir = _module_dir.split('/') 61 | _module_path = _module_dir[0] 62 | 63 | for m in _module_dir[1:]: 64 | _module_path = _module_path + '.' 
+ m 65 | print(_module_path) 66 | plg_lib = importlib.import_module(_module_path) 67 | 68 | assert args.eval or args.format_only, ( 69 | 'Please specify at least one operation (eval/format the results) with ' 70 | 'the argument "--eval", "--format-only"') 71 | if args.eval and args.format_only: 72 | raise ValueError('--eval and --format_only cannot be both specified') 73 | 74 | if args.cfg_options is not None: 75 | cfg.merge_from_dict(args.cfg_options) 76 | cfg.data.test.test_mode = True 77 | 78 | dataset = build_dataset(cfg.data.test) 79 | outputs = mmcv.load(args.pkl_results) 80 | 81 | kwargs = {} if args.eval_options is None else args.eval_options 82 | if args.format_only: 83 | dataset.format_results(outputs, **kwargs) 84 | if args.eval: 85 | eval_kwargs = cfg.get('evaluation', {}).copy() 86 | # hard-code way to remove EvalHook args 87 | for key in [ 88 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 89 | 'rule' 90 | ]: 91 | eval_kwargs.pop(key, None) 92 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 93 | print(dataset.evaluate(outputs, **eval_kwargs)) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /extra_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import torch 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet3d.models import build_model 8 | 9 | try: 10 | from mmcv.cnn import get_model_complexity_info 11 | except ImportError: 12 | raise ImportError('Please upgrade mmcv to >0.6.2') 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a detector') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument( 19 | '--shape', 20 | type=int, 21 | nargs='+', 22 | default=[40000, 5], 23 | help='input point cloud size') 24 | parser.add_argument( 25 | '--modality', 26 | type=str, 27 | default='point', 28 | choices=['point', 'image', 'multi'], 29 | help='input data modality') 30 | parser.add_argument( 31 | '--cfg-options', 32 | nargs='+', 33 | action=DictAction, 34 | help='override some settings in the used config, the key-value pair ' 35 | 'in xxx=yyy format will be merged into config file. If the value to ' 36 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 37 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 38 | 'Note that the quotation marks are necessary and that no white space ' 39 | 'is allowed.') 40 | args = parser.parse_args() 41 | return args 42 | 43 | 44 | def main(): 45 | 46 | args = parse_args() 47 | 48 | if args.modality == 'point': 49 | assert len(args.shape) == 2, 'invalid input shape' 50 | input_shape = tuple(args.shape) 51 | elif args.modality == 'image': 52 | if len(args.shape) == 1: 53 | input_shape = (3, args.shape[0], args.shape[0]) 54 | elif len(args.shape) == 2: 55 | input_shape = (3, ) + tuple(args.shape) 56 | else: 57 | raise ValueError('invalid input shape') 58 | elif args.modality == 'multi': 59 | raise NotImplementedError( 60 | 'FLOPs counter is currently not supported for models with ' 61 | 'multi-modality input') 62 | 63 | cfg = Config.fromfile(args.config) 64 | if args.cfg_options is not None: 65 | cfg.merge_from_dict(args.cfg_options) 66 | 67 | if hasattr(cfg, 'plugin'): 68 | if cfg.plugin: 69 | import importlib 70 | if hasattr(cfg, 'plugin_dir'): 71 | plugin_dir = cfg.plugin_dir 72 | _module_dir = os.path.dirname(plugin_dir) 73 | _module_dir = _module_dir.split('/') 74 | _module_path = _module_dir[0] 75 | 76 | for m in _module_dir[1:]: 77 | _module_path = _module_path + '.' + m 78 | print(_module_path) 79 | plg_lib = importlib.import_module(_module_path) 80 | else: 81 | # import dir is the dirpath for the config file 82 | _module_dir = os.path.dirname(args.config) 83 | _module_dir = _module_dir.split('/') 84 | _module_path = _module_dir[0] 85 | for m in _module_dir[1:]: 86 | _module_path = _module_path + '.' + m 87 | print(_module_path) 88 | plg_lib = importlib.import_module(_module_path) 89 | 90 | # set cudnn_benchmark 91 | if cfg.get('cudnn_benchmark', False): 92 | torch.backends.cudnn.benchmark = True 93 | 94 | # work_dir is determined in this priority: CLI > segment in file > filename 95 | #if args.work_dir is not None: 96 | # update configs according to CLI args if args.work_dir is not None 97 | 98 | model = build_model( 99 | cfg.model, 100 | train_cfg=cfg.get('train_cfg'), 101 | test_cfg=cfg.get('test_cfg')) 102 | if torch.cuda.is_available(): 103 | model.cuda() 104 | model.eval() 105 | 106 | if hasattr(model, 'forward_dummy'): 107 | model.forward = model.forward_dummy 108 | else: 109 | raise NotImplementedError( 110 | 'FLOPs counter is currently not supported for {}'.format( 111 | model.__class__.__name__)) 112 | 113 | flops, params = get_model_complexity_info(model, input_shape) 114 | split_line = '=' * 30 115 | print(f'{split_line}\nInput shape: {input_shape}\n' 116 | f'Flops: {flops}\nParams: {params}\n{split_line}') 117 | print('!!!Please be cautious if you use the results in papers. ' 118 | 'You may need to check if all ops are supported and verify that the ' 119 | 'flops computation is correct.') 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /extra_tools/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import mmcv 4 | import os 5 | import torch 6 | import warnings 7 | from mmcv import Config, DictAction 8 | from mmcv.cnn import fuse_conv_bn 9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 10 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 11 | wrap_fp16_model) 12 | 13 | from mmdet3d.apis import single_gpu_test 14 | from mmdet3d.datasets import build_dataloader, build_dataset 15 | from mmdet3d.models import build_model 16 | from mmdet.apis import multi_gpu_test, set_random_seed 17 | from mmdet.datasets import replace_ImageToTensor 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser( 21 | description='MMDet test (and eval) a model') 22 | parser.add_argument('config', help='test config file path') 23 | parser.add_argument('checkpoint', help='checkpoint file') 24 | parser.add_argument('--out', help='output result file in pickle format') 25 | parser.add_argument( 26 | '--fuse-conv-bn', 27 | action='store_true', 28 | help='Whether to fuse conv and bn, this will slightly increase' 29 | 'the inference speed') 30 | parser.add_argument( 31 | '--format-only', 32 | action='store_true', 33 | help='Format the output results without perform evaluation. It is' 34 | 'useful when you want to format the result to a specific format and ' 35 | 'submit it to the test server') 36 | parser.add_argument( 37 | '--eval', 38 | type=str, 39 | nargs='+', 40 | help='evaluation metrics, which depends on the dataset, e.g., "bbox",' 41 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 42 | parser.add_argument('--show', action='store_true', help='show results') 43 | parser.add_argument( 44 | '--show-dir', help='directory where results will be saved') 45 | parser.add_argument( 46 | '--gpu-collect', 47 | action='store_true', 48 | help='whether to use gpu to collect results.') 49 | parser.add_argument( 50 | '--tmpdir', 51 | help='tmp directory used for collecting results from multiple ' 52 | 'workers, available when gpu-collect is not specified') 53 | parser.add_argument('--seed', type=int, default=0, help='random seed') 54 | parser.add_argument( 55 | '--deterministic', 56 | action='store_true', 57 | help='whether to set deterministic options for CUDNN backend.') 58 | parser.add_argument( 59 | '--cfg-options', 60 | nargs='+', 61 | action=DictAction, 62 | help='override some settings in the used config, the key-value pair ' 63 | 'in xxx=yyy format will be merged into config file. If the value to ' 64 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 65 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 66 | 'Note that the quotation marks are necessary and that no white space ' 67 | 'is allowed.') 68 | parser.add_argument( 69 | '--options', 70 | nargs='+', 71 | action=DictAction, 72 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 73 | 'format will be kwargs for dataset.evaluate() function (deprecate), ' 74 | 'change to --eval-options instead.') 75 | parser.add_argument( 76 | '--eval-options', 77 | nargs='+', 78 | action=DictAction, 79 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 80 | 'format will be kwargs for dataset.evaluate() function') 81 | parser.add_argument( 82 | '--launcher', 83 | choices=['none', 'pytorch', 'slurm', 'mpi'], 84 | default='none', 85 | help='job launcher') 86 | parser.add_argument('--local_rank', type=int, default=0) 87 | args = parser.parse_args() 88 | if 'LOCAL_RANK' not in os.environ: 89 | os.environ['LOCAL_RANK'] = str(args.local_rank) 90 | 91 | if args.options and args.eval_options: 92 | raise ValueError( 93 | '--options and --eval-options cannot be both specified, ' 94 | '--options is deprecated in favor of --eval-options') 95 | if args.options: 96 | warnings.warn('--options is deprecated in favor of --eval-options') 97 | args.eval_options = args.options 98 | return args 99 | 100 | 101 | def main(): 102 | args = parse_args() 103 | 104 | assert args.out or args.eval or args.format_only or args.show \ 105 | or args.show_dir, \ 106 | ('Please specify at least one operation (save/eval/format/show the ' 107 | 'results / save the results) with the argument "--out", "--eval"' 108 | ', "--format-only", "--show" or "--show-dir"') 109 | 110 | if args.eval and args.format_only: 111 | raise ValueError('--eval and --format_only cannot be both specified') 112 | 113 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 114 | raise ValueError('The output file must be a pkl file.') 115 | 116 | cfg = Config.fromfile(args.config) 117 | if args.cfg_options is not None: 118 | cfg.merge_from_dict(args.cfg_options) 119 | # import modules from string list. 120 | if cfg.get('custom_imports', None): 121 | from mmcv.utils import import_modules_from_strings 122 | import_modules_from_strings(**cfg['custom_imports']) 123 | 124 | # import modules from plguin/xx, registry will be updated 125 | if hasattr(cfg, 'plugin'): 126 | if cfg.plugin: 127 | import importlib 128 | if hasattr(cfg, 'plugin_dir'): 129 | plugin_dir = cfg.plugin_dir 130 | _module_dir = os.path.dirname(plugin_dir) 131 | _module_dir = _module_dir.split('/') 132 | _module_path = _module_dir[0] 133 | 134 | for m in _module_dir[1:]: 135 | _module_path = _module_path + '.' + m 136 | print(_module_path) 137 | plg_lib = importlib.import_module(_module_path) 138 | else: 139 | # import dir is the dirpath for the config file 140 | _module_dir = os.path.dirname(args.config) 141 | _module_dir = _module_dir.split('/') 142 | _module_path = _module_dir[0] 143 | for m in _module_dir[1:]: 144 | _module_path = _module_path + '.' 
+ m 145 | print(_module_path) 146 | plg_lib = importlib.import_module(_module_path) 147 | 148 | # set cudnn_benchmark 149 | if cfg.get('cudnn_benchmark', False): 150 | torch.backends.cudnn.benchmark = True 151 | 152 | cfg.model.pretrained = None 153 | # in case the test dataset is concatenated 154 | samples_per_gpu = 1 155 | if isinstance(cfg.data.test, dict): 156 | cfg.data.test.test_mode = True 157 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 158 | if samples_per_gpu > 1: 159 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 160 | cfg.data.test.pipeline = replace_ImageToTensor( 161 | cfg.data.test.pipeline) 162 | elif isinstance(cfg.data.test, list): 163 | for ds_cfg in cfg.data.test: 164 | ds_cfg.test_mode = True 165 | samples_per_gpu = max( 166 | [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test]) 167 | if samples_per_gpu > 1: 168 | for ds_cfg in cfg.data.test: 169 | ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline) 170 | 171 | # init distributed env first, since logger depends on the dist info. 172 | if args.launcher == 'none': 173 | distributed = False 174 | else: 175 | distributed = True 176 | init_dist(args.launcher, **cfg.dist_params) 177 | 178 | # set random seeds 179 | if args.seed is not None: 180 | set_random_seed(args.seed, deterministic=args.deterministic) 181 | 182 | # build the dataloader 183 | dataset = build_dataset(cfg.data.test) 184 | data_loader = build_dataloader( 185 | dataset, 186 | samples_per_gpu=samples_per_gpu, 187 | workers_per_gpu=cfg.data.workers_per_gpu, 188 | dist=distributed, 189 | shuffle=False) 190 | 191 | # build the model and load checkpoint 192 | cfg.model.train_cfg = None 193 | model = build_model(cfg.model, test_cfg=cfg.get('test_cfg')) 194 | fp16_cfg = cfg.get('fp16', None) 195 | if fp16_cfg is not None: 196 | wrap_fp16_model(model) 197 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 198 | if args.fuse_conv_bn: 199 | model = fuse_conv_bn(model) 200 | # old versions did not save class info in checkpoints, this walkaround is 201 | # for backward compatibility 202 | if 'CLASSES' in checkpoint.get('meta', {}): 203 | model.CLASSES = checkpoint['meta']['CLASSES'] 204 | else: 205 | model.CLASSES = dataset.CLASSES 206 | # palette for visualization in segmentation tasks 207 | if 'PALETTE' in checkpoint.get('meta', {}): 208 | model.PALETTE = checkpoint['meta']['PALETTE'] 209 | elif hasattr(dataset, 'PALETTE'): 210 | # segmentation dataset has `PALETTE` attribute 211 | model.PALETTE = dataset.PALETTE 212 | 213 | if not distributed: 214 | model = MMDataParallel(model, device_ids=[0]) 215 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir) 216 | else: 217 | model = MMDistributedDataParallel( 218 | model.cuda(), 219 | device_ids=[torch.cuda.current_device()], 220 | broadcast_buffers=False) 221 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 222 | args.gpu_collect) 223 | 224 | rank, _ = get_dist_info() 225 | if rank == 0: 226 | if args.out: 227 | print(f'\nwriting results to {args.out}') 228 | mmcv.dump(outputs, args.out) 229 | kwargs = {} if args.eval_options is None else args.eval_options 230 | if args.format_only: 231 | dataset.format_results(outputs, **kwargs) 232 | if args.eval: 233 | eval_kwargs = cfg.get('evaluation', {}).copy() 234 | # hard-code way to remove EvalHook args 235 | for key in [ 236 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 237 | 'rule' 238 | ]: 239 | eval_kwargs.pop(key, None) 240 | eval_kwargs.update(dict(metric=args.eval, 
**kwargs)) 241 | print(dataset.evaluate(outputs, **eval_kwargs)) 242 | 243 | 244 | if __name__ == '__main__': 245 | warnings.filterwarnings("ignore") 246 | torch.multiprocessing.set_start_method('fork') 247 | main() 248 | -------------------------------------------------------------------------------- /extra_tools/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from __future__ import division 3 | 4 | import argparse 5 | import copy 6 | import mmcv 7 | import os 8 | import time 9 | import torch 10 | import warnings 11 | from mmcv import Config, DictAction 12 | from mmcv.runner import get_dist_info, init_dist 13 | from os import path as osp 14 | 15 | from mmdet import __version__ as mmdet_version 16 | from mmdet3d import __version__ as mmdet3d_version 17 | from mmdet3d.apis import train_model 18 | from mmdet3d.datasets import build_dataset 19 | from mmdet3d.models import build_model 20 | from mmdet3d.utils import collect_env, get_root_logger 21 | from mmdet.apis import set_random_seed 22 | from mmseg import __version__ as mmseg_version 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description='Train a detector') 27 | parser.add_argument('config', help='train config file path') 28 | parser.add_argument('--work-dir', help='the dir to save logs and models') 29 | parser.add_argument( 30 | '--resume-from', help='the checkpoint file to resume from') 31 | parser.add_argument( 32 | '--no-validate', 33 | action='store_true', 34 | help='whether not to evaluate the checkpoint during training') 35 | group_gpus = parser.add_mutually_exclusive_group() 36 | group_gpus.add_argument( 37 | '--gpus', 38 | type=int, 39 | help='number of gpus to use ' 40 | '(only applicable to non-distributed training)') 41 | group_gpus.add_argument( 42 | '--gpu-ids', 43 | type=int, 44 | nargs='+', 45 | help='ids of gpus to use ' 46 | '(only applicable to non-distributed training)') 47 | parser.add_argument('--seed', type=int, default=0, help='random seed') 48 | parser.add_argument( 49 | '--deterministic', 50 | action='store_true', 51 | help='whether to set deterministic options for CUDNN backend.') 52 | parser.add_argument( 53 | '--options', 54 | nargs='+', 55 | action=DictAction, 56 | help='override some settings in the used config, the key-value pair ' 57 | 'in xxx=yyy format will be merged into config file (deprecate), ' 58 | 'change to --cfg-options instead.') 59 | parser.add_argument( 60 | '--cfg-options', 61 | nargs='+', 62 | action=DictAction, 63 | help='override some settings in the used config, the key-value pair ' 64 | 'in xxx=yyy format will be merged into config file. If the value to ' 65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 66 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 67 | 'Note that the quotation marks are necessary and that no white space ' 68 | 'is allowed.') 69 | parser.add_argument( 70 | '--launcher', 71 | choices=['none', 'pytorch', 'slurm', 'mpi'], 72 | default='none', 73 | help='job launcher') 74 | parser.add_argument('--local_rank', type=int, default=0) 75 | parser.add_argument( 76 | '--autoscale-lr', 77 | action='store_true', 78 | help='automatically scale lr with the number of gpus') 79 | args = parser.parse_args() 80 | if 'LOCAL_RANK' not in os.environ: 81 | os.environ['LOCAL_RANK'] = str(args.local_rank) 82 | 83 | if args.options and args.cfg_options: 84 | raise ValueError( 85 | '--options and --cfg-options cannot be both specified, ' 86 | '--options is deprecated in favor of --cfg-options') 87 | if args.options: 88 | warnings.warn('--options is deprecated in favor of --cfg-options') 89 | args.cfg_options = args.options 90 | 91 | return args 92 | 93 | 94 | def main(): 95 | args = parse_args() 96 | 97 | cfg = Config.fromfile(args.config) 98 | if args.cfg_options is not None: 99 | cfg.merge_from_dict(args.cfg_options) 100 | # import modules from string list. 101 | if cfg.get('custom_imports', None): 102 | from mmcv.utils import import_modules_from_strings 103 | import_modules_from_strings(**cfg['custom_imports']) 104 | 105 | # import modules from plguin/xx, registry will be updated 106 | if hasattr(cfg, 'plugin'): 107 | if cfg.plugin: 108 | import importlib 109 | if hasattr(cfg, 'plugin_dir'): 110 | plugin_dir = cfg.plugin_dir 111 | _module_dir = os.path.dirname(plugin_dir) 112 | _module_dir = _module_dir.split('/') 113 | _module_path = _module_dir[0] 114 | 115 | for m in _module_dir[1:]: 116 | _module_path = _module_path + '.' + m 117 | print(_module_path) 118 | plg_lib = importlib.import_module(_module_path) 119 | else: 120 | # import dir is the dirpath for the config file 121 | _module_dir = os.path.dirname(args.config) 122 | _module_dir = _module_dir.split('/') 123 | _module_path = _module_dir[0] 124 | for m in _module_dir[1:]: 125 | _module_path = _module_path + '.' + m 126 | print(_module_path) 127 | plg_lib = importlib.import_module(_module_path) 128 | 129 | # set cudnn_benchmark 130 | if cfg.get('cudnn_benchmark', False): 131 | torch.backends.cudnn.benchmark = True 132 | 133 | # work_dir is determined in this priority: CLI > segment in file > filename 134 | if args.work_dir is not None: 135 | # update configs according to CLI args if args.work_dir is not None 136 | cfg.work_dir = args.work_dir 137 | elif cfg.get('work_dir', None) is None: 138 | # use config filename as default work_dir if cfg.work_dir is None 139 | cfg.work_dir = osp.join('./work_dirs', 140 | osp.splitext(osp.basename(args.config))[0]) 141 | if args.resume_from is not None: 142 | cfg.resume_from = args.resume_from 143 | if args.gpu_ids is not None: 144 | cfg.gpu_ids = args.gpu_ids 145 | else: 146 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 147 | 148 | if args.autoscale_lr: 149 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 150 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 151 | 152 | # init distributed env first, since logger depends on the dist info. 
153 | if args.launcher == 'none': 154 | distributed = False 155 | else: 156 | distributed = True 157 | init_dist(args.launcher, **cfg.dist_params) 158 | # re-set gpu_ids with distributed training mode 159 | _, world_size = get_dist_info() 160 | cfg.gpu_ids = range(world_size) 161 | 162 | # create work_dir 163 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 164 | # dump config 165 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 166 | # init the logger before other steps 167 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 168 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 169 | # specify logger name, if we still use 'mmdet', the output info will be 170 | # filtered and won't be saved in the log_file 171 | # TODO: ugly workaround to judge whether we are training det or seg model 172 | if cfg.model.type in ['EncoderDecoder3D']: 173 | logger_name = 'mmseg' 174 | else: 175 | logger_name = 'mmdet' 176 | logger = get_root_logger( 177 | log_file=log_file, log_level=cfg.log_level, name=logger_name) 178 | 179 | # init the meta dict to record some important information such as 180 | # environment info and seed, which will be logged 181 | meta = dict() 182 | # log env info 183 | env_info_dict = collect_env() 184 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 185 | dash_line = '-' * 60 + '\n' 186 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 187 | dash_line) 188 | meta['env_info'] = env_info 189 | meta['config'] = cfg.pretty_text 190 | 191 | # log some basic info 192 | logger.info(f'Distributed training: {distributed}') 193 | logger.info(f'Config:\n{cfg.pretty_text}') 194 | 195 | # set random seeds 196 | if args.seed is not None: 197 | logger.info(f'Set random seed to {args.seed}, ' 198 | f'deterministic: {args.deterministic}') 199 | set_random_seed(args.seed, deterministic=args.deterministic) 200 | cfg.seed = args.seed 201 | meta['seed'] = args.seed 202 | meta['exp_name'] = osp.basename(args.config) 203 | 204 | model = build_model( 205 | cfg.model, 206 | train_cfg=cfg.get('train_cfg'), 207 | test_cfg=cfg.get('test_cfg')) 208 | model.init_weights() 209 | 210 | logger.info(f'Model:\n{model}') 211 | datasets = [build_dataset(cfg.data.train)] 212 | if len(cfg.workflow) == 2: 213 | val_dataset = copy.deepcopy(cfg.data.val) 214 | # in case we use a dataset wrapper 215 | if 'dataset' in cfg.data.train: 216 | val_dataset.pipeline = cfg.data.train.dataset.pipeline 217 | else: 218 | val_dataset.pipeline = cfg.data.train.pipeline 219 | # set test_mode=False here in deep copied config 220 | # which do not affect AP/AR calculation later 221 | # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa 222 | val_dataset.test_mode = False 223 | datasets.append(build_dataset(val_dataset)) 224 | if cfg.checkpoint_config is not None: 225 | # save mmdet version, config file content and class names in 226 | # checkpoints as meta data 227 | cfg.checkpoint_config.meta = dict( 228 | mmdet_version=mmdet_version, 229 | mmseg_version=mmseg_version, 230 | mmdet3d_version=mmdet3d_version, 231 | config=cfg.pretty_text, 232 | CLASSES=datasets[0].CLASSES, 233 | PALETTE=datasets[0].PALETTE # for segmentors 234 | if hasattr(datasets[0], 'PALETTE') else None) 235 | # add an attribute for visualization convenience 236 | model.CLASSES = datasets[0].CLASSES 237 | 238 | # for v in model.parameters(): 239 | # v.requires_grad = False 240 | # for v in model.pts_backbone.parameters(): 241 | # 
v.requires_grad = False 242 | # for v in model.pts_neck.parameters(): 243 | # v.requires_grad = False 244 | # for v in model.pts_middle_encoder.parameters(): 245 | # v.requires_grad = False 246 | 247 | train_model( 248 | model, 249 | datasets, 250 | cfg, 251 | distributed=distributed, 252 | validate=(not args.no_validate), 253 | timestamp=timestamp, 254 | meta=meta) 255 | 256 | 257 | if __name__ == '__main__': 258 | torch.multiprocessing.set_start_method('fork') 259 | main() 260 | -------------------------------------------------------------------------------- /model-index.yml: -------------------------------------------------------------------------------- 1 | Import: 2 | - configs/3dssd/metafile.yml 3 | - configs/centerpoint/metafile.yml 4 | - configs/dynamic_voxelization/metafile.yml 5 | - configs/fcaf3d/metafile.yml 6 | - configs/fcos3d/metafile.yml 7 | - configs/free_anchor/metafile.yml 8 | - configs/groupfree3d/metafile.yml 9 | - configs/h3dnet/metafile.yml 10 | - configs/imvotenet/metafile.yml 11 | - configs/imvoxelnet/metafile.yml 12 | - configs/mvxnet/metafile.yml 13 | - configs/nuimages/metafile.yml 14 | - configs/parta2/metafile.yml 15 | - configs/pgd/metafile.yml 16 | - configs/pointnet2/metafile.yml 17 | - configs/pointpillars/metafile.yml 18 | - configs/regnet/metafile.yml 19 | - configs/second/metafile.yml 20 | - configs/smoke/metafile.yml 21 | - configs/ssn/metafile.yml 22 | - configs/votenet/metafile.yml 23 | -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhenyuw16/Uni3DETR/15cb08a7ddfc2e4f0ae5a5a7b9ec6a7be8175399/projects/__init__.py -------------------------------------------------------------------------------- /projects/configs/ov_uni3detr/ov_uni3detr_sunrgbd_pc.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../configs/_base_/default_runtime.py' 3 | ] 4 | 5 | plugin=True 6 | plugin_dir='projects/mmdet3d_plugin/' 7 | 8 | # If point cloud range is changed, the models should also change their point 9 | # cloud range accordingly 10 | voxel_size = [0.02, 0.02, 0.02] 11 | grid_size = [128, 320, 320] 12 | point_cloud_range = [-3.2, -0.2, -2., 3.2, 6.2, 0.56] 13 | 14 | fp16_enabled = False 15 | bev_stride = 4 16 | sample_num = 5 17 | 18 | 19 | input_modality = dict( 20 | use_lidar=True, 21 | use_camera=False, 22 | use_radar=False, 23 | use_map=False, 24 | use_external=False) 25 | 26 | model = dict( 27 | type='OV_Uni3DETR', 28 | pts_voxel_layer=dict( 29 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000), 30 | point_cloud_range=point_cloud_range), 31 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4), 32 | pts_middle_encoder=dict( 33 | type='SparseEncoderHD', 34 | in_channels=4, 35 | sparse_shape=grid_size, 36 | output_channels=256, 37 | order=('conv', 'norm', 'act'), 38 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), 39 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 40 | block_type='basicblock', 41 | fp16_enabled=False), # not enable FP16 here 42 | pts_backbone=dict( 43 | type='SECOND3D', 44 | in_channels=[256, 256, 256], 45 | out_channels=[128, 256, 512], 46 | layer_nums=[5, 5, 5], 47 | layer_strides=[1, 2, 4], 48 | is_cascade=False, 49 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 50 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)), 51 | 
pts_neck=dict( 52 | type='SECOND3DFPN', 53 | in_channels=[128, 256, 512], 54 | out_channels=[256, 256, 256], 55 | upsample_strides=[1, 2, 4], 56 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 57 | upsample_cfg=dict(type='deconv3d', bias=False), 58 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False), 59 | use_conv_for_no_stride=True), 60 | pts_bbox_head=dict( 61 | type='Uni3DETRHeadCLIP', 62 | num_query=300, 63 | zeroshot_path='clip_embed/sunrgbd_clip_a+cname_rn50_manyprompt_46c_coda.npy', 64 | num_classes=46, 65 | in_channels=256, 66 | sync_cls_avg_factor=True, 67 | with_box_refine=True, 68 | as_two_stage=False, 69 | code_size=8, 70 | transformer=dict( 71 | type='Uni3DETRTransformer', 72 | fp16_enabled=fp16_enabled, 73 | decoder=dict( 74 | type='Uni3DETRTransformerDecoder', 75 | num_layers=3, 76 | return_intermediate=True, 77 | transformerlayers=dict( 78 | type='BaseTransformerLayer', 79 | attn_cfgs=[ 80 | dict( 81 | type='MultiheadAttention', 82 | embed_dims=256, 83 | num_heads=8, 84 | dropout=0.1), 85 | dict( 86 | type='UniCrossAtten', 87 | num_points=1, 88 | embed_dims=256, 89 | num_sweeps=1, 90 | fp16_enabled=fp16_enabled), 91 | ], 92 | ffn_cfgs=dict( 93 | type='FFN', 94 | embed_dims=256, 95 | feedforward_channels=512, 96 | num_fcs=2, 97 | ffn_drop=0.1, 98 | act_cfg=dict(type='ReLU', inplace=True), 99 | ), 100 | norm_cfg=dict(type='LN'), 101 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')) 102 | ) 103 | ), 104 | bbox_coder=dict( 105 | type='NMSFreeCoder', 106 | post_center_range=point_cloud_range, 107 | pc_range=point_cloud_range, 108 | max_num=1000, 109 | voxel_size=voxel_size, 110 | alpha=1.0, 111 | num_classes=46), 112 | post_processing=dict( 113 | type='nms', 114 | nms_thr=0.5), 115 | ######## soft nms can generate a little higher result 116 | # post_processing=dict( 117 | # type='soft_nms', 118 | # gaussian_sigma=0.3, 119 | # prune_threshold=1e-2), 120 | positional_encoding=dict( 121 | type='SinePositionalEncoding', 122 | num_feats=128, 123 | normalize=True, 124 | offset=-0.5), 125 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5), 126 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 127 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2), 128 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 129 | ), 130 | # model training and testing settings 131 | train_cfg=dict(pts=dict( 132 | grid_size=grid_size, 133 | voxel_size=voxel_size, 134 | point_cloud_range=point_cloud_range, 135 | out_size_factor=bev_stride, 136 | assigner=dict( 137 | type='HungarianAssigner3D', 138 | cls_cost=dict(type='FocalLossCost', weight=2.0), 139 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 140 | iou_cost=dict(type='IoU3DCost', weight=1.2), 141 | pc_range=point_cloud_range)))) 142 | 143 | 144 | dataset_type = 'SUNRGBDDataset_OV' 145 | data_root = 'data/sunrgbd_coda/' 146 | class_names = ('chair', 'table', 'pillow', 'sofa_chair', 'desk', 'bed', 'sofa', 'computer', 'box', 147 | 'lamp', 'garbage_bin', 'cabinet', 'shelf', 'drawer', 'sink', 'night_stand', 'kitchen_counter', 148 | 'paper', 'end_table', 'kitchen_cabinet', 'picture', 'book', 'stool', 'coffee_table', 'bookshelf', 149 | 'painting', 'key_board', 'dresser', 'tv', 'whiteboard', 'cpu', 'toilet', 'file_cabinet', 'bench', 150 | 'ottoman', 'plant', 'monitor', 'printer', 'recycle_bin', 'door', 'fridge', 'towel', 'cup', 'mirror', 151 | 'laptop', 'cloth') 152 | 153 | seen_classes = ('chair', 'table', 'pillow', 'sofa_chair', 'desk', 'bed', 'sofa', 'computer', 
'lamp', 'box') 154 | 155 | file_client_args = dict(backend='disk') 156 | 157 | train_pipeline = [ 158 | dict( 159 | type='LoadPointsFromFile', 160 | coord_type='DEPTH', 161 | shift_height=True, 162 | load_dim=6, 163 | use_dim=[0, 1, 2], 164 | file_client_args=file_client_args), 165 | dict(type='LoadAnnotations3D'), 166 | dict( 167 | type='UnifiedRandomFlip3D', 168 | sync_2d=False, 169 | flip_ratio_bev_horizontal=0.5, 170 | ), 171 | dict( 172 | type='UnifiedRotScaleTrans', 173 | rot_range=[-0.523599, 0.523599], 174 | scale_ratio_range=[0.85, 1.15], 175 | shift_height=True), 176 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 177 | # dict(type='PointSample', num_points=20000), 178 | dict(type='PointSample', num_points=200000), 179 | dict(type='DefaultFormatBundle3D', class_names=class_names), 180 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 181 | ] 182 | test_pipeline = [ 183 | dict( 184 | type='LoadPointsFromFile', 185 | coord_type='DEPTH', 186 | shift_height=True, 187 | load_dim=6, 188 | use_dim=[0, 1, 2], 189 | file_client_args=file_client_args), 190 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 191 | # dict(type='PointSample', num_points=50000), 192 | dict(type='PointSample', num_points=200000), 193 | dict( 194 | type='DefaultFormatBundle3D', 195 | class_names=class_names, 196 | with_label=False), 197 | dict(type='Collect3D', keys=['points']) 198 | ] 199 | 200 | data = dict( 201 | samples_per_gpu=8, 202 | workers_per_gpu=4, 203 | train=dict( 204 | type='RepeatDataset', 205 | times=2, #######5 206 | dataset=dict( 207 | type=dataset_type, 208 | data_root=data_root, 209 | ann_file=data_root + 'sunrgbd_infos_train_pls_ens_10c36c.pkl', 210 | pipeline=train_pipeline, 211 | classes=class_names, 212 | seen_classes=seen_classes, 213 | filter_empty_gt=True, 214 | box_type_3d='Depth', 215 | file_client_args=file_client_args)), 216 | val=dict( 217 | type=dataset_type, 218 | data_root=data_root, 219 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl', 220 | pipeline=test_pipeline, 221 | classes=class_names, 222 | seen_classes=seen_classes, 223 | test_mode=True, 224 | box_type_3d='Depth', 225 | file_client_args=file_client_args), 226 | test=dict( 227 | type=dataset_type, 228 | data_root=data_root, 229 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl', 230 | pipeline=test_pipeline, 231 | classes=class_names, 232 | seen_classes=seen_classes, 233 | test_mode=True, 234 | box_type_3d='Depth', 235 | file_client_args=file_client_args)) 236 | 237 | evaluation = dict(pipeline=test_pipeline, interval=5) 238 | 239 | 240 | # optimizer 241 | # This schedule is mainly used by models on indoor dataset, 242 | # e.g., VoteNet on SUNRGBD and ScanNet 243 | lr = 2e-5 *2/8 * 40 # max learning rate 244 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 245 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 246 | 247 | 248 | lr_config = dict(policy='step', warmup=None, step=[32, 38]) 249 | runner = dict(type='EpochBasedRunner', max_epochs=40) 250 | 251 | # fp16 setting 252 | # fp16 = dict(loss_scale=32.) 
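# --- Editor's note (plain arithmetic, not from the original repo) ---
# The factored learning-rate expression above is just a scalar:
#   >>> 2e-5 * 2 / 8 * 40
#   0.0002
# i.e. this config trains with an effective max AdamW lr of 2e-4, decayed in
# steps at epochs 32 and 38.
# ---------------------------------------------------------------------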
253 | find_unused_parameters = True 254 | -------------------------------------------------------------------------------- /projects/configs/ov_uni3detr/ov_uni3detr_sunrgbd_rgb.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../configs/_base_/default_runtime.py' 3 | ] 4 | 5 | plugin=True 6 | plugin_dir='projects/mmdet3d_plugin/' 7 | 8 | # If point cloud range is changed, the models should also change their point 9 | # cloud range accordingly 10 | voxel_size = [0.02, 0.02, 0.02] 11 | grid_size = [128, 320, 320] 12 | point_cloud_range = [-3.2, -0.2, -2., 3.2, 6.2, 0.56] 13 | 14 | 15 | cam_sweep_num = 1 16 | fp16_enabled = False 17 | bev_stride = 8 18 | sample_num = 15 19 | voxel_shape = [int(((point_cloud_range[3]-point_cloud_range[0])/voxel_size[0])//bev_stride), 20 | int(((point_cloud_range[4]-point_cloud_range[1])/voxel_size[1])//bev_stride), 21 | sample_num] 22 | 23 | 24 | input_modality = dict( 25 | use_lidar=False, 26 | use_camera=True, 27 | use_radar=False, 28 | use_map=False, 29 | use_external=False, 30 | cam_sweep_num=cam_sweep_num) 31 | 32 | model = dict( 33 | type='OV_Uni3DETR', 34 | use_grid_mask=True, 35 | img_backbone=dict( 36 | type='ResNet', 37 | depth=50, 38 | num_stages=4, 39 | out_indices=(0, 1, 2, 3), 40 | frozen_stages=1, 41 | norm_cfg=dict(type='BN', requires_grad=True), 42 | norm_eval=True, 43 | style='pytorch', 44 | dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), 45 | stage_with_dcn=(False, True, True, True) 46 | ), 47 | img_neck=dict( 48 | type='FPN', 49 | in_channels=[256, 512, 1024, 2048], 50 | out_channels=256, 51 | num_outs=5, 52 | ), 53 | depth_head=dict( 54 | type='SimpleDepth', 55 | model=dict( 56 | depth_dim=64, 57 | )), 58 | view_cfg=dict( 59 | num_cams=1, 60 | num_convs=3, 61 | num_points=sample_num, 62 | num_sweeps=cam_sweep_num, 63 | kernel_size=(3,3,3), 64 | keep_sweep_dim=True, 65 | num_feature_levels=4, 66 | embed_dims=256, 67 | pc_range=point_cloud_range, 68 | voxel_shape=voxel_shape, 69 | fp16_enabled=fp16_enabled, 70 | ), 71 | pts_bbox_head=dict( 72 | type='Uni3DETRHeadCLIP', 73 | num_query=300, 74 | zeroshot_path='clip_embed/sunrgbd_clip_a+cname_rn50_manyprompt_46c_coda.npy', 75 | num_classes=46, 76 | in_channels=256, 77 | sync_cls_avg_factor=True, 78 | with_box_refine=True, 79 | as_two_stage=False, 80 | code_size=8, 81 | transformer=dict( 82 | type='Uni3DETRTransformer', 83 | fp16_enabled=fp16_enabled, 84 | decoder=dict( 85 | type='Uni3DETRTransformerDecoder', 86 | num_layers=6, 87 | return_intermediate=True, 88 | transformerlayers=dict( 89 | type='BaseTransformerLayer', 90 | attn_cfgs=[ 91 | dict( 92 | type='MultiheadAttention', 93 | embed_dims=256, 94 | num_heads=8, 95 | dropout=0.1), 96 | dict( 97 | type='UniCrossAtten', 98 | num_points=1, 99 | embed_dims=256, 100 | num_sweeps=cam_sweep_num, 101 | fp16_enabled=fp16_enabled) 102 | ], 103 | ffn_cfgs=dict( 104 | type='FFN', 105 | embed_dims=256, 106 | feedforward_channels=512, 107 | num_fcs=2, 108 | ffn_drop=0.1, 109 | act_cfg=dict(type='ReLU', inplace=True), 110 | ), 111 | norm_cfg=dict(type='LN'), 112 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 113 | 'ffn', 'norm')) 114 | ) 115 | ), 116 | bbox_coder=dict( 117 | type='NMSFreeCoder', 118 | post_center_range=point_cloud_range, 119 | pc_range=point_cloud_range, 120 | max_num=1000, 121 | voxel_size=voxel_size, 122 | alpha=1.0, 123 | num_classes=46 124 | ), 125 | post_processing=dict( 126 | type='nms', 127 | nms_thr=0.5), 128 | positional_encoding=dict( 
129 | type='SinePositionalEncoding', 130 | num_feats=128, 131 | normalize=True, 132 | offset=-0.5), 133 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5), 134 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 135 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2), 136 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]), 137 | # model training and testing settings 138 | train_cfg=dict(pts=dict( 139 | grid_size=grid_size, 140 | voxel_size=voxel_size, 141 | point_cloud_range=point_cloud_range, 142 | out_size_factor=bev_stride, 143 | assigner=dict( 144 | type='HungarianAssigner3D', 145 | cls_cost=dict(type='FocalLossCost', weight=2.0), 146 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 147 | iou_cost=dict(type='IoU3DCost', weight=1.2), # Fake cost. This is just to make it compatible with DETR head. 148 | pc_range=point_cloud_range)))) 149 | 150 | dataset_type = 'SUNRGBDDataset' 151 | data_root = 'data/sunrgbd_coda/' 152 | 153 | # img_norm_cfg = dict( 154 | # mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 155 | img_norm_cfg = dict( 156 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 157 | 158 | class_names = ('chair', 'table', 'pillow', 'sofa_chair', 'desk', 'bed', 'sofa', 'computer', 'box', 159 | 'lamp', 'garbage_bin', 'cabinet', 'shelf', 'drawer', 'sink', 'night_stand', 'kitchen_counter', 160 | 'paper', 'end_table', 'kitchen_cabinet', 'picture', 'book', 'stool', 'coffee_table', 'bookshelf', 161 | 'painting', 'key_board', 'dresser', 'tv', 'whiteboard', 'cpu', 'toilet', 'file_cabinet', 'bench', 162 | 'ottoman', 'plant', 'monitor', 'printer', 'recycle_bin', 'door', 'fridge', 'towel', 'cup', 'mirror', 163 | 'laptop', 'cloth') 164 | 165 | 166 | file_client_args = dict(backend='disk') 167 | 168 | 169 | train_pipeline = [ 170 | dict(type='LoadMultiViewMultiSweepImageFromFilesIndoor', sweep_num=cam_sweep_num, to_float32=True), 171 | dict(type='PhotoMetricDistortionMultiViewImage'), 172 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 173 | dict( 174 | type='UnifiedRotScaleTrans', 175 | rot_range=[-0.3925, 0.3925], 176 | scale_ratio_range=[0.95, 1.05], 177 | ), 178 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 179 | dict(type='PadMultiViewImage', size_divisor=32), 180 | dict(type='DefaultFormatBundle3D', class_names=class_names), 181 | dict(type='CollectUnified3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 182 | ] 183 | test_pipeline = [ 184 | dict(type='LoadMultiViewMultiSweepImageFromFilesIndoor', sweep_num=cam_sweep_num, to_float32=True), 185 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 186 | dict(type='PadMultiViewImage', size_divisor=32), 187 | dict(type='DefaultFormatBundle3D', class_names=class_names), 188 | dict(type='CollectUnified3D', keys=['img']) 189 | ] 190 | 191 | 192 | 193 | data = dict( 194 | samples_per_gpu=2, 195 | workers_per_gpu=4, 196 | train=dict( 197 | type='RepeatDataset', 198 | times=2, #######5 199 | dataset=dict( 200 | type=dataset_type, 201 | data_root=data_root, 202 | ann_file = data_root + 'sunrgbd_infos_train_pls_ens_10c36c.pkl', 203 | pipeline=train_pipeline, 204 | classes=class_names, 205 | filter_empty_gt=True, 206 | box_type_3d='Depth', 207 | file_client_args=file_client_args)), 208 | val=dict( 209 | type=dataset_type, 210 | data_root=data_root, 211 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl', 212 | pipeline=test_pipeline, 213 | classes=class_names, 214 | test_mode=True, 215 | 
box_type_3d='Depth', 216 | file_client_args=file_client_args), 217 | test=dict( 218 | type=dataset_type, 219 | data_root=data_root, 220 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl', 221 | pipeline=test_pipeline, 222 | classes=class_names, 223 | test_mode=True, 224 | box_type_3d='Depth', 225 | file_client_args=file_client_args)) 226 | 227 | evaluation = dict(pipeline=test_pipeline, interval=5) 228 | 229 | 230 | # optimizer 231 | # This schedule is mainly used by models on indoor dataset, 232 | # e.g., VoteNet on SUNRGBD and ScanNet 233 | optimizer = dict( 234 | type='AdamW', 235 | lr=1.75e-4, 236 | # lr=2e-4, 237 | paramwise_cfg=dict( 238 | custom_keys={ 239 | 'img_backbone': dict(lr_mult=0.1), 240 | }), 241 | weight_decay=0.01) 242 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 243 | 244 | 245 | lr_config = dict(policy='step', warmup=None, step=[32, 38]) 246 | 247 | # runtime settings 248 | runner = dict(type='EpochBasedRunner', max_epochs=40) 249 | 250 | # fp16 setting 251 | # fp16 = dict(loss_scale=32.) 252 | load_from = 'faster_rcnn_r50_caffe_fpn_1x_coco_dcnv2_c.pth' 253 | 254 | find_unused_parameters = True 255 | -------------------------------------------------------------------------------- /projects/configs/uni3detr/uni3detr_kitti_3classes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../configs/_base_/default_runtime.py' 3 | ] 4 | 5 | plugin=True 6 | plugin_dir='projects/mmdet3d_plugin/' 7 | 8 | # If point cloud range is changed, the models should also change their point 9 | # cloud range accordingly 10 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 11 | voxel_size = [0.05, 0.05, 0.1] 12 | fp16_enabled = True 13 | bev_stride = 4 14 | sample_num = 5 15 | # For nuScenes we usually do 10-class detection 16 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 17 | 18 | input_modality = dict( 19 | use_lidar=True, 20 | use_camera=False, 21 | use_radar=False, 22 | use_map=False, 23 | use_external=False) 24 | 25 | use_dab = True 26 | 27 | model = dict( 28 | type='Uni3DETR', 29 | pts_voxel_layer=dict( 30 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000), 31 | point_cloud_range=point_cloud_range), 32 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4), 33 | pts_middle_encoder=dict( 34 | type='SparseEncoderHD', 35 | in_channels=4, 36 | sparse_shape=[41, 1600, 1408], 37 | output_channels=256, 38 | order=('conv', 'norm', 'act'), 39 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), 40 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 41 | block_type='basicblock', 42 | fp16_enabled=False), # not enable FP16 here 43 | pts_backbone=dict( 44 | type='SECOND3D', 45 | in_channels=[256, 256, 256], 46 | out_channels=[128, 256, 512], 47 | layer_nums=[5, 5, 5], 48 | layer_strides=[1, 2, 4], 49 | is_cascade=False, 50 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 51 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)), 52 | pts_neck=dict( 53 | type='SECOND3DFPN', 54 | in_channels=[128, 256, 512], 55 | out_channels=[256, 256, 256], 56 | upsample_strides=[1, 2, 4], 57 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 58 | upsample_cfg=dict(type='deconv3d', bias=False), 59 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False), 60 | use_conv_for_no_stride=True), 61 | pts_bbox_head=dict( 62 | type='Uni3DETRHead', 63 | # transformer_cfg 64 | num_query=300, 65 | num_classes=3, 66 | in_channels=256, 67 | 
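    # --- Editor's note (interpretation, not part of the original config) ---
    # The gt_repeattimes=5 option a few lines below appears to enable a
    # one-to-many matching: the HungarianAssigner3D included later in this
    # repository tiles the cost columns before matching, roughly
    #   cost_tiled = cost.repeat(1, gt_repeattimes)   # (num_query, num_gt * 5)
    #   rows, cols = linear_sum_assignment(cost_tiled)
    #   gt_inds = cols % num_gt                        # fold back to real GT ids
    # so each ground-truth box can be assigned to several queries.
    # ------------------------------------------------------------------------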
sync_cls_avg_factor=True, 68 | with_box_refine=True, 69 | as_two_stage=False, 70 | code_size=8, 71 | gt_repeattimes=5, 72 | transformer=dict( 73 | type='Uni3DETRTransformer', 74 | fp16_enabled=fp16_enabled, 75 | decoder=dict( 76 | type='Uni3DETRTransformerDecoder', 77 | num_layers=9, 78 | return_intermediate=True, 79 | transformerlayers=dict( 80 | type='BaseTransformerLayer', 81 | attn_cfgs=[ 82 | dict( 83 | type='MultiheadAttention', 84 | embed_dims=256, 85 | num_heads=8, 86 | dropout=0.1), 87 | dict( 88 | type='UniCrossAtten', 89 | num_points=1, 90 | embed_dims=256, 91 | num_sweeps=1, 92 | fp16_enabled=fp16_enabled) 93 | ], 94 | ffn_cfgs=dict( 95 | type='FFN', 96 | embed_dims=256, 97 | feedforward_channels=512, 98 | num_fcs=2, 99 | ffn_drop=0.1, 100 | act_cfg=dict(type='ReLU', inplace=True), 101 | ), 102 | norm_cfg=dict(type='LN'), 103 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 104 | 'ffn', 'norm')) 105 | ) 106 | ), 107 | bbox_coder=dict( 108 | type='NMSFreeCoder', 109 | post_center_range=[0, -40, -3, 70.4, 40, 1], 110 | pc_range=point_cloud_range, 111 | max_num=150, 112 | alpha=0.2, 113 | voxel_size=voxel_size, 114 | num_classes=3), 115 | post_processing=dict( 116 | type='box_merging', 117 | score_thr=[0., 0.3, 0.65]), 118 | positional_encoding=dict( 119 | type='SinePositionalEncoding', 120 | num_feats=128, 121 | normalize=True, 122 | offset=-0.5), 123 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5), 124 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 125 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2), 126 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 127 | ), 128 | # model training and testing settings 129 | train_cfg=dict(pts=dict( 130 | grid_size=[1408, 1600, 40], 131 | voxel_size=voxel_size, 132 | point_cloud_range=point_cloud_range, 133 | out_size_factor=bev_stride, 134 | assigner=dict( 135 | type='HungarianAssigner3D', 136 | cls_cost=dict(type='FocalLossCost', weight=2.0), 137 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 138 | iou_cost=dict(type='IoU3DCost', weight=1.2), 139 | pc_range=point_cloud_range)))) 140 | 141 | 142 | # dataset settings 143 | dataset_type = 'KittiDataset' 144 | data_root = 'data/kitti/' 145 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 146 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 147 | input_modality = dict(use_lidar=True, use_camera=False) 148 | 149 | db_sampler = dict( 150 | data_root=data_root, 151 | info_path=data_root + 'kitti_dbinfos_train.pkl', 152 | rate=1.0, 153 | prepare=dict( 154 | filter_by_difficulty=[-1], 155 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 156 | classes=class_names, 157 | sample_groups=dict(Car=20, Pedestrian=6, Cyclist=6)) 158 | 159 | 160 | file_client_args = dict(backend='disk') 161 | # Uncomment the following if use ceph or other file clients. 162 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 163 | # for more details. 
164 | # file_client_args = dict( 165 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 166 | 167 | train_pipeline = [ 168 | dict( 169 | type='LoadPointsFromFile', 170 | coord_type='LIDAR', 171 | load_dim=4, 172 | use_dim=4, 173 | file_client_args=file_client_args), 174 | dict( 175 | type='LoadAnnotations3D', 176 | with_bbox_3d=True, 177 | with_label_3d=True, 178 | file_client_args=file_client_args), 179 | dict(type='ObjectSample', db_sampler=db_sampler), 180 | dict( 181 | type='ObjectNoise', 182 | num_try=100, 183 | translation_std=[1.0, 1.0, 0.5], 184 | global_rot_range=[0.0, 0.0], 185 | rot_range=[-0.78539816, 0.78539816]), 186 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 187 | dict( 188 | type='GlobalRotScaleTrans', 189 | rot_range=[-0.78539816, 0.78539816], 190 | scale_ratio_range=[0.95, 1.05]), 191 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 192 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 193 | dict(type='ObjectNameFilter', classes=class_names), 194 | dict(type='PointShuffle'), 195 | dict(type='PointSample', num_points=18000), 196 | dict(type='DefaultFormatBundle3D', class_names=class_names), 197 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 198 | ] 199 | test_pipeline = [ 200 | dict( 201 | type='LoadPointsFromFile', 202 | coord_type='LIDAR', 203 | load_dim=4, 204 | use_dim=4, 205 | file_client_args=file_client_args), 206 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 207 | dict( 208 | type='DefaultFormatBundle3D', 209 | class_names=class_names, 210 | with_label=False), 211 | dict(type='Collect3D', keys=['points']) 212 | # dict( 213 | # type='MultiScaleFlipAug3D', 214 | # img_scale=(1333, 800), 215 | # pts_scale_ratio=1, 216 | # flip=False, 217 | # transforms=[ 218 | # dict( 219 | # type='GlobalRotScaleTrans', 220 | # rot_range=[0, 0], 221 | # scale_ratio_range=[1., 1.], 222 | # translation_std=[0, 0, 0]), 223 | # dict(type='RandomFlip3D'), 224 | # dict( 225 | # type='PointsRangeFilter', point_cloud_range=point_cloud_range), 226 | # dict( 227 | # type='DefaultFormatBundle3D', 228 | # class_names=class_names, 229 | # with_label=False), 230 | # dict(type='Collect3D', keys=['points']) 231 | # ]) 232 | ] 233 | 234 | 235 | data = dict( 236 | samples_per_gpu=1, 237 | workers_per_gpu=2, 238 | train=dict( 239 | type='RepeatDataset', 240 | times=2, 241 | dataset=dict( 242 | type=dataset_type, 243 | data_root=data_root, 244 | ann_file=data_root + 'kitti_infos_train_van.pkl', 245 | split='training', 246 | pts_prefix='velodyne_reduced', 247 | pipeline=train_pipeline, 248 | modality=input_modality, 249 | classes=class_names, 250 | test_mode=False, 251 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 252 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
253 | box_type_3d='LiDAR')), 254 | val=dict( 255 | type=dataset_type, 256 | data_root=data_root, 257 | ann_file=data_root + 'kitti_infos_val.pkl', 258 | split='training', 259 | pts_prefix='velodyne_reduced', 260 | pipeline=test_pipeline, 261 | modality=input_modality, 262 | classes=class_names, 263 | test_mode=True, 264 | box_type_3d='LiDAR'), 265 | test=dict( 266 | type=dataset_type, 267 | data_root=data_root, 268 | ann_file=data_root + 'kitti_infos_val.pkl', 269 | split='training', 270 | pts_prefix='velodyne_reduced', 271 | pipeline=test_pipeline, 272 | modality=input_modality, 273 | classes=class_names, 274 | test_mode=True, 275 | box_type_3d='LiDAR')) 276 | 277 | evaluation = dict(interval=1, pipeline=test_pipeline) 278 | 279 | 280 | checkpoint_config = dict(interval=1) 281 | 282 | lr = 2e-5 *3/8 * 18 /2 # max learning rate 283 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 284 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 285 | 286 | 287 | lr_config = dict(policy='step', warmup=None, step=[32, 38]) 288 | runner = dict(type='EpochBasedRunner', max_epochs=40) 289 | 290 | find_unused_parameters = True 291 | workflow = [('train', 1)] 292 | gpu_ids = range(0, 1) 293 | dist_params = dict(backend='nccl') 294 | log_level = 'INFO' 295 | 296 | # fp16 setting 297 | fp16 = dict(loss_scale=32.) 298 | -------------------------------------------------------------------------------- /projects/configs/uni3detr/uni3detr_scannet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../configs/_base_/default_runtime.py' 3 | ] 4 | 5 | plugin=True 6 | plugin_dir='projects/mmdet3d_plugin/' 7 | 8 | 9 | voxel_size = [0.02, 0.02, 0.02] 10 | grid_size = [128, 640, 640] 11 | 12 | point_cloud_range = [-6.4, -6.4, -0.1, 6.4, 6.4, 2.46] 13 | 14 | 15 | fp16_enabled = True 16 | bev_stride = 4 17 | sample_num = 5 18 | 19 | input_modality = dict( 20 | use_lidar=True, 21 | use_camera=False, 22 | use_radar=False, 23 | use_map=False, 24 | use_external=False) 25 | 26 | model = dict( 27 | type='Uni3DETR', 28 | pts_voxel_layer=dict( 29 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000), ###16000 30 | point_cloud_range=point_cloud_range), 31 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4), 32 | pts_middle_encoder=dict( 33 | type='SparseEncoderHD', 34 | in_channels=4, 35 | sparse_shape=grid_size, 36 | output_channels=256, 37 | order=('conv', 'norm', 'act'), 38 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), 39 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 40 | block_type='basicblock', 41 | fp16_enabled=False), # not enable FP16 here 42 | pts_backbone=dict( 43 | type='SECOND3D', 44 | in_channels=[256, 256, 256], 45 | out_channels=[128, 256, 512], 46 | layer_nums=[5, 5, 5], 47 | layer_strides=[1, 2, 4], 48 | is_cascade=False, 49 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 50 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)), 51 | pts_neck=dict( 52 | type='SECOND3DFPN', 53 | in_channels=[128, 256, 512], 54 | out_channels=[256, 256, 256], 55 | upsample_strides=[1, 2, 4], 56 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 57 | upsample_cfg=dict(type='deconv3d', bias=False), 58 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False), 59 | use_conv_for_no_stride=True), 60 | pts_bbox_head=dict( 61 | type='Uni3DETRHead', 62 | # transformer_cfg 63 | num_query=300, 64 | num_classes=18, 65 | in_channels=256, 66 | 
sync_cls_avg_factor=True, 67 | with_box_refine=True, 68 | as_two_stage=False, 69 | code_size=8, 70 | with_nms=True, 71 | transformer=dict( 72 | type='Uni3DETRTransformer', 73 | fp16_enabled=fp16_enabled, 74 | decoder=dict( 75 | type='Uni3DETRTransformerDecoder', 76 | num_layers=3, 77 | return_intermediate=True, 78 | transformerlayers=dict( 79 | type='BaseTransformerLayer', 80 | attn_cfgs=[ 81 | dict( 82 | type='MultiheadAttention', 83 | embed_dims=256, 84 | num_heads=8, 85 | dropout=0.1), 86 | dict( 87 | type='UniCrossAtten', 88 | num_points=1, 89 | embed_dims=256, 90 | num_sweeps=1, 91 | fp16_enabled=fp16_enabled), 92 | ], 93 | ffn_cfgs=dict( 94 | type='FFN', 95 | embed_dims=256, 96 | feedforward_channels=512, 97 | num_fcs=2, 98 | ffn_drop=0.1, ##0.1 99 | act_cfg=dict(type='ReLU', inplace=True), 100 | ), 101 | norm_cfg=dict(type='LN'), 102 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')) 103 | # operation_order=('cross_attn', 'norm', 'self_attn', 'norm', 'ffn', 'norm')) 104 | ) 105 | ), 106 | bbox_coder=dict( 107 | type='NMSFreeCoder', 108 | post_center_range=point_cloud_range, 109 | pc_range=point_cloud_range, 110 | # max_num=1000, 111 | max_num=5000, 112 | voxel_size=voxel_size, 113 | num_classes=18), 114 | positional_encoding=dict( 115 | type='SinePositionalEncoding', 116 | num_feats=128, 117 | normalize=True, 118 | offset=-0.5), 119 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5), 120 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 121 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2), 122 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 123 | ), 124 | # model training and testing settings 125 | train_cfg=dict(pts=dict( 126 | grid_size=grid_size, 127 | voxel_size=voxel_size, 128 | point_cloud_range=point_cloud_range, 129 | out_size_factor=bev_stride, 130 | assigner=dict( 131 | type='HungarianAssigner3D', 132 | cls_cost=dict(type='FocalLossCost', weight=2.0), 133 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 134 | iou_cost=dict(type='IoU3DCost', weight=1.2), 135 | pc_range=point_cloud_range)))) 136 | 137 | 138 | dataset_type = 'ScanNetDataset' 139 | data_root = './data/scannet/' 140 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 141 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 142 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 143 | 'garbagebin') 144 | 145 | 146 | train_pipeline = [ 147 | dict( 148 | type='LoadPointsFromFile', 149 | coord_type='DEPTH', 150 | shift_height=False, 151 | load_dim=3, 152 | use_dim=[0, 1, 2]), 153 | dict(type='LoadAnnotations3D'), 154 | dict( 155 | type='RandomFlip3D', 156 | sync_2d=False, 157 | flip_ratio_bev_horizontal=0.5, 158 | flip_ratio_bev_vertical=0.5), 159 | dict( 160 | type='GlobalRotScaleTrans', 161 | rot_range=[-0.087266, 0.087266], 162 | scale_ratio_range=[.9, 1.1], 163 | translation_std=[.1, .1, .1], 164 | shift_height=False), 165 | dict(type='PointSample', num_points=200000), 166 | dict(type='DefaultFormatBundle3D', class_names=class_names), 167 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 168 | ] 169 | 170 | test_pipeline = [ 171 | dict( 172 | type='LoadPointsFromFile', 173 | coord_type='DEPTH', 174 | shift_height=False, 175 | load_dim=3, 176 | use_dim=[0, 1, 2]), 177 | dict( 178 | type='DefaultFormatBundle3D', 179 | class_names=class_names, 180 | with_label=False), 181 | dict(type='Collect3D', keys=['points']) 182 | ] 183 | 184 | data = dict( 185 | 
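    # Editor's note (interpretation, not part of the original config): the
    # RepeatDataset wrapper below (times=6) makes one training epoch iterate
    # the ScanNet train split six times, so max_epochs=40 corresponds to
    # roughly 240 passes over the raw annotations.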
samples_per_gpu=3, ##### 3 186 | workers_per_gpu=4, 187 | train=dict( 188 | type='RepeatDataset', 189 | times=6, 190 | dataset=dict( 191 | type=dataset_type, 192 | data_root=data_root, 193 | ann_file=data_root + 'scannet_infos_train.pkl', 194 | pipeline=train_pipeline, 195 | filter_empty_gt=True, 196 | classes=class_names, 197 | box_type_3d='Depth') 198 | ), 199 | val=dict( 200 | type=dataset_type, 201 | data_root=data_root, 202 | ann_file=data_root + 'scannet_infos_val.pkl', 203 | pipeline=test_pipeline, 204 | classes=class_names, 205 | test_mode=True, 206 | box_type_3d='Depth'), 207 | test=dict( 208 | type=dataset_type, 209 | data_root=data_root, 210 | ann_file=data_root + 'scannet_infos_val.pkl', 211 | pipeline=test_pipeline, 212 | classes=class_names, 213 | test_mode=True, 214 | box_type_3d='Depth')) 215 | 216 | evaluation = dict(pipeline=test_pipeline, interval=5) 217 | 218 | 219 | # optimizer 220 | # This schedule is mainly used by models on indoor dataset, 221 | # e.g., VoteNet on SUNRGBD and ScanNet 222 | lr = 2e-5 *2/8 * 20 * 4/6 *6/8 *1.5 *8/6###########40 # max learning rate 223 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 224 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 225 | 226 | 227 | lr_config = dict(policy='step', warmup=None, step=[32, 38]) 228 | runner = dict(type='EpochBasedRunner', max_epochs=40) ###40 229 | 230 | # fp16 setting 231 | fp16 = dict(loss_scale=32.) 232 | find_unused_parameters = True 233 | -------------------------------------------------------------------------------- /projects/configs/uni3detr/uni3detr_scannet_large.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../configs/_base_/default_runtime.py' 3 | ] 4 | 5 | plugin=True 6 | plugin_dir='projects/mmdet3d_plugin/' 7 | 8 | 9 | voxel_size = [0.02, 0.02, 0.02] 10 | grid_size = [128, 640, 640] 11 | 12 | point_cloud_range = [-6.4, -6.4, -0.1, 6.4, 6.4, 2.46] 13 | 14 | 15 | fp16_enabled = True 16 | bev_stride = 4 17 | sample_num = 5 18 | 19 | input_modality = dict( 20 | use_lidar=True, 21 | use_camera=False, 22 | use_radar=False, 23 | use_map=False, 24 | use_external=False) 25 | 26 | model = dict( 27 | type='Uni3DETR', 28 | dynamic_voxelization=True, 29 | pts_voxel_layer=dict( 30 | max_num_points=-1, point_cloud_range=point_cloud_range, voxel_size=voxel_size, max_voxels=(-1, -1)), 31 | pts_voxel_encoder=dict(type='DynamicSimpleVFE', voxel_size=voxel_size, point_cloud_range=point_cloud_range), 32 | pts_middle_encoder=dict( 33 | type='SparseEncoderHD', 34 | in_channels=4, 35 | sparse_shape=grid_size, 36 | base_channels=32, 37 | output_channels=512, 38 | order=('conv', 'norm', 'act'), 39 | encoder_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256)), 40 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 41 | block_type='basicblock', 42 | fp16_enabled=False), # not enable FP16 here 43 | pts_backbone=dict( 44 | type='SECOND3D', 45 | in_channels=[512, 512, 512], 46 | out_channels=[128, 256, 512], 47 | layer_nums=[5, 5, 5], 48 | layer_strides=[1, 2, 4], 49 | is_cascade=False, 50 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 51 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)), 52 | pts_neck=dict( 53 | type='SECOND3DFPN', 54 | in_channels=[128, 256, 512], 55 | out_channels=[256, 256, 256], 56 | upsample_strides=[1, 2, 4], 57 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 58 | upsample_cfg=dict(type='deconv3d', bias=False), 59 | 
extra_conv=dict(type='Conv3d', num_conv=3, bias=False), 60 | use_conv_for_no_stride=True), 61 | pts_bbox_head=dict( 62 | type='Uni3DETRHead', 63 | # transformer_cfg 64 | num_query=300, 65 | num_classes=18, 66 | in_channels=256, 67 | sync_cls_avg_factor=True, 68 | with_box_refine=True, 69 | as_two_stage=False, 70 | code_size=8, 71 | transformer=dict( 72 | type='Uni3DETRTransformer', 73 | fp16_enabled=fp16_enabled, 74 | decoder=dict( 75 | type='Uni3DETRTransformerDecoder', 76 | num_layers=3, 77 | return_intermediate=True, 78 | transformerlayers=dict( 79 | type='BaseTransformerLayer', 80 | attn_cfgs=[ 81 | dict( 82 | type='MultiheadAttention', 83 | embed_dims=256, 84 | num_heads=8, 85 | dropout=0.1), 86 | dict( 87 | type='UniCrossAtten', 88 | num_points=1, 89 | embed_dims=256, 90 | num_sweeps=1, 91 | fp16_enabled=fp16_enabled), 92 | ], 93 | ffn_cfgs=dict( 94 | type='FFN', 95 | embed_dims=256, 96 | feedforward_channels=512, 97 | num_fcs=2, 98 | ffn_drop=0.1, ##0.1 99 | act_cfg=dict(type='ReLU', inplace=True), 100 | ), 101 | norm_cfg=dict(type='LN'), 102 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')) 103 | # operation_order=('cross_attn', 'norm', 'self_attn', 'norm', 'ffn', 'norm')) 104 | ) 105 | ), 106 | bbox_coder=dict( 107 | type='NMSFreeCoder', 108 | post_center_range=point_cloud_range, 109 | pc_range=point_cloud_range, 110 | # max_num=1000, 111 | max_num=5000, 112 | alpha=1.0, 113 | voxel_size=voxel_size, 114 | num_classes=18), 115 | post_processing=dict( 116 | type='nms', 117 | nms_thr=0.5), 118 | ######## soft nms can generate a little higher result 119 | # post_processing=dict( 120 | # type='soft_nms', 121 | # gaussian_sigma=0.3, 122 | # prune_threshold=1e-2), 123 | positional_encoding=dict( 124 | type='SinePositionalEncoding', 125 | num_feats=128, 126 | normalize=True, 127 | offset=-0.5), 128 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5), 129 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 130 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2), 131 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 132 | ), 133 | # model training and testing settings 134 | train_cfg=dict(pts=dict( 135 | grid_size=grid_size, 136 | voxel_size=voxel_size, 137 | point_cloud_range=point_cloud_range, 138 | out_size_factor=bev_stride, 139 | assigner=dict( 140 | type='HungarianAssigner3D', 141 | cls_cost=dict(type='FocalLossCost', weight=2.0), 142 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 143 | iou_cost=dict(type='IoU3DCost', weight=1.2), 144 | pc_range=point_cloud_range)))) 145 | 146 | 147 | dataset_type = 'ScanNetDataset' 148 | data_root = './data/scannet/' 149 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 150 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 151 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 152 | 'garbagebin') 153 | 154 | 155 | train_pipeline = [ 156 | dict( 157 | type='LoadPointsFromFile', 158 | coord_type='DEPTH', 159 | shift_height=False, 160 | load_dim=3, 161 | use_dim=[0, 1, 2]), 162 | dict(type='LoadAnnotations3D'), 163 | dict( 164 | type='RandomFlip3D', 165 | sync_2d=False, 166 | flip_ratio_bev_horizontal=0.5, 167 | flip_ratio_bev_vertical=0.5), 168 | dict( 169 | type='GlobalRotScaleTrans', 170 | rot_range=[-0.087266, 0.087266], 171 | scale_ratio_range=[.9, 1.1], 172 | translation_std=[.1, .1, .1], 173 | shift_height=False), 174 | dict(type='PointSample', num_points=200000), 175 | dict(type='DefaultFormatBundle3D', 
class_names=class_names), 176 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 177 | ] 178 | 179 | test_pipeline = [ 180 | dict( 181 | type='LoadPointsFromFile', 182 | coord_type='DEPTH', 183 | shift_height=False, 184 | load_dim=3, 185 | use_dim=[0, 1, 2]), 186 | dict( 187 | type='DefaultFormatBundle3D', 188 | class_names=class_names, 189 | with_label=False), 190 | dict(type='Collect3D', keys=['points']) 191 | ] 192 | 193 | data = dict( 194 | samples_per_gpu=3, ##### 3 195 | workers_per_gpu=4, 196 | train=dict( 197 | type='RepeatDataset', 198 | times=6, 199 | dataset=dict( 200 | type=dataset_type, 201 | data_root=data_root, 202 | ann_file=data_root + 'scannet_infos_train.pkl', 203 | pipeline=train_pipeline, 204 | filter_empty_gt=True, 205 | classes=class_names, 206 | box_type_3d='Depth') 207 | ), 208 | val=dict( 209 | type=dataset_type, 210 | data_root=data_root, 211 | ann_file=data_root + 'scannet_infos_val.pkl', 212 | pipeline=test_pipeline, 213 | classes=class_names, 214 | test_mode=True, 215 | box_type_3d='Depth'), 216 | test=dict( 217 | type=dataset_type, 218 | data_root=data_root, 219 | ann_file=data_root + 'scannet_infos_val.pkl', 220 | pipeline=test_pipeline, 221 | classes=class_names, 222 | test_mode=True, 223 | box_type_3d='Depth')) 224 | 225 | evaluation = dict(pipeline=test_pipeline, interval=5) 226 | 227 | 228 | # optimizer 229 | # This schedule is mainly used by models on indoor dataset, 230 | # e.g., VoteNet on SUNRGBD and ScanNet 231 | lr = 2e-5 *2/8 * 20 * 4/6 *6/8 *1.5 *8/6###########40 # max learning rate 232 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 233 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 234 | 235 | 236 | lr_config = dict(policy='step', warmup=None, step=[32, 38]) 237 | runner = dict(type='EpochBasedRunner', max_epochs=40) ###40 238 | 239 | # fp16 setting 240 | fp16 = dict(loss_scale=32.) 
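# --- Editor's sketch (conceptual only; mmcv's FP16 hooks handle this internally) ---
# A static loss scale of 32 roughly corresponds to:
#   scaled_loss = loss * 32.0
#   scaled_loss.backward()
#   for p in model.parameters():
#       if p.grad is not None:
#           p.grad.div_(32.0)   # un-scale gradients before the optimizer step
# which keeps small half-precision gradients from underflowing to zero.
# ------------------------------------------------------------------------------------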
241 | find_unused_parameters = True 242 | -------------------------------------------------------------------------------- /projects/configs/uni3detr/uni3detr_sunrgbd.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../configs/_base_/default_runtime.py' 3 | ] 4 | 5 | plugin=True 6 | plugin_dir='projects/mmdet3d_plugin/' 7 | 8 | # If point cloud range is changed, the models should also change their point 9 | # cloud range accordingly 10 | voxel_size = [0.02, 0.02, 0.02] 11 | grid_size = [128, 320, 320] 12 | point_cloud_range = [-3.2, -0.2, -2., 3.2, 6.2, 0.56] 13 | 14 | fp16_enabled = True 15 | bev_stride = 4 16 | sample_num = 5 17 | 18 | 19 | input_modality = dict( 20 | use_lidar=True, 21 | use_camera=False, 22 | use_radar=False, 23 | use_map=False, 24 | use_external=False) 25 | 26 | model = dict( 27 | type='Uni3DETR', 28 | pts_voxel_layer=dict( 29 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000), 30 | point_cloud_range=point_cloud_range), 31 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4), 32 | pts_middle_encoder=dict( 33 | type='SparseEncoderHD', 34 | in_channels=4, 35 | sparse_shape=grid_size, 36 | output_channels=256, 37 | order=('conv', 'norm', 'act'), 38 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), 39 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 40 | block_type='basicblock', 41 | fp16_enabled=False), # not enable FP16 here 42 | pts_backbone=dict( 43 | type='SECOND3D', 44 | in_channels=[256, 256, 256], 45 | out_channels=[128, 256, 512], 46 | layer_nums=[5, 5, 5], 47 | layer_strides=[1, 2, 4], 48 | is_cascade=False, 49 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 50 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)), 51 | pts_neck=dict( 52 | type='SECOND3DFPN', 53 | in_channels=[128, 256, 512], 54 | out_channels=[256, 256, 256], 55 | upsample_strides=[1, 2, 4], 56 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 57 | upsample_cfg=dict(type='deconv3d', bias=False), 58 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False), 59 | use_conv_for_no_stride=True), 60 | pts_bbox_head=dict( 61 | type='Uni3DETRHead', 62 | num_query=300, 63 | num_classes=10, 64 | in_channels=256, 65 | sync_cls_avg_factor=True, 66 | with_box_refine=True, 67 | as_two_stage=False, 68 | code_size=8, 69 | transformer=dict( 70 | type='Uni3DETRTransformer', 71 | fp16_enabled=fp16_enabled, 72 | decoder=dict( 73 | type='Uni3DETRTransformerDecoder', 74 | num_layers=3, 75 | return_intermediate=True, 76 | transformerlayers=dict( 77 | type='BaseTransformerLayer', 78 | attn_cfgs=[ 79 | dict( 80 | type='MultiheadAttention', 81 | embed_dims=256, 82 | num_heads=8, 83 | dropout=0.1), 84 | dict( 85 | type='UniCrossAtten', 86 | num_points=1, 87 | embed_dims=256, 88 | num_sweeps=1, 89 | fp16_enabled=fp16_enabled), 90 | ], 91 | ffn_cfgs=dict( 92 | type='FFN', 93 | embed_dims=256, 94 | feedforward_channels=512, 95 | num_fcs=2, 96 | ffn_drop=0.1, 97 | act_cfg=dict(type='ReLU', inplace=True), 98 | ), 99 | norm_cfg=dict(type='LN'), 100 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')) 101 | ) 102 | ), 103 | bbox_coder=dict( 104 | type='NMSFreeCoder', 105 | post_center_range=point_cloud_range, 106 | pc_range=point_cloud_range, 107 | max_num=1000, 108 | voxel_size=voxel_size, 109 | alpha=1.0, 110 | num_classes=10), 111 | post_processing=dict( 112 | type='nms', 113 | nms_thr=0.5), 114 | ######## soft nms can generate a little higher 
result 115 | # post_processing=dict( 116 | # type='soft_nms', 117 | # gaussian_sigma=0.3, 118 | # prune_threshold=1e-2), 119 | positional_encoding=dict( 120 | type='SinePositionalEncoding', 121 | num_feats=128, 122 | normalize=True, 123 | offset=-0.5), 124 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5), 125 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 126 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2), 127 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] 128 | ), 129 | # model training and testing settings 130 | train_cfg=dict(pts=dict( 131 | grid_size=grid_size, 132 | voxel_size=voxel_size, 133 | point_cloud_range=point_cloud_range, 134 | out_size_factor=bev_stride, 135 | assigner=dict( 136 | type='HungarianAssigner3D', 137 | cls_cost=dict(type='FocalLossCost', weight=2.0), 138 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 139 | iou_cost=dict(type='IoU3DCost', weight=1.2), 140 | pc_range=point_cloud_range)))) 141 | 142 | 143 | dataset_type = 'SUNRGBDDataset' 144 | data_root = 'data/sunrgbd/' 145 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 146 | 'night_stand', 'bookshelf', 'bathtub') 147 | 148 | file_client_args = dict(backend='disk') 149 | 150 | train_pipeline = [ 151 | dict( 152 | type='LoadPointsFromFile', 153 | coord_type='DEPTH', 154 | shift_height=True, 155 | load_dim=6, 156 | use_dim=[0, 1, 2], 157 | file_client_args=file_client_args), 158 | dict(type='LoadAnnotations3D', file_client_args=file_client_args), 159 | dict( 160 | type='RandomFlip3D', 161 | sync_2d=False, 162 | flip_ratio_bev_horizontal=0.5, 163 | ), 164 | dict( 165 | type='GlobalRotScaleTrans', 166 | rot_range=[-0.523599, 0.523599], 167 | scale_ratio_range=[0.85, 1.15], 168 | shift_height=True), 169 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 170 | # dict(type='PointSample', num_points=20000), 171 | dict(type='PointSample', num_points=100000), 172 | dict(type='DefaultFormatBundle3D', class_names=class_names), 173 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 174 | ] 175 | test_pipeline = [ 176 | dict( 177 | type='LoadPointsFromFile', 178 | coord_type='DEPTH', 179 | shift_height=True, 180 | load_dim=6, 181 | use_dim=[0, 1, 2], 182 | file_client_args=file_client_args), 183 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 184 | # dict(type='PointSample', num_points=50000), 185 | dict(type='PointSample', num_points=100000), 186 | dict( 187 | type='DefaultFormatBundle3D', 188 | class_names=class_names, 189 | with_label=False), 190 | dict(type='Collect3D', keys=['points']) 191 | ] 192 | 193 | data = dict( 194 | samples_per_gpu=4, 195 | workers_per_gpu=4, 196 | train=dict( 197 | type='RepeatDataset', 198 | times=2, #######5 199 | dataset=dict( 200 | type=dataset_type, 201 | data_root=data_root, 202 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 203 | pipeline=train_pipeline, 204 | classes=class_names, 205 | filter_empty_gt=True, 206 | box_type_3d='Depth', 207 | file_client_args=file_client_args)), 208 | val=dict( 209 | type=dataset_type, 210 | data_root=data_root, 211 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 212 | pipeline=test_pipeline, 213 | classes=class_names, 214 | test_mode=True, 215 | box_type_3d='Depth', 216 | file_client_args=file_client_args), 217 | test=dict( 218 | type=dataset_type, 219 | data_root=data_root, 220 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 221 | pipeline=test_pipeline, 222 | classes=class_names, 223 | 
test_mode=True, 224 | box_type_3d='Depth', 225 | file_client_args=file_client_args)) 226 | 227 | evaluation = dict(pipeline=test_pipeline, interval=5) 228 | 229 | 230 | # optimizer 231 | # This schedule is mainly used by models on indoor dataset, 232 | # e.g., VoteNet on SUNRGBD and ScanNet 233 | lr = 2e-5 *2/8 * 20 # max learning rate 234 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 235 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 236 | 237 | 238 | lr_config = dict(policy='step', warmup=None, step=[32, 38]) 239 | runner = dict(type='EpochBasedRunner', max_epochs=40) 240 | 241 | # fp16 setting 242 | fp16 = dict(loss_scale=32.) 243 | find_unused_parameters = True 244 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .datasets import NuScenesSweepDataset 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, NormalizeMultiviewImage, 7 | RandomScaleImageMultiViewImage, ImageRandomResizeCropFlip) 8 | from .models.backbones.vovnet import VoVNet 9 | from .models.detectors import Uni3DETR 10 | from .models.dense_heads import Uni3DETRHead 11 | from .models.pts_encoder import SparseEncoderHD 12 | from .models.necks import SECOND3DFPN 13 | from .models.losses import RDIoULoss, IoU3DLoss, SoftFocalLoss 14 | from .models.utils import Uni3DETRTransformer, Uni3DETRTransformerDecoder -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 4 | from mmdet.core.bbox.assigners import AssignResult 5 | from mmdet.core.bbox.assigners import BaseAssigner 6 | from mmdet.core.bbox.match_costs import build_match_cost 7 | from mmdet.models.utils.transformer import inverse_sigmoid 8 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox, denormalize_bbox 9 | from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d, bbox_overlaps_nearest_3d 10 | 11 | try: 12 | from scipy.optimize import linear_sum_assignment 13 | except ImportError: 14 | linear_sum_assignment = None 15 | 16 | 17 | @BBOX_ASSIGNERS.register_module() 18 | class HungarianAssigner3D(BaseAssigner): 19 | """Computes one-to-one matching between predictions and ground truth. 20 | This class computes an assignment between the targets and the predictions 21 | based on the costs. The costs are weighted sum of three components: 22 | classification cost, regression L1 cost and regression iou cost. The 23 | targets don't include the no_object, so generally there are more 24 | predictions than targets. After the one-to-one matching, the un-matched 25 | are treated as backgrounds. 
Thus each query prediction will be assigned 26 | with `0` or a positive integer indicating the ground truth index: 27 | - 0: negative sample, no assigned gt 28 | - positive integer: positive sample, index (1-based) of assigned gt 29 | Args: 30 | cls_cost (dict, optional): Config of the classification matching 31 | cost. Default dict(type='ClassificationCost', weight=1.). 32 | reg_cost (dict, optional): Config of the regression L1 matching 33 | cost. Default dict(type='BBoxL1Cost', weight=1.0). 34 | iou_cost (dict, optional): Config of the 3D IoU matching 35 | cost. Default dict(type='IoUCost', weight=0.0). 36 | pc_range (list[float], optional): Point cloud range used to 37 | normalize the ground truth boxes and denormalize the 38 | predicted boxes before the regression and IoU costs 39 | are computed. Default None. 40 | 41 | """ 42 | 43 | def __init__(self, 44 | cls_cost=dict(type='ClassificationCost', weight=1.), 45 | reg_cost=dict(type='BBoxL1Cost', weight=1.0), 46 | iou_cost=dict(type='IoUCost', weight=0.0), 47 | pc_range=None): 48 | self.cls_cost = build_match_cost(cls_cost) 49 | self.reg_cost = build_match_cost(reg_cost) 50 | self.iou_cost = build_match_cost(iou_cost) 51 | self.pc_range = pc_range 52 | 53 | def assign(self, 54 | bbox_pred, 55 | cls_pred, 56 | gt_bboxes, 57 | gt_labels, 58 | num_query, 59 | gt_bboxes_ignore=None, 60 | eps=1e-7, gt_repeattimes=1): 61 | """Computes one-to-one matching based on the weighted costs. 62 | This method assigns each query prediction to a ground truth or 63 | background. The `assigned_gt_inds` with -1 means don't care, 64 | 0 means negative sample, and positive number is the index (1-based) 65 | of assigned gt. 66 | The assignment is done in the following steps, and the order matters. 67 | 1. assign every prediction to -1 68 | 2. compute the weighted costs 69 | 3. do Hungarian matching on CPU based on the costs 70 | 4. assign all to 0 (background) first, then for each matched pair 71 | between predictions and gts, treat this prediction as foreground 72 | and assign the corresponding gt index (plus 1) to it. 73 | Args: 74 | bbox_pred (Tensor): Predicted boxes with normalized coordinates, 75 | in range [0, 1] where applicable. Shape 76 | [num_query, code_size]. 77 | cls_pred (Tensor): Predicted classification logits, shape 78 | [num_query, num_class]. 79 | gt_bboxes (Tensor): Ground truth 3D boxes with unnormalized 80 | coordinates. Shape [num_gt, box_dim]. 81 | gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,). 82 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 83 | labelled as `ignored`. Default None. 84 | eps (int | float, optional): A value added to the denominator for 85 | numerical stability. Default 1e-7. 86 | Returns: 87 | :obj:`AssignResult`: The assigned result. 88 | """ 89 | assert gt_bboxes_ignore is None, \ 90 | 'Only case when gt_bboxes_ignore is None is supported.' 91 | num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0) 92 | 93 | # 1. assign -1 by default 94 | assigned_gt_inds = bbox_pred.new_full((num_bboxes, ), 95 | -1, 96 | dtype=torch.long) 97 | assigned_labels = bbox_pred.new_full((num_bboxes, ), 98 | -1, 99 | dtype=torch.long) 100 | if num_gts == 0 or num_bboxes == 0: 101 | # No ground truth or boxes, return empty assignment 102 | if num_gts == 0: 103 | # No ground truth, assign all to background 104 | assigned_gt_inds[:] = 0 105 | return AssignResult( 106 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 107 | 108 | # 2.
compute the weighted costs 109 | # classification and bboxcost. 110 | normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range) 111 | bboxes3d = denormalize_bbox(bbox_pred, self.pc_range) 112 | iou3d = bbox_overlaps_nearest_3d(bboxes3d, gt_bboxes, coordinate='depth') 113 | 114 | cls_cost = self.cls_cost(cls_pred, gt_labels) 115 | #cls_cost = self.cls_cost(cls_pred, gt_labels, iou3d) 116 | 117 | # regression L1 cost 118 | reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8]) 119 | iou_cost = self.iou_cost(bboxes3d, gt_bboxes) 120 | 121 | cost = cls_cost + reg_cost + iou_cost 122 | 123 | # 3. do Hungarian matching on CPU using linear_sum_assignment 124 | cost = cost.detach().cpu() 125 | # cost[torch.isnan(cost)] = 1e5 126 | if linear_sum_assignment is None: 127 | raise ImportError('Please run pip install scipy to install scipy first.') 128 | 129 | nq = num_query 130 | ng = int(cost.shape[0] // nq) 131 | matched_row_inds, matched_col_inds = [], [] 132 | for g in range(ng): 133 | # matched_row_inds1, matched_col_inds1 = linear_sum_assignment(cost[g*nq:(g+1)*nq]) 134 | matched_row_inds1, matched_col_inds1 = linear_sum_assignment(cost[g*nq:(g+1)*nq].repeat(1, gt_repeattimes) ) 135 | matched_row_inds.append(g*nq + matched_row_inds1) 136 | #matched_col_inds.append(matched_col_inds1) 137 | matched_col_inds.append(matched_col_inds1 % cost.shape[1]) 138 | matched_row_inds = np.concatenate(matched_row_inds) 139 | matched_col_inds = np.concatenate(matched_col_inds) 140 | 141 | matched_row_inds = torch.from_numpy(matched_row_inds).to(bbox_pred.device) 142 | matched_col_inds = torch.from_numpy(matched_col_inds).to(bbox_pred.device) 143 | 144 | # 4. assign backgrounds and foregrounds 145 | # assign all indices to backgrounds first 146 | assigned_gt_inds[:] = 0 147 | # assign foregrounds based on matching results 148 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 149 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 150 | return AssignResult( 151 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 152 | 153 | @staticmethod 154 | def _bbox_to_loss(bbox): 155 | # axis-aligned case: x, y, z, w, h, l -> x1, y1, z1, x2, y2, z2 156 | return torch.stack( 157 | (bbox[..., 0] - bbox[..., 3] / 2, bbox[..., 1] - bbox[..., 4] / 2, 158 | bbox[..., 2] - bbox[..., 5] / 2, bbox[..., 0] + bbox[..., 3] / 2, 159 | bbox[..., 1] + bbox[..., 4] / 2, bbox[..., 2] + bbox[..., 5] / 2), 160 | dim=-1) 161 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/bbox_merging.py: -------------------------------------------------------------------------------- 1 | """This file defines nms functions to merge boxes""" 2 | 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | from shapely.geometry import Polygon 8 | 9 | from numba import jit 10 | 11 | def boxes_3d_to_corners(boxes_3d): 12 | all_corners = [] 13 | for box_3d in boxes_3d: 14 | x3d, y3d, z3d, l, h, w, yaw = box_3d 15 | R = np.array([[np.cos(yaw), 0, np.sin(yaw)], 16 | [0, 1, 0 ], 17 | [-np.sin(yaw), 0, np.cos(yaw)]]); 18 | corners = np.array([[ l/2, 0.0, w/2], # front up right 19 | [ l/2, 0.0, -w/2], # front up left 20 | [-l/2, 0.0, -w/2], # back up left 21 | [-l/2, 0.0, w/2], # back up right 22 | [ l/2, -h, w/2], # front down right 23 | [ l/2, -h, -w/2], # front down left 24 | [-l/2, -h, -w/2], # back down left 25 | [-l/2, -h, w/2]]) # back down right 26 | r_corners = corners.dot(np.transpose(R)) 27 | cam_points_xyz = r_corners+np.array([x3d, y3d, 
z3d]) 28 | all_corners.append(cam_points_xyz) 29 | return np.array(all_corners) 30 | 31 | def overlapped_boxes_3d(single_box, box_list): 32 | x0_max, y0_max, z0_max = np.max(single_box, axis=0) 33 | x0_min, y0_min, z0_min = np.min(single_box, axis=0) 34 | overlap = np.zeros(len(box_list)) 35 | for i, box in enumerate(box_list): 36 | x_max, y_max, z_max = np.max(box, axis=0) 37 | x_min, y_min, z_min = np.min(box, axis=0) 38 | if x0_max < x_min or x0_min > x_max: 39 | overlap[i] = 0 40 | continue 41 | if y0_max < y_min or y0_min > y_max: 42 | overlap[i] = 0 43 | continue 44 | if z0_max < z_min or z0_min > z_max: 45 | overlap[i] = 0 46 | continue 47 | x_draw_min = min(x0_min, x_min) 48 | x_draw_max = max(x0_max, x_max) 49 | z_draw_min = min(z0_min, z_min) 50 | z_draw_max = max(z0_max, z_max) 51 | offset = np.array([x_draw_min, z_draw_min]) 52 | buf1 = np.zeros((z_draw_max-z_draw_min, x_draw_max-x_draw_min), 53 | dtype=np.int32) 54 | buf2 = np.zeros_like(buf1) 55 | cv2.fillPoly(buf1, [single_box[:4, [0,2]]-offset], color=1) 56 | cv2.fillPoly(buf2, [box[:4, [0,2]]-offset], color=1) 57 | shared_area = cv2.countNonZero(buf1*buf2) 58 | area1 = cv2.countNonZero(buf1) 59 | area2 = cv2.countNonZero(buf2) 60 | shared_y = min(y_max, y0_max) - max(y_min, y0_min) 61 | intersection = shared_y * shared_area 62 | union = (y_max-y_min) * area2 + (y0_max-y0_min) * area1 63 | overlap[i] = np.float32(intersection) / (union - intersection) 64 | return overlap 65 | 66 | def overlapped_boxes_3d_fast_poly(single_box, box_list): 67 | single_box_max_corner = np.max(single_box, axis=0) 68 | single_box_min_corner = np.min(single_box, axis=0) 69 | x0_max, y0_max, z0_max = single_box_max_corner 70 | x0_min, y0_min, z0_min = single_box_min_corner 71 | max_corner = np.max(box_list, axis=1) 72 | min_corner = np.min(box_list, axis=1) 73 | overlap = np.zeros(len(box_list)) 74 | non_overlap_mask = np.logical_or(single_box_max_corner < min_corner, 75 | single_box_min_corner > max_corner) 76 | non_overlap_mask = np.any(non_overlap_mask, axis=1) 77 | p1 = Polygon(single_box[:4, [0,2]]) 78 | area1 = p1.area 79 | for i in range(len(box_list)): 80 | if not non_overlap_mask[i]: 81 | x_max, y_max, z_max = max_corner[i] 82 | x_min, y_min, z_min = min_corner[i] 83 | p2 = Polygon(box_list[i][:4, [0,2]]) 84 | shared_area = p1.intersection(p2).area 85 | area2 = p2.area 86 | shared_y = min(y_max, y0_max) - max(y_min, y0_min) 87 | intersection = shared_y * shared_area 88 | union = (y_max-y_min) * area2 + (y0_max-y0_min) * area1 89 | overlap[i] = np.float32(intersection) / (union - intersection) 90 | return overlap 91 | 92 | 93 | def bboxes_sort(classes, scores, bboxes, top_k=400, attributes=None): 94 | """Sort bounding boxes by decreasing order and keep only the top_k 95 | """ 96 | idxes = np.argsort(-scores) 97 | classes = classes[idxes] 98 | scores = scores[idxes] 99 | bboxes = bboxes[idxes] 100 | if attributes is not None: 101 | attributes = attributes[idxes] 102 | if top_k > 0: 103 | if len(idxes) > top_k: 104 | classes = classes[:top_k] 105 | scores = scores[:top_k] 106 | bboxes = bboxes[:top_k] 107 | if attributes is not None: 108 | attributes = attributes[:top_k] 109 | return classes, scores, bboxes, attributes 110 | 111 | 112 | def bboxes_nms_merge_only(classes, scores, bboxes, scores_threshold=0.25, 113 | nms_threshold=0.45, overlapped_fn=overlapped_boxes_3d_fast_poly, appr_factor=10.0, 114 | attributes=None): 115 | """Apply non-maximum selection to bounding boxes. 
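    Boxes of the same class whose overlap with a higher-scoring box exceeds
    ``nms_threshold`` are not discarded outright: the kept box is replaced by
    the coordinate-wise median of itself and every box it suppresses, so the
    step acts as box voting on top of NMS. Returns the kept
    (classes, scores, bboxes, idxes).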
116 | """ 117 | boxes_corners = boxes_3d_to_corners(bboxes) 118 | # convert to pixels 119 | keep_bboxes = np.ones(scores.shape, dtype=np.bool) 120 | for i in range(scores.size-1): 121 | if keep_bboxes[i]: 122 | # Only compute on the rest of bboxes 123 | valid = keep_bboxes[(i+1):] 124 | # Computer overlap with bboxes which are following. 125 | overlap = overlapped_fn(boxes_corners[i], 126 | boxes_corners[(i+1):][valid]) 127 | # Overlap threshold for keeping + checking part of the same class 128 | remove_overlap = np.logical_and(overlap > nms_threshold, 129 | classes[(i+1):][valid] == classes[i]) 130 | overlaped_bboxes = np.concatenate( 131 | [bboxes[(i+1):][valid][remove_overlap], bboxes[[i]]], axis=0) 132 | boxes_mean = np.median(overlaped_bboxes, axis=0) 133 | # boxes_mean = np.mean(overlaped_bboxes, axis=0) 134 | bboxes[i][:] = boxes_mean[:] 135 | keep_bboxes[(i+1):][valid] = np.logical_not(remove_overlap)## 136 | 137 | idxes = np.where(keep_bboxes) 138 | classes = classes[idxes] 139 | scores = scores[idxes] 140 | bboxes = bboxes[idxes] 141 | if attributes is not None: 142 | attributes = attributes[idxes] 143 | return classes, scores, bboxes, idxes, #attributes 144 | 145 | def nms_boxes_3d_merge_only(class_labels, detection_boxes_3d, detection_scores, 146 | overlapped_thres=0.5, overlapped_fn=overlapped_boxes_3d_fast_poly, appr_factor=10.0, 147 | top_k=-1, attributes=None): 148 | class_labels, detection_scores, detection_boxes_3d, attributes = \ 149 | bboxes_sort( 150 | class_labels, detection_scores, detection_boxes_3d, top_k=top_k, 151 | attributes=attributes) 152 | # nms 153 | class_labels, detection_scores, detection_boxes_3d, attributes = \ 154 | bboxes_nms_merge_only( 155 | class_labels, detection_scores, detection_boxes_3d, 156 | nms_threshold=overlapped_thres, overlapped_fn=overlapped_fn, 157 | appr_factor=appr_factor, attributes=attributes) 158 | return class_labels, detection_boxes_3d, detection_scores, attributes 159 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | from mmdet.models.utils.transformer import inverse_sigmoid 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. 
Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | alpha=0.5, 29 | num_classes=10): 30 | 31 | self.pc_range = pc_range 32 | self.voxel_size = voxel_size 33 | self.post_center_range = post_center_range 34 | self.max_num = max_num 35 | self.score_threshold = score_threshold 36 | self.num_classes = num_classes 37 | self.alpha = alpha 38 | 39 | def encode(self): 40 | pass 41 | 42 | def decode_single(self, cls_scores, bbox_preds, all_iou_preds): 43 | """Decode bboxes. 44 | Args: 45 | cls_scores (Tensor): Outputs from the classification head, \ 46 | shape [num_query, cls_out_channels]. Note \ 47 | cls_out_channels should includes background. 48 | bbox_preds (Tensor): Outputs from the regression \ 49 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 50 | Shape [num_query, 9]. 51 | Returns: 52 | list[dict]: Decoded boxes. 53 | """ 54 | max_num = self.max_num 55 | #max_num = cls_scores.numel() 56 | 57 | cls_scores = cls_scores.sigmoid() 58 | ious = all_iou_preds.sigmoid() 59 | 60 | scores, indexs = cls_scores.view(-1).topk(max_num) 61 | labels = indexs % self.num_classes 62 | bbox_index = indexs // self.num_classes 63 | bbox_preds = bbox_preds[bbox_index] 64 | 65 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 66 | final_scores = scores 67 | final_preds = labels 68 | 69 | all_iou_preds = all_iou_preds.sigmoid() 70 | final_ious = all_iou_preds[bbox_index] 71 | 72 | # use score threshold 73 | if self.score_threshold is not None: 74 | thresh_mask = final_scores > self.score_threshold 75 | if self.post_center_range is not None: 76 | # self.post_center_range = torch.tensor(self.post_center_range, device=scores.device) 77 | self.post_center_range = scores.new_tensor(self.post_center_range) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | labels = final_preds[mask] 89 | ious = final_ious[mask] 90 | 91 | predictions_dict = { 92 | 'bboxes': boxes3d, 93 | #'scores': scores, 94 | 'scores': scores ** self.alpha * ious.reshape(-1) ** (1-self.alpha), 95 | 'labels': labels, 96 | 'ious': ious.reshape(-1), 97 | } 98 | 99 | else: 100 | raise NotImplementedError( 101 | 'Need to reorganize output as a batch, only ' 102 | 'support post_center_range is not None for now!') 103 | return predictions_dict 104 | 105 | def decode(self, preds_dicts): 106 | """Decode bboxes. 107 | Args: 108 | all_cls_scores (Tensor): Outputs from the classification head, \ 109 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 110 | cls_out_channels should includes background. 111 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 112 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 113 | Shape [nb_dec, bs, num_query, 9]. 114 | Returns: 115 | list[dict]: Decoded boxes. 
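                Each per-sample dict carries the decoded 'bboxes', the fused
                'scores' (classification score combined with the predicted IoU
                through the ``alpha`` exponent), 'labels' and 'ious'.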
116 | """ 117 | # all_cls_scores = preds_dicts['all_cls_scores'][-1] 118 | # all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 119 | # all_iou_preds = preds_dicts['all_iou_preds'][-1] 120 | 121 | all_cls_scores = torch.mean(preds_dicts['all_cls_scores'][1:], 0) 122 | all_bbox_preds = torch.mean(preds_dicts['all_bbox_preds'][1:], 0) 123 | all_iou_preds = torch.mean(preds_dicts['all_iou_preds'][1:], 0) 124 | 125 | #all_centerness_preds = torch.mean(preds_dicts['all_centerness_preds'][1:], 0) 126 | # all_cls_scores = torch.mean(preds_dicts['all_cls_scores'], 0) 127 | # all_bbox_preds = torch.mean(preds_dicts['all_bbox_preds'], 0) 128 | # all_cls_scores = 0. * preds_dicts['all_cls_scores'][0] + 0.4 * preds_dicts['all_cls_scores'][1] + 0.6 * preds_dicts['all_cls_scores'][2] 129 | # all_bbox_preds = 0. * preds_dicts['all_bbox_preds'][0] + 0.4 * preds_dicts['all_bbox_preds'][1] + 0.6 * preds_dicts['all_bbox_preds'][2] 130 | 131 | batch_size = all_cls_scores.size()[0] 132 | predictions_list = [] 133 | for i in range(batch_size): 134 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i], all_iou_preds[i])) 135 | #predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i], all_iou_preds[i], all_centerness_preds[i])) 136 | return predictions_list 137 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost, RotatedIoU3DCost, AxisAlignedIoU3DCost, RDIoUCost, SoftFocalLossCost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost', 'RotatedIoU3DCost', 'AxisAlignedIoU3DCost', 'RDIoUCost', 'SoftFocalLossCost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | from mmcv.ops import diff_iou_rotated_3d 4 | from mmdet3d.core.bbox import AxisAlignedBboxOverlaps3D 5 | from projects.mmdet3d_plugin.core.bbox.util import get_rdiou 6 | from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d, bbox_overlaps_nearest_3d 7 | import torch.nn.functional as F 8 | 9 | @MATCH_COST.register_module() 10 | class BBox3DL1Cost(object): 11 | """BBox3DL1Cost. 12 | Args: 13 | weight (int | float, optional): loss_weight 14 | """ 15 | 16 | def __init__(self, weight=1.): 17 | self.weight = weight 18 | 19 | def __call__(self, bbox_pred, gt_bboxes): 20 | """ 21 | Args: 22 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 23 | (cx, cy, w, h), which are all in range [0, 1]. Shape 24 | [num_query, 4]. 25 | gt_bboxes (Tensor): Ground truth boxes with normalized 26 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 27 | Returns: 28 | torch.Tensor: bbox_cost value with weight 29 | """ 30 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 31 | return bbox_cost * self.weight 32 | 33 | 34 | @MATCH_COST.register_module() 35 | class RotatedIoU3DCost(object): 36 | 37 | def __init__(self, weight=1.): 38 | self.weight = weight 39 | 40 | def __call__(self, bbox_pred, gt_bboxes): 41 | """ 42 | Args: 43 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 44 | (cx, cy, w, h), which are all in range [0, 1]. Shape 45 | [num_query, 4]. 
46 | gt_bboxes (Tensor): Ground truth boxes with normalized 47 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 48 | Returns: 49 | torch.Tensor: bbox_cost value with weight 50 | """ 51 | #print(bbox_pred.shape, gt_bboxes.shape) 52 | N = gt_bboxes.shape[0] 53 | M = bbox_pred.shape[0] 54 | bbox_costs = [diff_iou_rotated_3d(bbox_pred.unsqueeze(0), gt_bboxes[[i], :].repeat(M, 1).unsqueeze(0))[0].unsqueeze(1) for i in range(N)] 55 | bbox_cost = torch.cat(bbox_costs, 1) 56 | 57 | return bbox_cost * self.weight 58 | 59 | 60 | @MATCH_COST.register_module() 61 | class AxisAlignedIoU3DCost(object): 62 | 63 | def __init__(self, weight=1.): 64 | self.weight = weight 65 | 66 | def __call__(self, bbox_pred, gt_bboxes): 67 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()(bbox_pred, gt_bboxes) 68 | iou_loss = - axis_aligned_iou 69 | return iou_loss * self.weight 70 | 71 | @MATCH_COST.register_module() 72 | class RDIoUCost(object): 73 | 74 | def __init__(self, weight=1.): 75 | self.weight = weight 76 | 77 | def __call__(self, bbox_pred, gt_bboxes): 78 | u, rdiou = get_rdiou(bbox_pred.unsqueeze(1), gt_bboxes.unsqueeze(0)) 79 | 80 | rdiou_loss_n = rdiou - u 81 | rdiou_loss_n = torch.clamp(rdiou_loss_n,min=-1.0,max = 1.0) 82 | rdiou_loss_n = 1 - rdiou_loss_n 83 | return rdiou_loss_n * self.weight 84 | 85 | @MATCH_COST.register_module() 86 | class IoU3DCost(object): 87 | 88 | def __init__(self, weight=1.): 89 | self.weight = weight 90 | 91 | def __call__(self, bbox_pred, gt_bboxes): 92 | #iou3d = 1 - bbox_overlaps_3d(bbox_pred, gt_bboxes, coordinate='depth') 93 | #iou3d = (1 - bbox_overlaps_nearest_3d(bbox_pred, gt_bboxes, coordinate='depth') ) 94 | iou3d = (1 - bbox_overlaps_nearest_3d(bbox_pred, gt_bboxes, coordinate='lidar') ) ############ 95 | #iou3d += (1 - bbox_overlaps_nearest_3d(bbox_pred[:, [0,2,1,3,5,4,6]], gt_bboxes[:, [0,2,1,3,5,4,6]], coordinate='depth') ) * 0.1 96 | #iou3d += (1 - bbox_overlaps_nearest_3d(bbox_pred[:, [1,2,0,4,5,3,6]], gt_bboxes[:, [1,2,0,4,5,3,6]], coordinate='depth') ) * 0.1 97 | return iou3d * self.weight 98 | 99 | 100 | @MATCH_COST.register_module() 101 | class SoftFocalLossCost(object): 102 | 103 | def __init__(self, 104 | weight=1., 105 | alpha=0.25, 106 | gamma=2, 107 | eps=1e-12, 108 | binary_input=False): 109 | self.weight = weight 110 | self.alpha = alpha 111 | self.gamma = gamma 112 | self.eps = eps 113 | self.binary_input = binary_input 114 | 115 | 116 | def __call__(self, cls_pred, gt_labels, iou3d): 117 | 118 | cls_pred = cls_pred.sigmoid() 119 | 120 | iou3d = iou3d.pow(0.001) 121 | neg_cost = -(1 - cls_pred * iou3d + self.eps).log() * ( 122 | 1 - self.alpha) * (cls_pred * iou3d).pow(self.gamma) 123 | 124 | pos_cost = -(cls_pred * iou3d + self.eps).log() * self.alpha * ( 125 | 1 - cls_pred * iou3d).pow(self.gamma) 126 | 127 | cls_cost = pos_cost[:, gt_labels] - neg_cost[:, gt_labels] 128 | 129 | return cls_cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmdet3d 4 | 5 | __mmdet3d_version__ = float(mmdet3d.__version__[:3]) 6 | 7 | 8 | def normalize_bbox(bboxes, pc_range=None): 9 | 10 | cx = bboxes[..., 0:1] 11 | cy = bboxes[..., 1:2] 12 | cz = bboxes[..., 2:3] 13 | # align coord system with previous version 14 | if __mmdet3d_version__ < 1.0: 15 | # w = bboxes[..., 3:4] 16 | # l = bboxes[..., 4:5] 17 | # h = bboxes[..., 5:6] 18 | w = 
bboxes[..., 3:4].log() 19 | l = bboxes[..., 4:5].log() 20 | h = bboxes[..., 5:6].log() 21 | rot = bboxes[..., 6:7] 22 | else: 23 | # l = bboxes[..., 3:4] 24 | # w = bboxes[..., 4:5] 25 | # h = bboxes[..., 5:6] 26 | l = (bboxes[..., 3:4] + 1e-5).log() 27 | w = (bboxes[..., 4:5] + 1e-5).log() 28 | h = (bboxes[..., 5:6] + 1e-5).log() 29 | rot = bboxes[..., 6:7] 30 | rot = -rot - np.pi / 2 31 | 32 | if bboxes.size(-1) > 7: 33 | vx = bboxes[..., 7:8] 34 | vy = bboxes[..., 8:9] 35 | normalized_bboxes = torch.cat( 36 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 37 | ) 38 | else: 39 | normalized_bboxes = torch.cat( 40 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 41 | ) 42 | return normalized_bboxes 43 | 44 | def denormalize_bbox(normalized_bboxes, pc_range=None, version=0.8): 45 | # rotation 46 | rot_sine = normalized_bboxes[..., 6:7] 47 | 48 | rot_cosine = normalized_bboxes[..., 7:8] 49 | rot = torch.atan2(rot_sine, rot_cosine) 50 | 51 | # align coord system with previous version 52 | if __mmdet3d_version__ >= 1.0: 53 | rot = -rot - np.pi / 2 54 | # center in the bev 55 | cx = normalized_bboxes[..., 0:1] 56 | cy = normalized_bboxes[..., 1:2] 57 | cz = normalized_bboxes[..., 4:5] 58 | 59 | # size 60 | w = normalized_bboxes[..., 2:3] 61 | l = normalized_bboxes[..., 3:4] 62 | h = normalized_bboxes[..., 5:6] 63 | 64 | w = w.exp() 65 | l = l.exp() 66 | h = h.exp() 67 | if normalized_bboxes.size(-1) > 8: 68 | # velocity 69 | vx = normalized_bboxes[..., 8:9] 70 | vy = normalized_bboxes[..., 9:10] 71 | if __mmdet3d_version__ < 1.0: 72 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 73 | else: 74 | denormalized_bboxes = torch.cat([cx, cy, cz, l, w, h, rot, vx, vy], dim=-1) 75 | else: 76 | if __mmdet3d_version__ < 1.0: 77 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 78 | else: 79 | denormalized_bboxes = torch.cat([cx, cy, cz, l, w, h, rot], dim=-1) 80 | return denormalized_bboxes 81 | 82 | def bbox3d_mapping_back(bboxes, rot_degree, scale_factor, flip_horizontal, flip_vertical): 83 | """Map bboxes from testing scale to original image scale. 84 | 85 | Args: 86 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back. 87 | scale_factor (float): Scale factor. 88 | flip_horizontal (bool): Whether to flip horizontally. 89 | flip_vertical (bool): Whether to flip vertically. 90 | 91 | Returns: 92 | :obj:`BaseInstance3DBoxes`: Boxes mapped back. 93 | """ 94 | new_bboxes = bboxes.clone() 95 | if flip_horizontal: 96 | new_bboxes.flip('horizontal') 97 | if flip_vertical: 98 | new_bboxes.flip('vertical') 99 | new_bboxes.scale(1 / scale_factor) 100 | new_bboxes.rotate(-rot_degree) 101 | 102 | return new_bboxes 103 | 104 | def get_rdiou(bboxes1, bboxes2): 105 | x1u, y1u, z1u = bboxes1[:,:,0], bboxes1[:,:,1], bboxes1[:,:,2] 106 | l1, w1, h1 = torch.exp(bboxes1[:,:,3]), torch.exp(bboxes1[:,:,4]), torch.exp(bboxes1[:,:,5]) 107 | t1 = torch.sin(bboxes1[:,:,6]) * torch.cos(bboxes2[:,:,6]) 108 | x2u, y2u, z2u = bboxes2[:,:,0], bboxes2[:,:,1], bboxes2[:,:,2] 109 | l2, w2, h2 = torch.exp(bboxes2[:,:,3]), torch.exp(bboxes2[:,:,4]), torch.exp(bboxes2[:,:,5]) 110 | t2 = torch.cos(bboxes1[:,:,6]) * torch.sin(bboxes2[:,:,6]) 111 | 112 | # we emperically scale the y/z to make their predictions more sensitive. 113 | x1 = x1u 114 | y1 = y1u * 2 115 | z1 = z1u * 2 116 | x2 = x2u 117 | y2 = y2u * 2 118 | z2 = z2u * 2 119 | 120 | # clamp is necessray to aviod inf. 
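    # (exp() of a large raw size prediction would otherwise blow up).
    # RDIoU treats the decoupled rotation term t as a fourth box axis with a
    # fixed unit extent j, so the intersection / union / enclosing-box terms
    # below are computed over a 4D box (x, y, z, t); u is the DIoU-style
    # center-distance penalty normalised by the enclosing diagonal.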
121 | l1, w1, h1 = torch.clamp(l1, max=10), torch.clamp(w1, max=10), torch.clamp(h1, max=10) 122 | j1, j2 = torch.ones_like(h2), torch.ones_like(h2) 123 | 124 | volume_1 = l1 * w1 * h1 * j1 125 | volume_2 = l2 * w2 * h2 * j2 126 | 127 | inter_l = torch.max(x1 - l1 / 2, x2 - l2 / 2) 128 | inter_r = torch.min(x1 + l1 / 2, x2 + l2 / 2) 129 | inter_t = torch.max(y1 - w1 / 2, y2 - w2 / 2) 130 | inter_b = torch.min(y1 + w1 / 2, y2 + w2 / 2) 131 | inter_u = torch.max(z1 - h1 / 2, z2 - h2 / 2) 132 | inter_d = torch.min(z1 + h1 / 2, z2 + h2 / 2) 133 | inter_m = torch.max(t1 - j1 / 2, t2 - j2 / 2) 134 | inter_n = torch.min(t1 + j1 / 2, t2 + j2 / 2) 135 | 136 | inter_volume = torch.clamp((inter_r - inter_l),min=0) * torch.clamp((inter_b - inter_t),min=0) \ 137 | * torch.clamp((inter_d - inter_u),min=0) * torch.clamp((inter_n - inter_m),min=0) 138 | 139 | c_l = torch.min(x1 - l1 / 2,x2 - l2 / 2) 140 | c_r = torch.max(x1 + l1 / 2,x2 + l2 / 2) 141 | c_t = torch.min(y1 - w1 / 2,y2 - w2 / 2) 142 | c_b = torch.max(y1 + w1 / 2,y2 + w2 / 2) 143 | c_u = torch.min(z1 - h1 / 2,z2 - h2 / 2) 144 | c_d = torch.max(z1 + h1 / 2,z2 + h2 / 2) 145 | c_m = torch.min(t1 - j1 / 2,t2 - j2 / 2) 146 | c_n = torch.max(t1 + j1 / 2,t2 + j2 / 2) 147 | 148 | inter_diag = (x2 - x1)**2 + (y2 - y1)**2 + (z2 - z1)**2 + (t2 - t1)**2 149 | c_diag = torch.clamp((c_r - c_l),min=0)**2 + torch.clamp((c_b - c_t),min=0)**2 + torch.clamp((c_d - c_u),min=0)**2 + torch.clamp((c_n - c_m),min=0)**2 150 | 151 | union = volume_1 + volume_2 - inter_volume 152 | u = (inter_diag) / c_diag 153 | rdiou = inter_volume / union 154 | return u, rdiou -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/merge_all_augs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | #from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu 5 | from mmdet3d.core.bbox import bbox3d2result, xywhr2xyxyr 6 | from .bbox.util import bbox3d_mapping_back 7 | from mmdet3d.core.post_processing import nms_bev, nms_normal_bev 8 | 9 | def merge_all_aug_bboxes_3d(aug_results, img_metas, test_cfg): 10 | """Merge augmented detection 3D bboxes and scores. 11 | 12 | Args: 13 | aug_results (list[dict]): The dict of detection results. 14 | The dict contains the following keys 15 | 16 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. 17 | - scores_3d (torch.Tensor): Detection scores. 18 | - labels_3d (torch.Tensor): Predicted box labels. 19 | img_metas (list[dict]): Meta information of each sample. 20 | test_cfg (dict): Test config. 21 | 22 | Returns: 23 | dict: Bounding boxes results in cpu mode, containing merged results. 24 | 25 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox. 26 | - scores_3d (torch.Tensor): Merged detection scores. 27 | - labels_3d (torch.Tensor): Merged predicted box labels. 
28 | """ 29 | 30 | assert len(aug_results) == len(img_metas), \ 31 | '"aug_results" should have the same length as "img_metas", got len(' \ 32 | f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}' 33 | 34 | recovered_bboxes = [] 35 | recovered_scores = [] 36 | recovered_labels = [] 37 | 38 | for bboxes, img_info in zip(aug_results, img_metas): 39 | scale_factor = img_info[0]['pcd_scale_factor'] 40 | # print(bboxes) 41 | rotate_degree = img_info[0].get('rot_degree', torch.tensor(0., device=bboxes['scores_3d'].device)) #img_info[0]['rot_degree'] 42 | pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip'] 43 | pcd_vertical_flip = img_info[0]['pcd_vertical_flip'] 44 | # print(bboxes) 45 | recovered_scores.append(bboxes['scores_3d']) 46 | recovered_labels.append(bboxes['labels_3d']) 47 | bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], rotate_degree, scale_factor, #boxes_3d 48 | pcd_horizontal_flip, pcd_vertical_flip) 49 | recovered_bboxes.append(bboxes) 50 | 51 | aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes) 52 | aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev) 53 | aug_scores = torch.cat(recovered_scores, dim=0) 54 | aug_labels = torch.cat(recovered_labels, dim=0) 55 | 56 | # TODO: use a more elegent way to deal with nms 57 | if True: #test_cfg.use_rotate_nms: 58 | nms_func = nms_bev #nms_gpu 59 | else: 60 | nms_func = nms_normal_gpu 61 | 62 | merged_bboxes = [] 63 | merged_scores = [] 64 | merged_labels = [] 65 | 66 | # Apply multi-class nms when merge bboxes 67 | if len(aug_labels) == 0: 68 | return bbox3d2result(aug_bboxes, aug_scores, aug_labels) 69 | 70 | for class_id in range(int(torch.max(aug_labels).item() + 1)): 71 | # print(aug_labels) 72 | class_inds = (aug_labels == class_id) 73 | bboxes_i = aug_bboxes[class_inds] 74 | bboxes_nms_i = aug_bboxes_for_nms[class_inds, :] 75 | scores_i = aug_scores[class_inds] 76 | labels_i = aug_labels[class_inds] 77 | if len(bboxes_nms_i) == 0: 78 | continue 79 | selected = nms_func(bboxes_nms_i, scores_i, 0.1) #test_cfg.nms_thr) 80 | # print('bbb', selected) 81 | merged_bboxes.append(bboxes_i[selected, :]) 82 | merged_scores.append(scores_i[selected]) 83 | merged_labels.append(labels_i[selected]) 84 | 85 | # print(merged_bboxes) 86 | merged_bboxes = merged_bboxes[0].cat(merged_bboxes) 87 | merged_scores = torch.cat(merged_scores, dim=0) 88 | merged_labels = torch.cat(merged_labels, dim=0) 89 | 90 | _, order = merged_scores.sort(0, descending=True) 91 | num = min(500, len(aug_bboxes)) # min(test_cfg.max_num, len(aug_bboxes)) 92 | order = order[:num] 93 | 94 | merged_bboxes = merged_bboxes[order] 95 | merged_scores = merged_scores[order] 96 | merged_labels = merged_labels[order] 97 | 98 | return bbox3d2result(merged_bboxes, merged_scores, merged_labels) 99 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import NuScenesSweepDataset 2 | from .sunrgbd_dataset_ov import SUNRGBDDataset_OV 3 | 4 | __all__ = [ 5 | 'NuScenesSweepDataset', 'SUNRGBDDataset_OV' 6 | ] 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, 4 | RandomScaleImageMultiViewImage, 5 | 
ImageRandomResizeCropFlip, 6 | UnifiedRandomFlip3D, UnifiedRotScaleTrans) 7 | from .loading_3d import (LoadMultiViewMultiSweepImageFromFiles, LoadMultiViewMultiSweepImageFromFilesIndoor) 8 | from .dbsampler import UnifiedDataBaseSampler 9 | from .formatting import CollectUnified3D 10 | from .test_time_aug import MultiRotScaleFlipAug3D 11 | 12 | __all__ = [ 13 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 14 | 'PhotoMetricDistortionMultiViewImage', 'LoadMultiViewMultiSweepImageFromFilesIndoor', 15 | 'RandomScaleImageMultiViewImage', 'ImageRandomResizeCropFlip', 16 | 'LoadMultiViewMultiSweepImageFromFiles', 17 | 'UnifiedRandomFlip3D', 'UnifiedRotScaleTrans', 'UnifiedDataBaseSampler', 18 | 'MultiRotScaleFlipAug3D' 19 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formatting.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import PIPELINES 2 | from mmcv.parallel import DataContainer as DC 3 | 4 | @PIPELINES.register_module() 5 | class CollectUnified3D(object): 6 | """Collect data from the loader relevant to the specific task. 7 | 8 | This is usually the last stage of the data loader pipeline. Typically keys 9 | is set to some subset of "img", "proposals", "gt_bboxes", 10 | "gt_bboxes_ignore", "gt_labels", and/or "gt_masks". 11 | 12 | The "img_meta" item is always populated. The contents of the "img_meta" 13 | dictionary depends on "meta_keys". By default this includes: 14 | 15 | - 'img_shape': shape of the image input to the network as a tuple \ 16 | (h, w, c). Note that images may be zero padded on the \ 17 | bottom/right if the batch tensor is larger than this shape. 18 | - 'scale_factor': a float indicating the preprocessing scale 19 | - 'flip': a boolean indicating if image flip transform was used 20 | - 'filename': path to the image file 21 | - 'ori_shape': original shape of the image as a tuple (h, w, c) 22 | - 'pad_shape': image shape after padding 23 | - 'lidar2img': transform from lidar to image 24 | - 'depth2img': transform from depth to image 25 | - 'cam2img': transform from camera to image 26 | - 'pcd_horizontal_flip': a boolean indicating if point cloud is \ 27 | flipped horizontally 28 | - 'pcd_vertical_flip': a boolean indicating if point cloud is \ 29 | flipped vertically 30 | - 'box_mode_3d': 3D box mode 31 | - 'box_type_3d': 3D box type 32 | - 'img_norm_cfg': a dict of normalization information: 33 | - mean: per channel mean subtraction 34 | - std: per channel std divisor 35 | - to_rgb: bool indicating if bgr was converted to rgb 36 | - 'pcd_trans': point cloud transformations 37 | - 'sample_idx': sample index 38 | - 'pcd_scale_factor': point cloud scale factor 39 | - 'pcd_rotation': rotation applied to point cloud 40 | - 'pts_filename': path to point cloud file. 41 | 42 | Args: 43 | keys (Sequence[str]): Keys of results to be collected in ``data``. 44 | meta_keys (Sequence[str], optional): Meta keys to be converted to 45 | ``mmcv.DataContainer`` and collected in ``data[img_metas]``. 
46 | Default: ('filename', 'ori_shape', 'img_shape', 'lidar2img', 47 | 'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip', 48 | 'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d', 49 | 'box_type_3d', 'img_norm_cfg', 'pcd_trans', 50 | 'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename') 51 | """ 52 | 53 | def __init__(self, 54 | keys, 55 | meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img', 56 | 'depth2img', 'cam2img', 'pad_shape', 57 | 'scale_factor', 'flip', 'pcd_horizontal_flip', 58 | 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d', 59 | 'img_norm_cfg', 'pcd_trans', 'sample_idx', 60 | 'pcd_scale_factor', 'pcd_rotation', 'pts_filename', 61 | 'transformation_3d_flow', 'sweeps_paths', 'sweeps_ids', 62 | 'sweeps_time', 'uni_rot_aug', 'uni_trans_aug', 'uni_flip_aug', 63 | 'img_rot_aug', 'img_trans_aug', 'rot_degree')): 64 | self.keys = keys 65 | self.meta_keys = meta_keys 66 | 67 | def __call__(self, results): 68 | """Call function to collect keys in results. The keys in ``meta_keys`` 69 | will be converted to :obj:`mmcv.DataContainer`. 70 | 71 | Args: 72 | results (dict): Result dict contains the data to collect. 73 | 74 | Returns: 75 | dict: The result dict contains the following keys 76 | - keys in ``self.keys`` 77 | - ``img_metas`` 78 | """ 79 | data = {} 80 | img_metas = {} 81 | for key in self.meta_keys: 82 | if key in results: 83 | img_metas[key] = results[key] 84 | 85 | data['img_metas'] = DC(img_metas, cpu_only=True) 86 | for key in self.keys: 87 | data[key] = results[key] 88 | 89 | return data 90 | 91 | def __repr__(self): 92 | """str: Return a string that describes the module.""" 93 | return self.__class__.__name__ + \ 94 | f'(keys={self.keys}, meta_keys={self.meta_keys})' -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading_3d.py: -------------------------------------------------------------------------------- 1 | from re import I 2 | import mmcv 3 | import numpy as np 4 | 5 | from mmdet.datasets.builder import PIPELINES 6 | 7 | 8 | @PIPELINES.register_module() 9 | class LoadMultiViewMultiSweepImageFromFilesIndoor(object): 10 | """Load multi channel images from a list of separate channel files. 11 | 12 | Expects results['img_filename'] to be a list of filenames. 13 | 14 | Args: 15 | to_float32 (bool): Whether to convert the img to float32. 16 | Defaults to False. 17 | color_type (str): Color type of the file. Defaults to 'unchanged'. 18 | """ 19 | 20 | def __init__(self, to_float32=False, sweep_num=1, random_sweep=False, color_type='unchanged'): 21 | self.to_float32 = to_float32 22 | self.color_type = color_type 23 | self.sweep_num = sweep_num 24 | self.random_sweep = random_sweep 25 | 26 | def __call__(self, results): 27 | """Call function to load multi-view image from files. 28 | 29 | Args: 30 | results (dict): Result dict containing multi-view image filenames. 31 | 32 | Returns: 33 | dict: The result dict containing the multi-view image data. \ 34 | Added keys and values are described below. 35 | 36 | - filename (str): Multi-view image filenames. 37 | - img (np.ndarray): Multi-view image arrays. 38 | - img_shape (tuple[int]): Shape of multi-view image arrays. 39 | - ori_shape (tuple[int]): Shape of original image arrays. 40 | - pad_shape (tuple[int]): Shape of padded image arrays. 41 | - scale_factor (float): Scale factor. 42 | - img_norm_cfg (dict): Normalization configuration of images. 
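                - lidar2img (list[np.ndarray]): 4x4 projection matrix built
                  from 'depth2img' (or 'lidar2img') in the input results.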
43 | """ 44 | # print(results) 45 | filename = [results['img_info']['filename']] 46 | results['filename'] = filename 47 | # img is of shape (h, w, c, num_views) 48 | img = np.stack( 49 | [mmcv.imread(name, self.color_type) for name in filename], axis=-1) 50 | 51 | if self.to_float32: 52 | img = img.astype(np.float32) 53 | 54 | # unravel to list, see `DefaultFormatBundle` in formating.py 55 | # which will transpose each image separately and then stack into array 56 | results['img'] = [img[..., i] for i in range(img.shape[-1])] 57 | # results['img'] = results['img'][0] 58 | results['img_shape'] = img.shape 59 | results['ori_shape'] = img.shape 60 | # Set initial values for default meta_keys 61 | results['pad_shape'] = img.shape 62 | results['scale_factor'] = 1.0 63 | num_channels = 1 if len(img.shape) < 3 else img.shape[2] 64 | results['img_norm_cfg'] = dict( 65 | mean=np.zeros(num_channels, dtype=np.float32), 66 | std=np.ones(num_channels, dtype=np.float32), 67 | to_rgb=False) 68 | 69 | if 'depth2img' in results: 70 | lidar2img = np.eye(4) 71 | if results['depth2img'].shape[0] == 3: 72 | lidar2img[:3, :3] = results['depth2img'] 73 | else: 74 | lidar2img[:4, :4] = results['depth2img'] 75 | else: 76 | lidar2img = np.eye(4) 77 | if results['lidar2img'].shape[0] == 3: 78 | lidar2img[:3, :3] = results['lidar2img'] 79 | else: 80 | lidar2img[:4, :4] = results['lidar2img'] 81 | results['lidar2img'] = [lidar2img] 82 | 83 | return results 84 | 85 | def __repr__(self): 86 | """str: Return a string that describes the module.""" 87 | repr_str = self.__class__.__name__ 88 | repr_str += f'(to_float32={self.to_float32}, ' 89 | repr_str += f"color_type='{self.color_type}')" 90 | return repr_str 91 | 92 | @PIPELINES.register_module() 93 | class LoadMultiViewMultiSweepImageFromFiles(object): 94 | """Load multi channel images from a list of separate channel files. 95 | 96 | Expects results['img_filename'] to be a list of filenames. 97 | 98 | Args: 99 | to_float32 (bool): Whether to convert the img to float32. 100 | Defaults to False. 101 | color_type (str): Color type of the file. Defaults to 'unchanged'. 102 | """ 103 | 104 | def __init__(self, to_float32=False, sweep_num=1, random_sweep=False, color_type='unchanged'): 105 | self.to_float32 = to_float32 106 | self.color_type = color_type 107 | self.sweep_num = sweep_num 108 | self.random_sweep = random_sweep 109 | 110 | def __call__(self, results): 111 | """Call function to load multi-view image from files. 112 | 113 | Args: 114 | results (dict): Result dict containing multi-view image filenames. 115 | 116 | Returns: 117 | dict: The result dict containing the multi-view image data. \ 118 | Added keys and values are described below. 119 | 120 | - filename (str): Multi-view image filenames. 121 | - img (np.ndarray): Multi-view image arrays. 122 | - img_shape (tuple[int]): Shape of multi-view image arrays. 123 | - ori_shape (tuple[int]): Shape of original image arrays. 124 | - pad_shape (tuple[int]): Shape of padded image arrays. 125 | - scale_factor (float): Scale factor. 126 | - img_norm_cfg (dict): Normalization configuration of images. 
127 | """ 128 | filename = results['img_filename'] 129 | results['filename'] = filename 130 | # img is of shape (h, w, c, num_views) 131 | img = np.stack( 132 | [mmcv.imread(name, self.color_type) for name in filename], axis=-1) 133 | 134 | img_sweeps = [] 135 | sweeps_paths = results['cam_sweeps_paths'] 136 | sweeps_ids = results['cam_sweeps_id'] 137 | sweeps_time = results['cam_sweeps_time'] 138 | if self.random_sweep: 139 | random_num = np.random.randint(0, self.sweep_num) 140 | sweeps_paths = [_sweep[:random_num] for _sweep in sweeps_paths] 141 | sweeps_ids = [_sweep[:random_num] for _sweep in sweeps_ids] 142 | else: 143 | random_num = self.sweep_num 144 | 145 | for _idx in range(len(sweeps_paths[0])): 146 | _sweep = np.stack( 147 | [mmcv.imread(name_list[_idx], self.color_type) for name_list in sweeps_paths], axis=-1) 148 | img_sweeps.append(_sweep) 149 | 150 | # add img sweeps to raw image 151 | img = np.stack([img, *img_sweeps], axis=-1) 152 | # img is of shape (h, w, c, num_views * sweep_num) 153 | img = img.reshape(*img.shape[:-2], -1) 154 | 155 | if self.to_float32: 156 | img = img.astype(np.float32) 157 | 158 | results['sweeps_paths'] = [[filename[_idx]] + sweeps_paths[_idx] for _idx in range(len(filename))] 159 | results['sweeps_ids'] = np.stack([[0]+_id for _id in sweeps_ids], axis=-1) 160 | results['sweeps_time'] = np.stack([[0]+_time for _time in sweeps_time], axis=-1) 161 | # unravel to list, see `DefaultFormatBundle` in formating.py 162 | # which will transpose each image separately and then stack into array 163 | results['img'] = [img[..., i] for i in range(img.shape[-1])] 164 | results['img_shape'] = img.shape 165 | results['ori_shape'] = img.shape 166 | # Set initial values for default meta_keys 167 | results['pad_shape'] = img.shape 168 | results['scale_factor'] = 1.0 169 | num_channels = 1 if len(img.shape) < 3 else img.shape[2] 170 | results['img_norm_cfg'] = dict( 171 | mean=np.zeros(num_channels, dtype=np.float32), 172 | std=np.ones(num_channels, dtype=np.float32), 173 | to_rgb=False) 174 | 175 | # add sweep matrix to raw matrix 176 | results['lidar2img'] = [np.stack([results['lidar2img'][_idx], 177 | *results['lidar2img_sweeps'][_idx][:random_num]], axis=0) 178 | for _idx in range(len(results['lidar2img']))] 179 | results['lidar2cam'] = [np.stack([results['lidar2cam'][_idx], 180 | *results['lidar2cam_sweeps'][_idx][:random_num]], axis=0) 181 | for _idx in range(len(results['lidar2cam']))] 182 | results['cam_intrinsic'] = [np.stack([results['cam_intrinsic'][_idx], 183 | *results['cam_sweeps_intrinsics'][_idx][:random_num]], axis=0) 184 | for _idx in range(len(results['cam_intrinsic']))] 185 | results.pop('lidar2img_sweeps') 186 | results.pop('lidar2cam_sweeps') 187 | results.pop('cam_sweeps_intrinsics') 188 | 189 | return results 190 | 191 | def __repr__(self): 192 | """str: Return a string that describes the module.""" 193 | repr_str = self.__class__.__name__ 194 | repr_str += f'(to_float32={self.to_float32}, ' 195 | repr_str += f"color_type='{self.color_type}')" 196 | return repr_str -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/test_time_aug.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import mmcv 3 | import warnings 4 | from copy import deepcopy 5 | 6 | from mmdet.datasets.builder import PIPELINES 7 | from mmdet.datasets.pipelines import Compose 8 | 9 | 10 | @PIPELINES.register_module() 11 | class MultiRotScaleFlipAug3D(object): 12 | """Test-time augmentation with multiple scales and flipping. 13 | 14 | Args: 15 | transforms (list[dict]): Transforms to apply in each augmentation. 16 | img_scale (tuple | list[tuple]: Images scales for resizing. 17 | pts_scale_ratio (float | list[float]): Points scale ratios for 18 | resizing. 19 | flip (bool): Whether apply flip augmentation. Defaults to False. 20 | flip_direction (str | list[str]): Flip augmentation directions 21 | for images, options are "horizontal" and "vertical". 22 | If flip_direction is list, multiple flip augmentations will 23 | be applied. It has no effect when ``flip == False``. 24 | Defaults to "horizontal". 25 | pcd_horizontal_flip (bool): Whether apply horizontal flip augmentation 26 | to point cloud. Defaults to True. Note that it works only when 27 | 'flip' is turned on. 28 | pcd_vertical_flip (bool): Whether apply vertical flip augmentation 29 | to point cloud. Defaults to True. Note that it works only when 30 | 'flip' is turned on. 31 | """ 32 | 33 | def __init__(self, 34 | transforms, 35 | img_scale, 36 | pts_scale_ratio, 37 | rotate_degree=[0.0], 38 | flip=False, 39 | flip_direction='horizontal', 40 | pcd_horizontal_flip=False, 41 | pcd_vertical_flip=False): 42 | self.transforms = Compose(transforms) 43 | self.img_scale = img_scale if isinstance(img_scale, 44 | list) else [img_scale] 45 | self.pts_scale_ratio = pts_scale_ratio \ 46 | if isinstance(pts_scale_ratio, list) else[float(pts_scale_ratio)] 47 | 48 | assert mmcv.is_list_of(self.img_scale, tuple) 49 | assert mmcv.is_list_of(self.pts_scale_ratio, float) 50 | 51 | self.rotate_degree = rotate_degree 52 | 53 | self.flip = flip 54 | self.pcd_horizontal_flip = pcd_horizontal_flip 55 | self.pcd_vertical_flip = pcd_vertical_flip 56 | 57 | self.flip_direction = flip_direction if isinstance( 58 | flip_direction, list) else [flip_direction] 59 | assert mmcv.is_list_of(self.flip_direction, str) 60 | if not self.flip and self.flip_direction != ['horizontal']: 61 | warnings.warn( 62 | 'flip_direction has no effect when flip is set to False') 63 | if (self.flip and not any([(t['type'] == 'RandomFlip3D' 64 | or t['type'] == 'RandomFlip') 65 | for t in transforms])): 66 | warnings.warn( 67 | 'flip has no effect when RandomFlip is not in transforms') 68 | 69 | def __call__(self, results): 70 | """Call function to augment common fields in results. 71 | 72 | Args: 73 | results (dict): Result dict contains the data to augment. 74 | 75 | Returns: 76 | dict: The result dict contains the data that is augmented with \ 77 | different scales and flips. 
78 | """ 79 | aug_data = [] 80 | 81 | # modified from `flip_aug = [False, True] if self.flip else [False]` 82 | # to reduce unnecessary scenes when using double flip augmentation 83 | # during test time 84 | flip_aug = [True] if self.flip else [False] 85 | pcd_horizontal_flip_aug = [False, True] \ 86 | if self.flip and self.pcd_horizontal_flip else [False] 87 | pcd_vertical_flip_aug = [False, True] \ 88 | if self.flip and self.pcd_vertical_flip else [False] 89 | for rot_degree in self.rotate_degree: 90 | for scale in self.img_scale: 91 | for pts_scale_ratio in self.pts_scale_ratio: 92 | for flip in flip_aug: 93 | for pcd_horizontal_flip in pcd_horizontal_flip_aug: 94 | for pcd_vertical_flip in pcd_vertical_flip_aug: 95 | for direction in self.flip_direction: 96 | # results.copy will cause bug 97 | # since it is shallow copy 98 | _results = deepcopy(results) 99 | _results['rot_degree'] = rot_degree 100 | _results['scale'] = scale 101 | _results['flip'] = flip 102 | _results['pcd_scale_factor'] = \ 103 | pts_scale_ratio 104 | _results['flip_direction'] = direction 105 | _results['pcd_horizontal_flip'] = \ 106 | pcd_horizontal_flip 107 | _results['pcd_vertical_flip'] = \ 108 | pcd_vertical_flip 109 | data = self.transforms(_results) 110 | aug_data.append(data) 111 | # list of dict to dict of list 112 | aug_data_dict = {key: [] for key in aug_data[0]} 113 | for data in aug_data: 114 | for key, val in data.items(): 115 | aug_data_dict[key].append(val) 116 | return aug_data_dict 117 | 118 | def __repr__(self): 119 | """str: Return a string that describes the module.""" 120 | repr_str = self.__class__.__name__ 121 | repr_str += f'(transforms={self.transforms}, ' 122 | repr_str += f'img_scale={self.img_scale}, flip={self.flip}, ' 123 | repr_str += f'pts_scale_ratio={self.pts_scale_ratio}, ' 124 | repr_str += f'flip_direction={self.flip_direction})' 125 | return repr_str 126 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/sunrgbd_dataset_ov.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import mmcv 3 | import numpy as np 4 | import pyquaternion 5 | import tempfile 6 | from nuscenes.utils.data_classes import Box as NuScenesBox 7 | from os import path as osp 8 | 9 | from ..core.indoor_eval import indoor_eval_ov 10 | 11 | import mmdet3d 12 | #from mmdet.datasets import DATASETS 13 | from mmdet3d.datasets import DATASETS 14 | from mmdet3d.core import show_result 15 | from mmdet3d.core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes 16 | from mmdet3d.datasets import SUNRGBDDataset 17 | 18 | __mmdet3d_version__ = float(mmdet3d.__version__[:3]) 19 | 20 | @DATASETS.register_module() 21 | class SUNRGBDDataset_OV(SUNRGBDDataset): 22 | 23 | def __init__(self, 24 | data_root, 25 | ann_file, 26 | pipeline=None, 27 | classes=None, 28 | seen_classes=None, 29 | modality=dict(use_camera=True, use_lidar=True), 30 | box_type_3d='Depth', 31 | filter_empty_gt=True, 32 | test_mode=False, 33 | **kwargs): 34 | super().__init__( 35 | data_root=data_root, 36 | ann_file=ann_file, 37 | pipeline=pipeline, 38 | classes=classes, 39 | modality=modality, 40 | box_type_3d=box_type_3d, 41 | filter_empty_gt=filter_empty_gt, 42 | test_mode=test_mode, 43 | **kwargs) 44 | 45 | self.seen_classes = seen_classes 46 | self.classes = seen_classes 47 | 48 | def evaluate(self, 49 | results, 50 | metric=None, 51 | iou_thr=(0.25, 0.5), 52 | iou_thr_2d=(0.25, 0.5), 53 | logger=None, 54 | show=False, 55 | out_dir=None, 56 | pipeline=None, 57 | axis_aligned_lw=False): 58 | """Evaluate. 59 | 60 | Evaluation in indoor protocol. 61 | 62 | Args: 63 | results (list[dict]): List of results. 64 | metric (str | list[str], optional): Metrics to be evaluated. 65 | Default: None. 66 | iou_thr (list[float], optional): AP IoU thresholds for 3D 67 | evaluation. Default: (0.25, 0.5). 68 | iou_thr_2d (list[float], optional): AP IoU thresholds for 2D 69 | evaluation. Default: (0.5, ). 70 | show (bool, optional): Whether to visualize. 71 | Default: False. 72 | out_dir (str, optional): Path to save the visualization results. 73 | Default: None. 74 | pipeline (list[dict], optional): raw data loading for showing. 75 | Default: None. 76 | 77 | Returns: 78 | dict: Evaluation results. 79 | """ 80 | assert isinstance( 81 | results, list), f'Expect results to be list, got {type(results)}.' 82 | assert len(results) > 0, 'Expect length of results > 0.' 83 | assert len(results) == len(self.data_infos) 84 | assert isinstance( 85 | results[0], dict 86 | ), f'Expect elements in results to be dict, got {type(results[0])}.' 
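        # Collect the raw GT annotations and map label indices back to category
        # names before handing everything to the open-vocabulary indoor evaluator.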
87 | gt_annos = [info['annos'] for info in self.data_infos] 88 | label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)} 89 | ret_dict = indoor_eval_ov( 90 | self.seen_classes, 91 | gt_annos, 92 | results, 93 | iou_thr, 94 | label2cat, 95 | logger=logger, 96 | box_type_3d=self.box_type_3d, 97 | box_mode_3d=self.box_mode_3d, 98 | axis_aligned_lw=axis_aligned_lw) 99 | if show: 100 | self.show(results, out_dir, pipeline=pipeline) 101 | 102 | return ret_dict 103 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | from .second_3d import SECOND3D 3 | 4 | __all__ = ['VoVNet', 'SECOND3D'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/second_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from symbol import import_from 3 | import warnings 4 | from mmcv.cnn import build_conv_layer, build_norm_layer 5 | from mmcv.runner import BaseModule 6 | from torch import nn as nn 7 | 8 | from mmdet.models import BACKBONES 9 | 10 | 11 | @BACKBONES.register_module() 12 | class SECOND3D(BaseModule): 13 | """Modified Backbone network for SECOND. 14 | 15 | Args: 16 | in_channels (int): Input channels. 17 | out_channels (list[int]): Output channels for multi-scale feature maps. 18 | layer_nums (list[int]): Number of layers in each stage. 19 | layer_strides (list[int]): Strides of each stage. 20 | norm_cfg (dict): Config dict of normalization layers. 21 | conv_cfg (dict): Config dict of convolutional layers. 22 | """ 23 | 24 | def __init__(self, 25 | in_channels=128, 26 | out_channels=[128, 128, 256], 27 | layer_nums=[3, 5, 5], 28 | layer_strides=[2, 2, 2], 29 | is_cascade=True, 30 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 31 | conv_cfg=dict(type='Conv3d', bias=False), 32 | init_cfg=None, 33 | pretrained=None): 34 | super(SECOND3D, self).__init__(init_cfg=init_cfg) 35 | assert len(layer_strides) == len(layer_nums) 36 | assert len(out_channels) == len(layer_nums) 37 | 38 | if isinstance(in_channels, list): 39 | in_filters = in_channels 40 | else: 41 | in_filters = [in_channels, *out_channels[:-1]] 42 | # note that when stride > 1, conv2d with same padding isn't 43 | # equal to pad-conv2d. we should use pad-conv2d. 
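        # Each stage below starts with a strided conv (with a 3D kernel such as
        # the default (1, 3, 3), the stride only touches the spatial H/W axes),
        # followed by ``layer_num`` conv-norm-ReLU layers; ``is_cascade``
        # decides in forward() whether stages are chained or all fed the same
        # input.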
44 | blocks = [] 45 | self.is_cascade = is_cascade 46 | self.kernel_type = conv_cfg.type 47 | if "kernel" in conv_cfg: 48 | kernel = conv_cfg.pop("kernel") 49 | else: 50 | kernel = (1,3,3) 51 | padding = tuple([(_kernel-1)//2 for _kernel in kernel]) 52 | for i, layer_num in enumerate(layer_nums): 53 | block = [ 54 | build_conv_layer( 55 | conv_cfg, 56 | in_filters[i], 57 | out_channels[i], 58 | kernel, 59 | stride=(1,layer_strides[i],layer_strides[i]) if len(padding)==3 else (layer_strides[i],layer_strides[i]), 60 | padding=padding), 61 | build_norm_layer(norm_cfg, out_channels[i])[1], 62 | nn.ReLU(inplace=True), 63 | ] 64 | for j in range(layer_num): 65 | block.append( 66 | build_conv_layer( 67 | conv_cfg, 68 | out_channels[i], 69 | out_channels[i], 70 | kernel, 71 | padding=padding)) 72 | block.append(build_norm_layer(norm_cfg, out_channels[i])[1]) 73 | block.append(nn.ReLU(inplace=True)) 74 | 75 | block = nn.Sequential(*block) 76 | blocks.append(block) 77 | 78 | self.blocks = nn.ModuleList(blocks) 79 | 80 | assert not (init_cfg and pretrained), \ 81 | 'init_cfg and pretrained cannot be setting at the same time' 82 | if isinstance(pretrained, str): 83 | warnings.warn('DeprecationWarning: pretrained is a deprecated, ' 84 | 'please use "init_cfg" instead') 85 | self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) 86 | else: 87 | self.init_cfg = dict(type='Kaiming', layer=self.kernel_type) 88 | 89 | def forward(self, x): 90 | """Forward function. 91 | 92 | Args: 93 | x (torch.Tensor): Input with shape (N, C, H, W). 94 | 95 | Returns: 96 | tuple[torch.Tensor]: Multi-scale features. 97 | """ 98 | outs = [] 99 | batch = x.shape[0] 100 | if self.kernel_type == "Conv2d": 101 | x = x.transpose(1,2).flatten(0,1) 102 | 103 | for i in range(len(self.blocks)): 104 | if self.is_cascade: 105 | x = self.blocks[i](x) 106 | outs.append(x) 107 | else: 108 | out = self.blocks[i](x) 109 | outs.append(out) 110 | 111 | if self.kernel_type == "Conv2d": 112 | outs = [_out.reshape(batch, -1, *_out.shape[-3:]).transpose(1,2) for _out in outs] 113 | 114 | return tuple(outs) 115 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .uni3detr_head import Uni3DETRHead 2 | from .uni3detr_head_clip import Uni3DETRHeadCLIP 3 | 4 | __all__ = ['Uni3DETRHead', 'Uni3DETRHeadCLIP'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .uni3detr import Uni3DETR 2 | from .ov_uni3detr import OV_Uni3DETR 3 | 4 | __all__ = ['Uni3DETR', 'OV_Uni3DETR'] 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .rdiouloss import RDIoULoss, IoU3DLoss, SoftFocalLoss 2 | 3 | __all__ = ['RDIoULoss', 'IoU3DLoss', 'SoftFocalLoss'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/rdiouloss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
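# Rotation-decoupled IoU (RDIoU) and auxiliary 3D IoU / soft focal losses.
# They are registered in mmdet's LOSSES registry, so configs reference them by
# type name; an illustrative snippet (assumed keys and weights, not values
# taken from this repository):
#
#   loss_bbox=dict(type='RDIoULoss', loss_weight=1.0),
#   loss_iou=dict(type='IoU3DLoss', loss_weight=1.0),
#   loss_cls=dict(type='SoftFocalLoss', gamma=2.0, alpha=0.25, loss_weight=1.0),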
2 | import torch 3 | from projects.mmdet3d_plugin.core.bbox.util import get_rdiou 4 | from torch import nn as nn 5 | import torch.nn.functional as F 6 | 7 | from mmdet.models.losses.utils import weighted_loss 8 | from mmdet.models.losses.utils import weight_reduce_loss 9 | from mmdet.models import LOSSES 10 | from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d, bbox_overlaps_nearest_3d 11 | 12 | @weighted_loss 13 | def rd_iou_loss(pred, target): 14 | """Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes. 15 | Note that predictions and targets are one-to-one corresponded. 16 | 17 | Args: 18 | pred (torch.Tensor): Bbox predictions with shape [N, 7] 19 | (x, y, z, w, l, h, alpha). 20 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7] 21 | (x, y, z, w, l, h, alpha). 22 | 23 | Returns: 24 | torch.Tensor: IoU loss between predictions and targets. 25 | """ 26 | u, rdiou = get_rdiou(pred.unsqueeze(0), target.unsqueeze(0)) 27 | u, rdiou = u[0], rdiou[0] 28 | 29 | rdiou_loss_n = rdiou - u 30 | rdiou_loss_n = torch.clamp(rdiou_loss_n,min=-1.0,max = 1.0) 31 | rdiou_loss_n = 1 - rdiou_loss_n 32 | return rdiou_loss_n 33 | 34 | 35 | @LOSSES.register_module() 36 | class RDIoULoss(nn.Module): 37 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. 38 | 39 | Args: 40 | reduction (str): Method to reduce losses. 41 | The valid reduction method are none, sum or mean. 42 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 43 | """ 44 | 45 | def __init__(self, reduction='mean', loss_weight=1.0): 46 | super().__init__() 47 | self.reduction = reduction 48 | self.loss_weight = loss_weight 49 | 50 | def forward(self, 51 | pred, 52 | target, 53 | weight=None, 54 | avg_factor=None, 55 | reduction_override=None, 56 | **kwargs): 57 | """Forward function of loss calculation. 58 | 59 | Args: 60 | pred (torch.Tensor): Bbox predictions with shape [..., 7] 61 | (x, y, z, w, l, h, alpha). 62 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7] 63 | (x, y, z, w, l, h, alpha). 64 | weight (torch.Tensor | float, optional): Weight of loss. 65 | Defaults to None. 66 | avg_factor (int, optional): Average factor that is used to average 67 | the loss. Defaults to None. 68 | reduction_override (str, optional): Method to reduce losses. 69 | The valid reduction method are 'none', 'sum' or 'mean'. 70 | Defaults to None. 71 | 72 | Returns: 73 | torch.Tensor: IoU loss between predictions and targets. 
74 | """ 75 | if weight is not None and not torch.any(weight > 0): 76 | return pred.sum() * weight.sum() # 0 77 | assert reduction_override in (None, 'none', 'mean', 'sum') 78 | reduction = ( 79 | reduction_override if reduction_override else self.reduction) 80 | if weight is not None and weight.dim() > 1: 81 | weight = weight.mean(-1) 82 | loss = self.loss_weight * rd_iou_loss( 83 | pred, 84 | target, 85 | weight, 86 | reduction=reduction, 87 | avg_factor=avg_factor, 88 | **kwargs) 89 | 90 | return loss 91 | 92 | 93 | @weighted_loss 94 | def iou3d_loss(pred, target): 95 | #iou3d = bbox_overlaps_3d(pred, target, coordinate='depth') 96 | #iou3d = 1 - torch.diag(iou3d) 97 | 98 | #iou3d = (1 - bbox_overlaps_nearest_3d(pred, target, is_aligned=True, coordinate='depth') ) 99 | iou3d = (1 - bbox_overlaps_nearest_3d(pred, target, is_aligned=True, coordinate='lidar') ) 100 | #iou3d += (1 - bbox_overlaps_nearest_3d(pred[:, [0,2,1,3,5,4,6]], target[:, [0,2,1,3,5,4,6]], is_aligned=True, coordinate='depth') ) * 0.1 101 | #iou3d += (1 - bbox_overlaps_nearest_3d(pred[:, [1,2,0,4,5,3,6]], target[:, [1,2,0,4,5,3,6]], is_aligned=True, coordinate='depth') ) * 0.1 102 | return iou3d 103 | 104 | 105 | @LOSSES.register_module() 106 | class IoU3DLoss(nn.Module): 107 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. 108 | 109 | Args: 110 | reduction (str): Method to reduce losses. 111 | The valid reduction method are none, sum or mean. 112 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 113 | """ 114 | 115 | def __init__(self, reduction='mean', loss_weight=1.0): 116 | super().__init__() 117 | self.reduction = reduction 118 | self.loss_weight = loss_weight 119 | 120 | def forward(self, 121 | pred, 122 | target, 123 | weight=None, 124 | avg_factor=None, 125 | reduction_override=None, 126 | **kwargs): 127 | """Forward function of loss calculation. 128 | 129 | Args: 130 | pred (torch.Tensor): Bbox predictions with shape [..., 7] 131 | (x, y, z, w, l, h, alpha). 132 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7] 133 | (x, y, z, w, l, h, alpha). 134 | weight (torch.Tensor | float, optional): Weight of loss. 135 | Defaults to None. 136 | avg_factor (int, optional): Average factor that is used to average 137 | the loss. Defaults to None. 138 | reduction_override (str, optional): Method to reduce losses. 139 | The valid reduction method are 'none', 'sum' or 'mean'. 140 | Defaults to None. 141 | 142 | Returns: 143 | torch.Tensor: IoU loss between predictions and targets. 
144 | """ 145 | if weight is not None and not torch.any(weight > 0): 146 | return pred.sum() * weight.sum() # 0 147 | assert reduction_override in (None, 'none', 'mean', 'sum') 148 | reduction = ( 149 | reduction_override if reduction_override else self.reduction) 150 | if weight is not None and weight.dim() > 1: 151 | weight = weight.mean(-1) 152 | loss = self.loss_weight * iou3d_loss( 153 | pred, 154 | target, 155 | weight, 156 | reduction=reduction, 157 | avg_factor=avg_factor, 158 | **kwargs) 159 | 160 | return loss 161 | 162 | def soft_focal_loss(pred, 163 | target, 164 | weight=None, 165 | gamma=2.0, 166 | alpha=0.25, 167 | reduction='mean', 168 | avg_factor=None): 169 | pred_sigmoid = pred.sigmoid() 170 | 171 | target, target_score = target[0], target[1] 172 | target_oh = torch.zeros((pred_sigmoid.shape[0], pred.shape[1] + 1)).type_as(pred).to(pred.device) 173 | target_oh.scatter_(1, target[:,None], 1) 174 | target_oh = target_oh[:,0:-1] 175 | target = target[:,None] 176 | 177 | target_soft = (target_oh > 0).float() * target_score[:,None] 178 | pt = target_soft - pred_sigmoid 179 | focal_weight = ((1 - alpha) + (2*alpha - 1) * target_soft) * pt.pow(gamma) 180 | loss = F.binary_cross_entropy_with_logits(pred, target_soft, reduction='none') * focal_weight 181 | 182 | weight = weight.view(-1,1) 183 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 184 | return loss 185 | 186 | @LOSSES.register_module() 187 | class SoftFocalLoss(nn.Module): 188 | 189 | def __init__(self, 190 | use_sigmoid=True, 191 | gamma=2.0, 192 | alpha=0.25, 193 | reduction='mean', 194 | loss_weight=1.0): 195 | super(SoftFocalLoss, self).__init__() 196 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 197 | self.use_sigmoid = use_sigmoid 198 | self.gamma = gamma 199 | self.alpha = alpha 200 | self.reduction = reduction 201 | self.loss_weight = loss_weight 202 | 203 | def forward(self, 204 | pred, 205 | target, 206 | weight=None, 207 | avg_factor=None, 208 | reduction_override=None): 209 | assert reduction_override in (None, 'none', 'mean', 'sum') 210 | reduction = ( 211 | reduction_override if reduction_override else self.reduction) 212 | if self.use_sigmoid: 213 | loss_cls = self.loss_weight * soft_focal_loss( 214 | pred, 215 | target, 216 | weight, 217 | gamma=self.gamma, 218 | alpha=self.alpha, 219 | reduction=reduction, 220 | avg_factor=avg_factor) 221 | else: 222 | raise NotImplementedError 223 | return loss_cls -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .second3d_fpn import SECOND3DFPN 3 | 4 | __all__ = ['SECOND3DFPN'] 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/second3d_fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | from mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer 5 | from mmcv.runner import BaseModule, auto_fp16 6 | from torch import nn as nn 7 | 8 | from mmdet.models import NECKS 9 | 10 | 11 | @NECKS.register_module() 12 | class SECOND3DFPN(BaseModule): 13 | """Modified FPN used in SECOND. 14 | 15 | Args: 16 | in_channels (list[int]): Input channels of multi-scale feature maps. 
17 | out_channels (list[int]): Output channels of feature maps. 18 | upsample_strides (list[int]): Strides used to upsample the 19 | feature maps. 20 | norm_cfg (dict): Config dict of normalization layers. 21 | upsample_cfg (dict): Config dict of upsample layers. 22 | conv_cfg (dict): Config dict of conv layers. 23 | use_conv_for_no_stride (bool): Whether to use conv when stride is 1. 24 | use_for_distill (bool): Whether to use for cross-modality distillation. 25 | """ 26 | 27 | def __init__(self, 28 | in_channels=[128, 128, 256], 29 | out_channels=[256, 256, 256], 30 | upsample_strides=[1, 2, 4], 31 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01), 32 | upsample_cfg=dict(type='deconv3d', bias=False), 33 | conv_cfg=dict(type='Conv3d', bias=False), 34 | extra_conv=None, 35 | use_conv_for_no_stride=False, 36 | use_for_distill=False, 37 | init_cfg=None): 38 | # if for GroupNorm, 39 | # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) 40 | super(SECOND3DFPN, self).__init__(init_cfg=init_cfg) 41 | assert len(out_channels) == len(upsample_strides) == len(in_channels) 42 | self.in_channels = in_channels 43 | self.out_channels = out_channels 44 | self.extra_conv = extra_conv 45 | self.fp16_enabled = False 46 | self.use_for_distill = use_for_distill 47 | 48 | deblocks = [] 49 | for i, out_channel in enumerate(out_channels): 50 | stride = upsample_strides[i] 51 | if stride > 1 or (stride == 1 and not use_conv_for_no_stride): 52 | upsample_layer = build_upsample_layer( 53 | upsample_cfg, 54 | in_channels=in_channels[i], 55 | out_channels=out_channel, 56 | kernel_size=(1,stride,stride) if '3d' in upsample_cfg['type'] else (stride,stride), 57 | stride=(1,stride,stride) if '3d' in upsample_cfg['type'] else (stride,stride)) 58 | else: 59 | stride = np.round(1 / stride).astype(np.int64) 60 | upsample_layer = build_conv_layer( 61 | conv_cfg, 62 | in_channels=in_channels[i], 63 | out_channels=out_channel, 64 | kernel_size=(1,stride,stride) if '3d' in conv_cfg['type'] else (stride,stride), 65 | stride=(1,stride,stride) if '3d' in conv_cfg['type'] else (stride,stride)) 66 | 67 | deblock = nn.Sequential(upsample_layer, 68 | build_norm_layer(norm_cfg, out_channel)[1], 69 | nn.ReLU(inplace=True)) 70 | deblocks.append(deblock) 71 | self.deblocks = nn.ModuleList(deblocks) 72 | 73 | if self.extra_conv is not None: 74 | extra_blocks = [] 75 | self.layer_num = self.extra_conv.pop('num_conv') 76 | if "kernel" in self.extra_conv: 77 | kernel = self.extra_conv.pop("kernel") 78 | else: 79 | kernel = (3,3,3) 80 | padding = tuple([(_k-1)//2 for _k in kernel]) 81 | if "sep_kernel" in self.extra_conv: 82 | sep_kernel = self.extra_conv.pop("sep_kernel") 83 | sep_padding = tuple([(_k-1)//2 for _k in sep_kernel]) 84 | else: 85 | sep_kernel = None 86 | for j in range(self.layer_num): 87 | extra_blocks.append( 88 | build_conv_layer( 89 | self.extra_conv, 90 | out_channels[-1], 91 | out_channels[-1], 92 | kernel, 93 | padding=padding)) 94 | if sep_kernel: 95 | extra_blocks.append( 96 | build_conv_layer( 97 | self.extra_conv, 98 | out_channels[-1], 99 | out_channels[-1], 100 | sep_kernel, 101 | padding=sep_padding)) 102 | extra_blocks.append(build_norm_layer(norm_cfg, out_channels[-1])[1]) 103 | extra_blocks.append(nn.ReLU(inplace=True)) 104 | self.extra_blocks = nn.Sequential(*extra_blocks) 105 | 106 | if init_cfg is None: 107 | self.init_cfg = [ 108 | dict(type='Kaiming', layer='ConvTranspose2d'), 109 | dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0) 110 | ] 111 | 112 | @auto_fp16() 113 | def 
forward(self, x): 114 | """Forward function. 115 | 116 | Args: 117 | x (torch.Tensor): 4D Tensor in (N, C, H, W) shape. 118 | 119 | Returns: 120 | list[torch.Tensor]: Multi-level feature maps. 121 | """ 122 | assert len(x) == len(self.in_channels) 123 | ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)] 124 | 125 | if len(ups) > 1: 126 | out = sum(ups) 127 | else: 128 | out = ups[0] 129 | 130 | if self.extra_conv is not None: 131 | if self.use_for_distill: 132 | out_final = out 133 | before_relu_list = [] 134 | for _idx in range(self.layer_num): 135 | out_mid = self.extra_blocks[_idx*3:(_idx+1)*3-1](out_final) 136 | out_before_relu = out_mid.clone() 137 | out_final = self.extra_blocks[(_idx+1)*3-1](out_mid) 138 | before_relu_list.append(out_before_relu) 139 | 140 | out = {'final':out_final, 'before_relu':before_relu_list} 141 | else: 142 | out = self.extra_blocks(out) 143 | return out 144 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/pts_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_encoder_hd import SparseEncoderHD 2 | 3 | __all__ = ['SparseEncoderHD'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/pts_encoder/sparse_encoder_hd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.runner import auto_fp16 3 | from torch import nn as nn 4 | 5 | from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule 6 | from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE 7 | from mmdet3d.models.builder import MIDDLE_ENCODERS 8 | 9 | if IS_SPCONV2_AVAILABLE: 10 | from spconv.pytorch import SparseConvTensor, SparseSequential 11 | else: 12 | from mmcv.ops import SparseConvTensor, SparseSequential 13 | 14 | @MIDDLE_ENCODERS.register_module() 15 | class SparseEncoderHD(nn.Module): 16 | r"""Sparse encoder for SECOND and Part-A2. 17 | 18 | Args: 19 | in_channels (int): The number of input channels. 20 | sparse_shape (list[int]): The sparse shape of input tensor. 21 | order (list[str]): Order of conv module. Defaults to ('conv', 22 | 'norm', 'act'). 23 | norm_cfg (dict): Config of normalization layer. Defaults to 24 | dict(type='BN1d', eps=1e-3, momentum=0.01). 25 | base_channels (int): Out channels for conv_input layer. 26 | Defaults to 16. 27 | output_channels (int): Out channels for conv_out layer. 28 | Defaults to 128. 29 | encoder_channels (tuple[tuple[int]]): 30 | Convolutional channels of each encode block. 31 | encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. 32 | Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)). 33 | block_type (str): Type of the block to use. Defaults to 'conv_module'. 
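        encoder_strides (tuple[int]): Strides of each encode block.
            Defaults to (2, 2, 2, 1).
        keep_depth (bool): Whether to keep the depth (z) dimension in the
            dense output. If False, the dense features are summed over the
            depth axis before being returned. Defaults to True.
        fp16_enabled (bool): Whether to enable fp16 for this encoder.
            Defaults to False.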
34 | """ 35 | 36 | def __init__(self, 37 | in_channels, 38 | sparse_shape, 39 | order=('conv', 'norm', 'act'), 40 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 41 | base_channels=16, 42 | output_channels=128, 43 | encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 44 | 64)), 45 | encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 46 | 1)), 47 | encoder_strides=(2, 2, 2, 1), 48 | block_type='conv_module', 49 | keep_depth=True, 50 | fp16_enabled=False): 51 | super().__init__() 52 | assert block_type in ['conv_module', 'basicblock'] 53 | self.sparse_shape = sparse_shape 54 | self.in_channels = in_channels 55 | self.order = order 56 | self.base_channels = base_channels 57 | self.output_channels = output_channels 58 | self.encoder_channels = encoder_channels 59 | self.encoder_paddings = encoder_paddings 60 | self.encoder_strides = encoder_strides 61 | self.stage_num = len(self.encoder_channels) 62 | self.keep_depth = keep_depth 63 | if fp16_enabled: 64 | self.fp16_enabled = fp16_enabled 65 | # Spconv init all weight on its own 66 | 67 | assert isinstance(order, tuple) and len(order) == 3 68 | assert set(order) == {'conv', 'norm', 'act'} 69 | 70 | if self.order[0] != 'conv': # pre activate 71 | self.conv_input = make_sparse_convmodule( 72 | in_channels, 73 | self.base_channels, 74 | 3, 75 | norm_cfg=norm_cfg, 76 | padding=1, 77 | indice_key='subm1', 78 | conv_type='SubMConv3d', 79 | order=('conv', )) 80 | else: # post activate 81 | self.conv_input = make_sparse_convmodule( 82 | in_channels, 83 | self.base_channels, 84 | 3, 85 | norm_cfg=norm_cfg, 86 | padding=1, 87 | indice_key='subm1', 88 | conv_type='SubMConv3d') 89 | 90 | encoder_out_channels = self.make_encoder_layers( 91 | make_sparse_convmodule, 92 | norm_cfg, 93 | self.base_channels, 94 | block_type=block_type) 95 | 96 | self.conv_out = make_sparse_convmodule( 97 | encoder_out_channels, 98 | self.output_channels, 99 | kernel_size=(1, 1, 1), 100 | stride=(1, 1, 1), 101 | norm_cfg=norm_cfg, 102 | padding=0, 103 | indice_key='spconv_down2', 104 | conv_type='SparseConv3d') 105 | 106 | @auto_fp16(apply_to=('voxel_features', )) 107 | def forward(self, voxel_features, coors, batch_size): 108 | """Forward of SparseEncoder. 109 | 110 | Args: 111 | voxel_features (torch.float32): Voxel features in shape (N, C). 112 | coors (torch.int32): Coordinates in shape (N, 4), \ 113 | the columns in the order of (batch_idx, z_idx, y_idx, x_idx). 114 | batch_size (int): Batch size. 115 | 116 | Returns: 117 | dict: Backbone features. 118 | """ 119 | coors = coors.int() 120 | input_sp_tensor = SparseConvTensor(voxel_features, coors, 121 | self.sparse_shape, 122 | batch_size) 123 | x = self.conv_input(input_sp_tensor) 124 | 125 | encode_features = [] 126 | for encoder_layer in self.encoder_layers: 127 | x = encoder_layer(x) 128 | encode_features.append(x) 129 | 130 | # for detection head 131 | # [200, 176, 5] -> [200, 176, 5] 132 | out = self.conv_out(encode_features[-1]) 133 | spatial_features = out.dense() 134 | 135 | if not self.keep_depth: 136 | spatial_features = spatial_features.sum(dim=2) 137 | 138 | return spatial_features 139 | 140 | def make_encoder_layers(self, 141 | make_block, 142 | norm_cfg, 143 | in_channels, 144 | block_type='conv_module', 145 | conv_cfg=dict(type='SubMConv3d')): 146 | """make encoder layers using sparse convs. 147 | 148 | Args: 149 | make_block (method): A bounded function to build blocks. 150 | norm_cfg (dict[str]): Config of normalization layer. 
151 | in_channels (int): The number of encoder input channels. 152 | block_type (str): Type of the block to use. Defaults to 153 | 'conv_module'. 154 | conv_cfg (dict): Config of conv layer. Defaults to 155 | dict(type='SubMConv3d'). 156 | 157 | Returns: 158 | int: The number of encoder output channels. 159 | """ 160 | assert block_type in ['conv_module', 'basicblock'] 161 | self.encoder_layers = SparseSequential() 162 | 163 | for i, blocks in enumerate(self.encoder_channels): 164 | blocks_list = [] 165 | for j, out_channels in enumerate(tuple(blocks)): 166 | padding = tuple(self.encoder_paddings[i])[j] 167 | # each stage started with a spconv layer 168 | # except the first stage 169 | if i != 0 and j == 0 and block_type == 'conv_module': 170 | blocks_list.append( 171 | make_block( 172 | in_channels, 173 | out_channels, 174 | 3, 175 | norm_cfg=norm_cfg, 176 | stride=self.encoder_strides[i], 177 | padding=padding, 178 | indice_key=f'spconv{i + 1}', 179 | conv_type='SparseConv3d')) 180 | elif block_type == 'basicblock': 181 | if j == len(blocks) - 1 and i != len( 182 | self.encoder_channels) - 1: 183 | blocks_list.append( 184 | make_block( 185 | in_channels, 186 | out_channels, 187 | 3, 188 | norm_cfg=norm_cfg, 189 | stride=self.encoder_strides[i], 190 | padding=padding, 191 | indice_key=f'spconv{i + 1}', 192 | conv_type='SparseConv3d')) 193 | else: 194 | blocks_list.append( 195 | SparseBasicBlock( 196 | out_channels, 197 | out_channels, 198 | norm_cfg=norm_cfg, 199 | conv_cfg=conv_cfg)) 200 | else: 201 | blocks_list.append( 202 | make_block( 203 | in_channels, 204 | out_channels, 205 | 3, 206 | norm_cfg=norm_cfg, 207 | padding=padding, 208 | indice_key=f'subm{i + 1}', 209 | conv_type='SubMConv3d')) 210 | in_channels = out_channels 211 | stage_name = f'encoder_layer{i + 1}' 212 | stage_layers = SparseSequential(*blocks_list) 213 | self.encoder_layers.add_module(stage_name, stage_layers) 214 | return out_channels 215 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .uni3detr_transformer import Uni3DETRTransformer, Uni3DETRTransformerDecoder, UniCrossAtten 2 | from .uni3d_viewtrans import Uni3DViewTrans 3 | 4 | __all__ = ['Uni3DETRTransformer', 'Uni3DETRTransformerDecoder', 'UniCrossAtten', 'Uni3DViewTrans'] 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | 6 | class Grid(object): 7 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 8 | self.use_h = use_h 9 | self.use_w = use_w 10 | self.rotate = rotate 11 | self.offset = offset 12 | self.ratio = ratio 13 | self.mode=mode 14 | self.st_prob = prob 15 | self.prob = prob 16 | 17 | def set_prob(self, epoch, max_epoch): 18 | self.prob = self.st_prob * epoch / max_epoch 19 | 20 | def __call__(self, img, label): 21 | if np.random.rand() > self.prob: 22 | return img, label 23 | h = img.size(1) 24 | w = img.size(2) 25 | self.d1 = 2 26 | self.d2 = min(h, w) 27 | hh = int(1.5*h) 28 | ww = int(1.5*w) 29 | d = np.random.randint(self.d1, self.d2) 30 | if self.ratio == 1: 31 | self.l = np.random.randint(1, d) 32 | else: 33 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 34 | mask = 
np.ones((hh, ww), np.float32) 35 | st_h = np.random.randint(d) 36 | st_w = np.random.randint(d) 37 | if self.use_h: 38 | for i in range(hh//d): 39 | s = d*i + st_h 40 | t = min(s+self.l, hh) 41 | mask[s:t,:] *= 0 42 | if self.use_w: 43 | for i in range(ww//d): 44 | s = d*i + st_w 45 | t = min(s+self.l, ww) 46 | mask[:,s:t] *= 0 47 | 48 | r = np.random.randint(self.rotate) 49 | mask = Image.fromarray(np.uint8(mask)) 50 | mask = mask.rotate(r) 51 | mask = np.asarray(mask) 52 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 53 | 54 | mask = torch.from_numpy(mask).float() 55 | if self.mode == 1: 56 | mask = 1-mask 57 | 58 | mask = mask.expand_as(img) 59 | if self.offset: 60 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 61 | offset = (1 - mask) * offset 62 | img = img * mask + offset 63 | else: 64 | img = img * mask 65 | 66 | return img, label 67 | 68 | 69 | class GridMask(nn.Module): 70 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 71 | super(GridMask, self).__init__() 72 | self.use_h = use_h 73 | self.use_w = use_w 74 | self.rotate = rotate 75 | self.offset = offset 76 | self.ratio = ratio 77 | self.mode = mode 78 | self.st_prob = prob 79 | self.prob = prob 80 | 81 | def set_prob(self, epoch, max_epoch): 82 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 83 | 84 | def forward(self, x): 85 | if np.random.rand() > self.prob or not self.training: 86 | return x 87 | n,c,h,w = x.size() 88 | x = x.view(-1,h,w) 89 | hh = int(1.5*h) 90 | ww = int(1.5*w) 91 | d = np.random.randint(2, h) 92 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 93 | mask = np.ones((hh, ww), np.float32) 94 | st_h = np.random.randint(d) 95 | st_w = np.random.randint(d) 96 | if self.use_h: 97 | for i in range(hh//d): 98 | s = d*i + st_h 99 | t = min(s+self.l, hh) 100 | mask[s:t,:] *= 0 101 | if self.use_w: 102 | for i in range(ww//d): 103 | s = d*i + st_w 104 | t = min(s+self.l, ww) 105 | mask[:,s:t] *= 0 106 | 107 | r = np.random.randint(self.rotate) 108 | mask = Image.fromarray(np.uint8(mask)) 109 | mask = mask.rotate(r) 110 | mask = np.asarray(mask) 111 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 112 | 113 | mask = torch.from_numpy(mask).float().cuda() 114 | if self.mode == 1: 115 | mask = 1-mask 116 | mask = mask.expand_as(x) 117 | if self.offset: 118 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float().cuda() 119 | x = x * mask + offset * (1 - mask) 120 | else: 121 | x = x * mask 122 | 123 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [yapf] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | 6 | [isort] 7 | line_length = 79 8 | multi_line_output = 0 9 | extra_standard_library = setuptools 10 | known_first_party = mmdet,mmseg,mmdet3d 11 | known_third_party = 
cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset 12 | no_lines_before = STDLIB,LOCALFOLDER 13 | default_section = THIRDPARTY 14 | 15 | [codespell] 16 | ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD,warmup 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import shutil 4 | import sys 5 | import warnings 6 | from os import path as osp 7 | from setuptools import find_packages, setup 8 | 9 | import torch 10 | from torch.utils.cpp_extension import (BuildExtension, CppExtension, 11 | CUDAExtension) 12 | 13 | 14 | def readme(): 15 | with open('README.md', encoding='utf-8') as f: 16 | content = f.read() 17 | return content 18 | 19 | 20 | version_file = 'mmdet3d/version.py' 21 | 22 | 23 | def get_version(): 24 | with open(version_file, 'r') as f: 25 | exec(compile(f.read(), version_file, 'exec')) 26 | import sys 27 | 28 | # return short version for sdist 29 | if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv: 30 | return locals()['short_version'] 31 | else: 32 | return locals()['__version__'] 33 | 34 | 35 | def make_cuda_ext(name, 36 | module, 37 | sources, 38 | sources_cuda=[], 39 | extra_args=[], 40 | extra_include_path=[]): 41 | 42 | define_macros = [] 43 | extra_compile_args = {'cxx': [] + extra_args} 44 | 45 | if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1': 46 | define_macros += [('WITH_CUDA', None)] 47 | extension = CUDAExtension 48 | extra_compile_args['nvcc'] = extra_args + [ 49 | '-D__CUDA_NO_HALF_OPERATORS__', 50 | '-D__CUDA_NO_HALF_CONVERSIONS__', 51 | '-D__CUDA_NO_HALF2_OPERATORS__', 52 | ] 53 | sources += sources_cuda 54 | else: 55 | print('Compiling {} without CUDA'.format(name)) 56 | extension = CppExtension 57 | # raise EnvironmentError('CUDA is required to compile MMDetection!') 58 | 59 | return extension( 60 | name='{}.{}'.format(module, name), 61 | sources=[os.path.join(*module.split('.'), p) for p in sources], 62 | include_dirs=extra_include_path, 63 | define_macros=define_macros, 64 | extra_compile_args=extra_compile_args) 65 | 66 | 67 | def parse_requirements(fname='requirements.txt', with_version=True): 68 | """Parse the package dependencies listed in a requirements file but strips 69 | specific versioning information. 
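    ``-r <other-file>`` lines are followed recursively, ``-e`` lines keep only
    the ``#egg=`` package name, and environment markers after ``;`` are kept as
    platform-specific dependencies.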
70 | 71 | Args: 72 | fname (str): path to requirements file 73 | with_version (bool, default=False): if True include version specs 74 | 75 | Returns: 76 | list[str]: list of requirements items 77 | 78 | CommandLine: 79 | python -c "import setup; print(setup.parse_requirements())" 80 | """ 81 | import re 82 | import sys 83 | from os.path import exists 84 | require_fpath = fname 85 | 86 | def parse_line(line): 87 | """Parse information from a line in a requirements text file.""" 88 | if line.startswith('-r '): 89 | # Allow specifying requirements in other files 90 | target = line.split(' ')[1] 91 | for info in parse_require_file(target): 92 | yield info 93 | else: 94 | info = {'line': line} 95 | if line.startswith('-e '): 96 | info['package'] = line.split('#egg=')[1] 97 | else: 98 | # Remove versioning from the package 99 | pat = '(' + '|'.join(['>=', '==', '>']) + ')' 100 | parts = re.split(pat, line, maxsplit=1) 101 | parts = [p.strip() for p in parts] 102 | 103 | info['package'] = parts[0] 104 | if len(parts) > 1: 105 | op, rest = parts[1:] 106 | if ';' in rest: 107 | # Handle platform specific dependencies 108 | # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies 109 | version, platform_deps = map(str.strip, 110 | rest.split(';')) 111 | info['platform_deps'] = platform_deps 112 | else: 113 | version = rest # NOQA 114 | info['version'] = (op, version) 115 | yield info 116 | 117 | def parse_require_file(fpath): 118 | with open(fpath, 'r') as f: 119 | for line in f.readlines(): 120 | line = line.strip() 121 | if line and not line.startswith('#'): 122 | for info in parse_line(line): 123 | yield info 124 | 125 | def gen_packages_items(): 126 | if exists(require_fpath): 127 | for info in parse_require_file(require_fpath): 128 | parts = [info['package']] 129 | if with_version and 'version' in info: 130 | parts.extend(info['version']) 131 | if not sys.version.startswith('3.4'): 132 | # apparently package_deps are broken in 3.4 133 | platform_deps = info.get('platform_deps') 134 | if platform_deps is not None: 135 | parts.append(';' + platform_deps) 136 | item = ''.join(parts) 137 | yield item 138 | 139 | packages = list(gen_packages_items()) 140 | return packages 141 | 142 | 143 | def add_mim_extension(): 144 | """Add extra files that are required to support MIM into the package. 145 | 146 | These files will be added by creating a symlink to the originals if the 147 | package is installed in `editable` mode (e.g. pip install -e .), or by 148 | copying from the originals otherwise. 149 | """ 150 | 151 | # parse installment mode 152 | if 'develop' in sys.argv: 153 | # installed by `pip install -e .` 154 | if platform.system() == 'Windows': 155 | # set `copy` mode here since symlink fails on Windows. 
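            # (a plain copy, unlike a symlink, will not pick up later edits to
            # the source files, but it avoids Windows' symlink permission
            # requirements)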
156 | mode = 'copy' 157 | else: 158 | mode = 'symlink' 159 | elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv: 160 | # installed by `pip install .` 161 | # or create source distribution by `python setup.py sdist` 162 | mode = 'copy' 163 | else: 164 | return 165 | 166 | filenames = ['tools', 'configs', 'model-index.yml'] 167 | repo_path = osp.dirname(__file__) 168 | mim_path = osp.join(repo_path, 'mmdet3d', '.mim') 169 | os.makedirs(mim_path, exist_ok=True) 170 | 171 | for filename in filenames: 172 | if osp.exists(filename): 173 | src_path = osp.join(repo_path, filename) 174 | tar_path = osp.join(mim_path, filename) 175 | 176 | if osp.isfile(tar_path) or osp.islink(tar_path): 177 | os.remove(tar_path) 178 | elif osp.isdir(tar_path): 179 | shutil.rmtree(tar_path) 180 | 181 | if mode == 'symlink': 182 | src_relpath = osp.relpath(src_path, osp.dirname(tar_path)) 183 | os.symlink(src_relpath, tar_path) 184 | elif mode == 'copy': 185 | if osp.isfile(src_path): 186 | shutil.copyfile(src_path, tar_path) 187 | elif osp.isdir(src_path): 188 | shutil.copytree(src_path, tar_path) 189 | else: 190 | warnings.warn(f'Cannot copy file {src_path}.') 191 | else: 192 | raise ValueError(f'Invalid mode {mode}') 193 | 194 | 195 | if __name__ == '__main__': 196 | add_mim_extension() 197 | setup( 198 | name='mmdet3d', 199 | version=get_version(), 200 | description=("OpenMMLab's next-generation platform" 201 | 'for general 3D object detection.'), 202 | long_description=readme(), 203 | long_description_content_type='text/markdown', 204 | author='MMDetection3D Contributors', 205 | author_email='zwwdev@gmail.com', 206 | keywords='computer vision, 3D object detection', 207 | url='https://github.com/open-mmlab/mmdetection3d', 208 | packages=find_packages(), 209 | include_package_data=True, 210 | package_data={'mmdet3d.ops': ['*/*.so']}, 211 | classifiers=[ 212 | 'Development Status :: 4 - Beta', 213 | 'License :: OSI Approved :: Apache Software License', 214 | 'Operating System :: OS Independent', 215 | 'Programming Language :: Python :: 3', 216 | 'Programming Language :: Python :: 3.6', 217 | 'Programming Language :: Python :: 3.7', 218 | ], 219 | license='Apache License 2.0', 220 | install_requires=parse_requirements('requirements/runtime.txt'), 221 | extras_require={ 222 | 'all': parse_requirements('requirements.txt'), 223 | 'tests': parse_requirements('requirements/tests.txt'), 224 | 'build': parse_requirements('requirements/build.txt'), 225 | 'optional': parse_requirements('requirements/optional.txt'), 226 | 'mim': parse_requirements('requirements/mminstall.txt'), 227 | }, 228 | cmdclass={'build_ext': BuildExtension}, 229 | zip_safe=False) 230 | --------------------------------------------------------------------------------
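For orientation, the sketch below illustrates how the plugin modules dumped above (`RDIoULoss`, `IoU3DLoss`, `SoftFocalLoss`, `SECOND3DFPN`, `SparseEncoderHD`) can be referenced by their registered type names from an mmdet3d-style config. The `plugin`/`plugin_dir` convention, the nesting keys, and all channel sizes and loss weights shown here are illustrative assumptions, not values copied from the shipped `projects/configs` files.

```python
# Hypothetical mmdet3d-style config excerpt. Only the registered type names
# ('SparseEncoderHD', 'SECOND3DFPN', 'RDIoULoss', 'IoU3DLoss', 'SoftFocalLoss')
# come from this repository; every other key and number is a placeholder.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

model = dict(
    pts_middle_encoder=dict(
        type='SparseEncoderHD',            # registered in MIDDLE_ENCODERS
        in_channels=4,
        sparse_shape=[41, 1440, 1440],     # placeholder voxel grid shape
        keep_depth=True),
    pts_neck=dict(
        type='SECOND3DFPN',                # registered in NECKS
        in_channels=[128, 128, 256],
        out_channels=[256, 256, 256],
        upsample_strides=[1, 2, 4]),
    pts_bbox_head=dict(
        # the three losses registered in LOSSES by rdiouloss.py
        loss_bbox=dict(type='RDIoULoss', loss_weight=2.0),
        loss_iou=dict(type='IoU3DLoss', loss_weight=1.0),
        loss_cls=dict(type='SoftFocalLoss', gamma=2.0, alpha=0.25, loss_weight=1.0)))
```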