├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
│   ├── ovuni3detr.png
│   └── uni3detr.png
├── extra_tools
│   ├── analysis_tools
│   │   └── eval_metric.py
│   ├── create_data.py
│   ├── create_data.sh
│   ├── data_converter
│   │   ├── create_unified_gt_database.py
│   │   └── nuscenes_converter.py
│   ├── dist_test.sh
│   ├── dist_train.sh
│   ├── eval_metric.py
│   ├── get_flops.py
│   ├── test.py
│   └── train.py
├── model-index.yml
├── projects
│   ├── __init__.py
│   ├── configs
│   │   ├── ov_uni3detr
│   │   │   ├── ov_uni3detr_sunrgbd_mm.py
│   │   │   ├── ov_uni3detr_sunrgbd_pc.py
│   │   │   └── ov_uni3detr_sunrgbd_rgb.py
│   │   └── uni3detr
│   │       ├── uni3detr_kitti_3classes.py
│   │       ├── uni3detr_kitti_car.py
│   │       ├── uni3detr_nuscenes.py
│   │       ├── uni3detr_scannet.py
│   │       ├── uni3detr_scannet_large.py
│   │       └── uni3detr_sunrgbd.py
│   └── mmdet3d_plugin
│       ├── __init__.py
│       ├── core
│       │   ├── bbox
│       │   │   ├── assigners
│       │   │   │   ├── __init__.py
│       │   │   │   └── hungarian_assigner_3d.py
│       │   │   ├── bbox_merging.py
│       │   │   ├── coders
│       │   │   │   ├── __init__.py
│       │   │   │   └── nms_free_coder.py
│       │   │   ├── match_costs
│       │   │   │   ├── __init__.py
│       │   │   │   └── match_cost.py
│       │   │   └── util.py
│       │   ├── indoor_eval.py
│       │   └── merge_all_augs.py
│       ├── datasets
│       │   ├── __init__.py
│       │   ├── nuscenes_dataset.py
│       │   ├── pipelines
│       │   │   ├── __init__.py
│       │   │   ├── dbsampler.py
│       │   │   ├── formatting.py
│       │   │   ├── loading_3d.py
│       │   │   ├── test_time_aug.py
│       │   │   └── transform_3d.py
│       │   └── sunrgbd_dataset_ov.py
│       └── models
│           ├── backbones
│           │   ├── __init__.py
│           │   ├── second_3d.py
│           │   └── vovnet.py
│           ├── dense_heads
│           │   ├── __init__.py
│           │   ├── uni3detr_head.py
│           │   └── uni3detr_head_clip.py
│           ├── detectors
│           │   ├── __init__.py
│           │   ├── ov_uni3detr.py
│           │   └── uni3detr.py
│           ├── losses
│           │   ├── __init__.py
│           │   └── rdiouloss.py
│           ├── necks
│           │   ├── __init__.py
│           │   └── second3d_fpn.py
│           ├── pts_encoder
│           │   ├── __init__.py
│           │   └── sparse_encoder_hd.py
│           └── utils
│               ├── __init__.py
│               ├── grid_mask.py
│               ├── uni3d_viewtrans.py
│               └── uni3detr_transformer.py
├── requirements.txt
├── setup.cfg
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.ipynb
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 | .pytest_cache/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # Environments
83 | .env
84 | .venv
85 | env/
86 | venv/
87 | ENV/
88 | env.bak/
89 | venv.bak/
90 |
91 | # Spyder project settings
92 | .spyderproject
93 | .spyproject
94 |
95 | # Rope project settings
96 | .ropeproject
97 |
98 | # mkdocs documentation
99 | /site
100 |
101 | # mypy
102 | .mypy_cache/
103 |
104 | # cython generated cpp
105 | data
106 | .vscode
107 | .idea
108 |
109 | # custom
110 | *.pkl
111 | *.pkl.json
112 | *.log.json
113 | work_dirs/
114 | exps/
115 | *~
116 | mmdet3d/.mim
117 |
118 | # Pytorch
119 | *.pth
120 |
121 | # demo
122 | data/
123 | *.obj
124 | *.ply
125 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include mmdet3d/.mim/model-index.yml
2 | include requirements/*.txt
3 | recursive-include mmdet3d/.mim/ops *.cpp *.cu *.h *.cc
4 | recursive-include mmdet3d/.mim/configs *.py *.yml
5 | recursive-include mmdet3d/.mim/tools *.sh *.py
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Uni3DETR & OV-Uni3DETR
3 |
4 | This repository contains the code for:
5 | our NeurIPS 2023 paper
6 | [**Uni3DETR: Unified 3D Detection Transformer**](https://arxiv.org/pdf/2310.05699)
7 |
8 |
9 | ![Uni3DETR overview](docs/uni3detr.png)
10 |
11 |
12 | and our ECCV 2024 paper
13 | [**OV-Uni3DETR: Towards Unified Open-Vocabulary 3D Object Detection via Cycle-Modality Propagation**](https://arxiv.org/pdf/2403.19580)
14 |
15 |
16 | ![OV-Uni3DETR overview](docs/ovuni3detr.png)
17 |
18 |
19 | Uni3DETR provides a unified structure for both indoor and outdoor 3D object detection.
20 | Based on this architecture, OV-Uni3DETR further introduces multi-modal learning and open-vocabulary learning, unifying both modalities and categories within a single structure.
21 |
22 | ## Preparation
23 | This project is based on [mmDetection3D](https://github.com/open-mmlab/mmdetection3d), which can be constructed as follows.
24 | * Install mmDetection3D [v1.0.0rc5](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0rc5) following [the instructions](https://github.com/open-mmlab/mmdetection3d/blob/v1.0.0rc5/docs/getting_started.md).
25 | * Copy our project code and the extra tools into the installed mmDetection3D directory:
26 | ```bash
27 | cp -r projects mmdetection3d/
28 | cp -r extra_tools mmdetection3d/
29 | ```
30 | * Prepare the dataset following [mmDetection3D dataset instructions](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0rc5/docs/en/datasets).
31 | * Uni3DETR dataset preparation:
32 |
33 | SUN RGB-D dataset:
34 | The directory structure after processing should be as follows:
35 | ```
36 | sunrgbd
37 | ├── README.md
38 | ├── matlab
39 | │ ├── ...
40 | ├── OFFICIAL_SUNRGBD
41 | │ ├── ...
42 | ├── sunrgbd_trainval
43 | │ ├── ...
44 | ├── points
45 | ├── sunrgbd_infos_train.pkl
46 | ├── sunrgbd_infos_val.pkl
47 | ```
48 | ScanNet dataset:
49 |
50 | After downloading the datasets following mmDetection3D, run ``python scripts/scannet_globalallign.py`` to perform global alignment in advance. Please note that this operation modifies the data files in place, so it is recommended to back them up first if you have any concerns; a minimal sketch is given below.
51 |
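A minimal sketch of this step (the back-up location is just an example, not part of the repo):

```bash
# back up the processed ScanNet files first, since the alignment rewrites them in place
cp -r data/scannet data/scannet_backup
python scripts/scannet_globalallign.py
```
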
52 | The directory structure should be as follows:
53 |
54 | ```
55 | scannet
56 | ├── meta_data
57 | ├── batch_load_scannet_data.py
58 | ├── load_scannet_data.py
59 | ├── scannet_utils.py
60 | ├── README.md
61 | ├── scans
62 | ├── scans_test
63 | ├── scannet_instance_data
64 | ├── points
65 | │ ├── xxxxx.bin
66 | ├── instance_mask
67 | │ ├── xxxxx.bin
68 | ├── semantic_mask
69 | │ ├── xxxxx.bin
70 | ├── seg_info
71 | │ ├── train_label_weight.npy
72 | │ ├── train_resampled_scene_idxs.npy
73 | │ ├── val_label_weight.npy
74 | │ ├── val_resampled_scene_idxs.npy
75 | ├── posed_images
76 | │ ├── scenexxxx_xx
77 | │ │ ├── xxxxxx.txt
78 | │ │ ├── xxxxxx.jpg
79 | │ │ ├── intrinsic.txt
80 | ├── scannet_infos_train.pkl
81 | ├── scannet_infos_val.pkl
82 | ├── scannet_infos_test.pkl
83 | ```
84 |
85 | Preparation of the outdoor KITTI and nuScenes datasets is exactly the same as in mmDetection3D.
86 |
87 | * OV-Uni3DETR dataset preparation:
88 |
89 | SUN RGB-D dataset:
90 |
91 | The SUN RGB-D dataset preparation steps are the same as the Uni3DETR steps above; the only difference is the annotation files, which can be downloaded directly from [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing). We will release the code for generating these training annotation files soon.
92 |
93 |
94 | ## Training
95 | ```bash
96 | bash extra_tools/dist_train.sh ${CFG_FILE} ${NUM_GPUS}
97 | ```
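
For example, training the Uni3DETR SUN RGB-D model (the GPU count here is only an illustration):
```bash
bash extra_tools/dist_train.sh projects/configs/uni3detr/uni3detr_sunrgbd.py 8
```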
98 |
99 | ## Evaluation
100 | ```bash
101 | bash extra_tools/dist_test.sh ${CFG_FILE} ${CKPT} ${NUM_GPUS} --eval=bbox
102 | ```
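
For example, evaluating a trained SUN RGB-D checkpoint (the checkpoint path below is a placeholder):
```bash
bash extra_tools/dist_test.sh projects/configs/uni3detr/uni3detr_sunrgbd.py work_dirs/uni3detr_sunrgbd/latest.pth 8 --eval=bbox
```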
103 |
104 | ## Uni3DETR models
105 | We provide results on SUN RGB-D, ScanNet, KITTI, and nuScenes, together with pretrained models (corresponding to Tab. 1, Tab. 2, and Tab. 3 of our paper).
106 | | Dataset | mAP (%) | download |
107 | |---------------------------------------------|:-------:|:-------:|
108 | | **indoor** |
109 | | [SUN RGB-D](projects/configs/uni3detr/uni3detr_sunrgbd.py) | 67.0 | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) |
110 | | [ScanNet](projects/configs/uni3detr/uni3detr_scannet_large.py) | 71.7 | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) |
111 | | **outdoor** |
112 | | [KITTI (3 classes)](projects/configs/uni3detr/uni3detr_kitti_3classes.py) | 86.57 (moderate car) | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) |
113 | | [KITTI (car)](projects/configs/uni3detr/uni3detr_kitti_car.py) | 86.74 (moderate car) | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) |
114 | | [nuScenes](projects/configs/uni3detr/uni3detr_nuscenes.py) | 61.7 | [GoogleDrive](https://drive.google.com/drive/folders/1ljh6quUw5gLyHbQiY68HDGtY6QLp_d6e?usp=sharing) |
115 |
116 |
--------------------------------------------------------------------------------
/docs/ovuni3detr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhenyuw16/Uni3DETR/15cb08a7ddfc2e4f0ae5a5a7b9ec6a7be8175399/docs/ovuni3detr.png
--------------------------------------------------------------------------------
/docs/uni3detr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhenyuw16/Uni3DETR/15cb08a7ddfc2e4f0ae5a5a7b9ec6a7be8175399/docs/uni3detr.png
--------------------------------------------------------------------------------
/extra_tools/analysis_tools/eval_metric.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os
4 | import mmcv
5 | from mmcv import Config, DictAction
6 |
7 | from mmdet3d.datasets import build_dataset
8 | from mmdet.utils import update_data_root
9 |
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser(description='Evaluate metric of the '
13 | 'results saved in pkl format')
14 | parser.add_argument('config', help='Config of the model')
15 | parser.add_argument('pkl_results', help='Results in pickle format')
16 | parser.add_argument(
17 | '--format-only',
18 | action='store_true',
19 | help='Format the output results without performing evaluation. It is '
20 | 'useful when you want to format the result to a specific format and '
21 | 'submit it to the test server')
22 | parser.add_argument(
23 | '--eval',
24 | type=str,
25 | nargs='+',
26 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",'
27 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
28 | parser.add_argument(
29 | '--cfg-options',
30 | nargs='+',
31 | action=DictAction,
32 | help='override some settings in the used config, the key-value pair '
33 | 'in xxx=yyy format will be merged into config file. If the value to '
34 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
35 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
36 | 'Note that the quotation marks are necessary and that no white space '
37 | 'is allowed.')
38 | parser.add_argument(
39 | '--eval-options',
40 | nargs='+',
41 | action=DictAction,
42 | help='custom options for evaluation, the key-value pair in xxx=yyy '
43 | 'format will be kwargs for dataset.evaluate() function')
44 | args = parser.parse_args()
45 | return args
46 |
47 |
48 | def main():
49 | args = parse_args()
50 |
51 | cfg = Config.fromfile(args.config)
52 |
53 | # update data root according to MMDET_DATASETS
54 | update_data_root(cfg)
55 |
56 | # import modules from plugin/xx, the registry will be updated
57 | if hasattr(cfg, 'plugin'):
58 | if cfg.plugin:
59 | import importlib
60 | if hasattr(cfg, 'plugin_dir'):
61 | plugin_dir = cfg.plugin_dir
62 | _module_dir = os.path.dirname(plugin_dir)
63 | _module_dir = _module_dir.split('/')
64 | _module_path = _module_dir[0]
65 |
66 | for m in _module_dir[1:]:
67 | _module_path = _module_path + '.' + m
68 | print(_module_path)
69 | plg_lib = importlib.import_module(_module_path)
70 | else:
71 | # import dir is the dirpath for the config file
72 | _module_dir = os.path.dirname(args.config)
73 | _module_dir = _module_dir.split('/')
74 | _module_path = _module_dir[0]
75 | for m in _module_dir[1:]:
76 | _module_path = _module_path + '.' + m
77 | print(_module_path)
78 | plg_lib = importlib.import_module(_module_path)
79 |
80 | assert args.eval or args.format_only, (
81 | 'Please specify at least one operation (eval/format the results) with '
82 | 'the argument "--eval", "--format-only"')
83 | if args.eval and args.format_only:
84 | raise ValueError('--eval and --format_only cannot be both specified')
85 |
86 | if args.cfg_options is not None:
87 | cfg.merge_from_dict(args.cfg_options)
88 | cfg.data.test.test_mode = True
89 |
90 | dataset = build_dataset(cfg.data.test)
91 | outputs = mmcv.load(args.pkl_results)
92 |
93 | kwargs = {} if args.eval_options is None else args.eval_options
94 | if args.format_only:
95 | dataset.format_results(outputs, **kwargs)
96 | if args.eval:
97 | eval_kwargs = cfg.get('evaluation', {}).copy()
98 | # hard-code way to remove EvalHook args
99 | for key in [
100 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
101 | 'rule'
102 | ]:
103 | eval_kwargs.pop(key, None)
104 | eval_kwargs.update(dict(metric=args.eval, **kwargs))
105 | print(dataset.evaluate(outputs, **eval_kwargs))
106 |
107 |
108 | if __name__ == '__main__':
109 | main()
110 |
--------------------------------------------------------------------------------
/extra_tools/create_data.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | from os import path as osp
4 |
5 | from data_converter import nuscenes_converter as nuscenes_converter
6 | from data_converter.create_unified_gt_database import create_groundtruth_database
7 |
8 |
9 | def nuscenes_data_prep(root_path,
10 | info_prefix,
11 | version,
12 | dataset_name,
13 | out_dir,
14 | max_sweeps=10):
15 | """Prepare data related to nuScenes dataset.
16 |
17 | Related data consists of '.pkl' files recording basic infos,
18 | 2D annotations and groundtruth database.
19 |
20 | Args:
21 | root_path (str): Path of dataset root.
22 | info_prefix (str): The prefix of info filenames.
23 | version (str): Dataset version.
24 | dataset_name (str): The dataset class name.
25 | out_dir (str): Output directory of the groundtruth database info.
26 | max_sweeps (int): Number of input consecutive frames. Default: 10
27 | """
28 | #nuscenes_converter.create_nuscenes_infos(
29 | # root_path, info_prefix, version=version, max_sweeps=max_sweeps)
30 |
31 | if version == 'v1.0-test':
32 | # info_test_path = osp.join(root_path, f'{info_prefix}_infos_test.pkl')
33 | # nuscenes_converter.export_2d_annotation(
34 | # root_path, info_test_path, version=version)
35 | return
36 |
37 | # info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl')
38 | # info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl')
39 | # nuscenes_converter.export_2d_annotation(
40 | # root_path, info_train_path, version=version)
41 | # nuscenes_converter.export_2d_annotation(
42 | # root_path, info_val_path, version=version)
43 | create_groundtruth_database(dataset_name, root_path, info_prefix,
44 | f'{out_dir}/{info_prefix}_infos_train.pkl')
45 |
46 |
47 | parser = argparse.ArgumentParser(description='Data converter arg parser')
48 | parser.add_argument('dataset', metavar='nuscenes', help='name of the dataset')
49 | parser.add_argument(
50 | '--root-path',
51 | type=str,
52 | default='./data/nuscenes',
53 | help='specify the root path of dataset')
54 | parser.add_argument(
55 | '--version',
56 | type=str,
57 | default='v1.0',
58 | required=False,
59 | help='specify the dataset version, no need for nuscenes')
60 | parser.add_argument(
61 | '--max-sweeps',
62 | type=int,
63 | default=10,
64 | required=False,
65 | help='specify sweeps of lidar per example')
66 | parser.add_argument(
67 | '--out-dir',
68 | type=str,
69 | default='./data/nuscenes',
70 | required=False,
71 | help='name of info pkl')
72 | parser.add_argument('--extra-tag', type=str, default='nuscenes')
73 | parser.add_argument(
74 | '--workers', type=int, default=4, help='number of threads to be used')
75 | args = parser.parse_args()
76 |
77 | if __name__ == '__main__':
78 | if args.dataset == 'nuscenes' and args.version != 'v1.0-mini':
79 | train_version = f'{args.version}-trainval'
80 | nuscenes_data_prep(
81 | root_path=args.root_path,
82 | info_prefix=args.extra_tag,
83 | version=train_version,
84 | dataset_name='NuScenesSweepDataset',
85 | out_dir=args.out_dir,
86 | max_sweeps=args.max_sweeps)
87 | test_version = f'{args.version}-test'
88 | nuscenes_data_prep(
89 | root_path=args.root_path,
90 | info_prefix=args.extra_tag,
91 | version=test_version,
92 | dataset_name='NuScenesSweepDataset',
93 | out_dir=args.out_dir,
94 | max_sweeps=args.max_sweeps)
95 | elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini':
96 | train_version = f'{args.version}'
97 | nuscenes_data_prep(
98 | root_path=args.root_path,
99 | info_prefix=args.extra_tag,
100 | version=train_version,
101 | dataset_name='NuScenesSweepDataset',
102 | out_dir=args.out_dir,
103 | max_sweeps=args.max_sweeps)
104 |
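# Example invocation (an assumption based on the argument parser above; adjust paths as needed):
#   python extra_tools/create_data.py nuscenes --root-path ./data/nuscenes \
#       --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --max-sweeps 10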
--------------------------------------------------------------------------------
/extra_tools/create_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 | export PYTHONPATH=`pwd`:$PYTHONPATH
5 |
6 | PARTITION=$1
7 | JOB_NAME=$2
8 | CONFIG=$3
9 | WORK_DIR=$4
10 | GPUS=${GPUS:-1}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | JOB_NAME=create_data
14 |
15 | srun -p ${PARTITION} \
16 | --job-name=${JOB_NAME} \
17 | --gres=gpu:${GPUS_PER_NODE} \
18 | --ntasks=${GPUS} \
19 | --ntasks-per-node=${GPUS_PER_NODE} \
20 | --kill-on-bad-exit=1 \
21 | ${SRUN_ARGS} \
22 | python3 -u tools/create_data.py kitti \
23 | --root-path ./data/kitti \
24 | --out-dir ./data/kitti \
25 | --extra-tag kitti
26 |
--------------------------------------------------------------------------------
/extra_tools/data_converter/create_unified_gt_database.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import mmcv
3 | import numpy as np
4 | import pickle
5 | import argparse
6 | import os
7 | import importlib
8 |
9 | from mmcv import track_iter_progress
10 | from os import path as osp
11 |
12 | from mmdet3d.core.bbox import box_np_ops as box_np_ops
13 | from mmdet3d.datasets import build_dataset
14 | from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
15 |
16 |
17 | def create_groundtruth_database(dataset_class_name,
18 | data_path,
19 | info_prefix,
20 | info_path=None,
21 | used_classes=None,
22 | database_save_path=None,
23 | db_info_save_path=None,
24 | with_mask=False):
25 | """Given the raw data, generate the ground truth database.
26 |
27 | Args:
28 | dataset_class_name (str): Name of the input dataset.
29 | data_path (str): Path of the data.
30 | info_prefix (str): Prefix of the info file.
31 | info_path (str): Path of the info file.
32 | Default: None.
33 | mask_anno_path (str): Path of the mask_anno.
34 | Default: None.
35 | used_classes (list[str]): Classes have been used.
36 | Default: None.
37 | database_save_path (str): Path to save database.
38 | Default: None.
39 | db_info_save_path (str): Path to save db_info.
40 | Default: None.
41 | relative_path (bool): Whether to use relative path.
42 | Default: True.
43 | with_mask (bool): Whether to use mask.
44 | Default: False.
45 | """
46 | print(f'Create GT Database of {dataset_class_name}')
47 | dataset_cfg = dict(
48 | type=dataset_class_name, data_root=data_path, ann_file=info_path, return_gt_info=True)
49 | if dataset_class_name == 'NuScenesSweepDataset':
50 | dataset_cfg.update(
51 | use_valid_flag=True,
52 | pipeline=[
53 | dict(
54 | type='LoadPointsFromFile',
55 | coord_type='LIDAR',
56 | load_dim=5,
57 | use_dim=5),
58 | dict(
59 | type='LoadPointsFromMultiSweeps',
60 | sweeps_num=10,
61 | use_dim=[0, 1, 2, 3, 4],
62 | pad_empty_sweeps=True,
63 | remove_close=True),
64 | dict(
65 | type='LoadAnnotations3D',
66 | with_bbox_3d=True,
67 | with_label_3d=True)
68 | ])
69 |
70 | dataset = build_dataset(dataset_cfg)
71 |
72 | if database_save_path is None:
73 | database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
74 | if db_info_save_path is None:
75 | db_info_save_path = osp.join(data_path,
76 | f'{info_prefix}_dbinfos_train.pkl')
77 | database_pts_path = osp.join(database_save_path, 'pts_dir')
78 | database_img_path = osp.join(database_save_path, 'img_dir')
79 | mmcv.mkdir_or_exist(database_save_path)
80 | mmcv.mkdir_or_exist(database_pts_path)
81 | mmcv.mkdir_or_exist(database_img_path)
82 | all_db_infos = dict()
83 |
84 | group_counter = 0
85 | for j in track_iter_progress(list(range(len(dataset)))):
86 |
87 | input_dict = dataset.get_data_info(j)
88 | dataset.pre_pipeline(input_dict)
89 | example = dataset.pipeline(input_dict)
90 | annos = example['ann_info']
91 | image_idx = example['sample_idx']
92 | points = example['points'].tensor.numpy()
93 | gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
94 | names = annos['gt_names']
95 | group_dict = dict()
96 | if 'group_ids' in annos:
97 | group_ids = annos['group_ids']
98 | else:
99 | group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
100 | difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
101 | if 'difficulty' in annos:
102 | difficulty = annos['difficulty']
103 |
104 | num_obj = gt_boxes_3d.shape[0]
105 | point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
106 |
107 | # load multi-view image
108 | input_img = {}
109 | input_info = {}
110 | for _cam in example['info']['cams']:
111 | cam_info = example['info']['cams'][_cam]
112 | _path = cam_info['data_path']
113 | _img = mmcv.imread(_path, 'unchanged')
114 | input_img[_cam] = _img
115 |
116 | # obtain lidar to image transformation matrix
117 | lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
118 | lidar2cam_t = cam_info[
119 | 'sensor2lidar_translation'] @ lidar2cam_r.T
120 | lidar2cam_rt = np.eye(4)
121 | lidar2cam_rt[:3, :3] = lidar2cam_r.T
122 | lidar2cam_rt[3, :3] = -lidar2cam_t
123 | intrinsic = cam_info['cam_intrinsic']
124 | viewpad = np.eye(4)
125 | viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
126 | lidar2img_rt = (viewpad @ lidar2cam_rt.T)
127 |
128 | input_info[_cam]={
129 | 'lidar2img': lidar2img_rt,
130 | 'lidar2cam': lidar2cam_rt,
131 | 'cam_intrinsic': viewpad}
132 |
133 | for i in range(num_obj):
134 | pts_filename = f'{image_idx}_{names[i]}_{i}.bin'
135 | img_filename = f'{image_idx}_{names[i]}_{i}.png'
136 | abs_filepath = osp.join(database_pts_path, pts_filename)
137 | abs_img_filepath = osp.join(database_img_path, img_filename)
138 | rel_filepath = osp.join(f'{info_prefix}_gt_database', 'pts_dir', pts_filename)
139 | rel_img_filepath = osp.join(f'{info_prefix}_gt_database', 'img_dir', img_filename)
140 |
141 | # save point clouds and image patches for each object
142 | gt_points = points[point_indices[:, i]]
143 | gt_points[:, :3] -= gt_boxes_3d[i, :3]
144 |
145 | # with open(abs_filepath, 'w') as f:
146 | # gt_points.tofile(f)
147 |
148 | img_crop, crop_key, crop_depth = find_img_crop(annos['gt_bboxes_3d'][i].corners.numpy(), input_img, input_info, points[point_indices[:, i]])
149 | if img_crop is not None:
150 | mmcv.imwrite(img_crop, abs_img_filepath)
151 |
152 | if (used_classes is None) or names[i] in used_classes:
153 | db_info = {
154 | 'name': names[i],
155 | 'path': rel_filepath,
156 | 'image_idx': image_idx,
157 | 'image_path': rel_img_filepath if img_crop is not None else '',
158 | 'image_crop_key': crop_key if img_crop is not None else '',
159 | 'image_crop_depth': crop_depth,
160 | 'gt_idx': i,
161 | 'box3d_lidar': gt_boxes_3d[i],
162 | 'num_points_in_gt': gt_points.shape[0],
163 | 'difficulty': difficulty[i],
164 | }
165 | local_group_id = group_ids[i]
166 | # if local_group_id >= 0:
167 | if local_group_id not in group_dict:
168 | group_dict[local_group_id] = group_counter
169 | group_counter += 1
170 | db_info['group_id'] = group_dict[local_group_id]
171 | if 'score' in annos:
172 | db_info['score'] = annos['score'][i]
173 | if names[i] in all_db_infos:
174 | all_db_infos[names[i]].append(db_info)
175 | else:
176 | all_db_infos[names[i]] = [db_info]
177 |
178 | for k, v in all_db_infos.items():
179 | print(f'load {len(v)} {k} database infos')
180 |
181 | with open(db_info_save_path, 'wb') as f:
182 | pickle.dump(all_db_infos, f)
183 |
184 |
185 | def find_img_crop(gt_boxes_3d, input_img, input_info, points):
186 | coord_3d = np.concatenate([gt_boxes_3d, np.ones_like(gt_boxes_3d[..., :1])], -1)
187 | coord_3d = coord_3d.squeeze(0)
188 | max_crop, crop_key = None, None
189 | crop_area, crop_depth = 0, 0
190 |
191 | for _key in input_img:
192 | lidar2img = np.array(input_info[_key]['lidar2img'])
193 | coord_img = coord_3d @ lidar2img.T
194 | coord_img[:,:2] /= coord_img[:,2,None]
195 | image_shape = input_img[_key].shape
196 | if (coord_img[:, 2] <= 0).any():  # skip boxes with any corner behind the camera (depth <= 0)
197 | continue
198 |
199 | avg_depth = coord_img[:,2].mean()
200 | minxy = np.min(coord_img[:,:2], axis=-2)
201 | maxxy = np.max(coord_img[:,:2], axis=-2)
202 | bbox = np.concatenate([minxy, maxxy], axis=-1)
203 | bbox[0::2] = np.clip(bbox[0::2], a_min=0, a_max=image_shape[1]-1)
204 | bbox[1::2] = np.clip(bbox[1::2], a_min=0, a_max=image_shape[0]-1)
205 | bbox = bbox.astype(int)
206 | if ((bbox[2:]-bbox[:2]) <= 10).any():
207 | continue
208 |
209 | img_crop = input_img[_key][bbox[1]:bbox[3],bbox[0]:bbox[2]]
210 | if img_crop.shape[0] * img_crop.shape[1] > crop_area:
211 | max_crop = img_crop
212 | crop_key = _key
213 | crop_depth = avg_depth
214 |
215 | return max_crop, crop_key, crop_depth
216 |
217 |
218 | if __name__ == '__main__':
219 | parser = argparse.ArgumentParser(description='Data converter arg parser')
220 | parser.add_argument(
221 | '--dataset',
222 | type=str,
223 | default='NuScenesSweepDataset',
224 | required=False,
225 | help='specify dataset name')
226 | parser.add_argument(
227 | '--root-path',
228 | type=str,
229 | default='./data/nuscenes',
230 | help='specify the root path of dataset')
231 | parser.add_argument(
232 | '--version',
233 | type=str,
234 | default='v1.0',
235 | required=False,
236 | help='specify the dataset version, no need for kitti')
237 | parser.add_argument(
238 | '--out-dir',
239 | type=str,
240 | default='./data/nuscenes',
241 | required=False,
242 | help='output data dir')
243 | parser.add_argument(
244 | '--info-path',
245 | type=str,
246 | default='./data/nuscenes/nuscenes_img_pro_infos_train.pkl',
247 | required=False,
248 | help='name of info pkl')
249 | parser.add_argument('--extra-tag', type=str, default='nuscenes_unified')
250 | args = parser.parse_args()
251 |
252 | plugin_dir = 'projects/mmdet3d_plugin/'
253 | _module_dir = os.path.dirname(plugin_dir)
254 | _module_dir = _module_dir.split('/')
255 | _module_path = _module_dir[0]
256 |
257 | for m in _module_dir[1:]:
258 | _module_path = _module_path + '.' + m
259 | print(_module_path)
260 | plg_lib = importlib.import_module(_module_path)
261 |
262 | create_groundtruth_database(args.dataset, args.root_path, args.extra_tag,
263 | args.info_path)
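
# Example invocation (an assumption, mirroring the defaults above; adjust paths as needed):
#   python extra_tools/data_converter/create_unified_gt_database.py \
#       --dataset NuScenesSweepDataset --root-path ./data/nuscenes \
#       --info-path ./data/nuscenes/nuscenes_img_pro_infos_train.pkl --extra-tag nuscenes_unified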
--------------------------------------------------------------------------------
/extra_tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | PORT=${PORT:-29503}
7 |
8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
9 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 |
--------------------------------------------------------------------------------
/extra_tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=${PORT:-29501}
6 |
7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
10 |
--------------------------------------------------------------------------------
/extra_tools/eval_metric.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os
4 | import mmcv
5 | from mmcv import Config, DictAction
6 |
7 | from mmdet3d.datasets import build_dataset
8 | from mmdet.utils import update_data_root
9 |
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser(description='Evaluate metric of the '
13 | 'results saved in pkl format')
14 | parser.add_argument('config', help='Config of the model')
15 | parser.add_argument('pkl_results', help='Results in pickle format')
16 | parser.add_argument(
17 | '--format-only',
18 | action='store_true',
19 | help='Format the output results without performing evaluation. It is '
20 | 'useful when you want to format the result to a specific format and '
21 | 'submit it to the test server')
22 | parser.add_argument(
23 | '--eval',
24 | type=str,
25 | nargs='+',
26 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",'
27 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
28 | parser.add_argument(
29 | '--cfg-options',
30 | nargs='+',
31 | action=DictAction,
32 | help='override some settings in the used config, the key-value pair '
33 | 'in xxx=yyy format will be merged into config file. If the value to '
34 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
35 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
36 | 'Note that the quotation marks are necessary and that no white space '
37 | 'is allowed.')
38 | parser.add_argument(
39 | '--eval-options',
40 | nargs='+',
41 | action=DictAction,
42 | help='custom options for evaluation, the key-value pair in xxx=yyy '
43 | 'format will be kwargs for dataset.evaluate() function')
44 | args = parser.parse_args()
45 | return args
46 |
47 |
48 | def main():
49 | args = parse_args()
50 |
51 | cfg = Config.fromfile(args.config)
52 |
53 | # update data root according to MMDET_DATASETS
54 | update_data_root(cfg)
55 |
56 | import importlib
57 | if hasattr(cfg, 'plugin_dir'):
58 | plugin_dir = cfg.plugin_dir
59 | _module_dir = os.path.dirname(plugin_dir)
60 | _module_dir = _module_dir.split('/')
61 | _module_path = _module_dir[0]
62 |
63 | for m in _module_dir[1:]:
64 | _module_path = _module_path + '.' + m
65 | print(_module_path)
66 | plg_lib = importlib.import_module(_module_path)
67 |
68 | assert args.eval or args.format_only, (
69 | 'Please specify at least one operation (eval/format the results) with '
70 | 'the argument "--eval", "--format-only"')
71 | if args.eval and args.format_only:
72 | raise ValueError('--eval and --format_only cannot be both specified')
73 |
74 | if args.cfg_options is not None:
75 | cfg.merge_from_dict(args.cfg_options)
76 | cfg.data.test.test_mode = True
77 |
78 | dataset = build_dataset(cfg.data.test)
79 | outputs = mmcv.load(args.pkl_results)
80 |
81 | kwargs = {} if args.eval_options is None else args.eval_options
82 | if args.format_only:
83 | dataset.format_results(outputs, **kwargs)
84 | if args.eval:
85 | eval_kwargs = cfg.get('evaluation', {}).copy()
86 | # hard-code way to remove EvalHook args
87 | for key in [
88 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
89 | 'rule'
90 | ]:
91 | eval_kwargs.pop(key, None)
92 | eval_kwargs.update(dict(metric=args.eval, **kwargs))
93 | print(dataset.evaluate(outputs, **eval_kwargs))
94 |
95 |
96 | if __name__ == '__main__':
97 | main()
98 |
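# Example invocation (an assumption): evaluate detections previously dumped via
# `extra_tools/test.py ... --out results.pkl`:
#   python extra_tools/eval_metric.py projects/configs/uni3detr/uni3detr_sunrgbd.py results.pkl --eval bbox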
--------------------------------------------------------------------------------
/extra_tools/get_flops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os
4 | import torch
5 | from mmcv import Config, DictAction
6 |
7 | from mmdet3d.models import build_model
8 |
9 | try:
10 | from mmcv.cnn import get_model_complexity_info
11 | except ImportError:
12 | raise ImportError('Please upgrade mmcv to >0.6.2')
13 |
14 |
15 | def parse_args():
16 | parser = argparse.ArgumentParser(description='Train a detector')
17 | parser.add_argument('config', help='train config file path')
18 | parser.add_argument(
19 | '--shape',
20 | type=int,
21 | nargs='+',
22 | default=[40000, 5],
23 | help='input point cloud size')
24 | parser.add_argument(
25 | '--modality',
26 | type=str,
27 | default='point',
28 | choices=['point', 'image', 'multi'],
29 | help='input data modality')
30 | parser.add_argument(
31 | '--cfg-options',
32 | nargs='+',
33 | action=DictAction,
34 | help='override some settings in the used config, the key-value pair '
35 | 'in xxx=yyy format will be merged into config file. If the value to '
36 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
37 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
38 | 'Note that the quotation marks are necessary and that no white space '
39 | 'is allowed.')
40 | args = parser.parse_args()
41 | return args
42 |
43 |
44 | def main():
45 |
46 | args = parse_args()
47 |
48 | if args.modality == 'point':
49 | assert len(args.shape) == 2, 'invalid input shape'
50 | input_shape = tuple(args.shape)
51 | elif args.modality == 'image':
52 | if len(args.shape) == 1:
53 | input_shape = (3, args.shape[0], args.shape[0])
54 | elif len(args.shape) == 2:
55 | input_shape = (3, ) + tuple(args.shape)
56 | else:
57 | raise ValueError('invalid input shape')
58 | elif args.modality == 'multi':
59 | raise NotImplementedError(
60 | 'FLOPs counter is currently not supported for models with '
61 | 'multi-modality input')
62 |
63 | cfg = Config.fromfile(args.config)
64 | if args.cfg_options is not None:
65 | cfg.merge_from_dict(args.cfg_options)
66 |
67 | if hasattr(cfg, 'plugin'):
68 | if cfg.plugin:
69 | import importlib
70 | if hasattr(cfg, 'plugin_dir'):
71 | plugin_dir = cfg.plugin_dir
72 | _module_dir = os.path.dirname(plugin_dir)
73 | _module_dir = _module_dir.split('/')
74 | _module_path = _module_dir[0]
75 |
76 | for m in _module_dir[1:]:
77 | _module_path = _module_path + '.' + m
78 | print(_module_path)
79 | plg_lib = importlib.import_module(_module_path)
80 | else:
81 | # import dir is the dirpath for the config file
82 | _module_dir = os.path.dirname(args.config)
83 | _module_dir = _module_dir.split('/')
84 | _module_path = _module_dir[0]
85 | for m in _module_dir[1:]:
86 | _module_path = _module_path + '.' + m
87 | print(_module_path)
88 | plg_lib = importlib.import_module(_module_path)
89 |
90 | # set cudnn_benchmark
91 | if cfg.get('cudnn_benchmark', False):
92 | torch.backends.cudnn.benchmark = True
93 |
94 | # work_dir is determined in this priority: CLI > segment in file > filename
95 | #if args.work_dir is not None:
96 | # update configs according to CLI args if args.work_dir is not None
97 |
98 | model = build_model(
99 | cfg.model,
100 | train_cfg=cfg.get('train_cfg'),
101 | test_cfg=cfg.get('test_cfg'))
102 | if torch.cuda.is_available():
103 | model.cuda()
104 | model.eval()
105 |
106 | if hasattr(model, 'forward_dummy'):
107 | model.forward = model.forward_dummy
108 | else:
109 | raise NotImplementedError(
110 | 'FLOPs counter is currently not supported for {}'.format(
111 | model.__class__.__name__))
112 |
113 | flops, params = get_model_complexity_info(model, input_shape)
114 | split_line = '=' * 30
115 | print(f'{split_line}\nInput shape: {input_shape}\n'
116 | f'Flops: {flops}\nParams: {params}\n{split_line}')
117 | print('!!!Please be cautious if you use the results in papers. '
118 | 'You may need to check if all ops are supported and verify that the '
119 | 'flops computation is correct.')
120 |
121 |
122 | if __name__ == '__main__':
123 | main()
124 |
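# Example invocation (an assumption): FLOPs of a point-cloud model with a 40000 x 5 point input
#   python extra_tools/get_flops.py projects/configs/uni3detr/uni3detr_sunrgbd.py --shape 40000 5 --modality point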
--------------------------------------------------------------------------------
/extra_tools/test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import mmcv
4 | import os
5 | import torch
6 | import warnings
7 | from mmcv import Config, DictAction
8 | from mmcv.cnn import fuse_conv_bn
9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
10 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
11 | wrap_fp16_model)
12 |
13 | from mmdet3d.apis import single_gpu_test
14 | from mmdet3d.datasets import build_dataloader, build_dataset
15 | from mmdet3d.models import build_model
16 | from mmdet.apis import multi_gpu_test, set_random_seed
17 | from mmdet.datasets import replace_ImageToTensor
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(
21 | description='MMDet test (and eval) a model')
22 | parser.add_argument('config', help='test config file path')
23 | parser.add_argument('checkpoint', help='checkpoint file')
24 | parser.add_argument('--out', help='output result file in pickle format')
25 | parser.add_argument(
26 | '--fuse-conv-bn',
27 | action='store_true',
28 | help='Whether to fuse conv and bn, this will slightly increase '
29 | 'the inference speed')
30 | parser.add_argument(
31 | '--format-only',
32 | action='store_true',
33 | help='Format the output results without performing evaluation. It is '
34 | 'useful when you want to format the result to a specific format and '
35 | 'submit it to the test server')
36 | parser.add_argument(
37 | '--eval',
38 | type=str,
39 | nargs='+',
40 | help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
41 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
42 | parser.add_argument('--show', action='store_true', help='show results')
43 | parser.add_argument(
44 | '--show-dir', help='directory where results will be saved')
45 | parser.add_argument(
46 | '--gpu-collect',
47 | action='store_true',
48 | help='whether to use gpu to collect results.')
49 | parser.add_argument(
50 | '--tmpdir',
51 | help='tmp directory used for collecting results from multiple '
52 | 'workers, available when gpu-collect is not specified')
53 | parser.add_argument('--seed', type=int, default=0, help='random seed')
54 | parser.add_argument(
55 | '--deterministic',
56 | action='store_true',
57 | help='whether to set deterministic options for CUDNN backend.')
58 | parser.add_argument(
59 | '--cfg-options',
60 | nargs='+',
61 | action=DictAction,
62 | help='override some settings in the used config, the key-value pair '
63 | 'in xxx=yyy format will be merged into config file. If the value to '
64 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
65 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
66 | 'Note that the quotation marks are necessary and that no white space '
67 | 'is allowed.')
68 | parser.add_argument(
69 | '--options',
70 | nargs='+',
71 | action=DictAction,
72 | help='custom options for evaluation, the key-value pair in xxx=yyy '
73 | 'format will be kwargs for dataset.evaluate() function (deprecated), '
74 | 'change to --eval-options instead.')
75 | parser.add_argument(
76 | '--eval-options',
77 | nargs='+',
78 | action=DictAction,
79 | help='custom options for evaluation, the key-value pair in xxx=yyy '
80 | 'format will be kwargs for dataset.evaluate() function')
81 | parser.add_argument(
82 | '--launcher',
83 | choices=['none', 'pytorch', 'slurm', 'mpi'],
84 | default='none',
85 | help='job launcher')
86 | parser.add_argument('--local_rank', type=int, default=0)
87 | args = parser.parse_args()
88 | if 'LOCAL_RANK' not in os.environ:
89 | os.environ['LOCAL_RANK'] = str(args.local_rank)
90 |
91 | if args.options and args.eval_options:
92 | raise ValueError(
93 | '--options and --eval-options cannot be both specified, '
94 | '--options is deprecated in favor of --eval-options')
95 | if args.options:
96 | warnings.warn('--options is deprecated in favor of --eval-options')
97 | args.eval_options = args.options
98 | return args
99 |
100 |
101 | def main():
102 | args = parse_args()
103 |
104 | assert args.out or args.eval or args.format_only or args.show \
105 | or args.show_dir, \
106 | ('Please specify at least one operation (save/eval/format/show the '
107 | 'results / save the results) with the argument "--out", "--eval"'
108 | ', "--format-only", "--show" or "--show-dir"')
109 |
110 | if args.eval and args.format_only:
111 | raise ValueError('--eval and --format_only cannot be both specified')
112 |
113 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
114 | raise ValueError('The output file must be a pkl file.')
115 |
116 | cfg = Config.fromfile(args.config)
117 | if args.cfg_options is not None:
118 | cfg.merge_from_dict(args.cfg_options)
119 | # import modules from string list.
120 | if cfg.get('custom_imports', None):
121 | from mmcv.utils import import_modules_from_strings
122 | import_modules_from_strings(**cfg['custom_imports'])
123 |
124 | # import modules from plugin/xx, the registry will be updated
125 | if hasattr(cfg, 'plugin'):
126 | if cfg.plugin:
127 | import importlib
128 | if hasattr(cfg, 'plugin_dir'):
129 | plugin_dir = cfg.plugin_dir
130 | _module_dir = os.path.dirname(plugin_dir)
131 | _module_dir = _module_dir.split('/')
132 | _module_path = _module_dir[0]
133 |
134 | for m in _module_dir[1:]:
135 | _module_path = _module_path + '.' + m
136 | print(_module_path)
137 | plg_lib = importlib.import_module(_module_path)
138 | else:
139 | # import dir is the dirpath for the config file
140 | _module_dir = os.path.dirname(args.config)
141 | _module_dir = _module_dir.split('/')
142 | _module_path = _module_dir[0]
143 | for m in _module_dir[1:]:
144 | _module_path = _module_path + '.' + m
145 | print(_module_path)
146 | plg_lib = importlib.import_module(_module_path)
147 |
148 | # set cudnn_benchmark
149 | if cfg.get('cudnn_benchmark', False):
150 | torch.backends.cudnn.benchmark = True
151 |
152 | cfg.model.pretrained = None
153 | # in case the test dataset is concatenated
154 | samples_per_gpu = 1
155 | if isinstance(cfg.data.test, dict):
156 | cfg.data.test.test_mode = True
157 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
158 | if samples_per_gpu > 1:
159 | # Replace 'ImageToTensor' to 'DefaultFormatBundle'
160 | cfg.data.test.pipeline = replace_ImageToTensor(
161 | cfg.data.test.pipeline)
162 | elif isinstance(cfg.data.test, list):
163 | for ds_cfg in cfg.data.test:
164 | ds_cfg.test_mode = True
165 | samples_per_gpu = max(
166 | [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
167 | if samples_per_gpu > 1:
168 | for ds_cfg in cfg.data.test:
169 | ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
170 |
171 | # init distributed env first, since logger depends on the dist info.
172 | if args.launcher == 'none':
173 | distributed = False
174 | else:
175 | distributed = True
176 | init_dist(args.launcher, **cfg.dist_params)
177 |
178 | # set random seeds
179 | if args.seed is not None:
180 | set_random_seed(args.seed, deterministic=args.deterministic)
181 |
182 | # build the dataloader
183 | dataset = build_dataset(cfg.data.test)
184 | data_loader = build_dataloader(
185 | dataset,
186 | samples_per_gpu=samples_per_gpu,
187 | workers_per_gpu=cfg.data.workers_per_gpu,
188 | dist=distributed,
189 | shuffle=False)
190 |
191 | # build the model and load checkpoint
192 | cfg.model.train_cfg = None
193 | model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
194 | fp16_cfg = cfg.get('fp16', None)
195 | if fp16_cfg is not None:
196 | wrap_fp16_model(model)
197 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
198 | if args.fuse_conv_bn:
199 | model = fuse_conv_bn(model)
200 | # old versions did not save class info in checkpoints, this workaround is
201 | # for backward compatibility
202 | if 'CLASSES' in checkpoint.get('meta', {}):
203 | model.CLASSES = checkpoint['meta']['CLASSES']
204 | else:
205 | model.CLASSES = dataset.CLASSES
206 | # palette for visualization in segmentation tasks
207 | if 'PALETTE' in checkpoint.get('meta', {}):
208 | model.PALETTE = checkpoint['meta']['PALETTE']
209 | elif hasattr(dataset, 'PALETTE'):
210 | # segmentation dataset has `PALETTE` attribute
211 | model.PALETTE = dataset.PALETTE
212 |
213 | if not distributed:
214 | model = MMDataParallel(model, device_ids=[0])
215 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
216 | else:
217 | model = MMDistributedDataParallel(
218 | model.cuda(),
219 | device_ids=[torch.cuda.current_device()],
220 | broadcast_buffers=False)
221 | outputs = multi_gpu_test(model, data_loader, args.tmpdir,
222 | args.gpu_collect)
223 |
224 | rank, _ = get_dist_info()
225 | if rank == 0:
226 | if args.out:
227 | print(f'\nwriting results to {args.out}')
228 | mmcv.dump(outputs, args.out)
229 | kwargs = {} if args.eval_options is None else args.eval_options
230 | if args.format_only:
231 | dataset.format_results(outputs, **kwargs)
232 | if args.eval:
233 | eval_kwargs = cfg.get('evaluation', {}).copy()
234 | # hard-code way to remove EvalHook args
235 | for key in [
236 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
237 | 'rule'
238 | ]:
239 | eval_kwargs.pop(key, None)
240 | eval_kwargs.update(dict(metric=args.eval, **kwargs))
241 | print(dataset.evaluate(outputs, **eval_kwargs))
242 |
243 |
244 | if __name__ == '__main__':
245 | warnings.filterwarnings("ignore")
246 | torch.multiprocessing.set_start_method('fork')
247 | main()
248 |
--------------------------------------------------------------------------------
/extra_tools/train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from __future__ import division
3 |
4 | import argparse
5 | import copy
6 | import mmcv
7 | import os
8 | import time
9 | import torch
10 | import warnings
11 | from mmcv import Config, DictAction
12 | from mmcv.runner import get_dist_info, init_dist
13 | from os import path as osp
14 |
15 | from mmdet import __version__ as mmdet_version
16 | from mmdet3d import __version__ as mmdet3d_version
17 | from mmdet3d.apis import train_model
18 | from mmdet3d.datasets import build_dataset
19 | from mmdet3d.models import build_model
20 | from mmdet3d.utils import collect_env, get_root_logger
21 | from mmdet.apis import set_random_seed
22 | from mmseg import __version__ as mmseg_version
23 |
24 |
25 | def parse_args():
26 | parser = argparse.ArgumentParser(description='Train a detector')
27 | parser.add_argument('config', help='train config file path')
28 | parser.add_argument('--work-dir', help='the dir to save logs and models')
29 | parser.add_argument(
30 | '--resume-from', help='the checkpoint file to resume from')
31 | parser.add_argument(
32 | '--no-validate',
33 | action='store_true',
34 | help='whether not to evaluate the checkpoint during training')
35 | group_gpus = parser.add_mutually_exclusive_group()
36 | group_gpus.add_argument(
37 | '--gpus',
38 | type=int,
39 | help='number of gpus to use '
40 | '(only applicable to non-distributed training)')
41 | group_gpus.add_argument(
42 | '--gpu-ids',
43 | type=int,
44 | nargs='+',
45 | help='ids of gpus to use '
46 | '(only applicable to non-distributed training)')
47 | parser.add_argument('--seed', type=int, default=0, help='random seed')
48 | parser.add_argument(
49 | '--deterministic',
50 | action='store_true',
51 | help='whether to set deterministic options for CUDNN backend.')
52 | parser.add_argument(
53 | '--options',
54 | nargs='+',
55 | action=DictAction,
56 | help='override some settings in the used config, the key-value pair '
57 | 'in xxx=yyy format will be merged into config file (deprecated), '
58 | 'change to --cfg-options instead.')
59 | parser.add_argument(
60 | '--cfg-options',
61 | nargs='+',
62 | action=DictAction,
63 | help='override some settings in the used config, the key-value pair '
64 | 'in xxx=yyy format will be merged into config file. If the value to '
65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
66 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
67 | 'Note that the quotation marks are necessary and that no white space '
68 | 'is allowed.')
69 | parser.add_argument(
70 | '--launcher',
71 | choices=['none', 'pytorch', 'slurm', 'mpi'],
72 | default='none',
73 | help='job launcher')
74 | parser.add_argument('--local_rank', type=int, default=0)
75 | parser.add_argument(
76 | '--autoscale-lr',
77 | action='store_true',
78 | help='automatically scale lr with the number of gpus')
79 | args = parser.parse_args()
80 | if 'LOCAL_RANK' not in os.environ:
81 | os.environ['LOCAL_RANK'] = str(args.local_rank)
82 |
83 | if args.options and args.cfg_options:
84 | raise ValueError(
85 | '--options and --cfg-options cannot be both specified, '
86 | '--options is deprecated in favor of --cfg-options')
87 | if args.options:
88 | warnings.warn('--options is deprecated in favor of --cfg-options')
89 | args.cfg_options = args.options
90 |
91 | return args
92 |
93 |
94 | def main():
95 | args = parse_args()
96 |
97 | cfg = Config.fromfile(args.config)
98 | if args.cfg_options is not None:
99 | cfg.merge_from_dict(args.cfg_options)
100 | # import modules from string list.
101 | if cfg.get('custom_imports', None):
102 | from mmcv.utils import import_modules_from_strings
103 | import_modules_from_strings(**cfg['custom_imports'])
104 |
105 | # import modules from plugin/xx, the registry will be updated
106 | if hasattr(cfg, 'plugin'):
107 | if cfg.plugin:
108 | import importlib
109 | if hasattr(cfg, 'plugin_dir'):
110 | plugin_dir = cfg.plugin_dir
111 | _module_dir = os.path.dirname(plugin_dir)
112 | _module_dir = _module_dir.split('/')
113 | _module_path = _module_dir[0]
114 |
115 | for m in _module_dir[1:]:
116 | _module_path = _module_path + '.' + m
117 | print(_module_path)
118 | plg_lib = importlib.import_module(_module_path)
119 | else:
120 | # import dir is the dirpath for the config file
121 | _module_dir = os.path.dirname(args.config)
122 | _module_dir = _module_dir.split('/')
123 | _module_path = _module_dir[0]
124 | for m in _module_dir[1:]:
125 | _module_path = _module_path + '.' + m
126 | print(_module_path)
127 | plg_lib = importlib.import_module(_module_path)
128 |
129 | # set cudnn_benchmark
130 | if cfg.get('cudnn_benchmark', False):
131 | torch.backends.cudnn.benchmark = True
132 |
133 | # work_dir is determined in this priority: CLI > segment in file > filename
134 | if args.work_dir is not None:
135 | # update configs according to CLI args if args.work_dir is not None
136 | cfg.work_dir = args.work_dir
137 | elif cfg.get('work_dir', None) is None:
138 | # use config filename as default work_dir if cfg.work_dir is None
139 | cfg.work_dir = osp.join('./work_dirs',
140 | osp.splitext(osp.basename(args.config))[0])
141 | if args.resume_from is not None:
142 | cfg.resume_from = args.resume_from
143 | if args.gpu_ids is not None:
144 | cfg.gpu_ids = args.gpu_ids
145 | else:
146 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
147 |
148 | if args.autoscale_lr:
149 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
150 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
151 |
152 | # init distributed env first, since logger depends on the dist info.
153 | if args.launcher == 'none':
154 | distributed = False
155 | else:
156 | distributed = True
157 | init_dist(args.launcher, **cfg.dist_params)
158 | # re-set gpu_ids with distributed training mode
159 | _, world_size = get_dist_info()
160 | cfg.gpu_ids = range(world_size)
161 |
162 | # create work_dir
163 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
164 | # dump config
165 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
166 | # init the logger before other steps
167 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
168 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
169 | # specify logger name, if we still use 'mmdet', the output info will be
170 | # filtered and won't be saved in the log_file
171 | # TODO: ugly workaround to judge whether we are training det or seg model
172 | if cfg.model.type in ['EncoderDecoder3D']:
173 | logger_name = 'mmseg'
174 | else:
175 | logger_name = 'mmdet'
176 | logger = get_root_logger(
177 | log_file=log_file, log_level=cfg.log_level, name=logger_name)
178 |
179 | # init the meta dict to record some important information such as
180 | # environment info and seed, which will be logged
181 | meta = dict()
182 | # log env info
183 | env_info_dict = collect_env()
184 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
185 | dash_line = '-' * 60 + '\n'
186 | logger.info('Environment info:\n' + dash_line + env_info + '\n' +
187 | dash_line)
188 | meta['env_info'] = env_info
189 | meta['config'] = cfg.pretty_text
190 |
191 | # log some basic info
192 | logger.info(f'Distributed training: {distributed}')
193 | logger.info(f'Config:\n{cfg.pretty_text}')
194 |
195 | # set random seeds
196 | if args.seed is not None:
197 | logger.info(f'Set random seed to {args.seed}, '
198 | f'deterministic: {args.deterministic}')
199 | set_random_seed(args.seed, deterministic=args.deterministic)
200 | cfg.seed = args.seed
201 | meta['seed'] = args.seed
202 | meta['exp_name'] = osp.basename(args.config)
203 |
204 | model = build_model(
205 | cfg.model,
206 | train_cfg=cfg.get('train_cfg'),
207 | test_cfg=cfg.get('test_cfg'))
208 | model.init_weights()
209 |
210 | logger.info(f'Model:\n{model}')
211 | datasets = [build_dataset(cfg.data.train)]
212 | if len(cfg.workflow) == 2:
213 | val_dataset = copy.deepcopy(cfg.data.val)
214 | # in case we use a dataset wrapper
215 | if 'dataset' in cfg.data.train:
216 | val_dataset.pipeline = cfg.data.train.dataset.pipeline
217 | else:
218 | val_dataset.pipeline = cfg.data.train.pipeline
219 | # set test_mode=False here in deep copied config
220 | # which does not affect AP/AR calculation later
221 | # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa
222 | val_dataset.test_mode = False
223 | datasets.append(build_dataset(val_dataset))
224 | if cfg.checkpoint_config is not None:
225 | # save mmdet version, config file content and class names in
226 | # checkpoints as meta data
227 | cfg.checkpoint_config.meta = dict(
228 | mmdet_version=mmdet_version,
229 | mmseg_version=mmseg_version,
230 | mmdet3d_version=mmdet3d_version,
231 | config=cfg.pretty_text,
232 | CLASSES=datasets[0].CLASSES,
233 | PALETTE=datasets[0].PALETTE # for segmentors
234 | if hasattr(datasets[0], 'PALETTE') else None)
235 | # add an attribute for visualization convenience
236 | model.CLASSES = datasets[0].CLASSES
237 |
238 | # for v in model.parameters():
239 | # v.requires_grad = False
240 | # for v in model.pts_backbone.parameters():
241 | # v.requires_grad = False
242 | # for v in model.pts_neck.parameters():
243 | # v.requires_grad = False
244 | # for v in model.pts_middle_encoder.parameters():
245 | # v.requires_grad = False
246 |
247 | train_model(
248 | model,
249 | datasets,
250 | cfg,
251 | distributed=distributed,
252 | validate=(not args.no_validate),
253 | timestamp=timestamp,
254 | meta=meta)
255 |
256 |
257 | if __name__ == '__main__':
258 | torch.multiprocessing.set_start_method('fork')
259 | main()
260 |
--------------------------------------------------------------------------------
/model-index.yml:
--------------------------------------------------------------------------------
1 | Import:
2 | - configs/3dssd/metafile.yml
3 | - configs/centerpoint/metafile.yml
4 | - configs/dynamic_voxelization/metafile.yml
5 | - configs/fcaf3d/metafile.yml
6 | - configs/fcos3d/metafile.yml
7 | - configs/free_anchor/metafile.yml
8 | - configs/groupfree3d/metafile.yml
9 | - configs/h3dnet/metafile.yml
10 | - configs/imvotenet/metafile.yml
11 | - configs/imvoxelnet/metafile.yml
12 | - configs/mvxnet/metafile.yml
13 | - configs/nuimages/metafile.yml
14 | - configs/parta2/metafile.yml
15 | - configs/pgd/metafile.yml
16 | - configs/pointnet2/metafile.yml
17 | - configs/pointpillars/metafile.yml
18 | - configs/regnet/metafile.yml
19 | - configs/second/metafile.yml
20 | - configs/smoke/metafile.yml
21 | - configs/ssn/metafile.yml
22 | - configs/votenet/metafile.yml
23 |
--------------------------------------------------------------------------------
/projects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhenyuw16/Uni3DETR/15cb08a7ddfc2e4f0ae5a5a7b9ec6a7be8175399/projects/__init__.py
--------------------------------------------------------------------------------
/projects/configs/ov_uni3detr/ov_uni3detr_sunrgbd_pc.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../../configs/_base_/default_runtime.py'
3 | ]
4 |
5 | plugin=True
6 | plugin_dir='projects/mmdet3d_plugin/'
7 |
8 | # If point cloud range is changed, the models should also change their point
9 | # cloud range accordingly
10 | voxel_size = [0.02, 0.02, 0.02]
11 | grid_size = [128, 320, 320]
12 | point_cloud_range = [-3.2, -0.2, -2., 3.2, 6.2, 0.56]
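# The grid size follows from the range and the voxel size, e.g. here:
#   x: (3.2 - (-3.2)) / 0.02 = 320
#   y: (6.2 - (-0.2)) / 0.02 = 320
#   z: (0.56 - (-2.0)) / 0.02 = 128
# which is why grid_size = [128, 320, 320] (apparently stored z-first for the sparse encoder).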
13 |
14 | fp16_enabled = False
15 | bev_stride = 4
16 | sample_num = 5
17 |
18 |
19 | input_modality = dict(
20 | use_lidar=True,
21 | use_camera=False,
22 | use_radar=False,
23 | use_map=False,
24 | use_external=False)
25 |
26 | model = dict(
27 | type='OV_Uni3DETR',
28 | pts_voxel_layer=dict(
29 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000),
30 | point_cloud_range=point_cloud_range),
31 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4),
32 | pts_middle_encoder=dict(
33 | type='SparseEncoderHD',
34 | in_channels=4,
35 | sparse_shape=grid_size,
36 | output_channels=256,
37 | order=('conv', 'norm', 'act'),
38 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
39 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
40 | block_type='basicblock',
41 | fp16_enabled=False), # not enable FP16 here
42 | pts_backbone=dict(
43 | type='SECOND3D',
44 | in_channels=[256, 256, 256],
45 | out_channels=[128, 256, 512],
46 | layer_nums=[5, 5, 5],
47 | layer_strides=[1, 2, 4],
48 | is_cascade=False,
49 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
50 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)),
51 | pts_neck=dict(
52 | type='SECOND3DFPN',
53 | in_channels=[128, 256, 512],
54 | out_channels=[256, 256, 256],
55 | upsample_strides=[1, 2, 4],
56 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
57 | upsample_cfg=dict(type='deconv3d', bias=False),
58 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
59 | use_conv_for_no_stride=True),
60 | pts_bbox_head=dict(
61 | type='Uni3DETRHeadCLIP',
62 | num_query=300,
63 | zeroshot_path='clip_embed/sunrgbd_clip_a+cname_rn50_manyprompt_46c_coda.npy',
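        # presumably a [num_classes, embed_dim] array of CLIP text embeddings
        # (prompt-ensembled RN50 features for the 46 class names), used as the
        # classifier weights for open-vocabulary classification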
64 | num_classes=46,
65 | in_channels=256,
66 | sync_cls_avg_factor=True,
67 | with_box_refine=True,
68 | as_two_stage=False,
69 | code_size=8,
70 | transformer=dict(
71 | type='Uni3DETRTransformer',
72 | fp16_enabled=fp16_enabled,
73 | decoder=dict(
74 | type='Uni3DETRTransformerDecoder',
75 | num_layers=3,
76 | return_intermediate=True,
77 | transformerlayers=dict(
78 | type='BaseTransformerLayer',
79 | attn_cfgs=[
80 | dict(
81 | type='MultiheadAttention',
82 | embed_dims=256,
83 | num_heads=8,
84 | dropout=0.1),
85 | dict(
86 | type='UniCrossAtten',
87 | num_points=1,
88 | embed_dims=256,
89 | num_sweeps=1,
90 | fp16_enabled=fp16_enabled),
91 | ],
92 | ffn_cfgs=dict(
93 | type='FFN',
94 | embed_dims=256,
95 | feedforward_channels=512,
96 | num_fcs=2,
97 | ffn_drop=0.1,
98 | act_cfg=dict(type='ReLU', inplace=True),
99 | ),
100 | norm_cfg=dict(type='LN'),
101 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'))
102 | )
103 | ),
104 | bbox_coder=dict(
105 | type='NMSFreeCoder',
106 | post_center_range=point_cloud_range,
107 | pc_range=point_cloud_range,
108 | max_num=1000,
109 | voxel_size=voxel_size,
110 | alpha=1.0,
111 | num_classes=46),
112 | post_processing=dict(
113 | type='nms',
114 | nms_thr=0.5),
115 |         # Note: soft-NMS can give slightly higher results:
116 | # post_processing=dict(
117 | # type='soft_nms',
118 | # gaussian_sigma=0.3,
119 | # prune_threshold=1e-2),
120 | positional_encoding=dict(
121 | type='SinePositionalEncoding',
122 | num_feats=128,
123 | normalize=True,
124 | offset=-0.5),
125 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5),
126 | loss_bbox=dict(type='L1Loss', loss_weight=0.25),
127 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2),
128 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
129 | ),
130 | # model training and testing settings
131 | train_cfg=dict(pts=dict(
132 | grid_size=grid_size,
133 | voxel_size=voxel_size,
134 | point_cloud_range=point_cloud_range,
135 | out_size_factor=bev_stride,
136 | assigner=dict(
137 | type='HungarianAssigner3D',
138 | cls_cost=dict(type='FocalLossCost', weight=2.0),
139 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
140 | iou_cost=dict(type='IoU3DCost', weight=1.2),
141 | pc_range=point_cloud_range))))
142 |
143 |
144 | dataset_type = 'SUNRGBDDataset_OV'
145 | data_root = 'data/sunrgbd_coda/'
146 | class_names = ('chair', 'table', 'pillow', 'sofa_chair', 'desk', 'bed', 'sofa', 'computer', 'box',
147 | 'lamp', 'garbage_bin', 'cabinet', 'shelf', 'drawer', 'sink', 'night_stand', 'kitchen_counter',
148 | 'paper', 'end_table', 'kitchen_cabinet', 'picture', 'book', 'stool', 'coffee_table', 'bookshelf',
149 | 'painting', 'key_board', 'dresser', 'tv', 'whiteboard', 'cpu', 'toilet', 'file_cabinet', 'bench',
150 | 'ottoman', 'plant', 'monitor', 'printer', 'recycle_bin', 'door', 'fridge', 'towel', 'cup', 'mirror',
151 | 'laptop', 'cloth')
152 |
153 | seen_classes = ('chair', 'table', 'pillow', 'sofa_chair', 'desk', 'bed', 'sofa', 'computer', 'lamp', 'box')
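# 10 seen (base) classes out of 46; the remaining 36 form the novel split,
# matching the '10c36c' tag in the training annotation file below.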
154 |
155 | file_client_args = dict(backend='disk')
156 |
157 | train_pipeline = [
158 | dict(
159 | type='LoadPointsFromFile',
160 | coord_type='DEPTH',
161 | shift_height=True,
162 | load_dim=6,
163 | use_dim=[0, 1, 2],
164 | file_client_args=file_client_args),
165 | dict(type='LoadAnnotations3D'),
166 | dict(
167 | type='UnifiedRandomFlip3D',
168 | sync_2d=False,
169 | flip_ratio_bev_horizontal=0.5,
170 | ),
171 | dict(
172 | type='UnifiedRotScaleTrans',
173 | rot_range=[-0.523599, 0.523599],
174 | scale_ratio_range=[0.85, 1.15],
175 | shift_height=True),
176 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
177 | # dict(type='PointSample', num_points=20000),
178 | dict(type='PointSample', num_points=200000),
179 | dict(type='DefaultFormatBundle3D', class_names=class_names),
180 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
181 | ]
182 | test_pipeline = [
183 | dict(
184 | type='LoadPointsFromFile',
185 | coord_type='DEPTH',
186 | shift_height=True,
187 | load_dim=6,
188 | use_dim=[0, 1, 2],
189 | file_client_args=file_client_args),
190 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
191 | # dict(type='PointSample', num_points=50000),
192 | dict(type='PointSample', num_points=200000),
193 | dict(
194 | type='DefaultFormatBundle3D',
195 | class_names=class_names,
196 | with_label=False),
197 | dict(type='Collect3D', keys=['points'])
198 | ]
199 |
200 | data = dict(
201 | samples_per_gpu=8,
202 | workers_per_gpu=4,
203 | train=dict(
204 | type='RepeatDataset',
205 |         times=2,  # 5
206 | dataset=dict(
207 | type=dataset_type,
208 | data_root=data_root,
209 | ann_file=data_root + 'sunrgbd_infos_train_pls_ens_10c36c.pkl',
210 | pipeline=train_pipeline,
211 | classes=class_names,
212 | seen_classes=seen_classes,
213 | filter_empty_gt=True,
214 | box_type_3d='Depth',
215 | file_client_args=file_client_args)),
216 | val=dict(
217 | type=dataset_type,
218 | data_root=data_root,
219 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl',
220 | pipeline=test_pipeline,
221 | classes=class_names,
222 | seen_classes=seen_classes,
223 | test_mode=True,
224 | box_type_3d='Depth',
225 | file_client_args=file_client_args),
226 | test=dict(
227 | type=dataset_type,
228 | data_root=data_root,
229 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl',
230 | pipeline=test_pipeline,
231 | classes=class_names,
232 | seen_classes=seen_classes,
233 | test_mode=True,
234 | box_type_3d='Depth',
235 | file_client_args=file_client_args))
236 |
237 | evaluation = dict(pipeline=test_pipeline, interval=5)
238 |
239 |
240 | # optimizer
241 | # This schedule is mainly used by models on indoor dataset,
242 | # e.g., VoteNet on SUNRGBD and ScanNet
243 | lr = 2e-5 * 2 / 8 * 40  # max learning rate (= 2e-4)
244 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
245 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
246 |
247 |
248 | lr_config = dict(policy='step', warmup=None, step=[32, 38])
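# With the step policy the learning rate is multiplied by gamma at epochs 32 and 38;
# this config does not override gamma, so mmcv's default of 0.1 applies.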
249 | runner = dict(type='EpochBasedRunner', max_epochs=40)
250 |
251 | # fp16 setting
252 | # fp16 = dict(loss_scale=32.)
253 | find_unused_parameters = True
254 |
--------------------------------------------------------------------------------
/projects/configs/ov_uni3detr/ov_uni3detr_sunrgbd_rgb.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../../configs/_base_/default_runtime.py'
3 | ]
4 |
5 | plugin=True
6 | plugin_dir='projects/mmdet3d_plugin/'
7 |
8 | # If point cloud range is changed, the models should also change their point
9 | # cloud range accordingly
10 | voxel_size = [0.02, 0.02, 0.02]
11 | grid_size = [128, 320, 320]
12 | point_cloud_range = [-3.2, -0.2, -2., 3.2, 6.2, 0.56]
13 |
14 |
15 | cam_sweep_num = 1
16 | fp16_enabled = False
17 | bev_stride = 8
18 | sample_num = 15
19 | voxel_shape = [int(((point_cloud_range[3]-point_cloud_range[0])/voxel_size[0])//bev_stride),
20 | int(((point_cloud_range[4]-point_cloud_range[1])/voxel_size[1])//bev_stride),
21 | sample_num]
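# For this range, voxel size and bev_stride the view-transform voxel grid works out to
#   x: (3.2 + 3.2) / 0.02 // 8 = 40
#   y: (6.2 + 0.2) / 0.02 // 8 = 40
# i.e. voxel_shape = [40, 40, 15] with sample_num = 15 depth samples.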
22 |
23 |
24 | input_modality = dict(
25 | use_lidar=False,
26 | use_camera=True,
27 | use_radar=False,
28 | use_map=False,
29 | use_external=False,
30 | cam_sweep_num=cam_sweep_num)
31 |
32 | model = dict(
33 | type='OV_Uni3DETR',
34 | use_grid_mask=True,
35 | img_backbone=dict(
36 | type='ResNet',
37 | depth=50,
38 | num_stages=4,
39 | out_indices=(0, 1, 2, 3),
40 | frozen_stages=1,
41 | norm_cfg=dict(type='BN', requires_grad=True),
42 | norm_eval=True,
43 | style='pytorch',
44 | dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
45 | stage_with_dcn=(False, True, True, True)
46 | ),
47 | img_neck=dict(
48 | type='FPN',
49 | in_channels=[256, 512, 1024, 2048],
50 | out_channels=256,
51 | num_outs=5,
52 | ),
53 | depth_head=dict(
54 | type='SimpleDepth',
55 | model=dict(
56 | depth_dim=64,
57 | )),
58 | view_cfg=dict(
59 | num_cams=1,
60 | num_convs=3,
61 | num_points=sample_num,
62 | num_sweeps=cam_sweep_num,
63 | kernel_size=(3,3,3),
64 | keep_sweep_dim=True,
65 | num_feature_levels=4,
66 | embed_dims=256,
67 | pc_range=point_cloud_range,
68 | voxel_shape=voxel_shape,
69 | fp16_enabled=fp16_enabled,
70 | ),
71 | pts_bbox_head=dict(
72 | type='Uni3DETRHeadCLIP',
73 | num_query=300,
74 | zeroshot_path='clip_embed/sunrgbd_clip_a+cname_rn50_manyprompt_46c_coda.npy',
75 | num_classes=46,
76 | in_channels=256,
77 | sync_cls_avg_factor=True,
78 | with_box_refine=True,
79 | as_two_stage=False,
80 | code_size=8,
81 | transformer=dict(
82 | type='Uni3DETRTransformer',
83 | fp16_enabled=fp16_enabled,
84 | decoder=dict(
85 | type='Uni3DETRTransformerDecoder',
86 | num_layers=6,
87 | return_intermediate=True,
88 | transformerlayers=dict(
89 | type='BaseTransformerLayer',
90 | attn_cfgs=[
91 | dict(
92 | type='MultiheadAttention',
93 | embed_dims=256,
94 | num_heads=8,
95 | dropout=0.1),
96 | dict(
97 | type='UniCrossAtten',
98 | num_points=1,
99 | embed_dims=256,
100 | num_sweeps=cam_sweep_num,
101 | fp16_enabled=fp16_enabled)
102 | ],
103 | ffn_cfgs=dict(
104 | type='FFN',
105 | embed_dims=256,
106 | feedforward_channels=512,
107 | num_fcs=2,
108 | ffn_drop=0.1,
109 | act_cfg=dict(type='ReLU', inplace=True),
110 | ),
111 | norm_cfg=dict(type='LN'),
112 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
113 | 'ffn', 'norm'))
114 | )
115 | ),
116 | bbox_coder=dict(
117 | type='NMSFreeCoder',
118 | post_center_range=point_cloud_range,
119 | pc_range=point_cloud_range,
120 | max_num=1000,
121 | voxel_size=voxel_size,
122 | alpha=1.0,
123 | num_classes=46
124 | ),
125 | post_processing=dict(
126 | type='nms',
127 | nms_thr=0.5),
128 | positional_encoding=dict(
129 | type='SinePositionalEncoding',
130 | num_feats=128,
131 | normalize=True,
132 | offset=-0.5),
133 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5),
134 | loss_bbox=dict(type='L1Loss', loss_weight=0.25),
135 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2),
136 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
137 | # model training and testing settings
138 | train_cfg=dict(pts=dict(
139 | grid_size=grid_size,
140 | voxel_size=voxel_size,
141 | point_cloud_range=point_cloud_range,
142 | out_size_factor=bev_stride,
143 | assigner=dict(
144 | type='HungarianAssigner3D',
145 | cls_cost=dict(type='FocalLossCost', weight=2.0),
146 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
147 |             iou_cost=dict(type='IoU3DCost', weight=1.2),
148 | pc_range=point_cloud_range))))
149 |
150 | dataset_type = 'SUNRGBDDataset'
151 | data_root = 'data/sunrgbd_coda/'
152 |
153 | # img_norm_cfg = dict(
154 | # mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
155 | img_norm_cfg = dict(
156 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
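# Standard ImageNet statistics: NormalizeMultiviewImage converts BGR to RGB
# (to_rgb=True) and then applies (img - mean) / std per channel.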
157 |
158 | class_names = ('chair', 'table', 'pillow', 'sofa_chair', 'desk', 'bed', 'sofa', 'computer', 'box',
159 | 'lamp', 'garbage_bin', 'cabinet', 'shelf', 'drawer', 'sink', 'night_stand', 'kitchen_counter',
160 | 'paper', 'end_table', 'kitchen_cabinet', 'picture', 'book', 'stool', 'coffee_table', 'bookshelf',
161 | 'painting', 'key_board', 'dresser', 'tv', 'whiteboard', 'cpu', 'toilet', 'file_cabinet', 'bench',
162 | 'ottoman', 'plant', 'monitor', 'printer', 'recycle_bin', 'door', 'fridge', 'towel', 'cup', 'mirror',
163 | 'laptop', 'cloth')
164 |
165 |
166 | file_client_args = dict(backend='disk')
167 |
168 |
169 | train_pipeline = [
170 | dict(type='LoadMultiViewMultiSweepImageFromFilesIndoor', sweep_num=cam_sweep_num, to_float32=True),
171 | dict(type='PhotoMetricDistortionMultiViewImage'),
172 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
173 | dict(
174 | type='UnifiedRotScaleTrans',
175 | rot_range=[-0.3925, 0.3925],
176 | scale_ratio_range=[0.95, 1.05],
177 | ),
178 | dict(type='NormalizeMultiviewImage', **img_norm_cfg),
179 | dict(type='PadMultiViewImage', size_divisor=32),
180 | dict(type='DefaultFormatBundle3D', class_names=class_names),
181 | dict(type='CollectUnified3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'])
182 | ]
183 | test_pipeline = [
184 | dict(type='LoadMultiViewMultiSweepImageFromFilesIndoor', sweep_num=cam_sweep_num, to_float32=True),
185 | dict(type='NormalizeMultiviewImage', **img_norm_cfg),
186 | dict(type='PadMultiViewImage', size_divisor=32),
187 | dict(type='DefaultFormatBundle3D', class_names=class_names),
188 | dict(type='CollectUnified3D', keys=['img'])
189 | ]
190 |
191 |
192 |
193 | data = dict(
194 | samples_per_gpu=2,
195 | workers_per_gpu=4,
196 | train=dict(
197 | type='RepeatDataset',
198 |         times=2,  # 5
199 | dataset=dict(
200 | type=dataset_type,
201 | data_root=data_root,
202 | ann_file = data_root + 'sunrgbd_infos_train_pls_ens_10c36c.pkl',
203 | pipeline=train_pipeline,
204 | classes=class_names,
205 | filter_empty_gt=True,
206 | box_type_3d='Depth',
207 | file_client_args=file_client_args)),
208 | val=dict(
209 | type=dataset_type,
210 | data_root=data_root,
211 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl',
212 | pipeline=test_pipeline,
213 | classes=class_names,
214 | test_mode=True,
215 | box_type_3d='Depth',
216 | file_client_args=file_client_args),
217 | test=dict(
218 | type=dataset_type,
219 | data_root=data_root,
220 | ann_file=data_root + 'sunrgbd_infos_val_withimg.pkl',
221 | pipeline=test_pipeline,
222 | classes=class_names,
223 | test_mode=True,
224 | box_type_3d='Depth',
225 | file_client_args=file_client_args))
226 |
227 | evaluation = dict(pipeline=test_pipeline, interval=5)
228 |
229 |
230 | # optimizer
231 | # This schedule is mainly used by models on indoor dataset,
232 | # e.g., VoteNet on SUNRGBD and ScanNet
233 | optimizer = dict(
234 | type='AdamW',
235 | lr=1.75e-4,
236 | # lr=2e-4,
237 | paramwise_cfg=dict(
238 | custom_keys={
239 | 'img_backbone': dict(lr_mult=0.1),
240 | }),
241 | weight_decay=0.01)
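# With lr_mult=0.1 every parameter whose name contains 'img_backbone' is trained
# with 0.1 * 1.75e-4 = 1.75e-5, while the rest of the model uses the full 1.75e-4.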
242 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
243 |
244 |
245 | lr_config = dict(policy='step', warmup=None, step=[32, 38])
246 |
247 | # runtime settings
248 | runner = dict(type='EpochBasedRunner', max_epochs=40)
249 |
250 | # fp16 setting
251 | # fp16 = dict(loss_scale=32.)
252 | load_from = 'faster_rcnn_r50_caffe_fpn_1x_coco_dcnv2_c.pth'
253 |
254 | find_unused_parameters = True
255 |
--------------------------------------------------------------------------------
/projects/configs/uni3detr/uni3detr_kitti_3classes.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../../configs/_base_/default_runtime.py'
3 | ]
4 |
5 | plugin=True
6 | plugin_dir='projects/mmdet3d_plugin/'
7 |
8 | # If point cloud range is changed, the models should also change their point
9 | # cloud range accordingly
10 | point_cloud_range = [0, -40, -3, 70.4, 40, 1]
11 | voxel_size = [0.05, 0.05, 0.1]
12 | fp16_enabled = True
13 | bev_stride = 4
14 | sample_num = 5
15 | # 3-class detection on KITTI (Pedestrian, Cyclist, Car)
16 | class_names = ['Pedestrian', 'Cyclist', 'Car']
17 |
18 | input_modality = dict(
19 | use_lidar=True,
20 | use_camera=False,
21 | use_radar=False,
22 | use_map=False,
23 | use_external=False)
24 |
25 | use_dab = True
26 |
27 | model = dict(
28 | type='Uni3DETR',
29 | pts_voxel_layer=dict(
30 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000),
31 | point_cloud_range=point_cloud_range),
32 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4),
33 | pts_middle_encoder=dict(
34 | type='SparseEncoderHD',
35 | in_channels=4,
36 | sparse_shape=[41, 1600, 1408],
37 | output_channels=256,
38 | order=('conv', 'norm', 'act'),
39 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
40 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
41 | block_type='basicblock',
42 | fp16_enabled=False), # not enable FP16 here
43 | pts_backbone=dict(
44 | type='SECOND3D',
45 | in_channels=[256, 256, 256],
46 | out_channels=[128, 256, 512],
47 | layer_nums=[5, 5, 5],
48 | layer_strides=[1, 2, 4],
49 | is_cascade=False,
50 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
51 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)),
52 | pts_neck=dict(
53 | type='SECOND3DFPN',
54 | in_channels=[128, 256, 512],
55 | out_channels=[256, 256, 256],
56 | upsample_strides=[1, 2, 4],
57 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
58 | upsample_cfg=dict(type='deconv3d', bias=False),
59 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
60 | use_conv_for_no_stride=True),
61 | pts_bbox_head=dict(
62 | type='Uni3DETRHead',
63 | # transformer_cfg
64 | num_query=300,
65 | num_classes=3,
66 | in_channels=256,
67 | sync_cls_avg_factor=True,
68 | with_box_refine=True,
69 | as_two_stage=False,
70 | code_size=8,
71 | gt_repeattimes=5,
72 | transformer=dict(
73 | type='Uni3DETRTransformer',
74 | fp16_enabled=fp16_enabled,
75 | decoder=dict(
76 | type='Uni3DETRTransformerDecoder',
77 | num_layers=9,
78 | return_intermediate=True,
79 | transformerlayers=dict(
80 | type='BaseTransformerLayer',
81 | attn_cfgs=[
82 | dict(
83 | type='MultiheadAttention',
84 | embed_dims=256,
85 | num_heads=8,
86 | dropout=0.1),
87 | dict(
88 | type='UniCrossAtten',
89 | num_points=1,
90 | embed_dims=256,
91 | num_sweeps=1,
92 | fp16_enabled=fp16_enabled)
93 | ],
94 | ffn_cfgs=dict(
95 | type='FFN',
96 | embed_dims=256,
97 | feedforward_channels=512,
98 | num_fcs=2,
99 | ffn_drop=0.1,
100 | act_cfg=dict(type='ReLU', inplace=True),
101 | ),
102 | norm_cfg=dict(type='LN'),
103 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
104 | 'ffn', 'norm'))
105 | )
106 | ),
107 | bbox_coder=dict(
108 | type='NMSFreeCoder',
109 | post_center_range=[0, -40, -3, 70.4, 40, 1],
110 | pc_range=point_cloud_range,
111 | max_num=150,
112 | alpha=0.2,
113 | voxel_size=voxel_size,
114 | num_classes=3),
115 | post_processing=dict(
116 | type='box_merging',
117 | score_thr=[0., 0.3, 0.65]),
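        # presumably per-class score thresholds in class_names order
        # (Pedestrian, Cyclist, Car) applied before the box merging step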
118 | positional_encoding=dict(
119 | type='SinePositionalEncoding',
120 | num_feats=128,
121 | normalize=True,
122 | offset=-0.5),
123 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5),
124 | loss_bbox=dict(type='L1Loss', loss_weight=0.25),
125 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2),
126 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
127 | ),
128 | # model training and testing settings
129 | train_cfg=dict(pts=dict(
130 | grid_size=[1408, 1600, 40],
131 | voxel_size=voxel_size,
132 | point_cloud_range=point_cloud_range,
133 | out_size_factor=bev_stride,
134 | assigner=dict(
135 | type='HungarianAssigner3D',
136 | cls_cost=dict(type='FocalLossCost', weight=2.0),
137 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
138 | iou_cost=dict(type='IoU3DCost', weight=1.2),
139 | pc_range=point_cloud_range))))
140 |
141 |
142 | # dataset settings
143 | dataset_type = 'KittiDataset'
144 | data_root = 'data/kitti/'
145 | class_names = ['Pedestrian', 'Cyclist', 'Car']
146 | point_cloud_range = [0, -40, -3, 70.4, 40, 1]
147 | input_modality = dict(use_lidar=True, use_camera=False)
148 |
149 | db_sampler = dict(
150 | data_root=data_root,
151 | info_path=data_root + 'kitti_dbinfos_train.pkl',
152 | rate=1.0,
153 | prepare=dict(
154 | filter_by_difficulty=[-1],
155 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
156 | classes=class_names,
157 | sample_groups=dict(Car=20, Pedestrian=6, Cyclist=6))
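# GT-sampling (copy-paste) augmentation: ObjectSample tops each training scene up to
# roughly 20 Car, 6 Pedestrian and 6 Cyclist instances drawn from the database,
# skipping database boxes with fewer points than filter_by_min_points allows.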
158 |
159 |
160 | file_client_args = dict(backend='disk')
161 | # Uncomment the following if use ceph or other file clients.
162 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
163 | # for more details.
164 | # file_client_args = dict(
165 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
166 |
167 | train_pipeline = [
168 | dict(
169 | type='LoadPointsFromFile',
170 | coord_type='LIDAR',
171 | load_dim=4,
172 | use_dim=4,
173 | file_client_args=file_client_args),
174 | dict(
175 | type='LoadAnnotations3D',
176 | with_bbox_3d=True,
177 | with_label_3d=True,
178 | file_client_args=file_client_args),
179 | dict(type='ObjectSample', db_sampler=db_sampler),
180 | dict(
181 | type='ObjectNoise',
182 | num_try=100,
183 | translation_std=[1.0, 1.0, 0.5],
184 | global_rot_range=[0.0, 0.0],
185 | rot_range=[-0.78539816, 0.78539816]),
186 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
187 | dict(
188 | type='GlobalRotScaleTrans',
189 | rot_range=[-0.78539816, 0.78539816],
190 | scale_ratio_range=[0.95, 1.05]),
191 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
192 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
193 | dict(type='ObjectNameFilter', classes=class_names),
194 | dict(type='PointShuffle'),
195 | dict(type='PointSample', num_points=18000),
196 | dict(type='DefaultFormatBundle3D', class_names=class_names),
197 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
198 | ]
199 | test_pipeline = [
200 | dict(
201 | type='LoadPointsFromFile',
202 | coord_type='LIDAR',
203 | load_dim=4,
204 | use_dim=4,
205 | file_client_args=file_client_args),
206 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
207 | dict(
208 | type='DefaultFormatBundle3D',
209 | class_names=class_names,
210 | with_label=False),
211 | dict(type='Collect3D', keys=['points'])
212 | # dict(
213 | # type='MultiScaleFlipAug3D',
214 | # img_scale=(1333, 800),
215 | # pts_scale_ratio=1,
216 | # flip=False,
217 | # transforms=[
218 | # dict(
219 | # type='GlobalRotScaleTrans',
220 | # rot_range=[0, 0],
221 | # scale_ratio_range=[1., 1.],
222 | # translation_std=[0, 0, 0]),
223 | # dict(type='RandomFlip3D'),
224 | # dict(
225 | # type='PointsRangeFilter', point_cloud_range=point_cloud_range),
226 | # dict(
227 | # type='DefaultFormatBundle3D',
228 | # class_names=class_names,
229 | # with_label=False),
230 | # dict(type='Collect3D', keys=['points'])
231 | # ])
232 | ]
233 |
234 |
235 | data = dict(
236 | samples_per_gpu=1,
237 | workers_per_gpu=2,
238 | train=dict(
239 | type='RepeatDataset',
240 | times=2,
241 | dataset=dict(
242 | type=dataset_type,
243 | data_root=data_root,
244 | ann_file=data_root + 'kitti_infos_train_van.pkl',
245 | split='training',
246 | pts_prefix='velodyne_reduced',
247 | pipeline=train_pipeline,
248 | modality=input_modality,
249 | classes=class_names,
250 | test_mode=False,
251 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
252 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
253 | box_type_3d='LiDAR')),
254 | val=dict(
255 | type=dataset_type,
256 | data_root=data_root,
257 | ann_file=data_root + 'kitti_infos_val.pkl',
258 | split='training',
259 | pts_prefix='velodyne_reduced',
260 | pipeline=test_pipeline,
261 | modality=input_modality,
262 | classes=class_names,
263 | test_mode=True,
264 | box_type_3d='LiDAR'),
265 | test=dict(
266 | type=dataset_type,
267 | data_root=data_root,
268 | ann_file=data_root + 'kitti_infos_val.pkl',
269 | split='training',
270 | pts_prefix='velodyne_reduced',
271 | pipeline=test_pipeline,
272 | modality=input_modality,
273 | classes=class_names,
274 | test_mode=True,
275 | box_type_3d='LiDAR'))
276 |
277 | evaluation = dict(interval=1, pipeline=test_pipeline)
278 |
279 |
280 | checkpoint_config = dict(interval=1)
281 |
282 | lr = 2e-5 * 3 / 8 * 18 / 2  # max learning rate (= 6.75e-5)
283 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
284 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
285 |
286 |
287 | lr_config = dict(policy='step', warmup=None, step=[32, 38])
288 | runner = dict(type='EpochBasedRunner', max_epochs=40)
289 |
290 | find_unused_parameters = True
291 | workflow = [('train', 1)]
292 | gpu_ids = range(0, 1)
293 | dist_params = dict(backend='nccl')
294 | log_level = 'INFO'
295 |
296 | # fp16 setting
297 | fp16 = dict(loss_scale=32.)
298 |
--------------------------------------------------------------------------------
/projects/configs/uni3detr/uni3detr_scannet.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../../configs/_base_/default_runtime.py'
3 | ]
4 |
5 | plugin=True
6 | plugin_dir='projects/mmdet3d_plugin/'
7 |
8 |
9 | voxel_size = [0.02, 0.02, 0.02]
10 | grid_size = [128, 640, 640]
11 |
12 | point_cloud_range = [-6.4, -6.4, -0.1, 6.4, 6.4, 2.46]
13 |
14 |
15 | fp16_enabled = True
16 | bev_stride = 4
17 | sample_num = 5
18 |
19 | input_modality = dict(
20 | use_lidar=True,
21 | use_camera=False,
22 | use_radar=False,
23 | use_map=False,
24 | use_external=False)
25 |
26 | model = dict(
27 | type='Uni3DETR',
28 | pts_voxel_layer=dict(
29 |         max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000),
30 | point_cloud_range=point_cloud_range),
31 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4),
32 | pts_middle_encoder=dict(
33 | type='SparseEncoderHD',
34 | in_channels=4,
35 | sparse_shape=grid_size,
36 | output_channels=256,
37 | order=('conv', 'norm', 'act'),
38 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
39 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
40 | block_type='basicblock',
41 | fp16_enabled=False), # not enable FP16 here
42 | pts_backbone=dict(
43 | type='SECOND3D',
44 | in_channels=[256, 256, 256],
45 | out_channels=[128, 256, 512],
46 | layer_nums=[5, 5, 5],
47 | layer_strides=[1, 2, 4],
48 | is_cascade=False,
49 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
50 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)),
51 | pts_neck=dict(
52 | type='SECOND3DFPN',
53 | in_channels=[128, 256, 512],
54 | out_channels=[256, 256, 256],
55 | upsample_strides=[1, 2, 4],
56 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
57 | upsample_cfg=dict(type='deconv3d', bias=False),
58 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
59 | use_conv_for_no_stride=True),
60 | pts_bbox_head=dict(
61 | type='Uni3DETRHead',
62 | # transformer_cfg
63 | num_query=300,
64 | num_classes=18,
65 | in_channels=256,
66 | sync_cls_avg_factor=True,
67 | with_box_refine=True,
68 | as_two_stage=False,
69 | code_size=8,
70 | with_nms=True,
71 | transformer=dict(
72 | type='Uni3DETRTransformer',
73 | fp16_enabled=fp16_enabled,
74 | decoder=dict(
75 | type='Uni3DETRTransformerDecoder',
76 | num_layers=3,
77 | return_intermediate=True,
78 | transformerlayers=dict(
79 | type='BaseTransformerLayer',
80 | attn_cfgs=[
81 | dict(
82 | type='MultiheadAttention',
83 | embed_dims=256,
84 | num_heads=8,
85 | dropout=0.1),
86 | dict(
87 | type='UniCrossAtten',
88 | num_points=1,
89 | embed_dims=256,
90 | num_sweeps=1,
91 | fp16_enabled=fp16_enabled),
92 | ],
93 | ffn_cfgs=dict(
94 | type='FFN',
95 | embed_dims=256,
96 | feedforward_channels=512,
97 | num_fcs=2,
98 | ffn_drop=0.1, ##0.1
99 | act_cfg=dict(type='ReLU', inplace=True),
100 | ),
101 | norm_cfg=dict(type='LN'),
102 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'))
103 | # operation_order=('cross_attn', 'norm', 'self_attn', 'norm', 'ffn', 'norm'))
104 | )
105 | ),
106 | bbox_coder=dict(
107 | type='NMSFreeCoder',
108 | post_center_range=point_cloud_range,
109 | pc_range=point_cloud_range,
110 | # max_num=1000,
111 | max_num=5000,
112 | voxel_size=voxel_size,
113 | num_classes=18),
114 | positional_encoding=dict(
115 | type='SinePositionalEncoding',
116 | num_feats=128,
117 | normalize=True,
118 | offset=-0.5),
119 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5),
120 | loss_bbox=dict(type='L1Loss', loss_weight=0.25),
121 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2),
122 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
123 | ),
124 | # model training and testing settings
125 | train_cfg=dict(pts=dict(
126 | grid_size=grid_size,
127 | voxel_size=voxel_size,
128 | point_cloud_range=point_cloud_range,
129 | out_size_factor=bev_stride,
130 | assigner=dict(
131 | type='HungarianAssigner3D',
132 | cls_cost=dict(type='FocalLossCost', weight=2.0),
133 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
134 | iou_cost=dict(type='IoU3DCost', weight=1.2),
135 | pc_range=point_cloud_range))))
136 |
137 |
138 | dataset_type = 'ScanNetDataset'
139 | data_root = './data/scannet/'
140 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
141 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
142 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
143 | 'garbagebin')
144 |
145 |
146 | train_pipeline = [
147 | dict(
148 | type='LoadPointsFromFile',
149 | coord_type='DEPTH',
150 | shift_height=False,
151 | load_dim=3,
152 | use_dim=[0, 1, 2]),
153 | dict(type='LoadAnnotations3D'),
154 | dict(
155 | type='RandomFlip3D',
156 | sync_2d=False,
157 | flip_ratio_bev_horizontal=0.5,
158 | flip_ratio_bev_vertical=0.5),
159 | dict(
160 | type='GlobalRotScaleTrans',
161 | rot_range=[-0.087266, 0.087266],
162 | scale_ratio_range=[.9, 1.1],
163 | translation_std=[.1, .1, .1],
164 | shift_height=False),
165 | dict(type='PointSample', num_points=200000),
166 | dict(type='DefaultFormatBundle3D', class_names=class_names),
167 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
168 | ]
169 |
170 | test_pipeline = [
171 | dict(
172 | type='LoadPointsFromFile',
173 | coord_type='DEPTH',
174 | shift_height=False,
175 | load_dim=3,
176 | use_dim=[0, 1, 2]),
177 | dict(
178 | type='DefaultFormatBundle3D',
179 | class_names=class_names,
180 | with_label=False),
181 | dict(type='Collect3D', keys=['points'])
182 | ]
183 |
184 | data = dict(
185 |     samples_per_gpu=3,
186 | workers_per_gpu=4,
187 | train=dict(
188 | type='RepeatDataset',
189 | times=6,
190 | dataset=dict(
191 | type=dataset_type,
192 | data_root=data_root,
193 | ann_file=data_root + 'scannet_infos_train.pkl',
194 | pipeline=train_pipeline,
195 | filter_empty_gt=True,
196 | classes=class_names,
197 | box_type_3d='Depth')
198 | ),
199 | val=dict(
200 | type=dataset_type,
201 | data_root=data_root,
202 | ann_file=data_root + 'scannet_infos_val.pkl',
203 | pipeline=test_pipeline,
204 | classes=class_names,
205 | test_mode=True,
206 | box_type_3d='Depth'),
207 | test=dict(
208 | type=dataset_type,
209 | data_root=data_root,
210 | ann_file=data_root + 'scannet_infos_val.pkl',
211 | pipeline=test_pipeline,
212 | classes=class_names,
213 | test_mode=True,
214 | box_type_3d='Depth'))
215 |
216 | evaluation = dict(pipeline=test_pipeline, interval=5)
217 |
218 |
219 | # optimizer
220 | # This schedule is mainly used by models on indoor dataset,
221 | # e.g., VoteNet on SUNRGBD and ScanNet
222 | lr = 2e-5 * 2 / 8 * 20 * 4 / 6 * 6 / 8 * 1.5 * 8 / 6  # max learning rate (= 1e-4)
223 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
224 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
225 |
226 |
227 | lr_config = dict(policy='step', warmup=None, step=[32, 38])
228 | runner = dict(type='EpochBasedRunner', max_epochs=40)
229 |
230 | # fp16 setting
231 | fp16 = dict(loss_scale=32.)
232 | find_unused_parameters = True
233 |
--------------------------------------------------------------------------------
/projects/configs/uni3detr/uni3detr_scannet_large.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../../configs/_base_/default_runtime.py'
3 | ]
4 |
5 | plugin=True
6 | plugin_dir='projects/mmdet3d_plugin/'
7 |
8 |
9 | voxel_size = [0.02, 0.02, 0.02]
10 | grid_size = [128, 640, 640]
11 |
12 | point_cloud_range = [-6.4, -6.4, -0.1, 6.4, 6.4, 2.46]
13 |
14 |
15 | fp16_enabled = True
16 | bev_stride = 4
17 | sample_num = 5
18 |
19 | input_modality = dict(
20 | use_lidar=True,
21 | use_camera=False,
22 | use_radar=False,
23 | use_map=False,
24 | use_external=False)
25 |
26 | model = dict(
27 | type='Uni3DETR',
28 | dynamic_voxelization=True,
29 | pts_voxel_layer=dict(
30 | max_num_points=-1, point_cloud_range=point_cloud_range, voxel_size=voxel_size, max_voxels=(-1, -1)),
31 | pts_voxel_encoder=dict(type='DynamicSimpleVFE', voxel_size=voxel_size, point_cloud_range=point_cloud_range),
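    # Dynamic voxelization: max_num_points=-1 and max_voxels=(-1, -1) keep every
    # point instead of capping points per voxel, and DynamicSimpleVFE averages the
    # points that fall into each voxel.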
32 | pts_middle_encoder=dict(
33 | type='SparseEncoderHD',
34 | in_channels=4,
35 | sparse_shape=grid_size,
36 | base_channels=32,
37 | output_channels=512,
38 | order=('conv', 'norm', 'act'),
39 | encoder_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256)),
40 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
41 | block_type='basicblock',
42 | fp16_enabled=False), # not enable FP16 here
43 | pts_backbone=dict(
44 | type='SECOND3D',
45 | in_channels=[512, 512, 512],
46 | out_channels=[128, 256, 512],
47 | layer_nums=[5, 5, 5],
48 | layer_strides=[1, 2, 4],
49 | is_cascade=False,
50 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
51 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)),
52 | pts_neck=dict(
53 | type='SECOND3DFPN',
54 | in_channels=[128, 256, 512],
55 | out_channels=[256, 256, 256],
56 | upsample_strides=[1, 2, 4],
57 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
58 | upsample_cfg=dict(type='deconv3d', bias=False),
59 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
60 | use_conv_for_no_stride=True),
61 | pts_bbox_head=dict(
62 | type='Uni3DETRHead',
63 | # transformer_cfg
64 | num_query=300,
65 | num_classes=18,
66 | in_channels=256,
67 | sync_cls_avg_factor=True,
68 | with_box_refine=True,
69 | as_two_stage=False,
70 | code_size=8,
71 | transformer=dict(
72 | type='Uni3DETRTransformer',
73 | fp16_enabled=fp16_enabled,
74 | decoder=dict(
75 | type='Uni3DETRTransformerDecoder',
76 | num_layers=3,
77 | return_intermediate=True,
78 | transformerlayers=dict(
79 | type='BaseTransformerLayer',
80 | attn_cfgs=[
81 | dict(
82 | type='MultiheadAttention',
83 | embed_dims=256,
84 | num_heads=8,
85 | dropout=0.1),
86 | dict(
87 | type='UniCrossAtten',
88 | num_points=1,
89 | embed_dims=256,
90 | num_sweeps=1,
91 | fp16_enabled=fp16_enabled),
92 | ],
93 | ffn_cfgs=dict(
94 | type='FFN',
95 | embed_dims=256,
96 | feedforward_channels=512,
97 | num_fcs=2,
98 | ffn_drop=0.1, ##0.1
99 | act_cfg=dict(type='ReLU', inplace=True),
100 | ),
101 | norm_cfg=dict(type='LN'),
102 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'))
103 | # operation_order=('cross_attn', 'norm', 'self_attn', 'norm', 'ffn', 'norm'))
104 | )
105 | ),
106 | bbox_coder=dict(
107 | type='NMSFreeCoder',
108 | post_center_range=point_cloud_range,
109 | pc_range=point_cloud_range,
110 | # max_num=1000,
111 | max_num=5000,
112 | alpha=1.0,
113 | voxel_size=voxel_size,
114 | num_classes=18),
115 | post_processing=dict(
116 | type='nms',
117 | nms_thr=0.5),
118 |         # Note: soft-NMS can give slightly higher results:
119 | # post_processing=dict(
120 | # type='soft_nms',
121 | # gaussian_sigma=0.3,
122 | # prune_threshold=1e-2),
123 | positional_encoding=dict(
124 | type='SinePositionalEncoding',
125 | num_feats=128,
126 | normalize=True,
127 | offset=-0.5),
128 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5),
129 | loss_bbox=dict(type='L1Loss', loss_weight=0.25),
130 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2),
131 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
132 | ),
133 | # model training and testing settings
134 | train_cfg=dict(pts=dict(
135 | grid_size=grid_size,
136 | voxel_size=voxel_size,
137 | point_cloud_range=point_cloud_range,
138 | out_size_factor=bev_stride,
139 | assigner=dict(
140 | type='HungarianAssigner3D',
141 | cls_cost=dict(type='FocalLossCost', weight=2.0),
142 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
143 | iou_cost=dict(type='IoU3DCost', weight=1.2),
144 | pc_range=point_cloud_range))))
145 |
146 |
147 | dataset_type = 'ScanNetDataset'
148 | data_root = './data/scannet/'
149 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
150 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
151 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
152 | 'garbagebin')
153 |
154 |
155 | train_pipeline = [
156 | dict(
157 | type='LoadPointsFromFile',
158 | coord_type='DEPTH',
159 | shift_height=False,
160 | load_dim=3,
161 | use_dim=[0, 1, 2]),
162 | dict(type='LoadAnnotations3D'),
163 | dict(
164 | type='RandomFlip3D',
165 | sync_2d=False,
166 | flip_ratio_bev_horizontal=0.5,
167 | flip_ratio_bev_vertical=0.5),
168 | dict(
169 | type='GlobalRotScaleTrans',
170 | rot_range=[-0.087266, 0.087266],
171 | scale_ratio_range=[.9, 1.1],
172 | translation_std=[.1, .1, .1],
173 | shift_height=False),
174 | dict(type='PointSample', num_points=200000),
175 | dict(type='DefaultFormatBundle3D', class_names=class_names),
176 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
177 | ]
178 |
179 | test_pipeline = [
180 | dict(
181 | type='LoadPointsFromFile',
182 | coord_type='DEPTH',
183 | shift_height=False,
184 | load_dim=3,
185 | use_dim=[0, 1, 2]),
186 | dict(
187 | type='DefaultFormatBundle3D',
188 | class_names=class_names,
189 | with_label=False),
190 | dict(type='Collect3D', keys=['points'])
191 | ]
192 |
193 | data = dict(
194 |     samples_per_gpu=3,
195 | workers_per_gpu=4,
196 | train=dict(
197 | type='RepeatDataset',
198 | times=6,
199 | dataset=dict(
200 | type=dataset_type,
201 | data_root=data_root,
202 | ann_file=data_root + 'scannet_infos_train.pkl',
203 | pipeline=train_pipeline,
204 | filter_empty_gt=True,
205 | classes=class_names,
206 | box_type_3d='Depth')
207 | ),
208 | val=dict(
209 | type=dataset_type,
210 | data_root=data_root,
211 | ann_file=data_root + 'scannet_infos_val.pkl',
212 | pipeline=test_pipeline,
213 | classes=class_names,
214 | test_mode=True,
215 | box_type_3d='Depth'),
216 | test=dict(
217 | type=dataset_type,
218 | data_root=data_root,
219 | ann_file=data_root + 'scannet_infos_val.pkl',
220 | pipeline=test_pipeline,
221 | classes=class_names,
222 | test_mode=True,
223 | box_type_3d='Depth'))
224 |
225 | evaluation = dict(pipeline=test_pipeline, interval=5)
226 |
227 |
228 | # optimizer
229 | # This schedule is mainly used by models on indoor dataset,
230 | # e.g., VoteNet on SUNRGBD and ScanNet
231 | lr = 2e-5 * 2 / 8 * 20 * 4 / 6 * 6 / 8 * 1.5 * 8 / 6  # max learning rate (= 1e-4)
232 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
233 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
234 |
235 |
236 | lr_config = dict(policy='step', warmup=None, step=[32, 38])
237 | runner = dict(type='EpochBasedRunner', max_epochs=40)
238 |
239 | # fp16 setting
240 | fp16 = dict(loss_scale=32.)
241 | find_unused_parameters = True
242 |
--------------------------------------------------------------------------------
/projects/configs/uni3detr/uni3detr_sunrgbd.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../../configs/_base_/default_runtime.py'
3 | ]
4 |
5 | plugin=True
6 | plugin_dir='projects/mmdet3d_plugin/'
7 |
8 | # If point cloud range is changed, the models should also change their point
9 | # cloud range accordingly
10 | voxel_size = [0.02, 0.02, 0.02]
11 | grid_size = [128, 320, 320]
12 | point_cloud_range = [-3.2, -0.2, -2., 3.2, 6.2, 0.56]
13 |
14 | fp16_enabled = True
15 | bev_stride = 4
16 | sample_num = 5
17 |
18 |
19 | input_modality = dict(
20 | use_lidar=True,
21 | use_camera=False,
22 | use_radar=False,
23 | use_map=False,
24 | use_external=False)
25 |
26 | model = dict(
27 | type='Uni3DETR',
28 | pts_voxel_layer=dict(
29 | max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000),
30 | point_cloud_range=point_cloud_range),
31 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4),
32 | pts_middle_encoder=dict(
33 | type='SparseEncoderHD',
34 | in_channels=4,
35 | sparse_shape=grid_size,
36 | output_channels=256,
37 | order=('conv', 'norm', 'act'),
38 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
39 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
40 | block_type='basicblock',
41 | fp16_enabled=False), # not enable FP16 here
42 | pts_backbone=dict(
43 | type='SECOND3D',
44 | in_channels=[256, 256, 256],
45 | out_channels=[128, 256, 512],
46 | layer_nums=[5, 5, 5],
47 | layer_strides=[1, 2, 4],
48 | is_cascade=False,
49 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
50 | conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)),
51 | pts_neck=dict(
52 | type='SECOND3DFPN',
53 | in_channels=[128, 256, 512],
54 | out_channels=[256, 256, 256],
55 | upsample_strides=[1, 2, 4],
56 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
57 | upsample_cfg=dict(type='deconv3d', bias=False),
58 | extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
59 | use_conv_for_no_stride=True),
60 | pts_bbox_head=dict(
61 | type='Uni3DETRHead',
62 | num_query=300,
63 | num_classes=10,
64 | in_channels=256,
65 | sync_cls_avg_factor=True,
66 | with_box_refine=True,
67 | as_two_stage=False,
68 | code_size=8,
69 | transformer=dict(
70 | type='Uni3DETRTransformer',
71 | fp16_enabled=fp16_enabled,
72 | decoder=dict(
73 | type='Uni3DETRTransformerDecoder',
74 | num_layers=3,
75 | return_intermediate=True,
76 | transformerlayers=dict(
77 | type='BaseTransformerLayer',
78 | attn_cfgs=[
79 | dict(
80 | type='MultiheadAttention',
81 | embed_dims=256,
82 | num_heads=8,
83 | dropout=0.1),
84 | dict(
85 | type='UniCrossAtten',
86 | num_points=1,
87 | embed_dims=256,
88 | num_sweeps=1,
89 | fp16_enabled=fp16_enabled),
90 | ],
91 | ffn_cfgs=dict(
92 | type='FFN',
93 | embed_dims=256,
94 | feedforward_channels=512,
95 | num_fcs=2,
96 | ffn_drop=0.1,
97 | act_cfg=dict(type='ReLU', inplace=True),
98 | ),
99 | norm_cfg=dict(type='LN'),
100 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'))
101 | )
102 | ),
103 | bbox_coder=dict(
104 | type='NMSFreeCoder',
105 | post_center_range=point_cloud_range,
106 | pc_range=point_cloud_range,
107 | max_num=1000,
108 | voxel_size=voxel_size,
109 | alpha=1.0,
110 | num_classes=10),
111 | post_processing=dict(
112 | type='nms',
113 | nms_thr=0.5),
114 |         # Note: soft-NMS can give slightly higher results:
115 | # post_processing=dict(
116 | # type='soft_nms',
117 | # gaussian_sigma=0.3,
118 | # prune_threshold=1e-2),
119 | positional_encoding=dict(
120 | type='SinePositionalEncoding',
121 | num_feats=128,
122 | normalize=True,
123 | offset=-0.5),
124 | loss_cls=dict(type='SoftFocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.5),
125 | loss_bbox=dict(type='L1Loss', loss_weight=0.25),
126 | loss_iou=dict(type='IoU3DLoss', loss_weight=1.2),
127 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
128 | ),
129 | # model training and testing settings
130 | train_cfg=dict(pts=dict(
131 | grid_size=grid_size,
132 | voxel_size=voxel_size,
133 | point_cloud_range=point_cloud_range,
134 | out_size_factor=bev_stride,
135 | assigner=dict(
136 | type='HungarianAssigner3D',
137 | cls_cost=dict(type='FocalLossCost', weight=2.0),
138 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
139 | iou_cost=dict(type='IoU3DCost', weight=1.2),
140 | pc_range=point_cloud_range))))
141 |
142 |
143 | dataset_type = 'SUNRGBDDataset'
144 | data_root = 'data/sunrgbd/'
145 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
146 | 'night_stand', 'bookshelf', 'bathtub')
147 |
148 | file_client_args = dict(backend='disk')
149 |
150 | train_pipeline = [
151 | dict(
152 | type='LoadPointsFromFile',
153 | coord_type='DEPTH',
154 | shift_height=True,
155 | load_dim=6,
156 | use_dim=[0, 1, 2],
157 | file_client_args=file_client_args),
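    # shift_height=True appends a height-above-floor value to the selected xyz
    # dims, giving the 4 input features expected by HardSimpleVFE(num_features=4).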
158 | dict(type='LoadAnnotations3D', file_client_args=file_client_args),
159 | dict(
160 | type='RandomFlip3D',
161 | sync_2d=False,
162 | flip_ratio_bev_horizontal=0.5,
163 | ),
164 | dict(
165 | type='GlobalRotScaleTrans',
166 | rot_range=[-0.523599, 0.523599],
167 | scale_ratio_range=[0.85, 1.15],
168 | shift_height=True),
169 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
170 | # dict(type='PointSample', num_points=20000),
171 | dict(type='PointSample', num_points=100000),
172 | dict(type='DefaultFormatBundle3D', class_names=class_names),
173 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
174 | ]
175 | test_pipeline = [
176 | dict(
177 | type='LoadPointsFromFile',
178 | coord_type='DEPTH',
179 | shift_height=True,
180 | load_dim=6,
181 | use_dim=[0, 1, 2],
182 | file_client_args=file_client_args),
183 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
184 | # dict(type='PointSample', num_points=50000),
185 | dict(type='PointSample', num_points=100000),
186 | dict(
187 | type='DefaultFormatBundle3D',
188 | class_names=class_names,
189 | with_label=False),
190 | dict(type='Collect3D', keys=['points'])
191 | ]
192 |
193 | data = dict(
194 | samples_per_gpu=4,
195 | workers_per_gpu=4,
196 | train=dict(
197 | type='RepeatDataset',
198 |         times=2,  # 5
199 | dataset=dict(
200 | type=dataset_type,
201 | data_root=data_root,
202 | ann_file=data_root + 'sunrgbd_infos_train.pkl',
203 | pipeline=train_pipeline,
204 | classes=class_names,
205 | filter_empty_gt=True,
206 | box_type_3d='Depth',
207 | file_client_args=file_client_args)),
208 | val=dict(
209 | type=dataset_type,
210 | data_root=data_root,
211 | ann_file=data_root + 'sunrgbd_infos_val.pkl',
212 | pipeline=test_pipeline,
213 | classes=class_names,
214 | test_mode=True,
215 | box_type_3d='Depth',
216 | file_client_args=file_client_args),
217 | test=dict(
218 | type=dataset_type,
219 | data_root=data_root,
220 | ann_file=data_root + 'sunrgbd_infos_val.pkl',
221 | pipeline=test_pipeline,
222 | classes=class_names,
223 | test_mode=True,
224 | box_type_3d='Depth',
225 | file_client_args=file_client_args))
226 |
227 | evaluation = dict(pipeline=test_pipeline, interval=5)
228 |
229 |
230 | # optimizer
231 | # This schedule is mainly used by models on indoor dataset,
232 | # e.g., VoteNet on SUNRGBD and ScanNet
233 | lr = 2e-5 * 2 / 8 * 20  # max learning rate (= 1e-4)
234 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
235 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
236 |
237 |
238 | lr_config = dict(policy='step', warmup=None, step=[32, 38])
239 | runner = dict(type='EpochBasedRunner', max_epochs=40)
240 |
241 | # fp16 setting
242 | fp16 = dict(loss_scale=32.)
243 | find_unused_parameters = True
244 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/__init__.py:
--------------------------------------------------------------------------------
1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D
2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder
3 | from .core.bbox.match_costs import BBox3DL1Cost
4 | from .datasets import NuScenesSweepDataset
5 | from .datasets.pipelines import (
6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, NormalizeMultiviewImage,
7 | RandomScaleImageMultiViewImage, ImageRandomResizeCropFlip)
8 | from .models.backbones.vovnet import VoVNet
9 | from .models.detectors import Uni3DETR
10 | from .models.dense_heads import Uni3DETRHead
11 | from .models.pts_encoder import SparseEncoderHD
12 | from .models.necks import SECOND3DFPN
13 | from .models.losses import RDIoULoss, IoU3DLoss, SoftFocalLoss
14 | from .models.utils import Uni3DETRTransformer, Uni3DETRTransformerDecoder
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .hungarian_assigner_3d import HungarianAssigner3D
2 |
3 | __all__ = ['HungarianAssigner3D']
4 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS
4 | from mmdet.core.bbox.assigners import AssignResult
5 | from mmdet.core.bbox.assigners import BaseAssigner
6 | from mmdet.core.bbox.match_costs import build_match_cost
7 | from mmdet.models.utils.transformer import inverse_sigmoid
8 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox, denormalize_bbox
9 | from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d, bbox_overlaps_nearest_3d
10 |
11 | try:
12 | from scipy.optimize import linear_sum_assignment
13 | except ImportError:
14 | linear_sum_assignment = None
15 |
16 |
17 | @BBOX_ASSIGNERS.register_module()
18 | class HungarianAssigner3D(BaseAssigner):
19 | """Computes one-to-one matching between predictions and ground truth.
20 | This class computes an assignment between the targets and the predictions
21 | based on the costs. The costs are weighted sum of three components:
22 | classification cost, regression L1 cost and regression iou cost. The
23 | targets don't include the no_object, so generally there are more
24 | predictions than targets. After the one-to-one matching, the un-matched
25 | are treated as backgrounds. Thus each query prediction will be assigned
26 | with `0` or a positive integer indicating the ground truth index:
27 | - 0: negative sample, no assigned gt
28 | - positive integer: positive sample, index (1-based) of assigned gt
29 |     Args:
30 |         cls_cost (dict, optional): Config of the classification matching
31 |             cost. Default: dict(type='ClassificationCost', weight=1.0).
32 |         reg_cost (dict, optional): Config of the regression L1 matching
33 |             cost, computed on the normalized box parameters.
34 |             Default: dict(type='BBoxL1Cost', weight=1.0).
35 |         iou_cost (dict, optional): Config of the 3D IoU matching cost,
36 |             computed on the denormalized boxes.
37 |             Default: dict(type='IoUCost', weight=0.0).
38 |         pc_range (list[float], optional): Point cloud range
39 |             [x_min, y_min, z_min, x_max, y_max, z_max] used to normalize
40 |             and denormalize box coordinates. Default: None.
41 |     """
42 |
43 | def __init__(self,
44 | cls_cost=dict(type='ClassificationCost', weight=1.),
45 | reg_cost=dict(type='BBoxL1Cost', weight=1.0),
46 | iou_cost=dict(type='IoUCost', weight=0.0),
47 | pc_range=None):
48 | self.cls_cost = build_match_cost(cls_cost)
49 | self.reg_cost = build_match_cost(reg_cost)
50 | self.iou_cost = build_match_cost(iou_cost)
51 | self.pc_range = pc_range
52 |
53 | def assign(self,
54 | bbox_pred,
55 | cls_pred,
56 | gt_bboxes,
57 | gt_labels,
58 | num_query,
59 | gt_bboxes_ignore=None,
60 | eps=1e-7, gt_repeattimes=1):
61 | """Computes one-to-one matching based on the weighted costs.
62 | This method assign each query prediction to a ground truth or
63 | background. The `assigned_gt_inds` with -1 means don't care,
64 | 0 means negative sample, and positive number is the index (1-based)
65 | of assigned gt.
66 | The assignment is done in the following steps, the order matters.
67 | 1. assign every prediction to -1
68 | 2. compute the weighted costs
69 | 3. do Hungarian matching on CPU based on the costs
70 | 4. assign all to 0 (background) first, then for each matched pair
71 | between predictions and gts, treat this prediction as foreground
72 | and assign the corresponding gt index (plus 1) to it.
73 |         Args:
74 |             bbox_pred (Tensor): Predicted 3D boxes in the normalized
75 |                 parameterization (see `normalize_bbox`), shape [num_pred, code_size].
76 |             cls_pred (Tensor): Predicted classification logits, shape
77 |                 [num_pred, num_class].
78 |             gt_bboxes (Tensor): Unnormalized ground-truth 3D boxes
79 |                 (center, size, yaw), shape [num_gt, 7].
80 |             gt_labels (Tensor): Labels of `gt_bboxes`, shape (num_gt,).
81 |             num_query (int): Queries per group; groups are matched independently.
82 |             gt_bboxes_ignore (Tensor, optional): Must be None. Default None.
83 |             eps (int | float, optional): Unused here. Default 1e-7.
84 |             gt_repeattimes (int, optional): Number of times the gt columns
85 |                 are tiled so several predictions can match each gt. Default 1.
86 |         Returns:
87 |             :obj:`AssignResult`: The assigned result.
88 |         """
89 | assert gt_bboxes_ignore is None, \
90 | 'Only case when gt_bboxes_ignore is None is supported.'
91 | num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)
92 |
93 | # 1. assign -1 by default
94 | assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),
95 | -1,
96 | dtype=torch.long)
97 | assigned_labels = bbox_pred.new_full((num_bboxes, ),
98 | -1,
99 | dtype=torch.long)
100 | if num_gts == 0 or num_bboxes == 0:
101 | # No ground truth or boxes, return empty assignment
102 | if num_gts == 0:
103 | # No ground truth, assign all to background
104 | assigned_gt_inds[:] = 0
105 | return AssignResult(
106 | num_gts, assigned_gt_inds, None, labels=assigned_labels)
107 |
108 | # 2. compute the weighted costs
109 | # classification and bboxcost.
110 | normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)
111 | bboxes3d = denormalize_bbox(bbox_pred, self.pc_range)
112 | iou3d = bbox_overlaps_nearest_3d(bboxes3d, gt_bboxes, coordinate='depth')
113 |
114 | cls_cost = self.cls_cost(cls_pred, gt_labels)
115 | #cls_cost = self.cls_cost(cls_pred, gt_labels, iou3d)
116 |
117 | # regression L1 cost
118 | reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8])
119 | iou_cost = self.iou_cost(bboxes3d, gt_bboxes)
120 |
121 | cost = cls_cost + reg_cost + iou_cost
122 |
123 | # 3. do Hungarian matching on CPU using linear_sum_assignment
124 | cost = cost.detach().cpu()
125 | # cost[torch.isnan(cost)] = 1e5
126 | if linear_sum_assignment is None:
127 |             raise ImportError('Please run "pip install scipy" to install scipy first.')
128 |
129 | nq = num_query
130 | ng = int(cost.shape[0] // nq)
131 | matched_row_inds, matched_col_inds = [], []
132 | for g in range(ng):
133 | # matched_row_inds1, matched_col_inds1 = linear_sum_assignment(cost[g*nq:(g+1)*nq])
134 | matched_row_inds1, matched_col_inds1 = linear_sum_assignment(cost[g*nq:(g+1)*nq].repeat(1, gt_repeattimes) )
135 | matched_row_inds.append(g*nq + matched_row_inds1)
136 | #matched_col_inds.append(matched_col_inds1)
137 | matched_col_inds.append(matched_col_inds1 % cost.shape[1])
138 | matched_row_inds = np.concatenate(matched_row_inds)
139 | matched_col_inds = np.concatenate(matched_col_inds)
140 |
141 | matched_row_inds = torch.from_numpy(matched_row_inds).to(bbox_pred.device)
142 | matched_col_inds = torch.from_numpy(matched_col_inds).to(bbox_pred.device)
143 |
144 | # 4. assign backgrounds and foregrounds
145 | # assign all indices to backgrounds first
146 | assigned_gt_inds[:] = 0
147 | # assign foregrounds based on matching results
148 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
149 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
150 | return AssignResult(
151 | num_gts, assigned_gt_inds, None, labels=assigned_labels)
152 |
153 | @staticmethod
154 | def _bbox_to_loss(bbox):
155 | # axis-aligned case: x, y, z, w, h, l -> x1, y1, z1, x2, y2, z2
156 | return torch.stack(
157 | (bbox[..., 0] - bbox[..., 3] / 2, bbox[..., 1] - bbox[..., 4] / 2,
158 | bbox[..., 2] - bbox[..., 5] / 2, bbox[..., 0] + bbox[..., 3] / 2,
159 | bbox[..., 1] + bbox[..., 4] / 2, bbox[..., 2] + bbox[..., 5] / 2),
160 | dim=-1)
161 |
--------------------------------------------------------------------------------
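A standalone sketch (not part of the repository) of the grouped matching performed in step 3 of `assign` above: the cost matrix is split into groups of `num_query` rows, each group is solved independently with `scipy.optimize.linear_sum_assignment`, and tiling the ground-truth columns `gt_repeattimes` times lets one ground truth absorb several queries before the column index is folded back with a modulo.

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

def grouped_match(cost, num_query, gt_repeattimes=1):
    """cost: (num_groups * num_query, num_gt) array of matching costs."""
    num_groups = cost.shape[0] // num_query
    num_gt = cost.shape[1]
    rows, cols = [], []
    for g in range(num_groups):
        block = cost[g * num_query:(g + 1) * num_query]
        # repeat the GT columns so each GT can take up to gt_repeattimes queries
        r, c = linear_sum_assignment(np.tile(block, (1, gt_repeattimes)))
        rows.append(g * num_query + r)
        cols.append(c % num_gt)   # fold the tiled column index back to a GT index
    return np.concatenate(rows), np.concatenate(cols)

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    cost = rng.random((2 * 5, 3))            # 2 groups of 5 queries, 3 GTs
    rows, cols = grouped_match(cost, num_query=5, gt_repeattimes=2)
    print(rows, cols)                        # each GT matched up to twice per group
```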
/projects/mmdet3d_plugin/core/bbox/bbox_merging.py:
--------------------------------------------------------------------------------
1 | """This file defines nms functions to merge boxes"""
2 |
3 | import time
4 |
5 | import cv2
6 | import numpy as np
7 | from shapely.geometry import Polygon
8 |
9 | from numba import jit
10 |
11 | def boxes_3d_to_corners(boxes_3d):
12 | all_corners = []
13 | for box_3d in boxes_3d:
14 | x3d, y3d, z3d, l, h, w, yaw = box_3d
15 | R = np.array([[np.cos(yaw), 0, np.sin(yaw)],
16 | [0, 1, 0 ],
17 | [-np.sin(yaw), 0, np.cos(yaw)]])
18 | corners = np.array([[ l/2, 0.0, w/2], # front up right
19 | [ l/2, 0.0, -w/2], # front up left
20 | [-l/2, 0.0, -w/2], # back up left
21 | [-l/2, 0.0, w/2], # back up right
22 | [ l/2, -h, w/2], # front down right
23 | [ l/2, -h, -w/2], # front down left
24 | [-l/2, -h, -w/2], # back down left
25 | [-l/2, -h, w/2]]) # back down right
26 | r_corners = corners.dot(np.transpose(R))
27 | cam_points_xyz = r_corners+np.array([x3d, y3d, z3d])
28 | all_corners.append(cam_points_xyz)
29 | return np.array(all_corners)
30 |
31 | def overlapped_boxes_3d(single_box, box_list):
32 | x0_max, y0_max, z0_max = np.max(single_box, axis=0)
33 | x0_min, y0_min, z0_min = np.min(single_box, axis=0)
34 | overlap = np.zeros(len(box_list))
35 | for i, box in enumerate(box_list):
36 | x_max, y_max, z_max = np.max(box, axis=0)
37 | x_min, y_min, z_min = np.min(box, axis=0)
38 | if x0_max < x_min or x0_min > x_max:
39 | overlap[i] = 0
40 | continue
41 | if y0_max < y_min or y0_min > y_max:
42 | overlap[i] = 0
43 | continue
44 | if z0_max < z_min or z0_min > z_max:
45 | overlap[i] = 0
46 | continue
47 | x_draw_min = min(x0_min, x_min)
48 | x_draw_max = max(x0_max, x_max)
49 | z_draw_min = min(z0_min, z_min)
50 | z_draw_max = max(z0_max, z_max)
51 | offset = np.array([x_draw_min, z_draw_min])
52 | buf1 = np.zeros((z_draw_max-z_draw_min, x_draw_max-x_draw_min),
53 | dtype=np.int32)
54 | buf2 = np.zeros_like(buf1)
55 | cv2.fillPoly(buf1, [single_box[:4, [0,2]]-offset], color=1)
56 | cv2.fillPoly(buf2, [box[:4, [0,2]]-offset], color=1)
57 | shared_area = cv2.countNonZero(buf1*buf2)
58 | area1 = cv2.countNonZero(buf1)
59 | area2 = cv2.countNonZero(buf2)
60 | shared_y = min(y_max, y0_max) - max(y_min, y0_min)
61 | intersection = shared_y * shared_area
62 | union = (y_max-y_min) * area2 + (y0_max-y0_min) * area1
63 | overlap[i] = np.float32(intersection) / (union - intersection)
64 | return overlap
65 |
66 | def overlapped_boxes_3d_fast_poly(single_box, box_list):
67 | single_box_max_corner = np.max(single_box, axis=0)
68 | single_box_min_corner = np.min(single_box, axis=0)
69 | x0_max, y0_max, z0_max = single_box_max_corner
70 | x0_min, y0_min, z0_min = single_box_min_corner
71 | max_corner = np.max(box_list, axis=1)
72 | min_corner = np.min(box_list, axis=1)
73 | overlap = np.zeros(len(box_list))
74 | non_overlap_mask = np.logical_or(single_box_max_corner < min_corner,
75 | single_box_min_corner > max_corner)
76 | non_overlap_mask = np.any(non_overlap_mask, axis=1)
77 | p1 = Polygon(single_box[:4, [0,2]])
78 | area1 = p1.area
79 | for i in range(len(box_list)):
80 | if not non_overlap_mask[i]:
81 | x_max, y_max, z_max = max_corner[i]
82 | x_min, y_min, z_min = min_corner[i]
83 | p2 = Polygon(box_list[i][:4, [0,2]])
84 | shared_area = p1.intersection(p2).area
85 | area2 = p2.area
86 | shared_y = min(y_max, y0_max) - max(y_min, y0_min)
87 | intersection = shared_y * shared_area
88 | union = (y_max-y_min) * area2 + (y0_max-y0_min) * area1
89 | overlap[i] = np.float32(intersection) / (union - intersection)
90 | return overlap
91 |
92 |
93 | def bboxes_sort(classes, scores, bboxes, top_k=400, attributes=None):
94 | """Sort bounding boxes by decreasing order and keep only the top_k
95 | """
96 | idxes = np.argsort(-scores)
97 | classes = classes[idxes]
98 | scores = scores[idxes]
99 | bboxes = bboxes[idxes]
100 | if attributes is not None:
101 | attributes = attributes[idxes]
102 | if top_k > 0:
103 | if len(idxes) > top_k:
104 | classes = classes[:top_k]
105 | scores = scores[:top_k]
106 | bboxes = bboxes[:top_k]
107 | if attributes is not None:
108 | attributes = attributes[:top_k]
109 | return classes, scores, bboxes, attributes
110 |
111 |
112 | def bboxes_nms_merge_only(classes, scores, bboxes, scores_threshold=0.25,
113 | nms_threshold=0.45, overlapped_fn=overlapped_boxes_3d_fast_poly, appr_factor=10.0,
114 | attributes=None):
115 | """Apply non-maximum selection to bounding boxes.
116 | """
117 | boxes_corners = boxes_3d_to_corners(bboxes)
118 | # convert to pixels
119 | keep_bboxes = np.ones(scores.shape, dtype=bool)
120 | for i in range(scores.size-1):
121 | if keep_bboxes[i]:
122 | # Only compute on the rest of bboxes
123 | valid = keep_bboxes[(i+1):]
124 | # Compute overlap with the bboxes that follow.
125 | overlap = overlapped_fn(boxes_corners[i],
126 | boxes_corners[(i+1):][valid])
127 | # Overlap threshold for keeping + checking part of the same class
128 | remove_overlap = np.logical_and(overlap > nms_threshold,
129 | classes[(i+1):][valid] == classes[i])
130 | overlaped_bboxes = np.concatenate(
131 | [bboxes[(i+1):][valid][remove_overlap], bboxes[[i]]], axis=0)
132 | boxes_mean = np.median(overlaped_bboxes, axis=0)
133 | # boxes_mean = np.mean(overlaped_bboxes, axis=0)
134 | bboxes[i][:] = boxes_mean[:]
135 | keep_bboxes[(i+1):][valid] = np.logical_not(remove_overlap)
136 |
137 | idxes = np.where(keep_bboxes)
138 | classes = classes[idxes]
139 | scores = scores[idxes]
140 | bboxes = bboxes[idxes]
141 | if attributes is not None:
142 | attributes = attributes[idxes]
143 | return classes, scores, bboxes, idxes, #attributes
144 |
145 | def nms_boxes_3d_merge_only(class_labels, detection_boxes_3d, detection_scores,
146 | overlapped_thres=0.5, overlapped_fn=overlapped_boxes_3d_fast_poly, appr_factor=10.0,
147 | top_k=-1, attributes=None):
148 | class_labels, detection_scores, detection_boxes_3d, attributes = \
149 | bboxes_sort(
150 | class_labels, detection_scores, detection_boxes_3d, top_k=top_k,
151 | attributes=attributes)
152 | # nms
153 | class_labels, detection_scores, detection_boxes_3d, attributes = \
154 | bboxes_nms_merge_only(
155 | class_labels, detection_scores, detection_boxes_3d,
156 | nms_threshold=overlapped_thres, overlapped_fn=overlapped_fn,
157 | appr_factor=appr_factor, attributes=attributes)
158 | return class_labels, detection_boxes_3d, detection_scores, attributes
159 |
--------------------------------------------------------------------------------
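The overlap measure used by `overlapped_boxes_3d_fast_poly` above can be summarized in a few lines: intersect the boxes' ground-plane footprints, multiply by the vertical overlap, and normalize by the union of the two volumes. The following is a standalone sketch assuming only numpy and shapely, with corners laid out as in `boxes_3d_to_corners` (first four corners form the top face at y = 0); it is illustrative, not the repository function.

```python
import numpy as np
from shapely.geometry import Polygon

def footprint_iou_3d(corners_a, corners_b):
    """corners_*: (8, 3) corner arrays, top face first, as in boxes_3d_to_corners."""
    poly_a = Polygon(corners_a[:4, [0, 2]])       # top face projected onto the x-z plane
    poly_b = Polygon(corners_b[:4, [0, 2]])
    shared_area = poly_a.intersection(poly_b).area
    ya_min, ya_max = corners_a[:, 1].min(), corners_a[:, 1].max()
    yb_min, yb_max = corners_b[:, 1].min(), corners_b[:, 1].max()
    shared_y = max(0.0, min(ya_max, yb_max) - max(ya_min, yb_min))
    inter = shared_y * shared_area
    union = (ya_max - ya_min) * poly_a.area + (yb_max - yb_min) * poly_b.area - inter
    return inter / union if union > 0 else 0.0

if __name__ == '__main__':
    def make_box(x, y, z, l, h, w):
        top = np.array([[l/2, 0, w/2], [l/2, 0, -w/2], [-l/2, 0, -w/2], [-l/2, 0, w/2]])
        return np.concatenate([top, top + np.array([0, -h, 0])]) + np.array([x, y, z])
    a, b = make_box(0, 0, 0, 2, 1, 2), make_box(1, 0, 0, 2, 1, 2)
    print(footprint_iou_3d(a, b))   # two unit-height boxes shifted by half: ~0.333
```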
/projects/mmdet3d_plugin/core/bbox/coders/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_free_coder import NMSFreeCoder
2 |
3 | __all__ = ['NMSFreeCoder']
4 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from mmdet.core.bbox import BaseBBoxCoder
4 | from mmdet.core.bbox.builder import BBOX_CODERS
5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
6 | from mmdet.models.utils.transformer import inverse_sigmoid
7 |
8 |
9 | @BBOX_CODERS.register_module()
10 | class NMSFreeCoder(BaseBBoxCoder):
11 | """Bbox coder for NMS-free detector.
12 | Args:
13 | pc_range (list[float]): Range of point cloud.
14 | post_center_range (list[float]): Limit of the center.
15 | Default: None.
16 | max_num (int): Max number to be kept. Default: 100.
17 | score_threshold (float): Threshold to filter boxes based on score.
18 | Default: None.
19 | code_size (int): Code size of bboxes. Default: 9
20 | """
21 |
22 | def __init__(self,
23 | pc_range,
24 | voxel_size=None,
25 | post_center_range=None,
26 | max_num=100,
27 | score_threshold=None,
28 | alpha=0.5,
29 | num_classes=10):
30 |
31 | self.pc_range = pc_range
32 | self.voxel_size = voxel_size
33 | self.post_center_range = post_center_range
34 | self.max_num = max_num
35 | self.score_threshold = score_threshold
36 | self.num_classes = num_classes
37 | self.alpha = alpha
38 |
39 | def encode(self):
40 | pass
41 |
42 | def decode_single(self, cls_scores, bbox_preds, all_iou_preds):
43 | """Decode bboxes.
44 | Args:
45 | cls_scores (Tensor): Outputs from the classification head, \
46 | shape [num_query, cls_out_channels]. Note \
47 | cls_out_channels should include background.
48 | bbox_preds (Tensor): Outputs from the regression \
49 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
50 | Shape [num_query, 9].
51 | Returns:
52 | dict: Decoded boxes, scores, labels and IoUs for one sample.
53 | """
54 | max_num = self.max_num
55 | #max_num = cls_scores.numel()
56 |
57 | cls_scores = cls_scores.sigmoid()
58 | ious = all_iou_preds.sigmoid()
59 |
60 | scores, indexs = cls_scores.view(-1).topk(max_num)
61 | labels = indexs % self.num_classes
62 | bbox_index = indexs // self.num_classes
63 | bbox_preds = bbox_preds[bbox_index]
64 |
65 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
66 | final_scores = scores
67 | final_preds = labels
68 |
69 | all_iou_preds = all_iou_preds.sigmoid()
70 | final_ious = all_iou_preds[bbox_index]
71 |
72 | # use score threshold
73 | if self.score_threshold is not None:
74 | thresh_mask = final_scores > self.score_threshold
75 | if self.post_center_range is not None:
76 | # self.post_center_range = torch.tensor(self.post_center_range, device=scores.device)
77 | self.post_center_range = scores.new_tensor(self.post_center_range)
78 | mask = (final_box_preds[..., :3] >=
79 | self.post_center_range[:3]).all(1)
80 | mask &= (final_box_preds[..., :3] <=
81 | self.post_center_range[3:]).all(1)
82 |
83 | if self.score_threshold:
84 | mask &= thresh_mask
85 |
86 | boxes3d = final_box_preds[mask]
87 | scores = final_scores[mask]
88 | labels = final_preds[mask]
89 | ious = final_ious[mask]
90 |
91 | predictions_dict = {
92 | 'bboxes': boxes3d,
93 | #'scores': scores,
94 | 'scores': scores ** self.alpha * ious.reshape(-1) ** (1-self.alpha),
95 | 'labels': labels,
96 | 'ious': ious.reshape(-1),
97 | }
98 |
99 | else:
100 | raise NotImplementedError(
101 | 'Need to reorganize output as a batch, only '
102 | 'support post_center_range is not None for now!')
103 | return predictions_dict
104 |
105 | def decode(self, preds_dicts):
106 | """Decode bboxes.
107 | Args:
108 | preds_dicts (dict): Prediction dict containing 'all_cls_scores' \
109 | of shape [nb_dec, bs, num_query, cls_out_channels] (note that \
110 | cls_out_channels should include background), 'all_bbox_preds' \
111 | of shape [nb_dec, bs, num_query, code_size] in the normalized \
112 | format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy), \
113 | and 'all_iou_preds' with per-query IoU predictions.
114 | Returns:
115 | list[dict]: Decoded boxes.
116 | """
117 | # all_cls_scores = preds_dicts['all_cls_scores'][-1]
118 | # all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
119 | # all_iou_preds = preds_dicts['all_iou_preds'][-1]
120 |
121 | all_cls_scores = torch.mean(preds_dicts['all_cls_scores'][1:], 0)
122 | all_bbox_preds = torch.mean(preds_dicts['all_bbox_preds'][1:], 0)
123 | all_iou_preds = torch.mean(preds_dicts['all_iou_preds'][1:], 0)
124 |
125 | #all_centerness_preds = torch.mean(preds_dicts['all_centerness_preds'][1:], 0)
126 | # all_cls_scores = torch.mean(preds_dicts['all_cls_scores'], 0)
127 | # all_bbox_preds = torch.mean(preds_dicts['all_bbox_preds'], 0)
128 | # all_cls_scores = 0. * preds_dicts['all_cls_scores'][0] + 0.4 * preds_dicts['all_cls_scores'][1] + 0.6 * preds_dicts['all_cls_scores'][2]
129 | # all_bbox_preds = 0. * preds_dicts['all_bbox_preds'][0] + 0.4 * preds_dicts['all_bbox_preds'][1] + 0.6 * preds_dicts['all_bbox_preds'][2]
130 |
131 | batch_size = all_cls_scores.size()[0]
132 | predictions_list = []
133 | for i in range(batch_size):
134 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i], all_iou_preds[i]))
135 | #predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i], all_iou_preds[i], all_centerness_preds[i]))
136 | return predictions_list
137 |
--------------------------------------------------------------------------------
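The flattened top-k selection in `decode_single` above picks scores jointly over (query, class) pairs, so the flat index encodes both the box index and the class label. A minimal standalone sketch, assuming random logits and a hypothetical 10-dim normalized box encoding:

```python
import torch

def topk_decode(cls_logits, bbox_preds, max_num=5):
    num_classes = cls_logits.shape[-1]
    scores = cls_logits.sigmoid().view(-1)            # (num_query * num_classes,)
    top_scores, idx = scores.topk(max_num)
    labels = idx % num_classes                        # class of each selected pair
    box_idx = idx // num_classes                      # query index of each pair
    return bbox_preds[box_idx], top_scores, labels

if __name__ == '__main__':
    cls_logits = torch.randn(10, 3)                   # 10 queries, 3 classes
    bbox_preds = torch.randn(10, 10)                  # hypothetical 10-dim normalized boxes
    boxes, scores, labels = topk_decode(cls_logits, bbox_preds)
    print(boxes.shape, scores.shape, labels.shape)    # (5, 10), (5,), (5,)
```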
/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.core.bbox.match_costs import build_match_cost
2 | from .match_cost import BBox3DL1Cost, RotatedIoU3DCost, AxisAlignedIoU3DCost, RDIoUCost, SoftFocalLossCost
3 |
4 | __all__ = ['build_match_cost', 'BBox3DL1Cost', 'RotatedIoU3DCost', 'AxisAlignedIoU3DCost', 'RDIoUCost', 'SoftFocalLossCost']
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST
3 | from mmcv.ops import diff_iou_rotated_3d
4 | from mmdet3d.core.bbox import AxisAlignedBboxOverlaps3D
5 | from projects.mmdet3d_plugin.core.bbox.util import get_rdiou
6 | from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d, bbox_overlaps_nearest_3d
7 | import torch.nn.functional as F
8 |
9 | @MATCH_COST.register_module()
10 | class BBox3DL1Cost(object):
11 | """BBox3DL1Cost.
12 | Args:
13 | weight (int | float, optional): loss_weight
14 | """
15 |
16 | def __init__(self, weight=1.):
17 | self.weight = weight
18 |
19 | def __call__(self, bbox_pred, gt_bboxes):
20 | """
21 | Args:
22 | bbox_pred (Tensor): Predicted 3D boxes in the normalized
23 | format (cx, cy, w, l, cz, h, ...). Shape
24 | [num_query, code_size].
25 | gt_bboxes (Tensor): Ground truth 3D boxes in the same
26 | normalized format. Shape [num_gt, code_size].
27 | Returns:
28 | torch.Tensor: bbox_cost value with weight
29 | """
30 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
31 | return bbox_cost * self.weight
32 |
33 |
34 | @MATCH_COST.register_module()
35 | class RotatedIoU3DCost(object):
36 |
37 | def __init__(self, weight=1.):
38 | self.weight = weight
39 |
40 | def __call__(self, bbox_pred, gt_bboxes):
41 | """
42 | Args:
43 | bbox_pred (Tensor): Predicted rotated 3D boxes
44 | (center, size, yaw). Shape
45 | [num_query, 7].
46 | gt_bboxes (Tensor): Ground truth rotated 3D boxes
47 | (center, size, yaw). Shape [num_gt, 7].
48 | Returns:
49 | torch.Tensor: bbox_cost value with weight
50 | """
51 | #print(bbox_pred.shape, gt_bboxes.shape)
52 | N = gt_bboxes.shape[0]
53 | M = bbox_pred.shape[0]
54 | bbox_costs = [diff_iou_rotated_3d(bbox_pred.unsqueeze(0), gt_bboxes[[i], :].repeat(M, 1).unsqueeze(0))[0].unsqueeze(1) for i in range(N)]
55 | bbox_cost = torch.cat(bbox_costs, 1)
56 |
57 | return bbox_cost * self.weight
58 |
59 |
60 | @MATCH_COST.register_module()
61 | class AxisAlignedIoU3DCost(object):
62 |
63 | def __init__(self, weight=1.):
64 | self.weight = weight
65 |
66 | def __call__(self, bbox_pred, gt_bboxes):
67 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()(bbox_pred, gt_bboxes)
68 | iou_loss = - axis_aligned_iou
69 | return iou_loss * self.weight
70 |
71 | @MATCH_COST.register_module()
72 | class RDIoUCost(object):
73 |
74 | def __init__(self, weight=1.):
75 | self.weight = weight
76 |
77 | def __call__(self, bbox_pred, gt_bboxes):
78 | u, rdiou = get_rdiou(bbox_pred.unsqueeze(1), gt_bboxes.unsqueeze(0))
79 |
80 | rdiou_loss_n = rdiou - u
81 | rdiou_loss_n = torch.clamp(rdiou_loss_n,min=-1.0,max = 1.0)
82 | rdiou_loss_n = 1 - rdiou_loss_n
83 | return rdiou_loss_n * self.weight
84 |
85 | @MATCH_COST.register_module()
86 | class IoU3DCost(object):
87 |
88 | def __init__(self, weight=1.):
89 | self.weight = weight
90 |
91 | def __call__(self, bbox_pred, gt_bboxes):
92 | #iou3d = 1 - bbox_overlaps_3d(bbox_pred, gt_bboxes, coordinate='depth')
93 | #iou3d = (1 - bbox_overlaps_nearest_3d(bbox_pred, gt_bboxes, coordinate='depth') )
94 | iou3d = (1 - bbox_overlaps_nearest_3d(bbox_pred, gt_bboxes, coordinate='lidar') ) ############
95 | #iou3d += (1 - bbox_overlaps_nearest_3d(bbox_pred[:, [0,2,1,3,5,4,6]], gt_bboxes[:, [0,2,1,3,5,4,6]], coordinate='depth') ) * 0.1
96 | #iou3d += (1 - bbox_overlaps_nearest_3d(bbox_pred[:, [1,2,0,4,5,3,6]], gt_bboxes[:, [1,2,0,4,5,3,6]], coordinate='depth') ) * 0.1
97 | return iou3d * self.weight
98 |
99 |
100 | @MATCH_COST.register_module()
101 | class SoftFocalLossCost(object):
102 |
103 | def __init__(self,
104 | weight=1.,
105 | alpha=0.25,
106 | gamma=2,
107 | eps=1e-12,
108 | binary_input=False):
109 | self.weight = weight
110 | self.alpha = alpha
111 | self.gamma = gamma
112 | self.eps = eps
113 | self.binary_input = binary_input
114 |
115 |
116 | def __call__(self, cls_pred, gt_labels, iou3d):
117 |
118 | cls_pred = cls_pred.sigmoid()
119 |
120 | iou3d = iou3d.pow(0.001)
121 | neg_cost = -(1 - cls_pred * iou3d + self.eps).log() * (
122 | 1 - self.alpha) * (cls_pred * iou3d).pow(self.gamma)
123 |
124 | pos_cost = -(cls_pred * iou3d + self.eps).log() * self.alpha * (
125 | 1 - cls_pred * iou3d).pow(self.gamma)
126 |
127 | cls_cost = pos_cost[:, gt_labels] - neg_cost[:, gt_labels]
128 |
129 | return cls_cost * self.weight
--------------------------------------------------------------------------------
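As a quick illustration of how these costs are consumed, the pairwise L1 term that `BBox3DL1Cost` computes is just `torch.cdist` with `p=1`; the assigner then sums the weighted classification, regression and IoU terms into one cost matrix. A standalone sketch with made-up shapes:

```python
import torch

pred = torch.rand(4, 8)          # 4 query predictions, 8 normalized box dims (illustrative)
gt = torch.rand(2, 8)            # 2 ground-truth boxes in the same encoding
reg_cost = torch.cdist(pred, gt, p=1)        # (4, 2) pairwise L1 distances
print(reg_cost.shape)

# a weighted total cost would then look like:
# cost = cls_weight * cls_cost + reg_weight * reg_cost + iou_weight * iou_cost
```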
/projects/mmdet3d_plugin/core/bbox/util.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import mmdet3d
4 |
5 | __mmdet3d_version__ = float(mmdet3d.__version__[:3])
6 |
7 |
8 | def normalize_bbox(bboxes, pc_range=None):
9 |
10 | cx = bboxes[..., 0:1]
11 | cy = bboxes[..., 1:2]
12 | cz = bboxes[..., 2:3]
13 | # align coord system with previous version
14 | if __mmdet3d_version__ < 1.0:
15 | # w = bboxes[..., 3:4]
16 | # l = bboxes[..., 4:5]
17 | # h = bboxes[..., 5:6]
18 | w = bboxes[..., 3:4].log()
19 | l = bboxes[..., 4:5].log()
20 | h = bboxes[..., 5:6].log()
21 | rot = bboxes[..., 6:7]
22 | else:
23 | # l = bboxes[..., 3:4]
24 | # w = bboxes[..., 4:5]
25 | # h = bboxes[..., 5:6]
26 | l = (bboxes[..., 3:4] + 1e-5).log()
27 | w = (bboxes[..., 4:5] + 1e-5).log()
28 | h = (bboxes[..., 5:6] + 1e-5).log()
29 | rot = bboxes[..., 6:7]
30 | rot = -rot - np.pi / 2
31 |
32 | if bboxes.size(-1) > 7:
33 | vx = bboxes[..., 7:8]
34 | vy = bboxes[..., 8:9]
35 | normalized_bboxes = torch.cat(
36 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1
37 | )
38 | else:
39 | normalized_bboxes = torch.cat(
40 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1
41 | )
42 | return normalized_bboxes
43 |
44 | def denormalize_bbox(normalized_bboxes, pc_range=None, version=0.8):
45 | # rotation
46 | rot_sine = normalized_bboxes[..., 6:7]
47 |
48 | rot_cosine = normalized_bboxes[..., 7:8]
49 | rot = torch.atan2(rot_sine, rot_cosine)
50 |
51 | # align coord system with previous version
52 | if __mmdet3d_version__ >= 1.0:
53 | rot = -rot - np.pi / 2
54 | # center in the bev
55 | cx = normalized_bboxes[..., 0:1]
56 | cy = normalized_bboxes[..., 1:2]
57 | cz = normalized_bboxes[..., 4:5]
58 |
59 | # size
60 | w = normalized_bboxes[..., 2:3]
61 | l = normalized_bboxes[..., 3:4]
62 | h = normalized_bboxes[..., 5:6]
63 |
64 | w = w.exp()
65 | l = l.exp()
66 | h = h.exp()
67 | if normalized_bboxes.size(-1) > 8:
68 | # velocity
69 | vx = normalized_bboxes[..., 8:9]
70 | vy = normalized_bboxes[..., 9:10]
71 | if __mmdet3d_version__ < 1.0:
72 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)
73 | else:
74 | denormalized_bboxes = torch.cat([cx, cy, cz, l, w, h, rot, vx, vy], dim=-1)
75 | else:
76 | if __mmdet3d_version__ < 1.0:
77 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)
78 | else:
79 | denormalized_bboxes = torch.cat([cx, cy, cz, l, w, h, rot], dim=-1)
80 | return denormalized_bboxes
81 |
82 | def bbox3d_mapping_back(bboxes, rot_degree, scale_factor, flip_horizontal, flip_vertical):
83 | """Map bboxes from testing scale to original image scale.
84 |
85 | Args:
86 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back.
87 | rot_degree (float): Rotation applied during augmentation. scale_factor (float): Scale factor.
88 | flip_horizontal (bool): Whether to flip horizontally.
89 | flip_vertical (bool): Whether to flip vertically.
90 |
91 | Returns:
92 | :obj:`BaseInstance3DBoxes`: Boxes mapped back.
93 | """
94 | new_bboxes = bboxes.clone()
95 | if flip_horizontal:
96 | new_bboxes.flip('horizontal')
97 | if flip_vertical:
98 | new_bboxes.flip('vertical')
99 | new_bboxes.scale(1 / scale_factor)
100 | new_bboxes.rotate(-rot_degree)
101 |
102 | return new_bboxes
103 |
104 | def get_rdiou(bboxes1, bboxes2):
105 | x1u, y1u, z1u = bboxes1[:,:,0], bboxes1[:,:,1], bboxes1[:,:,2]
106 | l1, w1, h1 = torch.exp(bboxes1[:,:,3]), torch.exp(bboxes1[:,:,4]), torch.exp(bboxes1[:,:,5])
107 | t1 = torch.sin(bboxes1[:,:,6]) * torch.cos(bboxes2[:,:,6])
108 | x2u, y2u, z2u = bboxes2[:,:,0], bboxes2[:,:,1], bboxes2[:,:,2]
109 | l2, w2, h2 = torch.exp(bboxes2[:,:,3]), torch.exp(bboxes2[:,:,4]), torch.exp(bboxes2[:,:,5])
110 | t2 = torch.cos(bboxes1[:,:,6]) * torch.sin(bboxes2[:,:,6])
111 |
112 | # we empirically scale the y/z to make their predictions more sensitive.
113 | x1 = x1u
114 | y1 = y1u * 2
115 | z1 = z1u * 2
116 | x2 = x2u
117 | y2 = y2u * 2
118 | z2 = z2u * 2
119 |
120 | # clamp is necessary to avoid inf.
121 | l1, w1, h1 = torch.clamp(l1, max=10), torch.clamp(w1, max=10), torch.clamp(h1, max=10)
122 | j1, j2 = torch.ones_like(h2), torch.ones_like(h2)
123 |
124 | volume_1 = l1 * w1 * h1 * j1
125 | volume_2 = l2 * w2 * h2 * j2
126 |
127 | inter_l = torch.max(x1 - l1 / 2, x2 - l2 / 2)
128 | inter_r = torch.min(x1 + l1 / 2, x2 + l2 / 2)
129 | inter_t = torch.max(y1 - w1 / 2, y2 - w2 / 2)
130 | inter_b = torch.min(y1 + w1 / 2, y2 + w2 / 2)
131 | inter_u = torch.max(z1 - h1 / 2, z2 - h2 / 2)
132 | inter_d = torch.min(z1 + h1 / 2, z2 + h2 / 2)
133 | inter_m = torch.max(t1 - j1 / 2, t2 - j2 / 2)
134 | inter_n = torch.min(t1 + j1 / 2, t2 + j2 / 2)
135 |
136 | inter_volume = torch.clamp((inter_r - inter_l),min=0) * torch.clamp((inter_b - inter_t),min=0) \
137 | * torch.clamp((inter_d - inter_u),min=0) * torch.clamp((inter_n - inter_m),min=0)
138 |
139 | c_l = torch.min(x1 - l1 / 2,x2 - l2 / 2)
140 | c_r = torch.max(x1 + l1 / 2,x2 + l2 / 2)
141 | c_t = torch.min(y1 - w1 / 2,y2 - w2 / 2)
142 | c_b = torch.max(y1 + w1 / 2,y2 + w2 / 2)
143 | c_u = torch.min(z1 - h1 / 2,z2 - h2 / 2)
144 | c_d = torch.max(z1 + h1 / 2,z2 + h2 / 2)
145 | c_m = torch.min(t1 - j1 / 2,t2 - j2 / 2)
146 | c_n = torch.max(t1 + j1 / 2,t2 + j2 / 2)
147 |
148 | inter_diag = (x2 - x1)**2 + (y2 - y1)**2 + (z2 - z1)**2 + (t2 - t1)**2
149 | c_diag = torch.clamp((c_r - c_l),min=0)**2 + torch.clamp((c_b - c_t),min=0)**2 + torch.clamp((c_d - c_u),min=0)**2 + torch.clamp((c_n - c_m),min=0)**2
150 |
151 | union = volume_1 + volume_2 - inter_volume
152 | u = (inter_diag) / c_diag
153 | rdiou = inter_volume / union
154 | return u, rdiou
--------------------------------------------------------------------------------
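A standalone round-trip sketch of the box encoding implemented by `normalize_bbox` / `denormalize_bbox` above, restricted to a 7-dim (x, y, z, w, l, h, yaw) box and mirroring the pre-1.0 branch without the version-dependent yaw flip: sizes are log-encoded and the yaw becomes a (sin, cos) pair.

```python
import torch

def encode(box):
    cx, cy, cz, w, l, h, yaw = box.unbind(-1)
    return torch.stack([cx, cy, w.log(), l.log(), cz, h.log(),
                        yaw.sin(), yaw.cos()], dim=-1)

def decode(nbox):
    cx, cy, logw, logl, cz, logh, s, c = nbox.unbind(-1)
    return torch.stack([cx, cy, cz, logw.exp(), logl.exp(), logh.exp(),
                        torch.atan2(s, c)], dim=-1)

box = torch.tensor([[1.0, 2.0, 0.5, 1.6, 3.9, 1.5, 0.3]])
print(torch.allclose(decode(encode(box)), box, atol=1e-6))   # True
```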
/projects/mmdet3d_plugin/core/merge_all_augs.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 |
4 | #from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
5 | from mmdet3d.core.bbox import bbox3d2result, xywhr2xyxyr
6 | from .bbox.util import bbox3d_mapping_back
7 | from mmdet3d.core.post_processing import nms_bev, nms_normal_bev
8 |
9 | def merge_all_aug_bboxes_3d(aug_results, img_metas, test_cfg):
10 | """Merge augmented detection 3D bboxes and scores.
11 |
12 | Args:
13 | aug_results (list[dict]): The dict of detection results.
14 | The dict contains the following keys
15 |
16 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
17 | - scores_3d (torch.Tensor): Detection scores.
18 | - labels_3d (torch.Tensor): Predicted box labels.
19 | img_metas (list[dict]): Meta information of each sample.
20 | test_cfg (dict): Test config.
21 |
22 | Returns:
23 | dict: Bounding boxes results in cpu mode, containing merged results.
24 |
25 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.
26 | - scores_3d (torch.Tensor): Merged detection scores.
27 | - labels_3d (torch.Tensor): Merged predicted box labels.
28 | """
29 |
30 | assert len(aug_results) == len(img_metas), \
31 | '"aug_results" should have the same length as "img_metas", got len(' \
32 | f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'
33 |
34 | recovered_bboxes = []
35 | recovered_scores = []
36 | recovered_labels = []
37 |
38 | for bboxes, img_info in zip(aug_results, img_metas):
39 | scale_factor = img_info[0]['pcd_scale_factor']
40 | # print(bboxes)
41 | rotate_degree = img_info[0].get('rot_degree', torch.tensor(0., device=bboxes['scores_3d'].device)) #img_info[0]['rot_degree']
42 | pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']
43 | pcd_vertical_flip = img_info[0]['pcd_vertical_flip']
44 | # print(bboxes)
45 | recovered_scores.append(bboxes['scores_3d'])
46 | recovered_labels.append(bboxes['labels_3d'])
47 | bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], rotate_degree, scale_factor, #boxes_3d
48 | pcd_horizontal_flip, pcd_vertical_flip)
49 | recovered_bboxes.append(bboxes)
50 |
51 | aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)
52 | aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)
53 | aug_scores = torch.cat(recovered_scores, dim=0)
54 | aug_labels = torch.cat(recovered_labels, dim=0)
55 |
56 | # TODO: use a more elegant way to deal with nms
57 | if True: #test_cfg.use_rotate_nms:
58 | nms_func = nms_bev #nms_gpu
59 | else:
60 | nms_func = nms_normal_bev
61 |
62 | merged_bboxes = []
63 | merged_scores = []
64 | merged_labels = []
65 |
66 | # Apply multi-class nms when merge bboxes
67 | if len(aug_labels) == 0:
68 | return bbox3d2result(aug_bboxes, aug_scores, aug_labels)
69 |
70 | for class_id in range(int(torch.max(aug_labels).item() + 1)):
71 | # print(aug_labels)
72 | class_inds = (aug_labels == class_id)
73 | bboxes_i = aug_bboxes[class_inds]
74 | bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]
75 | scores_i = aug_scores[class_inds]
76 | labels_i = aug_labels[class_inds]
77 | if len(bboxes_nms_i) == 0:
78 | continue
79 | selected = nms_func(bboxes_nms_i, scores_i, 0.1) #test_cfg.nms_thr)
80 | # print('bbb', selected)
81 | merged_bboxes.append(bboxes_i[selected, :])
82 | merged_scores.append(scores_i[selected])
83 | merged_labels.append(labels_i[selected])
84 |
85 | # print(merged_bboxes)
86 | merged_bboxes = merged_bboxes[0].cat(merged_bboxes)
87 | merged_scores = torch.cat(merged_scores, dim=0)
88 | merged_labels = torch.cat(merged_labels, dim=0)
89 |
90 | _, order = merged_scores.sort(0, descending=True)
91 | num = min(500, len(aug_bboxes)) # min(test_cfg.max_num, len(aug_bboxes))
92 | order = order[:num]
93 |
94 | merged_bboxes = merged_bboxes[order]
95 | merged_scores = merged_scores[order]
96 | merged_labels = merged_labels[order]
97 |
98 | return bbox3d2result(merged_bboxes, merged_scores, merged_labels)
99 |
--------------------------------------------------------------------------------
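The core idea of `bbox3d_mapping_back` used above is to undo the test-time augmentations in reverse order. A standalone sketch on plain (x, y) centers only, with a made-up forward transform; the repository function operates on `BaseInstance3DBoxes` and also handles vertical flips.

```python
import numpy as np

def map_back(centers, rot_rad, scale, flip_x):
    out = centers.copy()
    if flip_x:
        out[:, 0] = -out[:, 0]            # undo the horizontal flip
    out /= scale                          # undo the point-cloud scaling
    c, s = np.cos(-rot_rad), np.sin(-rot_rad)
    rot = np.array([[c, -s], [s, c]])
    return out @ rot.T                    # undo the rotation

centers = np.array([[2.0, 1.0]])
fwd_rot = np.array([[np.cos(0.2), -np.sin(0.2)],
                    [np.sin(0.2),  np.cos(0.2)]])
aug = (centers @ fwd_rot.T) * 1.1         # rotate, then scale
aug[:, 0] = -aug[:, 0]                    # then flip horizontally
print(np.allclose(map_back(aug, 0.2, 1.1, True), centers))   # True
```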
/projects/mmdet3d_plugin/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .nuscenes_dataset import NuScenesSweepDataset
2 | from .sunrgbd_dataset_ov import SUNRGBDDataset_OV
3 |
4 | __all__ = [
5 | 'NuScenesSweepDataset', 'SUNRGBDDataset_OV'
6 | ]
7 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .transform_3d import (
2 | PadMultiViewImage, NormalizeMultiviewImage,
3 | PhotoMetricDistortionMultiViewImage,
4 | RandomScaleImageMultiViewImage,
5 | ImageRandomResizeCropFlip,
6 | UnifiedRandomFlip3D, UnifiedRotScaleTrans)
7 | from .loading_3d import (LoadMultiViewMultiSweepImageFromFiles, LoadMultiViewMultiSweepImageFromFilesIndoor)
8 | from .dbsampler import UnifiedDataBaseSampler
9 | from .formatting import CollectUnified3D
10 | from .test_time_aug import MultiRotScaleFlipAug3D
11 |
12 | __all__ = [
13 | 'PadMultiViewImage', 'NormalizeMultiviewImage',
14 | 'PhotoMetricDistortionMultiViewImage', 'LoadMultiViewMultiSweepImageFromFilesIndoor',
15 | 'RandomScaleImageMultiViewImage', 'ImageRandomResizeCropFlip',
16 | 'LoadMultiViewMultiSweepImageFromFiles',
17 | 'UnifiedRandomFlip3D', 'UnifiedRotScaleTrans', 'UnifiedDataBaseSampler',
18 | 'MultiRotScaleFlipAug3D'
19 | ]
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/pipelines/formatting.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets.builder import PIPELINES
2 | from mmcv.parallel import DataContainer as DC
3 |
4 | @PIPELINES.register_module()
5 | class CollectUnified3D(object):
6 | """Collect data from the loader relevant to the specific task.
7 |
8 | This is usually the last stage of the data loader pipeline. Typically keys
9 | is set to some subset of "img", "proposals", "gt_bboxes",
10 | "gt_bboxes_ignore", "gt_labels", and/or "gt_masks".
11 |
12 | The "img_meta" item is always populated. The contents of the "img_meta"
13 | dictionary depends on "meta_keys". By default this includes:
14 |
15 | - 'img_shape': shape of the image input to the network as a tuple \
16 | (h, w, c). Note that images may be zero padded on the \
17 | bottom/right if the batch tensor is larger than this shape.
18 | - 'scale_factor': a float indicating the preprocessing scale
19 | - 'flip': a boolean indicating if image flip transform was used
20 | - 'filename': path to the image file
21 | - 'ori_shape': original shape of the image as a tuple (h, w, c)
22 | - 'pad_shape': image shape after padding
23 | - 'lidar2img': transform from lidar to image
24 | - 'depth2img': transform from depth to image
25 | - 'cam2img': transform from camera to image
26 | - 'pcd_horizontal_flip': a boolean indicating if point cloud is \
27 | flipped horizontally
28 | - 'pcd_vertical_flip': a boolean indicating if point cloud is \
29 | flipped vertically
30 | - 'box_mode_3d': 3D box mode
31 | - 'box_type_3d': 3D box type
32 | - 'img_norm_cfg': a dict of normalization information:
33 | - mean: per channel mean subtraction
34 | - std: per channel std divisor
35 | - to_rgb: bool indicating if bgr was converted to rgb
36 | - 'pcd_trans': point cloud transformations
37 | - 'sample_idx': sample index
38 | - 'pcd_scale_factor': point cloud scale factor
39 | - 'pcd_rotation': rotation applied to point cloud
40 | - 'pts_filename': path to point cloud file.
41 |
42 | Args:
43 | keys (Sequence[str]): Keys of results to be collected in ``data``.
44 | meta_keys (Sequence[str], optional): Meta keys to be converted to
45 | ``mmcv.DataContainer`` and collected in ``data[img_metas]``.
46 | Default: ('filename', 'ori_shape', 'img_shape', 'lidar2img',
47 | 'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip',
48 | 'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d',
49 | 'box_type_3d', 'img_norm_cfg', 'pcd_trans',
50 | 'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename')
51 | """
52 |
53 | def __init__(self,
54 | keys,
55 | meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
56 | 'depth2img', 'cam2img', 'pad_shape',
57 | 'scale_factor', 'flip', 'pcd_horizontal_flip',
58 | 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d',
59 | 'img_norm_cfg', 'pcd_trans', 'sample_idx',
60 | 'pcd_scale_factor', 'pcd_rotation', 'pts_filename',
61 | 'transformation_3d_flow', 'sweeps_paths', 'sweeps_ids',
62 | 'sweeps_time', 'uni_rot_aug', 'uni_trans_aug', 'uni_flip_aug',
63 | 'img_rot_aug', 'img_trans_aug', 'rot_degree')):
64 | self.keys = keys
65 | self.meta_keys = meta_keys
66 |
67 | def __call__(self, results):
68 | """Call function to collect keys in results. The keys in ``meta_keys``
69 | will be converted to :obj:`mmcv.DataContainer`.
70 |
71 | Args:
72 | results (dict): Result dict contains the data to collect.
73 |
74 | Returns:
75 | dict: The result dict contains the following keys
76 | - keys in ``self.keys``
77 | - ``img_metas``
78 | """
79 | data = {}
80 | img_metas = {}
81 | for key in self.meta_keys:
82 | if key in results:
83 | img_metas[key] = results[key]
84 |
85 | data['img_metas'] = DC(img_metas, cpu_only=True)
86 | for key in self.keys:
87 | data[key] = results[key]
88 |
89 | return data
90 |
91 | def __repr__(self):
92 | """str: Return a string that describes the module."""
93 | return self.__class__.__name__ + \
94 | f'(keys={self.keys}, meta_keys={self.meta_keys})'
--------------------------------------------------------------------------------
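A standalone sketch of what `CollectUnified3D` does, using plain dicts instead of mmcv's `DataContainer`: meta keys that are present in the results go into `img_metas`, and the requested data keys are copied through unchanged. The keys below are illustrative only.

```python
def collect(results, keys, meta_keys):
    # gather whichever meta keys exist, then copy the data keys through
    img_metas = {k: results[k] for k in meta_keys if k in results}
    data = {'img_metas': img_metas}
    data.update({k: results[k] for k in keys})
    return data

results = {'points': [[0.1, 0.2, 0.3]], 'pcd_scale_factor': 1.0, 'flip': False}
print(collect(results, keys=['points'], meta_keys=('pcd_scale_factor', 'flip')))
```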
/projects/mmdet3d_plugin/datasets/pipelines/loading_3d.py:
--------------------------------------------------------------------------------
1 | from re import I
2 | import mmcv
3 | import numpy as np
4 |
5 | from mmdet.datasets.builder import PIPELINES
6 |
7 |
8 | @PIPELINES.register_module()
9 | class LoadMultiViewMultiSweepImageFromFilesIndoor(object):
10 | """Load multi channel images from a list of separate channel files.
11 |
12 | Expects results['img_filename'] to be a list of filenames.
13 |
14 | Args:
15 | to_float32 (bool): Whether to convert the img to float32.
16 | Defaults to False.
17 | color_type (str): Color type of the file. Defaults to 'unchanged'.
18 | """
19 |
20 | def __init__(self, to_float32=False, sweep_num=1, random_sweep=False, color_type='unchanged'):
21 | self.to_float32 = to_float32
22 | self.color_type = color_type
23 | self.sweep_num = sweep_num
24 | self.random_sweep = random_sweep
25 |
26 | def __call__(self, results):
27 | """Call function to load multi-view image from files.
28 |
29 | Args:
30 | results (dict): Result dict containing multi-view image filenames.
31 |
32 | Returns:
33 | dict: The result dict containing the multi-view image data. \
34 | Added keys and values are described below.
35 |
36 | - filename (str): Multi-view image filenames.
37 | - img (np.ndarray): Multi-view image arrays.
38 | - img_shape (tuple[int]): Shape of multi-view image arrays.
39 | - ori_shape (tuple[int]): Shape of original image arrays.
40 | - pad_shape (tuple[int]): Shape of padded image arrays.
41 | - scale_factor (float): Scale factor.
42 | - img_norm_cfg (dict): Normalization configuration of images.
43 | """
44 | # print(results)
45 | filename = [results['img_info']['filename']]
46 | results['filename'] = filename
47 | # img is of shape (h, w, c, num_views)
48 | img = np.stack(
49 | [mmcv.imread(name, self.color_type) for name in filename], axis=-1)
50 |
51 | if self.to_float32:
52 | img = img.astype(np.float32)
53 |
54 | # unravel to list, see `DefaultFormatBundle` in formating.py
55 | # which will transpose each image separately and then stack into array
56 | results['img'] = [img[..., i] for i in range(img.shape[-1])]
57 | # results['img'] = results['img'][0]
58 | results['img_shape'] = img.shape
59 | results['ori_shape'] = img.shape
60 | # Set initial values for default meta_keys
61 | results['pad_shape'] = img.shape
62 | results['scale_factor'] = 1.0
63 | num_channels = 1 if len(img.shape) < 3 else img.shape[2]
64 | results['img_norm_cfg'] = dict(
65 | mean=np.zeros(num_channels, dtype=np.float32),
66 | std=np.ones(num_channels, dtype=np.float32),
67 | to_rgb=False)
68 |
69 | if 'depth2img' in results:
70 | lidar2img = np.eye(4)
71 | if results['depth2img'].shape[0] == 3:
72 | lidar2img[:3, :3] = results['depth2img']
73 | else:
74 | lidar2img[:4, :4] = results['depth2img']
75 | else:
76 | lidar2img = np.eye(4)
77 | if results['lidar2img'].shape[0] == 3:
78 | lidar2img[:3, :3] = results['lidar2img']
79 | else:
80 | lidar2img[:4, :4] = results['lidar2img']
81 | results['lidar2img'] = [lidar2img]
82 |
83 | return results
84 |
85 | def __repr__(self):
86 | """str: Return a string that describes the module."""
87 | repr_str = self.__class__.__name__
88 | repr_str += f'(to_float32={self.to_float32}, '
89 | repr_str += f"color_type='{self.color_type}')"
90 | return repr_str
91 |
92 | @PIPELINES.register_module()
93 | class LoadMultiViewMultiSweepImageFromFiles(object):
94 | """Load multi channel images from a list of separate channel files.
95 |
96 | Expects results['img_filename'] to be a list of filenames.
97 |
98 | Args:
99 | to_float32 (bool): Whether to convert the img to float32.
100 | Defaults to False.
101 | color_type (str): Color type of the file. Defaults to 'unchanged'.
102 | """
103 |
104 | def __init__(self, to_float32=False, sweep_num=1, random_sweep=False, color_type='unchanged'):
105 | self.to_float32 = to_float32
106 | self.color_type = color_type
107 | self.sweep_num = sweep_num
108 | self.random_sweep = random_sweep
109 |
110 | def __call__(self, results):
111 | """Call function to load multi-view image from files.
112 |
113 | Args:
114 | results (dict): Result dict containing multi-view image filenames.
115 |
116 | Returns:
117 | dict: The result dict containing the multi-view image data. \
118 | Added keys and values are described below.
119 |
120 | - filename (str): Multi-view image filenames.
121 | - img (np.ndarray): Multi-view image arrays.
122 | - img_shape (tuple[int]): Shape of multi-view image arrays.
123 | - ori_shape (tuple[int]): Shape of original image arrays.
124 | - pad_shape (tuple[int]): Shape of padded image arrays.
125 | - scale_factor (float): Scale factor.
126 | - img_norm_cfg (dict): Normalization configuration of images.
127 | """
128 | filename = results['img_filename']
129 | results['filename'] = filename
130 | # img is of shape (h, w, c, num_views)
131 | img = np.stack(
132 | [mmcv.imread(name, self.color_type) for name in filename], axis=-1)
133 |
134 | img_sweeps = []
135 | sweeps_paths = results['cam_sweeps_paths']
136 | sweeps_ids = results['cam_sweeps_id']
137 | sweeps_time = results['cam_sweeps_time']
138 | if self.random_sweep:
139 | random_num = np.random.randint(0, self.sweep_num)
140 | sweeps_paths = [_sweep[:random_num] for _sweep in sweeps_paths]
141 | sweeps_ids = [_sweep[:random_num] for _sweep in sweeps_ids]
142 | else:
143 | random_num = self.sweep_num
144 |
145 | for _idx in range(len(sweeps_paths[0])):
146 | _sweep = np.stack(
147 | [mmcv.imread(name_list[_idx], self.color_type) for name_list in sweeps_paths], axis=-1)
148 | img_sweeps.append(_sweep)
149 |
150 | # add img sweeps to raw image
151 | img = np.stack([img, *img_sweeps], axis=-1)
152 | # img is of shape (h, w, c, num_views * sweep_num)
153 | img = img.reshape(*img.shape[:-2], -1)
154 |
155 | if self.to_float32:
156 | img = img.astype(np.float32)
157 |
158 | results['sweeps_paths'] = [[filename[_idx]] + sweeps_paths[_idx] for _idx in range(len(filename))]
159 | results['sweeps_ids'] = np.stack([[0]+_id for _id in sweeps_ids], axis=-1)
160 | results['sweeps_time'] = np.stack([[0]+_time for _time in sweeps_time], axis=-1)
161 | # unravel to list, see `DefaultFormatBundle` in formating.py
162 | # which will transpose each image separately and then stack into array
163 | results['img'] = [img[..., i] for i in range(img.shape[-1])]
164 | results['img_shape'] = img.shape
165 | results['ori_shape'] = img.shape
166 | # Set initial values for default meta_keys
167 | results['pad_shape'] = img.shape
168 | results['scale_factor'] = 1.0
169 | num_channels = 1 if len(img.shape) < 3 else img.shape[2]
170 | results['img_norm_cfg'] = dict(
171 | mean=np.zeros(num_channels, dtype=np.float32),
172 | std=np.ones(num_channels, dtype=np.float32),
173 | to_rgb=False)
174 |
175 | # add sweep matrix to raw matrix
176 | results['lidar2img'] = [np.stack([results['lidar2img'][_idx],
177 | *results['lidar2img_sweeps'][_idx][:random_num]], axis=0)
178 | for _idx in range(len(results['lidar2img']))]
179 | results['lidar2cam'] = [np.stack([results['lidar2cam'][_idx],
180 | *results['lidar2cam_sweeps'][_idx][:random_num]], axis=0)
181 | for _idx in range(len(results['lidar2cam']))]
182 | results['cam_intrinsic'] = [np.stack([results['cam_intrinsic'][_idx],
183 | *results['cam_sweeps_intrinsics'][_idx][:random_num]], axis=0)
184 | for _idx in range(len(results['cam_intrinsic']))]
185 | results.pop('lidar2img_sweeps')
186 | results.pop('lidar2cam_sweeps')
187 | results.pop('cam_sweeps_intrinsics')
188 |
189 | return results
190 |
191 | def __repr__(self):
192 | """str: Return a string that describes the module."""
193 | repr_str = self.__class__.__name__
194 | repr_str += f'(to_float32={self.to_float32}, '
195 | repr_str += f"color_type='{self.color_type}')"
196 | return repr_str
--------------------------------------------------------------------------------
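Both loaders above build the same image layout: per-view images are stacked along a trailing view axis and then unravelled back into a list so that `DefaultFormatBundle` can transpose each view separately. A standalone numpy sketch with made-up sizes:

```python
import numpy as np

views = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(6)]   # 6 dummy camera views
img = np.stack(views, axis=-1)                          # (H, W, C, num_views)
img_list = [img[..., i] for i in range(img.shape[-1])]  # back to a list of (H, W, C) arrays
print(img.shape, len(img_list), img_list[0].shape)      # (480, 640, 3, 6) 6 (480, 640, 3)
```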
/projects/mmdet3d_plugin/datasets/pipelines/test_time_aug.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import mmcv
3 | import warnings
4 | from copy import deepcopy
5 |
6 | from mmdet.datasets.builder import PIPELINES
7 | from mmdet.datasets.pipelines import Compose
8 |
9 |
10 | @PIPELINES.register_module()
11 | class MultiRotScaleFlipAug3D(object):
12 | """Test-time augmentation with multiple scales and flipping.
13 |
14 | Args:
15 | transforms (list[dict]): Transforms to apply in each augmentation.
16 | img_scale (tuple | list[tuple]): Image scales for resizing.
17 | pts_scale_ratio (float | list[float]): Points scale ratios for
18 | resizing.
19 | flip (bool): Whether apply flip augmentation. Defaults to False.
20 | flip_direction (str | list[str]): Flip augmentation directions
21 | for images, options are "horizontal" and "vertical".
22 | If flip_direction is list, multiple flip augmentations will
23 | be applied. It has no effect when ``flip == False``.
24 | Defaults to "horizontal".
25 | pcd_horizontal_flip (bool): Whether apply horizontal flip augmentation
26 | to point cloud. Defaults to True. Note that it works only when
27 | 'flip' is turned on.
28 | pcd_vertical_flip (bool): Whether apply vertical flip augmentation
29 | to point cloud. Defaults to True. Note that it works only when
30 | 'flip' is turned on.
31 | """
32 |
33 | def __init__(self,
34 | transforms,
35 | img_scale,
36 | pts_scale_ratio,
37 | rotate_degree=[0.0],
38 | flip=False,
39 | flip_direction='horizontal',
40 | pcd_horizontal_flip=False,
41 | pcd_vertical_flip=False):
42 | self.transforms = Compose(transforms)
43 | self.img_scale = img_scale if isinstance(img_scale,
44 | list) else [img_scale]
45 | self.pts_scale_ratio = pts_scale_ratio \
46 | if isinstance(pts_scale_ratio, list) else [float(pts_scale_ratio)]
47 |
48 | assert mmcv.is_list_of(self.img_scale, tuple)
49 | assert mmcv.is_list_of(self.pts_scale_ratio, float)
50 |
51 | self.rotate_degree = rotate_degree
52 |
53 | self.flip = flip
54 | self.pcd_horizontal_flip = pcd_horizontal_flip
55 | self.pcd_vertical_flip = pcd_vertical_flip
56 |
57 | self.flip_direction = flip_direction if isinstance(
58 | flip_direction, list) else [flip_direction]
59 | assert mmcv.is_list_of(self.flip_direction, str)
60 | if not self.flip and self.flip_direction != ['horizontal']:
61 | warnings.warn(
62 | 'flip_direction has no effect when flip is set to False')
63 | if (self.flip and not any([(t['type'] == 'RandomFlip3D'
64 | or t['type'] == 'RandomFlip')
65 | for t in transforms])):
66 | warnings.warn(
67 | 'flip has no effect when RandomFlip is not in transforms')
68 |
69 | def __call__(self, results):
70 | """Call function to augment common fields in results.
71 |
72 | Args:
73 | results (dict): Result dict contains the data to augment.
74 |
75 | Returns:
76 | dict: The result dict contains the data that is augmented with \
77 | different scales and flips.
78 | """
79 | aug_data = []
80 |
81 | # modified from `flip_aug = [False, True] if self.flip else [False]`
82 | # to reduce unnecessary scenes when using double flip augmentation
83 | # during test time
84 | flip_aug = [True] if self.flip else [False]
85 | pcd_horizontal_flip_aug = [False, True] \
86 | if self.flip and self.pcd_horizontal_flip else [False]
87 | pcd_vertical_flip_aug = [False, True] \
88 | if self.flip and self.pcd_vertical_flip else [False]
89 | for rot_degree in self.rotate_degree:
90 | for scale in self.img_scale:
91 | for pts_scale_ratio in self.pts_scale_ratio:
92 | for flip in flip_aug:
93 | for pcd_horizontal_flip in pcd_horizontal_flip_aug:
94 | for pcd_vertical_flip in pcd_vertical_flip_aug:
95 | for direction in self.flip_direction:
96 | # results.copy will cause bug
97 | # since it is shallow copy
98 | _results = deepcopy(results)
99 | _results['rot_degree'] = rot_degree
100 | _results['scale'] = scale
101 | _results['flip'] = flip
102 | _results['pcd_scale_factor'] = \
103 | pts_scale_ratio
104 | _results['flip_direction'] = direction
105 | _results['pcd_horizontal_flip'] = \
106 | pcd_horizontal_flip
107 | _results['pcd_vertical_flip'] = \
108 | pcd_vertical_flip
109 | data = self.transforms(_results)
110 | aug_data.append(data)
111 | # list of dict to dict of list
112 | aug_data_dict = {key: [] for key in aug_data[0]}
113 | for data in aug_data:
114 | for key, val in data.items():
115 | aug_data_dict[key].append(val)
116 | return aug_data_dict
117 |
118 | def __repr__(self):
119 | """str: Return a string that describes the module."""
120 | repr_str = self.__class__.__name__
121 | repr_str += f'(transforms={self.transforms}, '
122 | repr_str += f'img_scale={self.img_scale}, flip={self.flip}, '
123 | repr_str += f'pts_scale_ratio={self.pts_scale_ratio}, '
124 | repr_str += f'flip_direction={self.flip_direction})'
125 | return repr_str
126 |
--------------------------------------------------------------------------------
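The final reshaping in `MultiRotScaleFlipAug3D.__call__` turns the list of per-augmentation result dicts into one dict of lists, keyed like the first augmentation's result. A minimal standalone sketch with placeholder values:

```python
aug_data = [{'points': 'pts_aug0', 'img_metas': {'scale': 1.0}},
            {'points': 'pts_aug1', 'img_metas': {'scale': 0.9}}]

# list of dicts -> dict of lists, one list entry per augmentation
aug_data_dict = {key: [] for key in aug_data[0]}
for data in aug_data:
    for key, val in data.items():
        aug_data_dict[key].append(val)
print(aug_data_dict)   # {'points': [...], 'img_metas': [...]}
```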
/projects/mmdet3d_plugin/datasets/sunrgbd_dataset_ov.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import mmcv
3 | import numpy as np
4 | import pyquaternion
5 | import tempfile
6 | from nuscenes.utils.data_classes import Box as NuScenesBox
7 | from os import path as osp
8 |
9 | from ..core.indoor_eval import indoor_eval_ov
10 |
11 | import mmdet3d
12 | #from mmdet.datasets import DATASETS
13 | from mmdet3d.datasets import DATASETS
14 | from mmdet3d.core import show_result
15 | from mmdet3d.core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes
16 | from mmdet3d.datasets import SUNRGBDDataset
17 |
18 | __mmdet3d_version__ = float(mmdet3d.__version__[:3])
19 |
20 | @DATASETS.register_module()
21 | class SUNRGBDDataset_OV(SUNRGBDDataset):
22 |
23 | def __init__(self,
24 | data_root,
25 | ann_file,
26 | pipeline=None,
27 | classes=None,
28 | seen_classes=None,
29 | modality=dict(use_camera=True, use_lidar=True),
30 | box_type_3d='Depth',
31 | filter_empty_gt=True,
32 | test_mode=False,
33 | **kwargs):
34 | super().__init__(
35 | data_root=data_root,
36 | ann_file=ann_file,
37 | pipeline=pipeline,
38 | classes=classes,
39 | modality=modality,
40 | box_type_3d=box_type_3d,
41 | filter_empty_gt=filter_empty_gt,
42 | test_mode=test_mode,
43 | **kwargs)
44 |
45 | self.seen_classes = seen_classes
46 | self.classes = seen_classes
47 |
48 | def evaluate(self,
49 | results,
50 | metric=None,
51 | iou_thr=(0.25, 0.5),
52 | iou_thr_2d=(0.25, 0.5),
53 | logger=None,
54 | show=False,
55 | out_dir=None,
56 | pipeline=None,
57 | axis_aligned_lw=False):
58 | """Evaluate.
59 |
60 | Evaluation in indoor protocol.
61 |
62 | Args:
63 | results (list[dict]): List of results.
64 | metric (str | list[str], optional): Metrics to be evaluated.
65 | Default: None.
66 | iou_thr (list[float], optional): AP IoU thresholds for 3D
67 | evaluation. Default: (0.25, 0.5).
68 | iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
69 | evaluation. Default: (0.25, 0.5).
70 | show (bool, optional): Whether to visualize.
71 | Default: False.
72 | out_dir (str, optional): Path to save the visualization results.
73 | Default: None.
74 | pipeline (list[dict], optional): raw data loading for showing.
75 | Default: None.
76 |
77 | Returns:
78 | dict: Evaluation results.
79 | """
80 | assert isinstance(
81 | results, list), f'Expect results to be list, got {type(results)}.'
82 | assert len(results) > 0, 'Expect length of results > 0.'
83 | assert len(results) == len(self.data_infos)
84 | assert isinstance(
85 | results[0], dict
86 | ), f'Expect elements in results to be dict, got {type(results[0])}.'
87 | gt_annos = [info['annos'] for info in self.data_infos]
88 | label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
89 | ret_dict = indoor_eval_ov(
90 | self.seen_classes,
91 | gt_annos,
92 | results,
93 | iou_thr,
94 | label2cat,
95 | logger=logger,
96 | box_type_3d=self.box_type_3d,
97 | box_mode_3d=self.box_mode_3d,
98 | axis_aligned_lw=axis_aligned_lw)
99 | if show:
100 | self.show(results, out_dir, pipeline=pipeline)
101 |
102 | return ret_dict
103 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .vovnet import VoVNet
2 | from .second_3d import SECOND3D
3 |
4 | __all__ = ['VoVNet', 'SECOND3D']
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/backbones/second_3d.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 | import warnings
4 | from mmcv.cnn import build_conv_layer, build_norm_layer
5 | from mmcv.runner import BaseModule
6 | from torch import nn as nn
7 |
8 | from mmdet.models import BACKBONES
9 |
10 |
11 | @BACKBONES.register_module()
12 | class SECOND3D(BaseModule):
13 | """Modified Backbone network for SECOND.
14 |
15 | Args:
16 | in_channels (int): Input channels.
17 | out_channels (list[int]): Output channels for multi-scale feature maps.
18 | layer_nums (list[int]): Number of layers in each stage.
19 | layer_strides (list[int]): Strides of each stage.
20 | norm_cfg (dict): Config dict of normalization layers.
21 | conv_cfg (dict): Config dict of convolutional layers.
22 | """
23 |
24 | def __init__(self,
25 | in_channels=128,
26 | out_channels=[128, 128, 256],
27 | layer_nums=[3, 5, 5],
28 | layer_strides=[2, 2, 2],
29 | is_cascade=True,
30 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
31 | conv_cfg=dict(type='Conv3d', bias=False),
32 | init_cfg=None,
33 | pretrained=None):
34 | super(SECOND3D, self).__init__(init_cfg=init_cfg)
35 | assert len(layer_strides) == len(layer_nums)
36 | assert len(out_channels) == len(layer_nums)
37 |
38 | if isinstance(in_channels, list):
39 | in_filters = in_channels
40 | else:
41 | in_filters = [in_channels, *out_channels[:-1]]
42 | # note that when stride > 1, conv2d with same padding isn't
43 | # equal to pad-conv2d. we should use pad-conv2d.
44 | blocks = []
45 | self.is_cascade = is_cascade
46 | self.kernel_type = conv_cfg.type
47 | if "kernel" in conv_cfg:
48 | kernel = conv_cfg.pop("kernel")
49 | else:
50 | kernel = (1,3,3)
51 | padding = tuple([(_kernel-1)//2 for _kernel in kernel])
52 | for i, layer_num in enumerate(layer_nums):
53 | block = [
54 | build_conv_layer(
55 | conv_cfg,
56 | in_filters[i],
57 | out_channels[i],
58 | kernel,
59 | stride=(1,layer_strides[i],layer_strides[i]) if len(padding)==3 else (layer_strides[i],layer_strides[i]),
60 | padding=padding),
61 | build_norm_layer(norm_cfg, out_channels[i])[1],
62 | nn.ReLU(inplace=True),
63 | ]
64 | for j in range(layer_num):
65 | block.append(
66 | build_conv_layer(
67 | conv_cfg,
68 | out_channels[i],
69 | out_channels[i],
70 | kernel,
71 | padding=padding))
72 | block.append(build_norm_layer(norm_cfg, out_channels[i])[1])
73 | block.append(nn.ReLU(inplace=True))
74 |
75 | block = nn.Sequential(*block)
76 | blocks.append(block)
77 |
78 | self.blocks = nn.ModuleList(blocks)
79 |
80 | assert not (init_cfg and pretrained), \
81 | 'init_cfg and pretrained cannot be setting at the same time'
82 | if isinstance(pretrained, str):
83 | warnings.warn('DeprecationWarning: pretrained is deprecated, '
84 | 'please use "init_cfg" instead')
85 | self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
86 | else:
87 | self.init_cfg = dict(type='Kaiming', layer=self.kernel_type)
88 |
89 | def forward(self, x):
90 | """Forward function.
91 |
92 | Args:
93 | x (torch.Tensor): Input with shape (N, C, H, W).
94 |
95 | Returns:
96 | tuple[torch.Tensor]: Multi-scale features.
97 | """
98 | outs = []
99 | batch = x.shape[0]
100 | if self.kernel_type == "Conv2d":
101 | x = x.transpose(1,2).flatten(0,1)
102 |
103 | for i in range(len(self.blocks)):
104 | if self.is_cascade:
105 | x = self.blocks[i](x)
106 | outs.append(x)
107 | else:
108 | out = self.blocks[i](x)
109 | outs.append(out)
110 |
111 | if self.kernel_type == "Conv2d":
112 | outs = [_out.reshape(batch, -1, *_out.shape[-3:]).transpose(1,2) for _out in outs]
113 |
114 | return tuple(outs)
115 |
--------------------------------------------------------------------------------
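A standalone sketch of one SECOND3D-style stage in plain PyTorch, under the default config above: a strided (1, 3, 3) convolution downsamples the spatial dims, followed by `layer_num` same-padding convolutions, each with BatchNorm3d and ReLU. This is illustrative and omits the mmcv builder plumbing.

```python
import torch
from torch import nn

def make_stage(in_ch, out_ch, layer_num, stride):
    # strided conv first, then layer_num convs that keep the resolution
    layers = [nn.Conv3d(in_ch, out_ch, (1, 3, 3), stride=(1, stride, stride),
                        padding=(0, 1, 1), bias=False),
              nn.BatchNorm3d(out_ch), nn.ReLU(inplace=True)]
    for _ in range(layer_num):
        layers += [nn.Conv3d(out_ch, out_ch, (1, 3, 3), padding=(0, 1, 1), bias=False),
                   nn.BatchNorm3d(out_ch), nn.ReLU(inplace=True)]
    return nn.Sequential(*layers)

stage = make_stage(64, 128, layer_num=3, stride=2)
x = torch.randn(1, 64, 8, 100, 100)        # (N, C, D, H, W) voxel features (made-up sizes)
print(stage(x).shape)                      # torch.Size([1, 128, 8, 50, 50])
```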
/projects/mmdet3d_plugin/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .uni3detr_head import Uni3DETRHead
2 | from .uni3detr_head_clip import Uni3DETRHeadCLIP
3 |
4 | __all__ = ['Uni3DETRHead', 'Uni3DETRHeadCLIP']
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .uni3detr import Uni3DETR
2 | from .ov_uni3detr import OV_Uni3DETR
3 |
4 | __all__ = ['Uni3DETR', 'OV_Uni3DETR']
5 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .rdiouloss import RDIoULoss, IoU3DLoss, SoftFocalLoss
2 |
3 | __all__ = ['RDIoULoss', 'IoU3DLoss', 'SoftFocalLoss']
4 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/losses/rdiouloss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from projects.mmdet3d_plugin.core.bbox.util import get_rdiou
4 | from torch import nn as nn
5 | import torch.nn.functional as F
6 |
7 | from mmdet.models.losses.utils import weighted_loss
8 | from mmdet.models.losses.utils import weight_reduce_loss
9 | from mmdet.models import LOSSES
10 | from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d, bbox_overlaps_nearest_3d
11 |
12 | @weighted_loss
13 | def rd_iou_loss(pred, target):
14 | """Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes.
15 | Note that predictions and targets are one-to-one corresponded.
16 |
17 | Args:
18 | pred (torch.Tensor): Bbox predictions with shape [N, 7]
19 | (x, y, z, w, l, h, alpha).
20 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7]
21 | (x, y, z, w, l, h, alpha).
22 |
23 | Returns:
24 | torch.Tensor: IoU loss between predictions and targets.
25 | """
26 | u, rdiou = get_rdiou(pred.unsqueeze(0), target.unsqueeze(0))
27 | u, rdiou = u[0], rdiou[0]
28 |
29 | rdiou_loss_n = rdiou - u
30 |     rdiou_loss_n = torch.clamp(rdiou_loss_n, min=-1.0, max=1.0)
31 | rdiou_loss_n = 1 - rdiou_loss_n
32 | return rdiou_loss_n
33 |
34 |
35 | @LOSSES.register_module()
36 | class RDIoULoss(nn.Module):
37 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes.
38 |
39 | Args:
40 | reduction (str): Method to reduce losses.
41 |             The valid reduction methods are 'none', 'sum' and 'mean'.
42 | loss_weight (float, optional): Weight of loss. Defaults to 1.0.
43 | """
44 |
45 | def __init__(self, reduction='mean', loss_weight=1.0):
46 | super().__init__()
47 | self.reduction = reduction
48 | self.loss_weight = loss_weight
49 |
50 | def forward(self,
51 | pred,
52 | target,
53 | weight=None,
54 | avg_factor=None,
55 | reduction_override=None,
56 | **kwargs):
57 | """Forward function of loss calculation.
58 |
59 | Args:
60 | pred (torch.Tensor): Bbox predictions with shape [..., 7]
61 | (x, y, z, w, l, h, alpha).
62 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7]
63 | (x, y, z, w, l, h, alpha).
64 | weight (torch.Tensor | float, optional): Weight of loss.
65 | Defaults to None.
66 | avg_factor (int, optional): Average factor that is used to average
67 | the loss. Defaults to None.
68 | reduction_override (str, optional): Method to reduce losses.
69 | The valid reduction method are 'none', 'sum' or 'mean'.
70 | Defaults to None.
71 |
72 | Returns:
73 | torch.Tensor: IoU loss between predictions and targets.
74 | """
75 | if weight is not None and not torch.any(weight > 0):
76 | return pred.sum() * weight.sum() # 0
77 | assert reduction_override in (None, 'none', 'mean', 'sum')
78 | reduction = (
79 | reduction_override if reduction_override else self.reduction)
80 | if weight is not None and weight.dim() > 1:
81 | weight = weight.mean(-1)
82 | loss = self.loss_weight * rd_iou_loss(
83 | pred,
84 | target,
85 | weight,
86 | reduction=reduction,
87 | avg_factor=avg_factor,
88 | **kwargs)
89 |
90 | return loss
91 |
92 |
93 | @weighted_loss
94 | def iou3d_loss(pred, target):
95 | #iou3d = bbox_overlaps_3d(pred, target, coordinate='depth')
96 | #iou3d = 1 - torch.diag(iou3d)
97 |
98 | #iou3d = (1 - bbox_overlaps_nearest_3d(pred, target, is_aligned=True, coordinate='depth') )
99 | iou3d = (1 - bbox_overlaps_nearest_3d(pred, target, is_aligned=True, coordinate='lidar') )
100 | #iou3d += (1 - bbox_overlaps_nearest_3d(pred[:, [0,2,1,3,5,4,6]], target[:, [0,2,1,3,5,4,6]], is_aligned=True, coordinate='depth') ) * 0.1
101 | #iou3d += (1 - bbox_overlaps_nearest_3d(pred[:, [1,2,0,4,5,3,6]], target[:, [1,2,0,4,5,3,6]], is_aligned=True, coordinate='depth') ) * 0.1
102 | return iou3d
103 |
104 |
105 | @LOSSES.register_module()
106 | class IoU3DLoss(nn.Module):
107 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes.
108 |
109 | Args:
110 | reduction (str): Method to reduce losses.
111 |             The valid reduction methods are 'none', 'sum' and 'mean'.
112 | loss_weight (float, optional): Weight of loss. Defaults to 1.0.
113 | """
114 |
115 | def __init__(self, reduction='mean', loss_weight=1.0):
116 | super().__init__()
117 | self.reduction = reduction
118 | self.loss_weight = loss_weight
119 |
120 | def forward(self,
121 | pred,
122 | target,
123 | weight=None,
124 | avg_factor=None,
125 | reduction_override=None,
126 | **kwargs):
127 | """Forward function of loss calculation.
128 |
129 | Args:
130 | pred (torch.Tensor): Bbox predictions with shape [..., 7]
131 | (x, y, z, w, l, h, alpha).
132 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7]
133 | (x, y, z, w, l, h, alpha).
134 | weight (torch.Tensor | float, optional): Weight of loss.
135 | Defaults to None.
136 | avg_factor (int, optional): Average factor that is used to average
137 | the loss. Defaults to None.
138 | reduction_override (str, optional): Method to reduce losses.
139 |                 The valid reduction methods are 'none', 'sum' and 'mean'.
140 | Defaults to None.
141 |
142 | Returns:
143 | torch.Tensor: IoU loss between predictions and targets.
144 | """
145 | if weight is not None and not torch.any(weight > 0):
146 | return pred.sum() * weight.sum() # 0
147 | assert reduction_override in (None, 'none', 'mean', 'sum')
148 | reduction = (
149 | reduction_override if reduction_override else self.reduction)
150 | if weight is not None and weight.dim() > 1:
151 | weight = weight.mean(-1)
152 | loss = self.loss_weight * iou3d_loss(
153 | pred,
154 | target,
155 | weight,
156 | reduction=reduction,
157 | avg_factor=avg_factor,
158 | **kwargs)
159 |
160 | return loss
161 |
162 | def soft_focal_loss(pred,
163 | target,
164 | weight=None,
165 | gamma=2.0,
166 | alpha=0.25,
167 | reduction='mean',
168 | avg_factor=None):
169 | pred_sigmoid = pred.sigmoid()
170 |
171 | target, target_score = target[0], target[1]
172 | target_oh = torch.zeros((pred_sigmoid.shape[0], pred.shape[1] + 1)).type_as(pred).to(pred.device)
173 | target_oh.scatter_(1, target[:,None], 1)
174 | target_oh = target_oh[:,0:-1]
175 | target = target[:,None]
176 |
177 | target_soft = (target_oh > 0).float() * target_score[:,None]
178 | pt = target_soft - pred_sigmoid
179 | focal_weight = ((1 - alpha) + (2*alpha - 1) * target_soft) * pt.pow(gamma)
180 | loss = F.binary_cross_entropy_with_logits(pred, target_soft, reduction='none') * focal_weight
181 |
182 |     weight = weight.view(-1, 1) if weight is not None else None
183 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
184 | return loss
185 |
186 | @LOSSES.register_module()
187 | class SoftFocalLoss(nn.Module):
188 |
189 | def __init__(self,
190 | use_sigmoid=True,
191 | gamma=2.0,
192 | alpha=0.25,
193 | reduction='mean',
194 | loss_weight=1.0):
195 | super(SoftFocalLoss, self).__init__()
196 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'
197 | self.use_sigmoid = use_sigmoid
198 | self.gamma = gamma
199 | self.alpha = alpha
200 | self.reduction = reduction
201 | self.loss_weight = loss_weight
202 |
203 | def forward(self,
204 | pred,
205 | target,
206 | weight=None,
207 | avg_factor=None,
208 | reduction_override=None):
209 | assert reduction_override in (None, 'none', 'mean', 'sum')
210 | reduction = (
211 | reduction_override if reduction_override else self.reduction)
212 | if self.use_sigmoid:
213 | loss_cls = self.loss_weight * soft_focal_loss(
214 | pred,
215 | target,
216 | weight,
217 | gamma=self.gamma,
218 | alpha=self.alpha,
219 | reduction=reduction,
220 | avg_factor=avg_factor)
221 | else:
222 | raise NotImplementedError
223 | return loss_cls
--------------------------------------------------------------------------------
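Because all three losses are registered in mmdet's LOSSES registry, they can be built from config dicts in the usual mmdet way. The following is a hedged sketch (the loss weights and tensor sizes are illustrative, not the values used by the configs under projects/configs); note that SoftFocalLoss expects its target as a (labels, label_scores) pair, exactly as unpacked in soft_focal_loss above.

import torch
from mmdet.models import build_loss
# Importing the plugin module registers the losses with mmdet's LOSSES registry.
from projects.mmdet3d_plugin.models.losses import RDIoULoss, IoU3DLoss, SoftFocalLoss  # noqa: F401

# Illustrative weights only; the real values come from projects/configs.
loss_iou = build_loss(dict(type='IoU3DLoss', loss_weight=1.0))
loss_rdiou = build_loss(dict(type='RDIoULoss', loss_weight=1.0))
loss_cls = build_loss(dict(type='SoftFocalLoss', gamma=2.0, alpha=0.25, loss_weight=1.0))

# IoU3DLoss / RDIoULoss take (x, y, z, w, l, h, alpha) boxes, one-to-one aligned.
pred_boxes = torch.rand(4, 7)
gt_boxes = torch.rand(4, 7)
box_weight = torch.ones(4)
print(loss_iou(pred_boxes, gt_boxes, box_weight, avg_factor=4))

# SoftFocalLoss: logits over K foreground classes; label K denotes background.
logits = torch.randn(4, 10)
labels = torch.randint(0, 11, (4,))
label_scores = torch.rand(4)
print(loss_cls(logits, (labels, label_scores), torch.ones(4), avg_factor=4))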
/projects/mmdet3d_plugin/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .second3d_fpn import SECOND3DFPN
3 |
4 | __all__ = ['SECOND3DFPN']
5 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/necks/second3d_fpn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import numpy as np
3 | import torch
4 | from mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer
5 | from mmcv.runner import BaseModule, auto_fp16
6 | from torch import nn as nn
7 |
8 | from mmdet.models import NECKS
9 |
10 |
11 | @NECKS.register_module()
12 | class SECOND3DFPN(BaseModule):
13 | """Modified FPN used in SECOND.
14 |
15 | Args:
16 | in_channels (list[int]): Input channels of multi-scale feature maps.
17 | out_channels (list[int]): Output channels of feature maps.
18 | upsample_strides (list[int]): Strides used to upsample the
19 | feature maps.
20 | norm_cfg (dict): Config dict of normalization layers.
21 | upsample_cfg (dict): Config dict of upsample layers.
22 | conv_cfg (dict): Config dict of conv layers.
23 | use_conv_for_no_stride (bool): Whether to use conv when stride is 1.
24 |         use_for_distill (bool): Whether the neck output is used for cross-modality distillation.
25 | """
26 |
27 | def __init__(self,
28 | in_channels=[128, 128, 256],
29 | out_channels=[256, 256, 256],
30 | upsample_strides=[1, 2, 4],
31 | norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
32 | upsample_cfg=dict(type='deconv3d', bias=False),
33 | conv_cfg=dict(type='Conv3d', bias=False),
34 | extra_conv=None,
35 | use_conv_for_no_stride=False,
36 | use_for_distill=False,
37 | init_cfg=None):
38 | # if for GroupNorm,
39 | # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True)
40 | super(SECOND3DFPN, self).__init__(init_cfg=init_cfg)
41 | assert len(out_channels) == len(upsample_strides) == len(in_channels)
42 | self.in_channels = in_channels
43 | self.out_channels = out_channels
44 | self.extra_conv = extra_conv
45 | self.fp16_enabled = False
46 | self.use_for_distill = use_for_distill
47 |
48 | deblocks = []
49 | for i, out_channel in enumerate(out_channels):
50 | stride = upsample_strides[i]
51 | if stride > 1 or (stride == 1 and not use_conv_for_no_stride):
52 | upsample_layer = build_upsample_layer(
53 | upsample_cfg,
54 | in_channels=in_channels[i],
55 | out_channels=out_channel,
56 | kernel_size=(1,stride,stride) if '3d' in upsample_cfg['type'] else (stride,stride),
57 | stride=(1,stride,stride) if '3d' in upsample_cfg['type'] else (stride,stride))
58 | else:
59 | stride = np.round(1 / stride).astype(np.int64)
60 | upsample_layer = build_conv_layer(
61 | conv_cfg,
62 | in_channels=in_channels[i],
63 | out_channels=out_channel,
64 | kernel_size=(1,stride,stride) if '3d' in conv_cfg['type'] else (stride,stride),
65 | stride=(1,stride,stride) if '3d' in conv_cfg['type'] else (stride,stride))
66 |
67 | deblock = nn.Sequential(upsample_layer,
68 | build_norm_layer(norm_cfg, out_channel)[1],
69 | nn.ReLU(inplace=True))
70 | deblocks.append(deblock)
71 | self.deblocks = nn.ModuleList(deblocks)
72 |
73 | if self.extra_conv is not None:
74 | extra_blocks = []
75 | self.layer_num = self.extra_conv.pop('num_conv')
76 | if "kernel" in self.extra_conv:
77 | kernel = self.extra_conv.pop("kernel")
78 | else:
79 | kernel = (3,3,3)
80 | padding = tuple([(_k-1)//2 for _k in kernel])
81 | if "sep_kernel" in self.extra_conv:
82 | sep_kernel = self.extra_conv.pop("sep_kernel")
83 | sep_padding = tuple([(_k-1)//2 for _k in sep_kernel])
84 | else:
85 | sep_kernel = None
86 | for j in range(self.layer_num):
87 | extra_blocks.append(
88 | build_conv_layer(
89 | self.extra_conv,
90 | out_channels[-1],
91 | out_channels[-1],
92 | kernel,
93 | padding=padding))
94 | if sep_kernel:
95 | extra_blocks.append(
96 | build_conv_layer(
97 | self.extra_conv,
98 | out_channels[-1],
99 | out_channels[-1],
100 | sep_kernel,
101 | padding=sep_padding))
102 | extra_blocks.append(build_norm_layer(norm_cfg, out_channels[-1])[1])
103 | extra_blocks.append(nn.ReLU(inplace=True))
104 | self.extra_blocks = nn.Sequential(*extra_blocks)
105 |
106 | if init_cfg is None:
107 | self.init_cfg = [
108 | dict(type='Kaiming', layer='ConvTranspose2d'),
109 | dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0)
110 | ]
111 |
112 | @auto_fp16()
113 | def forward(self, x):
114 | """Forward function.
115 |
116 | Args:
117 |             x (list[torch.Tensor]): Multi-scale features, one tensor per input level.
118 |
119 | Returns:
120 | list[torch.Tensor]: Multi-level feature maps.
121 | """
122 | assert len(x) == len(self.in_channels)
123 | ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)]
124 |
125 | if len(ups) > 1:
126 | out = sum(ups)
127 | else:
128 | out = ups[0]
129 |
130 | if self.extra_conv is not None:
131 | if self.use_for_distill:
132 | out_final = out
133 | before_relu_list = []
134 | for _idx in range(self.layer_num):
135 | out_mid = self.extra_blocks[_idx*3:(_idx+1)*3-1](out_final)
136 | out_before_relu = out_mid.clone()
137 | out_final = self.extra_blocks[(_idx+1)*3-1](out_mid)
138 | before_relu_list.append(out_before_relu)
139 |
140 | out = {'final':out_final, 'before_relu':before_relu_list}
141 | else:
142 | out = self.extra_blocks(out)
143 | return out
144 |
--------------------------------------------------------------------------------
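An illustrative neck config for SECOND3DFPN follows, kept as a hedged sketch that mirrors the constructor defaults above; the channel, stride, and extra_conv settings actually used by the released models live in projects/configs and may differ. Note that `num_conv` is popped out of `extra_conv` inside `__init__`, so it sits alongside the conv type in the same dict.

from mmdet.models import build_neck
# Importing the plugin package registers SECOND3DFPN with the NECKS registry.
from projects.mmdet3d_plugin.models.necks import SECOND3DFPN  # noqa: F401

neck_cfg = dict(
    type='SECOND3DFPN',
    in_channels=[128, 128, 256],
    out_channels=[256, 256, 256],
    upsample_strides=[1, 2, 4],
    norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
    upsample_cfg=dict(type='deconv3d', bias=False),
    conv_cfg=dict(type='Conv3d', bias=False),
    # Optional post-fusion 3D convs applied to the summed multi-level features.
    extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
)
neck = build_neck(neck_cfg)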
/projects/mmdet3d_plugin/models/pts_encoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .sparse_encoder_hd import SparseEncoderHD
2 |
3 | __all__ = ['SparseEncoderHD']
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/pts_encoder/sparse_encoder_hd.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmcv.runner import auto_fp16
3 | from torch import nn as nn
4 |
5 | from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule
6 | from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE
7 | from mmdet3d.models.builder import MIDDLE_ENCODERS
8 |
9 | if IS_SPCONV2_AVAILABLE:
10 | from spconv.pytorch import SparseConvTensor, SparseSequential
11 | else:
12 | from mmcv.ops import SparseConvTensor, SparseSequential
13 |
14 | @MIDDLE_ENCODERS.register_module()
15 | class SparseEncoderHD(nn.Module):
16 | r"""Sparse encoder for SECOND and Part-A2.
17 |
18 | Args:
19 | in_channels (int): The number of input channels.
20 | sparse_shape (list[int]): The sparse shape of input tensor.
21 | order (list[str]): Order of conv module. Defaults to ('conv',
22 | 'norm', 'act').
23 | norm_cfg (dict): Config of normalization layer. Defaults to
24 | dict(type='BN1d', eps=1e-3, momentum=0.01).
25 | base_channels (int): Out channels for conv_input layer.
26 | Defaults to 16.
27 | output_channels (int): Out channels for conv_out layer.
28 | Defaults to 128.
29 | encoder_channels (tuple[tuple[int]]):
30 | Convolutional channels of each encode block.
31 | encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.
32 | Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
33 | block_type (str): Type of the block to use. Defaults to 'conv_module'.
34 | """
35 |
36 | def __init__(self,
37 | in_channels,
38 | sparse_shape,
39 | order=('conv', 'norm', 'act'),
40 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
41 | base_channels=16,
42 | output_channels=128,
43 | encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
44 | 64)),
45 | encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
46 | 1)),
47 | encoder_strides=(2, 2, 2, 1),
48 | block_type='conv_module',
49 | keep_depth=True,
50 | fp16_enabled=False):
51 | super().__init__()
52 | assert block_type in ['conv_module', 'basicblock']
53 | self.sparse_shape = sparse_shape
54 | self.in_channels = in_channels
55 | self.order = order
56 | self.base_channels = base_channels
57 | self.output_channels = output_channels
58 | self.encoder_channels = encoder_channels
59 | self.encoder_paddings = encoder_paddings
60 | self.encoder_strides = encoder_strides
61 | self.stage_num = len(self.encoder_channels)
62 | self.keep_depth = keep_depth
63 | if fp16_enabled:
64 | self.fp16_enabled = fp16_enabled
65 | # Spconv init all weight on its own
66 |
67 | assert isinstance(order, tuple) and len(order) == 3
68 | assert set(order) == {'conv', 'norm', 'act'}
69 |
70 | if self.order[0] != 'conv': # pre activate
71 | self.conv_input = make_sparse_convmodule(
72 | in_channels,
73 | self.base_channels,
74 | 3,
75 | norm_cfg=norm_cfg,
76 | padding=1,
77 | indice_key='subm1',
78 | conv_type='SubMConv3d',
79 | order=('conv', ))
80 | else: # post activate
81 | self.conv_input = make_sparse_convmodule(
82 | in_channels,
83 | self.base_channels,
84 | 3,
85 | norm_cfg=norm_cfg,
86 | padding=1,
87 | indice_key='subm1',
88 | conv_type='SubMConv3d')
89 |
90 | encoder_out_channels = self.make_encoder_layers(
91 | make_sparse_convmodule,
92 | norm_cfg,
93 | self.base_channels,
94 | block_type=block_type)
95 |
96 | self.conv_out = make_sparse_convmodule(
97 | encoder_out_channels,
98 | self.output_channels,
99 | kernel_size=(1, 1, 1),
100 | stride=(1, 1, 1),
101 | norm_cfg=norm_cfg,
102 | padding=0,
103 | indice_key='spconv_down2',
104 | conv_type='SparseConv3d')
105 |
106 | @auto_fp16(apply_to=('voxel_features', ))
107 | def forward(self, voxel_features, coors, batch_size):
108 | """Forward of SparseEncoder.
109 |
110 | Args:
111 | voxel_features (torch.float32): Voxel features in shape (N, C).
112 | coors (torch.int32): Coordinates in shape (N, 4), \
113 | the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
114 | batch_size (int): Batch size.
115 |
116 | Returns:
117 | dict: Backbone features.
118 | """
119 | coors = coors.int()
120 | input_sp_tensor = SparseConvTensor(voxel_features, coors,
121 | self.sparse_shape,
122 | batch_size)
123 | x = self.conv_input(input_sp_tensor)
124 |
125 | encode_features = []
126 | for encoder_layer in self.encoder_layers:
127 | x = encoder_layer(x)
128 | encode_features.append(x)
129 |
130 | # for detection head
131 | # [200, 176, 5] -> [200, 176, 5]
132 | out = self.conv_out(encode_features[-1])
133 | spatial_features = out.dense()
134 |
135 | if not self.keep_depth:
136 | spatial_features = spatial_features.sum(dim=2)
137 |
138 | return spatial_features
139 |
140 | def make_encoder_layers(self,
141 | make_block,
142 | norm_cfg,
143 | in_channels,
144 | block_type='conv_module',
145 | conv_cfg=dict(type='SubMConv3d')):
146 |         """Make encoder layers using sparse convs.
147 |
148 | Args:
149 | make_block (method): A bounded function to build blocks.
150 | norm_cfg (dict[str]): Config of normalization layer.
151 | in_channels (int): The number of encoder input channels.
152 | block_type (str): Type of the block to use. Defaults to
153 | 'conv_module'.
154 | conv_cfg (dict): Config of conv layer. Defaults to
155 | dict(type='SubMConv3d').
156 |
157 | Returns:
158 | int: The number of encoder output channels.
159 | """
160 | assert block_type in ['conv_module', 'basicblock']
161 | self.encoder_layers = SparseSequential()
162 |
163 | for i, blocks in enumerate(self.encoder_channels):
164 | blocks_list = []
165 | for j, out_channels in enumerate(tuple(blocks)):
166 | padding = tuple(self.encoder_paddings[i])[j]
167 | # each stage started with a spconv layer
168 | # except the first stage
169 | if i != 0 and j == 0 and block_type == 'conv_module':
170 | blocks_list.append(
171 | make_block(
172 | in_channels,
173 | out_channels,
174 | 3,
175 | norm_cfg=norm_cfg,
176 | stride=self.encoder_strides[i],
177 | padding=padding,
178 | indice_key=f'spconv{i + 1}',
179 | conv_type='SparseConv3d'))
180 | elif block_type == 'basicblock':
181 | if j == len(blocks) - 1 and i != len(
182 | self.encoder_channels) - 1:
183 | blocks_list.append(
184 | make_block(
185 | in_channels,
186 | out_channels,
187 | 3,
188 | norm_cfg=norm_cfg,
189 | stride=self.encoder_strides[i],
190 | padding=padding,
191 | indice_key=f'spconv{i + 1}',
192 | conv_type='SparseConv3d'))
193 | else:
194 | blocks_list.append(
195 | SparseBasicBlock(
196 | out_channels,
197 | out_channels,
198 | norm_cfg=norm_cfg,
199 | conv_cfg=conv_cfg))
200 | else:
201 | blocks_list.append(
202 | make_block(
203 | in_channels,
204 | out_channels,
205 | 3,
206 | norm_cfg=norm_cfg,
207 | padding=padding,
208 | indice_key=f'subm{i + 1}',
209 | conv_type='SubMConv3d'))
210 | in_channels = out_channels
211 | stage_name = f'encoder_layer{i + 1}'
212 | stage_layers = SparseSequential(*blocks_list)
213 | self.encoder_layers.add_module(stage_name, stage_layers)
214 | return out_channels
215 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .uni3detr_transformer import Uni3DETRTransformer, Uni3DETRTransformerDecoder, UniCrossAtten
2 | from .uni3d_viewtrans import Uni3DViewTrans
3 |
4 | __all__ = ['Uni3DETRTransformer', 'Uni3DETRTransformerDecoder', 'UniCrossAtten', 'Uni3DViewTrans']
5 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/grid_mask.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from PIL import Image
5 |
6 | class Grid(object):
7 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):
8 | self.use_h = use_h
9 | self.use_w = use_w
10 | self.rotate = rotate
11 | self.offset = offset
12 | self.ratio = ratio
13 | self.mode=mode
14 | self.st_prob = prob
15 | self.prob = prob
16 |
17 | def set_prob(self, epoch, max_epoch):
18 | self.prob = self.st_prob * epoch / max_epoch
19 |
20 | def __call__(self, img, label):
21 | if np.random.rand() > self.prob:
22 | return img, label
23 | h = img.size(1)
24 | w = img.size(2)
25 | self.d1 = 2
26 | self.d2 = min(h, w)
27 | hh = int(1.5*h)
28 | ww = int(1.5*w)
29 | d = np.random.randint(self.d1, self.d2)
30 | if self.ratio == 1:
31 | self.l = np.random.randint(1, d)
32 | else:
33 | self.l = min(max(int(d*self.ratio+0.5),1),d-1)
34 | mask = np.ones((hh, ww), np.float32)
35 | st_h = np.random.randint(d)
36 | st_w = np.random.randint(d)
37 | if self.use_h:
38 | for i in range(hh//d):
39 | s = d*i + st_h
40 | t = min(s+self.l, hh)
41 | mask[s:t,:] *= 0
42 | if self.use_w:
43 | for i in range(ww//d):
44 | s = d*i + st_w
45 | t = min(s+self.l, ww)
46 | mask[:,s:t] *= 0
47 |
48 | r = np.random.randint(self.rotate)
49 | mask = Image.fromarray(np.uint8(mask))
50 | mask = mask.rotate(r)
51 | mask = np.asarray(mask)
52 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
53 |
54 | mask = torch.from_numpy(mask).float()
55 | if self.mode == 1:
56 | mask = 1-mask
57 |
58 | mask = mask.expand_as(img)
59 | if self.offset:
60 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float()
61 | offset = (1 - mask) * offset
62 | img = img * mask + offset
63 | else:
64 | img = img * mask
65 |
66 | return img, label
67 |
68 |
69 | class GridMask(nn.Module):
70 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):
71 | super(GridMask, self).__init__()
72 | self.use_h = use_h
73 | self.use_w = use_w
74 | self.rotate = rotate
75 | self.offset = offset
76 | self.ratio = ratio
77 | self.mode = mode
78 | self.st_prob = prob
79 | self.prob = prob
80 |
81 | def set_prob(self, epoch, max_epoch):
82 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5
83 |
84 | def forward(self, x):
85 | if np.random.rand() > self.prob or not self.training:
86 | return x
87 | n,c,h,w = x.size()
88 | x = x.view(-1,h,w)
89 | hh = int(1.5*h)
90 | ww = int(1.5*w)
91 | d = np.random.randint(2, h)
92 | self.l = min(max(int(d*self.ratio+0.5),1),d-1)
93 | mask = np.ones((hh, ww), np.float32)
94 | st_h = np.random.randint(d)
95 | st_w = np.random.randint(d)
96 | if self.use_h:
97 | for i in range(hh//d):
98 | s = d*i + st_h
99 | t = min(s+self.l, hh)
100 | mask[s:t,:] *= 0
101 | if self.use_w:
102 | for i in range(ww//d):
103 | s = d*i + st_w
104 | t = min(s+self.l, ww)
105 | mask[:,s:t] *= 0
106 |
107 | r = np.random.randint(self.rotate)
108 | mask = Image.fromarray(np.uint8(mask))
109 | mask = mask.rotate(r)
110 | mask = np.asarray(mask)
111 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
112 |
113 | mask = torch.from_numpy(mask).float().cuda()
114 | if self.mode == 1:
115 | mask = 1-mask
116 | mask = mask.expand_as(x)
117 | if self.offset:
118 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float().cuda()
119 | x = x * mask + offset * (1 - mask)
120 | else:
121 | x = x * mask
122 |
123 | return x.view(n,c,h,w)
--------------------------------------------------------------------------------
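A hedged usage sketch for GridMask. The mask is created on CUDA inside forward(), so the input batch is assumed to be a GPU tensor, and masking is only applied while the module is in training mode; set_prob() ramps the probability linearly with the current epoch. The epoch numbers and image sizes below are arbitrary.

import torch
from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask

grid_mask = GridMask(
    use_h=True, use_w=True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)
grid_mask.train()

# Linearly ramp the masking probability over training (0.7 * 5 / 24 here).
grid_mask.set_prob(epoch=5, max_epoch=24)

imgs = torch.rand(2, 3, 224, 224).cuda()   # (N, C, H, W) image batch on GPU
masked = grid_mask(imgs)                   # same shape, with grid regions masked out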
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/build.txt
2 | -r requirements/optional.txt
3 | -r requirements/runtime.txt
4 | -r requirements/tests.txt
5 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [yapf]
2 | BASED_ON_STYLE = pep8
3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
5 |
6 | [isort]
7 | line_length = 79
8 | multi_line_output = 0
9 | extra_standard_library = setuptools
10 | known_first_party = mmdet,mmseg,mmdet3d
11 | known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset
12 | no_lines_before = STDLIB,LOCALFOLDER
13 | default_section = THIRDPARTY
14 |
15 | [codespell]
16 | ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD,warmup
17 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 | import shutil
4 | import sys
5 | import warnings
6 | from os import path as osp
7 | from setuptools import find_packages, setup
8 |
9 | import torch
10 | from torch.utils.cpp_extension import (BuildExtension, CppExtension,
11 | CUDAExtension)
12 |
13 |
14 | def readme():
15 | with open('README.md', encoding='utf-8') as f:
16 | content = f.read()
17 | return content
18 |
19 |
20 | version_file = 'mmdet3d/version.py'
21 |
22 |
23 | def get_version():
24 | with open(version_file, 'r') as f:
25 | exec(compile(f.read(), version_file, 'exec'))
26 | import sys
27 |
28 | # return short version for sdist
29 | if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
30 | return locals()['short_version']
31 | else:
32 | return locals()['__version__']
33 |
34 |
35 | def make_cuda_ext(name,
36 | module,
37 | sources,
38 | sources_cuda=[],
39 | extra_args=[],
40 | extra_include_path=[]):
41 |
42 | define_macros = []
43 | extra_compile_args = {'cxx': [] + extra_args}
44 |
45 | if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
46 | define_macros += [('WITH_CUDA', None)]
47 | extension = CUDAExtension
48 | extra_compile_args['nvcc'] = extra_args + [
49 | '-D__CUDA_NO_HALF_OPERATORS__',
50 | '-D__CUDA_NO_HALF_CONVERSIONS__',
51 | '-D__CUDA_NO_HALF2_OPERATORS__',
52 | ]
53 | sources += sources_cuda
54 | else:
55 | print('Compiling {} without CUDA'.format(name))
56 | extension = CppExtension
57 | # raise EnvironmentError('CUDA is required to compile MMDetection!')
58 |
59 | return extension(
60 | name='{}.{}'.format(module, name),
61 | sources=[os.path.join(*module.split('.'), p) for p in sources],
62 | include_dirs=extra_include_path,
63 | define_macros=define_macros,
64 | extra_compile_args=extra_compile_args)
65 |
66 |
67 | def parse_requirements(fname='requirements.txt', with_version=True):
68 |     """Parse the package dependencies listed in a requirements file,
69 |     stripping specific versioning information.
70 |
71 | Args:
72 | fname (str): path to requirements file
73 |         with_version (bool, default=True): if True, include version specs
74 |
75 | Returns:
76 | list[str]: list of requirements items
77 |
78 | CommandLine:
79 | python -c "import setup; print(setup.parse_requirements())"
80 | """
81 | import re
82 | import sys
83 | from os.path import exists
84 | require_fpath = fname
85 |
86 | def parse_line(line):
87 | """Parse information from a line in a requirements text file."""
88 | if line.startswith('-r '):
89 | # Allow specifying requirements in other files
90 | target = line.split(' ')[1]
91 | for info in parse_require_file(target):
92 | yield info
93 | else:
94 | info = {'line': line}
95 | if line.startswith('-e '):
96 | info['package'] = line.split('#egg=')[1]
97 | else:
98 | # Remove versioning from the package
99 | pat = '(' + '|'.join(['>=', '==', '>']) + ')'
100 | parts = re.split(pat, line, maxsplit=1)
101 | parts = [p.strip() for p in parts]
102 |
103 | info['package'] = parts[0]
104 | if len(parts) > 1:
105 | op, rest = parts[1:]
106 | if ';' in rest:
107 | # Handle platform specific dependencies
108 | # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
109 | version, platform_deps = map(str.strip,
110 | rest.split(';'))
111 | info['platform_deps'] = platform_deps
112 | else:
113 | version = rest # NOQA
114 | info['version'] = (op, version)
115 | yield info
116 |
117 | def parse_require_file(fpath):
118 | with open(fpath, 'r') as f:
119 | for line in f.readlines():
120 | line = line.strip()
121 | if line and not line.startswith('#'):
122 | for info in parse_line(line):
123 | yield info
124 |
125 | def gen_packages_items():
126 | if exists(require_fpath):
127 | for info in parse_require_file(require_fpath):
128 | parts = [info['package']]
129 | if with_version and 'version' in info:
130 | parts.extend(info['version'])
131 | if not sys.version.startswith('3.4'):
132 | # apparently package_deps are broken in 3.4
133 | platform_deps = info.get('platform_deps')
134 | if platform_deps is not None:
135 | parts.append(';' + platform_deps)
136 | item = ''.join(parts)
137 | yield item
138 |
139 | packages = list(gen_packages_items())
140 | return packages
141 |
142 |
143 | def add_mim_extension():
144 | """Add extra files that are required to support MIM into the package.
145 |
146 | These files will be added by creating a symlink to the originals if the
147 | package is installed in `editable` mode (e.g. pip install -e .), or by
148 | copying from the originals otherwise.
149 | """
150 |
151 | # parse installment mode
152 | if 'develop' in sys.argv:
153 | # installed by `pip install -e .`
154 | if platform.system() == 'Windows':
155 | # set `copy` mode here since symlink fails on Windows.
156 | mode = 'copy'
157 | else:
158 | mode = 'symlink'
159 | elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
160 | # installed by `pip install .`
161 | # or create source distribution by `python setup.py sdist`
162 | mode = 'copy'
163 | else:
164 | return
165 |
166 | filenames = ['tools', 'configs', 'model-index.yml']
167 | repo_path = osp.dirname(__file__)
168 | mim_path = osp.join(repo_path, 'mmdet3d', '.mim')
169 | os.makedirs(mim_path, exist_ok=True)
170 |
171 | for filename in filenames:
172 | if osp.exists(filename):
173 | src_path = osp.join(repo_path, filename)
174 | tar_path = osp.join(mim_path, filename)
175 |
176 | if osp.isfile(tar_path) or osp.islink(tar_path):
177 | os.remove(tar_path)
178 | elif osp.isdir(tar_path):
179 | shutil.rmtree(tar_path)
180 |
181 | if mode == 'symlink':
182 | src_relpath = osp.relpath(src_path, osp.dirname(tar_path))
183 | os.symlink(src_relpath, tar_path)
184 | elif mode == 'copy':
185 | if osp.isfile(src_path):
186 | shutil.copyfile(src_path, tar_path)
187 | elif osp.isdir(src_path):
188 | shutil.copytree(src_path, tar_path)
189 | else:
190 | warnings.warn(f'Cannot copy file {src_path}.')
191 | else:
192 | raise ValueError(f'Invalid mode {mode}')
193 |
194 |
195 | if __name__ == '__main__':
196 | add_mim_extension()
197 | setup(
198 | name='mmdet3d',
199 | version=get_version(),
200 |         description=("OpenMMLab's next-generation platform "
201 | 'for general 3D object detection.'),
202 | long_description=readme(),
203 | long_description_content_type='text/markdown',
204 | author='MMDetection3D Contributors',
205 | author_email='zwwdev@gmail.com',
206 | keywords='computer vision, 3D object detection',
207 | url='https://github.com/open-mmlab/mmdetection3d',
208 | packages=find_packages(),
209 | include_package_data=True,
210 | package_data={'mmdet3d.ops': ['*/*.so']},
211 | classifiers=[
212 | 'Development Status :: 4 - Beta',
213 | 'License :: OSI Approved :: Apache Software License',
214 | 'Operating System :: OS Independent',
215 | 'Programming Language :: Python :: 3',
216 | 'Programming Language :: Python :: 3.6',
217 | 'Programming Language :: Python :: 3.7',
218 | ],
219 | license='Apache License 2.0',
220 | install_requires=parse_requirements('requirements/runtime.txt'),
221 | extras_require={
222 | 'all': parse_requirements('requirements.txt'),
223 | 'tests': parse_requirements('requirements/tests.txt'),
224 | 'build': parse_requirements('requirements/build.txt'),
225 | 'optional': parse_requirements('requirements/optional.txt'),
226 | 'mim': parse_requirements('requirements/mminstall.txt'),
227 | },
228 | cmdclass={'build_ext': BuildExtension},
229 | zip_safe=False)
230 |
--------------------------------------------------------------------------------