├── .gitignore ├── LICENSE ├── README.md ├── docs ├── can_bus.ipynb ├── getting_started.md ├── install.md └── prepare_dataset.md ├── figs ├── arch.png └── sota_results.png ├── projects ├── __init__.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_instance.py │ │ │ ├── kitti-3d-3class.py │ │ │ ├── kitti-3d-car.py │ │ │ ├── lyft-3d.py │ │ │ ├── nuim_instance.py │ │ │ ├── nus-3d.py │ │ │ ├── nus-mono3d.py │ │ │ ├── range100_lyft-3d.py │ │ │ ├── s3dis-3d-5class.py │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ ├── scannet-3d-18class.py │ │ │ ├── scannet_seg-3d-20class.py │ │ │ ├── sunrgbd-3d-10class.py │ │ │ ├── waymoD5-3d-3class.py │ │ │ └── waymoD5-3d-car.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── 3dssd.py │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ ├── fcos3d.py │ │ │ ├── groupfree3d.py │ │ │ ├── h3dnet.py │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ ├── imvotenet_image.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── paconv_cuda_ssg.py │ │ │ ├── paconv_ssg.py │ │ │ ├── parta2.py │ │ │ ├── pointnet2_msg.py │ │ │ ├── pointnet2_ssg.py │ │ │ └── votenet.py │ │ └── schedules │ │ │ ├── cosine.py │ │ │ ├── cyclic_20e.py │ │ │ ├── cyclic_40e.py │ │ │ ├── mmdet_schedule_1x.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ ├── seg_cosine_150e.py │ │ │ ├── seg_cosine_200e.py │ │ │ └── seg_cosine_50e.py │ ├── bevformer │ │ ├── bevformer_base.py │ │ ├── bevformer_small.py │ │ └── bevformer_tiny.py │ ├── bevformer_fp16 │ │ └── bevformer_tiny_fp16.py │ ├── bevformerv2 │ │ ├── bevformerv2-r50-t1-24ep.py │ │ ├── bevformerv2-r50-t1-48ep.py │ │ ├── bevformerv2-r50-t1-base-24ep.py │ │ ├── bevformerv2-r50-t1-base-48ep.py │ │ ├── bevformerv2-r50-t2-24ep.py │ │ ├── bevformerv2-r50-t2-48ep.py │ │ └── bevformerv2-r50-t8-24ep.py │ └── datasets │ │ ├── custom_lyft-3d.py │ │ ├── custom_nus-3d.py │ │ └── custom_waymo-3d.py └── mmdet3d_plugin │ ├── __init__.py │ ├── bevformer │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ ├── mmdet_train.py │ │ ├── test.py │ │ └── train.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── bev_head.py │ │ └── bevformer_head.py │ ├── detectors │ │ ├── __init__.py │ │ ├── bevformer.py │ │ ├── bevformerV2.py │ │ └── bevformer_fp16.py │ ├── hooks │ │ ├── __init__.py │ │ └── custom_hooks.py │ ├── modules │ │ ├── __init__.py │ │ ├── custom_base_transformer_layer.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── group_attention.py │ │ ├── multi_scale_deformable_attn_function.py │ │ ├── spatial_cross_attention.py │ │ ├── temporal_self_attention.py │ │ ├── transformer.py │ │ └── transformerV2.py │ └── runner │ │ ├── __init__.py │ │ └── epoch_based_runner.py │ ├── core │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ └── hungarian_assigner_3d.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ └── util.py │ └── evaluation │ │ ├── __init__.py │ │ ├── eval_hooks.py │ │ └── kitti2waymo.py │ ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_v2.py │ ├── nuscenes_mono_dataset.py │ ├── nuscnes_eval.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── augmentation.py │ │ ├── dd3d_mapper.py │ │ ├── formating.py │ │ 
├── loading.py │ │ └── transform_3d.py │ └── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ ├── group_sampler.py │ │ └── sampler.py │ ├── dd3d │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── nuscenes.py │ │ └── transform_utils.py │ ├── layers │ │ ├── iou_loss.py │ │ ├── normalization.py │ │ └── smooth_l1_loss.py │ ├── modeling │ │ ├── __init__.py │ │ ├── core.py │ │ ├── disentangled_box3d_loss.py │ │ ├── fcos2d.py │ │ ├── fcos3d.py │ │ ├── nuscenes_dd3d.py │ │ └── prepare_targets.py │ ├── structures │ │ ├── __init__.py │ │ ├── boxes3d.py │ │ ├── image_list.py │ │ ├── pose.py │ │ └── transform3d.py │ └── utils │ │ ├── comm.py │ │ ├── geometry.py │ │ ├── tasks.py │ │ ├── tensor2d.py │ │ └── visualization.py │ └── models │ ├── backbones │ ├── __init__.py │ └── vovnet.py │ ├── hooks │ ├── __init__.py │ └── hooks.py │ ├── opt │ ├── __init__.py │ └── adamw.py │ └── utils │ ├── __init__.py │ ├── bricks.py │ ├── grid_mask.py │ ├── position_embedding.py │ └── visual.py └── tools ├── analysis_tools ├── __init__.py ├── analyze_logs.py ├── benchmark.py ├── get_params.py └── visual.py ├── create_data.py ├── data_converter ├── __init__.py ├── create_gt_database.py ├── indoor_converter.py ├── kitti_converter.py ├── kitti_data_utils.py ├── lyft_converter.py ├── lyft_data_fixer.py ├── nuimage_converter.py ├── nuscenes_converter.py ├── s3dis_data_utils.py ├── scannet_data_utils.py ├── sunrgbd_data_utils.py └── waymo_converter.py ├── dist_test.sh ├── dist_train.sh ├── fp16 ├── dist_train.sh └── train.py ├── misc ├── browse_dataset.py ├── fuse_conv_bn.py ├── print_config.py └── visualize_results.py ├── model_converters ├── convert_votenet_checkpoints.py ├── publish_model.py └── regnet2mmdet.py ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | ckpts/ 13 | data/ 14 | ckpts 15 | data 16 | test/ 17 | val/ 18 | work_dirs/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
-------------------------------------------------------------------------------- /docs/getting_started.md: --------------------------------------------------------------------------------
1 | # Prerequisites
2 |
3 | **Please ensure you have prepared the environment and the nuScenes dataset.**
4 |
5 | # Train and Test
6 |
7 | Train BEVFormer with 8 GPUs
8 | ```
9 | ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py 8
10 | ```
11 |
12 | Eval BEVFormer with 8 GPUs
13 | ```
14 | ./tools/dist_test.sh ./projects/configs/bevformer/bevformer_base.py ./path/to/ckpts.pth 8
15 | ```
16 | Note: evaluating with a single GPU can give slightly higher performance, because continuous video sequences may be truncated when the data is split across multiple GPUs. By default we report scores evaluated with 8 GPUs.
17 |
18 |
19 |
20 | # Using FP16 to train the model
21 | The training script above does not support FP16 training,
22 | so we provide a separate script to train BEVFormer with FP16.
23 |
24 | ```
25 | ./tools/fp16/dist_train.sh ./projects/configs/bevformer_fp16/bevformer_tiny_fp16.py 8
26 | ```
27 |
28 |
29 | # Visualization
30 |
31 | See [visual.py](../tools/analysis_tools/visual.py)
-------------------------------------------------------------------------------- /docs/install.md: --------------------------------------------------------------------------------
1 | # Step-by-step installation instructions
2 |
3 | These instructions follow https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation
4 |
5 |
6 |
7 | **a. Create a conda virtual environment and activate it.**
8 | ```shell
9 | conda create -n open-mmlab python=3.8 -y
10 | conda activate open-mmlab
11 | ```
12 |
13 | **b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).**
14 | ```shell
15 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
16 | # Recommended torch>=1.9
17 |
18 | ```
19 |
20 | **c. Install gcc>=5 in conda env (optional).**
21 | ```shell
22 | conda install -c omgarcia gcc-6 # gcc-6.2
23 | ```
24 |
25 | **d. Install mmcv-full.**
26 | ```shell
27 | pip install mmcv-full==1.4.0
28 | # pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
29 | ```
30 |
31 | **e. Install mmdet and mmseg.**
32 | ```shell
33 | pip install mmdet==2.14.0
34 | pip install mmsegmentation==0.14.1
35 | ```
36 |
37 | **f. Install mmdet3d from source code.**
38 | ```shell
39 | git clone https://github.com/open-mmlab/mmdetection3d.git
40 | cd mmdetection3d
41 | git checkout v0.17.1 # Other versions may not be compatible.
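# (Added sanity check, not part of the original instructions.) The install step
# below compiles mmdet3d's CUDA ops against the PyTorch installed in step b, so
# it can help to first confirm that torch and the GPU are visible in this env:
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"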
42 | python setup.py install
43 | ```
44 |
45 | **g. Install Detectron2 and Timm.**
46 | ```shell
47 | pip install einops fvcore seaborn iopath==0.1.9 timm==0.6.13 typing-extensions==4.5.0 pylint ipython==8.12 numpy==1.19.5 matplotlib==3.5.2 numba==0.48.0 pandas==1.4.4 scikit-image==0.19.3 setuptools==59.5.0
48 | python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
49 | ```
50 |
51 |
52 | **h. Clone BEVFormer.**
53 | ```
54 | git clone https://github.com/fundamentalvision/BEVFormer.git
55 | ```
56 |
57 | **i. Prepare pretrained models.**
58 | ```shell
59 | cd bevformer
60 | mkdir ckpts
61 |
62 | cd ckpts && wget https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth
63 | ```
64 |
65 | Note: this pretrained model is the same one used in [detr3d](https://github.com/WangYueFt/detr3d).
66 |
-------------------------------------------------------------------------------- /docs/prepare_dataset.md: --------------------------------------------------------------------------------
1 |
2 |
3 | ## NuScenes
4 | Download the nuScenes V1.0 full dataset and the CAN bus expansion data [HERE](https://www.nuscenes.org/download), then prepare the nuScenes data as follows.
5 |
6 |
7 | **Download CAN bus expansion**
8 | ```
9 | # download 'can_bus.zip'
10 | unzip can_bus.zip
11 | # move the extracted can_bus folder into ./data/
12 | ```
13 |
14 | **Prepare nuScenes data**
15 |
16 | *We generate custom annotation files, which are different from mmdet3d's.*
17 | ```
18 | python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --canbus ./data
19 | ```
20 |
21 | Running the above command will generate `nuscenes_infos_temporal_{train,val}.pkl`.
22 |
23 | **Folder structure**
24 | ```
25 | bevformer
26 | ├── projects/
27 | ├── tools/
28 | ├── configs/
29 | ├── ckpts/
30 | │   ├── r101_dcn_fcos3d_pretrain.pth
31 | ├── data/
32 | │   ├── can_bus/
33 | │   ├── nuscenes/
34 | │   │   ├── maps/
35 | │   │   ├── samples/
36 | │   │   ├── sweeps/
37 | │   │   ├── v1.0-test/
38 | │   │   ├── v1.0-trainval/
39 | │   │   ├── nuscenes_infos_temporal_train.pkl
40 | │   │   ├── nuscenes_infos_temporal_val.pkl
41 | ```
42 |
-------------------------------------------------------------------------------- /figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/figs/arch.png
-------------------------------------------------------------------------------- /figs/sota_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/figs/sota_results.png
-------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/projects/__init__.py
-------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: --------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/coco/'
3 | img_norm_cfg = dict(
4 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5 | train_pipeline = [
6 |     dict(type='LoadImageFromFile'),
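# (Added note.) LoadAnnotations with with_bbox=True and with_mask=True reads
# both the 2D boxes and the instance segmentation masks from the COCO
# annotations.
7 |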
dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict( 12 | filter_by_difficulty=[-1], 13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 14 | classes=class_names, 15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6)) 16 | 17 | file_client_args = dict(backend='disk') 18 | # Uncomment the following if use ceph or other file clients. 19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 20 | # for more details. 
21 | # file_client_args = dict( 22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 23 | 24 | train_pipeline = [ 25 | dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=4, 29 | use_dim=4, 30 | file_client_args=file_client_args), 31 | dict( 32 | type='LoadAnnotations3D', 33 | with_bbox_3d=True, 34 | with_label_3d=True, 35 | file_client_args=file_client_args), 36 | dict(type='ObjectSample', db_sampler=db_sampler), 37 | dict( 38 | type='ObjectNoise', 39 | num_try=100, 40 | translation_std=[1.0, 1.0, 0.5], 41 | global_rot_range=[0.0, 0.0], 42 | rot_range=[-0.78539816, 0.78539816]), 43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[-0.78539816, 0.78539816], 47 | scale_ratio_range=[0.95, 1.05]), 48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=4, 59 | use_dim=4, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='MultiScaleFlipAug3D', 63 | img_scale=(1333, 800), 64 | pts_scale_ratio=1, 65 | flip=False, 66 | transforms=[ 67 | dict( 68 | type='GlobalRotScaleTrans', 69 | rot_range=[0, 0], 70 | scale_ratio_range=[1., 1.], 71 | translation_std=[0, 0, 0]), 72 | dict(type='RandomFlip3D'), 73 | dict( 74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 75 | dict( 76 | type='DefaultFormatBundle3D', 77 | class_names=class_names, 78 | with_label=False), 79 | dict(type='Collect3D', keys=['points']) 80 | ]) 81 | ] 82 | # construct a pipeline for data and gt loading in show function 83 | # please keep its loading function consistent with test_pipeline (e.g. client) 84 | eval_pipeline = [ 85 | dict( 86 | type='LoadPointsFromFile', 87 | coord_type='LIDAR', 88 | load_dim=4, 89 | use_dim=4, 90 | file_client_args=file_client_args), 91 | dict( 92 | type='DefaultFormatBundle3D', 93 | class_names=class_names, 94 | with_label=False), 95 | dict(type='Collect3D', keys=['points']) 96 | ] 97 | 98 | data = dict( 99 | samples_per_gpu=6, 100 | workers_per_gpu=4, 101 | train=dict( 102 | type='RepeatDataset', 103 | times=2, 104 | dataset=dict( 105 | type=dataset_type, 106 | data_root=data_root, 107 | ann_file=data_root + 'kitti_infos_train.pkl', 108 | split='training', 109 | pts_prefix='velodyne_reduced', 110 | pipeline=train_pipeline, 111 | modality=input_modality, 112 | classes=class_names, 113 | test_mode=False, 114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
116 | box_type_3d='LiDAR')), 117 | val=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'kitti_infos_val.pkl', 121 | split='training', 122 | pts_prefix='velodyne_reduced', 123 | pipeline=test_pipeline, 124 | modality=input_modality, 125 | classes=class_names, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'kitti_infos_val.pkl', 132 | split='training', 133 | pts_prefix='velodyne_reduced', 134 | pipeline=test_pipeline, 135 | modality=input_modality, 136 | classes=class_names, 137 | test_mode=True, 138 | box_type_3d='LiDAR')) 139 | 140 | evaluation = dict(interval=1, pipeline=eval_pipeline) 141 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show 
function 81 | # please keep its loading function consistent with test_pipeline (e.g. client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
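# (Added example.) The interval is counted in epochs; e.g. with a 12-epoch
# schedule you would typically set evaluation = dict(interval=12, pipeline=eval_pipeline).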
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
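# (Added note.) This is the camera-only monocular config, so only use_camera
# is enabled in the modality dict below.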
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/range100_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-100, -100, -5, 100, 100, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 
13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and 
gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | 
flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | 
dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
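# (Added note.) The box type must match the coordinate convention of the
# annotation files; indoor datasets such as ScanNet are annotated in the depth
# coordinate system, hence 'Depth' below.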
110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 
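# (Added note.) Unlike the detection eval pipelines above, this one keeps
# LoadAnnotations3D (with_seg_3d=True) and PointSegClassMapping so that
# per-point ground-truth labels are available when computing segmentation
# metrics.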
77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/waymoD5-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'WaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | class_names = ['Car'] 14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 15 | input_modality = dict(use_lidar=True, use_camera=False) 16 | db_sampler = dict( 17 | data_root=data_root, 18 | info_path=data_root + 'waymo_dbinfos_train.pkl', 19 | rate=1.0, 20 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 21 | classes=class_names, 22 | sample_groups=dict(Car=15), 23 | points_loader=dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=5, 27 | use_dim=[0, 1, 2, 3, 4], 28 | file_client_args=file_client_args)) 29 | 30 | train_pipeline = [ 31 | dict( 32 | type='LoadPointsFromFile', 33 | coord_type='LIDAR', 34 | load_dim=6, 35 | use_dim=5, 36 | file_client_args=file_client_args), 37 | dict( 38 | type='LoadAnnotations3D', 39 | with_bbox_3d=True, 40 | with_label_3d=True, 41 | file_client_args=file_client_args), 42 | dict(type='ObjectSample', db_sampler=db_sampler), 43 | dict( 44 | type='RandomFlip3D', 45 | sync_2d=False, 46 | flip_ratio_bev_horizontal=0.5, 47 | flip_ratio_bev_vertical=0.5), 48 | dict( 49 | type='GlobalRotScaleTrans', 50 | rot_range=[-0.78539816, 0.78539816], 51 | scale_ratio_range=[0.95, 1.05]), 52 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 53 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 54 | dict(type='PointShuffle'), 55 | dict(type='DefaultFormatBundle3D', class_names=class_names), 56 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 57 | ] 58 | test_pipeline = [ 59 | dict( 60 | type='LoadPointsFromFile', 61 | coord_type='LIDAR', 62 | load_dim=6, 63 | use_dim=5, 64 | file_client_args=file_client_args), 65 | dict( 66 | type='MultiScaleFlipAug3D', 67 | img_scale=(1333, 800), 68 | pts_scale_ratio=1, 69 | flip=False, 70 | transforms=[ 71 | dict( 72 | type='GlobalRotScaleTrans', 73 | rot_range=[0, 0], 74 | scale_ratio_range=[1., 1.], 75 | translation_std=[0, 0, 0]), 76 | dict(type='RandomFlip3D'), 77 | dict( 78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 79 | dict( 80 | type='DefaultFormatBundle3D', 81 | class_names=class_names, 82 | with_label=False), 83 | dict(type='Collect3D', keys=['points']) 84 | ]) 85 | ] 86 | # construct a pipeline for data and gt loading in show function 87 | # please keep its loading function consistent with test_pipeline (e.g. client) 88 | eval_pipeline = [ 89 | dict( 90 | type='LoadPointsFromFile', 91 | coord_type='LIDAR', 92 | load_dim=6, 93 | use_dim=5, 94 | file_client_args=file_client_args), 95 | dict( 96 | type='DefaultFormatBundle3D', 97 | class_names=class_names, 98 | with_label=False), 99 | dict(type='Collect3D', keys=['points']) 100 | ] 101 | 102 | data = dict( 103 | samples_per_gpu=2, 104 | workers_per_gpu=4, 105 | train=dict( 106 | type='RepeatDataset', 107 | times=2, 108 | dataset=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'waymo_infos_train.pkl', 112 | split='training', 113 | pipeline=train_pipeline, 114 | modality=input_modality, 115 | classes=class_names, 116 | test_mode=False, 117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
119 | box_type_3d='LiDAR', 120 | # load one frame every five frames 121 | load_interval=5)), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | ann_file=data_root + 'waymo_infos_val.pkl', 126 | split='training', 127 | pipeline=test_pipeline, 128 | modality=input_modality, 129 | classes=class_names, 130 | test_mode=True, 131 | box_type_3d='LiDAR'), 132 | test=dict( 133 | type=dataset_type, 134 | data_root=data_root, 135 | ann_file=data_root + 'waymo_infos_val.pkl', 136 | split='training', 137 | pipeline=test_pipeline, 138 | modality=input_modality, 139 | classes=class_names, 140 | test_mode=True, 141 | box_type_3d='LiDAR')) 142 | 143 | evaluation = dict(interval=24, pipeline=eval_pipeline) 144 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 
57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | 
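These _base_ model files are not trained on their own; downstream configs pull them in through mmcv's _base_ inheritance and override only the keys that change, as hv_pointpillars_fpn_lyft.py, paconv_cuda_ssg.py and pointnet2_msg.py further below do. A minimal sketch of that pattern, assuming mmcv 1.x's Config API; the child config path and the overridden value are purely illustrative and not part of this repo:

# hypothetical file projects/configs/my_centerpoint_pillar.py (illustrative only)
_base_ = ['./_base_/models/centerpoint_02pillar_second_secfpn_nus.py']
# override a single nested key; everything not mentioned keeps its base value
model = dict(test_cfg=dict(pts=dict(score_threshold=0.2)))

# loading the merged config:
# from mmcv import Config
# cfg = Config.fromfile('projects/configs/my_centerpoint_pillar.py')
# cfg.model.test_cfg.pts.score_threshold  # -> 0.2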
-------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | 
in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
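# Illustrative arithmetic for the settings below: a 100 m x 100 m range with
# 0.25 m pillars gives (50 - (-50)) / 0.25 = 400 bins per axis, which is where
# output_shape=[400, 400] in pts_middle_encoder comes from.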
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
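# Illustrative arithmetic: voxel_size (0.25 m) is inherited from the nuScenes
# base config, so the 200 m range below gives (100 - (-100)) / 0.25 = 800 bins
# per axis, matching output_shape=[800, 800] in pts_middle_encoder.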
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | 
nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | 
nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
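# Illustrative arithmetic for the settings below:
#   x: (76.8 - (-76.8)) / 0.08 = 1920
#   y: (51.2 - (-51.2)) / 0.08 = 1280
#   z: (4 - (-2)) / 0.1 = 60 (plus one extra z bin, as in the KITTI config)
# which is where sparse_shape=[61, 1280, 1920] in the middle encoder comes from.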
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | 
scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | 
type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 
| paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # 
should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py: 
-------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained by 20 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 
| 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'CustomLyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=True) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_val.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) -------------------------------------------------------------------------------- /projects/configs/datasets/custom_waymo-3d.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'CustomWaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | img_norm_cfg = dict( 14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 15 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 17 | input_modality = dict(use_lidar=False, use_camera=True) 18 | db_sampler = dict( 19 | data_root=data_root, 20 | info_path=data_root + 'waymo_dbinfos_train.pkl', 21 | rate=1.0, 22 | prepare=dict( 23 | filter_by_difficulty=[-1], 24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 25 | classes=class_names, 26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 27 | points_loader=dict( 28 | type='LoadPointsFromFile', 29 | coord_type='LIDAR', 30 | load_dim=5, 31 | use_dim=[0, 1, 2, 3, 4], 32 | file_client_args=file_client_args)) 33 | 34 | 35 | 36 | train_pipeline = [ 37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 38 | dict(type='PhotoMetricDistortionMultiViewImage'), 39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 41 | dict(type='ObjectNameFilter', classes=class_names), 42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 43 | dict(type='PadMultiViewImage', size_divisor=32), 44 | dict(type='DefaultFormatBundle3D', class_names=class_names), 45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 46 | ] 47 | 48 | 49 | test_pipeline = [ 50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 52 | dict(type='PadMultiViewImage', size_divisor=32), 53 | dict( 54 | type='MultiScaleFlipAug3D', 55 | img_scale=(1920, 1280), 56 | pts_scale_ratio=1, 57 | flip=False, 58 | transforms=[ 59 | dict( 60 | type='DefaultFormatBundle3D', 61 | class_names=class_names, 62 | with_label=False), 63 | dict(type='CustomCollect3D', keys=['img']) 64 | ]) 65 | ] 66 | 67 | 68 | # construct a pipeline for data and gt loading in show function 69 | # please keep its loading function consistent with test_pipeline (e.g. client) 70 | 71 | data = dict( 72 | samples_per_gpu=2, 73 | workers_per_gpu=4, 74 | train=dict( 75 | type='RepeatDataset', 76 | times=2, 77 | dataset=dict( 78 | type=dataset_type, 79 | data_root=data_root, 80 | ann_file=data_root + 'waymo_infos_train.pkl', 81 | split='training', 82 | pipeline=train_pipeline, 83 | modality=input_modality, 84 | classes=class_names, 85 | test_mode=False, 86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
88 | box_type_3d='LiDAR', 89 | # load one frame every five frames 90 | load_interval=5)), 91 | val=dict( 92 | type=dataset_type, 93 | data_root=data_root, 94 | ann_file=data_root + 'waymo_infos_val.pkl', 95 | split='training', 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | classes=class_names, 99 | test_mode=True, 100 | box_type_3d='LiDAR'), 101 | test=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'waymo_infos_val.pkl', 105 | split='training', 106 | pipeline=test_pipeline, 107 | modality=input_modality, 108 | classes=class_names, 109 | test_mode=True, 110 | box_type_3d='LiDAR')) 111 | 112 | evaluation = dict(interval=24, pipeline=test_pipeline) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.utils import * 9 | from .models.opt.adamw import AdamW2 10 | from .bevformer import * 11 | from .dd3d import * 12 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | from .runner import * 6 | from .hooks import * 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | eval_model=None, 18 | meta=None): 19 | """A function wrapper for launching model training according to cfg. 20 | 21 | Because we need different eval_hook in runner. Should be deprecated in the 22 | future. 
23 | """ 24 | if cfg.model.type in ['EncoderDecoder3D']: 25 | assert False 26 | else: 27 | custom_train_detector( 28 | model, 29 | dataset, 30 | cfg, 31 | distributed=distributed, 32 | validate=validate, 33 | timestamp=timestamp, 34 | eval_model=eval_model, 35 | meta=meta) 36 | 37 | 38 | def train_model(model, 39 | dataset, 40 | cfg, 41 | distributed=False, 42 | validate=False, 43 | timestamp=None, 44 | meta=None): 45 | """A function wrapper for launching model training according to cfg. 46 | 47 | Because we need different eval_hook in runner. Should be deprecated in the 48 | future. 49 | """ 50 | if cfg.model.type in ['EncoderDecoder3D']: 51 | train_segmentor( 52 | model, 53 | dataset, 54 | cfg, 55 | distributed=distributed, 56 | validate=validate, 57 | timestamp=timestamp, 58 | meta=meta) 59 | else: 60 | train_detector( 61 | model, 62 | dataset, 63 | cfg, 64 | distributed=distributed, 65 | validate=validate, 66 | timestamp=timestamp, 67 | meta=meta) 68 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_head import BEVFormerHead, BEVFormerHead_GroupDETR 2 | from .bev_head import BEVHead 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import BEVFormer 2 | from .bevformer_fp16 import BEVFormer_fp16 3 | from .bevformerV2 import BEVFormerV2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from tkinter.messagebox import NO 8 | import torch 9 | from mmcv.runner import force_fp32, auto_fp16 10 | from mmdet.models import DETECTORS 11 | from mmdet3d.core import bbox3d2result 12 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 13 | from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask 14 | from projects.mmdet3d_plugin.bevformer.detectors.bevformer import BEVFormer 15 | import time 16 | import copy 17 | import numpy as np 18 | import mmdet3d 19 | from projects.mmdet3d_plugin.models.utils.bricks import run_time 20 | 21 | 22 | @DETECTORS.register_module() 23 | class BEVFormer_fp16(BEVFormer): 24 | """ 25 | The default version BEVFormer currently can not support FP16. 26 | We provide this version to resolve this issue. 27 | """ 28 | 29 | @auto_fp16(apply_to=('img', 'prev_bev', 'points')) 30 | def forward_train(self, 31 | points=None, 32 | img_metas=None, 33 | gt_bboxes_3d=None, 34 | gt_labels_3d=None, 35 | gt_labels=None, 36 | gt_bboxes=None, 37 | img=None, 38 | proposals=None, 39 | gt_bboxes_ignore=None, 40 | img_depth=None, 41 | img_mask=None, 42 | prev_bev=None, 43 | ): 44 | """Forward training function. 45 | Args: 46 | points (list[torch.Tensor], optional): Points of each sample. 47 | Defaults to None. 48 | img_metas (list[dict], optional): Meta information of each sample. 49 | Defaults to None. 
50 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional): 51 | Ground truth 3D boxes. Defaults to None. 52 | gt_labels_3d (list[torch.Tensor], optional): Ground truth labels 53 | of 3D boxes. Defaults to None. 54 | gt_labels (list[torch.Tensor], optional): Ground truth labels 55 | of 2D boxes in images. Defaults to None. 56 | gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in 57 | images. Defaults to None. 58 | img (torch.Tensor optional): Images of each sample with shape 59 | (N, C, H, W). Defaults to None. 60 | proposals ([list[torch.Tensor], optional): Predicted proposals 61 | used for training Fast RCNN. Defaults to None. 62 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 63 | 2D boxes in images to be ignored. Defaults to None. 64 | Returns: 65 | dict: Losses of different branches. 66 | """ 67 | 68 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 69 | 70 | losses = dict() 71 | losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d, 72 | gt_labels_3d, img_metas, 73 | gt_bboxes_ignore, prev_bev=prev_bev) 74 | losses.update(losses_pts) 75 | return losses 76 | 77 | 78 | def val_step(self, data, optimizer): 79 | """ 80 | In BEVFormer_fp16, we use this `val_step` function to inference the `prev_pev`. 81 | This is not the standard function of `val_step`. 82 | """ 83 | 84 | img = data['img'] 85 | img_metas = data['img_metas'] 86 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 87 | prev_bev = data.get('prev_bev', None) 88 | prev_bev = self.pts_bbox_head(img_feats, img_metas, prev_bev=prev_bev, only_bev=True) 89 | return prev_bev -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class TransferWeight(Hook): 7 | 8 | def __init__(self, every_n_inters=1): 9 | self.every_n_inters=every_n_inters 10 | 11 | def after_train_iter(self, runner): 12 | if self.every_n_inner_iters(runner, self.every_n_inters): 13 | runner.eval_model.load_state_dict(runner.model.state_dict()) 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .transformerV2 import PerceptionTransformerV2, PerceptionTransformerBEVEncoder 3 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 4 | from .temporal_self_attention import TemporalSelfAttention 5 | from .encoder import BEVFormerEncoder, BEVFormerLayer 6 | from .decoder import DetectionTransformerDecoder 7 | from .group_attention import GroupMultiheadAttention 8 | 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video 
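The TransferWeight hook above and the eval_model held by the custom runner (next file) work as a pair: after each training iteration the hook copies the live model's weights into runner.eval_model, so the frozen copy used to infer prev_bev for the earlier frames never lags behind the trained weights. Below is a hedged sketch of how such a hook is typically switched on from a config; custom_hooks is the standard mmcv mechanism, but the exact wiring used by the BEVFormer configs should be checked against the configs themselves.

# Illustrative config fragment -- not copied from this repository.
# mmcv builds every entry of custom_hooks from the HOOKS registry and attaches
# it to the runner, so TransferWeight only needs to be imported and registered.
custom_hooks = [
    dict(type='TransferWeight', every_n_inters=1),
]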
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # --------------------------------------------- 3 | # Modified by Zhiqi Li 4 | # --------------------------------------------- 5 | 6 | import os.path as osp 7 | import torch 8 | import mmcv 9 | from mmcv.runner.base_runner import BaseRunner 10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | from mmcv.runner.utils import get_host_info 14 | from pprint import pprint 15 | from mmcv.parallel.data_container import DataContainer 16 | 17 | 18 | @RUNNERS.register_module() 19 | class EpochBasedRunner_video(EpochBasedRunner): 20 | 21 | ''' 22 | # basic logic 23 | 24 | input_sequence = [a, b, c] # given a sequence of samples 25 | 26 | prev_bev = None 27 | for each in input_sequcene[:-1] 28 | prev_bev = eval_model(each, prev_bev)) # inference only. 29 | 30 | model(input_sequcene[-1], prev_bev) # train the last sample. 31 | ''' 32 | 33 | def __init__(self, 34 | model, 35 | eval_model=None, 36 | batch_processor=None, 37 | optimizer=None, 38 | work_dir=None, 39 | logger=None, 40 | meta=None, 41 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 42 | max_iters=None, 43 | max_epochs=None): 44 | super().__init__(model, 45 | batch_processor, 46 | optimizer, 47 | work_dir, 48 | logger, 49 | meta, 50 | max_iters, 51 | max_epochs) 52 | keys.append('img_metas') 53 | self.keys = keys 54 | self.eval_model = eval_model 55 | self.eval_model.eval() 56 | 57 | def run_iter(self, data_batch, train_mode, **kwargs): 58 | if self.batch_processor is not None: 59 | assert False 60 | # outputs = self.batch_processor( 61 | # self.model, data_batch, train_mode=train_mode, **kwargs) 62 | elif train_mode: 63 | 64 | num_samples = data_batch['img'].data[0].size(1) 65 | data_list = [] 66 | prev_bev = None 67 | for i in range(num_samples): 68 | data = {} 69 | for key in self.keys: 70 | if key not in ['img_metas', 'img', 'points']: 71 | data[key] = data_batch[key] 72 | else: 73 | if key == 'img': 74 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 75 | elif key == 'img_metas': 76 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 77 | else: 78 | assert False 79 | data_list.append(data) 80 | with torch.no_grad(): 81 | for i in range(num_samples-1): 82 | if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']: 83 | data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 84 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 85 | if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']: 86 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 87 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 88 | else: 89 | assert False 90 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 91 | 92 | if not isinstance(outputs, dict): 93 | raise TypeError('"batch_processor()" or "model.train_step()"' 94 | 'and "model.val_step()" must return a dict') 95 | if 'log_vars' in outputs: 96 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 97 | self.outputs = outputs 
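For orientation, run_iter above assumes the dataset packs a short temporal queue into each sample, so data_batch['img'].data[0] has shape (batch, queue_len, num_cams, C, H, W) and num_samples is the queue length; frames 0..num_samples-2 are only pushed through eval_model to propagate prev_bev, and the final frame is the one actually trained on. A small sketch of that layout with illustrative, not repository-derived, shapes:

import torch
from mmcv.parallel import DataContainer

# Toy batch: 1 sample, a queue of 3 frames, 6 surround-view cameras, tiny images.
imgs = torch.zeros(1, 3, 6, 3, 32, 32)
data_batch = {'img': DataContainer([imgs], stack=True)}

num_samples = data_batch['img'].data[0].size(1)   # 3 -> frames in the temporal queue
frame_0 = data_batch['img'].data[0][:, 0]         # (1, 6, 3, 32, 32): first frame, eval_model only
frame_last = data_batch['img'].data[0][:, -1]     # last frame, used for the actual train_step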
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should includes background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 9]. 49 | Returns: 50 | list[dict]: Decoded boxes. 
51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost, SmoothL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost', 'SmoothL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import mmcv 3 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 4 | 5 | 6 | @MATCH_COST.register_module() 7 | class BBox3DL1Cost(object): 8 | """BBox3DL1Cost. 9 | Args: 10 | weight (int | float, optional): loss_weight 11 | """ 12 | 13 | def __init__(self, weight=1.): 14 | self.weight = weight 15 | 16 | def __call__(self, bbox_pred, gt_bboxes): 17 | """ 18 | Args: 19 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 20 | (cx, cy, w, h), which are all in range [0, 1]. 
Shape 21 | [num_query, 4]. 22 | gt_bboxes (Tensor): Ground truth boxes with normalized 23 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 24 | Returns: 25 | torch.Tensor: bbox_cost value with weight 26 | """ 27 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 28 | return bbox_cost * self.weight 29 | 30 | @mmcv.jit(derivate=True, coderize=True) 31 | #@weighted_loss 32 | def smooth_l1_loss(pred, target, beta=1.0): 33 | """Smooth L1 loss. 34 | Args: 35 | pred (torch.Tensor): The prediction. 36 | target (torch.Tensor): The learning target of the prediction. 37 | beta (float, optional): The threshold in the piecewise function. 38 | Defaults to 1.0. 39 | Returns: 40 | torch.Tensor: Calculated loss 41 | """ 42 | assert beta > 0 43 | if target.numel() == 0: 44 | return pred.sum() * 0 45 | 46 | # assert pred.size() == target.size() 47 | diff = torch.abs(pred - target) 48 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 49 | diff - 0.5 * beta) 50 | return loss.sum(-1) 51 | 52 | 53 | @MATCH_COST.register_module() 54 | class SmoothL1Cost(object): 55 | """SmoothL1Cost. 56 | Args: 57 | weight (int | float, optional): loss weight 58 | 59 | Examples: 60 | >>> from mmdet.core.bbox.match_costs.match_cost import IoUCost 61 | >>> import torch 62 | >>> self = IoUCost() 63 | >>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]]) 64 | >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]]) 65 | >>> self(bboxes, gt_bboxes) 66 | tensor([[-0.1250, 0.1667], 67 | [ 0.1667, -0.5000]]) 68 | """ 69 | 70 | def __init__(self, weight=1.): 71 | self.weight = weight 72 | 73 | def __call__(self, input, target): 74 | """ 75 | Args: 76 | bboxes (Tensor): Predicted boxes with unnormalized coordinates 77 | (x1, y1, x2, y2). Shape [num_query, 4]. 78 | gt_bboxes (Tensor): Ground truth boxes with unnormalized 79 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 
80 | 81 | Returns: 82 | torch.Tensor: iou_cost value with weight 83 | """ 84 | N1, C = input.shape 85 | N2, C = target.shape 86 | input = input.contiguous().view(N1, C)[:, None, :] 87 | target = target.contiguous().view(N2, C)[None, :, :] 88 | cost = smooth_l1_loss(input, target) 89 | 90 | return cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 
5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # to solve circlur import 77 | 78 | results = custom_multi_gpu_test( 79 | runner.model, 80 | self.dataloader, 81 | tmpdir=tmpdir, 82 | gpu_collect=self.gpu_collect) 83 | if runner.rank == 0: 84 | print('\n') 85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 86 | 87 | key_score = self.evaluate(runner, results) 88 | 89 | if self.save_best: 90 | self._save_ckpt(runner, key_score) 91 | 92 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import CustomNuScenesDataset 2 | from .nuscenes_dataset_v2 import CustomNuScenesDatasetV2 3 | 4 | from .builder import custom_build_dataset 5 | __all__ = [ 6 | 'CustomNuScenesDataset', 7 | 'CustomNuScenesDatasetV2', 8 | ] 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage) 4 | from .formating import CustomDefaultFormatBundle3D 5 | from .augmentation import (CropResizeFlipImage, GlobalRotScaleTransImage) 6 | from .dd3d_mapper import DD3DMapper 7 | __all__ = [ 8 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 9 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 10 | 'RandomScaleImageMultiViewImage', 11 | 'CropResizeFlipImage', 'GlobalRotScaleTransImage', 12 | 'DD3DMapper', 13 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import torch 4 | from mmcv.parallel.data_container import DataContainer as DC 5 | from mmdet.datasets.builder import PIPELINES 6 | from projects.mmdet3d_plugin.dd3d.datasets.transform_utils import annotations_to_instances 7 | from projects.mmdet3d_plugin.dd3d.structures.pose import Pose 8 | from projects.mmdet3d_plugin.dd3d.utils.tasks import TaskManager 9 | 10 | 11 | @PIPELINES.register_module() 12 | class DD3DMapper: 13 | def __init__(self, 14 | is_train: bool = True, 15 | tasks=dict(box2d_on=True, box3d_on=True), 16 | ): 17 | self.is_train = is_train 18 | self.task_manager = TaskManager(**tasks) 19 | 20 | def __call__(self, results): 21 | if results['mono_input_dict'] is None: 22 | return results 23 | mono_input_dict = [] 24 | for dataset_dict in results['mono_input_dict']: 25 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 26 | image_shape = results['img'].data.shape[-2:] 27 | intrinsics = None 28 | if "intrinsics" in dataset_dict: 29 | intrinsics = dataset_dict['intrinsics'] 30 | if not 
torch.is_tensor(intrinsics): 31 | intrinsics = np.reshape( 32 | intrinsics, 33 | (3, 3), 34 | ).astype(np.float32) 35 | intrinsics = torch.as_tensor(intrinsics) 36 | # NOTE: intrinsics = transforms.apply_intrinsics(intrinsics) 37 | dataset_dict["intrinsics"] = intrinsics 38 | dataset_dict["inv_intrinsics"] = torch.linalg.inv(dataset_dict['intrinsics']) 39 | 40 | if "pose" in dataset_dict: 41 | pose = Pose(wxyz=np.float32(dataset_dict["pose"]["wxyz"]), 42 | tvec=np.float32(dataset_dict["pose"]["tvec"])) 43 | dataset_dict["pose"] = pose 44 | # NOTE: no transforms affect global pose. 45 | 46 | if "extrinsics" in dataset_dict: 47 | extrinsics = Pose( 48 | wxyz=np.float32(dataset_dict["extrinsics"]["wxyz"]), 49 | tvec=np.float32(dataset_dict["extrinsics"]["tvec"]) 50 | ) 51 | dataset_dict["extrinsics"] = extrinsics 52 | 53 | if not self.task_manager.has_detection_task: 54 | dataset_dict.pop("annotations", None) 55 | 56 | if "annotations" in dataset_dict: 57 | for anno in dataset_dict["annotations"]: 58 | if not self.task_manager.has_detection_task: 59 | anno.pop("bbox", None) 60 | anno.pop("bbox_mode", None) 61 | if not self.task_manager.box3d_on: 62 | anno.pop("bbox3d", None) 63 | annos = [anno for anno in dataset_dict["annotations"] if anno.get("iscrowd", 0) == 0] 64 | if annos and 'bbox3d' in annos[0]: 65 | # Remove boxes with negative z-value for center. 66 | annos = [anno for anno in annos if anno['bbox3d'][6] > 0] 67 | 68 | instances = annotations_to_instances( 69 | annos, 70 | image_shape, # TODO: the effect of the shape? 71 | intrinsics=intrinsics.numpy(), 72 | ) 73 | 74 | if self.is_train: 75 | # instances = d2_utils.filter_empty_instances(instances) 76 | m = instances.gt_boxes.nonempty(threshold=1e-5) 77 | instances = instances[m] 78 | annos = [anno for tmp_m, anno in zip(m, annos) if tmp_m] 79 | dataset_dict["instances"] = instances 80 | 81 | dataset_dict['annotations'] = annos 82 | 83 | mono_input_dict.append(dataset_dict) 84 | 85 | # TODO: drop batch that has no annotations? 86 | box_num = 0 87 | for dataset_dict in mono_input_dict: 88 | box_num += dataset_dict["instances"].gt_boxes.tensor.shape[0] 89 | if box_num == 0: 90 | return None 91 | 92 | mono_input_dict = DC(mono_input_dict, cpu_only=True) 93 | results['mono_input_dict'] = mono_input_dict 94 | return results 95 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 
19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | 26 | def __call__(self, results): 27 | """Call function to transform and format common fields in results. 28 | Args: 29 | results (dict): Result dict contains the data to convert. 30 | Returns: 31 | dict: The result dict contains the data that is formatted with 32 | default bundle. 33 | """ 34 | # Format 3D data 35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 36 | results['gt_map_masks'] = DC( 37 | to_tensor(results['gt_map_masks']), stack=True) 38 | 39 | return results -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/projects/mmdet3d_plugin/datasets/pipelines/loading.py -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def __init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for the compatibility from PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 
3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0. 31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 
75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import * -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/projects/mmdet3d_plugin/dd3d/datasets/__init__.py -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | # Adapted from AdelaiDet: 3 | # https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/iou_loss.py 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class IOULoss(nn.Module): 9 | """ 10 | Intersetion Over Union (IoU) loss which supports three 11 | different IoU computations: 12 | 13 | * IoU 14 | * Linear IoU 15 | * gIoU 16 | """ 17 | def __init__(self, loc_loss_type='iou'): 18 | super(IOULoss, self).__init__() 19 | self.loc_loss_type = loc_loss_type 20 | 21 | def forward(self, pred, target, weight=None): 22 | """ 23 | Args: 24 | pred: Nx4 predicted bounding boxes 25 | target: Nx4 target bounding boxes 26 | weight: N loss weight for each instance 27 | """ 28 | pred_left = pred[:, 0] 29 | pred_top = pred[:, 1] 30 | pred_right = pred[:, 2] 31 | pred_bottom = pred[:, 3] 32 | 33 | target_left = target[:, 0] 34 | target_top = target[:, 1] 35 | target_right = target[:, 2] 36 | target_bottom = target[:, 3] 37 | 38 | target_aera = (target_left + target_right) * \ 39 | (target_top + target_bottom) 40 | pred_aera = (pred_left + pred_right) * \ 41 | (pred_top + pred_bottom) 42 | 43 | w_intersect = torch.min(pred_left, target_left) + \ 44 | torch.min(pred_right, target_right) 45 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 46 | torch.min(pred_top, target_top) 47 | 48 | g_w_intersect = torch.max(pred_left, target_left) + \ 49 | torch.max(pred_right, target_right) 50 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 51 | torch.max(pred_top, target_top) 52 | ac_uion = g_w_intersect * g_h_intersect 53 | 54 | area_intersect = w_intersect * h_intersect 55 | area_union = target_aera + pred_aera - area_intersect 56 | 57 | ious = (area_intersect + 1.0) / (area_union + 1.0) 58 | gious = ious - (ac_uion - area_union) / ac_uion 59 | if self.loc_loss_type == 'iou': 60 | losses = -torch.log(ious) 61 | elif self.loc_loss_type == 'linear_iou': 62 | losses = 1 - ious 63 | elif self.loc_loss_type == 'giou': 64 | losses = 1 - gious 65 | else: 66 | raise NotImplementedError 67 | 68 | if weight is not None: 69 | return (losses * weight).sum() 70 | else: 71 | return losses.sum() 72 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | # Adapted from AdelaiDet 3 | # https://github.com/aim-uofa/AdelaiDet/ 4 | import logging 5 | 6 | import torch 7 | from torch import nn 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | 12 | class Scale(nn.Module): 13 | def __init__(self, init_value=1.0): 14 | super(Scale, self).__init__() 15 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 16 | 17 | def forward(self, input): 18 | return input * self.scale 19 | 20 | 21 | class Offset(nn.Module): 22 | def __init__(self, init_value=0.): 23 | super(Offset, self).__init__() 24 | self.bias = nn.Parameter(torch.FloatTensor([init_value])) 25 | 26 | def forward(self, input): 27 | return input + self.bias 28 | 29 | 30 | class ModuleListDial(nn.ModuleList): 31 | def __init__(self, modules=None): 32 | super(ModuleListDial, self).__init__(modules) 33 | self.cur_position = 0 34 | 35 | def forward(self, x): 36 | result = self[self.cur_position](x) 37 | self.cur_position += 1 38 | if self.cur_position >= len(self): 39 | self.cur_position = 0 40 | return result 41 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | # Adapted from fvcore: 4 | # https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 5 | 6 | import torch 7 | 8 | 9 | def smooth_l1_loss(input: torch.Tensor, target: torch.Tensor, beta: float, reduction: str = "none") -> torch.Tensor: 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 
57 | """ 58 | # (dennis.park) Make it work with mixed precision training. 59 | beta = torch.as_tensor(beta).to(input.dtype) 60 | if beta < 1e-5: 61 | # if beta == 0, then torch.where will result in nan gradients when 62 | # the chain rule is applied due to pytorch implementation details 63 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 64 | # zeros, rather than "no gradient"). To avoid this issue, we define 65 | # small values of beta to be exactly l1 loss. 66 | loss = torch.abs(input - target) 67 | else: 68 | n = torch.abs(input - target) 69 | cond = n < beta 70 | a = 0.5 * n**2 71 | b = n - 0.5 * beta 72 | a, b = a.to(input.dtype), b.to(input.dtype) 73 | loss = torch.where(cond, a, b) 74 | # loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 75 | 76 | if reduction == "mean": 77 | loss = loss.mean() 78 | elif reduction == "sum": 79 | loss = loss.sum() 80 | return loss 81 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dd3d import NuscenesDD3D -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from projects.mmdet3d_plugin.dd3d.layers.smooth_l1_loss import smooth_l1_loss 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | 12 | class DisentangledBox3DLoss(nn.Module): 13 | def __init__(self, smooth_l1_loss_beta, max_loss_per_group): 14 | super().__init__() 15 | self.smooth_l1_loss_beta = smooth_l1_loss_beta 16 | self.max_loss_per_group = max_loss_per_group 17 | 18 | def forward(self, box3d_pred, box3d_targets, locations, weights=None): 19 | 20 | box3d_pred = box3d_pred.to(torch.float32) 21 | box3d_targets = box3d_targets.to(torch.float32) 22 | 23 | target_corners = box3d_targets.corners 24 | 25 | disentangled_losses = {} 26 | for component_key in ["quat", "proj_ctr", "depth", "size"]: 27 | disentangled_boxes = box3d_targets.clone() 28 | setattr(disentangled_boxes, component_key, getattr(box3d_pred, component_key)) 29 | pred_corners = disentangled_boxes.to(torch.float32).corners 30 | 31 | loss = smooth_l1_loss(pred_corners, target_corners, beta=self.smooth_l1_loss_beta) 32 | 33 | # Bound the loss 34 | loss.clamp(max=self.max_loss_per_group) 35 | 36 | if weights is not None: 37 | # loss = torch.sum(loss.reshape(-1, 24) * weights.unsqueeze(-1)) 38 | loss = torch.sum(loss.reshape(-1, 24).mean(dim=1) * weights) 39 | else: 40 | loss = loss.reshape(-1, 24).mean() 41 | 42 | disentangled_losses["loss_box3d_" + component_key] = loss 43 | 44 | entangled_l1_dist = (target_corners - box3d_pred.corners).detach().abs().reshape(-1, 24).mean(dim=1) 45 | 46 | return disentangled_losses, entangled_l1_dist 47 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from .image_list import ImageList 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/utils/comm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | from functools import wraps 4 | 5 | import torch.distributed as dist 6 | 7 | from detectron2.utils import comm as d2_comm 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | _NESTED_BROADCAST_FROM_MASTER = False 12 | 13 | 14 | def is_distributed(): 15 | return d2_comm.get_world_size() > 1 16 | 17 | 18 | def broadcast_from_master(fn): 19 | """If distributed, only the master executes the function and broadcast the results to other workers. 20 | 21 | Usage: 22 | @broadcast_from_master 23 | def foo(a, b): ... 24 | """ 25 | @wraps(fn) 26 | def wrapper(*args, **kwargs): # pylint: disable=unused-argument 27 | global _NESTED_BROADCAST_FROM_MASTER 28 | 29 | if not is_distributed(): 30 | return fn(*args, **kwargs) 31 | 32 | if _NESTED_BROADCAST_FROM_MASTER: 33 | assert d2_comm.is_main_process() 34 | LOG.warning(f"_NESTED_BROADCAST_FROM_MASTER = True, {fn.__name__}") 35 | return fn(*args, **kwargs) 36 | 37 | if d2_comm.is_main_process(): 38 | _NESTED_BROADCAST_FROM_MASTER = True 39 | ret = [fn(*args, **kwargs), ] 40 | _NESTED_BROADCAST_FROM_MASTER = False 41 | else: 42 | ret = [None, ] 43 | if dist.is_initialized(): 44 | dist.broadcast_object_list(ret) 45 | ret = ret[0] 46 | 47 | assert ret is not None 48 | return ret 49 | 50 | return wrapper 51 | 52 | 53 | def master_only(fn): 54 | """If distributed, only the master executes the function. 55 | 56 | Usage: 57 | @master_only 58 | def foo(a, b): ... 59 | """ 60 | @wraps(fn) 61 | def wrapped_fn(*args, **kwargs): 62 | if d2_comm.is_main_process(): 63 | ret = fn(*args, **kwargs) 64 | d2_comm.synchronize() 65 | if d2_comm.is_main_process(): 66 | return ret 67 | 68 | return wrapped_fn 69 | 70 | 71 | def gather_dict(dikt): 72 | """Gather python dictionaries from all workers to the rank=0 worker. 73 | 74 | Assumption: the keys of `dikt` are disjoint across all workers. 75 | 76 | If rank = 0, then returned aggregated dict. 77 | If rank > 0, then return `None`. 78 | """ 79 | dict_lst = d2_comm.gather(dikt, dst=0) 80 | if d2_comm.is_main_process(): 81 | gathered_dict = {} 82 | for dic in dict_lst: 83 | for k in dic.keys(): 84 | assert k not in gathered_dict, f"Dictionary key overlaps: {k}" 85 | gathered_dict.update(dic) 86 | return gathered_dict 87 | else: 88 | return None 89 | 90 | 91 | def reduce_sum(tensor): 92 | """ 93 | Adapted from AdelaiDet: 94 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 95 | """ 96 | if not is_distributed(): 97 | return tensor 98 | tensor = tensor.clone() 99 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 100 | return tensor 101 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/utils/tasks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from collections import OrderedDict 3 | 4 | # from detectron2.config import configurable 5 | 6 | 7 | class Task(): 8 | def __init__(self, name, is_detection_task, is_dense_prediction_task): 9 | self.name = name 10 | self.is_detection_task = is_detection_task 11 | self.is_dense_prediction_task = is_dense_prediction_task 12 | 13 | 14 | # yapf: disable 15 | TASKS = [ 16 | Task( 17 | name="box2d", 18 | is_detection_task=True, 19 | is_dense_prediction_task=False, 20 | ), 21 | Task( 22 | name="box3d", 23 | is_detection_task=True, 24 | is_dense_prediction_task=False, 25 | ), 26 | Task( 27 | name="depth", 28 | is_detection_task=False, 29 | is_dense_prediction_task=True, 30 | ) 31 | ] 32 | # yapf: enable 33 | 34 | NAME_TO_TASK = OrderedDict([(task.name, task) for task in TASKS]) 35 | 36 | 37 | class TaskManager(): 38 | #@configurable 39 | def __init__(self, box2d_on=False, box3d_on=False, depth_on=False): 40 | """ 41 | configurable is experimental. 42 | """ 43 | self._box2d_on = self._mask2d_on = self._box3d_on = self._semseg2d_on = self._depth_on = False 44 | tasks = [] 45 | if box2d_on: 46 | tasks.append(NAME_TO_TASK['box2d']) 47 | self._box2d_on = True 48 | if box3d_on: 49 | tasks.append(NAME_TO_TASK['box3d']) 50 | self._box3d_on = True 51 | if depth_on: 52 | tasks.append(NAME_TO_TASK['depth']) 53 | self._depth_on = True 54 | 55 | if not tasks: 56 | raise ValueError("No task specified.") 57 | 58 | self._tasks = tasks 59 | 60 | @property 61 | def tasks(self): 62 | return self._tasks 63 | 64 | '''@classmethod 65 | def from_config(cls, cfg): 66 | # yapf: disable 67 | return OrderedDict( 68 | box2d_on = cfg.MODEL.BOX2D_ON, 69 | box3d_on = cfg.MODEL.BOX3D_ON, 70 | depth_on = cfg.MODEL.DEPTH_ON, 71 | ) 72 | # yapf: enable''' 73 | 74 | # Indicators that tell whether each task is enabled. 75 | @property 76 | def box2d_on(self): 77 | return self._box2d_on 78 | 79 | @property 80 | def box3d_on(self): 81 | return self._box3d_on 82 | 83 | @property 84 | def depth_on(self): 85 | return self._depth_on 86 | 87 | @property 88 | def has_dense_prediction_task(self): 89 | return any([task.is_dense_prediction_task for task in self.tasks]) 90 | 91 | @property 92 | def has_detection_task(self): 93 | return any([task.is_detection_task for task in self.tasks]) 94 | 95 | @property 96 | def task_names(self): 97 | return [task.name for task in self.tasks] 98 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/utils/tensor2d.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def compute_features_locations(h, w, stride, dtype=torch.float32, device='cpu', offset="none"): 7 | """Adapted from AdelaiDet: 8 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 9 | 10 | Key difference: offset is configurable.
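    With offset="none" the locations are the top-left corners of the stride cells; with offset="half" they are shifted by stride // 2 to the cell centers (the original AdelaiDet behavior, shown in the commented-out line below).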
11 | """ 12 | shifts_x = torch.arange(0, w * stride, step=stride, dtype=dtype, device=device) 13 | shifts_y = torch.arange(0, h * stride, step=stride, dtype=dtype, device=device) 14 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 15 | shift_x = shift_x.reshape(-1) 16 | shift_y = shift_y.reshape(-1) 17 | # (dennis.park) 18 | # locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 19 | locations = torch.stack((shift_x, shift_y), dim=1) 20 | if offset == "half": 21 | locations += stride // 2 22 | else: 23 | assert offset == "none" 24 | 25 | return locations 26 | 27 | 28 | def aligned_bilinear(tensor, factor, offset="none"): 29 | """Adapted from AdelaiDet: 30 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 31 | """ 32 | assert tensor.dim() == 4 33 | assert factor >= 1 34 | assert int(factor) == factor 35 | 36 | if factor == 1: 37 | return tensor 38 | 39 | h, w = tensor.size()[2:] 40 | tensor = F.pad(tensor, pad=(0, 1, 0, 1), mode="replicate") 41 | oh = factor * h + 1 42 | ow = factor * w + 1 43 | tensor = F.interpolate(tensor, size=(oh, ow), mode='bilinear', align_corners=True) 44 | if offset == "half": 45 | tensor = F.pad(tensor, pad=(factor // 2, 0, factor // 2, 0), mode="replicate") 46 | 47 | return tensor[:, :, :oh - 1, :ow - 1] 48 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class GradChecker(Hook): 7 | 8 | def after_train_iter(self, runner): 9 | for key, val in runner.model.named_parameters(): 10 | if val.grad == None and val.requires_grad: 11 | print('WARNNING: {key}\'s parameters are not be used!!!!'.format(key=key)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding 5 | from .visual import save_tensor -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | import torch 5 | time_maps = defaultdict(lambda :0.) 6 | count_maps = defaultdict(lambda :0.) 
7 | def run_time(name): 8 | def middle(fn): 9 | def wrapper(*args, **kwargs): 10 | torch.cuda.synchronize() 11 | start = time.time() 12 | res = fn(*args, **kwargs) 13 | torch.cuda.synchronize() 14 | time_maps['%s : %s'%(name, fn.__name__) ] += time.time()-start 15 | count_maps['%s : %s'%(name, fn.__name__) ] +=1 16 | print("%s : %s takes up %f "% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] )) 17 | return res 18 | return wrapper 19 | return middle 20 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | from mmcv.runner import force_fp32, auto_fp16 6 | 7 | class Grid(object): 8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 9 | self.use_h = use_h 10 | self.use_w = use_w 11 | self.rotate = rotate 12 | self.offset = offset 13 | self.ratio = ratio 14 | self.mode=mode 15 | self.st_prob = prob 16 | self.prob = prob 17 | 18 | def set_prob(self, epoch, max_epoch): 19 | self.prob = self.st_prob * epoch / max_epoch 20 | 21 | def __call__(self, img, label): 22 | if np.random.rand() > self.prob: 23 | return img, label 24 | h = img.size(1) 25 | w = img.size(2) 26 | self.d1 = 2 27 | self.d2 = min(h, w) 28 | hh = int(1.5*h) 29 | ww = int(1.5*w) 30 | d = np.random.randint(self.d1, self.d2) 31 | if self.ratio == 1: 32 | self.l = np.random.randint(1, d) 33 | else: 34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 35 | mask = np.ones((hh, ww), np.float32) 36 | st_h = np.random.randint(d) 37 | st_w = np.random.randint(d) 38 | if self.use_h: 39 | for i in range(hh//d): 40 | s = d*i + st_h 41 | t = min(s+self.l, hh) 42 | mask[s:t,:] *= 0 43 | if self.use_w: 44 | for i in range(ww//d): 45 | s = d*i + st_w 46 | t = min(s+self.l, ww) 47 | mask[:,s:t] *= 0 48 | 49 | r = np.random.randint(self.rotate) 50 | mask = Image.fromarray(np.uint8(mask)) 51 | mask = mask.rotate(r) 52 | mask = np.asarray(mask) 53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 54 | 55 | mask = torch.from_numpy(mask).float() 56 | if self.mode == 1: 57 | mask = 1-mask 58 | 59 | mask = mask.expand_as(img) 60 | if self.offset: 61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 62 | offset = (1 - mask) * offset 63 | img = img * mask + offset 64 | else: 65 | img = img * mask 66 | 67 | return img, label 68 | 69 | 70 | class GridMask(nn.Module): 71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 72 | super(GridMask, self).__init__() 73 | self.use_h = use_h 74 | self.use_w = use_w 75 | self.rotate = rotate 76 | self.offset = offset 77 | self.ratio = ratio 78 | self.mode = mode 79 | self.st_prob = prob 80 | self.prob = prob 81 | self.fp16_enable = False 82 | def set_prob(self, epoch, max_epoch): 83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 84 | @auto_fp16() 85 | def forward(self, x): 86 | if np.random.rand() > self.prob or not self.training: 87 | return x 88 | n,c,h,w = x.size() 89 | x = x.view(-1,h,w) 90 | hh = int(1.5*h) 91 | ww = int(1.5*w) 92 | d = np.random.randint(2, h) 93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 94 | mask = np.ones((hh, ww), np.float32) 95 | st_h = np.random.randint(d) 96 | st_w = np.random.randint(d) 97 | if self.use_h: 98 | for i in range(hh//d): 99 | s = d*i + st_h 
100 | t = min(s+self.l, hh) 101 | mask[s:t,:] *= 0 102 | if self.use_w: 103 | for i in range(ww//d): 104 | s = d*i + st_w 105 | t = min(s+self.l, ww) 106 | mask[:,s:t] *= 0 107 | 108 | r = np.random.randint(self.rotate) 109 | mask = Image.fromarray(np.uint8(mask)) 110 | mask = mask.rotate(r) 111 | mask = np.asarray(mask) 112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 113 | 114 | mask = torch.from_numpy(mask).to(x.dtype).cuda() 115 | if self.mode == 1: 116 | mask = 1-mask 117 | mask = mask.expand_as(x) 118 | if self.offset: 119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda() 120 | x = x * mask + offset * (1 - mask) 121 | else: 122 | x = x * mask 123 | 124 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class RelPositionEmbedding(nn.Module): 6 | def __init__(self, num_pos_feats=64, pos_norm=True): 7 | super().__init__() 8 | self.num_pos_feats = num_pos_feats 9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False) 10 | #nn.init.orthogonal_(self.fc.weight) 11 | #self.fc.weight.requires_grad = False 12 | self.pos_norm = pos_norm 13 | if self.pos_norm: 14 | self.norm = nn.LayerNorm(self.num_pos_feats) 15 | def forward(self, tensor): 16 | #mask = nesttensor.mask 17 | B,C,H,W = tensor.shape 18 | #print('tensor.shape', tensor.shape) 19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1) 21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 23 | 24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 25 | #x_axis =torch.stack((x_range,1-x_range),dim=1) 26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 28 | x_pos = torch.cat((y_axis, x_axis), dim=1) 29 | x_pos = self.fc(x_pos) 30 | 31 | if self.pos_norm: 32 | x_pos = self.norm(x_pos) 33 | #print('xpos,', x_pos.max(),x_pos.min()) 34 | return x_pos -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0,): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /tools/analysis_tools/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/tools/analysis_tools/__init__.py -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | import sys 9 | sys.path.append('.') 10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader 11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset 12 | # from mmdet3d.datasets import build_dataloader, build_dataset 13 | from mmdet3d.models import build_detector 14 | #from tools.misc.fuse_conv_bn import fuse_module 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 19 | parser.add_argument('config', help='test config file path') 20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file') 21 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 22 | parser.add_argument( 23 | '--log-interval', default=50, help='interval of logging') 24 | parser.add_argument( 25 | '--fuse-conv-bn', 26 | action='store_true', 27 | help='Whether to fuse conv and bn, this will slightly increase' 28 | 'the inference speed') 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | 36 | cfg = Config.fromfile(args.config) 37 | # set cudnn_benchmark 38 | if cfg.get('cudnn_benchmark', False): 39 | torch.backends.cudnn.benchmark = True 40 | cfg.model.pretrained = None 41 | cfg.data.test.test_mode = True 42 | 43 | # build the dataloader 44 | # TODO: support multiple images per gpu (only minor changes are needed) 45 | print(cfg.data.test) 46 | dataset = custom_build_dataset(cfg.data.test) 47 | data_loader = build_dataloader( 48 | dataset, 49 | samples_per_gpu=1, 50 | workers_per_gpu=cfg.data.workers_per_gpu, 51 | dist=False, 52 | shuffle=False) 53 | 54 | # build the model and load checkpoint 55 | cfg.model.train_cfg = None 56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 57 | fp16_cfg = cfg.get('fp16', None) 58 | if fp16_cfg is not None: 59 | wrap_fp16_model(model) 60 | if args.checkpoint is not None: 61 | load_checkpoint(model, args.checkpoint, map_location='cpu') 62 | #if args.fuse_conv_bn: 63 | # model = fuse_module(model) 64 | 65 | model = MMDataParallel(model, device_ids=[0]) 66 | 67 | model.eval() 68 | 69 | # the first several iterations may be very slow so skip them 70 | num_warmup = 5 71 | pure_inf_time = 0 72 | 73 | # benchmark with several samples and take the average 74 | for i, data in enumerate(data_loader): 75 | torch.cuda.synchronize() 76 | start_time = time.perf_counter() 77 | with torch.no_grad(): 78 | model(return_loss=False, rescale=True, **data) 79 | 80 | torch.cuda.synchronize() 81 | elapsed = time.perf_counter() - start_time 82 | 83 | if i >= num_warmup: 84 | pure_inf_time += elapsed 85 | if (i + 1) % args.log_interval == 0: 86 | fps = (i + 1 - num_warmup) / pure_inf_time 87 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 88 | f'fps: {fps:.1f} img / s') 89 | 90 | if (i + 1) == args.samples: 91 | pure_inf_time += elapsed 92 | fps = (i + 1 - 
num_warmup) / pure_inf_time 93 | print(f'Overall fps: {fps:.1f} img / s') 94 | break 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | file_path = './ckpts/bevformer_v4.pth' 3 | model = torch.load(file_path, map_location='cpu') 4 | all = 0 5 | for key in list(model['state_dict'].keys()): 6 | all += model['state_dict'][key].nelement() 7 | print(all) 8 | 9 | # smaller 63374123 10 | # v4 69140395 11 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import os 5 | 6 | 7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 10 | root_folder = os.path.join(root_folder, f'{version}-train') 11 | lidar_path = os.path.join(root_folder, lidar_path) 12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 13 | f'dataset and make sure {lidar_path} is present.' 14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 15 | try: 16 | points.reshape([-1, 5]) 17 | print(f'This fix is not required for version {version}.') 18 | except ValueError: 19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 20 | new_points.tofile(lidar_path) 21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 25 | parser.add_argument( 26 | '--root-folder', 27 | type=str, 28 | default='./data/lyft', 29 | help='specify the root path of Lyft dataset') 30 | parser.add_argument( 31 | '--version', 32 | type=str, 33 | default='v1.01', 34 | help='specify Lyft dataset version') 35 | args = parser.parse_args() 36 | 37 | if __name__ == '__main__': 38 | fix_lyft(root_folder=args.root_folder, version=args.version) 39 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28509} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | 
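A minimal usage sketch for the two launch scripts above; the config path, checkpoint path, and GPU count are illustrative placeholders, not values taken from the scripts:

    ./tools/dist_train.sh projects/configs/bevformer/bevformer_base.py 8
    ./tools/dist_test.sh projects/configs/bevformer/bevformer_base.py ckpts/bevformer_base.pth 8
    PORT=29510 ./tools/dist_train.sh projects/configs/bevformer/bevformer_base.py 8

Both scripts read PORT from the environment with a default value (the ${PORT:-...} expansion), so overriding it as in the last line avoids port conflicts when several jobs share one machine.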
-------------------------------------------------------------------------------- /tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionality of batch norm layers is turned off and 12 | only the per-channel mean and var are used, which exposes the chance to 13 | fuse them with the preceding conv layers to save computations and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 
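    # A hypothetical illustration (not part of the original script): the keys below
    # are assumed names; adjust them to whatever your checkpoints actually store
    # in checkpoint['meta'] before uncommenting.
    # checkpoint.get('meta', {}).pop('env_info', None)
    # checkpoint.get('meta', {}).pop('config', None)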
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', 
help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | --------------------------------------------------------------------------------
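A short usage sketch for the two converter utilities above; the input and output file names are illustrative placeholders:

    # strip the optimizer state and append a SHA-256 prefix to the published file name
    python tools/model_converters/publish_model.py work_dirs/bevformer_base/latest.pth bevformer_base_release.pth
    # convert a pycls RegNet checkpoint into the mmdet key layout
    python tools/model_converters/regnet2mmdet.py RegNetX-4.0GF_dds_8gpu.pyth regnetx_4.0gf_mmdet.pth

publish_model.py deletes the 'optimizer' entry and renames the output by appending the first 8 hex characters of the file's sha256 digest, so the published name can later be checked against the file's hash.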