├── .gitignore ├── LICENSE ├── README.md ├── docs ├── can_bus.ipynb ├── getting_started.md ├── install.md └── prepare_dataset.md ├── figs ├── arch.png └── sota_results.png ├── projects ├── __init__.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_instance.py │ │ │ ├── kitti-3d-3class.py │ │ │ ├── kitti-3d-car.py │ │ │ ├── lyft-3d.py │ │ │ ├── nuim_instance.py │ │ │ ├── nus-3d.py │ │ │ ├── nus-mono3d.py │ │ │ ├── range100_lyft-3d.py │ │ │ ├── s3dis-3d-5class.py │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ ├── scannet-3d-18class.py │ │ │ ├── scannet_seg-3d-20class.py │ │ │ ├── sunrgbd-3d-10class.py │ │ │ ├── waymoD5-3d-3class.py │ │ │ └── waymoD5-3d-car.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── 3dssd.py │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ ├── fcos3d.py │ │ │ ├── groupfree3d.py │ │ │ ├── h3dnet.py │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ ├── imvotenet_image.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── paconv_cuda_ssg.py │ │ │ ├── paconv_ssg.py │ │ │ ├── parta2.py │ │ │ ├── pointnet2_msg.py │ │ │ ├── pointnet2_ssg.py │ │ │ └── votenet.py │ │ └── schedules │ │ │ ├── cosine.py │ │ │ ├── cyclic_20e.py │ │ │ ├── cyclic_40e.py │ │ │ ├── mmdet_schedule_1x.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ ├── seg_cosine_150e.py │ │ │ ├── seg_cosine_200e.py │ │ │ └── seg_cosine_50e.py │ ├── bevformer │ │ ├── bevformer_base.py │ │ ├── bevformer_small.py │ │ └── bevformer_tiny.py │ ├── bevformer_fp16 │ │ └── bevformer_tiny_fp16.py │ ├── bevformerv2 │ │ ├── bevformerv2-r50-t1-24ep.py │ │ ├── bevformerv2-r50-t1-48ep.py │ │ ├── bevformerv2-r50-t1-base-24ep.py │ │ ├── bevformerv2-r50-t1-base-48ep.py │ │ ├── bevformerv2-r50-t2-24ep.py │ │ ├── bevformerv2-r50-t2-48ep.py │ │ └── bevformerv2-r50-t8-24ep.py │ └── datasets │ │ ├── custom_lyft-3d.py │ │ ├── custom_nus-3d.py │ │ └── custom_waymo-3d.py └── mmdet3d_plugin │ ├── __init__.py │ ├── bevformer │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ ├── mmdet_train.py │ │ ├── test.py │ │ └── train.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── bev_head.py │ │ └── bevformer_head.py │ ├── detectors │ │ ├── __init__.py │ │ ├── bevformer.py │ │ ├── bevformerV2.py │ │ └── bevformer_fp16.py │ ├── hooks │ │ ├── __init__.py │ │ └── custom_hooks.py │ ├── modules │ │ ├── __init__.py │ │ ├── custom_base_transformer_layer.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── group_attention.py │ │ ├── multi_scale_deformable_attn_function.py │ │ ├── spatial_cross_attention.py │ │ ├── temporal_self_attention.py │ │ ├── transformer.py │ │ └── transformerV2.py │ └── runner │ │ ├── __init__.py │ │ └── epoch_based_runner.py │ ├── core │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ └── hungarian_assigner_3d.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ └── util.py │ └── evaluation │ │ ├── __init__.py │ │ ├── eval_hooks.py │ │ └── kitti2waymo.py │ ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_v2.py │ ├── nuscenes_mono_dataset.py │ ├── nuscnes_eval.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── augmentation.py │ │ ├── dd3d_mapper.py │ │ ├── formating.py │ │ 
├── loading.py │ │ └── transform_3d.py │ └── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ ├── group_sampler.py │ │ └── sampler.py │ ├── dd3d │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── nuscenes.py │ │ └── transform_utils.py │ ├── layers │ │ ├── iou_loss.py │ │ ├── normalization.py │ │ └── smooth_l1_loss.py │ ├── modeling │ │ ├── __init__.py │ │ ├── core.py │ │ ├── disentangled_box3d_loss.py │ │ ├── fcos2d.py │ │ ├── fcos3d.py │ │ ├── nuscenes_dd3d.py │ │ └── prepare_targets.py │ ├── structures │ │ ├── __init__.py │ │ ├── boxes3d.py │ │ ├── image_list.py │ │ ├── pose.py │ │ └── transform3d.py │ └── utils │ │ ├── comm.py │ │ ├── geometry.py │ │ ├── tasks.py │ │ ├── tensor2d.py │ │ └── visualization.py │ └── models │ ├── backbones │ ├── __init__.py │ └── vovnet.py │ ├── hooks │ ├── __init__.py │ └── hooks.py │ ├── opt │ ├── __init__.py │ └── adamw.py │ └── utils │ ├── __init__.py │ ├── bricks.py │ ├── grid_mask.py │ ├── position_embedding.py │ └── visual.py └── tools ├── analysis_tools ├── __init__.py ├── analyze_logs.py ├── benchmark.py ├── get_params.py └── visual.py ├── create_data.py ├── data_converter ├── __init__.py ├── create_gt_database.py ├── indoor_converter.py ├── kitti_converter.py ├── kitti_data_utils.py ├── lyft_converter.py ├── lyft_data_fixer.py ├── nuimage_converter.py ├── nuscenes_converter.py ├── s3dis_data_utils.py ├── scannet_data_utils.py ├── sunrgbd_data_utils.py └── waymo_converter.py ├── dist_test.sh ├── dist_train.sh ├── fp16 ├── dist_train.sh └── train.py ├── misc ├── browse_dataset.py ├── fuse_conv_bn.py ├── print_config.py └── visualize_results.py ├── model_converters ├── convert_votenet_checkpoints.py ├── publish_model.py └── regnet2mmdet.py ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | ckpts/ 13 | data/ 14 | ckpts 15 | data 16 | test/ 17 | val/ 18 | work_dirs/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
-------------------------------------------------------------------------------- /docs/getting_started.md: --------------------------------------------------------------------------------
1 | # Prerequisites
2 |
3 | **Please ensure you have prepared the environment and the nuScenes dataset.**
4 |
5 | # Train and Test
6 |
7 | Train BEVFormer with 8 GPUs
8 | ```
9 | ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py 8
10 | ```
11 |
12 | Eval BEVFormer with 8 GPUs
13 | ```
14 | ./tools/dist_test.sh ./projects/configs/bevformer/bevformer_base.py ./path/to/ckpts.pth 8
15 | ```
16 | Note: evaluating with a single GPU can give slightly higher performance, because continuous video sequences may be truncated when the data is split across multiple GPUs. By default we report scores evaluated with 8 GPUs.
17 |
18 |
19 |
20 | # Using FP16 to train the model
21 | The training script above does not support FP16 training,
22 | so we provide a separate script to train BEVFormer with FP16.
23 |
24 | ```
25 | ./tools/fp16/dist_train.sh ./projects/configs/bevformer_fp16/bevformer_tiny_fp16.py 8
26 | ```
27 |
28 |
29 | # Visualization
30 |
31 | See [visual.py](../tools/analysis_tools/visual.py)
-------------------------------------------------------------------------------- /docs/install.md: --------------------------------------------------------------------------------
1 | # Step-by-step installation instructions
2 |
3 | These instructions follow https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation
4 |
5 |
6 |
7 | **a. Create a conda virtual environment and activate it.**
8 | ```shell
9 | conda create -n open-mmlab python=3.8 -y
10 | conda activate open-mmlab
11 | ```
12 |
13 | **b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).**
14 | ```shell
15 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
16 | # Recommended torch>=1.9
17 |
18 | ```
19 |
20 | **c. Install gcc>=5 in conda env (optional).**
21 | ```shell
22 | conda install -c omgarcia gcc-6 # gcc-6.2
23 | ```
24 |
25 | **d. Install mmcv-full.**
26 | ```shell
27 | pip install mmcv-full==1.4.0
28 | # pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
29 | ```
30 |
31 | **e. Install mmdet and mmseg.**
32 | ```shell
33 | pip install mmdet==2.14.0
34 | pip install mmsegmentation==0.14.1
35 | ```
36 |
37 | **f. Install mmdet3d from source code.**
38 | ```shell
39 | git clone https://github.com/open-mmlab/mmdetection3d.git
40 | cd mmdetection3d
41 | git checkout v0.17.1 # Other versions may not be compatible.
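# (Added sanity check, not part of the original instructions.) The install step
# below compiles mmdet3d's CUDA ops against the PyTorch installed in step b, so
# it can help to first confirm that torch and the GPU are visible in this env:
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"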
42 | python setup.py install
43 | ```
44 |
45 | **g. Install Detectron2 and Timm.**
46 | ```shell
47 | pip install einops fvcore seaborn iopath==0.1.9 timm==0.6.13 typing-extensions==4.5.0 pylint ipython==8.12 numpy==1.19.5 matplotlib==3.5.2 numba==0.48.0 pandas==1.4.4 scikit-image==0.19.3 setuptools==59.5.0
48 | python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
49 | ```
50 |
51 |
52 | **h. Clone BEVFormer.**
53 | ```
54 | git clone https://github.com/fundamentalvision/BEVFormer.git
55 | ```
56 |
57 | **i. Prepare pretrained models.**
58 | ```shell
59 | cd bevformer
60 | mkdir ckpts
61 |
62 | cd ckpts && wget https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth
63 | ```
64 |
65 | Note: this pretrained model is the same one used in [detr3d](https://github.com/WangYueFt/detr3d).
66 |
-------------------------------------------------------------------------------- /docs/prepare_dataset.md: --------------------------------------------------------------------------------
1 |
2 |
3 | ## NuScenes
4 | Download the nuScenes V1.0 full dataset and the CAN bus expansion data [HERE](https://www.nuscenes.org/download), then prepare the nuScenes data as follows.
5 |
6 |
7 | **Download CAN bus expansion**
8 | ```
9 | # download 'can_bus.zip'
10 | unzip can_bus.zip
11 | # move the extracted can_bus folder into ./data/
12 | ```
13 |
14 | **Prepare nuScenes data**
15 |
16 | *We generate custom annotation files, which are different from mmdet3d's.*
17 | ```
18 | python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --canbus ./data
19 | ```
20 |
21 | Running the above command will generate `nuscenes_infos_temporal_{train,val}.pkl`.
22 |
23 | **Folder structure**
24 | ```
25 | bevformer
26 | ├── projects/
27 | ├── tools/
28 | ├── configs/
29 | ├── ckpts/
30 | │   ├── r101_dcn_fcos3d_pretrain.pth
31 | ├── data/
32 | │   ├── can_bus/
33 | │   ├── nuscenes/
34 | │   │   ├── maps/
35 | │   │   ├── samples/
36 | │   │   ├── sweeps/
37 | │   │   ├── v1.0-test/
38 | │   │   ├── v1.0-trainval/
39 | │   │   ├── nuscenes_infos_temporal_train.pkl
40 | │   │   ├── nuscenes_infos_temporal_val.pkl
41 | ```
42 |
-------------------------------------------------------------------------------- /figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/figs/arch.png
-------------------------------------------------------------------------------- /figs/sota_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/figs/sota_results.png
-------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/projects/__init__.py
-------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: --------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/coco/'
3 | img_norm_cfg = dict(
4 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5 | train_pipeline = [
6 |     dict(type='LoadImageFromFile'),
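# (Added note.) LoadAnnotations with with_bbox=True and with_mask=True reads
# both the 2D boxes and the instance segmentation masks from the COCO
# annotations.
7 |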
dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict( 12 | filter_by_difficulty=[-1], 13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 14 | classes=class_names, 15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6)) 16 | 17 | file_client_args = dict(backend='disk') 18 | # Uncomment the following if use ceph or other file clients. 19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 20 | # for more details. 
21 | # file_client_args = dict( 22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 23 | 24 | train_pipeline = [ 25 | dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=4, 29 | use_dim=4, 30 | file_client_args=file_client_args), 31 | dict( 32 | type='LoadAnnotations3D', 33 | with_bbox_3d=True, 34 | with_label_3d=True, 35 | file_client_args=file_client_args), 36 | dict(type='ObjectSample', db_sampler=db_sampler), 37 | dict( 38 | type='ObjectNoise', 39 | num_try=100, 40 | translation_std=[1.0, 1.0, 0.5], 41 | global_rot_range=[0.0, 0.0], 42 | rot_range=[-0.78539816, 0.78539816]), 43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[-0.78539816, 0.78539816], 47 | scale_ratio_range=[0.95, 1.05]), 48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=4, 59 | use_dim=4, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='MultiScaleFlipAug3D', 63 | img_scale=(1333, 800), 64 | pts_scale_ratio=1, 65 | flip=False, 66 | transforms=[ 67 | dict( 68 | type='GlobalRotScaleTrans', 69 | rot_range=[0, 0], 70 | scale_ratio_range=[1., 1.], 71 | translation_std=[0, 0, 0]), 72 | dict(type='RandomFlip3D'), 73 | dict( 74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 75 | dict( 76 | type='DefaultFormatBundle3D', 77 | class_names=class_names, 78 | with_label=False), 79 | dict(type='Collect3D', keys=['points']) 80 | ]) 81 | ] 82 | # construct a pipeline for data and gt loading in show function 83 | # please keep its loading function consistent with test_pipeline (e.g. client) 84 | eval_pipeline = [ 85 | dict( 86 | type='LoadPointsFromFile', 87 | coord_type='LIDAR', 88 | load_dim=4, 89 | use_dim=4, 90 | file_client_args=file_client_args), 91 | dict( 92 | type='DefaultFormatBundle3D', 93 | class_names=class_names, 94 | with_label=False), 95 | dict(type='Collect3D', keys=['points']) 96 | ] 97 | 98 | data = dict( 99 | samples_per_gpu=6, 100 | workers_per_gpu=4, 101 | train=dict( 102 | type='RepeatDataset', 103 | times=2, 104 | dataset=dict( 105 | type=dataset_type, 106 | data_root=data_root, 107 | ann_file=data_root + 'kitti_infos_train.pkl', 108 | split='training', 109 | pts_prefix='velodyne_reduced', 110 | pipeline=train_pipeline, 111 | modality=input_modality, 112 | classes=class_names, 113 | test_mode=False, 114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
116 | box_type_3d='LiDAR')), 117 | val=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'kitti_infos_val.pkl', 121 | split='training', 122 | pts_prefix='velodyne_reduced', 123 | pipeline=test_pipeline, 124 | modality=input_modality, 125 | classes=class_names, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'kitti_infos_val.pkl', 132 | split='training', 133 | pts_prefix='velodyne_reduced', 134 | pipeline=test_pipeline, 135 | modality=input_modality, 136 | classes=class_names, 137 | test_mode=True, 138 | box_type_3d='LiDAR')) 139 | 140 | evaluation = dict(interval=1, pipeline=eval_pipeline) 141 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show 
function 81 | # please keep its loading function consistent with test_pipeline (e.g. client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
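# (Added example.) The interval is counted in epochs; e.g. with a 12-epoch
# schedule you would typically set evaluation = dict(interval=12, pipeline=eval_pipeline).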
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
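# (Added note.) This is the camera-only monocular config, so only use_camera
# is enabled in the modality dict below.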
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/range100_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-100, -100, -5, 100, 100, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 
13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and 
gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | 
flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | 
dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
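# (Added note.) The box type must match the coordinate convention of the
# annotation files; indoor datasets such as ScanNet are annotated in the depth
# coordinate system, hence 'Depth' below.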
110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 
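# (Added note.) Unlike the detection eval pipelines above, this one keeps
# LoadAnnotations3D (with_seg_3d=True) and PointSegClassMapping so that
# per-point ground-truth labels are available when computing segmentation
# metrics.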
77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/waymoD5-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'WaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | class_names = ['Car'] 14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 15 | input_modality = dict(use_lidar=True, use_camera=False) 16 | db_sampler = dict( 17 | data_root=data_root, 18 | info_path=data_root + 'waymo_dbinfos_train.pkl', 19 | rate=1.0, 20 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 21 | classes=class_names, 22 | sample_groups=dict(Car=15), 23 | points_loader=dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=5, 27 | use_dim=[0, 1, 2, 3, 4], 28 | file_client_args=file_client_args)) 29 | 30 | train_pipeline = [ 31 | dict( 32 | type='LoadPointsFromFile', 33 | coord_type='LIDAR', 34 | load_dim=6, 35 | use_dim=5, 36 | file_client_args=file_client_args), 37 | dict( 38 | type='LoadAnnotations3D', 39 | with_bbox_3d=True, 40 | with_label_3d=True, 41 | file_client_args=file_client_args), 42 | dict(type='ObjectSample', db_sampler=db_sampler), 43 | dict( 44 | type='RandomFlip3D', 45 | sync_2d=False, 46 | flip_ratio_bev_horizontal=0.5, 47 | flip_ratio_bev_vertical=0.5), 48 | dict( 49 | type='GlobalRotScaleTrans', 50 | rot_range=[-0.78539816, 0.78539816], 51 | scale_ratio_range=[0.95, 1.05]), 52 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 53 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 54 | dict(type='PointShuffle'), 55 | dict(type='DefaultFormatBundle3D', class_names=class_names), 56 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 57 | ] 58 | test_pipeline = [ 59 | dict( 60 | type='LoadPointsFromFile', 61 | coord_type='LIDAR', 62 | load_dim=6, 63 | use_dim=5, 64 | file_client_args=file_client_args), 65 | dict( 66 | type='MultiScaleFlipAug3D', 67 | img_scale=(1333, 800), 68 | pts_scale_ratio=1, 69 | flip=False, 70 | transforms=[ 71 | dict( 72 | type='GlobalRotScaleTrans', 73 | rot_range=[0, 0], 74 | scale_ratio_range=[1., 1.], 75 | translation_std=[0, 0, 0]), 76 | dict(type='RandomFlip3D'), 77 | dict( 78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 79 | dict( 80 | type='DefaultFormatBundle3D', 81 | class_names=class_names, 82 | with_label=False), 83 | dict(type='Collect3D', keys=['points']) 84 | ]) 85 | ] 86 | # construct a pipeline for data and gt loading in show function 87 | # please keep its loading function consistent with test_pipeline (e.g. client) 88 | eval_pipeline = [ 89 | dict( 90 | type='LoadPointsFromFile', 91 | coord_type='LIDAR', 92 | load_dim=6, 93 | use_dim=5, 94 | file_client_args=file_client_args), 95 | dict( 96 | type='DefaultFormatBundle3D', 97 | class_names=class_names, 98 | with_label=False), 99 | dict(type='Collect3D', keys=['points']) 100 | ] 101 | 102 | data = dict( 103 | samples_per_gpu=2, 104 | workers_per_gpu=4, 105 | train=dict( 106 | type='RepeatDataset', 107 | times=2, 108 | dataset=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'waymo_infos_train.pkl', 112 | split='training', 113 | pipeline=train_pipeline, 114 | modality=input_modality, 115 | classes=class_names, 116 | test_mode=False, 117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
119 | box_type_3d='LiDAR', 120 | # load one frame every five frames 121 | load_interval=5)), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | ann_file=data_root + 'waymo_infos_val.pkl', 126 | split='training', 127 | pipeline=test_pipeline, 128 | modality=input_modality, 129 | classes=class_names, 130 | test_mode=True, 131 | box_type_3d='LiDAR'), 132 | test=dict( 133 | type=dataset_type, 134 | data_root=data_root, 135 | ann_file=data_root + 'waymo_infos_val.pkl', 136 | split='training', 137 | pipeline=test_pipeline, 138 | modality=input_modality, 139 | classes=class_names, 140 | test_mode=True, 141 | box_type_3d='LiDAR')) 142 | 143 | evaluation = dict(interval=24, pipeline=eval_pipeline) 144 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 
57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | 
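These _base_ model files are not trained on their own; downstream configs pull them in through mmcv's _base_ inheritance and override only the keys that change, as hv_pointpillars_fpn_lyft.py, paconv_cuda_ssg.py and pointnet2_msg.py further below do. A minimal sketch of that pattern, assuming mmcv 1.x's Config API; the child config path and the overridden value are purely illustrative and not part of this repo:

# hypothetical file projects/configs/my_centerpoint_pillar.py (illustrative only)
_base_ = ['./_base_/models/centerpoint_02pillar_second_secfpn_nus.py']
# override a single nested key; everything not mentioned keeps its base value
model = dict(test_cfg=dict(pts=dict(score_threshold=0.2)))

# loading the merged config:
# from mmcv import Config
# cfg = Config.fromfile('projects/configs/my_centerpoint_pillar.py')
# cfg.model.test_cfg.pts.score_threshold  # -> 0.2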
-------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | 
in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
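# Illustrative arithmetic for the settings below: a 100 m x 100 m range with
# 0.25 m pillars gives (50 - (-50)) / 0.25 = 400 bins per axis, which is where
# output_shape=[400, 400] in pts_middle_encoder comes from.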
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
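# Illustrative arithmetic: voxel_size (0.25 m) is inherited from the nuScenes
# base config, so the 200 m range below gives (100 - (-100)) / 0.25 = 800 bins
# per axis, matching output_shape=[800, 800] in pts_middle_encoder.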
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | 
nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | 
nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
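# Illustrative arithmetic for the settings below:
#   x: (76.8 - (-76.8)) / 0.08 = 1920
#   y: (51.2 - (-51.2)) / 0.08 = 1280
#   z: (4 - (-2)) / 0.1 = 60 (plus one extra z bin, as in the KITTI config)
# which is where sparse_shape=[61, 1280, 1920] in the middle encoder comes from.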
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | 
scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | 
type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 
| paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # 
should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py: 
-------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained by 20 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 
| 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'CustomLyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=True) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_val.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) -------------------------------------------------------------------------------- /projects/configs/datasets/custom_waymo-3d.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'CustomWaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | img_norm_cfg = dict( 14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 15 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 17 | input_modality = dict(use_lidar=False, use_camera=True) 18 | db_sampler = dict( 19 | data_root=data_root, 20 | info_path=data_root + 'waymo_dbinfos_train.pkl', 21 | rate=1.0, 22 | prepare=dict( 23 | filter_by_difficulty=[-1], 24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 25 | classes=class_names, 26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 27 | points_loader=dict( 28 | type='LoadPointsFromFile', 29 | coord_type='LIDAR', 30 | load_dim=5, 31 | use_dim=[0, 1, 2, 3, 4], 32 | file_client_args=file_client_args)) 33 | 34 | 35 | 36 | train_pipeline = [ 37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 38 | dict(type='PhotoMetricDistortionMultiViewImage'), 39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 41 | dict(type='ObjectNameFilter', classes=class_names), 42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 43 | dict(type='PadMultiViewImage', size_divisor=32), 44 | dict(type='DefaultFormatBundle3D', class_names=class_names), 45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 46 | ] 47 | 48 | 49 | test_pipeline = [ 50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 52 | dict(type='PadMultiViewImage', size_divisor=32), 53 | dict( 54 | type='MultiScaleFlipAug3D', 55 | img_scale=(1920, 1280), 56 | pts_scale_ratio=1, 57 | flip=False, 58 | transforms=[ 59 | dict( 60 | type='DefaultFormatBundle3D', 61 | class_names=class_names, 62 | with_label=False), 63 | dict(type='CustomCollect3D', keys=['img']) 64 | ]) 65 | ] 66 | 67 | 68 | # construct a pipeline for data and gt loading in show function 69 | # please keep its loading function consistent with test_pipeline (e.g. client) 70 | 71 | data = dict( 72 | samples_per_gpu=2, 73 | workers_per_gpu=4, 74 | train=dict( 75 | type='RepeatDataset', 76 | times=2, 77 | dataset=dict( 78 | type=dataset_type, 79 | data_root=data_root, 80 | ann_file=data_root + 'waymo_infos_train.pkl', 81 | split='training', 82 | pipeline=train_pipeline, 83 | modality=input_modality, 84 | classes=class_names, 85 | test_mode=False, 86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
88 | box_type_3d='LiDAR', 89 | # load one frame every five frames 90 | load_interval=5)), 91 | val=dict( 92 | type=dataset_type, 93 | data_root=data_root, 94 | ann_file=data_root + 'waymo_infos_val.pkl', 95 | split='training', 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | classes=class_names, 99 | test_mode=True, 100 | box_type_3d='LiDAR'), 101 | test=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'waymo_infos_val.pkl', 105 | split='training', 106 | pipeline=test_pipeline, 107 | modality=input_modality, 108 | classes=class_names, 109 | test_mode=True, 110 | box_type_3d='LiDAR')) 111 | 112 | evaluation = dict(interval=24, pipeline=test_pipeline) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.utils import * 9 | from .models.opt.adamw import AdamW2 10 | from .bevformer import * 11 | from .dd3d import * 12 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | from .runner import * 6 | from .hooks import * 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | eval_model=None, 18 | meta=None): 19 | """A function wrapper for launching model training according to cfg. 20 | 21 | Because we need different eval_hook in runner. Should be deprecated in the 22 | future. 
23 | """ 24 | if cfg.model.type in ['EncoderDecoder3D']: 25 | assert False 26 | else: 27 | custom_train_detector( 28 | model, 29 | dataset, 30 | cfg, 31 | distributed=distributed, 32 | validate=validate, 33 | timestamp=timestamp, 34 | eval_model=eval_model, 35 | meta=meta) 36 | 37 | 38 | def train_model(model, 39 | dataset, 40 | cfg, 41 | distributed=False, 42 | validate=False, 43 | timestamp=None, 44 | meta=None): 45 | """A function wrapper for launching model training according to cfg. 46 | 47 | Because we need different eval_hook in runner. Should be deprecated in the 48 | future. 49 | """ 50 | if cfg.model.type in ['EncoderDecoder3D']: 51 | train_segmentor( 52 | model, 53 | dataset, 54 | cfg, 55 | distributed=distributed, 56 | validate=validate, 57 | timestamp=timestamp, 58 | meta=meta) 59 | else: 60 | train_detector( 61 | model, 62 | dataset, 63 | cfg, 64 | distributed=distributed, 65 | validate=validate, 66 | timestamp=timestamp, 67 | meta=meta) 68 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_head import BEVFormerHead, BEVFormerHead_GroupDETR 2 | from .bev_head import BEVHead 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import BEVFormer 2 | from .bevformer_fp16 import BEVFormer_fp16 3 | from .bevformerV2 import BEVFormerV2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from tkinter.messagebox import NO 8 | import torch 9 | from mmcv.runner import force_fp32, auto_fp16 10 | from mmdet.models import DETECTORS 11 | from mmdet3d.core import bbox3d2result 12 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 13 | from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask 14 | from projects.mmdet3d_plugin.bevformer.detectors.bevformer import BEVFormer 15 | import time 16 | import copy 17 | import numpy as np 18 | import mmdet3d 19 | from projects.mmdet3d_plugin.models.utils.bricks import run_time 20 | 21 | 22 | @DETECTORS.register_module() 23 | class BEVFormer_fp16(BEVFormer): 24 | """ 25 | The default version BEVFormer currently can not support FP16. 26 | We provide this version to resolve this issue. 27 | """ 28 | 29 | @auto_fp16(apply_to=('img', 'prev_bev', 'points')) 30 | def forward_train(self, 31 | points=None, 32 | img_metas=None, 33 | gt_bboxes_3d=None, 34 | gt_labels_3d=None, 35 | gt_labels=None, 36 | gt_bboxes=None, 37 | img=None, 38 | proposals=None, 39 | gt_bboxes_ignore=None, 40 | img_depth=None, 41 | img_mask=None, 42 | prev_bev=None, 43 | ): 44 | """Forward training function. 45 | Args: 46 | points (list[torch.Tensor], optional): Points of each sample. 47 | Defaults to None. 48 | img_metas (list[dict], optional): Meta information of each sample. 49 | Defaults to None. 
50 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional): 51 | Ground truth 3D boxes. Defaults to None. 52 | gt_labels_3d (list[torch.Tensor], optional): Ground truth labels 53 | of 3D boxes. Defaults to None. 54 | gt_labels (list[torch.Tensor], optional): Ground truth labels 55 | of 2D boxes in images. Defaults to None. 56 | gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in 57 | images. Defaults to None. 58 | img (torch.Tensor optional): Images of each sample with shape 59 | (N, C, H, W). Defaults to None. 60 | proposals ([list[torch.Tensor], optional): Predicted proposals 61 | used for training Fast RCNN. Defaults to None. 62 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 63 | 2D boxes in images to be ignored. Defaults to None. 64 | Returns: 65 | dict: Losses of different branches. 66 | """ 67 | 68 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 69 | 70 | losses = dict() 71 | losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d, 72 | gt_labels_3d, img_metas, 73 | gt_bboxes_ignore, prev_bev=prev_bev) 74 | losses.update(losses_pts) 75 | return losses 76 | 77 | 78 | def val_step(self, data, optimizer): 79 | """ 80 | In BEVFormer_fp16, we use this `val_step` function to inference the `prev_pev`. 81 | This is not the standard function of `val_step`. 82 | """ 83 | 84 | img = data['img'] 85 | img_metas = data['img_metas'] 86 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 87 | prev_bev = data.get('prev_bev', None) 88 | prev_bev = self.pts_bbox_head(img_feats, img_metas, prev_bev=prev_bev, only_bev=True) 89 | return prev_bev -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class TransferWeight(Hook): 7 | 8 | def __init__(self, every_n_inters=1): 9 | self.every_n_inters=every_n_inters 10 | 11 | def after_train_iter(self, runner): 12 | if self.every_n_inner_iters(runner, self.every_n_inters): 13 | runner.eval_model.load_state_dict(runner.model.state_dict()) 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .transformerV2 import PerceptionTransformerV2, PerceptionTransformerBEVEncoder 3 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 4 | from .temporal_self_attention import TemporalSelfAttention 5 | from .encoder import BEVFormerEncoder, BEVFormerLayer 6 | from .decoder import DetectionTransformerDecoder 7 | from .group_attention import GroupMultiheadAttention 8 | 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video 
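The TransferWeight hook above and the eval_model held by the custom runner (next file) work as a pair: after each training iteration the hook copies the live model's weights into runner.eval_model, so the frozen copy used to infer prev_bev for the earlier frames never lags behind the trained weights. Below is a hedged sketch of how such a hook is typically switched on from a config; custom_hooks is the standard mmcv mechanism, but the exact wiring used by the BEVFormer configs should be checked against the configs themselves.

# Illustrative config fragment -- not copied from this repository.
# mmcv builds every entry of custom_hooks from the HOOKS registry and attaches
# it to the runner, so TransferWeight only needs to be imported and registered.
custom_hooks = [
    dict(type='TransferWeight', every_n_inters=1),
]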
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # --------------------------------------------- 3 | # Modified by Zhiqi Li 4 | # --------------------------------------------- 5 | 6 | import os.path as osp 7 | import torch 8 | import mmcv 9 | from mmcv.runner.base_runner import BaseRunner 10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | from mmcv.runner.utils import get_host_info 14 | from pprint import pprint 15 | from mmcv.parallel.data_container import DataContainer 16 | 17 | 18 | @RUNNERS.register_module() 19 | class EpochBasedRunner_video(EpochBasedRunner): 20 | 21 | ''' 22 | # basic logic 23 | 24 | input_sequence = [a, b, c] # given a sequence of samples 25 | 26 | prev_bev = None 27 | for each in input_sequcene[:-1] 28 | prev_bev = eval_model(each, prev_bev)) # inference only. 29 | 30 | model(input_sequcene[-1], prev_bev) # train the last sample. 31 | ''' 32 | 33 | def __init__(self, 34 | model, 35 | eval_model=None, 36 | batch_processor=None, 37 | optimizer=None, 38 | work_dir=None, 39 | logger=None, 40 | meta=None, 41 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 42 | max_iters=None, 43 | max_epochs=None): 44 | super().__init__(model, 45 | batch_processor, 46 | optimizer, 47 | work_dir, 48 | logger, 49 | meta, 50 | max_iters, 51 | max_epochs) 52 | keys.append('img_metas') 53 | self.keys = keys 54 | self.eval_model = eval_model 55 | self.eval_model.eval() 56 | 57 | def run_iter(self, data_batch, train_mode, **kwargs): 58 | if self.batch_processor is not None: 59 | assert False 60 | # outputs = self.batch_processor( 61 | # self.model, data_batch, train_mode=train_mode, **kwargs) 62 | elif train_mode: 63 | 64 | num_samples = data_batch['img'].data[0].size(1) 65 | data_list = [] 66 | prev_bev = None 67 | for i in range(num_samples): 68 | data = {} 69 | for key in self.keys: 70 | if key not in ['img_metas', 'img', 'points']: 71 | data[key] = data_batch[key] 72 | else: 73 | if key == 'img': 74 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 75 | elif key == 'img_metas': 76 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 77 | else: 78 | assert False 79 | data_list.append(data) 80 | with torch.no_grad(): 81 | for i in range(num_samples-1): 82 | if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']: 83 | data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 84 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 85 | if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']: 86 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 87 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 88 | else: 89 | assert False 90 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 91 | 92 | if not isinstance(outputs, dict): 93 | raise TypeError('"batch_processor()" or "model.train_step()"' 94 | 'and "model.val_step()" must return a dict') 95 | if 'log_vars' in outputs: 96 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 97 | self.outputs = outputs 
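For orientation, run_iter above assumes the dataset packs a short temporal queue into each sample, so data_batch['img'].data[0] has shape (batch, queue_len, num_cams, C, H, W) and num_samples is the queue length; frames 0..num_samples-2 are only pushed through eval_model to propagate prev_bev, and the final frame is the one actually trained on. A small sketch of that layout with illustrative, not repository-derived, shapes:

import torch
from mmcv.parallel import DataContainer

# Toy batch: 1 sample, a queue of 3 frames, 6 surround-view cameras, tiny images.
imgs = torch.zeros(1, 3, 6, 3, 32, 32)
data_batch = {'img': DataContainer([imgs], stack=True)}

num_samples = data_batch['img'].data[0].size(1)   # 3 -> frames in the temporal queue
frame_0 = data_batch['img'].data[0][:, 0]         # (1, 6, 3, 32, 32): first frame, eval_model only
frame_last = data_batch['img'].data[0][:, -1]     # last frame, used for the actual train_step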
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should includes background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 9]. 49 | Returns: 50 | list[dict]: Decoded boxes. 
51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost, SmoothL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost', 'SmoothL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import mmcv 3 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 4 | 5 | 6 | @MATCH_COST.register_module() 7 | class BBox3DL1Cost(object): 8 | """BBox3DL1Cost. 9 | Args: 10 | weight (int | float, optional): loss_weight 11 | """ 12 | 13 | def __init__(self, weight=1.): 14 | self.weight = weight 15 | 16 | def __call__(self, bbox_pred, gt_bboxes): 17 | """ 18 | Args: 19 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 20 | (cx, cy, w, h), which are all in range [0, 1]. 
Shape 21 | [num_query, 4]. 22 | gt_bboxes (Tensor): Ground truth boxes with normalized 23 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 24 | Returns: 25 | torch.Tensor: bbox_cost value with weight 26 | """ 27 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 28 | return bbox_cost * self.weight 29 | 30 | @mmcv.jit(derivate=True, coderize=True) 31 | #@weighted_loss 32 | def smooth_l1_loss(pred, target, beta=1.0): 33 | """Smooth L1 loss. 34 | Args: 35 | pred (torch.Tensor): The prediction. 36 | target (torch.Tensor): The learning target of the prediction. 37 | beta (float, optional): The threshold in the piecewise function. 38 | Defaults to 1.0. 39 | Returns: 40 | torch.Tensor: Calculated loss 41 | """ 42 | assert beta > 0 43 | if target.numel() == 0: 44 | return pred.sum() * 0 45 | 46 | # assert pred.size() == target.size() 47 | diff = torch.abs(pred - target) 48 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 49 | diff - 0.5 * beta) 50 | return loss.sum(-1) 51 | 52 | 53 | @MATCH_COST.register_module() 54 | class SmoothL1Cost(object): 55 | """SmoothL1Cost. 56 | Args: 57 | weight (int | float, optional): loss weight 58 | 59 | Examples: 60 | >>> from mmdet.core.bbox.match_costs.match_cost import IoUCost 61 | >>> import torch 62 | >>> self = IoUCost() 63 | >>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]]) 64 | >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]]) 65 | >>> self(bboxes, gt_bboxes) 66 | tensor([[-0.1250, 0.1667], 67 | [ 0.1667, -0.5000]]) 68 | """ 69 | 70 | def __init__(self, weight=1.): 71 | self.weight = weight 72 | 73 | def __call__(self, input, target): 74 | """ 75 | Args: 76 | bboxes (Tensor): Predicted boxes with unnormalized coordinates 77 | (x1, y1, x2, y2). Shape [num_query, 4]. 78 | gt_bboxes (Tensor): Ground truth boxes with unnormalized 79 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 
80 | 81 | Returns: 82 | torch.Tensor: iou_cost value with weight 83 | """ 84 | N1, C = input.shape 85 | N2, C = target.shape 86 | input = input.contiguous().view(N1, C)[:, None, :] 87 | target = target.contiguous().view(N2, C)[None, :, :] 88 | cost = smooth_l1_loss(input, target) 89 | 90 | return cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 
5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # to solve circlur import 77 | 78 | results = custom_multi_gpu_test( 79 | runner.model, 80 | self.dataloader, 81 | tmpdir=tmpdir, 82 | gpu_collect=self.gpu_collect) 83 | if runner.rank == 0: 84 | print('\n') 85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 86 | 87 | key_score = self.evaluate(runner, results) 88 | 89 | if self.save_best: 90 | self._save_ckpt(runner, key_score) 91 | 92 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import CustomNuScenesDataset 2 | from .nuscenes_dataset_v2 import CustomNuScenesDatasetV2 3 | 4 | from .builder import custom_build_dataset 5 | __all__ = [ 6 | 'CustomNuScenesDataset', 7 | 'CustomNuScenesDatasetV2', 8 | ] 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage) 4 | from .formating import CustomDefaultFormatBundle3D 5 | from .augmentation import (CropResizeFlipImage, GlobalRotScaleTransImage) 6 | from .dd3d_mapper import DD3DMapper 7 | __all__ = [ 8 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 9 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 10 | 'RandomScaleImageMultiViewImage', 11 | 'CropResizeFlipImage', 'GlobalRotScaleTransImage', 12 | 'DD3DMapper', 13 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import torch 4 | from mmcv.parallel.data_container import DataContainer as DC 5 | from mmdet.datasets.builder import PIPELINES 6 | from projects.mmdet3d_plugin.dd3d.datasets.transform_utils import annotations_to_instances 7 | from projects.mmdet3d_plugin.dd3d.structures.pose import Pose 8 | from projects.mmdet3d_plugin.dd3d.utils.tasks import TaskManager 9 | 10 | 11 | @PIPELINES.register_module() 12 | class DD3DMapper: 13 | def __init__(self, 14 | is_train: bool = True, 15 | tasks=dict(box2d_on=True, box3d_on=True), 16 | ): 17 | self.is_train = is_train 18 | self.task_manager = TaskManager(**tasks) 19 | 20 | def __call__(self, results): 21 | if results['mono_input_dict'] is None: 22 | return results 23 | mono_input_dict = [] 24 | for dataset_dict in results['mono_input_dict']: 25 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 26 | image_shape = results['img'].data.shape[-2:] 27 | intrinsics = None 28 | if "intrinsics" in dataset_dict: 29 | intrinsics = dataset_dict['intrinsics'] 30 | if not 
torch.is_tensor(intrinsics): 31 | intrinsics = np.reshape( 32 | intrinsics, 33 | (3, 3), 34 | ).astype(np.float32) 35 | intrinsics = torch.as_tensor(intrinsics) 36 | # NOTE: intrinsics = transforms.apply_intrinsics(intrinsics) 37 | dataset_dict["intrinsics"] = intrinsics 38 | dataset_dict["inv_intrinsics"] = torch.linalg.inv(dataset_dict['intrinsics']) 39 | 40 | if "pose" in dataset_dict: 41 | pose = Pose(wxyz=np.float32(dataset_dict["pose"]["wxyz"]), 42 | tvec=np.float32(dataset_dict["pose"]["tvec"])) 43 | dataset_dict["pose"] = pose 44 | # NOTE: no transforms affect global pose. 45 | 46 | if "extrinsics" in dataset_dict: 47 | extrinsics = Pose( 48 | wxyz=np.float32(dataset_dict["extrinsics"]["wxyz"]), 49 | tvec=np.float32(dataset_dict["extrinsics"]["tvec"]) 50 | ) 51 | dataset_dict["extrinsics"] = extrinsics 52 | 53 | if not self.task_manager.has_detection_task: 54 | dataset_dict.pop("annotations", None) 55 | 56 | if "annotations" in dataset_dict: 57 | for anno in dataset_dict["annotations"]: 58 | if not self.task_manager.has_detection_task: 59 | anno.pop("bbox", None) 60 | anno.pop("bbox_mode", None) 61 | if not self.task_manager.box3d_on: 62 | anno.pop("bbox3d", None) 63 | annos = [anno for anno in dataset_dict["annotations"] if anno.get("iscrowd", 0) == 0] 64 | if annos and 'bbox3d' in annos[0]: 65 | # Remove boxes with negative z-value for center. 66 | annos = [anno for anno in annos if anno['bbox3d'][6] > 0] 67 | 68 | instances = annotations_to_instances( 69 | annos, 70 | image_shape, # TODO: the effect of the shape? 71 | intrinsics=intrinsics.numpy(), 72 | ) 73 | 74 | if self.is_train: 75 | # instances = d2_utils.filter_empty_instances(instances) 76 | m = instances.gt_boxes.nonempty(threshold=1e-5) 77 | instances = instances[m] 78 | annos = [anno for tmp_m, anno in zip(m, annos) if tmp_m] 79 | dataset_dict["instances"] = instances 80 | 81 | dataset_dict['annotations'] = annos 82 | 83 | mono_input_dict.append(dataset_dict) 84 | 85 | # TODO: drop batch that has no annotations? 86 | box_num = 0 87 | for dataset_dict in mono_input_dict: 88 | box_num += dataset_dict["instances"].gt_boxes.tensor.shape[0] 89 | if box_num == 0: 90 | return None 91 | 92 | mono_input_dict = DC(mono_input_dict, cpu_only=True) 93 | results['mono_input_dict'] = mono_input_dict 94 | return results 95 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 
19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | 26 | def __call__(self, results): 27 | """Call function to transform and format common fields in results. 28 | Args: 29 | results (dict): Result dict contains the data to convert. 30 | Returns: 31 | dict: The result dict contains the data that is formatted with 32 | default bundle. 33 | """ 34 | # Format 3D data 35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 36 | results['gt_map_masks'] = DC( 37 | to_tensor(results['gt_map_masks']), stack=True) 38 | 39 | return results -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/projects/mmdet3d_plugin/datasets/pipelines/loading.py -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def __init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for the compatibility from PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 
3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0. 31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 
75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import * -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/projects/mmdet3d_plugin/dd3d/datasets/__init__.py -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | # Adapted from AdelaiDet: 3 | # https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/iou_loss.py 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class IOULoss(nn.Module): 9 | """ 10 | Intersetion Over Union (IoU) loss which supports three 11 | different IoU computations: 12 | 13 | * IoU 14 | * Linear IoU 15 | * gIoU 16 | """ 17 | def __init__(self, loc_loss_type='iou'): 18 | super(IOULoss, self).__init__() 19 | self.loc_loss_type = loc_loss_type 20 | 21 | def forward(self, pred, target, weight=None): 22 | """ 23 | Args: 24 | pred: Nx4 predicted bounding boxes 25 | target: Nx4 target bounding boxes 26 | weight: N loss weight for each instance 27 | """ 28 | pred_left = pred[:, 0] 29 | pred_top = pred[:, 1] 30 | pred_right = pred[:, 2] 31 | pred_bottom = pred[:, 3] 32 | 33 | target_left = target[:, 0] 34 | target_top = target[:, 1] 35 | target_right = target[:, 2] 36 | target_bottom = target[:, 3] 37 | 38 | target_aera = (target_left + target_right) * \ 39 | (target_top + target_bottom) 40 | pred_aera = (pred_left + pred_right) * \ 41 | (pred_top + pred_bottom) 42 | 43 | w_intersect = torch.min(pred_left, target_left) + \ 44 | torch.min(pred_right, target_right) 45 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 46 | torch.min(pred_top, target_top) 47 | 48 | g_w_intersect = torch.max(pred_left, target_left) + \ 49 | torch.max(pred_right, target_right) 50 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 51 | torch.max(pred_top, target_top) 52 | ac_uion = g_w_intersect * g_h_intersect 53 | 54 | area_intersect = w_intersect * h_intersect 55 | area_union = target_aera + pred_aera - area_intersect 56 | 57 | ious = (area_intersect + 1.0) / (area_union + 1.0) 58 | gious = ious - (ac_uion - area_union) / ac_uion 59 | if self.loc_loss_type == 'iou': 60 | losses = -torch.log(ious) 61 | elif self.loc_loss_type == 'linear_iou': 62 | losses = 1 - ious 63 | elif self.loc_loss_type == 'giou': 64 | losses = 1 - gious 65 | else: 66 | raise NotImplementedError 67 | 68 | if weight is not None: 69 | return (losses * weight).sum() 70 | else: 71 | return losses.sum() 72 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | # Adapted from AdelaiDet 3 | # https://github.com/aim-uofa/AdelaiDet/ 4 | import logging 5 | 6 | import torch 7 | from torch import nn 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | 12 | class Scale(nn.Module): 13 | def __init__(self, init_value=1.0): 14 | super(Scale, self).__init__() 15 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 16 | 17 | def forward(self, input): 18 | return input * self.scale 19 | 20 | 21 | class Offset(nn.Module): 22 | def __init__(self, init_value=0.): 23 | super(Offset, self).__init__() 24 | self.bias = nn.Parameter(torch.FloatTensor([init_value])) 25 | 26 | def forward(self, input): 27 | return input + self.bias 28 | 29 | 30 | class ModuleListDial(nn.ModuleList): 31 | def __init__(self, modules=None): 32 | super(ModuleListDial, self).__init__(modules) 33 | self.cur_position = 0 34 | 35 | def forward(self, x): 36 | result = self[self.cur_position](x) 37 | self.cur_position += 1 38 | if self.cur_position >= len(self): 39 | self.cur_position = 0 40 | return result 41 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | # Adapted from fvcore: 4 | # https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 5 | 6 | import torch 7 | 8 | 9 | def smooth_l1_loss(input: torch.Tensor, target: torch.Tensor, beta: float, reduction: str = "none") -> torch.Tensor: 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 
57 | """ 58 | # (dennis.park) Make it work with mixed precision training. 59 | beta = torch.as_tensor(beta).to(input.dtype) 60 | if beta < 1e-5: 61 | # if beta == 0, then torch.where will result in nan gradients when 62 | # the chain rule is applied due to pytorch implementation details 63 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 64 | # zeros, rather than "no gradient"). To avoid this issue, we define 65 | # small values of beta to be exactly l1 loss. 66 | loss = torch.abs(input - target) 67 | else: 68 | n = torch.abs(input - target) 69 | cond = n < beta 70 | a = 0.5 * n**2 71 | b = n - 0.5 * beta 72 | a, b = a.to(input.dtype), b.to(input.dtype) 73 | loss = torch.where(cond, a, b) 74 | # loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 75 | 76 | if reduction == "mean": 77 | loss = loss.mean() 78 | elif reduction == "sum": 79 | loss = loss.sum() 80 | return loss 81 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dd3d import NuscenesDD3D -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from projects.mmdet3d_plugin.dd3d.layers.smooth_l1_loss import smooth_l1_loss 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | 12 | class DisentangledBox3DLoss(nn.Module): 13 | def __init__(self, smooth_l1_loss_beta, max_loss_per_group): 14 | super().__init__() 15 | self.smooth_l1_loss_beta = smooth_l1_loss_beta 16 | self.max_loss_per_group = max_loss_per_group 17 | 18 | def forward(self, box3d_pred, box3d_targets, locations, weights=None): 19 | 20 | box3d_pred = box3d_pred.to(torch.float32) 21 | box3d_targets = box3d_targets.to(torch.float32) 22 | 23 | target_corners = box3d_targets.corners 24 | 25 | disentangled_losses = {} 26 | for component_key in ["quat", "proj_ctr", "depth", "size"]: 27 | disentangled_boxes = box3d_targets.clone() 28 | setattr(disentangled_boxes, component_key, getattr(box3d_pred, component_key)) 29 | pred_corners = disentangled_boxes.to(torch.float32).corners 30 | 31 | loss = smooth_l1_loss(pred_corners, target_corners, beta=self.smooth_l1_loss_beta) 32 | 33 | # Bound the loss 34 | loss.clamp(max=self.max_loss_per_group) 35 | 36 | if weights is not None: 37 | # loss = torch.sum(loss.reshape(-1, 24) * weights.unsqueeze(-1)) 38 | loss = torch.sum(loss.reshape(-1, 24).mean(dim=1) * weights) 39 | else: 40 | loss = loss.reshape(-1, 24).mean() 41 | 42 | disentangled_losses["loss_box3d_" + component_key] = loss 43 | 44 | entangled_l1_dist = (target_corners - box3d_pred.corners).detach().abs().reshape(-1, 24).mean(dim=1) 45 | 46 | return disentangled_losses, entangled_l1_dist 47 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from .image_list import ImageList 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/utils/comm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | from functools import wraps 4 | 5 | import torch.distributed as dist 6 | 7 | from detectron2.utils import comm as d2_comm 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | _NESTED_BROADCAST_FROM_MASTER = False 12 | 13 | 14 | def is_distributed(): 15 | return d2_comm.get_world_size() > 1 16 | 17 | 18 | def broadcast_from_master(fn): 19 | """If distributed, only the master executes the function and broadcast the results to other workers. 20 | 21 | Usage: 22 | @broadcast_from_master 23 | def foo(a, b): ... 24 | """ 25 | @wraps(fn) 26 | def wrapper(*args, **kwargs): # pylint: disable=unused-argument 27 | global _NESTED_BROADCAST_FROM_MASTER 28 | 29 | if not is_distributed(): 30 | return fn(*args, **kwargs) 31 | 32 | if _NESTED_BROADCAST_FROM_MASTER: 33 | assert d2_comm.is_main_process() 34 | LOG.warning(f"_NESTED_BROADCAST_FROM_MASTER = True, {fn.__name__}") 35 | return fn(*args, **kwargs) 36 | 37 | if d2_comm.is_main_process(): 38 | _NESTED_BROADCAST_FROM_MASTER = True 39 | ret = [fn(*args, **kwargs), ] 40 | _NESTED_BROADCAST_FROM_MASTER = False 41 | else: 42 | ret = [None, ] 43 | if dist.is_initialized(): 44 | dist.broadcast_object_list(ret) 45 | ret = ret[0] 46 | 47 | assert ret is not None 48 | return ret 49 | 50 | return wrapper 51 | 52 | 53 | def master_only(fn): 54 | """If distributed, only the master executes the function. 55 | 56 | Usage: 57 | @master_only 58 | def foo(a, b): ... 59 | """ 60 | @wraps(fn) 61 | def wrapped_fn(*args, **kwargs): 62 | if d2_comm.is_main_process(): 63 | ret = fn(*args, **kwargs) 64 | d2_comm.synchronize() 65 | if d2_comm.is_main_process(): 66 | return ret 67 | 68 | return wrapped_fn 69 | 70 | 71 | def gather_dict(dikt): 72 | """Gather python dictionaries from all workers to the rank=0 worker. 73 | 74 | Assumption: the keys of `dikt` are disjoint across all workers. 75 | 76 | If rank = 0, then returned aggregated dict. 77 | If rank > 0, then return `None`. 78 | """ 79 | dict_lst = d2_comm.gather(dikt, dst=0) 80 | if d2_comm.is_main_process(): 81 | gathered_dict = {} 82 | for dic in dict_lst: 83 | for k in dic.keys(): 84 | assert k not in gathered_dict, f"Dictionary key overlaps: {k}" 85 | gathered_dict.update(dic) 86 | return gathered_dict 87 | else: 88 | return None 89 | 90 | 91 | def reduce_sum(tensor): 92 | """ 93 | Adapted from AdelaiDet: 94 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 95 | """ 96 | if not is_distributed(): 97 | return tensor 98 | tensor = tensor.clone() 99 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 100 | return tensor 101 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/utils/tasks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from collections import OrderedDict 3 | 4 | # from detectron2.config import configurable 5 | 6 | 7 | class Task(): 8 | def __init__(self, name, is_detection_task, is_dense_prediction_task): 9 | self.name = name 10 | self.is_detection_task = is_detection_task 11 | self.is_dense_prediction_task = is_dense_prediction_task 12 | 13 | 14 | # yapf: disable 15 | TASKS = [ 16 | Task( 17 | name="box2d", 18 | is_detection_task=True, 19 | is_dense_prediction_task=False, 20 | ), 21 | Task( 22 | name="box3d", 23 | is_detection_task=True, 24 | is_dense_prediction_task=False, 25 | ), 26 | Task( 27 | name="depth", 28 | is_detection_task=False, 29 | is_dense_prediction_task=True, 30 | ) 31 | ] 32 | # yapf: enable 33 | 34 | NAME_TO_TASK = OrderedDict([(task.name, task) for task in TASKS]) 35 | 36 | 37 | class TaskManager(): 38 | #@configurable 39 | def __init__(self, box2d_on=False, box3d_on=False, depth_on=False): 40 | """ 41 | configurable is experimental. 42 | """ 43 | self._box2d_on = self._mask2d_on = self._box3d_on = self._semseg2d_on = self._depth_on = False 44 | tasks = [] 45 | if box2d_on: 46 | tasks.append(NAME_TO_TASK['box2d']) 47 | self._box2d_on = True 48 | if box3d_on: 49 | tasks.append(NAME_TO_TASK['box3d']) 50 | self._box3d_on = True 51 | if depth_on: 52 | tasks.append(NAME_TO_TASK['depth']) 53 | self._depth_on = True 54 | 55 | if not tasks: 56 | raise ValueError("No task specified.") 57 | 58 | self._tasks = tasks 59 | 60 | @property 61 | def tasks(self): 62 | return self._tasks 63 | 64 | '''@classmethod 65 | def from_config(cls, cfg): 66 | # yapf: disable 67 | return OrderedDict( 68 | box2d_on = cfg.MODEL.BOX2D_ON, 69 | box3d_on = cfg.MODEL.BOX3D_ON, 70 | depth_on = cfg.MODEL.DEPTH_ON, 71 | ) 72 | # yapf: enable''' 73 | 74 | # Indicators that tell whether each task is enabled. 75 | @property 76 | def box2d_on(self): 77 | return self._box2d_on 78 | 79 | @property 80 | def box3d_on(self): 81 | return self._box3d_on 82 | 83 | @property 84 | def depth_on(self): 85 | return self._depth_on 86 | 87 | @property 88 | def has_dense_prediction_task(self): 89 | return any([task.is_dense_prediction_task for task in self.tasks]) 90 | 91 | @property 92 | def has_detection_task(self): 93 | return any([task.is_detection_task for task in self.tasks]) 94 | 95 | @property 96 | def task_names(self): 97 | return [task.name for task in self.tasks] 98 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/dd3d/utils/tensor2d.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def compute_features_locations(h, w, stride, dtype=torch.float32, device='cpu', offset="none"): 7 | """Adapted from AdelaiDet: 8 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 9 | 10 | Key difference: offset is configurable.
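    With offset="none" the locations are the top-left corners of the stride cells; with offset="half" they are shifted by stride // 2 to the cell centers (the original AdelaiDet behavior, shown in the commented-out line below).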
11 | """ 12 | shifts_x = torch.arange(0, w * stride, step=stride, dtype=dtype, device=device) 13 | shifts_y = torch.arange(0, h * stride, step=stride, dtype=dtype, device=device) 14 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 15 | shift_x = shift_x.reshape(-1) 16 | shift_y = shift_y.reshape(-1) 17 | # (dennis.park) 18 | # locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 19 | locations = torch.stack((shift_x, shift_y), dim=1) 20 | if offset == "half": 21 | locations += stride // 2 22 | else: 23 | assert offset == "none" 24 | 25 | return locations 26 | 27 | 28 | def aligned_bilinear(tensor, factor, offset="none"): 29 | """Adapted from AdelaiDet: 30 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 31 | """ 32 | assert tensor.dim() == 4 33 | assert factor >= 1 34 | assert int(factor) == factor 35 | 36 | if factor == 1: 37 | return tensor 38 | 39 | h, w = tensor.size()[2:] 40 | tensor = F.pad(tensor, pad=(0, 1, 0, 1), mode="replicate") 41 | oh = factor * h + 1 42 | ow = factor * w + 1 43 | tensor = F.interpolate(tensor, size=(oh, ow), mode='bilinear', align_corners=True) 44 | if offset == "half": 45 | tensor = F.pad(tensor, pad=(factor // 2, 0, factor // 2, 0), mode="replicate") 46 | 47 | return tensor[:, :, :oh - 1, :ow - 1] 48 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class GradChecker(Hook): 7 | 8 | def after_train_iter(self, runner): 9 | for key, val in runner.model.named_parameters(): 10 | if val.grad == None and val.requires_grad: 11 | print('WARNNING: {key}\'s parameters are not be used!!!!'.format(key=key)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding 5 | from .visual import save_tensor -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | import torch 5 | time_maps = defaultdict(lambda :0.) 6 | count_maps = defaultdict(lambda :0.) 
7 | def run_time(name): 8 | def middle(fn): 9 | def wrapper(*args, **kwargs): 10 | torch.cuda.synchronize() 11 | start = time.time() 12 | res = fn(*args, **kwargs) 13 | torch.cuda.synchronize() 14 | time_maps['%s : %s'%(name, fn.__name__) ] += time.time()-start 15 | count_maps['%s : %s'%(name, fn.__name__) ] +=1 16 | print("%s : %s takes up %f "% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] )) 17 | return res 18 | return wrapper 19 | return middle 20 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | from mmcv.runner import force_fp32, auto_fp16 6 | 7 | class Grid(object): 8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 9 | self.use_h = use_h 10 | self.use_w = use_w 11 | self.rotate = rotate 12 | self.offset = offset 13 | self.ratio = ratio 14 | self.mode=mode 15 | self.st_prob = prob 16 | self.prob = prob 17 | 18 | def set_prob(self, epoch, max_epoch): 19 | self.prob = self.st_prob * epoch / max_epoch 20 | 21 | def __call__(self, img, label): 22 | if np.random.rand() > self.prob: 23 | return img, label 24 | h = img.size(1) 25 | w = img.size(2) 26 | self.d1 = 2 27 | self.d2 = min(h, w) 28 | hh = int(1.5*h) 29 | ww = int(1.5*w) 30 | d = np.random.randint(self.d1, self.d2) 31 | if self.ratio == 1: 32 | self.l = np.random.randint(1, d) 33 | else: 34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 35 | mask = np.ones((hh, ww), np.float32) 36 | st_h = np.random.randint(d) 37 | st_w = np.random.randint(d) 38 | if self.use_h: 39 | for i in range(hh//d): 40 | s = d*i + st_h 41 | t = min(s+self.l, hh) 42 | mask[s:t,:] *= 0 43 | if self.use_w: 44 | for i in range(ww//d): 45 | s = d*i + st_w 46 | t = min(s+self.l, ww) 47 | mask[:,s:t] *= 0 48 | 49 | r = np.random.randint(self.rotate) 50 | mask = Image.fromarray(np.uint8(mask)) 51 | mask = mask.rotate(r) 52 | mask = np.asarray(mask) 53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 54 | 55 | mask = torch.from_numpy(mask).float() 56 | if self.mode == 1: 57 | mask = 1-mask 58 | 59 | mask = mask.expand_as(img) 60 | if self.offset: 61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 62 | offset = (1 - mask) * offset 63 | img = img * mask + offset 64 | else: 65 | img = img * mask 66 | 67 | return img, label 68 | 69 | 70 | class GridMask(nn.Module): 71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 72 | super(GridMask, self).__init__() 73 | self.use_h = use_h 74 | self.use_w = use_w 75 | self.rotate = rotate 76 | self.offset = offset 77 | self.ratio = ratio 78 | self.mode = mode 79 | self.st_prob = prob 80 | self.prob = prob 81 | self.fp16_enable = False 82 | def set_prob(self, epoch, max_epoch): 83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 84 | @auto_fp16() 85 | def forward(self, x): 86 | if np.random.rand() > self.prob or not self.training: 87 | return x 88 | n,c,h,w = x.size() 89 | x = x.view(-1,h,w) 90 | hh = int(1.5*h) 91 | ww = int(1.5*w) 92 | d = np.random.randint(2, h) 93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 94 | mask = np.ones((hh, ww), np.float32) 95 | st_h = np.random.randint(d) 96 | st_w = np.random.randint(d) 97 | if self.use_h: 98 | for i in range(hh//d): 99 | s = d*i + st_h 
100 | t = min(s+self.l, hh) 101 | mask[s:t,:] *= 0 102 | if self.use_w: 103 | for i in range(ww//d): 104 | s = d*i + st_w 105 | t = min(s+self.l, ww) 106 | mask[:,s:t] *= 0 107 | 108 | r = np.random.randint(self.rotate) 109 | mask = Image.fromarray(np.uint8(mask)) 110 | mask = mask.rotate(r) 111 | mask = np.asarray(mask) 112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 113 | 114 | mask = torch.from_numpy(mask).to(x.dtype).cuda() 115 | if self.mode == 1: 116 | mask = 1-mask 117 | mask = mask.expand_as(x) 118 | if self.offset: 119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda() 120 | x = x * mask + offset * (1 - mask) 121 | else: 122 | x = x * mask 123 | 124 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class RelPositionEmbedding(nn.Module): 6 | def __init__(self, num_pos_feats=64, pos_norm=True): 7 | super().__init__() 8 | self.num_pos_feats = num_pos_feats 9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False) 10 | #nn.init.orthogonal_(self.fc.weight) 11 | #self.fc.weight.requires_grad = False 12 | self.pos_norm = pos_norm 13 | if self.pos_norm: 14 | self.norm = nn.LayerNorm(self.num_pos_feats) 15 | def forward(self, tensor): 16 | #mask = nesttensor.mask 17 | B,C,H,W = tensor.shape 18 | #print('tensor.shape', tensor.shape) 19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1) 21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 23 | 24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 25 | #x_axis =torch.stack((x_range,1-x_range),dim=1) 26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 28 | x_pos = torch.cat((y_axis, x_axis), dim=1) 29 | x_pos = self.fc(x_pos) 30 | 31 | if self.pos_norm: 32 | x_pos = self.norm(x_pos) 33 | #print('xpos,', x_pos.max(),x_pos.min()) 34 | return x_pos -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0,): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /tools/analysis_tools/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fundamentalvision/BEVFormer/66b65f3a1f58caf0507cb2a971b9c0e7f842376c/tools/analysis_tools/__init__.py -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | import sys 9 | sys.path.append('.') 10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader 11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset 12 | # from mmdet3d.datasets import build_dataloader, build_dataset 13 | from mmdet3d.models import build_detector 14 | #from tools.misc.fuse_conv_bn import fuse_module 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 19 | parser.add_argument('config', help='test config file path') 20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file') 21 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 22 | parser.add_argument( 23 | '--log-interval', default=50, help='interval of logging') 24 | parser.add_argument( 25 | '--fuse-conv-bn', 26 | action='store_true', 27 | help='Whether to fuse conv and bn, this will slightly increase' 28 | 'the inference speed') 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | 36 | cfg = Config.fromfile(args.config) 37 | # set cudnn_benchmark 38 | if cfg.get('cudnn_benchmark', False): 39 | torch.backends.cudnn.benchmark = True 40 | cfg.model.pretrained = None 41 | cfg.data.test.test_mode = True 42 | 43 | # build the dataloader 44 | # TODO: support multiple images per gpu (only minor changes are needed) 45 | print(cfg.data.test) 46 | dataset = custom_build_dataset(cfg.data.test) 47 | data_loader = build_dataloader( 48 | dataset, 49 | samples_per_gpu=1, 50 | workers_per_gpu=cfg.data.workers_per_gpu, 51 | dist=False, 52 | shuffle=False) 53 | 54 | # build the model and load checkpoint 55 | cfg.model.train_cfg = None 56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 57 | fp16_cfg = cfg.get('fp16', None) 58 | if fp16_cfg is not None: 59 | wrap_fp16_model(model) 60 | if args.checkpoint is not None: 61 | load_checkpoint(model, args.checkpoint, map_location='cpu') 62 | #if args.fuse_conv_bn: 63 | # model = fuse_module(model) 64 | 65 | model = MMDataParallel(model, device_ids=[0]) 66 | 67 | model.eval() 68 | 69 | # the first several iterations may be very slow so skip them 70 | num_warmup = 5 71 | pure_inf_time = 0 72 | 73 | # benchmark with several samples and take the average 74 | for i, data in enumerate(data_loader): 75 | torch.cuda.synchronize() 76 | start_time = time.perf_counter() 77 | with torch.no_grad(): 78 | model(return_loss=False, rescale=True, **data) 79 | 80 | torch.cuda.synchronize() 81 | elapsed = time.perf_counter() - start_time 82 | 83 | if i >= num_warmup: 84 | pure_inf_time += elapsed 85 | if (i + 1) % args.log_interval == 0: 86 | fps = (i + 1 - num_warmup) / pure_inf_time 87 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 88 | f'fps: {fps:.1f} img / s') 89 | 90 | if (i + 1) == args.samples: 91 | pure_inf_time += elapsed 92 | fps = (i + 1 - 
num_warmup) / pure_inf_time 93 | print(f'Overall fps: {fps:.1f} img / s') 94 | break 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | file_path = './ckpts/bevformer_v4.pth' 3 | model = torch.load(file_path, map_location='cpu') 4 | all = 0 5 | for key in list(model['state_dict'].keys()): 6 | all += model['state_dict'][key].nelement() 7 | print(all) 8 | 9 | # smaller 63374123 10 | # v4 69140395 11 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import os 5 | 6 | 7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 10 | root_folder = os.path.join(root_folder, f'{version}-train') 11 | lidar_path = os.path.join(root_folder, lidar_path) 12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 13 | f'dataset and make sure {lidar_path} is present.' 14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 15 | try: 16 | points.reshape([-1, 5]) 17 | print(f'This fix is not required for version {version}.') 18 | except ValueError: 19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 20 | new_points.tofile(lidar_path) 21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 25 | parser.add_argument( 26 | '--root-folder', 27 | type=str, 28 | default='./data/lyft', 29 | help='specify the root path of Lyft dataset') 30 | parser.add_argument( 31 | '--version', 32 | type=str, 33 | default='v1.01', 34 | help='specify Lyft dataset version') 35 | args = parser.parse_args() 36 | 37 | if __name__ == '__main__': 38 | fix_lyft(root_folder=args.root_folder, version=args.version) 39 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28509} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | 
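A minimal usage sketch for the two launch scripts above; the config path, checkpoint path, and GPU count are illustrative placeholders, not values taken from the scripts:

    ./tools/dist_train.sh projects/configs/bevformer/bevformer_base.py 8
    ./tools/dist_test.sh projects/configs/bevformer/bevformer_base.py ckpts/bevformer_base.pth 8
    PORT=29510 ./tools/dist_train.sh projects/configs/bevformer/bevformer_base.py 8

Both scripts read PORT from the environment with a default value (the ${PORT:-...} expansion), so overriding it as in the last line avoids port conflicts when several jobs share one machine.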
-------------------------------------------------------------------------------- /tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionality of batch norm layers is turned off and 12 | only the per-channel mean and var are used, which exposes the chance to 13 | fuse them with the preceding conv layers to save computations and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 
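    # A hypothetical illustration (not part of the original script): the keys below
    # are assumed names; adjust them to whatever your checkpoints actually store
    # in checkpoint['meta'] before uncommenting.
    # checkpoint.get('meta', {}).pop('env_info', None)
    # checkpoint.get('meta', {}).pop('config', None)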
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', 
help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | --------------------------------------------------------------------------------
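A short usage sketch for the two converter utilities above; the input and output file names are illustrative placeholders:

    # strip the optimizer state and append a SHA-256 prefix to the published file name
    python tools/model_converters/publish_model.py work_dirs/bevformer_base/latest.pth bevformer_base_release.pth
    # convert a pycls RegNet checkpoint into the mmdet key layout
    python tools/model_converters/regnet2mmdet.py RegNetX-4.0GF_dds_8gpu.pyth regnetx_4.0gf_mmdet.pth

publish_model.py deletes the 'optimizer' entry and renames the output by appending the first 8 hex characters of the file's sha256 digest, so the published name can later be checked against the file's hash.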