├── LICENSE ├── README.md ├── assets ├── architecture.png ├── placeholder └── pred_by_OSP.gif ├── docs └── getting_started.md ├── figs ├── leaderboard-06-10-2023.png ├── mask.jpg ├── mask_camera.jpg ├── mask_lidar.jpg ├── mask_none.jpg └── occupanc_1.gif ├── projects ├── __init__.py ├── __pycache__ │ └── __init__.cpython-37.pyc ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_instance.py │ │ │ ├── kitti-3d-3class.py │ │ │ ├── kitti-3d-car.py │ │ │ ├── lyft-3d.py │ │ │ ├── nuim_instance.py │ │ │ ├── nus-3d.py │ │ │ ├── nus-mono3d.py │ │ │ ├── range100_lyft-3d.py │ │ │ ├── s3dis-3d-5class.py │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ ├── scannet-3d-18class.py │ │ │ ├── scannet_seg-3d-20class.py │ │ │ ├── sunrgbd-3d-10class.py │ │ │ ├── waymoD5-3d-3class.py │ │ │ └── waymoD5-3d-car.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── 3dssd.py │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ ├── fcos3d.py │ │ │ ├── groupfree3d.py │ │ │ ├── h3dnet.py │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ ├── imvotenet_image.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── paconv_cuda_ssg.py │ │ │ ├── paconv_ssg.py │ │ │ ├── parta2.py │ │ │ ├── pointnet2_msg.py │ │ │ ├── pointnet2_ssg.py │ │ │ └── votenet.py │ │ └── schedules │ │ │ ├── cosine.py │ │ │ ├── cyclic_20e.py │ │ │ ├── cyclic_40e.py │ │ │ ├── mmdet_schedule_1x.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ ├── seg_cosine_150e.py │ │ │ ├── seg_cosine_200e.py │ │ │ └── seg_cosine_50e.py │ ├── datasets │ │ └── custom_nus-3d.py │ └── osp │ │ ├── osp.py │ │ └── osp_minibatch.py └── mmdet3d_plugin │ ├── __init__.py │ ├── __pycache__ │ └── __init__.cpython-37.pyc │ ├── bevformer │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-37.pyc │ ├── apis │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── mmdet_train.cpython-37.pyc │ │ │ ├── test.cpython-37.pyc │ │ │ └── train.cpython-37.pyc │ │ ├── mmdet_train.py │ │ ├── test.py │ │ └── train.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── bevformer_occ_head.cpython-37.pyc │ │ └── bevformer_occ_head.py │ ├── detectors │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── bevformer_occ.cpython-37.pyc │ │ └── bevformer_occ.py │ ├── hooks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── custom_hooks.cpython-37.pyc │ │ └── custom_hooks.py │ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── custom_base_transformer_layer.cpython-37.pyc │ │ │ ├── decoder.cpython-37.pyc │ │ │ ├── encoder.cpython-37.pyc │ │ │ ├── multi_scale_deformable_attn_function.cpython-37.pyc │ │ │ ├── nus_param.cpython-37.pyc │ │ │ ├── occ_spatial_cross_attention.cpython-37.pyc │ │ │ ├── positional_encoding.cpython-37.pyc │ │ │ ├── spatial_cross_attention.cpython-37.pyc │ │ │ ├── temporal_self_attention.cpython-37.pyc │ │ │ ├── transformer.cpython-37.pyc │ │ │ └── transformer_occ.cpython-37.pyc │ │ ├── custom_base_transformer_layer.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── multi_scale_deformable_attn_function.py │ │ ├── nus_param.py │ │ ├── occ_spatial_cross_attention.py │ │ ├── 
positional_encoding.py │ │ ├── spatial_cross_attention.py │ │ ├── temporal_self_attention.py │ │ ├── transformer.py │ │ └── transformer_occ.py │ └── runner │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── epoch_based_runner.cpython-37.pyc │ │ └── epoch_based_runner.py │ ├── core │ ├── bbox │ │ ├── __pycache__ │ │ │ └── util.cpython-37.pyc │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── hungarian_assigner_3d.cpython-37.pyc │ │ │ └── hungarian_assigner_3d.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── nms_free_coder.cpython-37.pyc │ │ │ └── nms_free_coder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── match_cost.cpython-37.pyc │ │ │ └── match_cost.py │ │ └── util.py │ └── evaluation │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── eval_hooks.cpython-37.pyc │ │ └── eval_hooks.py │ ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── builder.cpython-37.pyc │ │ ├── nuscenes_dataset.cpython-37.pyc │ │ ├── nuscenes_occ.cpython-37.pyc │ │ ├── nuscnes_eval.cpython-37.pyc │ │ └── occ_metrics.cpython-37.pyc │ ├── builder.py │ ├── nuscenes_dataset.py │ ├── nuscenes_occ.py │ ├── nuscnes_eval.py │ ├── occ_metrics.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── formating.cpython-37.pyc │ │ │ ├── loading.cpython-37.pyc │ │ │ └── transform_3d.cpython-37.pyc │ │ ├── formating.py │ │ ├── loading.py │ │ └── transform_3d.py │ └── samplers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── distributed_sampler.cpython-37.pyc │ │ ├── group_sampler.cpython-37.pyc │ │ └── sampler.cpython-37.pyc │ │ ├── distributed_sampler.py │ │ ├── group_sampler.py │ │ └── sampler.py │ └── models │ ├── backbones │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── vovnet.cpython-37.pyc │ └── vovnet.py │ ├── hooks │ ├── __init__.py │ └── hooks.py │ ├── opt │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── adamw.cpython-37.pyc │ └── adamw.py │ └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── bricks.cpython-37.pyc │ ├── grid_mask.cpython-37.pyc │ ├── position_embedding.cpython-37.pyc │ ├── positional_encoding.cpython-37.pyc │ └── visual.cpython-37.pyc │ ├── bricks.py │ ├── grid_mask.py │ ├── position_embedding.py │ ├── positional_encoding.py │ └── visual.py ├── test.sh ├── tools ├── analysis_tools │ ├── __init__.py │ ├── analyze_logs.py │ ├── benchmark.py │ ├── get_flops.py │ ├── get_params.py │ └── visual.py ├── benchmark.py ├── create_data.py ├── create_data.sh ├── data_converter │ ├── __init__.py │ ├── create_gt_database.py │ ├── nuscenes_converter.py │ └── nuscenes_occ_converter.py ├── dist_benchmark.sh ├── dist_test.sh ├── dist_train.sh ├── fp16 │ ├── dist_train.sh │ └── train.py ├── misc │ ├── browse_dataset.py │ ├── fuse_conv_bn.py │ ├── print_config.py │ └── visualize_results.py ├── model_converters │ ├── convert_votenet_checkpoints.py │ ├── publish_model.py │ └── regnet2mmdet.py ├── slurm_train.sh ├── test.py └── train.py ├── train.sh └── utils └── vis.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yiang Shi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | # Occupancy as Set of Points
3 | 4 | [Yiang Shi](https://github.com/shawnsya)¹\*, [Tianheng Cheng](https://scholar.google.com/citations?user=PH8rJHYAAAAJ)¹\*, [Qian Zhang](https://scholar.google.com/citations?user=pCY-bikAAAAJ&hl=zh-CN)², [Wenyu Liu](http://eic.hust.edu.cn/professor/liuwenyu/)¹, [Xinggang Wang](https://xwcv.github.io/)¹ :email: 5 | 6 | ¹ [School of EIC, HUST](http://english.eic.hust.edu.cn/), 7 | ² [Horizon Robotics](https://en.horizonrobotics.com/) 8 | 9 | 10 | \* equal contribution, :email: corresponding author. 11 | 12 | [![arxiv paper](https://img.shields.io/badge/arXiv-Paper-red)](https://arxiv.org/abs/2407.04049) 13 | 14 | 15 | **ECCV 2024** 16 | 17 |
18 | 19 | 20 | 21 | 22 | ### News 23 | * `[2024-7-8]` We have released the [arXiv paper](https://arxiv.org/abs/2407.04049) of OSP. 24 | * `[2024-7-2]` OSP is accepted by ECCV 2024! 25 | 26 | 27 | ## Abstract 28 | In this paper, we explore a novel point representation for 3D occupancy prediction from multi-view images, which is named Occupancy as Set of Points. Existing camera-based methods tend to exploit a dense volume-based representation to predict the occupancy of the whole scene, making it hard to focus on specific areas or on areas outside the perception range. In comparison, we present *Points of Interest* (PoIs) to represent the scene and propose OSP, a novel framework for point-based 3D occupancy prediction. Owing to the inherent flexibility of the point-based representation, OSP achieves strong performance compared with existing methods and excels in training and inference adaptability. It extends beyond traditional perception boundaries and can be seamlessly integrated with volume-based methods to significantly enhance their effectiveness. Experiments on the Occ3D-nuScenes occupancy benchmark show that OSP has strong performance and flexibility. 29 | 30 |
31 | ![OSP architecture](assets/architecture.png) 32 |
33 | 34 | ## Preliminary 35 | 36 | ### Installation 37 | 1. Prepare the conda environment by following the [BEVFormer installation documentation](https://github.com/fundamentalvision/BEVFormer/blob/master/docs/install.md). 38 | 39 | ### Prepare Dataset 40 | 1. Download nuScenes and prepare the annotations by following the documentation of the [3D Occupancy Prediction Challenge at CVPR 2023](https://github.com/CVPR2023-3D-Occupancy-Prediction/CVPR2023-3D-Occupancy-Prediction). 41 | 42 | ### Pretrained Weights 43 | The pretrained FCOS3D weights can be downloaded [here](https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth). 44 | 45 | 46 | ## Usage 47 | 1. **Training** 48 | ```shell 49 | bash train.sh 50 | ``` 51 | * Replace the default config file as needed. 52 | * The config `osp_minibatch.py` trains on the nuScenes mini dataset. 53 | 54 | 55 | 2. **Evaluation** 56 | ```shell 57 | bash test.sh 58 | ``` 59 | * Replace the default config file as needed. 60 | * Replace the checkpoint path in the script with your own (direct launcher calls are sketched below). 61 |
62 | ## Results
63 | | Backbone | Method | Lr Schd | IoU | Config | Download |
64 | | :---: | :---: | :---: | :---: | :---: | :---: |
65 | | R101 | OSP | 24ep | 39.41 |[config](projects/configs/osp/osp.py) |[model]()|
66 | | R101 | BEVFormer w/ OSP | 24ep | 41.21 |[config](projects/configs/osp/osp.py) |[model]()|
67 |
68 | * Model weights will be released later.
69 |
70 | ## Citations
71 | ```bibtex
72 | @inproceedings{shi2024occupancysetpoints,
73 | title={Occupancy as Set of Points},
74 | author={Yiang Shi and Tianheng Cheng and Qian Zhang and Wenyu Liu and Xinggang Wang},
75 | year={2024},
76 | booktitle={ECCV}
77 | }
78 | ```
79 |
80 |
81 | ## License
82 |
83 | Released under the [MIT](LICENSE) License.
84 |
-------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/assets/architecture.png -------------------------------------------------------------------------------- /assets/placeholder: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/pred_by_OSP.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/assets/pred_by_OSP.gif -------------------------------------------------------------------------------- /docs/getting_started.md: --------------------------------------------------------------------------------
1 | ## Installation
2 | Follow https://github.com/fundamentalvision/BEVFormer/blob/master/docs/install.md to prepare the environment.
3 |
4 | ## Preparing Dataset
5 | 1. Download the gts and annotations.json we provided. You can download our imgs.tar.gz or use the original sample files of the nuScenes dataset.
6 |
7 | 2. Download the CAN bus expansion data and maps [HERE](https://www.nuscenes.org/download).
8 |
9 | 3. Organize your folder structure as below:
10 | ```
11 | Occupancy3D
12 | ├── projects/
13 | ├── tools/
14 | ├── ckpts/
15 | │   ├── r101_dcn_fcos3d_pretrain.pth
16 | ├── data/
17 | │   ├── can_bus/
18 | │   ├── occ3d-nus/
19 | │   │   ├── maps/
20 | │   │   ├── samples/ # You can download our imgs.tar.gz or use the original sample files of the nuScenes dataset
21 | │   │   ├── v1.0-trainval/
22 | │   │   ├── gts/
23 | │   │   └── annotations.json
24 | │   │
25 | │   ├── occ3d-test/
26 | │   │   ├── maps/
27 | │   │   ├── samples/ # You can download our imgs.tar.gz or use the original sample files of the nuScenes dataset
28 | │   │   ├── v1.0-test/
29 | │   │   └── annotations.json
30 | ```
31 |
32 |
33 | 4. Generate the info files for training and validation:
34 | ```
35 | python tools/create_data.py occ --root-path ./data/occ3d-nus --out-dir ./data/occ3d-nus --extra-tag occ --version v1.0-trainval --canbus ./data --occ-path ./data/occ3d-nus
36 | ```
37 |
38 | 5. Generate the info files for the test split (a quick sanity check for the generated files is sketched after this list):
39 | ```
40 | python tools/create_data.py occ --root-path ./data/occ3d-test --out-dir ./data/occ3d-test --extra-tag occ --version v1.0-test --canbus ./data --occ-path ./data/occ3d-test
41 | ```
42 |
43 | ## Training
44 | ```
45 | ./tools/dist_train.sh projects/configs/bevformer/bevformer_base_occ.py 8
46 | ```
47 |
48 | ## Testing
49 | ```
50 | ./tools/dist_test.sh projects/configs/bevformer/bevformer_base_occ.py work_dirs/bevformer_base_occ/epoch_24.pth 8
51 | ```
52 | You can evaluate the F-score at the same time by adding `--eval_fscore`.
53 |
54 | ## Test Submission
55 | Test the baseline model on the test split with 8 GPUs, and generate the npz files and submission data to be submitted to the official evaluation server.
56 | ```
57 | ./tools/dist_test.sh projects/configs/bevformer/bevformer_base_occ_test.py work_dirs/bevformer_base_occ/epoch_24.pth 8 --format-only --eval-options 'submission_prefix=./occ_submission'
58 | ```
59 |
60 | ### Performance
61 |
62 | model name|weight| split |mIoU | others | barrier | bicycle | bus | car | construction_vehicle | motorcycle | pedestrian | traffic_cone | trailer | truck | driveable_surface | other_flat | sidewalk | terrain | manmade | vegetation |
63 | ----|:----------:| :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :----------------------: | :---: | :------: | :------: |
64 | bevformer_base_occ|[Google Drive](https://drive.google.com/file/d/1NyoiosafAmne1qiABeNOPXR-P-y0i7_I/view?usp=share_link)| val | 23.67 | 5.03 | 38.79 | 9.98 | 34.41 | 41.09 | 13.24 | 16.50 | 18.15 | 17.83 | 18.66 | 27.7 | 48.95 | 27.73 | 29.08 | 25.38 | 15.41 | 14.46 |
65 | bevformer_base_occ|[Google Drive](https://drive.google.com/file/d/1NyoiosafAmne1qiABeNOPXR-P-y0i7_I/view?usp=share_link)| test | 23.7 | 10.24 | 36.77 | 11.7 | 29.87 | 38.92 | 10.29 | 22.05 | 16.21 | 14.69 | 27.44 | 23.13 | 48.19 | 33.1 | 29.8 | 17.64 | 19.01 | 13.75 |
66 |
67 |
-------------------------------------------------------------------------------- /figs/leaderboard-06-10-2023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/figs/leaderboard-06-10-2023.png -------------------------------------------------------------------------------- /figs/mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/figs/mask.jpg -------------------------------------------------------------------------------- /figs/mask_camera.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/figs/mask_camera.jpg -------------------------------------------------------------------------------- /figs/mask_lidar.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/figs/mask_lidar.jpg -------------------------------------------------------------------------------- /figs/mask_none.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/figs/mask_none.jpg -------------------------------------------------------------------------------- /figs/occupanc_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/figs/occupanc_1.gif -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/__init__.py -------------------------------------------------------------------------------- /projects/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: --------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/coco/'
3 | img_norm_cfg = dict(
4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5 | train_pipeline = [
6 | dict(type='LoadImageFromFile'),
7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
9 | dict(type='RandomFlip', flip_ratio=0.5),
10 | dict(type='Normalize', **img_norm_cfg),
11 | dict(type='Pad', size_divisor=32),
12 | dict(type='DefaultFormatBundle'),
13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
14 | ]
15 | test_pipeline = [
16 | dict(type='LoadImageFromFile'),
17 | dict(
18 | type='MultiScaleFlipAug',
19 | img_scale=(1333, 800),
20 | flip=False,
21 | transforms=[
22 | dict(type='Resize', keep_ratio=True),
23 | dict(type='RandomFlip'),
24 | dict(type='Normalize', **img_norm_cfg),
25 | dict(type='Pad', size_divisor=32),
26 | dict(type='ImageToTensor', keys=['img']),
27 | dict(type='Collect', keys=['img']),
28 | ])
29 | ]
30 | data = dict(
31 | samples_per_gpu=2,
32 | workers_per_gpu=2,
33 | train=dict(
34 | type=dataset_type,
35 | ann_file=data_root + 'annotations/instances_train2017.json',
36 | img_prefix=data_root + 'train2017/',
37 | pipeline=train_pipeline),
38 | val=dict(
39 | type=dataset_type,
40 | ann_file=data_root + 'annotations/instances_val2017.json',
41 | img_prefix=data_root + 'val2017/',
42 | pipeline=test_pipeline),
43 | test=dict(
44 | type=dataset_type,
45 | ann_file=data_root +
'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict( 12 | filter_by_difficulty=[-1], 13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 14 | classes=class_names, 15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6)) 16 | 17 | file_client_args = dict(backend='disk') 18 | # Uncomment the following if use ceph or other file clients. 19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 20 | # for more details. 21 | # file_client_args = dict( 22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 23 | 24 | train_pipeline = [ 25 | dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=4, 29 | use_dim=4, 30 | file_client_args=file_client_args), 31 | dict( 32 | type='LoadAnnotations3D', 33 | with_bbox_3d=True, 34 | with_label_3d=True, 35 | file_client_args=file_client_args), 36 | dict(type='ObjectSample', db_sampler=db_sampler), 37 | dict( 38 | type='ObjectNoise', 39 | num_try=100, 40 | translation_std=[1.0, 1.0, 0.5], 41 | global_rot_range=[0.0, 0.0], 42 | rot_range=[-0.78539816, 0.78539816]), 43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[-0.78539816, 0.78539816], 47 | scale_ratio_range=[0.95, 1.05]), 48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=4, 59 | use_dim=4, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='MultiScaleFlipAug3D', 63 | img_scale=(1333, 800), 64 | pts_scale_ratio=1, 65 | flip=False, 66 | transforms=[ 67 | dict( 68 | type='GlobalRotScaleTrans', 69 | rot_range=[0, 0], 70 | scale_ratio_range=[1., 1.], 71 | translation_std=[0, 0, 0]), 72 | dict(type='RandomFlip3D'), 73 | dict( 74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 75 | dict( 76 | type='DefaultFormatBundle3D', 77 | class_names=class_names, 78 | with_label=False), 79 | dict(type='Collect3D', keys=['points']) 80 | ]) 81 | ] 82 | # construct a pipeline for data and gt loading in show function 83 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 84 | eval_pipeline = [ 85 | dict( 86 | type='LoadPointsFromFile', 87 | coord_type='LIDAR', 88 | load_dim=4, 89 | use_dim=4, 90 | file_client_args=file_client_args), 91 | dict( 92 | type='DefaultFormatBundle3D', 93 | class_names=class_names, 94 | with_label=False), 95 | dict(type='Collect3D', keys=['points']) 96 | ] 97 | 98 | data = dict( 99 | samples_per_gpu=6, 100 | workers_per_gpu=4, 101 | train=dict( 102 | type='RepeatDataset', 103 | times=2, 104 | dataset=dict( 105 | type=dataset_type, 106 | data_root=data_root, 107 | ann_file=data_root + 'kitti_infos_train.pkl', 108 | split='training', 109 | pts_prefix='velodyne_reduced', 110 | pipeline=train_pipeline, 111 | modality=input_modality, 112 | classes=class_names, 113 | test_mode=False, 114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 116 | box_type_3d='LiDAR')), 117 | val=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'kitti_infos_val.pkl', 121 | split='training', 122 | pts_prefix='velodyne_reduced', 123 | pipeline=test_pipeline, 124 | modality=input_modality, 125 | classes=class_names, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'kitti_infos_val.pkl', 132 | split='training', 133 | pts_prefix='velodyne_reduced', 134 | pipeline=test_pipeline, 135 | modality=input_modality, 136 | classes=class_names, 137 | test_mode=True, 138 | box_type_3d='LiDAR')) 139 | 140 | evaluation = dict(interval=1, pipeline=eval_pipeline) 141 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 
19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show function 81 | # please keep its loading function consistent with test_pipeline (e.g. client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/range100_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-100, -100, -5, 100, 100, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 
13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and 
gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | 
flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | 
dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 
77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/waymoD5-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'WaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if you use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details.
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | class_names = ['Car'] 14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 15 | input_modality = dict(use_lidar=True, use_camera=False) 16 | db_sampler = dict( 17 | data_root=data_root, 18 | info_path=data_root + 'waymo_dbinfos_train.pkl', 19 | rate=1.0, 20 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 21 | classes=class_names, 22 | sample_groups=dict(Car=15), 23 | points_loader=dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=5, 27 | use_dim=[0, 1, 2, 3, 4], 28 | file_client_args=file_client_args)) 29 | 30 | train_pipeline = [ 31 | dict( 32 | type='LoadPointsFromFile', 33 | coord_type='LIDAR', 34 | load_dim=6, 35 | use_dim=5, 36 | file_client_args=file_client_args), 37 | dict( 38 | type='LoadAnnotations3D', 39 | with_bbox_3d=True, 40 | with_label_3d=True, 41 | file_client_args=file_client_args), 42 | dict(type='ObjectSample', db_sampler=db_sampler), 43 | dict( 44 | type='RandomFlip3D', 45 | sync_2d=False, 46 | flip_ratio_bev_horizontal=0.5, 47 | flip_ratio_bev_vertical=0.5), 48 | dict( 49 | type='GlobalRotScaleTrans', 50 | rot_range=[-0.78539816, 0.78539816], 51 | scale_ratio_range=[0.95, 1.05]), 52 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 53 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 54 | dict(type='PointShuffle'), 55 | dict(type='DefaultFormatBundle3D', class_names=class_names), 56 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 57 | ] 58 | test_pipeline = [ 59 | dict( 60 | type='LoadPointsFromFile', 61 | coord_type='LIDAR', 62 | load_dim=6, 63 | use_dim=5, 64 | file_client_args=file_client_args), 65 | dict( 66 | type='MultiScaleFlipAug3D', 67 | img_scale=(1333, 800), 68 | pts_scale_ratio=1, 69 | flip=False, 70 | transforms=[ 71 | dict( 72 | type='GlobalRotScaleTrans', 73 | rot_range=[0, 0], 74 | scale_ratio_range=[1., 1.], 75 | translation_std=[0, 0, 0]), 76 | dict(type='RandomFlip3D'), 77 | dict( 78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 79 | dict( 80 | type='DefaultFormatBundle3D', 81 | class_names=class_names, 82 | with_label=False), 83 | dict(type='Collect3D', keys=['points']) 84 | ]) 85 | ] 86 | # construct a pipeline for data and gt loading in show function 87 | # please keep its loading function consistent with test_pipeline (e.g. client) 88 | eval_pipeline = [ 89 | dict( 90 | type='LoadPointsFromFile', 91 | coord_type='LIDAR', 92 | load_dim=6, 93 | use_dim=5, 94 | file_client_args=file_client_args), 95 | dict( 96 | type='DefaultFormatBundle3D', 97 | class_names=class_names, 98 | with_label=False), 99 | dict(type='Collect3D', keys=['points']) 100 | ] 101 | 102 | data = dict( 103 | samples_per_gpu=2, 104 | workers_per_gpu=4, 105 | train=dict( 106 | type='RepeatDataset', 107 | times=2, 108 | dataset=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'waymo_infos_train.pkl', 112 | split='training', 113 | pipeline=train_pipeline, 114 | modality=input_modality, 115 | classes=class_names, 116 | test_mode=False, 117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
119 | box_type_3d='LiDAR', 120 | # load one frame every five frames 121 | load_interval=5)), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | ann_file=data_root + 'waymo_infos_val.pkl', 126 | split='training', 127 | pipeline=test_pipeline, 128 | modality=input_modality, 129 | classes=class_names, 130 | test_mode=True, 131 | box_type_3d='LiDAR'), 132 | test=dict( 133 | type=dataset_type, 134 | data_root=data_root, 135 | ann_file=data_root + 'waymo_infos_val.pkl', 136 | split='training', 137 | pipeline=test_pipeline, 138 | modality=input_modality, 139 | classes=class_names, 140 | test_mode=True, 141 | box_type_3d='LiDAR')) 142 | 143 | evaluation = dict(interval=24, pipeline=eval_pipeline) 144 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 
57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | 
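A quick consistency check on the two CenterPoint configs above (a standalone sketch, not a file from this repo): the BEV grid and head resolutions follow directly from voxel_size and the point cloud range. The full range [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0] is assumed here from the standard nuScenes setup; only its x/y slice appears above as pc_range=[-51.2, -51.2].

    point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]  # assumed nuScenes range
    voxel_size = [0.2, 0.2, 8]  # pillar variant above
    grid = [round((point_cloud_range[i + 3] - point_cloud_range[i]) / voxel_size[i])
            for i in range(3)]
    assert grid == [512, 512, 1]  # matches grid_size=[512, 512, 1] and output_shape=(512, 512)
    assert grid[0] // 4 == 128    # out_size_factor=4: CenterHead works on a 128x128 BEV map

The same arithmetic with the voxel variant's voxel_size=[0.1, 0.1, 0.2] gives [1024, 1024, 40], matching its grid_size=[1024, 1024, 40] and, with one extra z slice, sparse_shape=[41, 1024, 1024].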
-------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | 
in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
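# (Annotation, not part of the original config.) A worked instance of the
# consistency rule above: with the point_cloud_range=[-50, -50, -5, 50, 50, 3]
# used in this file, each BEV axis spans (50 - (-50)) / 0.25 = 400 voxels,
# which is exactly the PointPillarsScatter output_shape=[400, 400]; changing
# voxel_size or the range without updating output_shape silently misaligns
# the scattered pillar grid.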
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | 
nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | 
nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
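# (Annotation, not part of the original config.) Worked check for the values
# below: with point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4] and
# voxel_size=[0.08, 0.08, 0.1], the grid is 153.6 / 0.08 = 1920 (x),
# 102.4 / 0.08 = 1280 (y) and 6 / 0.1 = 60 (z); SparseEncoder keeps one
# extra z slice, hence sparse_shape=[61, 1280, 1920].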
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | 
scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | 
type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 
| paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from backbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, #
should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum changes during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py:
-------------------------------------------------------------------------------- 1 | # For the nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 20 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use the default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on the nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # This schedule is usually used by models trained on the KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will rise from 0.0018 to 0.018, then decay to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, so the actual maximum epoch 30 | # number can be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
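Note on the cyclic schedules above: with lr=0.0018, target_ratio=(10, 1e-4) and step_ratio_up=0.4, the learning rate climbs to 10x the base value over the first 40% of all steps and then anneals down to 1e-4x the base value. A minimal sketch of that trajectory, assuming linear annealing for readability (this is an illustration, not mmcv's CyclicLrUpdaterHook, which anneals with a cosine curve by default):

def cyclic_lr(step, total_steps, base_lr=0.0018,
              target_ratio=(10, 1e-4), step_ratio_up=0.4):
    # Phase 1: linear ramp from base_lr up to base_lr * 10
    # over the first 40% of the steps.
    up_steps = int(total_steps * step_ratio_up)
    peak = base_lr * target_ratio[0]
    floor = base_lr * target_ratio[1]
    if step < up_steps:
        return base_lr + (peak - base_lr) * step / max(up_steps, 1)
    # Phase 2: anneal from the peak down to base_lr * 1e-4.
    frac = (step - up_steps) / max(total_steps - up_steps, 1)
    return peak + (floor - peak) * frac

For example, cyclic_lr(0, 1000) returns the base 0.0018, cyclic_lr(400, 1000) returns the peak 0.018, and cyclic_lr(1000, 1000) returns the floor 1.8e-07.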
/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on the nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor datasets, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the ScanNet dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import
CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.backbones.vovnet import VoVNet 9 | from .models.utils import * 10 | from .models.opt.adamw import AdamW2 11 | from .bevformer import * 12 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | from .runner import * 6 | from .hooks import * 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/apis/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__pycache__/mmdet_train.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/apis/__pycache__/mmdet_train.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__pycache__/test.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/apis/__pycache__/test.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__pycache__/train.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/apis/__pycache__/train.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: 
-------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | eval_model=None, 18 | meta=None): 19 | """A function wrapper for launching model training according to cfg. 20 | 21 | Because we need a different eval_hook in the runner, this wrapper should 22 | be deprecated in the future. 23 | """ 24 | if cfg.model.type in ['EncoderDecoder3D']: 25 | assert False 26 | else: 27 | custom_train_detector( 28 | model, 29 | dataset, 30 | cfg, 31 | distributed=distributed, 32 | validate=validate, 33 | timestamp=timestamp, 34 | eval_model=eval_model, 35 | meta=meta) 36 | 37 | 38 | def train_model(model, 39 | dataset, 40 | cfg, 41 | distributed=False, 42 | validate=False, 43 | timestamp=None, 44 | meta=None): 45 | """A function wrapper for launching model training according to cfg. 46 | 47 | Because we need a different eval_hook in the runner, this wrapper should 48 | be deprecated in the future. 49 | """ 50 | if cfg.model.type in ['EncoderDecoder3D']: 51 | train_segmentor( 52 | model, 53 | dataset, 54 | cfg, 55 | distributed=distributed, 56 | validate=validate, 57 | timestamp=timestamp, 58 | meta=meta) 59 | else: 60 | train_detector( 61 | model, 62 | dataset, 63 | cfg, 64 | distributed=distributed, 65 | validate=validate, 66 | timestamp=timestamp, 67 | meta=meta) 68 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_occ_head import BEVFormerOccHead 2 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/bevformer_occ_head.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/bevformer_occ_head.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_occ import BEVFormerOcc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__pycache__/__init__.cpython-37.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformer_occ.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformer_occ.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__pycache__/custom_hooks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/custom_hooks.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class TransferWeight(Hook): 7 | 8 | def __init__(self, every_n_inters=1): 9 | self.every_n_inters=every_n_inters 10 | 11 | def after_train_iter(self, runner): 12 | if self.every_n_inner_iters(runner, self.every_n_inters): 13 | runner.eval_model.load_state_dict(runner.model.state_dict()) 14 | 15 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 3 | from .occ_spatial_cross_attention import OccSpatialCrossAttention, OccMSDeformableAttention3D 4 | from .temporal_self_attention import TemporalSelfAttention 5 | from .encoder import BEVFormerEncoder, BEVFormerLayer, OccBEVFormerLayer 6 | from .decoder import DetectionTransformerDecoder 7 | from .transformer_occ import TransformerOcc 8 | from .positional_encoding import SinePositionalEncoding3D -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/__init__.cpython-37.pyc 
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/custom_base_transformer_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/custom_base_transformer_layer.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/decoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/decoder.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/encoder.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/multi_scale_deformable_attn_function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/multi_scale_deformable_attn_function.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/nus_param.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/nus_param.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/occ_spatial_cross_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/occ_spatial_cross_attention.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/positional_encoding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/positional_encoding.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/spatial_cross_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/spatial_cross_attention.cpython-37.pyc -------------------------------------------------------------------------------- 
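Note on the TransferWeight hook defined above: it keeps the frozen eval_model (used for inference on historical frames) in step with the model being trained by copying the state dict after every iteration. A hedged usage sketch, assuming a standard mmcv setup where custom hooks are listed in the config (TransferWeight must be imported so its register_module() call runs):

custom_hooks = [
    # Copy the freshly updated training weights into the frozen
    # eval model once per training iteration.
    dict(type='TransferWeight', every_n_inters=1),
]

Equivalently, it can be registered imperatively with runner.register_hook(TransferWeight(every_n_inters=1)).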
/projects/mmdet3d_plugin/bevformer/modules/__pycache__/temporal_self_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/temporal_self_attention.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/transformer.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__pycache__/transformer_occ.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/modules/__pycache__/transformer_occ.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/runner/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__pycache__/epoch_based_runner.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/bevformer/runner/__pycache__/epoch_based_runner.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # --------------------------------------------- 3 | # Modified by Zhiqi Li 4 | # --------------------------------------------- 5 | 6 | import os.path as osp 7 | import torch 8 | import mmcv 9 | from mmcv.runner.base_runner import BaseRunner 10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | from mmcv.runner.utils import get_host_info 14 | from pprint import pprint 15 | from mmcv.parallel.data_container import DataContainer 16 | 17 | 18 | @RUNNERS.register_module() 19 | class EpochBasedRunner_video(EpochBasedRunner): 20 | 21 | ''' 22 | # basic logic 23 | 24 | input_sequence = [a, b, c] # given a sequence of samples 25 | 26 | prev_bev = None 27 | for each in input_sequence[:-1]: 28 | prev_bev = eval_model(each, prev_bev) # inference only. 29 | 30 | model(input_sequence[-1], prev_bev) # train the last sample.
31 | ''' 32 | 33 | def __init__(self, 34 | model, 35 | eval_model=None, 36 | batch_processor=None, 37 | optimizer=None, 38 | work_dir=None, 39 | logger=None, 40 | meta=None, 41 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 42 | max_iters=None, 43 | max_epochs=None): 44 | super().__init__(model, 45 | batch_processor, 46 | optimizer, 47 | work_dir, 48 | logger, 49 | meta, 50 | max_iters, 51 | max_epochs) 52 | keys.append('img_metas') 53 | self.keys = keys 54 | self.eval_model = eval_model 55 | self.eval_model.eval() 56 | 57 | def run_iter(self, data_batch, train_mode, **kwargs): 58 | if self.batch_processor is not None: 59 | assert False 60 | # outputs = self.batch_processor( 61 | # self.model, data_batch, train_mode=train_mode, **kwargs) 62 | elif train_mode: 63 | 64 | num_samples = data_batch['img'].data[0].size(1) 65 | data_list = [] 66 | prev_bev = None 67 | for i in range(num_samples): 68 | data = {} 69 | for key in self.keys: 70 | if key not in ['img_metas', 'img', 'points']: 71 | data[key] = data_batch[key] 72 | else: 73 | if key == 'img': 74 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 75 | elif key == 'img_metas': 76 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 77 | else: 78 | assert False 79 | data_list.append(data) 80 | with torch.no_grad(): 81 | for i in range(num_samples-1): 82 | if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']: 83 | data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 84 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 85 | if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']: 86 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 87 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 88 | else: 89 | assert False 90 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 91 | 92 | if not isinstance(outputs, dict): 93 | raise TypeError('"batch_processor()" or "model.train_step()" ' 94 | 'and "model.val_step()" must return a dict') 95 | if 'log_vars' in outputs: 96 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 97 | self.outputs = outputs -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-37.pyc --------------------------------------------------------------------------------
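In plain terms, run_iter above turns one temporal batch into per-frame inputs, runs the frozen eval model over frames 0..T-2 under torch.no_grad() to accumulate prev_bev, and back-propagates only through the final frame. A stripped-down sketch of that control flow, with hypothetical model/eval_model callables and the DataContainer bookkeeping omitted:

import torch

def train_on_clip(model, eval_model, frames, optimizer):
    # frames: list of per-frame inputs, ordered oldest to newest.
    prev_bev = None
    with torch.no_grad():  # frames 0..T-2: inference only
        for frame in frames[:-1]:
            prev_bev = eval_model(frame, prev_bev)
    loss = model(frames[-1], prev_bev)  # only the last frame is trained
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss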
/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | num_classes (int): Number of output classes. Default: 10. 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should include background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 10]. 49 | Returns: 50 | list[dict]: Decoded boxes.
51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 
113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 19 | (cx, cy, w, h), which are all in range [0, 1]. Shape 20 | [num_query, 4]. 21 | gt_bboxes (Tensor): Ground truth boxes with normalized 22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 
23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/evaluation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__pycache__/eval_hooks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/core/evaluation/__pycache__/eval_hooks.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 
5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # to avoid a circular import 77 | 78 | results = custom_multi_gpu_test( 79 | runner.model, 80 | self.dataloader, 81 | tmpdir=tmpdir, 82 | gpu_collect=self.gpu_collect) 83 | if runner.rank == 0: 84 | print('\n') 85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 86 | 87 | # key_score = self.evaluate(runner, results) 88 | eval_res = self.dataloader.dataset.evaluate(results, 89 | runner=runner) 90 | for name, val in eval_res.items(): 91 | runner.log_buffer.output[name] = val 92 | runner.log_buffer.ready = True 93 | 94 | # if self.save_best: 95 | # self._save_ckpt(runner, key_score) 96 | 97 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import CustomNuScenesDataset 2 | from .nuscenes_occ import NuSceneOcc 3 | from .builder import custom_build_dataset 4 | 5 | __all__ = [ 6 | 'CustomNuScenesDataset' 7 | ] 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/builder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/__pycache__/builder.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_occ.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_occ.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/nuscnes_eval.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/__pycache__/nuscnes_eval.cpython-37.pyc
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/occ_metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/__pycache__/occ_metrics.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage) 4 | from .formating import CustomDefaultFormatBundle3D 5 | from .loading import LoadOccGTFromFile 6 | __all__ = [ 7 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 8 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 'RandomScaleImageMultiViewImage' 9 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/formating.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/formating.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/loading.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/loading.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/transform_3d.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/transform_3d.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 
15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | 26 | def __call__(self, results): 27 | """Call function to transform and format common fields in results. 28 | Args: 29 | results (dict): Result dict containing the data to convert. 30 | Returns: 31 | dict: The result dict containing the data formatted with the 32 | default bundle. 33 | """ 34 | # Format 3D data 35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 36 | results['gt_map_masks'] = DC( 37 | to_tensor(results['gt_map_masks']), stack=True) 38 | 39 | return results -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import random 3 | import mmcv 4 | from mmdet.datasets.builder import PIPELINES 5 | from mmcv.parallel import DataContainer as DC 6 | import os 7 | 8 | @PIPELINES.register_module() 9 | class LoadOccGTFromFile(object): 10 | """Load occupancy ground truth from an .npz file. 11 | 12 | Expects results['occ_gt_path'] to be a path, relative to data_root, of an 13 | .npz file storing the 'semantics', 'mask_lidar' and 'mask_camera' arrays. 14 | Args: 15 | data_root (str): Root directory under which the occupancy 16 | ground-truth files are stored. If 'occ_gt_path' is absent, 17 | all-zero placeholders of shape (200, 200, 16) are returned.
18 | """ 19 | 20 | def __init__( 21 | self, 22 | data_root, 23 | ): 24 | self.data_root = data_root 25 | 26 | def __call__(self, results): 27 | if 'occ_gt_path' in results: 28 | occ_gt_path = results['occ_gt_path'] 29 | occ_gt_path = os.path.join(self.data_root,occ_gt_path) 30 | 31 | occ_labels = np.load(occ_gt_path) 32 | semantics = occ_labels['semantics'] 33 | mask_lidar = occ_labels['mask_lidar'] 34 | mask_camera = occ_labels['mask_camera'] 35 | else: 36 | semantics = np.zeros((200,200,16),dtype=np.uint8) 37 | mask_lidar = np.zeros((200,200,16),dtype=np.uint8) 38 | mask_camera = np.zeros((200, 200, 16), dtype=np.uint8) 39 | 40 | results['voxel_semantics'] = semantics 41 | results['mask_lidar'] = mask_lidar 42 | results['mask_camera'] = mask_camera 43 | 44 | 45 | return results 46 | 47 | def __repr__(self): 48 | """str: Return a string that describes the module.""" 49 | return "{} (data_root={}')".format( 50 | self.__class__.__name__, self.data_root) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/samplers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__pycache__/distributed_sampler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/samplers/__pycache__/distributed_sampler.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__pycache__/group_sampler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/samplers/__pycache__/group_sampler.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__pycache__/sampler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/datasets/samplers/__pycache__/sampler.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def __init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | 
rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for compatibility with PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0.
31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/backbones/__pycache__/__init__.cpython-37.pyc 
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/vovnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/backbones/__pycache__/vovnet.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class GradChecker(Hook): 7 | 8 | def after_train_iter(self, runner): 9 | for key, val in runner.model.named_parameters(): 10 | if val.grad is None and val.requires_grad: 11 | print('WARNING: {key}\'s parameters are not used!'.format(key=key)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/opt/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__pycache__/adamw.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/opt/__pycache__/adamw.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding 5 | from .visual import save_tensor 6 | from .positional_encoding import LearnedPositionalEncoding3D -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/bricks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/utils/__pycache__/bricks.cpython-37.pyc
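Since `GradChecker` above is registered in mmcv's `HOOKS` registry, it can be switched on from a config without code changes. A hedged sketch (assuming the training entry point honors `custom_hooks`, as the standard mmcv/mmdet runners do; the priority value is an illustrative choice):

```python
# in a config file, e.g. projects/configs/osp/osp.py
custom_hooks = [
    dict(type='GradChecker', priority='HIGHEST'),
]
```

After each training iteration the hook then prints a warning for every parameter that requires gradients but received none, which helps catch modules that are silently excluded from the loss.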
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/grid_mask.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/utils/__pycache__/grid_mask.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/position_embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/utils/__pycache__/position_embedding.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/positional_encoding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/utils/__pycache__/positional_encoding.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/visual.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/osp/573dc48a06a4d11e00a354c8cd833636ee91aaf9/projects/mmdet3d_plugin/models/utils/__pycache__/visual.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | import torch 5 | time_maps = defaultdict(lambda :0.) 6 | count_maps = defaultdict(lambda :0.) 
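# Note (added comment): the two module-level maps above accumulate total
# wall-clock time and call counts keyed by "<name> : <function>", so the
# decorator below can report a running average latency. Illustrative usage
# (not from the repo): decorate a method with @run_time('BEVFormerOcc') and
# each call prints its cumulative mean runtime, with torch.cuda.synchronize()
# bracketing the call so GPU work is timed accurately.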
7 | def run_time(name): 8 | def middle(fn): 9 | def wrapper(*args, **kwargs): 10 | torch.cuda.synchronize() 11 | start = time.time() 12 | res = fn(*args, **kwargs) 13 | torch.cuda.synchronize() 14 | time_maps['%s : %s'%(name, fn.__name__) ] += time.time()-start 15 | count_maps['%s : %s'%(name, fn.__name__) ] +=1 16 | print("%s : %s takes up %f "% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] )) 17 | return res 18 | return wrapper 19 | return middle 20 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | from mmcv.runner import force_fp32, auto_fp16 6 | 7 | class Grid(object): 8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 9 | self.use_h = use_h 10 | self.use_w = use_w 11 | self.rotate = rotate 12 | self.offset = offset 13 | self.ratio = ratio 14 | self.mode=mode 15 | self.st_prob = prob 16 | self.prob = prob 17 | 18 | def set_prob(self, epoch, max_epoch): 19 | self.prob = self.st_prob * epoch / max_epoch 20 | 21 | def __call__(self, img, label): 22 | if np.random.rand() > self.prob: 23 | return img, label 24 | h = img.size(1) 25 | w = img.size(2) 26 | self.d1 = 2 27 | self.d2 = min(h, w) 28 | hh = int(1.5*h) 29 | ww = int(1.5*w) 30 | d = np.random.randint(self.d1, self.d2) 31 | if self.ratio == 1: 32 | self.l = np.random.randint(1, d) 33 | else: 34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 35 | mask = np.ones((hh, ww), np.float32) 36 | st_h = np.random.randint(d) 37 | st_w = np.random.randint(d) 38 | if self.use_h: 39 | for i in range(hh//d): 40 | s = d*i + st_h 41 | t = min(s+self.l, hh) 42 | mask[s:t,:] *= 0 43 | if self.use_w: 44 | for i in range(ww//d): 45 | s = d*i + st_w 46 | t = min(s+self.l, ww) 47 | mask[:,s:t] *= 0 48 | 49 | r = np.random.randint(self.rotate) 50 | mask = Image.fromarray(np.uint8(mask)) 51 | mask = mask.rotate(r) 52 | mask = np.asarray(mask) 53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 54 | 55 | mask = torch.from_numpy(mask).float() 56 | if self.mode == 1: 57 | mask = 1-mask 58 | 59 | mask = mask.expand_as(img) 60 | if self.offset: 61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 62 | offset = (1 - mask) * offset 63 | img = img * mask + offset 64 | else: 65 | img = img * mask 66 | 67 | return img, label 68 | 69 | 70 | class GridMask(nn.Module): 71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 72 | super(GridMask, self).__init__() 73 | self.use_h = use_h 74 | self.use_w = use_w 75 | self.rotate = rotate 76 | self.offset = offset 77 | self.ratio = ratio 78 | self.mode = mode 79 | self.st_prob = prob 80 | self.prob = prob 81 | self.fp16_enable = False 82 | def set_prob(self, epoch, max_epoch): 83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 84 | @auto_fp16() 85 | def forward(self, x): 86 | if np.random.rand() > self.prob or not self.training: 87 | return x 88 | n,c,h,w = x.size() 89 | x = x.view(-1,h,w) 90 | hh = int(1.5*h) 91 | ww = int(1.5*w) 92 | d = np.random.randint(2, h) 93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 94 | mask = np.ones((hh, ww), np.float32) 95 | st_h = np.random.randint(d) 96 | st_w = np.random.randint(d) 97 | if self.use_h: 98 | for i in range(hh//d): 99 | s = d*i + st_h 
100 | t = min(s+self.l, hh) 101 | mask[s:t,:] *= 0 102 | if self.use_w: 103 | for i in range(ww//d): 104 | s = d*i + st_w 105 | t = min(s+self.l, ww) 106 | mask[:,s:t] *= 0 107 | 108 | r = np.random.randint(self.rotate) 109 | mask = Image.fromarray(np.uint8(mask)) 110 | mask = mask.rotate(r) 111 | mask = np.asarray(mask) 112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 113 | 114 | mask = torch.from_numpy(mask).to(x.dtype).cuda() 115 | if self.mode == 1: 116 | mask = 1-mask 117 | mask = mask.expand_as(x) 118 | if self.offset: 119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda() 120 | x = x * mask + offset * (1 - mask) 121 | else: 122 | x = x * mask 123 | 124 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class RelPositionEmbedding(nn.Module): 6 | def __init__(self, num_pos_feats=64, pos_norm=True): 7 | super().__init__() 8 | self.num_pos_feats = num_pos_feats 9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False) 10 | #nn.init.orthogonal_(self.fc.weight) 11 | #self.fc.weight.requires_grad = False 12 | self.pos_norm = pos_norm 13 | if self.pos_norm: 14 | self.norm = nn.LayerNorm(self.num_pos_feats) 15 | def forward(self, tensor): 16 | #mask = nesttensor.mask 17 | B,C,H,W = tensor.shape 18 | #print('tensor.shape', tensor.shape) 19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1) 21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 23 | 24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 25 | #x_axis =torch.stack((x_range,1-x_range),dim=1) 26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 28 | x_pos = torch.cat((y_axis, x_axis), dim=1) 29 | x_pos = self.fc(x_pos) 30 | 31 | if self.pos_norm: 32 | x_pos = self.norm(x_pos) 33 | #print('xpos,', x_pos.max(),x_pos.min()) 34 | return x_pos -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/positional_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING 6 | from mmcv.runner import BaseModule 7 | 8 | 9 | @POSITIONAL_ENCODING.register_module() 10 | class LearnedPositionalEncoding3D(BaseModule): 11 | """Position embedding with learnable embedding weights. 12 | 13 | Args: 14 | num_feats (int): The feature dimension for each position 15 | along x-axis or y-axis. The final returned dimension for 16 | each position is 2 times of this value. 17 | row_num_embed (int, optional): The dictionary size of row embeddings. 18 | Default 50. 19 | col_num_embed (int, optional): The dictionary size of col embeddings. 20 | Default 50. 21 | init_cfg (dict or list[dict], optional): Initialization config dict. 
22 | """ 23 | 24 | def __init__(self, 25 | num_feats, 26 | row_num_embed=50, 27 | col_num_embed=50, 28 | height_num_embed=50, 29 | init_cfg=dict(type='Uniform', layer='Embedding')): 30 | super(LearnedPositionalEncoding3D, self).__init__(init_cfg) 31 | self.row_embed = nn.Embedding(row_num_embed, num_feats) 32 | self.col_embed = nn.Embedding(col_num_embed, num_feats) 33 | self.height_embed = nn.Embedding(height_num_embed, num_feats) 34 | self.num_feats = num_feats 35 | self.row_num_embed = row_num_embed 36 | self.col_num_embed = col_num_embed 37 | self.height_num_embed = height_num_embed 38 | 39 | def forward(self, mask): 40 | """Forward function for `LearnedPositionalEncoding`. 41 | 42 | Args: 43 | mask (Tensor): ByteTensor mask. Non-zero values representing 44 | ignored positions, while zero values means valid positions 45 | for this image. Shape [bs, h, w]. 46 | 47 | Returns: 48 | pos (Tensor): Returned position embedding with shape 49 | [bs, num_feats*2, h, w]. 50 | """ 51 | l, h, w = mask.shape[-3:] 52 | x = torch.arange(w, device=mask.device) 53 | y = torch.arange(h, device=mask.device) 54 | z = torch.arange(l, device=mask.device) 55 | x_embed = self.col_embed(x) 56 | y_embed = self.row_embed(y) 57 | z_embed = self.height_embed(z) 58 | pos = torch.cat( 59 | (x_embed.unsqueeze(0).unsqueeze(0).repeat(l, h, 1, 1), 60 | y_embed.unsqueeze(1).unsqueeze(0).repeat(l, 1, w, 1), 61 | z_embed.unsqueeze(1).unsqueeze(1).repeat(1, h, w, 1)),dim=-1).permute(3, 0, 1, 2).unsqueeze(0).repeat(mask.shape[0],1, 1, 1, 1) 62 | return pos 63 | 64 | def __repr__(self): 65 | """str: a string that describes the module""" 66 | repr_str = self.__class__.__name__ 67 | repr_str += f'(num_feats={self.num_feats}, ' 68 | repr_str += f'row_num_embed={self.row_num_embed}, ' 69 | repr_str += f'col_num_embed={self.col_num_embed})' 70 | repr_str += f'height_num_embed={self.height_num_embed})' 71 | return repr_str 72 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0,): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | export PORT=22228 3 | 4 | bash ./tools/dist_test.sh projects/configs/osp/osp.py \ 5 | your_work_dir/epoch_24.pth \ 6 | 1 ${PORT} 7 | -------------------------------------------------------------------------------- /tools/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | from 
projects.mmdet3d_plugin.bevformer.detectors.bevformer_occ import BEVFormerOcc -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | import sys 9 | sys.path.append('.') 10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader 11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset 12 | # from mmdet3d.datasets import build_dataloader, build_dataset 13 | from mmdet3d.models import build_detector 14 | #from tools.misc.fuse_conv_bn import fuse_module 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 19 | parser.add_argument('config', help='test config file path') 20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file') 21 | parser.add_argument('--samples', type=int, default=2000, help='samples to benchmark') 22 | parser.add_argument( 23 | '--log-interval', type=int, default=50, help='interval of logging') 24 | parser.add_argument( 25 | '--fuse-conv-bn', 26 | action='store_true', 27 | help='Whether to fuse conv and bn, this will slightly increase ' 28 | 'the inference speed') 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | 36 | cfg = Config.fromfile(args.config) 37 | # set cudnn_benchmark 38 | if cfg.get('cudnn_benchmark', False): 39 | torch.backends.cudnn.benchmark = True 40 | cfg.model.pretrained = None 41 | cfg.data.test.test_mode = True 42 | 43 | # build the dataloader 44 | # TODO: support multiple images per gpu (only minor changes are needed) 45 | print(cfg.data.test) 46 | dataset = custom_build_dataset(cfg.data.test) 47 | data_loader = build_dataloader( 48 | dataset, 49 | samples_per_gpu=1, 50 | workers_per_gpu=cfg.data.workers_per_gpu, 51 | dist=False, 52 | shuffle=False) 53 | 54 | # build the model and load checkpoint 55 | cfg.model.train_cfg = None 56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 57 | fp16_cfg = cfg.get('fp16', None) 58 | if fp16_cfg is not None: 59 | wrap_fp16_model(model) 60 | if args.checkpoint is not None: 61 | load_checkpoint(model, args.checkpoint, map_location='cpu') 62 | #if args.fuse_conv_bn: 63 | # model = fuse_module(model) 64 | 65 | model = MMDataParallel(model, device_ids=[0]) 66 | 67 | model.eval() 68 | 69 | # the first several iterations may be very slow so skip them 70 | num_warmup = 5 71 | pure_inf_time = 0 72 | 73 | # benchmark with several samples and take the average 74 | for i, data in enumerate(data_loader): 75 | torch.cuda.synchronize() 76 | start_time = time.perf_counter() 77 | with torch.no_grad(): 78 | model(return_loss=False, rescale=True, **data) 79 | 80 | torch.cuda.synchronize() 81 | elapsed = time.perf_counter() - start_time 82 | 83 | if i >= num_warmup: 84 | pure_inf_time += elapsed 85 | if (i + 1) % args.log_interval == 0: 86 | fps = (i + 1 - num_warmup) / pure_inf_time 87 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 88 | f'fps: {fps:.1f} img / s') 89 | 90 | if (i + 1) == args.samples: 91 | # elapsed was already accumulated above; do not double count it 92 | fps = (i + 1 - num_warmup) / pure_inf_time 93 | print(f'Overall fps: {fps:.1f} img / s') 94 | break 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 |
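For reference, here is the warm-up-and-average pattern used in `benchmark.py` above, distilled into a self-contained sketch. The `work` callable is a stand-in assumption for one inference step; on a CUDA machine the synchronize calls make the timings honest.

```python
import time

import torch


def measure_fps(work, num_iters=100, num_warmup=5):
    """Average iterations/second over num_iters calls, skipping warm-up."""
    pure_inf_time = 0.0
    for i in range(num_iters):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()
        work()
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        if i >= num_warmup:  # the first iterations are dominated by start-up cost
            pure_inf_time += time.perf_counter() - start
    return (num_iters - num_warmup) / pure_inf_time


print(f'{measure_fps(lambda: torch.randn(256, 256).softmax(-1)):.1f} it / s')
```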
-------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import sys 5 | sys.path.append('.') 6 | 7 | import numpy as np 8 | import torch 9 | from mmcv import Config, DictAction 10 | 11 | # from mmdet.models import build_detector 12 | from mmdet3d.models import build_model 13 | 14 | try: 15 | from mmcv.cnn import get_model_complexity_info 16 | except ImportError: 17 | raise ImportError('Please upgrade mmcv to >0.6.2') 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 22 | parser.add_argument('config', help='train config file path') 23 | parser.add_argument( 24 | '--shape', 25 | type=int, 26 | nargs='+', 27 | # default=[1280, 800], 28 | default=[900,1600], 29 | 30 | help='input image size') 31 | parser.add_argument( 32 | '--cfg-options', 33 | nargs='+', 34 | action=DictAction, 35 | help='override some settings in the used config, the key-value pair ' 36 | 'in xxx=yyy format will be merged into config file. If the value to ' 37 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 38 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 39 | 'Note that the quotation marks are necessary and that no white space ' 40 | 'is allowed.') 41 | parser.add_argument( 42 | '--size-divisor', 43 | type=int, 44 | default=32, 45 | help='Pad the input image, the minimum size that is divisible ' 46 | 'by size_divisor, -1 means do not pad the image.') 47 | args = parser.parse_args() 48 | return args 49 | 50 | 51 | def main(): 52 | 53 | args = parse_args() 54 | 55 | if len(args.shape) == 1: 56 | h = w = args.shape[0] 57 | elif len(args.shape) == 2: 58 | h, w = args.shape 59 | else: 60 | raise ValueError('invalid input shape') 61 | ori_shape = (3, h, w) 62 | divisor = args.size_divisor 63 | if divisor > 0: 64 | h = int(np.ceil(h / divisor)) * divisor 65 | w = int(np.ceil(w / divisor)) * divisor 66 | 67 | input_shape = (3, h, w) 68 | 69 | cfg = Config.fromfile(args.config) 70 | if args.cfg_options is not None: 71 | cfg.merge_from_dict(args.cfg_options) 72 | 73 | # import modules from plugin/xx, registry will be updated 74 | if hasattr(cfg, 'plugin'): 75 | if cfg.plugin: 76 | import importlib 77 | if hasattr(cfg, 'plugin_dir'): 78 | plugin_dir = cfg.plugin_dir 79 | _module_dir = os.path.dirname(plugin_dir) 80 | _module_dir = _module_dir.split('/') 81 | _module_path = _module_dir[0] 82 | 83 | for m in _module_dir[1:]: 84 | _module_path = _module_path + '.' + m 85 | print(_module_path) 86 | plg_lib = importlib.import_module(_module_path) 87 | else: 88 | # import dir is the dirpath for the config file 89 | _module_dir = os.path.dirname(args.config) 90 | _module_dir = _module_dir.split('/') 91 | _module_path = _module_dir[0] 92 | for m in _module_dir[1:]: 93 | _module_path = _module_path + '.'
+ m 94 | print(_module_path) 95 | plg_lib = importlib.import_module(_module_path) 96 | 97 | from projects.mmdet3d_plugin.bevformer.apis.train import custom_train_model 98 | 99 | model = build_model( 100 | cfg.model, 101 | train_cfg=cfg.get('train_cfg'), 102 | test_cfg=cfg.get('test_cfg')) 103 | if torch.cuda.is_available(): 104 | model.cuda() 105 | model.eval() 106 | 107 | 108 | if hasattr(model, 'forward_dummy'): 109 | model.forward = model.forward_dummy 110 | else: 111 | raise NotImplementedError( 112 | 'FLOPs counter is currently not supported with {}'. 113 | format(model.__class__.__name__)) 114 | 115 | flops, params = get_model_complexity_info(model, input_shape) 116 | split_line = '=' * 30 117 | 118 | if divisor > 0 and \ 119 | input_shape != ori_shape: 120 | print(f'{split_line}\nUsing the size divisor, input shape padded ' 121 | f'from {ori_shape} to {input_shape}\n') 122 | print(f'{split_line}\nInput shape: {input_shape}\n' 123 | f'Flops: {flops}\nParams: {params}\n{split_line}') 124 | print('!!!Please be cautious if you use the results in papers. ' 125 | 'You may need to check if all ops are supported and verify that the ' 126 | 'flops computation is correct.') 127 | 128 | 129 | if __name__ == '__main__': 130 | main() -------------------------------------------------------------------------------- /tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | file_path = './ckpts/bevformer_v4.pth' 3 | file_path = '/horizon-bucket/SD_Algorithm/12_perception_bev_hde/02_user/yiang.shi/flexible_occ_log/v1_298/cluster/cluster_v1_298_baselinedebug_addstage/epoch_24.pth' 4 | model = torch.load(file_path, map_location='cpu') 5 | all = 0 6 | for key in list(model['state_dict'].keys()): 7 | all += model['state_dict'][key].nelement() 8 | print(all) 9 | 10 | # smaller 63374123 11 | # v4 69140395 12 | # bev_baseline : 58926586 13 | # v1_298: 51027024 14 | -------------------------------------------------------------------------------- /tools/create_data.sh: -------------------------------------------------------------------------------- 1 | export nuScene_root="/horizon-bucket/aidi_public_data/nuScenes/origin" 2 | # export dataroot="/horizon-bucket/SD_Algorithm/12_perception_bev_hde/02_user/yiang.shi/dataset/Occpancy3D-nuScenes-V1.0" 3 | export dataroot="/horizon-bucket/SD_Algorithm/12_perception_bev_hde/02_user/yiang.shi/dataset/occ3d_nus_v1.0" 4 | export canbus="/horizon-bucket/SD_Algorithm/12_perception_bev_hde/02_user/yiang.shi/dataset/Occpancy3D-nuScenes-V1.0" 5 | export version="v1.0-trainval" 6 | # export version="v1.0-mini" 7 | 8 | # export datamini=${dataroot}/${version} 9 | 10 | 11 | python tools/create_data.py occ \ 12 | --root-path ${nuScene_root} \ 13 | --out-dir ${dataroot} --extra-tag occtrainval \ 14 | --canbus ${canbus} \ 15 | --version ${version} \ 16 | --occ-path ${dataroot} 17 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
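# Package marker for the data converters dispatched by tools/create_data.py
# (see tools/create_data.sh above for the `occ` invocation).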
2 | -------------------------------------------------------------------------------- /tools/dist_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | # GPUS=$3 6 | # PORT=${PORT:-28500} 7 | # PORT=$4 8 | 9 | 10 | # distributed launch (kept for reference): 11 | # python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 12 | #     $(dirname "$0")/benchmark.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | python $(dirname "$0")/benchmark.py $CONFIG $CHECKPOINT -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=$4 7 | 8 | # PORT=${PORT:-29500} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 12 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:5} --eval bbox 13 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | # PORT=${PORT:-28509} 6 | PORT=$3 7 | 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:4} --deterministic 12 | -------------------------------------------------------------------------------- /tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
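# Background for the fusion below (derivation added as a comment, not in the
# original file): a BN layer computes y = gamma * (x - mean) / sqrt(var + eps)
# + beta. Substituting the conv output x = w * input + b yields an equivalent
# convolution with
#     w_fused = w * gamma / sqrt(var + eps)           (per output channel)
#     b_fused = (b - mean) * gamma / sqrt(var + eps) + beta
# which is exactly the `factor` arithmetic in fuse_conv_bn() below.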
2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet3d.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionality of batch norm layers is turned off; 12 | only the per-channel running mean and variance are used. This exposes the 13 | chance to fuse BN with the preceding conv layers to save computation and 14 | simplify the network structure.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
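# Summary (comment added for orientation): this script loads a results pickle
# produced by test.py, rebuilds the test dataset from the config, and calls
# the dataset's show() method (with cfg.eval_pipeline when present) to dump
# visualizations into --show-dir.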
2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file[:-len('.pth')] + '-{}.pth'.format(sha[:8])  # note: rstrip('.pth') would strip characters, not the suffix 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
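# Key mapping performed below (summary comment, not in the original file):
#   stem.conv.* -> conv1.*          stem.bn.* -> bn1.*
#   head.fc.*   -> fc.*
#   s{i}.b{j}.proj/bn.* (first block) -> layer{i}.{j-1}.downsample.0/1.*
#   s{i}.b{j}.f.{a,b,c}[_bn].*        -> layer{i}.{j-1}.conv{1,2,3}/bn{1,2,3}.*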
2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load pycls model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src pycls model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:4} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17
| --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0,1,2 2 | export WORKDIR="./work_dirs" 3 | export PORT=29990 4 | 5 | bash ./tools/dist_train.sh projects/configs/osp/osp_minibatch.py 3 ${PORT} \ 6 | --work-dir ${WORKDIR} --------------------------------------------------------------------------------