├── .dev_scripts ├── gather_models.py ├── gen_benchmark_script.py ├── linter.sh ├── test_benchmark.sh └── train_benchmark.sh ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── error-report.md │ ├── feature_request.md │ ├── general_questions.md │ └── reimplementation_questions.md ├── pull_request_template.md └── workflows │ ├── build.yml │ ├── deploy.yml │ ├── lint.yml │ └── test_mim.yml ├── .gitignore ├── 3rd-party-licenses.txt ├── LICENSE ├── README.md ├── assets ├── scene-0038_gt.mp4 ├── scene-0107_gt.mp4 ├── scene-0558.mp4 └── scene-0916.mp4 ├── configs ├── _base_ │ ├── datasets │ │ ├── coco_instance.py │ │ ├── kitti-3d-3class.py │ │ ├── kitti-3d-car.py │ │ ├── kitti-mono3d.py │ │ ├── lyft-3d.py │ │ ├── nuim_instance.py │ │ ├── nus-3d.py │ │ ├── nus-mono3d.py │ │ ├── range100_lyft-3d.py │ │ ├── s3dis-3d-5class.py │ │ ├── s3dis_seg-3d-13class.py │ │ ├── scannet-3d-18class.py │ │ ├── scannet_seg-3d-20class.py │ │ ├── sunrgbd-3d-10class.py │ │ ├── waymoD5-3d-3class.py │ │ └── waymoD5-3d-car.py │ ├── default_runtime.py │ ├── models │ │ ├── 3dssd.py │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ ├── dgcnn.py │ │ ├── fcaf3d.py │ │ ├── fcos3d.py │ │ ├── groupfree3d.py │ │ ├── h3dnet.py │ │ ├── hv_pointpillars_fpn_lyft.py │ │ ├── hv_pointpillars_fpn_nus.py │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ ├── hv_second_secfpn_kitti.py │ │ ├── hv_second_secfpn_waymo.py │ │ ├── imvotenet_image.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── paconv_cuda_ssg.py │ │ ├── paconv_ssg.py │ │ ├── parta2.py │ │ ├── pgd.py │ │ ├── point_rcnn.py │ │ ├── pointnet2_msg.py │ │ ├── pointnet2_ssg.py │ │ ├── smoke.py │ │ └── votenet.py │ └── schedules │ │ ├── cosine.py │ │ ├── cyclic_20e.py │ │ ├── mmdet_schedule_1x.py │ │ ├── schedule_2x.py │ │ ├── schedule_3x.py │ │ ├── seg_cosine_100e.py │ │ ├── seg_cosine_150e.py │ │ ├── seg_cosine_200e.py │ │ └── seg_cosine_50e.py └── occflownet │ ├── occflownet_stbase_2d.py │ ├── occflownet_stbase_2d_flow.py │ └── occflownet_stbase_2d_flow_3d.py ├── mmdet3d ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ └── anchor_3d_generator.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ └── __init__.py │ │ ├── box_np_ops.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── anchor_free_bbox_coder.py │ │ │ ├── centerpoint_bbox_coders.py │ │ │ ├── delta_xyzwhlr_bbox_coder.py │ │ │ ├── fcos3d_bbox_coder.py │ │ │ ├── groupfree3d_bbox_coder.py │ │ │ ├── monoflex_bbox_coder.py │ │ │ ├── partial_bin_based_bbox_coder.py │ │ │ ├── pgd_bbox_coder.py │ │ │ ├── point_xyzwhlr_bbox_coder.py │ │ │ └── smoke_bbox_coder.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── iou3d_calculator.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ └── iou_neg_piecewise_sampler.py │ │ ├── structures │ │ │ ├── __init__.py │ │ │ ├── base_box3d.py │ │ │ ├── box_3d_mode.py │ │ │ ├── cam_box3d.py │ │ │ ├── coord_3d_mode.py │ │ │ ├── depth_box3d.py │ │ │ ├── lidar_box3d.py │ │ │ └── utils.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── indoor_eval.py │ │ ├── instance_seg_eval.py │ │ ├── kitti_utils │ │ │ ├── __init__.py │ │ │ ├── eval.py │ │ │ └── rotate_iou.py │ │ ├── lyft_eval.py │ │ ├── scannet_utils │ │ │ ├── __init__.py │ │ │ ├── evaluate_semantic_instance.py │ │ │ └── 
util_3d.py │ │ ├── seg_eval.py │ │ └── waymo_utils │ │ │ ├── __init__.py │ │ │ └── prediction_kitti_to_waymo.py │ ├── hook │ │ ├── __init__.py │ │ ├── customcheckpoint.py │ │ ├── ema.py │ │ ├── sequentialcontrol.py │ │ ├── syncbncontrol.py │ │ └── utils.py │ ├── points │ │ ├── __init__.py │ │ ├── base_points.py │ │ ├── cam_points.py │ │ ├── depth_points.py │ │ └── lidar_points.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── box3d_nms.py │ │ └── merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── array_converter.py │ │ └── gaussian.py │ ├── visualizer │ │ ├── __init__.py │ │ ├── image_vis.py │ │ ├── open3d_vis.py │ │ └── show_result.py │ └── voxel │ │ ├── __init__.py │ │ ├── builder.py │ │ └── voxel_generator.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── custom_3d.py │ ├── dataset_wrappers.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_occ.py │ ├── occ_metrics.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── data_augment_utils.py │ │ ├── dbsampler.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms_3d.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── resnet.py │ │ └── swin.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bevdet.py │ │ ├── centerpoint.py │ │ ├── mvx_two_stage.py │ │ └── occflownet.py │ ├── losses │ │ ├── __init__.py │ │ ├── axis_aligned_iou_loss.py │ │ ├── bce_loss.py │ │ ├── chamfer_distance.py │ │ ├── dist_loss.py │ │ ├── huber_loss.py │ │ ├── multibin_loss.py │ │ ├── paconv_regularization_loss.py │ │ ├── rotated_iou_loss.py │ │ ├── silog_loss.py │ │ ├── tv_loss.py │ │ └── uncertain_smooth_l1_loss.py │ ├── necks │ │ ├── __init__.py │ │ ├── fpn.py │ │ ├── lss_fpn.py │ │ ├── second_fpn.py │ │ └── view_transformer.py │ └── occflownet_modules │ │ ├── __init__.py │ │ ├── hooks.py │ │ ├── nerf_decoder.py │ │ ├── renderer.py │ │ ├── samplers.py │ │ └── utils.py ├── ops │ ├── __init__.py │ ├── bev_pool_v2 │ │ ├── __init__.py │ │ ├── bev_pool.py │ │ └── src │ │ │ ├── bev_pool.cpp │ │ │ └── bev_pool_cuda.cu │ ├── dgcnn_modules │ │ ├── __init__.py │ │ ├── dgcnn_fa_module.py │ │ ├── dgcnn_fp_module.py │ │ └── dgcnn_gf_module.py │ ├── norm.py │ ├── paconv │ │ ├── __init__.py │ │ ├── paconv.py │ │ └── utils.py │ ├── pointnet_modules │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── paconv_sa_module.py │ │ ├── point_fp_module.py │ │ └── point_sa_module.py │ ├── sparse_block.py │ └── spconv │ │ ├── __init__.py │ │ └── overwrite_spconv │ │ ├── __init__.py │ │ └── write_spconv2.py ├── utils │ ├── __init__.py │ ├── camera.py │ ├── collect_env.py │ ├── comm.py │ ├── compat_cfg.py │ ├── constants.py │ ├── env.py │ ├── logger.py │ ├── misc.py │ └── setup_env.py └── version.py ├── overview.png ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── mminstall.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── setup.cfg ├── setup.py └── tools ├── create_data_bevdet.py ├── create_flow_data.py ├── data_converter ├── __init__.py ├── create_gt_database.py └── nuscenes_converter.py ├── dist_test.sh ├── dist_train.sh ├── test.py └── train.py /.dev_scripts/linter.sh: -------------------------------------------------------------------------------- 1 | yapf -r -i mmdet3d/ configs/ tests/ tools/ 2 | isort mmdet3d/ configs/ tests/ tools/ 3 | flake8 . 
4 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | - Using welcoming and inclusive language 18 | - Being respectful of differing viewpoints and experiences 19 | - Gracefully accepting constructive criticism 20 | - Focusing on what is best for the community 21 | - Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | - The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | - Trolling, insulting/derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at chenkaidev@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | For answers to common questions about this code of conduct, see 74 | https://www.contributor-covenant.org/faq 75 | 76 | [homepage]: https://www.contributor-covenant.org 77 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | We appreciate all contributions to improve MMDetection3D. Please refer to [CONTRIBUTING.md](https://github.com/open-mmlab/mmcv/blob/master/CONTRIBUTING.md) in MMCV for more details about the contributing guidelines. 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/error-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Error report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | Thanks for your error report; we appreciate it a lot. 10 | 11 | **Checklist** 12 | 13 | 1. I have searched related issues but cannot get the expected help. 14 | 2. The bug has not been fixed in the latest version. 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | 19 | **Reproduction** 20 | 21 | 1. What command or script did you run? 22 | 23 | ``` 24 | A placeholder for the command. 25 | ``` 26 | 27 | 2. Did you make any modifications to the code or config? Did you understand what you have modified? 28 | 3. What dataset did you use? 29 | 30 | **Environment** 31 | 32 | 1. Please run `python mmdet3d/utils/collect_env.py` to collect necessary environment information and paste it here. 33 | 2. You may add additional information that may be helpful for locating the problem, such as 34 | - How you installed PyTorch \[e.g., pip, conda, source\] 35 | - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 36 | 37 | **Error traceback** 38 | If applicable, paste the error traceback here. 39 | 40 | ``` 41 | A placeholder for the traceback. 42 | ``` 43 | 44 | **Bug fix** 45 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here; that would be much appreciated! 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | **Describe the feature** 10 | 11 | **Motivation** 12 | A clear and concise description of the motivation for the feature. 13 | Ex1. It is inconvenient when \[....\]. 14 | Ex2. There is a recent paper \[....\], which is very helpful for \[....\]. 15 | 16 | **Related resources** 17 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | If you would like to implement the feature and create a PR, please leave a comment here; that would be much appreciated. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General questions 3 | about: Ask general questions to get help 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/reimplementation_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Reimplementation Questions 3 | about: Ask questions about model reimplementation 4 | title: '' 5 | labels: reimplementation 6 | assignees: '' 7 | --- 8 | 9 | **Notice** 10 | 11 | There are several common situations in reimplementation issues, listed below: 12 | 13 | 1. Reimplement a model in the model zoo using the provided configs 14 | 2. Reimplement a model in the model zoo on other datasets (e.g., custom datasets) 15 | 3. Reimplement a custom model but all the components are implemented in MMDetection3D 16 | 4. Reimplement a custom model with new modules implemented by yourself 17 | 18 | What to do differs by case, as below. 19 | 20 | - For cases 1 & 3, please follow the steps in the following sections so that we can quickly identify the issue. 21 | - For cases 2 & 4, please understand that we are not able to help much here because we usually do not know the full code, and users are responsible for the code they write. 22 | - One suggestion for cases 2 & 4 is to first check whether the bug lies in the self-implemented code or the original code. For example, users can first make sure that the same model runs well on supported datasets. If you still need help, please describe what you have done and what you obtained in the issue, follow the steps in the following sections, and be as clear as possible so that we can better help you. 23 | 24 | **Checklist** 25 | 26 | 1. I have searched related issues but cannot get the expected help. 27 | 2. The issue has not been fixed in the latest version. 28 | 29 | **Describe the issue** 30 | 31 | A clear and concise description of the problem you met and what you have done. 32 | 33 | **Reproduction** 34 | 35 | 1. What command or script did you run? 36 | 37 | ``` 38 | A placeholder for the command. 39 | ``` 40 | 41 | 2. Which config did you run? 42 | 43 | ``` 44 | A placeholder for the config. 45 | ``` 46 | 47 | 3. Did you make any modifications to the code or config? Did you understand what you have modified? 48 | 4. What dataset did you use? 49 | 50 | **Environment** 51 | 52 | 1. Please run `python mmdet3d/utils/collect_env.py` to collect necessary environment information and paste it here. 53 | 2. You may add additional information that may be helpful for locating the problem, such as 54 | - How you installed PyTorch \[e.g., pip, conda, source\] 55 | - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 56 | 57 | **Results** 58 | 59 | If applicable, paste the related results here, e.g., what you expected and what you got.
60 | 61 | ``` 62 | A placeholder for results comparison 63 | ``` 64 | 65 | **Issue fix** 66 | 67 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here; that would be much appreciated! 68 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Thanks for your contribution, and we appreciate it a lot. The following instructions will make your pull request healthier and help it get feedback more easily. If you do not understand some items, don't worry, just make the pull request and seek help from maintainers. 2 | 3 | ## Motivation 4 | 5 | Please describe the motivation of this PR and the goal you want to achieve through this PR. 6 | 7 | ## Modification 8 | 9 | Please briefly describe what modification is made in this PR. 10 | 11 | ## BC-breaking (Optional) 12 | 13 | Does the modification introduce changes that break the backward compatibility of the downstream repos? 14 | If so, please describe how it breaks the compatibility and how the downstream projects should modify their code to keep compatibility with this PR. 15 | 16 | ## Use cases (Optional) 17 | 18 | If this PR introduces a new feature, it is better to list some use cases here and update the documentation. 19 | 20 | ## Checklist 21 | 22 | 1. Pre-commit or other linting tools are used to fix the potential lint issues. 23 | 2. The modification is covered by complete unit tests. If not, please add more unit tests to ensure correctness. 24 | 3. If the modification has potential influence on downstream projects, this PR should be tested with downstream projects. 25 | 4. The documentation has been modified accordingly, like docstrings or example tutorials. 26 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: deploy 2 | 3 | on: push 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | build-n-publish: 11 | runs-on: ubuntu-18.04 12 | if: startsWith(github.event.ref, 'refs/tags') 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.7 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.7 19 | - name: Install torch 20 | run: pip install torch 21 | - name: Build MMDet3D 22 | run: python setup.py sdist 23 | - name: Publish distribution to PyPI 24 | run: | 25 | pip install twine 26 | twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }} 27 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: [push, pull_request] 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-18.04 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.7 15 | uses: actions/setup-python@v1 16 | with: 17 | python-version: 3.7 18 | - name: Install linting dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install flake8==3.8.3 isort==5.10.1 yapf==v0.30.0 interrogate 22 | - name: Lint with flake8 23 | run: flake8 .
24 | - name: Lint with isort 25 | run: isort --check-only --diff mmdet3d/ tests/ examples/ 26 | - name: Format python code with yapf 27 | run: yapf -r -d mmdet3d/ tests/ examples/ 28 | - name: Check docstring 29 | run: interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmdet3d/ops --ignore-regex "__repr__" --fail-under 95 mmdet3d 30 | -------------------------------------------------------------------------------- /.github/workflows/test_mim.yml: -------------------------------------------------------------------------------- 1 | name: test-mim 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'model-index.yml' 7 | - 'configs/**' 8 | 9 | pull_request: 10 | paths: 11 | - 'model-index.yml' 12 | - 'configs/**' 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build_cpu: 20 | runs-on: ubuntu-18.04 21 | strategy: 22 | matrix: 23 | python-version: [3.7] 24 | torch: [1.8.0] 25 | include: 26 | - torch: 1.8.0 27 | torch_version: torch1.8 28 | torchvision: 0.9.0 29 | steps: 30 | - uses: actions/checkout@v2 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@v2 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | - name: Upgrade pip 36 | run: pip install pip --upgrade 37 | - name: Install Pillow 38 | run: pip install Pillow==6.2.2 39 | if: ${{matrix.torchvision == '0.4.2'}} 40 | - name: Install PyTorch 41 | run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html 42 | - name: Install openmim 43 | run: pip install openmim 44 | - name: Build and install 45 | run: rm -rf .eggs && mim install -e . 46 | - name: test commands of mim 47 | run: mim search mmdet3d 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.ipynb 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/en/_build/ 69 | docs/zh_cn/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # cython generated cpp 109 | data 110 | .vscode 111 | .idea 112 | 113 | # custom 114 | *.pkl 115 | *.pkl.json 116 | *.log.json 117 | work_dirs/ 118 | exps/ 119 | *~ 120 | mmdet3d/.mim 121 | ckpts 122 | experiments 123 | 124 | # Pytorch 125 | *.pth 126 | 127 | # demo 128 | *.jpg 129 | # *.png 130 | data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/ 131 | data/scannet/scans/ 132 | data/sunrgbd/OFFICIAL_SUNRGBD/ 133 | *.obj 134 | *.ply 135 | 136 | # Waymo evaluation 137 | mmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main 138 | -------------------------------------------------------------------------------- /3rd-party-licenses.txt: -------------------------------------------------------------------------------- 1 | Third Party Licenses 2 | ==================== 3 | 4 | ------------------------------------------------------------------------- 5 | Overview 6 | -------------------------------------------------------------------------- 7 | 8 | BEVDet: https://github.com/HuangJunJie2017/BEVDet 9 | License: Apache-2.0 license -------------------------------------------------------------------------------- /assets/scene-0038_gt.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0038_gt.mp4 -------------------------------------------------------------------------------- /assets/scene-0107_gt.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0107_gt.mp4 -------------------------------------------------------------------------------- /assets/scene-0558.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0558.mp4 -------------------------------------------------------------------------------- /assets/scene-0916.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0916.mp4 -------------------------------------------------------------------------------- 
/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/kitti-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'KittiMonoDataset' 2 | data_root = 'data/kitti/' 3 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 4 | input_modality = dict(use_lidar=False, use_camera=True) 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFileMono3D'), 9 | dict( 10 | type='LoadAnnotations3D', 11 | with_bbox=True, 12 | with_label=True, 13 | with_attr_label=False, 14 | with_bbox_3d=True, 15 | with_label_3d=True, 16 | with_bbox_depth=True), 17 | dict(type='Resize', img_scale=(1242, 375), keep_ratio=True), 18 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle3D', class_names=class_names), 22 | dict( 23 | type='Collect3D', 24 | keys=[ 25 | 'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d', 26 | 'centers2d', 'depths' 27 | ]), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFileMono3D'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(1242, 375), 34 | flip=False, 35 | transforms=[ 36 | dict(type='RandomFlip3D'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict( 40 | type='DefaultFormatBundle3D', 41 | class_names=class_names, 42 | with_label=False), 43 | dict(type='Collect3D', keys=['img']), 44 | ]) 45 | ] 46 | # construct a pipeline for data and gt loading in show function 47 | # please keep its loading function consistent 
with test_pipeline (e.g. client) 48 | eval_pipeline = [ 49 | dict(type='LoadImageFromFileMono3D'), 50 | dict( 51 | type='DefaultFormatBundle3D', 52 | class_names=class_names, 53 | with_label=False), 54 | dict(type='Collect3D', keys=['img']) 55 | ] 56 | data = dict( 57 | samples_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | data_root=data_root, 62 | ann_file=data_root + 'kitti_infos_train_mono3d.coco.json', 63 | info_file=data_root + 'kitti_infos_train.pkl', 64 | img_prefix=data_root, 65 | classes=class_names, 66 | pipeline=train_pipeline, 67 | modality=input_modality, 68 | test_mode=False, 69 | box_type_3d='Camera'), 70 | val=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 74 | info_file=data_root + 'kitti_infos_val.pkl', 75 | img_prefix=data_root, 76 | classes=class_names, 77 | pipeline=test_pipeline, 78 | modality=input_modality, 79 | test_mode=True, 80 | box_type_3d='Camera'), 81 | test=dict( 82 | type=dataset_type, 83 | data_root=data_root, 84 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 85 | info_file=data_root + 'kitti_infos_val.pkl', 86 | img_prefix=data_root, 87 | classes=class_names, 88 | pipeline=test_pipeline, 89 | modality=input_modality, 90 | test_mode=True, 91 | box_type_3d='Camera')) 92 | evaluation = dict(interval=2) 93 | -------------------------------------------------------------------------------- /configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | 
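# --- Editor's note, not part of the repository ------------------------------
# The `train_pipeline`/`test_pipeline`/`eval_pipeline` lists in the dataset
# configs above are plain lists of transform configs. At runtime the dataset
# builds each dict through the PIPELINES registry and chains the results; the
# repository's own implementation lives in mmdet3d/datasets/pipelines/compose.py.
# A simplified sketch of that mechanism, assuming mmcv's Registry/build_from_cfg:
from mmcv.utils import Registry, build_from_cfg

PIPELINES = Registry('pipeline')


class Compose:
    """Chain a list of transform configs into a single callable."""

    def __init__(self, transforms):
        # Each entry may be a config dict (built via the registry) or an
        # already-instantiated callable.
        self.transforms = [
            build_from_cfg(t, PIPELINES) if isinstance(t, dict) else t
            for t in transforms
        ]

    def __call__(self, results):
        for t in self.transforms:
            results = t(results)
            # A transform may return None to drop an invalid sample.
            if results is None:
                return None
        return results
# -----------------------------------------------------------------------------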
-------------------------------------------------------------------------------- /configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'NuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | 20 | # disable opencv multithreading to avoid system being overloaded 21 | opencv_num_threads = 0 22 | # set multi-process start method as `fork` to speed up the training 23 | mp_start_method = 'fork' 24 | -------------------------------------------------------------------------------- /configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 |
vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | 
share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 
| # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/dgcnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='DGCNNBackbone', 6 | in_channels=9, # [xyz, rgb, normal_xyz], modified with dataset 7 | num_samples=(20, 20, 20), 8 | knn_modes=('D-KNN', 'F-KNN', 'F-KNN'), 9 | radius=(None, None, None), 10 | gf_channels=((64, 64), (64, 64), (64, )), 11 | fa_channels=(1024, ), 12 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2)), 13 | decode_head=dict( 14 | type='DGCNNHead', 15 | fp_channels=(1216, 512), 16 | channels=256, 17 | dropout_ratio=0.5, 18 | conv_cfg=dict(type='Conv1d'), 19 | norm_cfg=dict(type='BN1d'), 20 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2), 21 | loss_decode=dict( 22 | type='CrossEntropyLoss', 23 | use_sigmoid=False, 24 | class_weight=None, # modified with dataset 25 | loss_weight=1.0)), 26 | # model training and testing settings 27 | train_cfg=dict(), 28 | test_cfg=dict(mode='slide')) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/fcaf3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MinkSingleStage3DDetector', 3 | voxel_size=.01, 4 | backbone=dict(type='MinkResNet', in_channels=3, depth=34), 5 | head=dict( 6 | type='FCAF3DHead', 7 | in_channels=(64, 128, 256, 512), 8 | out_channels=128, 9 | voxel_size=.01, 10 | pts_prune_threshold=100000, 11 | pts_assign_threshold=27, 12 | pts_center_threshold=18, 13 | n_classes=18, 14 | n_reg_outs=6), 15 | train_cfg=dict(), 16 | test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01)) 17 | -------------------------------------------------------------------------------- /configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | backbone=dict( 4 | type='ResNet', 5 | depth=101, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint='open-mmlab://detectron2/resnet101_caffe')), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5, 22 | relu_before_extra_convs=True), 23 | bbox_head=dict( 24 | type='FCOSMono3DHead', 25 | num_classes=10, 26 | in_channels=256, 27 | stacked_convs=2, 28 | feat_channels=256, 29 | use_direction_classifier=True, 30 | diff_rad_by_sin=True, 31 | pred_attrs=True, 32 | pred_velo=True, 33 | dir_offset=0.7854, # pi/4 34 | 
dir_limit_offset=0, 35 | strides=[8, 16, 32, 64, 128], 36 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 37 | cls_branch=(256, ), 38 | reg_branch=( 39 | (256, ), # offset 40 | (256, ), # depth 41 | (256, ), # size 42 | (256, ), # rot 43 | () # velo 44 | ), 45 | dir_branch=(256, ), 46 | attr_branch=(256, ), 47 | loss_cls=dict( 48 | type='FocalLoss', 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | loss_weight=1.0), 53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 54 | loss_dir=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_attr=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 58 | loss_centerness=dict( 59 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 60 | bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9), 61 | norm_on_bbox=True, 62 | centerness_on_reg=True, 63 | center_sampling=True, 64 | conv_bias=True, 65 | dcn_on_last_conv=True), 66 | train_cfg=dict( 67 | allowed_border=0, 68 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 69 | pos_weight=-1, 70 | debug=False), 71 | test_cfg=dict( 72 | use_rotate_nms=True, 73 | nms_across_levels=False, 74 | nms_pre=1000, 75 | nms_thr=0.8, 76 | score_thr=0.05, 77 | min_bbox_size=0, 78 | max_per_img=200)) 79 | -------------------------------------------------------------------------------- /configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | 
train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
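# Illustrative sanity check for the note above (derived from this file's own
# values): with point_cloud_range [-50, -50, -5, 50, 50, 3] and an x/y voxel
# size of 0.25, the BEV grid is (50 - (-50)) / 0.25 = 400 cells per axis,
# which is why pts_middle_encoder below uses output_shape=[400, 400].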
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) 53 | [1.7321, 0.5774, 1.], # 1 / sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=-0.7854, # -pi / 4 63 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 64 | loss_cls=dict( 65 | type='FocalLoss', 66 | use_sigmoid=True, 67 | gamma=2.0, 68 | alpha=0.25, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 71 | loss_dir=dict( 72 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 73 | # model training and testing settings 74 | train_cfg=dict( 75 | pts=dict( 76 | assigner=dict( 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.3, 82 | ignore_iof_thr=-1), 83 | allowed_border=0, 84 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 85 | pos_weight=-1, 86 | debug=False)), 87 | test_cfg=dict( 88 | pts=dict( 89 | use_rotate_nms=True, 90 | nms_across_levels=False, 91 | nms_pre=1000, 92 | nms_thr=0.2, 93 | score_thr=0.05, 94 | min_bbox_size=0, 95 | max_num=500))) 96 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
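# Illustrative sanity check: this variant widens the x/y range to
# [-100, 100] while inheriting the 0.25 voxel size from the base config, so
# the BEV grid grows to (100 - (-100)) / 0.25 = 800 cells per axis, matching
# pts_middle_encoder's output_shape=[800, 800] below.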
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | assign_per_class=True, 38 | anchor_generator=dict( 39 | type='AlignedAnchor3DRangeGenerator', 40 | ranges=[ 41 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 42 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 43 | [0, -39.68, -1.78, 69.12, 39.68, -1.78], 44 | ], 45 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 46 | rotations=[0, 1.57], 47 | reshape_out=False), 48 | diff_rad_by_sin=True, 49 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 50 | loss_cls=dict( 51 | type='FocalLoss', 52 | use_sigmoid=True, 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 57 | loss_dir=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | assigner=[ 62 | dict( # for Pedestrian 63 | type='MaxIoUAssigner', 64 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 65 | pos_iou_thr=0.5, 66 | neg_iou_thr=0.35, 67 | min_pos_iou=0.35, 68 | ignore_iof_thr=-1), 69 | dict( # for Cyclist 70 | type='MaxIoUAssigner', 71 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 72 | pos_iou_thr=0.5, 73 | neg_iou_thr=0.35, 74 | min_pos_iou=0.35, 75 | ignore_iof_thr=-1), 76 | dict( # for Car 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.45, 81 | min_pos_iou=0.45, 82 | ignore_iof_thr=-1), 83 | ], 84 | allowed_border=0, 85 | pos_weight=-1, 86 | debug=False), 87 | test_cfg=dict( 88 | use_rotate_nms=True, 89 | nms_across_levels=False, 90 | nms_thr=0.01, 91 | score_thr=0.1, 
92 | min_bbox_size=0, 93 | nms_pre=100, 94 | max_num=50)) 95 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [4.73, 2.08, 1.77], # car 52 | [1.81, 0.84, 1.77], # cyclist 53 | [0.91, 0.84, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=-0.7854, # -pi / 4 59 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 60 | loss_cls=dict( 61 | type='FocalLoss', 62 | use_sigmoid=True, 63 | gamma=2.0, 64 | alpha=0.25, 65 | loss_weight=1.0), 66 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 67 | loss_dir=dict( 68 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | pts=dict( 72 | assigner=[ 73 | dict( # car 74 | type='MaxIoUAssigner', 75 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.55, 77 | neg_iou_thr=0.4, 78 | min_pos_iou=0.4, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1), 87 | dict( # pedestrian 88 | type='MaxIoUAssigner', 89 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.3, 92 | min_pos_iou=0.3, 93 | ignore_iof_thr=-1), 94 | ], 95 | allowed_border=0, 96 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 97 | pos_weight=-1, 98 | debug=False)), 99 | test_cfg=dict( 100 | pts=dict( 101 | use_rotate_nms=True, 102 | nms_across_levels=False, 103 | nms_pre=4096, 104 | 
nms_thr=0.25, 105 | score_thr=0.1, 106 | min_bbox_size=0, 107 | max_num=500))) 108 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
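# Illustrative sanity check: with point_cloud_range
# [-76.8, -51.2, -2, 76.8, 51.2, 4] and voxel_size [0.08, 0.08, 0.1], the
# grid is 153.6 / 0.08 = 1920 (x) by 102.4 / 0.08 = 1280 (y) by
# 6 / 0.1 = 60 (z) voxels; SparseEncoder's sparse_shape [61, 1280, 1920]
# below adds the one extra z slice that SECOND-style sparse encoders expect.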
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [4.73, 2.08, 1.77], # car 46 | [0.91, 0.84, 1.74], # pedestrian 47 | [1.81, 0.84, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=-0.7854, # -pi / 4 53 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 54 | loss_cls=dict( 55 | type='FocalLoss', 56 | use_sigmoid=True, 57 | gamma=2.0, 58 | alpha=0.25, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 61 | loss_dir=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | assigner=[ 66 | dict( # car 67 | type='MaxIoUAssigner', 68 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 69 | pos_iou_thr=0.55, 70 | neg_iou_thr=0.4, 71 | min_pos_iou=0.4, 72 | ignore_iof_thr=-1), 73 | dict( # pedestrian 74 | type='MaxIoUAssigner', 75 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.5, 77 | neg_iou_thr=0.3, 78 | min_pos_iou=0.3, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1) 87 | ], 88 | allowed_border=0, 89 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 90 | pos_weight=-1, 91 | debug=False), 92 | test_cfg=dict( 93 | use_rotate_nms=True, 94 | nms_across_levels=False, 95 | nms_pre=4096, 96 | nms_thr=0.25, 97 | score_thr=0.1, 98 | min_bbox_size=0, 99 | max_num=500)) 100 | -------------------------------------------------------------------------------- /configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 
2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 | paconv_num_kernels=[16, 16, 16], 20 | 
paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from the backbone; 30 | # different from PointNet++, it also concatenates input features in the last 31 | # level of the decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/pgd.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos3d.py' 2 | # model settings 3 | model = dict( 4 | bbox_head=dict( 5 | _delete_=True, 6 | type='PGDHead', 7 | num_classes=10, 8 | in_channels=256, 9 | stacked_convs=2, 10 | feat_channels=256, 11 | use_direction_classifier=True, 12 | diff_rad_by_sin=True, 13 | pred_attrs=True, 14 | pred_velo=True, 15 | pred_bbox2d=True, 16 | pred_keypoints=False, 17 | dir_offset=0.7854, # pi/4 18 | strides=[8, 16, 32, 64, 128], 19 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 20 | cls_branch=(256, ), 21 | reg_branch=( 22 | (256, ), # offset 23 | (256, ), # depth 24 | (256, ), # size 25 | (256, ), # rot 26 | () # velo 27 | ), 28 | dir_branch=(256, ), 29 | attr_branch=(256, ), 30 | loss_cls=dict( 31 | type='FocalLoss', 32 | use_sigmoid=True, 33 | gamma=2.0, 34 | alpha=0.25, 35 | loss_weight=1.0), 36 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 37 | loss_dir=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_attr=dict( 40 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 41 | loss_centerness=dict( 42 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 43 | norm_on_bbox=True, 44 | centerness_on_reg=True, 45 | center_sampling=True, 46 | conv_bias=True, 47 | dcn_on_last_conv=True, 48 | use_depth_classifier=True, 49 | depth_branch=(256, ), 50 | depth_range=(0, 50), 51 | depth_unit=10, 52 | division='uniform', 53 | depth_bins=6, 54 | bbox_coder=dict(type='PGDBBoxCoder', code_size=9)), 55 | test_cfg=dict(nms_pre=1000, nms_thr=0.8, score_thr=0.01, max_per_img=200)) 56 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64,
96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /configs/_base_/models/smoke.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SMOKEMono3D', 3 | backbone=dict( 4 | type='DLANet', 5 | depth=34, 6 | in_channels=3, 7 | norm_cfg=dict(type='GN', num_groups=32), 8 | init_cfg=dict( 9 | type='Pretrained', 10 | checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth' 11 | )), 12 | neck=dict( 13 | type='DLANeck', 14 | in_channels=[16, 32, 64, 128, 256, 512], 15 | start_level=2, 16 | end_level=5, 17 | norm_cfg=dict(type='GN', num_groups=32)), 18 | bbox_head=dict( 19 | type='SMOKEMono3DHead', 20 | num_classes=3, 21 | in_channels=64, 22 | dim_channel=[3, 4, 5], 23 | ori_channel=[6, 7], 24 | stacked_convs=0, 25 | feat_channels=64, 26 | use_direction_classifier=False, 27 | diff_rad_by_sin=False, 28 | pred_attrs=False, 29 | pred_velo=False, 30 | dir_offset=0, 31 | strides=None, 32 | group_reg_dims=(8, ), 33 | cls_branch=(256, ), 34 | reg_branch=((256, ), ), 35 | num_attrs=0, 36 | bbox_code_size=7, 37 | dir_branch=(), 38 | attr_branch=(), 39 | bbox_coder=dict( 40 | type='SMOKECoder', 41 | base_depth=(28.01, 16.32), 42 | base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63, 43 | 1.53)), 44 | code_size=7), 45 | loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0), 46 | loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300), 47 | loss_dir=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_attr=None, 50 | conv_bias=True, 51 | dcn_on_last_conv=False), 52 | train_cfg=None, 53 | test_cfg=dict(topK=100, 
local_maximum_kernel=3, max_per_img=100)) 54 | -------------------------------------------------------------------------------- /configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training.
2 | # Since the models are trained for 20 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_100e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | 7 | # runtime settings 8 | runner = dict(type='EpochBasedRunner', max_epochs=100) 9 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on
S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /mmdet3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | import mmdet 5 | import mmseg 6 | from .version import __version__, short_version 7 | 8 | 9 | def digit_version(version_str): 10 | digit_version = [] 11 | for x in version_str.split('.'): 12 | if x.isdigit(): 13 | digit_version.append(int(x)) 14 | elif x.find('rc') != -1: 15 | patch_version = x.split('rc') 16 | digit_version.append(int(patch_version[0]) - 1) 17 | digit_version.append(int(patch_version[1])) 18 | return digit_version 19 | 20 | 21 | mmcv_minimum_version = '1.5.2' 22 | mmcv_maximum_version = '1.7.0' 23 | mmcv_version = digit_version(mmcv.__version__) 24 | 25 | 26 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 27 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 28 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 29 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 30 | 31 | mmdet_minimum_version = '2.24.0' 32 | mmdet_maximum_version = '3.0.0' 33 | mmdet_version = digit_version(mmdet.__version__) 34 | assert (mmdet_version >= digit_version(mmdet_minimum_version) 35 | and mmdet_version <= digit_version(mmdet_maximum_version)), \ 36 | f'MMDET=={mmdet.__version__} is used but incompatible. ' \ 37 | f'Please install mmdet>={mmdet_minimum_version}, ' \ 38 | f'<={mmdet_maximum_version}.' 39 | 40 | mmseg_minimum_version = '0.20.0' 41 | mmseg_maximum_version = '1.0.0' 42 | mmseg_version = digit_version(mmseg.__version__) 43 | assert (mmseg_version >= digit_version(mmseg_minimum_version) 44 | and mmseg_version <= digit_version(mmseg_maximum_version)), \ 45 | f'MMSEG=={mmseg.__version__} is used but incompatible. ' \ 46 | f'Please install mmseg>={mmseg_minimum_version}, ' \ 47 | f'<={mmseg_maximum_version}.' 
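# Note on digit_version() above: release candidates sort below the final
# release, e.g. digit_version('1.7.0rc1') == [1, 7, -1, 1] is less than
# digit_version('1.7.0') == [1, 7, 0], so rc builds of the pinned maximum
# versions still pass the assertions.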
48 | 49 | __all__ = ['__version__', 'short_version'] 50 | -------------------------------------------------------------------------------- /mmdet3d/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inference import (convert_SyncBN, inference_detector, 3 | inference_mono_3d_detector, 4 | inference_multi_modality_detector, inference_segmentor, 5 | init_model, show_result_meshlab) 6 | from .test import single_gpu_test 7 | from .train import init_random_seed, train_model 8 | 9 | __all__ = [ 10 | 'inference_detector', 'init_model', 'single_gpu_test', 11 | 'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN', 12 | 'train_model', 'inference_multi_modality_detector', 'inference_segmentor', 13 | 'init_random_seed' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/apis/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from os import path as osp 3 | 4 | import mmcv 5 | import torch 6 | from mmcv.image import tensor2imgs 7 | 8 | from mmdet3d.models import Base3DDetector 9 | 10 | 11 | def single_gpu_test(model, 12 | data_loader, 13 | show=False, 14 | out_dir=None, 15 | show_score_thr=0.3): 16 | """Test a model with a single GPU. 17 | 18 | This method tests the model with a single GPU and provides the 'show' option. 19 | By setting ``show=True``, it saves the visualization results under 20 | ``out_dir``. 21 | 22 | Args: 23 | model (nn.Module): Model to be tested. 24 | data_loader (DataLoader): PyTorch data loader. 25 | show (bool, optional): Whether to save visualization results. 26 | Default: False. 27 | out_dir (str, optional): The path to save visualization results. 28 | Default: None. 29 | show_score_thr (float, optional): Score threshold for visualized results. Default: 0.3. 30 | Returns: 31 | list[dict]: The prediction results.
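Example: a minimal usage sketch (assuming ``model`` is already wrapped in ``MMDataParallel`` and ``data_loader`` is built from the test config): ``outputs = single_gpu_test(model, data_loader, show=False)``.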
32 | """ 33 | model.eval() 34 | results = [] 35 | dataset = data_loader.dataset 36 | prog_bar = mmcv.ProgressBar(len(dataset)) 37 | for i, data in enumerate(data_loader): 38 | with torch.no_grad(): 39 | result = model(return_loss=False, rescale=True, **data) 40 | 41 | if show: 42 | # Visualize the results of MMDetection3D model 43 | # 'show_results' is MMdetection3D visualization API 44 | models_3d = (Base3DDetector) 45 | if isinstance(model.module, models_3d): 46 | model.module.show_results( 47 | data, 48 | result, 49 | out_dir=out_dir, 50 | show=show, 51 | score_thr=show_score_thr) 52 | # Visualize the results of MMDetection model 53 | # 'show_result' is MMdetection visualization API 54 | else: 55 | batch_size = len(result) 56 | if batch_size == 1 and isinstance(data['img'][0], 57 | torch.Tensor): 58 | img_tensor = data['img'][0] 59 | else: 60 | img_tensor = data['img'][0].data[0] 61 | img_metas = data['img_metas'][0].data[0] 62 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 63 | assert len(imgs) == len(img_metas) 64 | 65 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 66 | h, w, _ = img_meta['img_shape'] 67 | img_show = img[:h, :w, :] 68 | 69 | ori_h, ori_w = img_meta['ori_shape'][:-1] 70 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 71 | 72 | if out_dir: 73 | out_file = osp.join(out_dir, img_meta['ori_filename']) 74 | else: 75 | out_file = None 76 | 77 | model.module.show_result( 78 | img_show, 79 | result[i], 80 | show=show, 81 | out_file=out_file, 82 | score_thr=show_score_thr) 83 | results.extend(result) 84 | 85 | batch_size = len(result) 86 | for _ in range(batch_size): 87 | prog_bar.update() 88 | return results 89 | -------------------------------------------------------------------------------- /mmdet3d/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .anchor import * # noqa: F401, F403 3 | from .bbox import * # noqa: F401, F403 4 | from .evaluation import * # noqa: F401, F403 5 | from .hook import * # noqa: F401, F403 6 | from .points import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | from .visualizer import * # noqa: F401, F403 10 | from .voxel import * # noqa: F401, F403 11 | -------------------------------------------------------------------------------- /mmdet3d/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.anchor import build_prior_generator 3 | from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator, 4 | AlignedAnchor3DRangeGeneratorPerCls, 5 | Anchor3DRangeGenerator) 6 | 7 | __all__ = [ 8 | 'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator', 9 | 'build_prior_generator', 'AlignedAnchor3DRangeGeneratorPerCls' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 3 | from .coders import DeltaXYZWLHRBBoxCoder 4 | # from .bbox_target import bbox_target 5 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 6 | BboxOverlapsNearest3D, 7 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 8 | bbox_overlaps_nearest_3d) 9 | from .samplers import (BaseSampler, CombinedSampler, 10 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 11 | PseudoSampler, RandomSampler, SamplingResult) 12 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, 13 | Coord3DMode, DepthInstance3DBoxes, 14 | LiDARInstance3DBoxes, get_box_type, limit_period, 15 | mono_cam_box2vis, points_cam2img, points_img2cam, 16 | xywhr2xyxyr) 17 | from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back 18 | 19 | __all__ = [ 20 | 'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 21 | 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 22 | 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 23 | 'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 24 | 'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 25 | 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode', 26 | 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi', 27 | 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes', 28 | 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img', 29 | 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis' 30 | ] 31 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner 3 | 4 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox import build_bbox_coder 3 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder 4 | from .centerpoint_bbox_coders import CenterPointBBoxCoder 5 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder 6 | from .fcos3d_bbox_coder import FCOS3DBBoxCoder 7 | from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder 8 | from .monoflex_bbox_coder import MonoFlexCoder 9 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 10 | from .pgd_bbox_coder import PGDBBoxCoder 11 | from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder 12 | from .smoke_bbox_coder import SMOKECoder 13 | 14 | __all__ = [ 15 | 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder', 16 | 'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder', 17 | 'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder', 'PGDBBoxCoder', 'SMOKECoder', 18 | 'MonoFlexCoder' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
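# Summary of the encoding implemented below: box centers are normalized by
# the anchor's BEV diagonal sqrt(la^2 + wa^2) (z additionally by the anchor
# height), sizes are encoded as log ratios, and yaw as a plain difference --
# the residual parameterization used by SECOND and PointPillars.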
2 | import torch 3 | 4 | from mmdet.core.bbox import BaseBBoxCoder 5 | from mmdet.core.bbox.builder import BBOX_CODERS 6 | 7 | 8 | @BBOX_CODERS.register_module() 9 | class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder): 10 | """Bbox Coder for 3D boxes. 11 | 12 | Args: 13 | code_size (int): The dimension of boxes to be encoded. 14 | """ 15 | 16 | def __init__(self, code_size=7): 17 | super(DeltaXYZWLHRBBoxCoder, self).__init__() 18 | self.code_size = code_size 19 | 20 | @staticmethod 21 | def encode(src_boxes, dst_boxes): 22 | """Get box regression transformation deltas (dx, dy, dz, dx_size, 23 | dy_size, dz_size, dr, dv*) that can be used to transform the 24 | `src_boxes` into the `target_boxes`. 25 | 26 | Args: 27 | src_boxes (torch.Tensor): source boxes, e.g., object proposals. 28 | dst_boxes (torch.Tensor): target of the transformation, e.g., 29 | ground-truth boxes. 30 | 31 | Returns: 32 | torch.Tensor: Box transformation deltas. 33 | """ 34 | box_ndim = src_boxes.shape[-1] 35 | cas, cgs, cts = [], [], [] 36 | if box_ndim > 7: 37 | xa, ya, za, wa, la, ha, ra, *cas = torch.split( 38 | src_boxes, 1, dim=-1) 39 | xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split( 40 | dst_boxes, 1, dim=-1) 41 | cts = [g - a for g, a in zip(cgs, cas)] 42 | else: 43 | xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1) 44 | xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1) 45 | za = za + ha / 2 46 | zg = zg + hg / 2 47 | diagonal = torch.sqrt(la**2 + wa**2) 48 | xt = (xg - xa) / diagonal 49 | yt = (yg - ya) / diagonal 50 | zt = (zg - za) / ha 51 | lt = torch.log(lg / la) 52 | wt = torch.log(wg / wa) 53 | ht = torch.log(hg / ha) 54 | rt = rg - ra 55 | return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1) 56 | 57 | @staticmethod 58 | def decode(anchors, deltas): 59 | """Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size, 60 | dz_size, dr, dv*) to `boxes`. 61 | 62 | Args: 63 | anchors (torch.Tensor): Parameters of anchors with shape (N, 7). 64 | deltas (torch.Tensor): Encoded boxes with shape 65 | (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*]. 66 | 67 | Returns: 68 | torch.Tensor: Decoded boxes. 69 | """ 70 | cas, cts = [], [] 71 | box_ndim = anchors.shape[-1] 72 | if box_ndim > 7: 73 | xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1) 74 | xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1) 75 | else: 76 | xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) 77 | xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1) 78 | 79 | za = za + ha / 2 80 | diagonal = torch.sqrt(la**2 + wa**2) 81 | xg = xt * diagonal + xa 82 | yg = yt * diagonal + ya 83 | zg = zt * ha + za 84 | 85 | lg = torch.exp(lt) * la 86 | wg = torch.exp(wt) * wa 87 | hg = torch.exp(ht) * ha 88 | rg = rt + ra 89 | zg = zg - hg / 2 90 | cgs = [t + a for t, a in zip(cts, cas)] 91 | return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1) 92 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler, 3 | InstanceBalancedPosSampler, 4 | IoUBalancedNegSampler, OHEMSampler, 5 | PseudoSampler, RandomSampler, 6 | SamplingResult) 7 | from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler 8 | 9 | __all__ = [ 10 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 11 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 12 | 'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_box3d import BaseInstance3DBoxes 3 | from .box_3d_mode import Box3DMode 4 | from .cam_box3d import CameraInstance3DBoxes 5 | from .coord_3d_mode import Coord3DMode 6 | from .depth_box3d import DepthInstance3DBoxes 7 | from .lidar_box3d import LiDARInstance3DBoxes 8 | from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period, 9 | mono_cam_box2vis, points_cam2img, points_img2cam, 10 | rotation_3d_in_axis, xywhr2xyxyr) 11 | 12 | __all__ = [ 13 | 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes', 14 | 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr', 15 | 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img', 16 | 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis', 17 | 'get_proj_mat_by_coord_type' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical): 6 | """Map bboxes from testing scale to original image scale. 7 | 8 | Args: 9 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back. 10 | scale_factor (float): Scale factor. 11 | flip_horizontal (bool): Whether to flip horizontally. 12 | flip_vertical (bool): Whether to flip vertically. 13 | 14 | Returns: 15 | :obj:`BaseInstance3DBoxes`: Boxes mapped back. 16 | """ 17 | new_bboxes = bboxes.clone() 18 | if flip_horizontal: 19 | new_bboxes.flip('horizontal') 20 | if flip_vertical: 21 | new_bboxes.flip('vertical') 22 | new_bboxes.scale(1 / scale_factor) 23 | 24 | return new_bboxes 25 | 26 | 27 | def bbox3d2roi(bbox_list): 28 | """Convert a list of bounding boxes to roi format. 29 | 30 | Args: 31 | bbox_list (list[torch.Tensor]): A list of bounding boxes 32 | corresponding to a batch of images. 33 | 34 | Returns: 35 | torch.Tensor: Region of interests in shape (n, c), where 36 | the channels are in order of [batch_ind, x, y ...]. 
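Example (illustrative): two images with box tensors of shape (3, 7) and (2, 7) yield ``rois`` of shape (5, 8), where column 0 holds the image index.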
37 | """ 38 | rois_list = [] 39 | for img_id, bboxes in enumerate(bbox_list): 40 | if bboxes.size(0) > 0: 41 | img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) 42 | rois = torch.cat([img_inds, bboxes], dim=-1) 43 | else: 44 | rois = torch.zeros_like(bboxes) 45 | rois_list.append(rois) 46 | rois = torch.cat(rois_list, 0) 47 | return rois 48 | 49 | 50 | def bbox3d2result(bboxes, scores, labels, attrs=None): 51 | """Convert detection results to a list of numpy arrays. 52 | 53 | Args: 54 | bboxes (torch.Tensor): Bounding boxes with shape (N, 5). 55 | labels (torch.Tensor): Labels with shape (N, ). 56 | scores (torch.Tensor): Scores with shape (N, ). 57 | attrs (torch.Tensor, optional): Attributes with shape (N, ). 58 | Defaults to None. 59 | 60 | Returns: 61 | dict[str, torch.Tensor]: Bounding box results in cpu mode. 62 | 63 | - boxes_3d (torch.Tensor): 3D boxes. 64 | - scores (torch.Tensor): Prediction scores. 65 | - labels_3d (torch.Tensor): Box labels. 66 | - attrs_3d (torch.Tensor, optional): Box attributes. 67 | """ 68 | result_dict = dict( 69 | boxes_3d=bboxes.to('cpu'), 70 | scores_3d=scores.cpu(), 71 | labels_3d=labels.cpu()) 72 | 73 | if attrs is not None: 74 | result_dict['attrs_3d'] = attrs.cpu() 75 | 76 | return result_dict 77 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .indoor_eval import indoor_eval 3 | from .instance_seg_eval import instance_seg_eval 4 | from .kitti_utils import kitti_eval, kitti_eval_coco_style 5 | from .lyft_eval import lyft_eval 6 | from .seg_eval import seg_eval 7 | 8 | __all__ = [ 9 | 'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval', 10 | 'seg_eval', 'instance_seg_eval' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/kitti_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .eval import kitti_eval, kitti_eval_coco_style 3 | 4 | __all__ = ['kitti_eval', 'kitti_eval_coco_style'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .evaluate_semantic_instance import evaluate_matches, scannet_eval 3 | 4 | __all__ = ['scannet_eval', 'evaluate_matches'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/util_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/util_3d.py # noqa 3 | import json 4 | 5 | import numpy as np 6 | 7 | 8 | class Instance: 9 | """Single instance for ScanNet evaluator. 10 | 11 | Args: 12 | mesh_vert_instances (np.array): Instance ids for each point. 13 | instance_id: Id of single instance. 
14 |     """
15 |     instance_id = 0
16 |     label_id = 0
17 |     vert_count = 0
18 |     med_dist = -1
19 |     dist_conf = 0.0
20 | 
21 |     def __init__(self, mesh_vert_instances, instance_id):
22 |         if instance_id == -1:
23 |             return
24 |         self.instance_id = int(instance_id)
25 |         self.label_id = int(self.get_label_id(instance_id))
26 |         self.vert_count = int(
27 |             self.get_instance_verts(mesh_vert_instances, instance_id))
28 | 
29 |     @staticmethod
30 |     def get_label_id(instance_id):
31 |         return int(instance_id // 1000)
32 | 
33 |     @staticmethod
34 |     def get_instance_verts(mesh_vert_instances, instance_id):
35 |         return (mesh_vert_instances == instance_id).sum()
36 | 
37 |     def to_json(self):
38 |         return json.dumps(
39 |             self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
40 | 
41 |     def to_dict(self):
42 |         inst_dict = {}
43 |         inst_dict['instance_id'] = self.instance_id
44 |         inst_dict['label_id'] = self.label_id
45 |         inst_dict['vert_count'] = self.vert_count
46 |         inst_dict['med_dist'] = self.med_dist
47 |         inst_dict['dist_conf'] = self.dist_conf
48 |         return inst_dict
49 | 
50 |     def from_json(self, data):
51 |         self.instance_id = int(data['instance_id'])
52 |         self.label_id = int(data['label_id'])
53 |         self.vert_count = int(data['vert_count'])
54 |         if 'med_dist' in data:
55 |             self.med_dist = float(data['med_dist'])
56 |             self.dist_conf = float(data['dist_conf'])
57 | 
58 |     def __str__(self):
59 |         return '(' + str(self.instance_id) + ')'
60 | 
61 | 
62 | def get_instances(ids, class_ids, class_labels, id2label):
63 |     """Transform gt instance mask to Instance objects.
64 | 
65 |     Args:
66 |         ids (np.array): Instance ids for each point.
67 |         class_ids (tuple[int]): Ids of valid categories.
68 |         class_labels (tuple[str]): Class names.
69 |         id2label (dict[int, str]): Mapping of valid class id to class label.
70 | 
71 |     Returns:
72 |         dict[str, list]: Instance objects grouped by class label.
73 |     """
74 |     instances = {}
75 |     for label in class_labels:
76 |         instances[label] = []
77 |     instance_ids = np.unique(ids)
78 |     for inst_id in instance_ids:
79 |         if inst_id == 0:
80 |             continue
81 |         inst = Instance(ids, inst_id)
82 |         if inst.label_id in class_ids:
83 |             instances[id2label[inst.label_id]].append(inst.to_dict())
84 |     return instances
85 | 
-------------------------------------------------------------------------------- /mmdet3d/core/evaluation/seg_eval.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import numpy as np
3 | from mmcv.utils import print_log
4 | from terminaltables import AsciiTable
5 | 
6 | 
7 | def fast_hist(preds, labels, num_classes):
8 |     """Compute the confusion matrix for every batch.
9 | 
10 |     Args:
11 |         preds (np.ndarray): Prediction labels of points with shape of
12 |             (num_points, ).
13 |         labels (np.ndarray): Ground truth labels of points with shape of
14 |             (num_points, ).
15 |         num_classes (int): Number of classes.
16 | 
17 |     Returns:
18 |         np.ndarray: Calculated confusion matrix.
19 |     """
20 | 
21 |     k = (labels >= 0) & (labels < num_classes)
22 |     bin_count = np.bincount(
23 |         num_classes * labels[k].astype(int) + preds[k],
24 |         minlength=num_classes**2)
25 |     return bin_count[:num_classes**2].reshape(num_classes, num_classes)
26 | 
27 | 
28 | def per_class_iou(hist):
29 |     """Compute the per-class IoU.
30 | 
31 |     Args:
32 |         hist (np.ndarray): Overall confusion matrix
33 |             (num_classes, num_classes).
34 | 
35 |     Returns:
36 |         np.ndarray: Calculated per-class IoU.
37 |     """
38 | 
39 |     return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
40 | 
41 | 
42 | def get_acc(hist):
43 |     """Compute the overall accuracy.
44 | 
45 |     Args:
46 |         hist (np.ndarray): Overall confusion matrix
47 |             (num_classes, num_classes).
48 | 
49 |     Returns:
50 |         float: Calculated overall accuracy.
51 |     """
52 | 
53 |     return np.diag(hist).sum() / hist.sum()
54 | 
55 | 
56 | def get_acc_cls(hist):
57 |     """Compute the class average accuracy.
58 | 
59 |     Args:
60 |         hist (np.ndarray): Overall confusion matrix
61 |             (num_classes, num_classes).
62 | 
63 |     Returns:
64 |         float: Calculated class average accuracy.
65 |     """
66 | 
67 |     return np.nanmean(np.diag(hist) / hist.sum(axis=1))
68 | 
69 | 
70 | def seg_eval(gt_labels, seg_preds, label2cat, ignore_index, logger=None):
71 |     """Semantic Segmentation Evaluation.
72 | 
73 |     Evaluate the result of the Semantic Segmentation.
74 | 
75 |     Args:
76 |         gt_labels (list[torch.Tensor]): Ground truth labels.
77 |         seg_preds (list[torch.Tensor]): Predictions.
78 |         label2cat (dict): Map from label to category name.
79 |         ignore_index (int): Index that will be ignored in evaluation.
80 |         logger (logging.Logger | str, optional): The way to print the
81 |             evaluation summary. See `mmdet.utils.print_log()`. Default: None.
82 | 
83 |     Returns:
84 |         dict[str, float]: Dict of results.
85 |     """
86 |     assert len(seg_preds) == len(gt_labels)
87 |     num_classes = len(label2cat)
88 | 
89 |     hist_list = []
90 |     for i in range(len(gt_labels)):
91 |         gt_seg = gt_labels[i].clone().numpy().astype(np.int64)
92 |         pred_seg = seg_preds[i].clone().numpy().astype(np.int64)
93 | 
94 |         # filter out ignored points
95 |         pred_seg[gt_seg == ignore_index] = -1
96 |         gt_seg[gt_seg == ignore_index] = -1
97 | 
98 |         # calculate one instance result
99 |         hist_list.append(fast_hist(pred_seg, gt_seg, num_classes))
100 | 
101 |     iou = per_class_iou(sum(hist_list))
102 |     miou = np.nanmean(iou)
103 |     acc = get_acc(sum(hist_list))
104 |     acc_cls = get_acc_cls(sum(hist_list))
105 | 
106 |     header = ['classes']
107 |     for i in range(len(label2cat)):
108 |         header.append(label2cat[i])
109 |     header.extend(['miou', 'acc', 'acc_cls'])
110 | 
111 |     ret_dict = dict()
112 |     table_columns = [['results']]
113 |     for i in range(len(label2cat)):
114 |         ret_dict[label2cat[i]] = float(iou[i])
115 |         table_columns.append([f'{iou[i]:.4f}'])
116 |     ret_dict['miou'] = float(miou)
117 |     ret_dict['acc'] = float(acc)
118 |     ret_dict['acc_cls'] = float(acc_cls)
119 | 
120 |     table_columns.append([f'{miou:.4f}'])
121 |     table_columns.append([f'{acc:.4f}'])
122 |     table_columns.append([f'{acc_cls:.4f}'])
123 | 
124 |     table_data = [header]
125 |     table_rows = list(zip(*table_columns))
126 |     table_data += table_rows
127 |     table = AsciiTable(table_data)
128 |     table.inner_footing_row_border = True
129 |     print_log('\n' + table.table, logger=logger)
130 | 
131 |     return ret_dict
132 | 
-------------------------------------------------------------------------------- /mmdet3d/core/evaluation/waymo_utils/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .prediction_kitti_to_waymo import KITTI2Waymo
3 | 
4 | __all__ = ['KITTI2Waymo']
5 | 
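To make the evaluation utilities above concrete, here is a minimal, self-contained sketch of calling `seg_eval` on toy data; the class names, sample sizes, and label values are invented purely for illustration.

import torch

from mmdet3d.core.evaluation import seg_eval

# Toy inputs: two samples, three classes; label 3 is the ignored index.
label2cat = {0: 'car', 1: 'pedestrian', 2: 'cyclist'}
gt_labels = [torch.randint(0, 4, (1000,)), torch.randint(0, 4, (1000,))]
seg_preds = [torch.randint(0, 3, (1000,)), torch.randint(0, 3, (1000,))]

results = seg_eval(gt_labels, seg_preds, label2cat, ignore_index=3)
print(results['miou'], results['acc'], results['acc_cls'])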
-------------------------------------------------------------------------------- /mmdet3d/core/hook/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .ema import MEGVIIEMAHook
3 | from .sequentialcontrol import SequentialControlHook
4 | from .syncbncontrol import SyncbnControlHook
5 | from .utils import is_parallel
6 | 
7 | __all__ = ['MEGVIIEMAHook', 'is_parallel', 'SequentialControlHook',
8 |            'SyncbnControlHook']
9 | 
-------------------------------------------------------------------------------- /mmdet3d/core/hook/sequentialcontrol.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmcv.runner.hooks import HOOKS, Hook
3 | from mmdet3d.core.hook.utils import is_parallel
4 | 
5 | __all__ = ['SequentialControlHook']
6 | 
7 | 
8 | @HOOKS.register_module()
9 | class SequentialControlHook(Hook):
10 |     """Switch temporal fusion (``with_prev``) on after a given epoch."""
11 | 
12 |     def __init__(self, temporal_start_epoch=1):
13 |         super().__init__()
14 |         self.temporal_start_epoch = temporal_start_epoch
15 | 
16 |     def set_temporal_flag(self, runner, flag):
17 |         if is_parallel(runner.model.module):
18 |             runner.model.module.module.with_prev = flag
19 |         else:
20 |             runner.model.module.with_prev = flag
21 | 
22 |     def before_run(self, runner):
23 |         self.set_temporal_flag(runner, False)
24 | 
25 |     def before_train_epoch(self, runner):
26 |         if runner.epoch > self.temporal_start_epoch:
27 |             self.set_temporal_flag(runner, True)
-------------------------------------------------------------------------------- /mmdet3d/core/hook/syncbncontrol.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmcv.runner.hooks import HOOKS, Hook
3 | from mmdet3d.core.hook.utils import is_parallel
4 | from torch.nn import SyncBatchNorm
5 | 
6 | __all__ = ['SyncbnControlHook']
7 | 
8 | 
9 | @HOOKS.register_module()
10 | class SyncbnControlHook(Hook):
11 |     """Convert the model's BN layers to SyncBatchNorm at a given epoch."""
12 | 
13 |     def __init__(self, syncbn_start_epoch=1):
14 |         super().__init__()
15 |         self.is_syncbn = False
16 |         self.syncbn_start_epoch = syncbn_start_epoch
17 | 
18 |     def cvt_syncbn(self, runner):
19 |         if is_parallel(runner.model.module):
20 |             runner.model.module.module = \
21 |                 SyncBatchNorm.convert_sync_batchnorm(runner.model.module.module,
22 |                                                      process_group=None)
23 |         else:
24 |             pass
25 |             # runner.model.module = \
26 |             #     SyncBatchNorm.convert_sync_batchnorm(runner.model.module,
27 |             #                                          process_group=None)
28 | 
29 |     def before_train_epoch(self, runner):
30 |         if runner.epoch >= self.syncbn_start_epoch and not self.is_syncbn:
31 |             print('start using syncbn')
32 |             self.cvt_syncbn(runner)
33 |             self.is_syncbn = True
34 | 
35 | 
-------------------------------------------------------------------------------- /mmdet3d/core/hook/utils.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from torch import nn
3 | 
4 | __all__ = ['is_parallel']
5 | 
6 | 
7 | def is_parallel(model):
8 |     """Check if the model is in parallel mode."""
9 |     parallel_type = (
10 |         nn.parallel.DataParallel,
11 |         nn.parallel.DistributedDataParallel,
12 |     )
13 |     return isinstance(model, parallel_type)
14 | 
-------------------------------------------------------------------------------- /mmdet3d/core/points/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
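The three hooks above are typically enabled from a training config. A hedged sketch of the corresponding `custom_hooks` entry follows; the `MEGVIIEMAHook` keyword arguments are an assumption modeled on BEVDet-style configs and are not taken from this repository.

custom_hooks = [
    # EMA weight averaging (kwargs assumed, see lead-in above)
    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL'),
    # enable temporal fusion (with_prev) after epoch 2
    dict(type='SequentialControlHook', temporal_start_epoch=2),
    # convert BN layers to SyncBatchNorm from epoch 2 on
    dict(type='SyncbnControlHook', syncbn_start_epoch=2),
]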
2 | from .base_points import BasePoints 3 | from .cam_points import CameraPoints 4 | from .depth_points import DepthPoints 5 | from .lidar_points import LiDARPoints 6 | 7 | __all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints'] 8 | 9 | 10 | def get_points_type(points_type): 11 | """Get the class of points according to coordinate type. 12 | 13 | Args: 14 | points_type (str): The type of points coordinate. 15 | The valid value are "CAMERA", "LIDAR", or "DEPTH". 16 | 17 | Returns: 18 | class: Points type. 19 | """ 20 | if points_type == 'CAMERA': 21 | points_cls = CameraPoints 22 | elif points_type == 'LIDAR': 23 | points_cls = LiDARPoints 24 | elif points_type == 'DEPTH': 25 | points_cls = DepthPoints 26 | else: 27 | raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"' 28 | f' are supported, got {points_type}') 29 | 30 | return points_cls 31 | -------------------------------------------------------------------------------- /mmdet3d/core/points/cam_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class CameraPoints(BasePoints): 6 | """Points of instances in CAM coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(CameraPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 1 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 2] = -self.tensor[:, 2] 39 | 40 | @property 41 | def bev(self): 42 | """torch.Tensor: BEV of the points in shape (N, 2).""" 43 | return self.tensor[:, [0, 2]] 44 | 45 | def convert_to(self, dst, rt_mat=None): 46 | """Convert self to ``dst`` mode. 47 | 48 | Args: 49 | dst (:obj:`CoordMode`): The target Point mode. 50 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 51 | translation matrix between different coordinates. 52 | Defaults to None. 53 | The conversion from `src` coordinates to `dst` coordinates 54 | usually comes along the change of sensors, e.g., from camera 55 | to LiDAR. This requires a transformation matrix. 56 | 57 | Returns: 58 | :obj:`BasePoints`: The converted point of the same type 59 | in the `dst` mode. 
60 | """ 61 | from mmdet3d.core.bbox import Coord3DMode 62 | return Coord3DMode.convert_point( 63 | point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat) 64 | -------------------------------------------------------------------------------- /mmdet3d/core/points/depth_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class DepthPoints(BasePoints): 6 | """Points of instances in DEPTH coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(DepthPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 1] = -self.tensor[:, 1] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/points/lidar_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class LiDARPoints(BasePoints): 6 | """Points of instances in LIDAR coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 
21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(LiDARPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 1] = -self.tensor[:, 1] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 0] = -self.tensor[:, 0] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores, 4 | multiclass_nms) 5 | from .box3d_nms import (aligned_3d_nms, box3d_multiclass_nms, circle_nms, 6 | nms_bev, nms_normal_bev) 7 | from .merge_augs import merge_aug_bboxes_3d 8 | 9 | __all__ = [ 10 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 11 | 'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms', 12 | 'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms', 'nms_bev', 13 | 'nms_normal_bev' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from mmdet3d.core.post_processing import nms_bev, nms_normal_bev 5 | from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr 6 | 7 | 8 | def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg): 9 | """Merge augmented detection 3D bboxes and scores. 10 | 11 | Args: 12 | aug_results (list[dict]): The dict of detection results. 13 | The dict contains the following keys 14 | 15 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. 16 | - scores_3d (torch.Tensor): Detection scores. 17 | - labels_3d (torch.Tensor): Predicted box labels. 18 | img_metas (list[dict]): Meta information of each sample. 19 | test_cfg (dict): Test config. 20 | 21 | Returns: 22 | dict: Bounding boxes results in cpu mode, containing merged results. 23 | 24 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox. 25 | - scores_3d (torch.Tensor): Merged detection scores. 26 | - labels_3d (torch.Tensor): Merged predicted box labels. 
27 |     """
28 | 
29 |     assert len(aug_results) == len(img_metas), \
30 |         '"aug_results" should have the same length as "img_metas", got len(' \
31 |         f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'
32 | 
33 |     recovered_bboxes = []
34 |     recovered_scores = []
35 |     recovered_labels = []
36 | 
37 |     for bboxes, img_info in zip(aug_results, img_metas):
38 |         scale_factor = img_info[0]['pcd_scale_factor']
39 |         pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']
40 |         pcd_vertical_flip = img_info[0]['pcd_vertical_flip']
41 |         recovered_scores.append(bboxes['scores_3d'])
42 |         recovered_labels.append(bboxes['labels_3d'])
43 |         bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor,
44 |                                      pcd_horizontal_flip, pcd_vertical_flip)
45 |         recovered_bboxes.append(bboxes)
46 | 
47 |     aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)
48 |     aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)
49 |     aug_scores = torch.cat(recovered_scores, dim=0)
50 |     aug_labels = torch.cat(recovered_labels, dim=0)
51 | 
52 |     # TODO: use a more elegant way to deal with nms
53 |     if test_cfg.use_rotate_nms:
54 |         nms_func = nms_bev
55 |     else:
56 |         nms_func = nms_normal_bev
57 | 
58 |     merged_bboxes = []
59 |     merged_scores = []
60 |     merged_labels = []
61 | 
62 |     # Apply multi-class NMS when merging bboxes
63 |     if len(aug_labels) == 0:
64 |         return bbox3d2result(aug_bboxes, aug_scores, aug_labels)
65 | 
66 |     for class_id in range(torch.max(aug_labels).item() + 1):
67 |         class_inds = (aug_labels == class_id)
68 |         bboxes_i = aug_bboxes[class_inds]
69 |         bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]
70 |         scores_i = aug_scores[class_inds]
71 |         labels_i = aug_labels[class_inds]
72 |         if len(bboxes_nms_i) == 0:
73 |             continue
74 |         selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr)
75 | 
76 |         merged_bboxes.append(bboxes_i[selected, :])
77 |         merged_scores.append(scores_i[selected])
78 |         merged_labels.append(labels_i[selected])
79 | 
80 |     merged_bboxes = merged_bboxes[0].cat(merged_bboxes)
81 |     merged_scores = torch.cat(merged_scores, dim=0)
82 |     merged_labels = torch.cat(merged_labels, dim=0)
83 | 
84 |     _, order = merged_scores.sort(0, descending=True)
85 |     num = min(test_cfg.max_num, len(aug_bboxes))
86 |     order = order[:num]
87 | 
88 |     merged_bboxes = merged_bboxes[order]
89 |     merged_scores = merged_scores[order]
90 |     merged_labels = merged_labels[order]
91 | 
92 |     return bbox3d2result(merged_bboxes, merged_scores, merged_labels)
93 | 
-------------------------------------------------------------------------------- /mmdet3d/core/utils/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .array_converter import ArrayConverter, array_converter
3 | from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d,
4 |                        gaussian_radius, get_ellip_gaussian_2D)
5 | 
6 | __all__ = [
7 |     'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian',
8 |     'ArrayConverter', 'array_converter', 'ellip_gaussian2D',
9 |     'get_ellip_gaussian_2D'
10 | ]
11 | 
-------------------------------------------------------------------------------- /mmdet3d/core/visualizer/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .show_result import (show_multi_modality_result, show_result,
3 |                           show_seg_result)
4 | 
5 | __all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
6 | 
-------------------------------------------------------------------------------- /mmdet3d/core/voxel/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .builder import build_voxel_generator
3 | from .voxel_generator import VoxelGenerator
4 | 
5 | __all__ = ['build_voxel_generator', 'VoxelGenerator']
6 | 
-------------------------------------------------------------------------------- /mmdet3d/core/voxel/builder.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import mmcv
3 | 
4 | from . import voxel_generator
5 | 
6 | 
7 | def build_voxel_generator(cfg, **kwargs):
8 |     """Builder of voxel generator."""
9 |     if isinstance(cfg, voxel_generator.VoxelGenerator):
10 |         return cfg
11 |     elif isinstance(cfg, dict):
12 |         return mmcv.runner.obj_from_dict(
13 |             cfg, voxel_generator, default_args=kwargs)
14 |     else:
15 |         raise TypeError('Invalid type {} for building a voxel '
16 |                         'generator'.format(type(cfg)))
17 | 
-------------------------------------------------------------------------------- /mmdet3d/datasets/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmdet.datasets.builder import build_dataloader
3 | from .builder import DATASETS, PIPELINES, build_dataset
4 | from .custom_3d import Custom3DDataset
5 | from .nuscenes_dataset import NuScenesDataset
6 | from .nuscenes_dataset_occ import NuScenesDatasetOccpancy
7 | # yapf: disable
8 | from .pipelines import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
9 |                         GlobalRotScaleTrans, IndoorPatchPointSample,
10 |                         IndoorPointSample, LoadAnnotations3D,
11 |                         LoadPointsFromDict, LoadPointsFromFile,
12 |                         LoadPointsFromMultiSweeps, MultiViewWrapper,
13 |                         NormalizePointsColor, ObjectNameFilter, ObjectNoise,
14 |                         ObjectRangeFilter, ObjectSample, PointSample,
15 |                         PointShuffle, PointsRangeFilter, RandomDropPointsColor,
16 |                         RandomFlip3D, RandomJitterPoints, RandomRotate,
17 |                         RandomShiftScale, RangeLimitedRandomCrop,
18 |                         VoxelBasedPointSampler)
19 | # yapf: enable
20 | from .utils import get_loading_pipeline
21 | 
22 | __all__ = [
23 |     'build_dataloader', 'DATASETS', 'build_dataset', 'NuScenesDataset',
24 |     'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
25 |     'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
26 |     'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPatchPointSample',
27 |     'IndoorPointSample', 'PointSample', 'LoadAnnotations3D',
28 |     'GlobalAlignment', 'Custom3DDataset', 'LoadPointsFromMultiSweeps',
29 |     'BackgroundPointsFilter', 'VoxelBasedPointSampler',
30 |     'get_loading_pipeline', 'RandomDropPointsColor', 'RandomJitterPoints',
31 |     'ObjectNameFilter', 'AffineResize', 'RandomShiftScale',
32 |     'LoadPointsFromDict', 'PIPELINES', 'RangeLimitedRandomCrop',
33 |     'RandomRotate', 'MultiViewWrapper', 'NuScenesDatasetOccpancy'
34 | ]
35 | 
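As a usage note for the voxel builder defined above, here is a minimal sketch; the voxel size and point-cloud range are KITTI-like placeholder values, not taken from any config in this repository.

from mmdet3d.core.voxel import build_voxel_generator

# 'type' is resolved inside the voxel_generator module by obj_from_dict.
voxel_generator = build_voxel_generator(
    dict(
        type='VoxelGenerator',
        voxel_size=[0.05, 0.05, 0.1],
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        max_num_points=5))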
-------------------------------------------------------------------------------- /mmdet3d/datasets/builder.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import platform
3 | 
4 | from mmcv.utils import Registry, build_from_cfg
5 | 
6 | from mmdet.datasets import DATASETS as MMDET_DATASETS
7 | from mmdet.datasets.builder import _concat_dataset
8 | 
9 | if platform.system() != 'Windows':
10 |     # https://github.com/pytorch/pytorch/issues/973
11 |     import resource
12 |     rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
13 |     base_soft_limit = rlimit[0]
14 |     hard_limit = rlimit[1]
15 |     soft_limit = min(max(4096, base_soft_limit), hard_limit)
16 |     resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
17 | 
18 | OBJECTSAMPLERS = Registry('Object sampler')
19 | DATASETS = Registry('dataset')
20 | PIPELINES = Registry('pipeline')
21 | 
22 | 
23 | def build_dataset(cfg, default_args=None):
24 |     from mmdet3d.datasets.dataset_wrappers import CBGSDataset
25 |     from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
26 |                                                  ConcatDataset, RepeatDataset)
27 |     if isinstance(cfg, (list, tuple)):
28 |         dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
29 |     elif cfg['type'] == 'ConcatDataset':
30 |         dataset = ConcatDataset(
31 |             [build_dataset(c, default_args) for c in cfg['datasets']],
32 |             cfg.get('separate_eval', True))
33 |     elif cfg['type'] == 'RepeatDataset':
34 |         dataset = RepeatDataset(
35 |             build_dataset(cfg['dataset'], default_args), cfg['times'])
36 |     elif cfg['type'] == 'ClassBalancedDataset':
37 |         dataset = ClassBalancedDataset(
38 |             build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
39 |     elif cfg['type'] == 'CBGSDataset':
40 |         dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))
41 |     elif isinstance(cfg.get('ann_file'), (list, tuple)):
42 |         dataset = _concat_dataset(cfg, default_args)
43 |     elif cfg['type'] in DATASETS._module_dict.keys():
44 |         dataset = build_from_cfg(cfg, DATASETS, default_args)
45 |     else:
46 |         dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args)
47 |     return dataset
48 | 
-------------------------------------------------------------------------------- /mmdet3d/datasets/dataset_wrappers.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import numpy as np
3 | 
4 | from .builder import DATASETS
5 | 
6 | 
7 | @DATASETS.register_module()
8 | class CBGSDataset(object):
9 |     """A wrapper of class-balanced sampled dataset. Implementation of
10 |     paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object
11 |     Detection <https://arxiv.org/abs/1908.09492>`_.
12 | 
13 |     Balance the number of scenes under different classes.
14 | 
15 |     Args:
16 |         dataset (:obj:`CustomDataset`): The dataset to be class sampled.
17 |     """
18 | 
19 |     def __init__(self, dataset):
20 |         self.dataset = dataset
21 |         self.CLASSES = dataset.CLASSES
22 |         self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
23 |         self.sample_indices = self._get_sample_indices()
24 |         # self.dataset.data_infos = self.data_infos
25 |         if hasattr(self.dataset, 'flag'):
26 |             self.flag = np.array(
27 |                 [self.dataset.flag[ind] for ind in self.sample_indices],
28 |                 dtype=np.uint8)
29 | 
30 |     def _get_sample_indices(self):
31 |         """Compute class-balanced sample indices.
32 | 
33 |         Oversamples frames of under-represented categories so that all
34 |         classes are approximately uniformly represented.
35 | 
36 |         Returns:
37 |             list[int]: List of sample indices after class balancing.
38 | """ 39 | class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()} 40 | for idx in range(len(self.dataset)): 41 | sample_cat_ids = self.dataset.get_cat_ids(idx) 42 | for cat_id in sample_cat_ids: 43 | class_sample_idxs[cat_id].append(idx) 44 | duplicated_samples = sum( 45 | [len(v) for _, v in class_sample_idxs.items()]) 46 | class_distribution = { 47 | k: len(v) / duplicated_samples 48 | for k, v in class_sample_idxs.items() 49 | } 50 | 51 | sample_indices = [] 52 | 53 | frac = 1.0 / len(self.CLASSES) 54 | ratios = [frac / v for v in class_distribution.values()] 55 | for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios): 56 | sample_indices += np.random.choice(cls_inds, 57 | int(len(cls_inds) * 58 | ratio)).tolist() 59 | return sample_indices 60 | 61 | def __getitem__(self, idx): 62 | """Get item from infos according to the given index. 63 | 64 | Returns: 65 | dict: Data dictionary of the corresponding index. 66 | """ 67 | ori_idx = self.sample_indices[idx] 68 | return self.dataset[ori_idx] 69 | 70 | def __len__(self): 71 | """Return the length of data infos. 72 | 73 | Returns: 74 | int: Length of data infos. 75 | """ 76 | return len(self.sample_indices) 77 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .dbsampler import DataBaseSampler 4 | from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D 5 | from .loading import (LoadAnnotations3D, LoadAnnotationsBEVDepth, 6 | LoadImageFromFileMono3D, LoadMultiViewImageFromFiles, 7 | LoadPointsFromDict, LoadPointsFromFile, 8 | LoadPointsFromMultiSweeps, NormalizePointsColor, 9 | PointSegClassMapping, PointToMultiViewDepth, 10 | PrepareImageInputs, LoadOccGTFromFile, 11 | LoadAdjacentPointsFromFile, GenerateRays, LoadFlowGT) 12 | from .test_time_aug import MultiScaleFlipAug3D 13 | # yapf: disable 14 | from .transforms_3d import (AffineResize, BackgroundPointsFilter, 15 | GlobalAlignment, GlobalRotScaleTrans, 16 | IndoorPatchPointSample, IndoorPointSample, 17 | MultiViewWrapper, ObjectNameFilter, ObjectNoise, 18 | ObjectRangeFilter, ObjectSample, PointSample, 19 | PointShuffle, PointsRangeFilter, 20 | RandomDropPointsColor, RandomFlip3D, 21 | RandomJitterPoints, RandomRotate, RandomShiftScale, 22 | RangeLimitedRandomCrop, VoxelBasedPointSampler) 23 | 24 | __all__ = [ 25 | 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 26 | 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D', 27 | 'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile', 28 | 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 29 | 'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample', 30 | 'PointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D', 31 | 'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter', 32 | 'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample', 33 | 'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor', 34 | 'RandomJitterPoints', 'AffineResize', 'RandomShiftScale', 35 | 'LoadPointsFromDict', 'MultiViewWrapper', 'RandomRotate', 36 | 'RangeLimitedRandomCrop', 'PrepareImageInputs', 37 | 'LoadAnnotationsBEVDepth', 'PointToMultiViewDepth', 38 | 'LoadOccGTFromFile', 'LoadAdjacentPointsFromFile', 'GenerateRays', 'LoadFlowGT' 39 | ] 40 | 
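For orientation, a hedged sketch of how the transforms exported above compose into a training pipeline config; every parameter value below is an illustrative placeholder rather than a setting from this repository's configs.

train_pipeline = [
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='GlobalRotScaleTrans',
         rot_range=[-0.3925, 0.3925],
         scale_ratio_range=[0.95, 1.05]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=[-50, -50, -5, 50, 50, 3]),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=['car', 'pedestrian']),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]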
-------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES 7 | from ..builder import PIPELINES 8 | 9 | 10 | @PIPELINES.register_module() 11 | class Compose: 12 | """Compose multiple transforms sequentially. The pipeline registry of 13 | mmdet3d separates with mmdet, however, sometimes we may need to use mmdet's 14 | pipeline. So the class is rewritten to be able to use pipelines from both 15 | mmdet3d and mmdet. 16 | 17 | Args: 18 | transforms (Sequence[dict | callable]): Sequence of transform object or 19 | config dict to be composed. 20 | """ 21 | 22 | def __init__(self, transforms): 23 | assert isinstance(transforms, collections.abc.Sequence) 24 | self.transforms = [] 25 | for transform in transforms: 26 | if isinstance(transform, dict): 27 | _, key = PIPELINES.split_scope_key(transform['type']) 28 | if key in PIPELINES._module_dict.keys(): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | else: 31 | transform = build_from_cfg(transform, MMDET_PIPELINES) 32 | self.transforms.append(transform) 33 | elif callable(transform): 34 | self.transforms.append(transform) 35 | else: 36 | raise TypeError('transform must be callable or a dict') 37 | 38 | def __call__(self, data): 39 | """Call function to apply transforms sequentially. 40 | 41 | Args: 42 | data (dict): A result dict contains the data to transform. 43 | 44 | Returns: 45 | dict: Transformed data. 46 | """ 47 | 48 | for t in self.transforms: 49 | data = t(data) 50 | if data is None: 51 | return None 52 | return data 53 | 54 | def __repr__(self): 55 | format_string = self.__class__.__name__ + '(' 56 | for t in self.transforms: 57 | format_string += '\n' 58 | format_string += f' {t}' 59 | format_string += '\n)' 60 | return format_string 61 | -------------------------------------------------------------------------------- /mmdet3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .backbones import *  # noqa: F401,F403
3 | from .occflownet_modules import *  # noqa: F401,F403
4 | from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES,
5 |                       MIDDLE_ENCODERS, NECKS, ROI_EXTRACTORS, SEGMENTORS,
6 |                       SHARED_HEADS, VOXEL_ENCODERS, build_backbone,
7 |                       build_detector, build_fusion_layer, build_head,
8 |                       build_loss, build_middle_encoder, build_model,
9 |                       build_neck, build_roi_extractor, build_shared_head,
10 |                       build_voxel_encoder)
11 | from .detectors import *  # noqa: F401,F403
12 | from .losses import *  # noqa: F401,F403
13 | from .necks import *  # noqa: F401,F403
14 | __all__ = [
15 |     'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS',
16 |     'LOSSES', 'DETECTORS', 'SEGMENTORS', 'VOXEL_ENCODERS', 'MIDDLE_ENCODERS',
17 |     'FUSION_LAYERS', 'build_backbone', 'build_neck', 'build_roi_extractor',
18 |     'build_shared_head', 'build_head', 'build_loss', 'build_detector',
19 |     'build_fusion_layer', 'build_model', 'build_middle_encoder',
20 |     'build_voxel_encoder'
21 | ]
22 | 
-------------------------------------------------------------------------------- /mmdet3d/models/backbones/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
3 | from .resnet import CustomResNet, CustomResNet3D
4 | from .swin import SwinTransformer
5 | 
6 | __all__ = [
7 |     'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet',
8 |     'CustomResNet', 'CustomResNet3D', 'SwinTransformer'
9 | ]
-------------------------------------------------------------------------------- /mmdet3d/models/detectors/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import Base3DDetector
3 | from .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVDetTRT, BEVStereo4D
4 | from .centerpoint import CenterPoint
5 | from .mvx_two_stage import MVXTwoStageDetector
6 | from .occflownet import OccFlowNet
7 | 
8 | __all__ = [
9 |     'Base3DDetector', 'MVXTwoStageDetector', 'CenterPoint', 'OccFlowNet',
10 |     'BEVDet', 'BEVDet4D', 'BEVDepth4D', 'BEVStereo4D'
11 | ]
12 | 
-------------------------------------------------------------------------------- /mmdet3d/models/losses/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
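A short sketch of building a complete model through the builder exported above, using one of the configs listed in this repository's tree; `build_model` follows the usual mmdet3d API and dispatches to the detector/segmentor builders internally.

from mmcv import Config
from mmdet3d.models import build_model

cfg = Config.fromfile('configs/occflownet/occflownet_stbase_2d.py')
model = build_model(cfg.model,
                    train_cfg=cfg.get('train_cfg'),
                    test_cfg=cfg.get('test_cfg'))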
2 | from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy 3 | from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss 4 | from .chamfer_distance import ChamferDistance, chamfer_distance 5 | from .multibin_loss import MultiBinLoss 6 | from .paconv_regularization_loss import PAConvRegularizationLoss 7 | from .rotated_iou_loss import RotatedIoU3DLoss 8 | from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss 9 | from .silog_loss import SiLogLoss 10 | from .huber_loss import HuberLoss 11 | from .tv_loss import TVLoss3D 12 | from .bce_loss import MMBCELoss 13 | 14 | __all__ = [ 15 | 'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance', 16 | 'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss', 17 | 'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss', 18 | 'MultiBinLoss', 'RotatedIoU3DLoss', 'SiLogLoss', 'HuberLoss', 'TVLoss3D', 'MMBCELoss' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/axis_aligned_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch import nn as nn 4 | 5 | from mmdet.models.losses.utils import weighted_loss 6 | from ...core.bbox import AxisAlignedBboxOverlaps3D 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def axis_aligned_iou_loss(pred, target): 12 | """Calculate the IoU loss (1-IoU) of two sets of axis aligned bounding 13 | boxes. Note that predictions and targets are one-to-one corresponded. 14 | 15 | Args: 16 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 17 | (x1, y1, z1, x2, y2, z2). 18 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 19 | (x1, y1, z1, x2, y2, z2). 20 | 21 | Returns: 22 | torch.Tensor: IoU loss between predictions and targets. 23 | """ 24 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()( 25 | pred, target, is_aligned=True) 26 | iou_loss = 1 - axis_aligned_iou 27 | return iou_loss 28 | 29 | 30 | @LOSSES.register_module() 31 | class AxisAlignedIoULoss(nn.Module): 32 | """Calculate the IoU loss (1-IoU) of axis aligned bounding boxes. 33 | 34 | Args: 35 | reduction (str): Method to reduce losses. 36 | The valid reduction method are none, sum or mean. 37 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 38 | """ 39 | 40 | def __init__(self, reduction='mean', loss_weight=1.0): 41 | super(AxisAlignedIoULoss, self).__init__() 42 | assert reduction in ['none', 'sum', 'mean'] 43 | self.reduction = reduction 44 | self.loss_weight = loss_weight 45 | 46 | def forward(self, 47 | pred, 48 | target, 49 | weight=None, 50 | avg_factor=None, 51 | reduction_override=None, 52 | **kwargs): 53 | """Forward function of loss calculation. 54 | 55 | Args: 56 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 57 | (x1, y1, z1, x2, y2, z2). 58 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 59 | (x1, y1, z1, x2, y2, z2). 60 | weight (torch.Tensor | float, optional): Weight of loss. 61 | Defaults to None. 62 | avg_factor (int, optional): Average factor that is used to average 63 | the loss. Defaults to None. 64 | reduction_override (str, optional): Method to reduce losses. 65 | The valid reduction method are 'none', 'sum' or 'mean'. 66 | Defaults to None. 67 | 68 | Returns: 69 | torch.Tensor: IoU loss between predictions and targets. 
70 | """ 71 | assert reduction_override in (None, 'none', 'mean', 'sum') 72 | reduction = ( 73 | reduction_override if reduction_override else self.reduction) 74 | if (weight is not None) and (not torch.any(weight > 0)) and ( 75 | reduction != 'none'): 76 | return (pred * weight).sum() 77 | return axis_aligned_iou_loss( 78 | pred, 79 | target, 80 | weight=weight, 81 | avg_factor=avg_factor, 82 | reduction=reduction) * self.loss_weight 83 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/bce_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from ..builder import LOSSES 5 | import torch.nn as nn 6 | 7 | @LOSSES.register_module() 8 | class MMBCELoss(nn.Module): 9 | def __init__(self, loss_weight=1.0, loss_name='loss_bce'): 10 | super().__init__() 11 | self.loss_weight = loss_weight 12 | self._loss_name = loss_name 13 | self.loss_fn = nn.BCELoss() 14 | 15 | def forward(self, pred, target, **kwargs): 16 | return self.loss_weight * self.loss_fn(pred, target) -------------------------------------------------------------------------------- /mmdet3d/models/losses/dist_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from mmdet.models.builder import LOSSES 5 | import torch.nn as nn 6 | from torch_efficient_distloss import eff_distloss 7 | 8 | @LOSSES.register_module() 9 | class DistortionLoss(nn.Module): 10 | def __init__(self, 11 | loss_weight=0.01, 12 | loss_name='loss_dist'): 13 | super().__init__() 14 | self.loss_weight = loss_weight 15 | self._loss_name = loss_name 16 | 17 | def forward(self, weights, distances, intervals): 18 | ''' 19 | Efficient O(N) realization of distortion loss. 20 | There are B rays each with N sampled points. 21 | weights: Float tensor in shape [B,N]. Volume rendering weights of each point. 22 | distances: Float tensor in shape [B,N]. Midpoint distance to camera of each point. 23 | intervals: Float tensor in shape [B,N]. The query interval of each point. 24 | ''' 25 | 26 | loss = self.loss_weight * eff_distloss(weights, distances, intervals) 27 | return loss 28 | 29 | @property 30 | def loss_name(self): 31 | return self._loss_name -------------------------------------------------------------------------------- /mmdet3d/models/losses/huber_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from ..builder import LOSSES 5 | import torch.nn as nn 6 | 7 | @LOSSES.register_module() 8 | class HuberLoss(nn.Module): 9 | def __init__(self, loss_weight=1.0, delta=1.0, loss_name='loss_huber'): 10 | super().__init__() 11 | self.loss_weight = loss_weight 12 | self._loss_name = loss_name 13 | self.loss_fn = nn.HuberLoss(delta=delta) 14 | 15 | def forward(self, pred, target, **kwargs): 16 | return self.loss_weight * self.loss_fn(pred, target) -------------------------------------------------------------------------------- /mmdet3d/models/losses/multibin_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
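A quick sanity-check sketch for the `HuberLoss` wrapper above; the tensor values are arbitrary.

import torch

from mmdet3d.models.losses import HuberLoss

loss_fn = HuberLoss(loss_weight=0.25, delta=1.0)
pred = torch.rand(8, 1)
target = torch.rand(8, 1)
loss = loss_fn(pred, target)  # scalar tensor: 0.25 * mean Huber loss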
2 | import torch 3 | from torch import nn as nn 4 | from torch.nn import functional as F 5 | 6 | from mmdet.models.losses.utils import weighted_loss 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def multibin_loss(pred_orientations, gt_orientations, num_dir_bins=4): 12 | """Multi-Bin Loss. 13 | 14 | Args: 15 | pred_orientations(torch.Tensor): Predicted local vector 16 | orientation in [axis_cls, head_cls, sin, cos] format. 17 | shape (N, num_dir_bins * 4) 18 | gt_orientations(torch.Tensor): Corresponding gt bboxes, 19 | shape (N, num_dir_bins * 2). 20 | num_dir_bins(int, optional): Number of bins to encode 21 | direction angle. 22 | Defaults: 4. 23 | 24 | Return: 25 | torch.Tensor: Loss tensor. 26 | """ 27 | cls_losses = 0 28 | reg_losses = 0 29 | reg_cnt = 0 30 | for i in range(num_dir_bins): 31 | # bin cls loss 32 | cls_ce_loss = F.cross_entropy( 33 | pred_orientations[:, (i * 2):(i * 2 + 2)], 34 | gt_orientations[:, i].long(), 35 | reduction='mean') 36 | # regression loss 37 | valid_mask_i = (gt_orientations[:, i] == 1) 38 | cls_losses += cls_ce_loss 39 | if valid_mask_i.sum() > 0: 40 | start = num_dir_bins * 2 + i * 2 41 | end = start + 2 42 | pred_offset = F.normalize(pred_orientations[valid_mask_i, 43 | start:end]) 44 | gt_offset_sin = torch.sin(gt_orientations[valid_mask_i, 45 | num_dir_bins + i]) 46 | gt_offset_cos = torch.cos(gt_orientations[valid_mask_i, 47 | num_dir_bins + i]) 48 | reg_loss = \ 49 | F.l1_loss(pred_offset[:, 0], gt_offset_sin, 50 | reduction='none') + \ 51 | F.l1_loss(pred_offset[:, 1], gt_offset_cos, 52 | reduction='none') 53 | 54 | reg_losses += reg_loss.sum() 55 | reg_cnt += valid_mask_i.sum() 56 | 57 | return cls_losses / num_dir_bins + reg_losses / reg_cnt 58 | 59 | 60 | @LOSSES.register_module() 61 | class MultiBinLoss(nn.Module): 62 | """Multi-Bin Loss for orientation. 63 | 64 | Args: 65 | reduction (str, optional): The method to reduce the loss. 66 | Options are 'none', 'mean' and 'sum'. Defaults to 'none'. 67 | loss_weight (float, optional): The weight of loss. Defaults 68 | to 1.0. 69 | """ 70 | 71 | def __init__(self, reduction='none', loss_weight=1.0): 72 | super(MultiBinLoss, self).__init__() 73 | assert reduction in ['none', 'sum', 'mean'] 74 | self.reduction = reduction 75 | self.loss_weight = loss_weight 76 | 77 | def forward(self, pred, target, num_dir_bins, reduction_override=None): 78 | """Forward function. 79 | 80 | Args: 81 | pred (torch.Tensor): The prediction. 82 | target (torch.Tensor): The learning target of the prediction. 83 | num_dir_bins (int): Number of bins to encode direction angle. 84 | reduction_override (str, optional): The reduction method used to 85 | override the original reduction method of the loss. 86 | Defaults to None. 87 | """ 88 | assert reduction_override in (None, 'none', 'mean', 'sum') 89 | reduction = ( 90 | reduction_override if reduction_override else self.reduction) 91 | loss = self.loss_weight * multibin_loss( 92 | pred, target, num_dir_bins=num_dir_bins, reduction=reduction) 93 | return loss 94 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/rotated_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | from mmcv.ops import diff_iou_rotated_3d 4 | from torch import nn as nn 5 | 6 | from mmdet.models.losses.utils import weighted_loss 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def rotated_iou_3d_loss(pred, target): 12 | """Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes. 13 | Note that predictions and targets are one-to-one corresponded. 14 | 15 | Args: 16 | pred (torch.Tensor): Bbox predictions with shape [N, 7] 17 | (x, y, z, w, l, h, alpha). 18 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7] 19 | (x, y, z, w, l, h, alpha). 20 | 21 | Returns: 22 | torch.Tensor: IoU loss between predictions and targets. 23 | """ 24 | iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0), 25 | target.unsqueeze(0))[0] 26 | return iou_loss 27 | 28 | 29 | @LOSSES.register_module() 30 | class RotatedIoU3DLoss(nn.Module): 31 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. 32 | 33 | Args: 34 | reduction (str): Method to reduce losses. 35 | The valid reduction method are none, sum or mean. 36 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 37 | """ 38 | 39 | def __init__(self, reduction='mean', loss_weight=1.0): 40 | super().__init__() 41 | self.reduction = reduction 42 | self.loss_weight = loss_weight 43 | 44 | def forward(self, 45 | pred, 46 | target, 47 | weight=None, 48 | avg_factor=None, 49 | reduction_override=None, 50 | **kwargs): 51 | """Forward function of loss calculation. 52 | 53 | Args: 54 | pred (torch.Tensor): Bbox predictions with shape [..., 7] 55 | (x, y, z, w, l, h, alpha). 56 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7] 57 | (x, y, z, w, l, h, alpha). 58 | weight (torch.Tensor | float, optional): Weight of loss. 59 | Defaults to None. 60 | avg_factor (int, optional): Average factor that is used to average 61 | the loss. Defaults to None. 62 | reduction_override (str, optional): Method to reduce losses. 63 | The valid reduction method are 'none', 'sum' or 'mean'. 64 | Defaults to None. 65 | 66 | Returns: 67 | torch.Tensor: IoU loss between predictions and targets. 
68 | """ 69 | if weight is not None and not torch.any(weight > 0): 70 | return pred.sum() * weight.sum() # 0 71 | assert reduction_override in (None, 'none', 'mean', 'sum') 72 | reduction = ( 73 | reduction_override if reduction_override else self.reduction) 74 | if weight is not None and weight.dim() > 1: 75 | weight = weight.mean(-1) 76 | loss = self.loss_weight * rotated_iou_3d_loss( 77 | pred, 78 | target, 79 | weight, 80 | reduction=reduction, 81 | avg_factor=avg_factor, 82 | **kwargs) 83 | 84 | return loss 85 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/silog_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from ..builder import LOSSES 5 | import torch 6 | import torch.nn as nn 7 | 8 | def silog_loss(pred, target, lambd=.85): 9 | d = torch.log(pred + 1e-7) - torch.log(target) 10 | return torch.sqrt((d ** 2).mean() - lambd * (d.mean() ** 2)) 11 | # return torch.sqrt((d ** 2).mean() - lambd * (d.mean() ** 2)) * 10.0 12 | 13 | @LOSSES.register_module() 14 | class SiLogLoss(nn.Module): 15 | def __init__(self, loss_weight=1.0, lambd=.85, loss_name='loss_silog'): 16 | super().__init__() 17 | self.loss_weight = loss_weight 18 | self._loss_name = loss_name 19 | self.lambd = lambd 20 | def forward(self, pred, target, **kwargs): 21 | return self.loss_weight * silog_loss(pred, target, lambd=self.lambd) -------------------------------------------------------------------------------- /mmdet3d/models/losses/tv_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from mmdet.models.builder import LOSSES 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | def tv_3d(voxels, weight): 10 | # bs, Z, H, W, C = voxels.size() 11 | tv_z = torch.pow(voxels[:, 1:, :, :, :] - voxels[:, :-1, :, :, :], 2).sum() 12 | tv_h = torch.pow(voxels[:, :, 1:, :, :] - voxels[:, :, :-1, :, :], 2).sum() 13 | tv_w = torch.pow(voxels[:, :, :, 1:, :] - voxels[:, :, :, :-1, :], 2).sum() 14 | return weight * (tv_z + tv_h + tv_w) / voxels.numel() 15 | 16 | @LOSSES.register_module() 17 | class TVLoss3D(nn.Module): 18 | def __init__(self, loss_weight=0.01, loss_name='loss_tv', density_weight = 5, semantics_weight = .2): 19 | super().__init__() 20 | self.loss_weight = loss_weight 21 | self._loss_name = loss_name 22 | self.density_weight = density_weight 23 | self.semantics_weight = semantics_weight 24 | def forward(self, voxel_outs): 25 | 26 | density_tv = tv_3d(voxel_outs[0], self.density_weight) 27 | semantics_tv = tv_3d(voxel_outs[1], self.semantics_weight) 28 | return self.loss_weight * (density_tv + semantics_tv) 29 | 30 | @property 31 | def loss_name(self): 32 | return self._loss_name 33 | 34 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
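To illustrate the input layout `TVLoss3D` above expects (a [density, semantics] pair of 5D voxel grids, per the shape comment in `tv_3d`), a small sketch with placeholder shapes.

import torch

from mmdet3d.models.losses import TVLoss3D

tv_loss = TVLoss3D(loss_weight=0.01)
density = torch.rand(1, 16, 200, 200, 1)     # (bs, Z, H, W, 1)
semantics = torch.rand(1, 16, 200, 200, 17)  # (bs, Z, H, W, C)
loss = tv_loss([density, semantics])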
2 | from mmdet.models.necks.fpn import FPN 3 | from .fpn import CustomFPN 4 | from .lss_fpn import FPN_LSS 5 | from .second_fpn import SECONDFPN 6 | from .view_transformer import LSSViewTransformer, LSSViewTransformerBEVDepth, \ 7 | LSSViewTransformerBEVStereo 8 | 9 | __all__ = [ 10 | 'FPN', 'SECONDFPN', 'LSSViewTransformer', 'CustomFPN', 'FPN_LSS', 'LSSViewTransformerBEVDepth', 11 | 'LSSViewTransformerBEVStereo' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/second_fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | from mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer 5 | from mmcv.runner import BaseModule, auto_fp16 6 | from torch import nn as nn 7 | 8 | from ..builder import NECKS 9 | 10 | 11 | @NECKS.register_module() 12 | class SECONDFPN(BaseModule): 13 | """FPN used in SECOND/PointPillars/PartA2/MVXNet. 14 | 15 | Args: 16 | in_channels (list[int]): Input channels of multi-scale feature maps. 17 | out_channels (list[int]): Output channels of feature maps. 18 | upsample_strides (list[int]): Strides used to upsample the 19 | feature maps. 20 | norm_cfg (dict): Config dict of normalization layers. 21 | upsample_cfg (dict): Config dict of upsample layers. 22 | conv_cfg (dict): Config dict of conv layers. 23 | use_conv_for_no_stride (bool): Whether to use conv when stride is 1. 24 | """ 25 | 26 | def __init__(self, 27 | in_channels=[128, 128, 256], 28 | out_channels=[256, 256, 256], 29 | upsample_strides=[1, 2, 4], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | conv_cfg=dict(type='Conv2d', bias=False), 33 | use_conv_for_no_stride=False, 34 | init_cfg=None): 35 | # if for GroupNorm, 36 | # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) 37 | super(SECONDFPN, self).__init__(init_cfg=init_cfg) 38 | assert len(out_channels) == len(upsample_strides) == len(in_channels) 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.fp16_enabled = False 42 | 43 | deblocks = [] 44 | for i, out_channel in enumerate(out_channels): 45 | stride = upsample_strides[i] 46 | if stride > 1 or (stride == 1 and not use_conv_for_no_stride): 47 | upsample_layer = build_upsample_layer( 48 | upsample_cfg, 49 | in_channels=in_channels[i], 50 | out_channels=out_channel, 51 | kernel_size=upsample_strides[i], 52 | stride=upsample_strides[i]) 53 | else: 54 | stride = np.round(1 / stride).astype(np.int64) 55 | upsample_layer = build_conv_layer( 56 | conv_cfg, 57 | in_channels=in_channels[i], 58 | out_channels=out_channel, 59 | kernel_size=stride, 60 | stride=stride) 61 | 62 | deblock = nn.Sequential(upsample_layer, 63 | build_norm_layer(norm_cfg, out_channel)[1], 64 | nn.ReLU(inplace=True)) 65 | deblocks.append(deblock) 66 | self.deblocks = nn.ModuleList(deblocks) 67 | 68 | if init_cfg is None: 69 | self.init_cfg = [ 70 | dict(type='Kaiming', layer='ConvTranspose2d'), 71 | dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0) 72 | ] 73 | 74 | @auto_fp16() 75 | def forward(self, x): 76 | """Forward function. 77 | 78 | Args: 79 | x (torch.Tensor): 4D Tensor in (N, C, H, W) shape. 80 | 81 | Returns: 82 | list[torch.Tensor]: Multi-level feature maps. 
83 | """ 84 | assert len(x) == len(self.in_channels) 85 | ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)] 86 | 87 | if len(ups) > 1: 88 | out = torch.cat(ups, dim=1) 89 | else: 90 | out = ups[0] 91 | return [out] 92 | -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from .nerf_decoder import PointDecoder 5 | from .renderer import Renderer, RenderModule 6 | from .hooks import CustomCosineAnealingLrUpdaterHook 7 | 8 | __all__ = [ 9 | "Renderer", "RenderModule", "PointDecoder", "CustomCosineAnealingLrUpdaterHook" 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/hooks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from mmcv import runner 5 | 6 | from mmcv.runner.hooks import HOOKS, CosineAnnealingLrUpdaterHook 7 | from mmcv.runner.hooks.lr_updater import annealing_cos 8 | 9 | @HOOKS.register_module() 10 | class CustomCosineAnealingLrUpdaterHook(CosineAnnealingLrUpdaterHook): 11 | 12 | def __init__(self, 13 | start_at: int = 0, 14 | **kwargs) -> None: 15 | self.start_at = start_at 16 | super().__init__(**kwargs) 17 | 18 | def get_lr(self, runner: 'runner.BaseRunner', base_lr: float): 19 | if self.by_epoch: 20 | progress = runner.epoch - self.start_at 21 | max_progress = runner.max_epochs - self.start_at 22 | else: 23 | iter_per_epoch = runner.max_iters // runner.max_epochs 24 | progress = runner.iter - (iter_per_epoch * self.start_at) 25 | max_progress = runner.max_iters - (iter_per_epoch * self.start_at) 26 | 27 | if runner.epoch < self.start_at: 28 | return base_lr 29 | 30 | if self.min_lr_ratio is not None: 31 | target_lr = base_lr * self.min_lr_ratio 32 | else: 33 | target_lr = self.min_lr # type:ignore 34 | return annealing_cos(base_lr, target_lr, progress / max_progress) -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/nerf_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | import torch 5 | import torch.nn as nn 6 | from mmcv.cnn import build_activation_layer 7 | from mmcv.cnn.bricks.registry import FEEDFORWARD_NETWORK 8 | from mmcv.cnn.bricks.conv_module import ConvModule 9 | from mmcv.runner.base_module import BaseModule 10 | 11 | class SimpleBasicBlock(nn.Module): 12 | def __init__(self, channels_in, channels_out, stride=1): 13 | super(SimpleBasicBlock, self).__init__() 14 | self.conv = ConvModule( 15 | channels_in, 16 | channels_out, 17 | kernel_size=3, 18 | stride=stride, 19 | padding=1, 20 | bias=False, 21 | conv_cfg=dict(type='Conv3d'), 22 | norm_cfg=dict(type='BN3d', ), 23 | act_cfg=dict(type='ReLU',inplace=True)) 24 | 25 | if channels_in != channels_out: 26 | self.skip_conv = ConvModule( 27 | channels_in, channels_out, 28 | kernel_size=1, 29 | stride=1, 30 | padding=0, 31 | bias=False, 32 | conv_cfg=dict(type='Conv3d'), 33 | act_cfg=None 34 | ) 35 | else: 36 | self.skip_conv = None 37 | 38 | def forward(self, x): 39 | skip = self.skip_conv(x) if self.skip_conv is not None else x 40 | return self.conv(x) + skip 41 | 
42 | @FEEDFORWARD_NETWORK.register_module() 43 | class PointDecoder(BaseModule): 44 | """ 45 | Decoder that predicts values for individual points. 46 | """ 47 | def __init__(self, 48 | init_cfg = dict(type='Xavier', layer=['Linear'], distribution='uniform', bias=0.), 49 | in_channels = 256, 50 | embed_dims = 256, 51 | num_hidden_layers=1, 52 | num_classes=1, 53 | ffn_drop=0, 54 | bias_init = None, 55 | act_cfg=dict(type='ReLU', inplace=True), 56 | final_act_cfg=None, 57 | ): 58 | super().__init__(init_cfg=init_cfg) 59 | self.embed_dims = embed_dims 60 | self.activate = build_activation_layer(act_cfg) 61 | self.final_activate = build_activation_layer(final_act_cfg) if final_act_cfg is not None else None 62 | self.num_classes = num_classes 63 | 64 | layers = [] 65 | for _ in range(num_hidden_layers): 66 | layers.extend( 67 | [ 68 | nn.Linear(in_channels, embed_dims), 69 | self.activate, 70 | nn.Dropout(ffn_drop) 71 | ] 72 | ) 73 | in_channels = embed_dims 74 | layers.append(nn.Linear(embed_dims, num_classes)) 75 | self.layers = nn.Sequential(*layers) 76 | 77 | # initialize bias of last linear layer to represent data distribution 78 | if bias_init is not None: 79 | self.layers[-1].bias.data = torch.tensor(bias_init, dtype=torch.float32) 80 | 81 | def forward(self, x): 82 | x = self.layers(x) 83 | if self.final_activate is not None: 84 | x = self.final_activate(x) 85 | return x 86 | 87 | @FEEDFORWARD_NETWORK.register_module() 88 | class VoxelDecoder(BaseModule): 89 | def __init__(self, 90 | init_cfg = dict(type='Xavier', layer=['Conv3d'], distribution='uniform', bias=0.), 91 | embed_dims=256, 92 | in_channels=256, 93 | num_layers=2, 94 | out_layer=None 95 | ): 96 | super().__init__(init_cfg=init_cfg) 97 | self.embed_dims = embed_dims 98 | 99 | layers = [] 100 | 101 | 102 | for i in range(num_layers): 103 | layers.extend( 104 | [ 105 | SimpleBasicBlock(embed_dims if i!=0 else in_channels, embed_dims) 106 | ] 107 | ) 108 | self.out_layer = nn.Linear(embed_dims, out_layer) if out_layer is not None else None 109 | 110 | 111 | self.layers = nn.Sequential(*layers) 112 | 113 | def forward(self, voxel_features): 114 | out = self.layers(voxel_features.permute(0, 4, 1, 2, 3)).permute(0, 2, 3, 4, 1) 115 | 116 | if self.out_layer is not None: 117 | out = self.out_layer(out) 118 | 119 | return out -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/samplers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | import torch 5 | import torch.nn as nn 6 | from abc import abstractmethod 7 | 8 | class Sampler(nn.Module): 9 | "Abstract Sampler superclass in case some common logic is needed in the future." 
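The bias_init argument of PointDecoder above is the usual prior-probability initialization: a chosen logit is written into the last layer's bias so the freshly initialized network already predicts an assumed data distribution. A hedged sketch (the prior and all argument values are invented for illustration):

import math

prior = 0.01  # assumed probability that a queried point is occupied
density_bias = -math.log((1 - prior) / prior)  # sigmoid(density_bias) == prior at initialization

density_decoder = PointDecoder(
    in_channels=256,
    embed_dims=256,
    num_hidden_layers=1,
    num_classes=1,
    bias_init=[density_bias],  # copied into layers[-1].bias by __init__ above
    final_act_cfg=None,        # keep raw logits; any downstream activation is an assumption
)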
10 | def __init__(self) -> None: 11 | super().__init__() 12 | 13 | @abstractmethod 14 | def forward(self, rays, num_samples, near, far, *args, **kwargs): 15 | "Generate Ray Samples" 16 | 17 | def linear_scale_to_target_space(self, samples, near, far): 18 | return samples * far + (1 - samples) * near 19 | 20 | 21 | class UniformSampler(Sampler): 22 | def __init__(self, single_jitter=True) -> None: 23 | super().__init__() 24 | self.single_jitter = single_jitter 25 | 26 | def forward(self, origins, directions, num_samples, near, far): 27 | num_rays = directions.shape[0] 28 | bins = torch.linspace(0.0, 1.0, num_samples + 1).to(directions.device) 29 | 30 | # add random jitter to bin borders (except beginning and end) 31 | jitter_scale = lambda x: x * (1 / num_samples) + (- 1 / num_samples / 2) 32 | jitter = torch.rand((num_rays, num_samples - 1), dtype=bins.dtype, device=bins.device) 33 | jitter = torch.cat((jitter.new_zeros(num_rays, 1), jitter_scale(jitter), jitter.new_zeros(num_rays, 1) ), dim=-1) 34 | bins = bins + jitter 35 | 36 | bin_upper = bins[:, 1:] 37 | bin_lower = bins[:, :-1] 38 | bin_centers = (bin_lower + bin_upper) / 2.0 39 | 40 | # scale to metric space (meters) 41 | samples_start = self.linear_scale_to_target_space(bin_lower, near, far) 42 | samples_end = self.linear_scale_to_target_space(bin_upper, near, far) 43 | samples_center = self.linear_scale_to_target_space(bin_centers, near, far) 44 | 45 | return samples_start, samples_end, samples_center 46 | 47 | class PDFSampler(Sampler): 48 | def __init__(self, single_jitter=True) -> None: 49 | super().__init__() 50 | self.histogram_padding = 0.01 51 | self.single_jitter = single_jitter 52 | 53 | def forward(self, origins, directions, num_samples, near, far, weights=None, existing_bins=None): 54 | # def forward(self, rays, num_samples, near, far, weights=None, existing_bins=None): 55 | assert weights is not None and existing_bins is not None 56 | 57 | # nerf studio version 58 | weights = weights + self.histogram_padding # add small amount to weights 59 | num_bins = num_samples + 1 60 | weights_sum = torch.sum(weights, dim=-1, keepdim=True) 61 | padding = torch.relu(1e-5 - weights_sum) 62 | weights = weights + padding / weights.shape[-1] 63 | weights_sum += padding 64 | 65 | # construct pdf and cdf 66 | pdf = weights / weights_sum 67 | cdf = torch.min(torch.ones_like(pdf), torch.cumsum(pdf, dim=-1)) 68 | cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], dim=-1) 69 | 70 | # create uniform stratified samples 71 | u = torch.linspace(0.0, 1.0 - (1.0 / num_bins), steps=num_bins, device=cdf.device) 72 | u = u.expand(size=(*cdf.shape[:-1], num_bins)) 73 | if self.single_jitter: 74 | rand = torch.rand((*cdf.shape[:-1], 1), device=cdf.device) / num_bins 75 | else: 76 | rand = torch.rand((*cdf.shape[:-1], num_samples + 1), device=cdf.device) / num_bins 77 | u = (u + rand).contiguous() 78 | 79 | existing_bins = torch.cat((existing_bins[0], existing_bins[1][..., -1:]), dim=-1) 80 | 81 | num_initial_samples = weights.shape[-1] 82 | inds = torch.searchsorted(cdf, u, side="right") 83 | below = torch.clamp(inds - 1, 0, num_initial_samples) 84 | above = torch.clamp(inds, 0, num_initial_samples) 85 | cdf_g0 = torch.gather(cdf, -1, below) 86 | bins_g0 = torch.gather(existing_bins, -1, below) 87 | cdf_g1 = torch.gather(cdf, -1, above) 88 | bins_g1 = torch.gather(existing_bins, -1, above) 89 | 90 | t = torch.clip(torch.nan_to_num((u - cdf_g0) / (cdf_g1 - cdf_g0), 0), 0, 1) 91 | bins = bins_g0 + t * (bins_g1 - bins_g0) 92 | 93 | bins = 
bins.detach() 94 | 95 | lower = bins[:, :-1] 96 | upper = bins[:, 1:] 97 | center = (upper + lower) / 2. 98 | 99 | return lower, upper, center -------------------------------------------------------------------------------- /mmdet3d/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.ops import (RoIAlign, SigmoidFocalLoss, batched_nms, 3 | get_compiler_version, get_compiling_cuda_version, nms, 4 | roi_align, sigmoid_focal_loss, soft_nms) 5 | from mmcv.ops.assign_score_withk import assign_score_withk 6 | from mmcv.ops.ball_query import ball_query 7 | from mmcv.ops.furthest_point_sample import (furthest_point_sample, 8 | furthest_point_sample_with_dist) 9 | from mmcv.ops.gather_points import gather_points 10 | from mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation 11 | from mmcv.ops.knn import knn 12 | from mmcv.ops.points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, 13 | points_in_boxes_part) 14 | from mmcv.ops.points_sampler import PointsSampler as Points_Sampler 15 | from mmcv.ops.roiaware_pool3d import RoIAwarePool3d 16 | from mmcv.ops.roipoint_pool3d import RoIPointPool3d 17 | from mmcv.ops.scatter_points import DynamicScatter, dynamic_scatter 18 | from mmcv.ops.three_interpolate import three_interpolate 19 | from mmcv.ops.three_nn import three_nn 20 | from mmcv.ops.voxelize import Voxelization, voxelization 21 | 22 | from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule 23 | from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d 24 | from .paconv import PAConv, PAConvCUDA 25 | from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 26 | PAConvSAModule, PAConvSAModuleMSG, 27 | PointFPModule, PointSAModule, PointSAModuleMSG, 28 | build_sa_module) 29 | from .sparse_block import (SparseBasicBlock, SparseBottleneck, 30 | make_sparse_convmodule) 31 | 32 | __all__ = [ 33 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version', 34 | 'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d', 35 | 'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization', 36 | 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss', 37 | 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', 38 | 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', 39 | 'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample', 40 | 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', 41 | 'gather_points', 'grouping_operation', 'GroupAll', 'QueryAndGroup', 42 | 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', 'DGCNNFPModule', 43 | 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all', 44 | 'assign_score_withk', 45 | 'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA', 46 | 'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule', 47 | 'PAConvCUDASAModuleMSG', 'RoIPointPool3d' 48 | ] 49 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool_v2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Phigent Robotics. All rights reserved. 2 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
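UniformSampler and PDFSampler above together implement the standard coarse-to-fine scheme from the NeRF literature: draw stratified uniform samples, obtain per-sample weights from volume rendering, then resample proportionally to those weights through the inverse CDF. A minimal sketch, assuming invented ray counts and stand-in weights (in the repository the weights would come from the renderer):

import torch
import torch.nn.functional as F

num_rays, n_coarse, n_fine = 1024, 64, 32
origins = torch.zeros(num_rays, 3)
directions = F.normalize(torch.randn(num_rays, 3), dim=-1)
near, far = 0.1, 60.0  # illustrative metric range in meters

uniform = UniformSampler()
lower, upper, centers = uniform(origins, directions, n_coarse, near, far)

weights = torch.rand(num_rays, n_coarse)  # stand-in for volume-rendering weights
pdf = PDFSampler()
fine_lower, fine_upper, fine_centers = pdf(
    origins, directions, n_fine, near, far,
    weights=weights, existing_bins=(lower, upper))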
2 | from .dgcnn_fa_module import DGCNNFAModule 3 | from .dgcnn_fp_module import DGCNNFPModule 4 | from .dgcnn_gf_module import DGCNNGFModule 5 | 6 | __all__ = ['DGCNNFAModule', 'DGCNNFPModule', 'DGCNNGFModule'] 7 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fa_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ConvModule 4 | from mmcv.runner import BaseModule, force_fp32 5 | from torch import nn as nn 6 | 7 | 8 | class DGCNNFAModule(BaseModule): 9 | """Point feature aggregation module used in DGCNN. 10 | 11 | Aggregate all the features of points. 12 | 13 | Args: 14 | mlp_channels (list[int]): List of mlp channels. 15 | norm_cfg (dict, optional): Type of normalization method. 16 | Defaults to dict(type='BN1d'). 17 | act_cfg (dict, optional): Type of activation method. 18 | Defaults to dict(type='ReLU'). 19 | init_cfg (dict, optional): Initialization config. Defaults to None. 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels, 24 | norm_cfg=dict(type='BN1d'), 25 | act_cfg=dict(type='ReLU'), 26 | init_cfg=None): 27 | super().__init__(init_cfg=init_cfg) 28 | self.fp16_enabled = False 29 | self.mlps = nn.Sequential() 30 | for i in range(len(mlp_channels) - 1): 31 | self.mlps.add_module( 32 | f'layer{i}', 33 | ConvModule( 34 | mlp_channels[i], 35 | mlp_channels[i + 1], 36 | kernel_size=(1, ), 37 | stride=(1, ), 38 | conv_cfg=dict(type='Conv1d'), 39 | norm_cfg=norm_cfg, 40 | act_cfg=act_cfg)) 41 | 42 | @force_fp32() 43 | def forward(self, points): 44 | """forward. 45 | 46 | Args: 47 | points (List[Tensor]): tensor of the features to be aggregated. 48 | 49 | Returns: 50 | Tensor: (B, N, M) M = mlp[-1], tensor of the output points. 51 | """ 52 | 53 | if len(points) > 1: 54 | new_points = torch.cat(points[1:], dim=-1) 55 | new_points = new_points.transpose(1, 2).contiguous() # (B, C, N) 56 | new_points_copy = new_points 57 | 58 | new_points = self.mlps(new_points) 59 | 60 | new_fa_points = new_points.max(dim=-1, keepdim=True)[0] 61 | new_fa_points = new_fa_points.repeat(1, 1, new_points.shape[-1]) 62 | 63 | new_points = torch.cat([new_fa_points, new_points_copy], dim=1) 64 | new_points = new_points.transpose(1, 2).contiguous() 65 | else: 66 | new_points = points 67 | 68 | return new_points 69 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule, force_fp32 4 | from torch import nn as nn 5 | 6 | 7 | class DGCNNFPModule(BaseModule): 8 | """Point feature propagation module used in DGCNN. 9 | 10 | Propagate the features from one set to another. 11 | 12 | Args: 13 | mlp_channels (list[int]): List of mlp channels. 14 | norm_cfg (dict, optional): Type of normalization method. 15 | Defaults to dict(type='BN1d'). 16 | act_cfg (dict, optional): Type of activation method. 17 | Defaults to dict(type='ReLU'). 18 | init_cfg (dict, optional): Initialization config. Defaults to None.
19 | """ 20 | 21 | def __init__(self, 22 | mlp_channels, 23 | norm_cfg=dict(type='BN1d'), 24 | act_cfg=dict(type='ReLU'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, ), 36 | stride=(1, ), 37 | conv_cfg=dict(type='Conv1d'), 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg)) 40 | 41 | @force_fp32() 42 | def forward(self, points): 43 | """forward. 44 | 45 | Args: 46 | points (Tensor): (B, N, C) tensor of the input points. 47 | 48 | Returns: 49 | Tensor: (B, N, M) M = mlp[-1], tensor of the new points. 50 | """ 51 | 52 | if points is not None: 53 | new_points = points.transpose(1, 2).contiguous() # (B, C, N) 54 | new_points = self.mlps(new_points) 55 | new_points = new_points.transpose(1, 2).contiguous() 56 | else: 57 | new_points = points 58 | 59 | return new_points 60 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .paconv import PAConv, PAConvCUDA 3 | 4 | __all__ = ['PAConv', 'PAConvCUDA'] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def calc_euclidian_dist(xyz1, xyz2): 6 | """Calculate the Euclidean distance between two sets of points. 7 | 8 | Args: 9 | xyz1 (torch.Tensor): (N, 3), the first set of points. 10 | xyz2 (torch.Tensor): (N, 3), the second set of points. 11 | 12 | Returns: 13 | torch.Tensor: (N, ), the Euclidean distance between each point pair. 14 | """ 15 | assert xyz1.shape[0] == xyz2.shape[0], 'number of points are not the same' 16 | assert xyz1.shape[1] == xyz2.shape[1] == 3, \ 17 | 'points coordinates dimension is not 3' 18 | return torch.norm(xyz1 - xyz2, dim=-1) 19 | 20 | 21 | def assign_score(scores, point_features): 22 | """Perform weighted sum to aggregate output features according to scores. 23 | This function is used in non-CUDA version of PAConv. 24 | 25 | Compared to the cuda op assigh_score_withk, this pytorch implementation 26 | pre-computes output features for the neighbors of all centers, and then 27 | performs aggregation. It consumes more GPU memories. 28 | 29 | Args: 30 | scores (torch.Tensor): (B, npoint, K, M), predicted scores to 31 | aggregate weight matrices in the weight bank. 32 | `npoint` is the number of sampled centers. 33 | `K` is the number of queried neighbors. 34 | `M` is the number of weight matrices in the weight bank. 35 | point_features (torch.Tensor): (B, npoint, K, M, out_dim) 36 | Pre-computed point features to be aggregated. 37 | 38 | Returns: 39 | torch.Tensor: (B, npoint, K, out_dim), the aggregated features. 40 | """ 41 | B, npoint, K, M = scores.size() 42 | scores = scores.view(B, npoint, K, 1, M) 43 | output = torch.matmul(scores, point_features).view(B, npoint, K, -1) 44 | return output 45 | 46 | 47 | def assign_kernel_withoutk(features, kernels, M): 48 | """Pre-compute features with weight matrices in weight bank. This function 49 | is used before cuda op assign_score_withk in CUDA version PAConv. 
50 | 51 | Args: 52 | features (torch.Tensor): (B, in_dim, N), input features of all points. 53 | `N` is the number of points in current point cloud. 54 | kernels (torch.Tensor): (2 * in_dim, M * out_dim), weight matrices in 55 | the weight bank, transformed from (M, 2 * in_dim, out_dim). 56 | `2 * in_dim` is because the input features are concatenation of 57 | (point_features - center_features, point_features). 58 | M (int): Number of weight matrices in the weight bank. 59 | 60 | Returns: 61 | Tuple[torch.Tensor]: both of shape (B, N, M, out_dim): 62 | 63 | - point_features: Pre-computed features for points. 64 | - center_features: Pre-computed features for centers. 65 | """ 66 | B, in_dim, N = features.size() 67 | feat_trans = features.permute(0, 2, 1) # [B, N, in_dim] 68 | out_feat_half1 = torch.matmul(feat_trans, kernels[:in_dim]).view( 69 | B, N, M, -1) # [B, N, M, out_dim] 70 | out_feat_half2 = torch.matmul(feat_trans, kernels[in_dim:]).view( 71 | B, N, M, -1) # [B, N, M, out_dim] 72 | 73 | # TODO: why this hard-coded if condition? 74 | # when the network input is only xyz without additional features 75 | # xyz will be used as features, so that features.size(1) == 3 % 2 != 0 76 | # we need to compensate center_features because otherwise 77 | # `point_features - center_features` will result in all zeros? 78 | if features.size(1) % 2 != 0: 79 | out_feat_half_coord = torch.matmul( 80 | feat_trans[:, :, :3], # [B, N, 3] 81 | kernels[in_dim:in_dim + 3]).view(B, N, M, -1) # [B, N, M, out_dim] 82 | else: 83 | out_feat_half_coord = torch.zeros_like(out_feat_half2) 84 | 85 | point_features = out_feat_half1 + out_feat_half2 86 | center_features = out_feat_half1 + out_feat_half_coord 87 | return point_features, center_features 88 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_sa_module 3 | from .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 4 | PAConvSAModule, PAConvSAModuleMSG) 5 | from .point_fp_module import PointFPModule 6 | from .point_sa_module import PointSAModule, PointSAModuleMSG 7 | 8 | __all__ = [ 9 | 'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule', 10 | 'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule', 11 | 'PAConvCUDASAModuleMSG' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry 3 | 4 | SA_MODULES = Registry('point_sa_module') 5 | 6 | 7 | def build_sa_module(cfg, *args, **kwargs): 8 | """Build PointNet2 set abstraction (SA) module. 9 | 10 | Args: 11 | cfg (None or dict): The SA module config, which should contain: 12 | - type (str): Module type. 13 | - module args: Args needed to instantiate an SA module. 14 | args (argument list): Arguments passed to the `__init__` 15 | method of the corresponding module. 16 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 17 | method of the corresponding SA module . 18 | 19 | Returns: 20 | nn.Module: Created SA module. 
21 | """ 22 | if cfg is None: 23 | cfg_ = dict(type='PointSAModule') 24 | else: 25 | if not isinstance(cfg, dict): 26 | raise TypeError('cfg must be a dict') 27 | if 'type' not in cfg: 28 | raise KeyError('the cfg dict must contain the key "type"') 29 | cfg_ = cfg.copy() 30 | 31 | module_type = cfg_.pop('type') 32 | if module_type not in SA_MODULES: 33 | raise KeyError(f'Unrecognized module type {module_type}') 34 | else: 35 | sa_module = SA_MODULES.get(module_type) 36 | 37 | module = sa_module(*args, **kwargs, **cfg_) 38 | 39 | return module 40 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/point_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List 3 | 4 | import torch 5 | from mmcv.cnn import ConvModule 6 | from mmcv.ops import three_interpolate, three_nn 7 | from mmcv.runner import BaseModule, force_fp32 8 | from torch import nn as nn 9 | 10 | 11 | class PointFPModule(BaseModule): 12 | """Point feature propagation module used in PointNets. 13 | 14 | Propagate the features from one set to another. 15 | 16 | Args: 17 | mlp_channels (list[int]): List of mlp channels. 18 | norm_cfg (dict, optional): Type of normalization method. 19 | Default: dict(type='BN2d'). 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels: List[int], 24 | norm_cfg: dict = dict(type='BN2d'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, 1), 36 | stride=(1, 1), 37 | conv_cfg=dict(type='Conv2d'), 38 | norm_cfg=norm_cfg)) 39 | 40 | @force_fp32() 41 | def forward(self, target: torch.Tensor, source: torch.Tensor, 42 | target_feats: torch.Tensor, 43 | source_feats: torch.Tensor) -> torch.Tensor: 44 | """forward. 45 | 46 | Args: 47 | target (Tensor): (B, n, 3) tensor of the xyz positions of 48 | the target features. 49 | source (Tensor): (B, m, 3) tensor of the xyz positions of 50 | the source features. 51 | target_feats (Tensor): (B, C1, n) tensor of the features to be 52 | propagated to. 53 | source_feats (Tensor): (B, C2, m) tensor of features 54 | to be propagated. 55 | 56 | Return: 57 | Tensor: (B, M, N) M = mlp[-1], tensor of the target features. 58 | """ 59 | if source is not None: 60 | dist, idx = three_nn(target, source) 61 | dist_reciprocal = 1.0 / (dist + 1e-8) 62 | norm = torch.sum(dist_reciprocal, dim=2, keepdim=True) 63 | weight = dist_reciprocal / norm 64 | 65 | interpolated_feats = three_interpolate(source_feats, idx, weight) 66 | else: 67 | interpolated_feats = source_feats.expand(*source_feats.size()[0:2], 68 | target.size(1)) 69 | 70 | if target_feats is not None: 71 | new_features = torch.cat([interpolated_feats, target_feats], 72 | dim=1) # (B, C2 + C1, n) 73 | else: 74 | new_features = interpolated_feats 75 | 76 | new_features = new_features.unsqueeze(-1) 77 | new_features = self.mlps(new_features) 78 | 79 | return new_features.squeeze(-1) 80 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .overwrite_spconv.write_spconv2 import register_spconv2 3 | 4 | try: 5 | import spconv 6 | except ImportError: 7 | IS_SPCONV2_AVAILABLE = False 8 | else: 9 | if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0': 10 | IS_SPCONV2_AVAILABLE = register_spconv2() 11 | else: 12 | IS_SPCONV2_AVAILABLE = False 13 | 14 | __all__ = ['IS_SPCONV2_AVAILABLE'] 15 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/overwrite_spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .write_spconv2 import register_spconv2 3 | 4 | __all__ = ['register_spconv2'] 5 | -------------------------------------------------------------------------------- /mmdet3d/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg, print_log 3 | 4 | from .collect_env import collect_env 5 | from .compat_cfg import compat_cfg 6 | from .logger import get_root_logger 7 | from .misc import find_latest_checkpoint 8 | from .setup_env import setup_multi_processes 9 | 10 | __all__ = [ 11 | 'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env', 12 | 'print_log', 'setup_multi_processes', 'find_latest_checkpoint', 13 | 'compat_cfg' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmdet 6 | import mmdet3d 7 | import mmseg 8 | from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE 9 | 10 | 11 | def collect_env(): 12 | """Collect the information of the running environments.""" 13 | env_info = collect_base_env() 14 | env_info['MMDetection'] = mmdet.__version__ 15 | env_info['MMSegmentation'] = mmseg.__version__ 16 | env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7] 17 | env_info['spconv2.0'] = IS_SPCONV2_AVAILABLE 18 | return env_info 19 | 20 | 21 | if __name__ == '__main__': 22 | for name, val in collect_env().items(): 23 | print(f'{name}: {val}') 24 | -------------------------------------------------------------------------------- /mmdet3d/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | from mmcv.utils import get_logger 5 | 6 | 7 | def get_root_logger(log_file=None, log_level=logging.INFO, name='mmdet3d'): 8 | """Get root logger and add a keyword filter to it. 9 | 10 | The logger will be initialized if it has not been initialized. By default a 11 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 12 | also be added. The name of the root logger is the top-level package name, 13 | e.g., "mmdet3d". 14 | 15 | Args: 16 | log_file (str, optional): File path of log. Defaults to None. 17 | log_level (int, optional): The level of logger. 18 | Defaults to logging.INFO. 19 | name (str, optional): The name of the root logger, also used as a 20 | filter keyword. Defaults to 'mmdet3d'. 
21 | 22 | Returns: 23 | :obj:`logging.Logger`: The obtained logger 24 | """ 25 | logger = get_logger(name=name, log_file=log_file, log_level=log_level) 26 | 27 | # add a logging filter 28 | logging_filter = logging.Filter(name) 29 | logging_filter.filter = lambda record: record.find(name) != -1 30 | 31 | return logger 32 | -------------------------------------------------------------------------------- /mmdet3d/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import glob 3 | import os.path as osp 4 | import warnings 5 | 6 | 7 | def find_latest_checkpoint(path, suffix='pth'): 8 | """Find the latest checkpoint from the working directory. This function is 9 | copied from mmdetection. 10 | 11 | Args: 12 | path(str): The path to find checkpoints. 13 | suffix(str): File extension. 14 | Defaults to pth. 15 | 16 | Returns: 17 | latest_path(str | None): File path of the latest checkpoint. 18 | References: 19 | .. [1] https://github.com/microsoft/SoftTeacher 20 | /blob/main/ssod/utils/patch.py 21 | """ 22 | if not osp.exists(path): 23 | warnings.warn('The path of checkpoints does not exist.') 24 | return None 25 | if osp.exists(osp.join(path, f'latest.{suffix}')): 26 | return osp.join(path, f'latest.{suffix}') 27 | 28 | checkpoints = glob.glob(osp.join(path, f'*.{suffix}')) 29 | if len(checkpoints) == 0: 30 | warnings.warn('There are no checkpoints in the path.') 31 | return None 32 | latest = -1 33 | latest_path = None 34 | for checkpoint in checkpoints: 35 | count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0]) 36 | if count > latest: 37 | latest = count 38 | latest_path = checkpoint 39 | return latest_path 40 | -------------------------------------------------------------------------------- /mmdet3d/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import platform 4 | import warnings 5 | 6 | import cv2 7 | from torch import multiprocessing as mp 8 | 9 | 10 | def setup_multi_processes(cfg): 11 | """Setup multi-processing environment variables.""" 12 | # set multi-process start method as `fork` to speed up the training 13 | if platform.system() != 'Windows': 14 | mp_start_method = cfg.get('mp_start_method', 'fork') 15 | current_method = mp.get_start_method(allow_none=True) 16 | if current_method is not None and current_method != mp_start_method: 17 | warnings.warn( 18 | f'Multi-processing start method `{mp_start_method}` is ' 19 | f'different from the previous setting `{current_method}`.' 20 | f'It will be force set to `{mp_start_method}`. 
You can change ' 21 | f'this behavior by changing `mp_start_method` in your config.') 22 | mp.set_start_method(mp_start_method, force=True) 23 | 24 | # disable opencv multithreading to avoid system being overloaded 25 | opencv_num_threads = cfg.get('opencv_num_threads', 0) 26 | cv2.setNumThreads(opencv_num_threads) 27 | 28 | # setup OMP threads 29 | # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa 30 | workers_per_gpu = cfg.data.get('workers_per_gpu', 1) 31 | if 'train_dataloader' in cfg.data: 32 | workers_per_gpu = \ 33 | max(cfg.data.train_dataloader.get('workers_per_gpu', 1), 34 | workers_per_gpu) 35 | 36 | if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 37 | omp_num_threads = 1 38 | warnings.warn( 39 | f'Setting OMP_NUM_THREADS environment variable for each process ' 40 | f'to be {omp_num_threads} in default, to avoid your system being ' 41 | f'overloaded, please further tune the variable for optimal ' 42 | f'performance in your application as needed.') 43 | os.environ['OMP_NUM_THREADS'] = str(omp_num_threads) 44 | 45 | # setup MKL threads 46 | if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 47 | mkl_num_threads = 1 48 | warnings.warn( 49 | f'Setting MKL_NUM_THREADS environment variable for each process ' 50 | f'to be {mkl_num_threads} in default, to avoid your system being ' 51 | f'overloaded, please further tune the variable for optimal ' 52 | f'performance in your application as needed.') 53 | os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads) 54 | -------------------------------------------------------------------------------- /mmdet3d/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
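setup_multi_processes above reads only a handful of keys from the config. A minimal sketch of a config that exercises all of them (the values are illustrative, not repository defaults):

from mmcv import Config
from mmdet3d.utils import setup_multi_processes

cfg = Config(dict(
    mp_start_method='fork',        # start method on non-Windows platforms
    opencv_num_threads=0,          # forwarded to cv2.setNumThreads
    data=dict(workers_per_gpu=4),  # >1 triggers the OMP/MKL single-thread defaults
))
setup_multi_processes(cfg)  # sets OMP_NUM_THREADS / MKL_NUM_THREADS only if unset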
2 | 3 | __version__ = '1.0.0rc4' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/overview.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/requirements/build.txt -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils==0.16.0 2 | m2r 3 | mistune==0.8.4 4 | myst-parser 5 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 6 | sphinx==4.0.2 7 | sphinx-copybutton 8 | sphinx_markdown_tables 9 | -------------------------------------------------------------------------------- /requirements/mminstall.txt: -------------------------------------------------------------------------------- 1 | mmcv-full>=1.4.8,<=1.6.0 2 | mmdet>=2.24.0,<=3.0.0 3 | mmsegmentation>=0.20.0,<=1.0.0 4 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | spconv 3 | waymo-open-dataset-tf-2-1-0==1.2.0 4 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv>=1.4.8 2 | mmdet>=2.24.0 3 | mmsegmentation>=0.20.1 4 | torch 5 | torchvision 6 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | lyft_dataset_sdk 2 | networkx>=2.2,<2.3 3 | numba==0.53.0 4 | nuscenes-devkit 5 | numpy==1.23.5 6 | nerfacc==0.5.3 7 | ninja==1.11.1 8 | torch_efficient_distloss==0.1.3 9 | open3d==0.18.0 10 | moviepy 11 | plyfile 12 | scikit-image 13 | # by default we also use tensorboard to log results 14 | tensorboard 15 | trimesh>=2.35.39,<2.35.40 16 | yapf==0.40.1 -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
7 | kwarray 8 | pytest 9 | pytest-cov 10 | pytest-runner 11 | ubelt 12 | xdoctest >= 0.10.0 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [yapf] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | 6 | [isort] 7 | line_length = 79 8 | multi_line_output = 0 9 | extra_standard_library = setuptools 10 | known_first_party = mmdet,mmseg,mmdet3d 11 | known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset 12 | no_lines_before = STDLIB,LOCALFOLDER 13 | default_section = THIRDPARTY 14 | 15 | [codespell] 16 | ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD 17 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29522} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29520} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | --------------------------------------------------------------------------------
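Both launcher scripts take their positional arguments first and forward everything after them to train.py (${@:3}) and test.py (${@:4}) respectively. Typical invocations (the config, checkpoint path, and GPU count below are illustrative):

# Hypothetical examples; substitute your own config, checkpoint, and GPU count.
bash tools/dist_train.sh configs/occflownet/occflownet_stbase_2d.py 8
bash tools/dist_test.sh configs/occflownet/occflownet_stbase_2d.py work_dirs/occflownet_stbase_2d/latest.pth 8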