├── .dev_scripts ├── gather_models.py ├── gen_benchmark_script.py ├── linter.sh ├── test_benchmark.sh └── train_benchmark.sh ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── error-report.md │ ├── feature_request.md │ ├── general_questions.md │ └── reimplementation_questions.md ├── pull_request_template.md └── workflows │ ├── build.yml │ ├── deploy.yml │ ├── lint.yml │ └── test_mim.yml ├── .gitignore ├── 3rd-party-licenses.txt ├── LICENSE ├── README.md ├── assets ├── scene-0038_gt.mp4 ├── scene-0107_gt.mp4 ├── scene-0558.mp4 └── scene-0916.mp4 ├── configs ├── _base_ │ ├── datasets │ │ ├── coco_instance.py │ │ ├── kitti-3d-3class.py │ │ ├── kitti-3d-car.py │ │ ├── kitti-mono3d.py │ │ ├── lyft-3d.py │ │ ├── nuim_instance.py │ │ ├── nus-3d.py │ │ ├── nus-mono3d.py │ │ ├── range100_lyft-3d.py │ │ ├── s3dis-3d-5class.py │ │ ├── s3dis_seg-3d-13class.py │ │ ├── scannet-3d-18class.py │ │ ├── scannet_seg-3d-20class.py │ │ ├── sunrgbd-3d-10class.py │ │ ├── waymoD5-3d-3class.py │ │ └── waymoD5-3d-car.py │ ├── default_runtime.py │ ├── models │ │ ├── 3dssd.py │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ ├── dgcnn.py │ │ ├── fcaf3d.py │ │ ├── fcos3d.py │ │ ├── groupfree3d.py │ │ ├── h3dnet.py │ │ ├── hv_pointpillars_fpn_lyft.py │ │ ├── hv_pointpillars_fpn_nus.py │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ ├── hv_second_secfpn_kitti.py │ │ ├── hv_second_secfpn_waymo.py │ │ ├── imvotenet_image.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── paconv_cuda_ssg.py │ │ ├── paconv_ssg.py │ │ ├── parta2.py │ │ ├── pgd.py │ │ ├── point_rcnn.py │ │ ├── pointnet2_msg.py │ │ ├── pointnet2_ssg.py │ │ ├── smoke.py │ │ └── votenet.py │ └── schedules │ │ ├── cosine.py │ │ ├── cyclic_20e.py │ │ ├── mmdet_schedule_1x.py │ │ ├── schedule_2x.py │ │ ├── schedule_3x.py │ │ ├── seg_cosine_100e.py │ │ ├── seg_cosine_150e.py │ │ ├── seg_cosine_200e.py │ │ └── seg_cosine_50e.py └── occflownet │ ├── occflownet_stbase_2d.py │ ├── occflownet_stbase_2d_flow.py │ └── occflownet_stbase_2d_flow_3d.py ├── mmdet3d ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ └── anchor_3d_generator.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assigners │ │ │ └── __init__.py │ │ ├── box_np_ops.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── anchor_free_bbox_coder.py │ │ │ ├── centerpoint_bbox_coders.py │ │ │ ├── delta_xyzwhlr_bbox_coder.py │ │ │ ├── fcos3d_bbox_coder.py │ │ │ ├── groupfree3d_bbox_coder.py │ │ │ ├── monoflex_bbox_coder.py │ │ │ ├── partial_bin_based_bbox_coder.py │ │ │ ├── pgd_bbox_coder.py │ │ │ ├── point_xyzwhlr_bbox_coder.py │ │ │ └── smoke_bbox_coder.py │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── iou3d_calculator.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ └── iou_neg_piecewise_sampler.py │ │ ├── structures │ │ │ ├── __init__.py │ │ │ ├── base_box3d.py │ │ │ ├── box_3d_mode.py │ │ │ ├── cam_box3d.py │ │ │ ├── coord_3d_mode.py │ │ │ ├── depth_box3d.py │ │ │ ├── lidar_box3d.py │ │ │ └── utils.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── indoor_eval.py │ │ ├── instance_seg_eval.py │ │ ├── kitti_utils │ │ │ ├── __init__.py │ │ │ ├── eval.py │ │ │ └── rotate_iou.py │ │ ├── lyft_eval.py │ │ ├── scannet_utils │ │ │ ├── __init__.py │ │ │ ├── evaluate_semantic_instance.py │ │ │ └── 
util_3d.py │ │ ├── seg_eval.py │ │ └── waymo_utils │ │ │ ├── __init__.py │ │ │ └── prediction_kitti_to_waymo.py │ ├── hook │ │ ├── __init__.py │ │ ├── customcheckpoint.py │ │ ├── ema.py │ │ ├── sequentialcontrol.py │ │ ├── syncbncontrol.py │ │ └── utils.py │ ├── points │ │ ├── __init__.py │ │ ├── base_points.py │ │ ├── cam_points.py │ │ ├── depth_points.py │ │ └── lidar_points.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── box3d_nms.py │ │ └── merge_augs.py │ ├── utils │ │ ├── __init__.py │ │ ├── array_converter.py │ │ └── gaussian.py │ ├── visualizer │ │ ├── __init__.py │ │ ├── image_vis.py │ │ ├── open3d_vis.py │ │ └── show_result.py │ └── voxel │ │ ├── __init__.py │ │ ├── builder.py │ │ └── voxel_generator.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── custom_3d.py │ ├── dataset_wrappers.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_occ.py │ ├── occ_metrics.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── data_augment_utils.py │ │ ├── dbsampler.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms_3d.py │ └── utils.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── resnet.py │ │ └── swin.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bevdet.py │ │ ├── centerpoint.py │ │ ├── mvx_two_stage.py │ │ └── occflownet.py │ ├── losses │ │ ├── __init__.py │ │ ├── axis_aligned_iou_loss.py │ │ ├── bce_loss.py │ │ ├── chamfer_distance.py │ │ ├── dist_loss.py │ │ ├── huber_loss.py │ │ ├── multibin_loss.py │ │ ├── paconv_regularization_loss.py │ │ ├── rotated_iou_loss.py │ │ ├── silog_loss.py │ │ ├── tv_loss.py │ │ └── uncertain_smooth_l1_loss.py │ ├── necks │ │ ├── __init__.py │ │ ├── fpn.py │ │ ├── lss_fpn.py │ │ ├── second_fpn.py │ │ └── view_transformer.py │ └── occflownet_modules │ │ ├── __init__.py │ │ ├── hooks.py │ │ ├── nerf_decoder.py │ │ ├── renderer.py │ │ ├── samplers.py │ │ └── utils.py ├── ops │ ├── __init__.py │ ├── bev_pool_v2 │ │ ├── __init__.py │ │ ├── bev_pool.py │ │ └── src │ │ │ ├── bev_pool.cpp │ │ │ └── bev_pool_cuda.cu │ ├── dgcnn_modules │ │ ├── __init__.py │ │ ├── dgcnn_fa_module.py │ │ ├── dgcnn_fp_module.py │ │ └── dgcnn_gf_module.py │ ├── norm.py │ ├── paconv │ │ ├── __init__.py │ │ ├── paconv.py │ │ └── utils.py │ ├── pointnet_modules │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── paconv_sa_module.py │ │ ├── point_fp_module.py │ │ └── point_sa_module.py │ ├── sparse_block.py │ └── spconv │ │ ├── __init__.py │ │ └── overwrite_spconv │ │ ├── __init__.py │ │ └── write_spconv2.py ├── utils │ ├── __init__.py │ ├── camera.py │ ├── collect_env.py │ ├── comm.py │ ├── compat_cfg.py │ ├── constants.py │ ├── env.py │ ├── logger.py │ ├── misc.py │ └── setup_env.py └── version.py ├── overview.png ├── requirements.txt ├── requirements ├── build.txt ├── docs.txt ├── mminstall.txt ├── optional.txt ├── readthedocs.txt ├── runtime.txt └── tests.txt ├── setup.cfg ├── setup.py └── tools ├── create_data_bevdet.py ├── create_flow_data.py ├── data_converter ├── __init__.py ├── create_gt_database.py └── nuscenes_converter.py ├── dist_test.sh ├── dist_train.sh ├── test.py └── train.py /.dev_scripts/linter.sh: -------------------------------------------------------------------------------- 1 | yapf -r -i mmdet3d/ configs/ tests/ tools/ 2 | isort mmdet3d/ configs/ tests/ tools/ 3 | flake8 . 
4 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | - Using welcoming and inclusive language 18 | - Being respectful of differing viewpoints and experiences 19 | - Gracefully accepting constructive criticism 20 | - Focusing on what is best for the community 21 | - Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | - The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | - Trolling, insulting/derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at chenkaidev@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | For answers to common questions about this code of conduct, see 74 | https://www.contributor-covenant.org/faq 75 | 76 | [homepage]: https://www.contributor-covenant.org 77 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | We appreciate all contributions to improve MMDetection3D. Please refer to [CONTRIBUTING.md](https://github.com/open-mmlab/mmcv/blob/master/CONTRIBUTING.md) in MMCV for more details about the contributing guidelines. 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/error-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Error report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | Thanks for your error report; we appreciate it a lot. 10 | 11 | **Checklist** 12 | 13 | 1. I have searched related issues but cannot get the expected help. 14 | 2. The bug has not been fixed in the latest version. 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | 19 | **Reproduction** 20 | 21 | 1. What command or script did you run? 22 | 23 | ``` 24 | A placeholder for the command. 25 | ``` 26 | 27 | 2. Did you make any modifications to the code or config? Did you understand what you have modified? 28 | 3. What dataset did you use? 29 | 30 | **Environment** 31 | 32 | 1. Please run `python mmdet3d/utils/collect_env.py` to collect necessary environment information and paste it here. 33 | 2. You may add additional information that may be helpful for locating the problem, such as 34 | - How you installed PyTorch \[e.g., pip, conda, source\] 35 | - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 36 | 37 | **Error traceback** 38 | If applicable, paste the error traceback here. 39 | 40 | ``` 41 | A placeholder for the traceback. 42 | ``` 43 | 44 | **Bug fix** 45 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here; that would be much appreciated! 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | **Describe the feature** 10 | 11 | **Motivation** 12 | A clear and concise description of the motivation for the feature. 13 | Ex1. It is inconvenient when \[....\]. 14 | Ex2. There is a recent paper \[....\], which is very helpful for \[....\]. 15 | 16 | **Related resources** 17 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | If you would like to implement the feature and create a PR, please leave a comment here; that would be much appreciated. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General questions 3 | about: Ask general questions to get help 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/reimplementation_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Reimplementation Questions 3 | about: Ask questions about model reimplementation 4 | title: '' 5 | labels: reimplementation 6 | assignees: '' 7 | --- 8 | 9 | **Notice** 10 | 11 | There are several common situations in reimplementation issues, listed below: 12 | 13 | 1. Reimplement a model in the model zoo using the provided configs 14 | 2. Reimplement a model in the model zoo on other datasets (e.g., custom datasets) 15 | 3. Reimplement a custom model but all the components are implemented in MMDetection3D 16 | 4. Reimplement a custom model with new modules implemented by yourself 17 | 18 | What to do differs by case, as below. 19 | 20 | - For cases 1 & 3, please follow the steps in the following sections so that we can quickly identify the issue. 21 | - For cases 2 & 4, please understand that we are not able to help much here because we usually do not know the full code, and users are responsible for the code they write. 22 | - One suggestion for cases 2 & 4 is to first check whether the bug lies in the self-implemented code or the original code. For example, users can first make sure that the same model runs well on supported datasets. If you still need help, please describe what you have done and what you obtained in the issue, follow the steps in the following sections, and be as clear as possible so that we can better help you. 23 | 24 | **Checklist** 25 | 26 | 1. I have searched related issues but cannot get the expected help. 27 | 2. The issue has not been fixed in the latest version. 28 | 29 | **Describe the issue** 30 | 31 | A clear and concise description of the problem you met and what you have done. 32 | 33 | **Reproduction** 34 | 35 | 1. What command or script did you run? 36 | 37 | ``` 38 | A placeholder for the command. 39 | ``` 40 | 41 | 2. Which config did you run? 42 | 43 | ``` 44 | A placeholder for the config. 45 | ``` 46 | 47 | 3. Did you make any modifications to the code or config? Did you understand what you have modified? 48 | 4. What dataset did you use? 49 | 50 | **Environment** 51 | 52 | 1. Please run `python mmdet3d/utils/collect_env.py` to collect necessary environment information and paste it here. 53 | 2. You may add additional information that may be helpful for locating the problem, such as 54 | - How you installed PyTorch \[e.g., pip, conda, source\] 55 | - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 56 | 57 | **Results** 58 | 59 | If applicable, paste the related results here, e.g., what you expected and what you got.
60 | 61 | ``` 62 | A placeholder for results comparison 63 | ``` 64 | 65 | **Issue fix** 66 | 67 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here; that would be much appreciated! 68 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Thanks for your contribution, and we appreciate it a lot. The following instructions will make your pull request healthier and help it get feedback more easily. If you do not understand some items, don't worry, just make the pull request and seek help from maintainers. 2 | 3 | ## Motivation 4 | 5 | Please describe the motivation of this PR and the goal you want to achieve through this PR. 6 | 7 | ## Modification 8 | 9 | Please briefly describe what modification is made in this PR. 10 | 11 | ## BC-breaking (Optional) 12 | 13 | Does the modification introduce changes that break the backward compatibility of the downstream repos? 14 | If so, please describe how it breaks the compatibility and how the downstream projects should modify their code to keep compatibility with this PR. 15 | 16 | ## Use cases (Optional) 17 | 18 | If this PR introduces a new feature, it is better to list some use cases here and update the documentation. 19 | 20 | ## Checklist 21 | 22 | 1. Pre-commit or other linting tools are used to fix the potential lint issues. 23 | 2. The modification is covered by complete unit tests. If not, please add more unit tests to ensure correctness. 24 | 3. If the modification has potential influence on downstream projects, this PR should be tested with downstream projects. 25 | 4. The documentation has been modified accordingly, like docstrings or example tutorials. 26 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: deploy 2 | 3 | on: push 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | build-n-publish: 11 | runs-on: ubuntu-18.04 12 | if: startsWith(github.event.ref, 'refs/tags') 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.7 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.7 19 | - name: Install torch 20 | run: pip install torch 21 | - name: Build MMDet3D 22 | run: python setup.py sdist 23 | - name: Publish distribution to PyPI 24 | run: | 25 | pip install twine 26 | twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }} 27 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: [push, pull_request] 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-18.04 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.7 15 | uses: actions/setup-python@v1 16 | with: 17 | python-version: 3.7 18 | - name: Install linting dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install flake8==3.8.3 isort==5.10.1 yapf==v0.30.0 interrogate 22 | - name: Lint with flake8 23 | run: flake8 .
24 | - name: Lint with isort 25 | run: isort --check-only --diff mmdet3d/ tests/ examples/ 26 | - name: Format python code with yapf 27 | run: yapf -r -d mmdet3d/ tests/ examples/ 28 | - name: Check docstring 29 | run: interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmdet3d/ops --ignore-regex "__repr__" --fail-under 95 mmdet3d 30 | -------------------------------------------------------------------------------- /.github/workflows/test_mim.yml: -------------------------------------------------------------------------------- 1 | name: test-mim 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'model-index.yml' 7 | - 'configs/**' 8 | 9 | pull_request: 10 | paths: 11 | - 'model-index.yml' 12 | - 'configs/**' 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build_cpu: 20 | runs-on: ubuntu-18.04 21 | strategy: 22 | matrix: 23 | python-version: [3.7] 24 | torch: [1.8.0] 25 | include: 26 | - torch: 1.8.0 27 | torch_version: torch1.8 28 | torchvision: 0.9.0 29 | steps: 30 | - uses: actions/checkout@v2 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@v2 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | - name: Upgrade pip 36 | run: pip install pip --upgrade 37 | - name: Install Pillow 38 | run: pip install Pillow==6.2.2 39 | if: ${{matrix.torchvision == '0.4.2'}} 40 | - name: Install PyTorch 41 | run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html 42 | - name: Install openmim 43 | run: pip install openmim 44 | - name: Build and install 45 | run: rm -rf .eggs && mim install -e . 46 | - name: test commands of mim 47 | run: mim search mmdet3d 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.ipynb 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/en/_build/ 69 | docs/zh_cn/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # cython generated cpp 109 | data 110 | .vscode 111 | .idea 112 | 113 | # custom 114 | *.pkl 115 | *.pkl.json 116 | *.log.json 117 | work_dirs/ 118 | exps/ 119 | *~ 120 | mmdet3d/.mim 121 | ckpts 122 | experiments 123 | 124 | # Pytorch 125 | *.pth 126 | 127 | # demo 128 | *.jpg 129 | # *.png 130 | data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/ 131 | data/scannet/scans/ 132 | data/sunrgbd/OFFICIAL_SUNRGBD/ 133 | *.obj 134 | *.ply 135 | 136 | # Waymo evaluation 137 | mmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main 138 | -------------------------------------------------------------------------------- /3rd-party-licenses.txt: -------------------------------------------------------------------------------- 1 | Third Party Licenses 2 | ==================== 3 | 4 | ------------------------------------------------------------------------- 5 | Overview 6 | -------------------------------------------------------------------------- 7 | 8 | BEVDet: https://github.com/HuangJunJie2017/BEVDet 9 | License: Apache-2.0 license -------------------------------------------------------------------------------- /assets/scene-0038_gt.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0038_gt.mp4 -------------------------------------------------------------------------------- /assets/scene-0107_gt.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0107_gt.mp4 -------------------------------------------------------------------------------- /assets/scene-0558.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0558.mp4 -------------------------------------------------------------------------------- /assets/scene-0916.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/assets/scene-0916.mp4 -------------------------------------------------------------------------------- 
/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /configs/_base_/datasets/kitti-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'KittiMonoDataset' 2 | data_root = 'data/kitti/' 3 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 4 | input_modality = dict(use_lidar=False, use_camera=True) 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFileMono3D'), 9 | dict( 10 | type='LoadAnnotations3D', 11 | with_bbox=True, 12 | with_label=True, 13 | with_attr_label=False, 14 | with_bbox_3d=True, 15 | with_label_3d=True, 16 | with_bbox_depth=True), 17 | dict(type='Resize', img_scale=(1242, 375), keep_ratio=True), 18 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle3D', class_names=class_names), 22 | dict( 23 | type='Collect3D', 24 | keys=[ 25 | 'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d', 26 | 'centers2d', 'depths' 27 | ]), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFileMono3D'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(1242, 375), 34 | flip=False, 35 | transforms=[ 36 | dict(type='RandomFlip3D'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict( 40 | type='DefaultFormatBundle3D', 41 | class_names=class_names, 42 | with_label=False), 43 | dict(type='Collect3D', keys=['img']), 44 | ]) 45 | ] 46 | # construct a pipeline for data and gt loading in show function 47 | # please keep its loading function consistent 
with test_pipeline (e.g. client) 48 | eval_pipeline = [ 49 | dict(type='LoadImageFromFileMono3D'), 50 | dict( 51 | type='DefaultFormatBundle3D', 52 | class_names=class_names, 53 | with_label=False), 54 | dict(type='Collect3D', keys=['img']) 55 | ] 56 | data = dict( 57 | samples_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | data_root=data_root, 62 | ann_file=data_root + 'kitti_infos_train_mono3d.coco.json', 63 | info_file=data_root + 'kitti_infos_train.pkl', 64 | img_prefix=data_root, 65 | classes=class_names, 66 | pipeline=train_pipeline, 67 | modality=input_modality, 68 | test_mode=False, 69 | box_type_3d='Camera'), 70 | val=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 74 | info_file=data_root + 'kitti_infos_val.pkl', 75 | img_prefix=data_root, 76 | classes=class_names, 77 | pipeline=test_pipeline, 78 | modality=input_modality, 79 | test_mode=True, 80 | box_type_3d='Camera'), 81 | test=dict( 82 | type=dataset_type, 83 | data_root=data_root, 84 | ann_file=data_root + 'kitti_infos_val_mono3d.coco.json', 85 | info_file=data_root + 'kitti_infos_val.pkl', 86 | img_prefix=data_root, 87 | classes=class_names, 88 | pipeline=test_pipeline, 89 | modality=input_modality, 90 | test_mode=True, 91 | box_type_3d='Camera')) 92 | evaluation = dict(interval=2) 93 | -------------------------------------------------------------------------------- /configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | 
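# --- Editor's note, not part of the repository ------------------------------
# The `train_pipeline`/`test_pipeline`/`eval_pipeline` lists in the dataset
# configs above are plain lists of transform configs. At runtime the dataset
# builds each dict through the PIPELINES registry and chains the results; the
# repository's own implementation lives in mmdet3d/datasets/pipelines/compose.py.
# A simplified sketch of that mechanism, assuming mmcv's Registry/build_from_cfg:
from mmcv.utils import Registry, build_from_cfg

PIPELINES = Registry('pipeline')


class Compose:
    """Chain a list of transform configs into a single callable."""

    def __init__(self, transforms):
        # Each entry may be a config dict (built via the registry) or an
        # already-instantiated callable.
        self.transforms = [
            build_from_cfg(t, PIPELINES) if isinstance(t, dict) else t
            for t in transforms
        ]

    def __call__(self, results):
        for t in self.transforms:
            results = t(results)
            # A transform may return None to drop an invalid sample.
            if results is None:
                return None
        return results
# -----------------------------------------------------------------------------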
-------------------------------------------------------------------------------- /configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'NuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | 20 | # disable opencv multithreading to avoid system being overloaded 21 | opencv_num_threads = 0 22 | # set multi-process start method as `fork` to speed up the training 23 | mp_start_method = 'fork' 24 | -------------------------------------------------------------------------------- /configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 |
vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | 
share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 
| # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /configs/_base_/models/dgcnn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='DGCNNBackbone', 6 | in_channels=9, # [xyz, rgb, normal_xyz], modified with dataset 7 | num_samples=(20, 20, 20), 8 | knn_modes=('D-KNN', 'F-KNN', 'F-KNN'), 9 | radius=(None, None, None), 10 | gf_channels=((64, 64), (64, 64), (64, )), 11 | fa_channels=(1024, ), 12 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2)), 13 | decode_head=dict( 14 | type='DGCNNHead', 15 | fp_channels=(1216, 512), 16 | channels=256, 17 | dropout_ratio=0.5, 18 | conv_cfg=dict(type='Conv1d'), 19 | norm_cfg=dict(type='BN1d'), 20 | act_cfg=dict(type='LeakyReLU', negative_slope=0.2), 21 | loss_decode=dict( 22 | type='CrossEntropyLoss', 23 | use_sigmoid=False, 24 | class_weight=None, # modified with dataset 25 | loss_weight=1.0)), 26 | # model training and testing settings 27 | train_cfg=dict(), 28 | test_cfg=dict(mode='slide')) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/fcaf3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MinkSingleStage3DDetector', 3 | voxel_size=.01, 4 | backbone=dict(type='MinkResNet', in_channels=3, depth=34), 5 | head=dict( 6 | type='FCAF3DHead', 7 | in_channels=(64, 128, 256, 512), 8 | out_channels=128, 9 | voxel_size=.01, 10 | pts_prune_threshold=100000, 11 | pts_assign_threshold=27, 12 | pts_center_threshold=18, 13 | n_classes=18, 14 | n_reg_outs=6), 15 | train_cfg=dict(), 16 | test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01)) 17 | -------------------------------------------------------------------------------- /configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | backbone=dict( 4 | type='ResNet', 5 | depth=101, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint='open-mmlab://detectron2/resnet101_caffe')), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5, 22 | relu_before_extra_convs=True), 23 | bbox_head=dict( 24 | type='FCOSMono3DHead', 25 | num_classes=10, 26 | in_channels=256, 27 | stacked_convs=2, 28 | feat_channels=256, 29 | use_direction_classifier=True, 30 | diff_rad_by_sin=True, 31 | pred_attrs=True, 32 | pred_velo=True, 33 | dir_offset=0.7854, # pi/4 34 | 
dir_limit_offset=0, 35 | strides=[8, 16, 32, 64, 128], 36 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 37 | cls_branch=(256, ), 38 | reg_branch=( 39 | (256, ), # offset 40 | (256, ), # depth 41 | (256, ), # size 42 | (256, ), # rot 43 | () # velo 44 | ), 45 | dir_branch=(256, ), 46 | attr_branch=(256, ), 47 | loss_cls=dict( 48 | type='FocalLoss', 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | loss_weight=1.0), 53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 54 | loss_dir=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_attr=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 58 | loss_centerness=dict( 59 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 60 | bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9), 61 | norm_on_bbox=True, 62 | centerness_on_reg=True, 63 | center_sampling=True, 64 | conv_bias=True, 65 | dcn_on_last_conv=True), 66 | train_cfg=dict( 67 | allowed_border=0, 68 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 69 | pos_weight=-1, 70 | debug=False), 71 | test_cfg=dict( 72 | use_rotate_nms=True, 73 | nms_across_levels=False, 74 | nms_pre=1000, 75 | nms_thr=0.8, 76 | score_thr=0.05, 77 | min_bbox_size=0, 78 | max_per_img=200)) 79 | -------------------------------------------------------------------------------- /configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | 
train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
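# Illustrative sanity check for the note above (derived from this file's own
# values): with point_cloud_range [-50, -50, -5, 50, 50, 3] and an x/y voxel
# size of 0.25, the BEV grid is (50 - (-50)) / 0.25 = 400 cells per axis,
# which is why pts_middle_encoder below uses output_shape=[400, 400].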
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [2.5981, 0.8660, 1.], # 1.5 / sqrt(3) 53 | [1.7321, 0.5774, 1.], # 1 / sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=-0.7854, # -pi / 4 63 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 64 | loss_cls=dict( 65 | type='FocalLoss', 66 | use_sigmoid=True, 67 | gamma=2.0, 68 | alpha=0.25, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 71 | loss_dir=dict( 72 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 73 | # model training and testing settings 74 | train_cfg=dict( 75 | pts=dict( 76 | assigner=dict( 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.3, 82 | ignore_iof_thr=-1), 83 | allowed_border=0, 84 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 85 | pos_weight=-1, 86 | debug=False)), 87 | test_cfg=dict( 88 | pts=dict( 89 | use_rotate_nms=True, 90 | nms_across_levels=False, 91 | nms_pre=1000, 92 | nms_thr=0.2, 93 | score_thr=0.05, 94 | min_bbox_size=0, 95 | max_num=500))) 96 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
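# Illustrative sanity check: this variant widens the x/y range to
# [-100, 100] while inheriting the 0.25 voxel size from the base config, so
# the BEV grid grows to (100 - (-100)) / 0.25 = 800 cells per axis, matching
# pts_middle_encoder's output_shape=[800, 800] below.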
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | assign_per_class=True, 38 | anchor_generator=dict( 39 | type='AlignedAnchor3DRangeGenerator', 40 | ranges=[ 41 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 42 | [0, -39.68, -0.6, 69.12, 39.68, -0.6], 43 | [0, -39.68, -1.78, 69.12, 39.68, -1.78], 44 | ], 45 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 46 | rotations=[0, 1.57], 47 | reshape_out=False), 48 | diff_rad_by_sin=True, 49 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 50 | loss_cls=dict( 51 | type='FocalLoss', 52 | use_sigmoid=True, 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 57 | loss_dir=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | assigner=[ 62 | dict( # for Pedestrian 63 | type='MaxIoUAssigner', 64 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 65 | pos_iou_thr=0.5, 66 | neg_iou_thr=0.35, 67 | min_pos_iou=0.35, 68 | ignore_iof_thr=-1), 69 | dict( # for Cyclist 70 | type='MaxIoUAssigner', 71 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 72 | pos_iou_thr=0.5, 73 | neg_iou_thr=0.35, 74 | min_pos_iou=0.35, 75 | ignore_iof_thr=-1), 76 | dict( # for Car 77 | type='MaxIoUAssigner', 78 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 79 | pos_iou_thr=0.6, 80 | neg_iou_thr=0.45, 81 | min_pos_iou=0.45, 82 | ignore_iof_thr=-1), 83 | ], 84 | allowed_border=0, 85 | pos_weight=-1, 86 | debug=False), 87 | test_cfg=dict( 88 | use_rotate_nms=True, 89 | nms_across_levels=False, 90 | nms_thr=0.01, 91 | score_thr=0.1, 
92 | min_bbox_size=0, 93 | nms_pre=100, 94 | max_num=50)) 95 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [4.73, 2.08, 1.77], # car 52 | [1.81, 0.84, 1.77], # cyclist 53 | [0.91, 0.84, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=-0.7854, # -pi / 4 59 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 60 | loss_cls=dict( 61 | type='FocalLoss', 62 | use_sigmoid=True, 63 | gamma=2.0, 64 | alpha=0.25, 65 | loss_weight=1.0), 66 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 67 | loss_dir=dict( 68 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | pts=dict( 72 | assigner=[ 73 | dict( # car 74 | type='MaxIoUAssigner', 75 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.55, 77 | neg_iou_thr=0.4, 78 | min_pos_iou=0.4, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1), 87 | dict( # pedestrian 88 | type='MaxIoUAssigner', 89 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.3, 92 | min_pos_iou=0.3, 93 | ignore_iof_thr=-1), 94 | ], 95 | allowed_border=0, 96 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 97 | pos_weight=-1, 98 | debug=False)), 99 | test_cfg=dict( 100 | pts=dict( 101 | use_rotate_nms=True, 102 | nms_across_levels=False, 103 | nms_pre=4096, 104 | 
nms_thr=0.25, 105 | score_thr=0.1, 106 | min_bbox_size=0, 107 | max_num=500))) 108 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
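# Illustrative sanity check: with point_cloud_range
# [-76.8, -51.2, -2, 76.8, 51.2, 4] and voxel_size [0.08, 0.08, 0.1], the
# grid is 153.6 / 0.08 = 1920 (x) by 102.4 / 0.08 = 1280 (y) by
# 6 / 0.1 = 60 (z) voxels; SparseEncoder's sparse_shape [61, 1280, 1920]
# below adds the one extra z slice that SECOND-style sparse encoders expect.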
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [4.73, 2.08, 1.77], # car 46 | [0.91, 0.84, 1.74], # pedestrian 47 | [1.81, 0.84, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=-0.7854, # -pi / 4 53 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 54 | loss_cls=dict( 55 | type='FocalLoss', 56 | use_sigmoid=True, 57 | gamma=2.0, 58 | alpha=0.25, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 61 | loss_dir=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | assigner=[ 66 | dict( # car 67 | type='MaxIoUAssigner', 68 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 69 | pos_iou_thr=0.55, 70 | neg_iou_thr=0.4, 71 | min_pos_iou=0.4, 72 | ignore_iof_thr=-1), 73 | dict( # pedestrian 74 | type='MaxIoUAssigner', 75 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 76 | pos_iou_thr=0.5, 77 | neg_iou_thr=0.3, 78 | min_pos_iou=0.3, 79 | ignore_iof_thr=-1), 80 | dict( # cyclist 81 | type='MaxIoUAssigner', 82 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.3, 85 | min_pos_iou=0.3, 86 | ignore_iof_thr=-1) 87 | ], 88 | allowed_border=0, 89 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 90 | pos_weight=-1, 91 | debug=False), 92 | test_cfg=dict( 93 | use_rotate_nms=True, 94 | nms_across_levels=False, 95 | nms_pre=4096, 96 | nms_thr=0.25, 97 | score_thr=0.1, 98 | min_bbox_size=0, 99 | max_num=500)) 100 | -------------------------------------------------------------------------------- /configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 
2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 | paconv_num_kernels=[16, 16, 16], 20 | 
paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from the backbone; 30 | # different from PointNet++, it also concatenates input features in the last 31 | # level of the decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /configs/_base_/models/pgd.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos3d.py' 2 | # model settings 3 | model = dict( 4 | bbox_head=dict( 5 | _delete_=True, 6 | type='PGDHead', 7 | num_classes=10, 8 | in_channels=256, 9 | stacked_convs=2, 10 | feat_channels=256, 11 | use_direction_classifier=True, 12 | diff_rad_by_sin=True, 13 | pred_attrs=True, 14 | pred_velo=True, 15 | pred_bbox2d=True, 16 | pred_keypoints=False, 17 | dir_offset=0.7854, # pi/4 18 | strides=[8, 16, 32, 64, 128], 19 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 20 | cls_branch=(256, ), 21 | reg_branch=( 22 | (256, ), # offset 23 | (256, ), # depth 24 | (256, ), # size 25 | (256, ), # rot 26 | () # velo 27 | ), 28 | dir_branch=(256, ), 29 | attr_branch=(256, ), 30 | loss_cls=dict( 31 | type='FocalLoss', 32 | use_sigmoid=True, 33 | gamma=2.0, 34 | alpha=0.25, 35 | loss_weight=1.0), 36 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 37 | loss_dir=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_attr=dict( 40 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 41 | loss_centerness=dict( 42 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 43 | norm_on_bbox=True, 44 | centerness_on_reg=True, 45 | center_sampling=True, 46 | conv_bias=True, 47 | dcn_on_last_conv=True, 48 | use_depth_classifier=True, 49 | depth_branch=(256, ), 50 | depth_range=(0, 50), 51 | depth_unit=10, 52 | division='uniform', 53 | depth_bins=6, 54 | bbox_coder=dict(type='PGDBBoxCoder', code_size=9)), 55 | test_cfg=dict(nms_pre=1000, nms_thr=0.8, score_thr=0.01, max_per_img=200)) 56 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64,
96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /configs/_base_/models/smoke.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SMOKEMono3D', 3 | backbone=dict( 4 | type='DLANet', 5 | depth=34, 6 | in_channels=3, 7 | norm_cfg=dict(type='GN', num_groups=32), 8 | init_cfg=dict( 9 | type='Pretrained', 10 | checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth' 11 | )), 12 | neck=dict( 13 | type='DLANeck', 14 | in_channels=[16, 32, 64, 128, 256, 512], 15 | start_level=2, 16 | end_level=5, 17 | norm_cfg=dict(type='GN', num_groups=32)), 18 | bbox_head=dict( 19 | type='SMOKEMono3DHead', 20 | num_classes=3, 21 | in_channels=64, 22 | dim_channel=[3, 4, 5], 23 | ori_channel=[6, 7], 24 | stacked_convs=0, 25 | feat_channels=64, 26 | use_direction_classifier=False, 27 | diff_rad_by_sin=False, 28 | pred_attrs=False, 29 | pred_velo=False, 30 | dir_offset=0, 31 | strides=None, 32 | group_reg_dims=(8, ), 33 | cls_branch=(256, ), 34 | reg_branch=((256, ), ), 35 | num_attrs=0, 36 | bbox_code_size=7, 37 | dir_branch=(), 38 | attr_branch=(), 39 | bbox_coder=dict( 40 | type='SMOKECoder', 41 | base_depth=(28.01, 16.32), 42 | base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63, 43 | 1.53)), 44 | code_size=7), 45 | loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0), 46 | loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300), 47 | loss_dir=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_attr=None, 50 | conv_bias=True, 51 | dcn_on_last_conv=False), 52 | train_cfg=None, 53 | test_cfg=dict(topK=100, 
local_maximum_kernel=3, max_per_img=100)) 54 | -------------------------------------------------------------------------------- /configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training.
2 | # Since the models are trained for 20 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_100e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | 7 | # runtime settings 8 | runner = dict(type='EpochBasedRunner', max_epochs=100) 9 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on
S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /mmdet3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | import mmdet 5 | import mmseg 6 | from .version import __version__, short_version 7 | 8 | 9 | def digit_version(version_str): 10 | digit_version = [] 11 | for x in version_str.split('.'): 12 | if x.isdigit(): 13 | digit_version.append(int(x)) 14 | elif x.find('rc') != -1: 15 | patch_version = x.split('rc') 16 | digit_version.append(int(patch_version[0]) - 1) 17 | digit_version.append(int(patch_version[1])) 18 | return digit_version 19 | 20 | 21 | mmcv_minimum_version = '1.5.2' 22 | mmcv_maximum_version = '1.7.0' 23 | mmcv_version = digit_version(mmcv.__version__) 24 | 25 | 26 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 27 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 28 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 29 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 30 | 31 | mmdet_minimum_version = '2.24.0' 32 | mmdet_maximum_version = '3.0.0' 33 | mmdet_version = digit_version(mmdet.__version__) 34 | assert (mmdet_version >= digit_version(mmdet_minimum_version) 35 | and mmdet_version <= digit_version(mmdet_maximum_version)), \ 36 | f'MMDET=={mmdet.__version__} is used but incompatible. ' \ 37 | f'Please install mmdet>={mmdet_minimum_version}, ' \ 38 | f'<={mmdet_maximum_version}.' 39 | 40 | mmseg_minimum_version = '0.20.0' 41 | mmseg_maximum_version = '1.0.0' 42 | mmseg_version = digit_version(mmseg.__version__) 43 | assert (mmseg_version >= digit_version(mmseg_minimum_version) 44 | and mmseg_version <= digit_version(mmseg_maximum_version)), \ 45 | f'MMSEG=={mmseg.__version__} is used but incompatible. ' \ 46 | f'Please install mmseg>={mmseg_minimum_version}, ' \ 47 | f'<={mmseg_maximum_version}.' 
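# Note on digit_version() above: release candidates sort below the final
# release, e.g. digit_version('1.7.0rc1') == [1, 7, -1, 1] is less than
# digit_version('1.7.0') == [1, 7, 0], so rc builds of the pinned maximum
# versions still pass the assertions.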
48 | 49 | __all__ = ['__version__', 'short_version'] 50 | -------------------------------------------------------------------------------- /mmdet3d/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inference import (convert_SyncBN, inference_detector, 3 | inference_mono_3d_detector, 4 | inference_multi_modality_detector, inference_segmentor, 5 | init_model, show_result_meshlab) 6 | from .test import single_gpu_test 7 | from .train import init_random_seed, train_model 8 | 9 | __all__ = [ 10 | 'inference_detector', 'init_model', 'single_gpu_test', 11 | 'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN', 12 | 'train_model', 'inference_multi_modality_detector', 'inference_segmentor', 13 | 'init_random_seed' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/apis/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from os import path as osp 3 | 4 | import mmcv 5 | import torch 6 | from mmcv.image import tensor2imgs 7 | 8 | from mmdet3d.models import Base3DDetector 9 | 10 | 11 | def single_gpu_test(model, 12 | data_loader, 13 | show=False, 14 | out_dir=None, 15 | show_score_thr=0.3): 16 | """Test a model with a single GPU. 17 | 18 | This method tests the model with a single GPU and provides the 'show' option. 19 | By setting ``show=True``, it saves the visualization results under 20 | ``out_dir``. 21 | 22 | Args: 23 | model (nn.Module): Model to be tested. 24 | data_loader (DataLoader): PyTorch data loader. 25 | show (bool, optional): Whether to save visualization results. 26 | Default: False. 27 | out_dir (str, optional): The path to save visualization results. 28 | Default: None. 29 | show_score_thr (float, optional): Score threshold for visualized results. Default: 0.3. 30 | Returns: 31 | list[dict]: The prediction results.
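Example: a minimal usage sketch (assuming ``model`` is already wrapped in ``MMDataParallel`` and ``data_loader`` is built from the test config): ``outputs = single_gpu_test(model, data_loader, show=False)``.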
32 | """ 33 | model.eval() 34 | results = [] 35 | dataset = data_loader.dataset 36 | prog_bar = mmcv.ProgressBar(len(dataset)) 37 | for i, data in enumerate(data_loader): 38 | with torch.no_grad(): 39 | result = model(return_loss=False, rescale=True, **data) 40 | 41 | if show: 42 | # Visualize the results of MMDetection3D model 43 | # 'show_results' is MMdetection3D visualization API 44 | models_3d = (Base3DDetector) 45 | if isinstance(model.module, models_3d): 46 | model.module.show_results( 47 | data, 48 | result, 49 | out_dir=out_dir, 50 | show=show, 51 | score_thr=show_score_thr) 52 | # Visualize the results of MMDetection model 53 | # 'show_result' is MMdetection visualization API 54 | else: 55 | batch_size = len(result) 56 | if batch_size == 1 and isinstance(data['img'][0], 57 | torch.Tensor): 58 | img_tensor = data['img'][0] 59 | else: 60 | img_tensor = data['img'][0].data[0] 61 | img_metas = data['img_metas'][0].data[0] 62 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 63 | assert len(imgs) == len(img_metas) 64 | 65 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 66 | h, w, _ = img_meta['img_shape'] 67 | img_show = img[:h, :w, :] 68 | 69 | ori_h, ori_w = img_meta['ori_shape'][:-1] 70 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 71 | 72 | if out_dir: 73 | out_file = osp.join(out_dir, img_meta['ori_filename']) 74 | else: 75 | out_file = None 76 | 77 | model.module.show_result( 78 | img_show, 79 | result[i], 80 | show=show, 81 | out_file=out_file, 82 | score_thr=show_score_thr) 83 | results.extend(result) 84 | 85 | batch_size = len(result) 86 | for _ in range(batch_size): 87 | prog_bar.update() 88 | return results 89 | -------------------------------------------------------------------------------- /mmdet3d/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .anchor import * # noqa: F401, F403 3 | from .bbox import * # noqa: F401, F403 4 | from .evaluation import * # noqa: F401, F403 5 | from .hook import * # noqa: F401, F403 6 | from .points import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | from .visualizer import * # noqa: F401, F403 10 | from .voxel import * # noqa: F401, F403 11 | -------------------------------------------------------------------------------- /mmdet3d/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.anchor import build_prior_generator 3 | from .anchor_3d_generator import (AlignedAnchor3DRangeGenerator, 4 | AlignedAnchor3DRangeGeneratorPerCls, 5 | Anchor3DRangeGenerator) 6 | 7 | __all__ = [ 8 | 'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator', 9 | 'build_prior_generator', 'AlignedAnchor3DRangeGeneratorPerCls' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 3 | from .coders import DeltaXYZWLHRBBoxCoder 4 | # from .bbox_target import bbox_target 5 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 6 | BboxOverlapsNearest3D, 7 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 8 | bbox_overlaps_nearest_3d) 9 | from .samplers import (BaseSampler, CombinedSampler, 10 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 11 | PseudoSampler, RandomSampler, SamplingResult) 12 | from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, 13 | Coord3DMode, DepthInstance3DBoxes, 14 | LiDARInstance3DBoxes, get_box_type, limit_period, 15 | mono_cam_box2vis, points_cam2img, points_img2cam, 16 | xywhr2xyxyr) 17 | from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back 18 | 19 | __all__ = [ 20 | 'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 21 | 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 22 | 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 23 | 'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 24 | 'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 25 | 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode', 26 | 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi', 27 | 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes', 28 | 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img', 29 | 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis' 30 | ] 31 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner 3 | 4 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox import build_bbox_coder 3 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder 4 | from .centerpoint_bbox_coders import CenterPointBBoxCoder 5 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder 6 | from .fcos3d_bbox_coder import FCOS3DBBoxCoder 7 | from .groupfree3d_bbox_coder import GroupFree3DBBoxCoder 8 | from .monoflex_bbox_coder import MonoFlexCoder 9 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 10 | from .pgd_bbox_coder import PGDBBoxCoder 11 | from .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder 12 | from .smoke_bbox_coder import SMOKECoder 13 | 14 | __all__ = [ 15 | 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder', 16 | 'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder', 17 | 'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder', 'PGDBBoxCoder', 'SMOKECoder', 18 | 'MonoFlexCoder' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
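# Summary of the encoding implemented below: box centers are normalized by
# the anchor's BEV diagonal sqrt(la^2 + wa^2) (z additionally by the anchor
# height), sizes are encoded as log ratios, and yaw as a plain difference --
# the residual parameterization used by SECOND and PointPillars.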
2 | import torch 3 | 4 | from mmdet.core.bbox import BaseBBoxCoder 5 | from mmdet.core.bbox.builder import BBOX_CODERS 6 | 7 | 8 | @BBOX_CODERS.register_module() 9 | class DeltaXYZWLHRBBoxCoder(BaseBBoxCoder): 10 | """Bbox Coder for 3D boxes. 11 | 12 | Args: 13 | code_size (int): The dimension of boxes to be encoded. 14 | """ 15 | 16 | def __init__(self, code_size=7): 17 | super(DeltaXYZWLHRBBoxCoder, self).__init__() 18 | self.code_size = code_size 19 | 20 | @staticmethod 21 | def encode(src_boxes, dst_boxes): 22 | """Get box regression transformation deltas (dx, dy, dz, dx_size, 23 | dy_size, dz_size, dr, dv*) that can be used to transform the 24 | `src_boxes` into the `target_boxes`. 25 | 26 | Args: 27 | src_boxes (torch.Tensor): source boxes, e.g., object proposals. 28 | dst_boxes (torch.Tensor): target of the transformation, e.g., 29 | ground-truth boxes. 30 | 31 | Returns: 32 | torch.Tensor: Box transformation deltas. 33 | """ 34 | box_ndim = src_boxes.shape[-1] 35 | cas, cgs, cts = [], [], [] 36 | if box_ndim > 7: 37 | xa, ya, za, wa, la, ha, ra, *cas = torch.split( 38 | src_boxes, 1, dim=-1) 39 | xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split( 40 | dst_boxes, 1, dim=-1) 41 | cts = [g - a for g, a in zip(cgs, cas)] 42 | else: 43 | xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1) 44 | xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1) 45 | za = za + ha / 2 46 | zg = zg + hg / 2 47 | diagonal = torch.sqrt(la**2 + wa**2) 48 | xt = (xg - xa) / diagonal 49 | yt = (yg - ya) / diagonal 50 | zt = (zg - za) / ha 51 | lt = torch.log(lg / la) 52 | wt = torch.log(wg / wa) 53 | ht = torch.log(hg / ha) 54 | rt = rg - ra 55 | return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1) 56 | 57 | @staticmethod 58 | def decode(anchors, deltas): 59 | """Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size, 60 | dz_size, dr, dv*) to `boxes`. 61 | 62 | Args: 63 | anchors (torch.Tensor): Parameters of anchors with shape (N, 7). 64 | deltas (torch.Tensor): Encoded boxes with shape 65 | (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*]. 66 | 67 | Returns: 68 | torch.Tensor: Decoded boxes. 69 | """ 70 | cas, cts = [], [] 71 | box_ndim = anchors.shape[-1] 72 | if box_ndim > 7: 73 | xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1) 74 | xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1) 75 | else: 76 | xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) 77 | xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1) 78 | 79 | za = za + ha / 2 80 | diagonal = torch.sqrt(la**2 + wa**2) 81 | xg = xt * diagonal + xa 82 | yg = yt * diagonal + ya 83 | zg = zt * ha + za 84 | 85 | lg = torch.exp(lt) * la 86 | wg = torch.exp(wt) * wa 87 | hg = torch.exp(ht) * ha 88 | rg = rt + ra 89 | zg = zg - hg / 2 90 | cgs = [t + a for t, a in zip(cts, cas)] 91 | return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1) 92 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler, 3 | InstanceBalancedPosSampler, 4 | IoUBalancedNegSampler, OHEMSampler, 5 | PseudoSampler, RandomSampler, 6 | SamplingResult) 7 | from .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler 8 | 9 | __all__ = [ 10 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 11 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 12 | 'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_box3d import BaseInstance3DBoxes 3 | from .box_3d_mode import Box3DMode 4 | from .cam_box3d import CameraInstance3DBoxes 5 | from .coord_3d_mode import Coord3DMode 6 | from .depth_box3d import DepthInstance3DBoxes 7 | from .lidar_box3d import LiDARInstance3DBoxes 8 | from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period, 9 | mono_cam_box2vis, points_cam2img, points_img2cam, 10 | rotation_3d_in_axis, xywhr2xyxyr) 11 | 12 | __all__ = [ 13 | 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes', 14 | 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr', 15 | 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img', 16 | 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis', 17 | 'get_proj_mat_by_coord_type' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet3d/core/bbox/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical): 6 | """Map bboxes from testing scale to original image scale. 7 | 8 | Args: 9 | bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back. 10 | scale_factor (float): Scale factor. 11 | flip_horizontal (bool): Whether to flip horizontally. 12 | flip_vertical (bool): Whether to flip vertically. 13 | 14 | Returns: 15 | :obj:`BaseInstance3DBoxes`: Boxes mapped back. 16 | """ 17 | new_bboxes = bboxes.clone() 18 | if flip_horizontal: 19 | new_bboxes.flip('horizontal') 20 | if flip_vertical: 21 | new_bboxes.flip('vertical') 22 | new_bboxes.scale(1 / scale_factor) 23 | 24 | return new_bboxes 25 | 26 | 27 | def bbox3d2roi(bbox_list): 28 | """Convert a list of bounding boxes to roi format. 29 | 30 | Args: 31 | bbox_list (list[torch.Tensor]): A list of bounding boxes 32 | corresponding to a batch of images. 33 | 34 | Returns: 35 | torch.Tensor: Region of interests in shape (n, c), where 36 | the channels are in order of [batch_ind, x, y ...]. 
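Example (illustrative): two images with box tensors of shape (3, 7) and (2, 7) yield ``rois`` of shape (5, 8), where column 0 holds the image index.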
37 | """ 38 | rois_list = [] 39 | for img_id, bboxes in enumerate(bbox_list): 40 | if bboxes.size(0) > 0: 41 | img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) 42 | rois = torch.cat([img_inds, bboxes], dim=-1) 43 | else: 44 | rois = torch.zeros_like(bboxes) 45 | rois_list.append(rois) 46 | rois = torch.cat(rois_list, 0) 47 | return rois 48 | 49 | 50 | def bbox3d2result(bboxes, scores, labels, attrs=None): 51 | """Convert detection results to a list of numpy arrays. 52 | 53 | Args: 54 | bboxes (torch.Tensor): Bounding boxes with shape (N, 5). 55 | labels (torch.Tensor): Labels with shape (N, ). 56 | scores (torch.Tensor): Scores with shape (N, ). 57 | attrs (torch.Tensor, optional): Attributes with shape (N, ). 58 | Defaults to None. 59 | 60 | Returns: 61 | dict[str, torch.Tensor]: Bounding box results in cpu mode. 62 | 63 | - boxes_3d (torch.Tensor): 3D boxes. 64 | - scores (torch.Tensor): Prediction scores. 65 | - labels_3d (torch.Tensor): Box labels. 66 | - attrs_3d (torch.Tensor, optional): Box attributes. 67 | """ 68 | result_dict = dict( 69 | boxes_3d=bboxes.to('cpu'), 70 | scores_3d=scores.cpu(), 71 | labels_3d=labels.cpu()) 72 | 73 | if attrs is not None: 74 | result_dict['attrs_3d'] = attrs.cpu() 75 | 76 | return result_dict 77 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .indoor_eval import indoor_eval 3 | from .instance_seg_eval import instance_seg_eval 4 | from .kitti_utils import kitti_eval, kitti_eval_coco_style 5 | from .lyft_eval import lyft_eval 6 | from .seg_eval import seg_eval 7 | 8 | __all__ = [ 9 | 'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval', 10 | 'seg_eval', 'instance_seg_eval' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/kitti_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .eval import kitti_eval, kitti_eval_coco_style 3 | 4 | __all__ = ['kitti_eval', 'kitti_eval_coco_style'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .evaluate_semantic_instance import evaluate_matches, scannet_eval 3 | 4 | __all__ = ['scannet_eval', 'evaluate_matches'] 5 | -------------------------------------------------------------------------------- /mmdet3d/core/evaluation/scannet_utils/util_3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/util_3d.py # noqa 3 | import json 4 | 5 | import numpy as np 6 | 7 | 8 | class Instance: 9 | """Single instance for ScanNet evaluator. 10 | 11 | Args: 12 | mesh_vert_instances (np.array): Instance ids for each point. 13 | instance_id: Id of single instance. 
14 |     """
15 |     instance_id = 0
16 |     label_id = 0
17 |     vert_count = 0
18 |     med_dist = -1
19 |     dist_conf = 0.0
20 | 
21 |     def __init__(self, mesh_vert_instances, instance_id):
22 |         if instance_id == -1:
23 |             return
24 |         self.instance_id = int(instance_id)
25 |         self.label_id = int(self.get_label_id(instance_id))
26 |         self.vert_count = int(
27 |             self.get_instance_verts(mesh_vert_instances, instance_id))
28 | 
29 |     @staticmethod
30 |     def get_label_id(instance_id):
31 |         return int(instance_id // 1000)
32 | 
33 |     @staticmethod
34 |     def get_instance_verts(mesh_vert_instances, instance_id):
35 |         return (mesh_vert_instances == instance_id).sum()
36 | 
37 |     def to_json(self):
38 |         return json.dumps(
39 |             self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
40 | 
41 |     def to_dict(self):
42 |         inst_dict = {}
43 |         inst_dict['instance_id'] = self.instance_id
44 |         inst_dict['label_id'] = self.label_id
45 |         inst_dict['vert_count'] = self.vert_count
46 |         inst_dict['med_dist'] = self.med_dist
47 |         inst_dict['dist_conf'] = self.dist_conf
48 |         return inst_dict
49 | 
50 |     def from_json(self, data):
51 |         self.instance_id = int(data['instance_id'])
52 |         self.label_id = int(data['label_id'])
53 |         self.vert_count = int(data['vert_count'])
54 |         if 'med_dist' in data:
55 |             self.med_dist = float(data['med_dist'])
56 |             self.dist_conf = float(data['dist_conf'])
57 | 
58 |     def __str__(self):
59 |         return '(' + str(self.instance_id) + ')'
60 | 
61 | 
62 | def get_instances(ids, class_ids, class_labels, id2label):
63 |     """Transform gt instance mask to Instance objects.
64 | 
65 |     Args:
66 |         ids (np.array): Instance ids for each point.
67 |         class_ids (tuple[int]): Ids of valid categories.
68 |         class_labels (tuple[str]): Class names.
69 |         id2label (dict[int, str]): Mapping of valid class id to class label.
70 | 
71 |     Returns:
72 |         dict[str, list]: Instance objects grouped by class label.
73 |     """
74 |     instances = {}
75 |     for label in class_labels:
76 |         instances[label] = []
77 |     instance_ids = np.unique(ids)
78 |     for inst_id in instance_ids:
79 |         if inst_id == 0:
80 |             continue
81 |         inst = Instance(ids, inst_id)
82 |         if inst.label_id in class_ids:
83 |             instances[id2label[inst.label_id]].append(inst.to_dict())
84 |     return instances
85 | 
-------------------------------------------------------------------------------- /mmdet3d/core/evaluation/seg_eval.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import numpy as np
3 | from mmcv.utils import print_log
4 | from terminaltables import AsciiTable
5 | 
6 | 
7 | def fast_hist(preds, labels, num_classes):
8 |     """Compute the confusion matrix for every batch.
9 | 
10 |     Args:
11 |         preds (np.ndarray): Prediction labels of points with shape of
12 |             (num_points, ).
13 |         labels (np.ndarray): Ground truth labels of points with shape of
14 |             (num_points, ).
15 |         num_classes (int): Number of classes.
16 | 
17 |     Returns:
18 |         np.ndarray: Calculated confusion matrix.
19 |     """
20 | 
21 |     k = (labels >= 0) & (labels < num_classes)
22 |     bin_count = np.bincount(
23 |         num_classes * labels[k].astype(int) + preds[k],
24 |         minlength=num_classes**2)
25 |     return bin_count[:num_classes**2].reshape(num_classes, num_classes)
26 | 
27 | 
28 | def per_class_iou(hist):
29 |     """Compute the per-class IoU.
30 | 
31 |     Args:
32 |         hist (np.ndarray): Overall confusion matrix
33 |             (num_classes, num_classes).
34 | 
35 |     Returns:
36 |         np.ndarray: Calculated per-class IoU.
37 |     """
38 | 
39 |     return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
40 | 
41 | 
42 | def get_acc(hist):
43 |     """Compute the overall accuracy.
44 | 
45 |     Args:
46 |         hist (np.ndarray): Overall confusion matrix
47 |             (num_classes, num_classes).
48 | 
49 |     Returns:
50 |         float: Calculated overall accuracy.
51 |     """
52 | 
53 |     return np.diag(hist).sum() / hist.sum()
54 | 
55 | 
56 | def get_acc_cls(hist):
57 |     """Compute the class average accuracy.
58 | 
59 |     Args:
60 |         hist (np.ndarray): Overall confusion matrix
61 |             (num_classes, num_classes).
62 | 
63 |     Returns:
64 |         float: Calculated class average accuracy.
65 |     """
66 | 
67 |     return np.nanmean(np.diag(hist) / hist.sum(axis=1))
68 | 
69 | 
70 | def seg_eval(gt_labels, seg_preds, label2cat, ignore_index, logger=None):
71 |     """Semantic Segmentation Evaluation.
72 | 
73 |     Evaluate the result of the Semantic Segmentation.
74 | 
75 |     Args:
76 |         gt_labels (list[torch.Tensor]): Ground truth labels.
77 |         seg_preds (list[torch.Tensor]): Predictions.
78 |         label2cat (dict): Map from label to category name.
79 |         ignore_index (int): Index that will be ignored in evaluation.
80 |         logger (logging.Logger | str, optional): The way to print the
81 |             evaluation summary. See `mmdet.utils.print_log()`. Default: None.
82 | 
83 |     Returns:
84 |         dict[str, float]: Dict of results.
85 |     """
86 |     assert len(seg_preds) == len(gt_labels)
87 |     num_classes = len(label2cat)
88 | 
89 |     hist_list = []
90 |     for i in range(len(gt_labels)):
91 |         gt_seg = gt_labels[i].clone().numpy().astype(np.int64)
92 |         pred_seg = seg_preds[i].clone().numpy().astype(np.int64)
93 | 
94 |         # filter out ignored points
95 |         pred_seg[gt_seg == ignore_index] = -1
96 |         gt_seg[gt_seg == ignore_index] = -1
97 | 
98 |         # calculate one instance result
99 |         hist_list.append(fast_hist(pred_seg, gt_seg, num_classes))
100 | 
101 |     iou = per_class_iou(sum(hist_list))
102 |     miou = np.nanmean(iou)
103 |     acc = get_acc(sum(hist_list))
104 |     acc_cls = get_acc_cls(sum(hist_list))
105 | 
106 |     header = ['classes']
107 |     for i in range(len(label2cat)):
108 |         header.append(label2cat[i])
109 |     header.extend(['miou', 'acc', 'acc_cls'])
110 | 
111 |     ret_dict = dict()
112 |     table_columns = [['results']]
113 |     for i in range(len(label2cat)):
114 |         ret_dict[label2cat[i]] = float(iou[i])
115 |         table_columns.append([f'{iou[i]:.4f}'])
116 |     ret_dict['miou'] = float(miou)
117 |     ret_dict['acc'] = float(acc)
118 |     ret_dict['acc_cls'] = float(acc_cls)
119 | 
120 |     table_columns.append([f'{miou:.4f}'])
121 |     table_columns.append([f'{acc:.4f}'])
122 |     table_columns.append([f'{acc_cls:.4f}'])
123 | 
124 |     table_data = [header]
125 |     table_rows = list(zip(*table_columns))
126 |     table_data += table_rows
127 |     table = AsciiTable(table_data)
128 |     table.inner_footing_row_border = True
129 |     print_log('\n' + table.table, logger=logger)
130 | 
131 |     return ret_dict
132 | 
-------------------------------------------------------------------------------- /mmdet3d/core/evaluation/waymo_utils/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .prediction_kitti_to_waymo import KITTI2Waymo
3 | 
4 | __all__ = ['KITTI2Waymo']
5 | 
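To make the evaluation utilities above concrete, here is a minimal, self-contained sketch of calling `seg_eval` on toy data; the class names, sample sizes, and label values are invented purely for illustration.

import torch

from mmdet3d.core.evaluation import seg_eval

# Toy inputs: two samples, three classes; label 3 is the ignored index.
label2cat = {0: 'car', 1: 'pedestrian', 2: 'cyclist'}
gt_labels = [torch.randint(0, 4, (1000,)), torch.randint(0, 4, (1000,))]
seg_preds = [torch.randint(0, 3, (1000,)), torch.randint(0, 3, (1000,))]

results = seg_eval(gt_labels, seg_preds, label2cat, ignore_index=3)
print(results['miou'], results['acc'], results['acc_cls'])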
-------------------------------------------------------------------------------- /mmdet3d/core/hook/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .ema import MEGVIIEMAHook
3 | from .sequentialcontrol import SequentialControlHook
4 | from .syncbncontrol import SyncbnControlHook
5 | from .utils import is_parallel
6 | 
7 | __all__ = ['MEGVIIEMAHook', 'is_parallel', 'SequentialControlHook',
8 |            'SyncbnControlHook']
9 | 
-------------------------------------------------------------------------------- /mmdet3d/core/hook/sequentialcontrol.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmcv.runner.hooks import HOOKS, Hook
3 | from mmdet3d.core.hook.utils import is_parallel
4 | 
5 | __all__ = ['SequentialControlHook']
6 | 
7 | 
8 | @HOOKS.register_module()
9 | class SequentialControlHook(Hook):
10 |     """Switch temporal fusion (``with_prev``) on after a given epoch."""
11 | 
12 |     def __init__(self, temporal_start_epoch=1):
13 |         super().__init__()
14 |         self.temporal_start_epoch = temporal_start_epoch
15 | 
16 |     def set_temporal_flag(self, runner, flag):
17 |         if is_parallel(runner.model.module):
18 |             runner.model.module.module.with_prev = flag
19 |         else:
20 |             runner.model.module.with_prev = flag
21 | 
22 |     def before_run(self, runner):
23 |         self.set_temporal_flag(runner, False)
24 | 
25 |     def before_train_epoch(self, runner):
26 |         if runner.epoch > self.temporal_start_epoch:
27 |             self.set_temporal_flag(runner, True)
-------------------------------------------------------------------------------- /mmdet3d/core/hook/syncbncontrol.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmcv.runner.hooks import HOOKS, Hook
3 | from mmdet3d.core.hook.utils import is_parallel
4 | from torch.nn import SyncBatchNorm
5 | 
6 | __all__ = ['SyncbnControlHook']
7 | 
8 | 
9 | @HOOKS.register_module()
10 | class SyncbnControlHook(Hook):
11 |     """Convert the model's BN layers to SyncBatchNorm at a given epoch."""
12 | 
13 |     def __init__(self, syncbn_start_epoch=1):
14 |         super().__init__()
15 |         self.is_syncbn = False
16 |         self.syncbn_start_epoch = syncbn_start_epoch
17 | 
18 |     def cvt_syncbn(self, runner):
19 |         if is_parallel(runner.model.module):
20 |             runner.model.module.module = \
21 |                 SyncBatchNorm.convert_sync_batchnorm(runner.model.module.module,
22 |                                                      process_group=None)
23 |         else:
24 |             pass
25 |             # runner.model.module = \
26 |             #     SyncBatchNorm.convert_sync_batchnorm(runner.model.module,
27 |             #                                          process_group=None)
28 | 
29 |     def before_train_epoch(self, runner):
30 |         if runner.epoch >= self.syncbn_start_epoch and not self.is_syncbn:
31 |             print('start using syncbn')
32 |             self.cvt_syncbn(runner)
33 |             self.is_syncbn = True
34 | 
35 | 
-------------------------------------------------------------------------------- /mmdet3d/core/hook/utils.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from torch import nn
3 | 
4 | __all__ = ['is_parallel']
5 | 
6 | 
7 | def is_parallel(model):
8 |     """Check if the model is in parallel mode."""
9 |     parallel_type = (
10 |         nn.parallel.DataParallel,
11 |         nn.parallel.DistributedDataParallel,
12 |     )
13 |     return isinstance(model, parallel_type)
14 | 
-------------------------------------------------------------------------------- /mmdet3d/core/points/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
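The three hooks above are typically enabled from a training config. A hedged sketch of the corresponding `custom_hooks` entry follows; the `MEGVIIEMAHook` keyword arguments are an assumption modeled on BEVDet-style configs and are not taken from this repository.

custom_hooks = [
    # EMA weight averaging (kwargs assumed, see lead-in above)
    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL'),
    # enable temporal fusion (with_prev) after epoch 2
    dict(type='SequentialControlHook', temporal_start_epoch=2),
    # convert BN layers to SyncBatchNorm from epoch 2 on
    dict(type='SyncbnControlHook', syncbn_start_epoch=2),
]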
2 | from .base_points import BasePoints 3 | from .cam_points import CameraPoints 4 | from .depth_points import DepthPoints 5 | from .lidar_points import LiDARPoints 6 | 7 | __all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints'] 8 | 9 | 10 | def get_points_type(points_type): 11 | """Get the class of points according to coordinate type. 12 | 13 | Args: 14 | points_type (str): The type of points coordinate. 15 | The valid value are "CAMERA", "LIDAR", or "DEPTH". 16 | 17 | Returns: 18 | class: Points type. 19 | """ 20 | if points_type == 'CAMERA': 21 | points_cls = CameraPoints 22 | elif points_type == 'LIDAR': 23 | points_cls = LiDARPoints 24 | elif points_type == 'DEPTH': 25 | points_cls = DepthPoints 26 | else: 27 | raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"' 28 | f' are supported, got {points_type}') 29 | 30 | return points_cls 31 | -------------------------------------------------------------------------------- /mmdet3d/core/points/cam_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class CameraPoints(BasePoints): 6 | """Points of instances in CAM coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(CameraPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 1 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 2] = -self.tensor[:, 2] 39 | 40 | @property 41 | def bev(self): 42 | """torch.Tensor: BEV of the points in shape (N, 2).""" 43 | return self.tensor[:, [0, 2]] 44 | 45 | def convert_to(self, dst, rt_mat=None): 46 | """Convert self to ``dst`` mode. 47 | 48 | Args: 49 | dst (:obj:`CoordMode`): The target Point mode. 50 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 51 | translation matrix between different coordinates. 52 | Defaults to None. 53 | The conversion from `src` coordinates to `dst` coordinates 54 | usually comes along the change of sensors, e.g., from camera 55 | to LiDAR. This requires a transformation matrix. 56 | 57 | Returns: 58 | :obj:`BasePoints`: The converted point of the same type 59 | in the `dst` mode. 
60 | """ 61 | from mmdet3d.core.bbox import Coord3DMode 62 | return Coord3DMode.convert_point( 63 | point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat) 64 | -------------------------------------------------------------------------------- /mmdet3d/core/points/depth_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class DepthPoints(BasePoints): 6 | """Points of instances in DEPTH coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(DepthPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 0] = -self.tensor[:, 0] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 1] = -self.tensor[:, 1] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/points/lidar_points.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_points import BasePoints 3 | 4 | 5 | class LiDARPoints(BasePoints): 6 | """Points of instances in LIDAR coordinates. 7 | 8 | Args: 9 | tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. 10 | points_dim (int, optional): Number of the dimension of a point. 11 | Each row is (x, y, z). Defaults to 3. 12 | attribute_dims (dict, optional): Dictionary to indicate the 13 | meaning of extra dimension. Defaults to None. 14 | 15 | Attributes: 16 | tensor (torch.Tensor): Float matrix of N x points_dim. 17 | points_dim (int): Integer indicating the dimension of a point. 18 | Each row is (x, y, z, ...). 19 | attribute_dims (bool): Dictionary to indicate the meaning of extra 20 | dimension. Defaults to None. 
21 | rotation_axis (int): Default rotation axis for points rotation. 22 | """ 23 | 24 | def __init__(self, tensor, points_dim=3, attribute_dims=None): 25 | super(LiDARPoints, self).__init__( 26 | tensor, points_dim=points_dim, attribute_dims=attribute_dims) 27 | self.rotation_axis = 2 28 | 29 | def flip(self, bev_direction='horizontal'): 30 | """Flip the points along given BEV direction. 31 | 32 | Args: 33 | bev_direction (str): Flip direction (horizontal or vertical). 34 | """ 35 | if bev_direction == 'horizontal': 36 | self.tensor[:, 1] = -self.tensor[:, 1] 37 | elif bev_direction == 'vertical': 38 | self.tensor[:, 0] = -self.tensor[:, 0] 39 | 40 | def convert_to(self, dst, rt_mat=None): 41 | """Convert self to ``dst`` mode. 42 | 43 | Args: 44 | dst (:obj:`CoordMode`): The target Point mode. 45 | rt_mat (np.ndarray | torch.Tensor, optional): The rotation and 46 | translation matrix between different coordinates. 47 | Defaults to None. 48 | The conversion from `src` coordinates to `dst` coordinates 49 | usually comes along the change of sensors, e.g., from camera 50 | to LiDAR. This requires a transformation matrix. 51 | 52 | Returns: 53 | :obj:`BasePoints`: The converted point of the same type 54 | in the `dst` mode. 55 | """ 56 | from mmdet3d.core.bbox import Coord3DMode 57 | return Coord3DMode.convert_point( 58 | point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat) 59 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores, 4 | multiclass_nms) 5 | from .box3d_nms import (aligned_3d_nms, box3d_multiclass_nms, circle_nms, 6 | nms_bev, nms_normal_bev) 7 | from .merge_augs import merge_aug_bboxes_3d 8 | 9 | __all__ = [ 10 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 11 | 'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms', 12 | 'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms', 'nms_bev', 13 | 'nms_normal_bev' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from mmdet3d.core.post_processing import nms_bev, nms_normal_bev 5 | from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr 6 | 7 | 8 | def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg): 9 | """Merge augmented detection 3D bboxes and scores. 10 | 11 | Args: 12 | aug_results (list[dict]): The dict of detection results. 13 | The dict contains the following keys 14 | 15 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox. 16 | - scores_3d (torch.Tensor): Detection scores. 17 | - labels_3d (torch.Tensor): Predicted box labels. 18 | img_metas (list[dict]): Meta information of each sample. 19 | test_cfg (dict): Test config. 20 | 21 | Returns: 22 | dict: Bounding boxes results in cpu mode, containing merged results. 23 | 24 | - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox. 25 | - scores_3d (torch.Tensor): Merged detection scores. 26 | - labels_3d (torch.Tensor): Merged predicted box labels. 
27 |     """
28 | 
29 |     assert len(aug_results) == len(img_metas), \
30 |         '"aug_results" should have the same length as "img_metas", got len(' \
31 |         f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'
32 | 
33 |     recovered_bboxes = []
34 |     recovered_scores = []
35 |     recovered_labels = []
36 | 
37 |     for bboxes, img_info in zip(aug_results, img_metas):
38 |         scale_factor = img_info[0]['pcd_scale_factor']
39 |         pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']
40 |         pcd_vertical_flip = img_info[0]['pcd_vertical_flip']
41 |         recovered_scores.append(bboxes['scores_3d'])
42 |         recovered_labels.append(bboxes['labels_3d'])
43 |         bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor,
44 |                                      pcd_horizontal_flip, pcd_vertical_flip)
45 |         recovered_bboxes.append(bboxes)
46 | 
47 |     aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)
48 |     aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)
49 |     aug_scores = torch.cat(recovered_scores, dim=0)
50 |     aug_labels = torch.cat(recovered_labels, dim=0)
51 | 
52 |     # TODO: use a more elegant way to deal with nms
53 |     if test_cfg.use_rotate_nms:
54 |         nms_func = nms_bev
55 |     else:
56 |         nms_func = nms_normal_bev
57 | 
58 |     merged_bboxes = []
59 |     merged_scores = []
60 |     merged_labels = []
61 | 
62 |     # Apply multi-class NMS when merging bboxes
63 |     if len(aug_labels) == 0:
64 |         return bbox3d2result(aug_bboxes, aug_scores, aug_labels)
65 | 
66 |     for class_id in range(torch.max(aug_labels).item() + 1):
67 |         class_inds = (aug_labels == class_id)
68 |         bboxes_i = aug_bboxes[class_inds]
69 |         bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]
70 |         scores_i = aug_scores[class_inds]
71 |         labels_i = aug_labels[class_inds]
72 |         if len(bboxes_nms_i) == 0:
73 |             continue
74 |         selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr)
75 | 
76 |         merged_bboxes.append(bboxes_i[selected, :])
77 |         merged_scores.append(scores_i[selected])
78 |         merged_labels.append(labels_i[selected])
79 | 
80 |     merged_bboxes = merged_bboxes[0].cat(merged_bboxes)
81 |     merged_scores = torch.cat(merged_scores, dim=0)
82 |     merged_labels = torch.cat(merged_labels, dim=0)
83 | 
84 |     _, order = merged_scores.sort(0, descending=True)
85 |     num = min(test_cfg.max_num, len(aug_bboxes))
86 |     order = order[:num]
87 | 
88 |     merged_bboxes = merged_bboxes[order]
89 |     merged_scores = merged_scores[order]
90 |     merged_labels = merged_labels[order]
91 | 
92 |     return bbox3d2result(merged_bboxes, merged_scores, merged_labels)
93 | 
-------------------------------------------------------------------------------- /mmdet3d/core/utils/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .array_converter import ArrayConverter, array_converter
3 | from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d,
4 |                        gaussian_radius, get_ellip_gaussian_2D)
5 | 
6 | __all__ = [
7 |     'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian',
8 |     'ArrayConverter', 'array_converter', 'ellip_gaussian2D',
9 |     'get_ellip_gaussian_2D'
10 | ]
11 | 
-------------------------------------------------------------------------------- /mmdet3d/core/visualizer/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .show_result import (show_multi_modality_result, show_result,
3 |                           show_seg_result)
4 | 
5 | __all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
6 | 
-------------------------------------------------------------------------------- /mmdet3d/core/voxel/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .builder import build_voxel_generator
3 | from .voxel_generator import VoxelGenerator
4 | 
5 | __all__ = ['build_voxel_generator', 'VoxelGenerator']
6 | 
-------------------------------------------------------------------------------- /mmdet3d/core/voxel/builder.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import mmcv
3 | 
4 | from . import voxel_generator
5 | 
6 | 
7 | def build_voxel_generator(cfg, **kwargs):
8 |     """Builder of voxel generator."""
9 |     if isinstance(cfg, voxel_generator.VoxelGenerator):
10 |         return cfg
11 |     elif isinstance(cfg, dict):
12 |         return mmcv.runner.obj_from_dict(
13 |             cfg, voxel_generator, default_args=kwargs)
14 |     else:
15 |         raise TypeError('Invalid type {} for building a voxel '
16 |                         'generator'.format(type(cfg)))
17 | 
-------------------------------------------------------------------------------- /mmdet3d/datasets/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmdet.datasets.builder import build_dataloader
3 | from .builder import DATASETS, PIPELINES, build_dataset
4 | from .custom_3d import Custom3DDataset
5 | from .nuscenes_dataset import NuScenesDataset
6 | from .nuscenes_dataset_occ import NuScenesDatasetOccpancy
7 | # yapf: disable
8 | from .pipelines import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
9 |                         GlobalRotScaleTrans, IndoorPatchPointSample,
10 |                         IndoorPointSample, LoadAnnotations3D,
11 |                         LoadPointsFromDict, LoadPointsFromFile,
12 |                         LoadPointsFromMultiSweeps, MultiViewWrapper,
13 |                         NormalizePointsColor, ObjectNameFilter, ObjectNoise,
14 |                         ObjectRangeFilter, ObjectSample, PointSample,
15 |                         PointShuffle, PointsRangeFilter, RandomDropPointsColor,
16 |                         RandomFlip3D, RandomJitterPoints, RandomRotate,
17 |                         RandomShiftScale, RangeLimitedRandomCrop,
18 |                         VoxelBasedPointSampler)
19 | # yapf: enable
20 | from .utils import get_loading_pipeline
21 | 
22 | __all__ = [
23 |     'build_dataloader', 'DATASETS', 'build_dataset', 'NuScenesDataset',
24 |     'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
25 |     'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
26 |     'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPatchPointSample',
27 |     'IndoorPointSample', 'PointSample', 'LoadAnnotations3D',
28 |     'GlobalAlignment', 'Custom3DDataset', 'LoadPointsFromMultiSweeps',
29 |     'BackgroundPointsFilter', 'VoxelBasedPointSampler',
30 |     'get_loading_pipeline', 'RandomDropPointsColor', 'RandomJitterPoints',
31 |     'ObjectNameFilter', 'AffineResize', 'RandomShiftScale',
32 |     'LoadPointsFromDict', 'PIPELINES', 'RangeLimitedRandomCrop',
33 |     'RandomRotate', 'MultiViewWrapper', 'NuScenesDatasetOccpancy'
34 | ]
35 | 
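As a usage note for the voxel builder defined above, here is a minimal sketch; the voxel size and point-cloud range are KITTI-like placeholder values, not taken from any config in this repository.

from mmdet3d.core.voxel import build_voxel_generator

# 'type' is resolved inside the voxel_generator module by obj_from_dict.
voxel_generator = build_voxel_generator(
    dict(
        type='VoxelGenerator',
        voxel_size=[0.05, 0.05, 0.1],
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        max_num_points=5))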
-------------------------------------------------------------------------------- /mmdet3d/datasets/builder.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import platform
3 | 
4 | from mmcv.utils import Registry, build_from_cfg
5 | 
6 | from mmdet.datasets import DATASETS as MMDET_DATASETS
7 | from mmdet.datasets.builder import _concat_dataset
8 | 
9 | if platform.system() != 'Windows':
10 |     # https://github.com/pytorch/pytorch/issues/973
11 |     import resource
12 |     rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
13 |     base_soft_limit = rlimit[0]
14 |     hard_limit = rlimit[1]
15 |     soft_limit = min(max(4096, base_soft_limit), hard_limit)
16 |     resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
17 | 
18 | OBJECTSAMPLERS = Registry('Object sampler')
19 | DATASETS = Registry('dataset')
20 | PIPELINES = Registry('pipeline')
21 | 
22 | 
23 | def build_dataset(cfg, default_args=None):
24 |     from mmdet3d.datasets.dataset_wrappers import CBGSDataset
25 |     from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
26 |                                                  ConcatDataset, RepeatDataset)
27 |     if isinstance(cfg, (list, tuple)):
28 |         dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
29 |     elif cfg['type'] == 'ConcatDataset':
30 |         dataset = ConcatDataset(
31 |             [build_dataset(c, default_args) for c in cfg['datasets']],
32 |             cfg.get('separate_eval', True))
33 |     elif cfg['type'] == 'RepeatDataset':
34 |         dataset = RepeatDataset(
35 |             build_dataset(cfg['dataset'], default_args), cfg['times'])
36 |     elif cfg['type'] == 'ClassBalancedDataset':
37 |         dataset = ClassBalancedDataset(
38 |             build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
39 |     elif cfg['type'] == 'CBGSDataset':
40 |         dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))
41 |     elif isinstance(cfg.get('ann_file'), (list, tuple)):
42 |         dataset = _concat_dataset(cfg, default_args)
43 |     elif cfg['type'] in DATASETS._module_dict.keys():
44 |         dataset = build_from_cfg(cfg, DATASETS, default_args)
45 |     else:
46 |         dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args)
47 |     return dataset
48 | 
-------------------------------------------------------------------------------- /mmdet3d/datasets/dataset_wrappers.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import numpy as np
3 | 
4 | from .builder import DATASETS
5 | 
6 | 
7 | @DATASETS.register_module()
8 | class CBGSDataset(object):
9 |     """A wrapper of class-balanced sampled dataset. Implementation of
10 |     paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object
11 |     Detection <https://arxiv.org/abs/1908.09492>`_.
12 | 
13 |     Balance the number of scenes under different classes.
14 | 
15 |     Args:
16 |         dataset (:obj:`CustomDataset`): The dataset to be class sampled.
17 |     """
18 | 
19 |     def __init__(self, dataset):
20 |         self.dataset = dataset
21 |         self.CLASSES = dataset.CLASSES
22 |         self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
23 |         self.sample_indices = self._get_sample_indices()
24 |         # self.dataset.data_infos = self.data_infos
25 |         if hasattr(self.dataset, 'flag'):
26 |             self.flag = np.array(
27 |                 [self.dataset.flag[ind] for ind in self.sample_indices],
28 |                 dtype=np.uint8)
29 | 
30 |     def _get_sample_indices(self):
31 |         """Compute class-balanced sample indices.
32 | 
33 |         Oversamples frames of under-represented categories so that all
34 |         classes are approximately uniformly represented.
35 | 
36 |         Returns:
37 |             list[int]: List of sample indices after class balancing.
38 | """ 39 | class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()} 40 | for idx in range(len(self.dataset)): 41 | sample_cat_ids = self.dataset.get_cat_ids(idx) 42 | for cat_id in sample_cat_ids: 43 | class_sample_idxs[cat_id].append(idx) 44 | duplicated_samples = sum( 45 | [len(v) for _, v in class_sample_idxs.items()]) 46 | class_distribution = { 47 | k: len(v) / duplicated_samples 48 | for k, v in class_sample_idxs.items() 49 | } 50 | 51 | sample_indices = [] 52 | 53 | frac = 1.0 / len(self.CLASSES) 54 | ratios = [frac / v for v in class_distribution.values()] 55 | for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios): 56 | sample_indices += np.random.choice(cls_inds, 57 | int(len(cls_inds) * 58 | ratio)).tolist() 59 | return sample_indices 60 | 61 | def __getitem__(self, idx): 62 | """Get item from infos according to the given index. 63 | 64 | Returns: 65 | dict: Data dictionary of the corresponding index. 66 | """ 67 | ori_idx = self.sample_indices[idx] 68 | return self.dataset[ori_idx] 69 | 70 | def __len__(self): 71 | """Return the length of data infos. 72 | 73 | Returns: 74 | int: Length of data infos. 75 | """ 76 | return len(self.sample_indices) 77 | -------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .dbsampler import DataBaseSampler 4 | from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D 5 | from .loading import (LoadAnnotations3D, LoadAnnotationsBEVDepth, 6 | LoadImageFromFileMono3D, LoadMultiViewImageFromFiles, 7 | LoadPointsFromDict, LoadPointsFromFile, 8 | LoadPointsFromMultiSweeps, NormalizePointsColor, 9 | PointSegClassMapping, PointToMultiViewDepth, 10 | PrepareImageInputs, LoadOccGTFromFile, 11 | LoadAdjacentPointsFromFile, GenerateRays, LoadFlowGT) 12 | from .test_time_aug import MultiScaleFlipAug3D 13 | # yapf: disable 14 | from .transforms_3d import (AffineResize, BackgroundPointsFilter, 15 | GlobalAlignment, GlobalRotScaleTrans, 16 | IndoorPatchPointSample, IndoorPointSample, 17 | MultiViewWrapper, ObjectNameFilter, ObjectNoise, 18 | ObjectRangeFilter, ObjectSample, PointSample, 19 | PointShuffle, PointsRangeFilter, 20 | RandomDropPointsColor, RandomFlip3D, 21 | RandomJitterPoints, RandomRotate, RandomShiftScale, 22 | RangeLimitedRandomCrop, VoxelBasedPointSampler) 23 | 24 | __all__ = [ 25 | 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 26 | 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D', 27 | 'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile', 28 | 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 29 | 'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample', 30 | 'PointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D', 31 | 'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter', 32 | 'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample', 33 | 'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor', 34 | 'RandomJitterPoints', 'AffineResize', 'RandomShiftScale', 35 | 'LoadPointsFromDict', 'MultiViewWrapper', 'RandomRotate', 36 | 'RangeLimitedRandomCrop', 'PrepareImageInputs', 37 | 'LoadAnnotationsBEVDepth', 'PointToMultiViewDepth', 38 | 'LoadOccGTFromFile', 'LoadAdjacentPointsFromFile', 'GenerateRays', 'LoadFlowGT' 39 | ] 40 | 
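For orientation, a hedged sketch of how the transforms exported above compose into a training pipeline config; every parameter value below is an illustrative placeholder rather than a setting from this repository's configs.

train_pipeline = [
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='GlobalRotScaleTrans',
         rot_range=[-0.3925, 0.3925],
         scale_ratio_range=[0.95, 1.05]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=[-50, -50, -5, 50, 50, 3]),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=['car', 'pedestrian']),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]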
-------------------------------------------------------------------------------- /mmdet3d/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES 7 | from ..builder import PIPELINES 8 | 9 | 10 | @PIPELINES.register_module() 11 | class Compose: 12 | """Compose multiple transforms sequentially. The pipeline registry of 13 | mmdet3d separates with mmdet, however, sometimes we may need to use mmdet's 14 | pipeline. So the class is rewritten to be able to use pipelines from both 15 | mmdet3d and mmdet. 16 | 17 | Args: 18 | transforms (Sequence[dict | callable]): Sequence of transform object or 19 | config dict to be composed. 20 | """ 21 | 22 | def __init__(self, transforms): 23 | assert isinstance(transforms, collections.abc.Sequence) 24 | self.transforms = [] 25 | for transform in transforms: 26 | if isinstance(transform, dict): 27 | _, key = PIPELINES.split_scope_key(transform['type']) 28 | if key in PIPELINES._module_dict.keys(): 29 | transform = build_from_cfg(transform, PIPELINES) 30 | else: 31 | transform = build_from_cfg(transform, MMDET_PIPELINES) 32 | self.transforms.append(transform) 33 | elif callable(transform): 34 | self.transforms.append(transform) 35 | else: 36 | raise TypeError('transform must be callable or a dict') 37 | 38 | def __call__(self, data): 39 | """Call function to apply transforms sequentially. 40 | 41 | Args: 42 | data (dict): A result dict contains the data to transform. 43 | 44 | Returns: 45 | dict: Transformed data. 46 | """ 47 | 48 | for t in self.transforms: 49 | data = t(data) 50 | if data is None: 51 | return None 52 | return data 53 | 54 | def __repr__(self): 55 | format_string = self.__class__.__name__ + '(' 56 | for t in self.transforms: 57 | format_string += '\n' 58 | format_string += f' {t}' 59 | format_string += '\n)' 60 | return format_string 61 | -------------------------------------------------------------------------------- /mmdet3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .backbones import *  # noqa: F401,F403
3 | from .occflownet_modules import *  # noqa: F401,F403
4 | from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES,
5 |                       MIDDLE_ENCODERS, NECKS, ROI_EXTRACTORS, SEGMENTORS,
6 |                       SHARED_HEADS, VOXEL_ENCODERS, build_backbone,
7 |                       build_detector, build_fusion_layer, build_head,
8 |                       build_loss, build_middle_encoder, build_model,
9 |                       build_neck, build_roi_extractor, build_shared_head,
10 |                       build_voxel_encoder)
11 | from .detectors import *  # noqa: F401,F403
12 | from .losses import *  # noqa: F401,F403
13 | from .necks import *  # noqa: F401,F403
14 | __all__ = [
15 |     'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS',
16 |     'LOSSES', 'DETECTORS', 'SEGMENTORS', 'VOXEL_ENCODERS', 'MIDDLE_ENCODERS',
17 |     'FUSION_LAYERS', 'build_backbone', 'build_neck', 'build_roi_extractor',
18 |     'build_shared_head', 'build_head', 'build_loss', 'build_detector',
19 |     'build_fusion_layer', 'build_model', 'build_middle_encoder',
20 |     'build_voxel_encoder'
21 | ]
22 | 
-------------------------------------------------------------------------------- /mmdet3d/models/backbones/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
3 | from .resnet import CustomResNet, CustomResNet3D
4 | from .swin import SwinTransformer
5 | 
6 | __all__ = [
7 |     'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet',
8 |     'CustomResNet', 'CustomResNet3D', 'SwinTransformer'
9 | ]
-------------------------------------------------------------------------------- /mmdet3d/models/detectors/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import Base3DDetector
3 | from .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVDetTRT, BEVStereo4D
4 | from .centerpoint import CenterPoint
5 | from .mvx_two_stage import MVXTwoStageDetector
6 | from .occflownet import OccFlowNet
7 | 
8 | __all__ = [
9 |     'Base3DDetector', 'MVXTwoStageDetector', 'CenterPoint', 'OccFlowNet',
10 |     'BEVDet', 'BEVDet4D', 'BEVDepth4D', 'BEVStereo4D'
11 | ]
12 | 
-------------------------------------------------------------------------------- /mmdet3d/models/losses/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
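A short sketch of building a complete model through the builder exported above, using one of the configs listed in this repository's tree; `build_model` follows the usual mmdet3d API and dispatches to the detector/segmentor builders internally.

from mmcv import Config
from mmdet3d.models import build_model

cfg = Config.fromfile('configs/occflownet/occflownet_stbase_2d.py')
model = build_model(cfg.model,
                    train_cfg=cfg.get('train_cfg'),
                    test_cfg=cfg.get('test_cfg'))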
2 | from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy 3 | from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss 4 | from .chamfer_distance import ChamferDistance, chamfer_distance 5 | from .multibin_loss import MultiBinLoss 6 | from .paconv_regularization_loss import PAConvRegularizationLoss 7 | from .rotated_iou_loss import RotatedIoU3DLoss 8 | from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss 9 | from .silog_loss import SiLogLoss 10 | from .huber_loss import HuberLoss 11 | from .tv_loss import TVLoss3D 12 | from .bce_loss import MMBCELoss 13 | 14 | __all__ = [ 15 | 'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance', 16 | 'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss', 17 | 'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss', 18 | 'MultiBinLoss', 'RotatedIoU3DLoss', 'SiLogLoss', 'HuberLoss', 'TVLoss3D', 'MMBCELoss' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/axis_aligned_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch import nn as nn 4 | 5 | from mmdet.models.losses.utils import weighted_loss 6 | from ...core.bbox import AxisAlignedBboxOverlaps3D 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def axis_aligned_iou_loss(pred, target): 12 | """Calculate the IoU loss (1-IoU) of two sets of axis aligned bounding 13 | boxes. Note that predictions and targets are one-to-one corresponded. 14 | 15 | Args: 16 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 17 | (x1, y1, z1, x2, y2, z2). 18 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 19 | (x1, y1, z1, x2, y2, z2). 20 | 21 | Returns: 22 | torch.Tensor: IoU loss between predictions and targets. 23 | """ 24 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()( 25 | pred, target, is_aligned=True) 26 | iou_loss = 1 - axis_aligned_iou 27 | return iou_loss 28 | 29 | 30 | @LOSSES.register_module() 31 | class AxisAlignedIoULoss(nn.Module): 32 | """Calculate the IoU loss (1-IoU) of axis aligned bounding boxes. 33 | 34 | Args: 35 | reduction (str): Method to reduce losses. 36 | The valid reduction method are none, sum or mean. 37 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 38 | """ 39 | 40 | def __init__(self, reduction='mean', loss_weight=1.0): 41 | super(AxisAlignedIoULoss, self).__init__() 42 | assert reduction in ['none', 'sum', 'mean'] 43 | self.reduction = reduction 44 | self.loss_weight = loss_weight 45 | 46 | def forward(self, 47 | pred, 48 | target, 49 | weight=None, 50 | avg_factor=None, 51 | reduction_override=None, 52 | **kwargs): 53 | """Forward function of loss calculation. 54 | 55 | Args: 56 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 57 | (x1, y1, z1, x2, y2, z2). 58 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 59 | (x1, y1, z1, x2, y2, z2). 60 | weight (torch.Tensor | float, optional): Weight of loss. 61 | Defaults to None. 62 | avg_factor (int, optional): Average factor that is used to average 63 | the loss. Defaults to None. 64 | reduction_override (str, optional): Method to reduce losses. 65 | The valid reduction method are 'none', 'sum' or 'mean'. 66 | Defaults to None. 67 | 68 | Returns: 69 | torch.Tensor: IoU loss between predictions and targets. 
70 | """ 71 | assert reduction_override in (None, 'none', 'mean', 'sum') 72 | reduction = ( 73 | reduction_override if reduction_override else self.reduction) 74 | if (weight is not None) and (not torch.any(weight > 0)) and ( 75 | reduction != 'none'): 76 | return (pred * weight).sum() 77 | return axis_aligned_iou_loss( 78 | pred, 79 | target, 80 | weight=weight, 81 | avg_factor=avg_factor, 82 | reduction=reduction) * self.loss_weight 83 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/bce_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from ..builder import LOSSES 5 | import torch.nn as nn 6 | 7 | @LOSSES.register_module() 8 | class MMBCELoss(nn.Module): 9 | def __init__(self, loss_weight=1.0, loss_name='loss_bce'): 10 | super().__init__() 11 | self.loss_weight = loss_weight 12 | self._loss_name = loss_name 13 | self.loss_fn = nn.BCELoss() 14 | 15 | def forward(self, pred, target, **kwargs): 16 | return self.loss_weight * self.loss_fn(pred, target) -------------------------------------------------------------------------------- /mmdet3d/models/losses/dist_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from mmdet.models.builder import LOSSES 5 | import torch.nn as nn 6 | from torch_efficient_distloss import eff_distloss 7 | 8 | @LOSSES.register_module() 9 | class DistortionLoss(nn.Module): 10 | def __init__(self, 11 | loss_weight=0.01, 12 | loss_name='loss_dist'): 13 | super().__init__() 14 | self.loss_weight = loss_weight 15 | self._loss_name = loss_name 16 | 17 | def forward(self, weights, distances, intervals): 18 | ''' 19 | Efficient O(N) realization of distortion loss. 20 | There are B rays each with N sampled points. 21 | weights: Float tensor in shape [B,N]. Volume rendering weights of each point. 22 | distances: Float tensor in shape [B,N]. Midpoint distance to camera of each point. 23 | intervals: Float tensor in shape [B,N]. The query interval of each point. 24 | ''' 25 | 26 | loss = self.loss_weight * eff_distloss(weights, distances, intervals) 27 | return loss 28 | 29 | @property 30 | def loss_name(self): 31 | return self._loss_name -------------------------------------------------------------------------------- /mmdet3d/models/losses/huber_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from ..builder import LOSSES 5 | import torch.nn as nn 6 | 7 | @LOSSES.register_module() 8 | class HuberLoss(nn.Module): 9 | def __init__(self, loss_weight=1.0, delta=1.0, loss_name='loss_huber'): 10 | super().__init__() 11 | self.loss_weight = loss_weight 12 | self._loss_name = loss_name 13 | self.loss_fn = nn.HuberLoss(delta=delta) 14 | 15 | def forward(self, pred, target, **kwargs): 16 | return self.loss_weight * self.loss_fn(pred, target) -------------------------------------------------------------------------------- /mmdet3d/models/losses/multibin_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
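A quick sanity-check sketch for the `HuberLoss` wrapper above; the tensor values are arbitrary.

import torch

from mmdet3d.models.losses import HuberLoss

loss_fn = HuberLoss(loss_weight=0.25, delta=1.0)
pred = torch.rand(8, 1)
target = torch.rand(8, 1)
loss = loss_fn(pred, target)  # scalar tensor: 0.25 * mean Huber loss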
2 | import torch 3 | from torch import nn as nn 4 | from torch.nn import functional as F 5 | 6 | from mmdet.models.losses.utils import weighted_loss 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def multibin_loss(pred_orientations, gt_orientations, num_dir_bins=4): 12 | """Multi-Bin Loss. 13 | 14 | Args: 15 | pred_orientations(torch.Tensor): Predicted local vector 16 | orientation in [axis_cls, head_cls, sin, cos] format. 17 | shape (N, num_dir_bins * 4) 18 | gt_orientations(torch.Tensor): Corresponding gt bboxes, 19 | shape (N, num_dir_bins * 2). 20 | num_dir_bins(int, optional): Number of bins to encode 21 | direction angle. 22 | Defaults: 4. 23 | 24 | Return: 25 | torch.Tensor: Loss tensor. 26 | """ 27 | cls_losses = 0 28 | reg_losses = 0 29 | reg_cnt = 0 30 | for i in range(num_dir_bins): 31 | # bin cls loss 32 | cls_ce_loss = F.cross_entropy( 33 | pred_orientations[:, (i * 2):(i * 2 + 2)], 34 | gt_orientations[:, i].long(), 35 | reduction='mean') 36 | # regression loss 37 | valid_mask_i = (gt_orientations[:, i] == 1) 38 | cls_losses += cls_ce_loss 39 | if valid_mask_i.sum() > 0: 40 | start = num_dir_bins * 2 + i * 2 41 | end = start + 2 42 | pred_offset = F.normalize(pred_orientations[valid_mask_i, 43 | start:end]) 44 | gt_offset_sin = torch.sin(gt_orientations[valid_mask_i, 45 | num_dir_bins + i]) 46 | gt_offset_cos = torch.cos(gt_orientations[valid_mask_i, 47 | num_dir_bins + i]) 48 | reg_loss = \ 49 | F.l1_loss(pred_offset[:, 0], gt_offset_sin, 50 | reduction='none') + \ 51 | F.l1_loss(pred_offset[:, 1], gt_offset_cos, 52 | reduction='none') 53 | 54 | reg_losses += reg_loss.sum() 55 | reg_cnt += valid_mask_i.sum() 56 | 57 | return cls_losses / num_dir_bins + reg_losses / reg_cnt 58 | 59 | 60 | @LOSSES.register_module() 61 | class MultiBinLoss(nn.Module): 62 | """Multi-Bin Loss for orientation. 63 | 64 | Args: 65 | reduction (str, optional): The method to reduce the loss. 66 | Options are 'none', 'mean' and 'sum'. Defaults to 'none'. 67 | loss_weight (float, optional): The weight of loss. Defaults 68 | to 1.0. 69 | """ 70 | 71 | def __init__(self, reduction='none', loss_weight=1.0): 72 | super(MultiBinLoss, self).__init__() 73 | assert reduction in ['none', 'sum', 'mean'] 74 | self.reduction = reduction 75 | self.loss_weight = loss_weight 76 | 77 | def forward(self, pred, target, num_dir_bins, reduction_override=None): 78 | """Forward function. 79 | 80 | Args: 81 | pred (torch.Tensor): The prediction. 82 | target (torch.Tensor): The learning target of the prediction. 83 | num_dir_bins (int): Number of bins to encode direction angle. 84 | reduction_override (str, optional): The reduction method used to 85 | override the original reduction method of the loss. 86 | Defaults to None. 87 | """ 88 | assert reduction_override in (None, 'none', 'mean', 'sum') 89 | reduction = ( 90 | reduction_override if reduction_override else self.reduction) 91 | loss = self.loss_weight * multibin_loss( 92 | pred, target, num_dir_bins=num_dir_bins, reduction=reduction) 93 | return loss 94 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/rotated_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | from mmcv.ops import diff_iou_rotated_3d 4 | from torch import nn as nn 5 | 6 | from mmdet.models.losses.utils import weighted_loss 7 | from ..builder import LOSSES 8 | 9 | 10 | @weighted_loss 11 | def rotated_iou_3d_loss(pred, target): 12 | """Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes. 13 | Note that predictions and targets are one-to-one corresponded. 14 | 15 | Args: 16 | pred (torch.Tensor): Bbox predictions with shape [N, 7] 17 | (x, y, z, w, l, h, alpha). 18 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7] 19 | (x, y, z, w, l, h, alpha). 20 | 21 | Returns: 22 | torch.Tensor: IoU loss between predictions and targets. 23 | """ 24 | iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0), 25 | target.unsqueeze(0))[0] 26 | return iou_loss 27 | 28 | 29 | @LOSSES.register_module() 30 | class RotatedIoU3DLoss(nn.Module): 31 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. 32 | 33 | Args: 34 | reduction (str): Method to reduce losses. 35 | The valid reduction method are none, sum or mean. 36 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 37 | """ 38 | 39 | def __init__(self, reduction='mean', loss_weight=1.0): 40 | super().__init__() 41 | self.reduction = reduction 42 | self.loss_weight = loss_weight 43 | 44 | def forward(self, 45 | pred, 46 | target, 47 | weight=None, 48 | avg_factor=None, 49 | reduction_override=None, 50 | **kwargs): 51 | """Forward function of loss calculation. 52 | 53 | Args: 54 | pred (torch.Tensor): Bbox predictions with shape [..., 7] 55 | (x, y, z, w, l, h, alpha). 56 | target (torch.Tensor): Bbox targets (gt) with shape [..., 7] 57 | (x, y, z, w, l, h, alpha). 58 | weight (torch.Tensor | float, optional): Weight of loss. 59 | Defaults to None. 60 | avg_factor (int, optional): Average factor that is used to average 61 | the loss. Defaults to None. 62 | reduction_override (str, optional): Method to reduce losses. 63 | The valid reduction method are 'none', 'sum' or 'mean'. 64 | Defaults to None. 65 | 66 | Returns: 67 | torch.Tensor: IoU loss between predictions and targets. 
68 | """ 69 | if weight is not None and not torch.any(weight > 0): 70 | return pred.sum() * weight.sum() # 0 71 | assert reduction_override in (None, 'none', 'mean', 'sum') 72 | reduction = ( 73 | reduction_override if reduction_override else self.reduction) 74 | if weight is not None and weight.dim() > 1: 75 | weight = weight.mean(-1) 76 | loss = self.loss_weight * rotated_iou_3d_loss( 77 | pred, 78 | target, 79 | weight, 80 | reduction=reduction, 81 | avg_factor=avg_factor, 82 | **kwargs) 83 | 84 | return loss 85 | -------------------------------------------------------------------------------- /mmdet3d/models/losses/silog_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from ..builder import LOSSES 5 | import torch 6 | import torch.nn as nn 7 | 8 | def silog_loss(pred, target, lambd=.85): 9 | d = torch.log(pred + 1e-7) - torch.log(target) 10 | return torch.sqrt((d ** 2).mean() - lambd * (d.mean() ** 2)) 11 | # return torch.sqrt((d ** 2).mean() - lambd * (d.mean() ** 2)) * 10.0 12 | 13 | @LOSSES.register_module() 14 | class SiLogLoss(nn.Module): 15 | def __init__(self, loss_weight=1.0, lambd=.85, loss_name='loss_silog'): 16 | super().__init__() 17 | self.loss_weight = loss_weight 18 | self._loss_name = loss_name 19 | self.lambd = lambd 20 | def forward(self, pred, target, **kwargs): 21 | return self.loss_weight * silog_loss(pred, target, lambd=self.lambd) -------------------------------------------------------------------------------- /mmdet3d/models/losses/tv_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from mmdet.models.builder import LOSSES 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | def tv_3d(voxels, weight): 10 | # bs, Z, H, W, C = voxels.size() 11 | tv_z = torch.pow(voxels[:, 1:, :, :, :] - voxels[:, :-1, :, :, :], 2).sum() 12 | tv_h = torch.pow(voxels[:, :, 1:, :, :] - voxels[:, :, :-1, :, :], 2).sum() 13 | tv_w = torch.pow(voxels[:, :, :, 1:, :] - voxels[:, :, :, :-1, :], 2).sum() 14 | return weight * (tv_z + tv_h + tv_w) / voxels.numel() 15 | 16 | @LOSSES.register_module() 17 | class TVLoss3D(nn.Module): 18 | def __init__(self, loss_weight=0.01, loss_name='loss_tv', density_weight = 5, semantics_weight = .2): 19 | super().__init__() 20 | self.loss_weight = loss_weight 21 | self._loss_name = loss_name 22 | self.density_weight = density_weight 23 | self.semantics_weight = semantics_weight 24 | def forward(self, voxel_outs): 25 | 26 | density_tv = tv_3d(voxel_outs[0], self.density_weight) 27 | semantics_tv = tv_3d(voxel_outs[1], self.semantics_weight) 28 | return self.loss_weight * (density_tv + semantics_tv) 29 | 30 | @property 31 | def loss_name(self): 32 | return self._loss_name 33 | 34 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
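To illustrate the input layout `TVLoss3D` above expects (a [density, semantics] pair of 5D voxel grids, per the shape comment in `tv_3d`), a small sketch with placeholder shapes.

import torch

from mmdet3d.models.losses import TVLoss3D

tv_loss = TVLoss3D(loss_weight=0.01)
density = torch.rand(1, 16, 200, 200, 1)     # (bs, Z, H, W, 1)
semantics = torch.rand(1, 16, 200, 200, 17)  # (bs, Z, H, W, C)
loss = tv_loss([density, semantics])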
2 | from mmdet.models.necks.fpn import FPN 3 | from .fpn import CustomFPN 4 | from .lss_fpn import FPN_LSS 5 | from .second_fpn import SECONDFPN 6 | from .view_transformer import LSSViewTransformer, LSSViewTransformerBEVDepth, \ 7 | LSSViewTransformerBEVStereo 8 | 9 | __all__ = [ 10 | 'FPN', 'SECONDFPN', 'LSSViewTransformer', 'CustomFPN', 'FPN_LSS', 'LSSViewTransformerBEVDepth', 11 | 'LSSViewTransformerBEVStereo' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/models/necks/second_fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | from mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer 5 | from mmcv.runner import BaseModule, auto_fp16 6 | from torch import nn as nn 7 | 8 | from ..builder import NECKS 9 | 10 | 11 | @NECKS.register_module() 12 | class SECONDFPN(BaseModule): 13 | """FPN used in SECOND/PointPillars/PartA2/MVXNet. 14 | 15 | Args: 16 | in_channels (list[int]): Input channels of multi-scale feature maps. 17 | out_channels (list[int]): Output channels of feature maps. 18 | upsample_strides (list[int]): Strides used to upsample the 19 | feature maps. 20 | norm_cfg (dict): Config dict of normalization layers. 21 | upsample_cfg (dict): Config dict of upsample layers. 22 | conv_cfg (dict): Config dict of conv layers. 23 | use_conv_for_no_stride (bool): Whether to use conv when stride is 1. 24 | """ 25 | 26 | def __init__(self, 27 | in_channels=[128, 128, 256], 28 | out_channels=[256, 256, 256], 29 | upsample_strides=[1, 2, 4], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | conv_cfg=dict(type='Conv2d', bias=False), 33 | use_conv_for_no_stride=False, 34 | init_cfg=None): 35 | # if for GroupNorm, 36 | # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) 37 | super(SECONDFPN, self).__init__(init_cfg=init_cfg) 38 | assert len(out_channels) == len(upsample_strides) == len(in_channels) 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.fp16_enabled = False 42 | 43 | deblocks = [] 44 | for i, out_channel in enumerate(out_channels): 45 | stride = upsample_strides[i] 46 | if stride > 1 or (stride == 1 and not use_conv_for_no_stride): 47 | upsample_layer = build_upsample_layer( 48 | upsample_cfg, 49 | in_channels=in_channels[i], 50 | out_channels=out_channel, 51 | kernel_size=upsample_strides[i], 52 | stride=upsample_strides[i]) 53 | else: 54 | stride = np.round(1 / stride).astype(np.int64) 55 | upsample_layer = build_conv_layer( 56 | conv_cfg, 57 | in_channels=in_channels[i], 58 | out_channels=out_channel, 59 | kernel_size=stride, 60 | stride=stride) 61 | 62 | deblock = nn.Sequential(upsample_layer, 63 | build_norm_layer(norm_cfg, out_channel)[1], 64 | nn.ReLU(inplace=True)) 65 | deblocks.append(deblock) 66 | self.deblocks = nn.ModuleList(deblocks) 67 | 68 | if init_cfg is None: 69 | self.init_cfg = [ 70 | dict(type='Kaiming', layer='ConvTranspose2d'), 71 | dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0) 72 | ] 73 | 74 | @auto_fp16() 75 | def forward(self, x): 76 | """Forward function. 77 | 78 | Args: 79 | x (torch.Tensor): 4D Tensor in (N, C, H, W) shape. 80 | 81 | Returns: 82 | list[torch.Tensor]: Multi-level feature maps. 
83 | """ 84 | assert len(x) == len(self.in_channels) 85 | ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)] 86 | 87 | if len(ups) > 1: 88 | out = torch.cat(ups, dim=1) 89 | else: 90 | out = ups[0] 91 | return [out] 92 | -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from .nerf_decoder import PointDecoder 5 | from .renderer import Renderer, RenderModule 6 | from .hooks import CustomCosineAnealingLrUpdaterHook 7 | 8 | __all__ = [ 9 | "Renderer", "RenderModule", "PointDecoder", "CustomCosineAnealingLrUpdaterHook" 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/hooks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | from mmcv import runner 5 | 6 | from mmcv.runner.hooks import HOOKS, CosineAnnealingLrUpdaterHook 7 | from mmcv.runner.hooks.lr_updater import annealing_cos 8 | 9 | @HOOKS.register_module() 10 | class CustomCosineAnealingLrUpdaterHook(CosineAnnealingLrUpdaterHook): 11 | 12 | def __init__(self, 13 | start_at: int = 0, 14 | **kwargs) -> None: 15 | self.start_at = start_at 16 | super().__init__(**kwargs) 17 | 18 | def get_lr(self, runner: 'runner.BaseRunner', base_lr: float): 19 | if self.by_epoch: 20 | progress = runner.epoch - self.start_at 21 | max_progress = runner.max_epochs - self.start_at 22 | else: 23 | iter_per_epoch = runner.max_iters // runner.max_epochs 24 | progress = runner.iter - (iter_per_epoch * self.start_at) 25 | max_progress = runner.max_iters - (iter_per_epoch * self.start_at) 26 | 27 | if runner.epoch < self.start_at: 28 | return base_lr 29 | 30 | if self.min_lr_ratio is not None: 31 | target_lr = base_lr * self.min_lr_ratio 32 | else: 33 | target_lr = self.min_lr # type:ignore 34 | return annealing_cos(base_lr, target_lr, progress / max_progress) -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/nerf_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | import torch 5 | import torch.nn as nn 6 | from mmcv.cnn import build_activation_layer 7 | from mmcv.cnn.bricks.registry import FEEDFORWARD_NETWORK 8 | from mmcv.cnn.bricks.conv_module import ConvModule 9 | from mmcv.runner.base_module import BaseModule 10 | 11 | class SimpleBasicBlock(nn.Module): 12 | def __init__(self, channels_in, channels_out, stride=1): 13 | super(SimpleBasicBlock, self).__init__() 14 | self.conv = ConvModule( 15 | channels_in, 16 | channels_out, 17 | kernel_size=3, 18 | stride=stride, 19 | padding=1, 20 | bias=False, 21 | conv_cfg=dict(type='Conv3d'), 22 | norm_cfg=dict(type='BN3d', ), 23 | act_cfg=dict(type='ReLU',inplace=True)) 24 | 25 | if channels_in != channels_out: 26 | self.skip_conv = ConvModule( 27 | channels_in, channels_out, 28 | kernel_size=1, 29 | stride=1, 30 | padding=0, 31 | bias=False, 32 | conv_cfg=dict(type='Conv3d'), 33 | act_cfg=None 34 | ) 35 | else: 36 | self.skip_conv = None 37 | 38 | def forward(self, x): 39 | skip = self.skip_conv(x) if self.skip_conv is not None else x 40 | return self.conv(x) + skip 41 | 
42 | @FEEDFORWARD_NETWORK.register_module() 43 | class PointDecoder(BaseModule): 44 | """ 45 | Decoder that predicts values for individual points. 46 | """ 47 | def __init__(self, 48 | init_cfg = dict(type='Xavier', layer=['Linear'], distribution='uniform', bias=0.), 49 | in_channels = 256, 50 | embed_dims = 256, 51 | num_hidden_layers=1, 52 | num_classes=1, 53 | ffn_drop=0, 54 | bias_init = None, 55 | act_cfg=dict(type='ReLU', inplace=True), 56 | final_act_cfg=None, 57 | ): 58 | super().__init__(init_cfg=init_cfg) 59 | self.embed_dims = embed_dims 60 | self.activate = build_activation_layer(act_cfg) 61 | self.final_activate = build_activation_layer(final_act_cfg) if final_act_cfg is not None else None 62 | self.num_classes = num_classes 63 | 64 | layers = [] 65 | for _ in range(num_hidden_layers): 66 | layers.extend( 67 | [ 68 | nn.Linear(in_channels, embed_dims), 69 | self.activate, 70 | nn.Dropout(ffn_drop) 71 | ] 72 | ) 73 | in_channels = embed_dims 74 | layers.append(nn.Linear(embed_dims, num_classes)) 75 | self.layers = nn.Sequential(*layers) 76 | 77 | # initialize bias of last linear layer to represent data distribution 78 | if bias_init is not None: 79 | self.layers[-1].bias.data = torch.tensor(bias_init, dtype=torch.float32) 80 | 81 | def forward(self, x): 82 | x = self.layers(x) 83 | if self.final_activate is not None: 84 | x = self.final_activate(x) 85 | return x 86 | 87 | @FEEDFORWARD_NETWORK.register_module() 88 | class VoxelDecoder(BaseModule): 89 | def __init__(self, 90 | init_cfg = dict(type='Xavier', layer=['Conv3d'], distribution='uniform', bias=0.), 91 | embed_dims=256, 92 | in_channels=256, 93 | num_layers=2, 94 | out_layer=None 95 | ): 96 | super().__init__(init_cfg=init_cfg) 97 | self.embed_dims = embed_dims 98 | 99 | layers = [] 100 | 101 | 102 | for i in range(num_layers): 103 | layers.extend( 104 | [ 105 | SimpleBasicBlock(embed_dims if i!=0 else in_channels, embed_dims) 106 | ] 107 | ) 108 | self.out_layer = nn.Linear(embed_dims, out_layer) if out_layer is not None else None 109 | 110 | 111 | self.layers = nn.Sequential(*layers) 112 | 113 | def forward(self, voxel_features): 114 | out = self.layers(voxel_features.permute(0, 4, 1, 2, 3)).permute(0, 2, 3, 4, 1) 115 | 116 | if self.out_layer is not None: 117 | out = self.out_layer(out) 118 | 119 | return out -------------------------------------------------------------------------------- /mmdet3d/models/occflownet_modules/samplers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Robert Bosch GmbH 2 | # SPDX-License-Identifier: AGPL-3.0 3 | 4 | import torch 5 | import torch.nn as nn 6 | from abc import abstractmethod 7 | 8 | class Sampler(nn.Module): 9 | "Abstract Sampler superclass in case some common logic is needed in the future." 
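The bias_init argument of PointDecoder above is the usual prior-probability initialization: a chosen logit is written into the last layer's bias so the freshly initialized network already predicts an assumed data distribution. A hedged sketch (the prior and all argument values are invented for illustration):

import math

prior = 0.01  # assumed probability that a queried point is occupied
density_bias = -math.log((1 - prior) / prior)  # sigmoid(density_bias) == prior at initialization

density_decoder = PointDecoder(
    in_channels=256,
    embed_dims=256,
    num_hidden_layers=1,
    num_classes=1,
    bias_init=[density_bias],  # copied into layers[-1].bias by __init__ above
    final_act_cfg=None,        # keep raw logits; any downstream activation is an assumption
)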
10 | def __init__(self) -> None: 11 | super().__init__() 12 | 13 | @abstractmethod 14 | def forward(self, rays, num_samples, near, far, *args, **kwargs): 15 | "Generate Ray Samples" 16 | 17 | def linear_scale_to_target_space(self, samples, near, far): 18 | return samples * far + (1 - samples) * near 19 | 20 | 21 | class UniformSampler(Sampler): 22 | def __init__(self, single_jitter=True) -> None: 23 | super().__init__() 24 | self.single_jitter = single_jitter 25 | 26 | def forward(self, origins, directions, num_samples, near, far): 27 | num_rays = directions.shape[0] 28 | bins = torch.linspace(0.0, 1.0, num_samples + 1).to(directions.device) 29 | 30 | # add random jitter to bin borders (except beginning and end) 31 | jitter_scale = lambda x: x * (1 / num_samples) + (- 1 / num_samples / 2) 32 | jitter = torch.rand((num_rays, num_samples - 1), dtype=bins.dtype, device=bins.device) 33 | jitter = torch.cat((jitter.new_zeros(num_rays, 1), jitter_scale(jitter), jitter.new_zeros(num_rays, 1) ), dim=-1) 34 | bins = bins + jitter 35 | 36 | bin_upper = bins[:, 1:] 37 | bin_lower = bins[:, :-1] 38 | bin_centers = (bin_lower + bin_upper) / 2.0 39 | 40 | # scale to metric space (meters) 41 | samples_start = self.linear_scale_to_target_space(bin_lower, near, far) 42 | samples_end = self.linear_scale_to_target_space(bin_upper, near, far) 43 | samples_center = self.linear_scale_to_target_space(bin_centers, near, far) 44 | 45 | return samples_start, samples_end, samples_center 46 | 47 | class PDFSampler(Sampler): 48 | def __init__(self, single_jitter=True) -> None: 49 | super().__init__() 50 | self.histogram_padding = 0.01 51 | self.single_jitter = single_jitter 52 | 53 | def forward(self, origins, directions, num_samples, near, far, weights=None, existing_bins=None): 54 | # def forward(self, rays, num_samples, near, far, weights=None, existing_bins=None): 55 | assert weights is not None and existing_bins is not None 56 | 57 | # nerf studio version 58 | weights = weights + self.histogram_padding # add small amount to weights 59 | num_bins = num_samples + 1 60 | weights_sum = torch.sum(weights, dim=-1, keepdim=True) 61 | padding = torch.relu(1e-5 - weights_sum) 62 | weights = weights + padding / weights.shape[-1] 63 | weights_sum += padding 64 | 65 | # construct pdf and cdf 66 | pdf = weights / weights_sum 67 | cdf = torch.min(torch.ones_like(pdf), torch.cumsum(pdf, dim=-1)) 68 | cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], dim=-1) 69 | 70 | # create uniform stratified samples 71 | u = torch.linspace(0.0, 1.0 - (1.0 / num_bins), steps=num_bins, device=cdf.device) 72 | u = u.expand(size=(*cdf.shape[:-1], num_bins)) 73 | if self.single_jitter: 74 | rand = torch.rand((*cdf.shape[:-1], 1), device=cdf.device) / num_bins 75 | else: 76 | rand = torch.rand((*cdf.shape[:-1], num_samples + 1), device=cdf.device) / num_bins 77 | u = (u + rand).contiguous() 78 | 79 | existing_bins = torch.cat((existing_bins[0], existing_bins[1][..., -1:]), dim=-1) 80 | 81 | num_initial_samples = weights.shape[-1] 82 | inds = torch.searchsorted(cdf, u, side="right") 83 | below = torch.clamp(inds - 1, 0, num_initial_samples) 84 | above = torch.clamp(inds, 0, num_initial_samples) 85 | cdf_g0 = torch.gather(cdf, -1, below) 86 | bins_g0 = torch.gather(existing_bins, -1, below) 87 | cdf_g1 = torch.gather(cdf, -1, above) 88 | bins_g1 = torch.gather(existing_bins, -1, above) 89 | 90 | t = torch.clip(torch.nan_to_num((u - cdf_g0) / (cdf_g1 - cdf_g0), 0), 0, 1) 91 | bins = bins_g0 + t * (bins_g1 - bins_g0) 92 | 93 | bins = 
bins.detach() 94 | 95 | lower = bins[:, :-1] 96 | upper = bins[:, 1:] 97 | center = (upper + lower) / 2. 98 | 99 | return lower, upper, center -------------------------------------------------------------------------------- /mmdet3d/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.ops import (RoIAlign, SigmoidFocalLoss, batched_nms, 3 | get_compiler_version, get_compiling_cuda_version, nms, 4 | roi_align, sigmoid_focal_loss, soft_nms) 5 | from mmcv.ops.assign_score_withk import assign_score_withk 6 | from mmcv.ops.ball_query import ball_query 7 | from mmcv.ops.furthest_point_sample import (furthest_point_sample, 8 | furthest_point_sample_with_dist) 9 | from mmcv.ops.gather_points import gather_points 10 | from mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation 11 | from mmcv.ops.knn import knn 12 | from mmcv.ops.points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, 13 | points_in_boxes_part) 14 | from mmcv.ops.points_sampler import PointsSampler as Points_Sampler 15 | from mmcv.ops.roiaware_pool3d import RoIAwarePool3d 16 | from mmcv.ops.roipoint_pool3d import RoIPointPool3d 17 | from mmcv.ops.scatter_points import DynamicScatter, dynamic_scatter 18 | from mmcv.ops.three_interpolate import three_interpolate 19 | from mmcv.ops.three_nn import three_nn 20 | from mmcv.ops.voxelize import Voxelization, voxelization 21 | 22 | from .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule 23 | from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d 24 | from .paconv import PAConv, PAConvCUDA 25 | from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 26 | PAConvSAModule, PAConvSAModuleMSG, 27 | PointFPModule, PointSAModule, PointSAModuleMSG, 28 | build_sa_module) 29 | from .sparse_block import (SparseBasicBlock, SparseBottleneck, 30 | make_sparse_convmodule) 31 | 32 | __all__ = [ 33 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version', 34 | 'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d', 35 | 'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization', 36 | 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss', 37 | 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', 38 | 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', 39 | 'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample', 40 | 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', 41 | 'gather_points', 'grouping_operation', 'GroupAll', 'QueryAndGroup', 42 | 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', 'DGCNNFPModule', 43 | 'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all', 44 | 'assign_score_withk', 45 | 'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA', 46 | 'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule', 47 | 'PAConvCUDASAModuleMSG', 'RoIPointPool3d' 48 | ] 49 | -------------------------------------------------------------------------------- /mmdet3d/ops/bev_pool_v2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Phigent Robotics. All rights reserved. 2 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
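UniformSampler and PDFSampler above together implement the standard coarse-to-fine scheme from the NeRF literature: draw stratified uniform samples, obtain per-sample weights from volume rendering, then resample proportionally to those weights through the inverse CDF. A minimal sketch, assuming invented ray counts and stand-in weights (in the repository the weights would come from the renderer):

import torch
import torch.nn.functional as F

num_rays, n_coarse, n_fine = 1024, 64, 32
origins = torch.zeros(num_rays, 3)
directions = F.normalize(torch.randn(num_rays, 3), dim=-1)
near, far = 0.1, 60.0  # illustrative metric range in meters

uniform = UniformSampler()
lower, upper, centers = uniform(origins, directions, n_coarse, near, far)

weights = torch.rand(num_rays, n_coarse)  # stand-in for volume-rendering weights
pdf = PDFSampler()
fine_lower, fine_upper, fine_centers = pdf(
    origins, directions, n_fine, near, far,
    weights=weights, existing_bins=(lower, upper))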
2 | from .dgcnn_fa_module import DGCNNFAModule 3 | from .dgcnn_fp_module import DGCNNFPModule 4 | from .dgcnn_gf_module import DGCNNGFModule 5 | 6 | __all__ = ['DGCNNFAModule', 'DGCNNFPModule', 'DGCNNGFModule'] 7 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fa_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ConvModule 4 | from mmcv.runner import BaseModule, force_fp32 5 | from torch import nn as nn 6 | 7 | 8 | class DGCNNFAModule(BaseModule): 9 | """Point feature aggregation module used in DGCNN. 10 | 11 | Aggregate all the features of points. 12 | 13 | Args: 14 | mlp_channels (list[int]): List of mlp channels. 15 | norm_cfg (dict, optional): Type of normalization method. 16 | Defaults to dict(type='BN1d'). 17 | act_cfg (dict, optional): Type of activation method. 18 | Defaults to dict(type='ReLU'). 19 | init_cfg (dict, optional): Initialization config. Defaults to None. 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels, 24 | norm_cfg=dict(type='BN1d'), 25 | act_cfg=dict(type='ReLU'), 26 | init_cfg=None): 27 | super().__init__(init_cfg=init_cfg) 28 | self.fp16_enabled = False 29 | self.mlps = nn.Sequential() 30 | for i in range(len(mlp_channels) - 1): 31 | self.mlps.add_module( 32 | f'layer{i}', 33 | ConvModule( 34 | mlp_channels[i], 35 | mlp_channels[i + 1], 36 | kernel_size=(1, ), 37 | stride=(1, ), 38 | conv_cfg=dict(type='Conv1d'), 39 | norm_cfg=norm_cfg, 40 | act_cfg=act_cfg)) 41 | 42 | @force_fp32() 43 | def forward(self, points): 44 | """forward. 45 | 46 | Args: 47 | points (List[Tensor]): tensor of the features to be aggregated. 48 | 49 | Returns: 50 | Tensor: (B, N, M) M = mlp[-1], tensor of the output points. 51 | """ 52 | 53 | if len(points) > 1: 54 | new_points = torch.cat(points[1:], dim=-1) 55 | new_points = new_points.transpose(1, 2).contiguous() # (B, C, N) 56 | new_points_copy = new_points 57 | 58 | new_points = self.mlps(new_points) 59 | 60 | new_fa_points = new_points.max(dim=-1, keepdim=True)[0] 61 | new_fa_points = new_fa_points.repeat(1, 1, new_points.shape[-1]) 62 | 63 | new_points = torch.cat([new_fa_points, new_points_copy], dim=1) 64 | new_points = new_points.transpose(1, 2).contiguous() 65 | else: 66 | new_points = points 67 | 68 | return new_points 69 | -------------------------------------------------------------------------------- /mmdet3d/ops/dgcnn_modules/dgcnn_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import ConvModule 3 | from mmcv.runner import BaseModule, force_fp32 4 | from torch import nn as nn 5 | 6 | 7 | class DGCNNFPModule(BaseModule): 8 | """Point feature propagation module used in DGCNN. 9 | 10 | Propagate the features from one set to another. 11 | 12 | Args: 13 | mlp_channels (list[int]): List of mlp channels. 14 | norm_cfg (dict, optional): Type of normalization method. 15 | Defaults to dict(type='BN1d'). 16 | act_cfg (dict, optional): Type of activation method. 17 | Defaults to dict(type='ReLU'). 18 | init_cfg (dict, optional): Initialization config. Defaults to None.
19 | """ 20 | 21 | def __init__(self, 22 | mlp_channels, 23 | norm_cfg=dict(type='BN1d'), 24 | act_cfg=dict(type='ReLU'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, ), 36 | stride=(1, ), 37 | conv_cfg=dict(type='Conv1d'), 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg)) 40 | 41 | @force_fp32() 42 | def forward(self, points): 43 | """forward. 44 | 45 | Args: 46 | points (Tensor): (B, N, C) tensor of the input points. 47 | 48 | Returns: 49 | Tensor: (B, N, M) M = mlp[-1], tensor of the new points. 50 | """ 51 | 52 | if points is not None: 53 | new_points = points.transpose(1, 2).contiguous() # (B, C, N) 54 | new_points = self.mlps(new_points) 55 | new_points = new_points.transpose(1, 2).contiguous() 56 | else: 57 | new_points = points 58 | 59 | return new_points 60 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .paconv import PAConv, PAConvCUDA 3 | 4 | __all__ = ['PAConv', 'PAConvCUDA'] 5 | -------------------------------------------------------------------------------- /mmdet3d/ops/paconv/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def calc_euclidian_dist(xyz1, xyz2): 6 | """Calculate the Euclidean distance between two sets of points. 7 | 8 | Args: 9 | xyz1 (torch.Tensor): (N, 3), the first set of points. 10 | xyz2 (torch.Tensor): (N, 3), the second set of points. 11 | 12 | Returns: 13 | torch.Tensor: (N, ), the Euclidean distance between each point pair. 14 | """ 15 | assert xyz1.shape[0] == xyz2.shape[0], 'number of points are not the same' 16 | assert xyz1.shape[1] == xyz2.shape[1] == 3, \ 17 | 'points coordinates dimension is not 3' 18 | return torch.norm(xyz1 - xyz2, dim=-1) 19 | 20 | 21 | def assign_score(scores, point_features): 22 | """Perform weighted sum to aggregate output features according to scores. 23 | This function is used in non-CUDA version of PAConv. 24 | 25 | Compared to the cuda op assigh_score_withk, this pytorch implementation 26 | pre-computes output features for the neighbors of all centers, and then 27 | performs aggregation. It consumes more GPU memories. 28 | 29 | Args: 30 | scores (torch.Tensor): (B, npoint, K, M), predicted scores to 31 | aggregate weight matrices in the weight bank. 32 | `npoint` is the number of sampled centers. 33 | `K` is the number of queried neighbors. 34 | `M` is the number of weight matrices in the weight bank. 35 | point_features (torch.Tensor): (B, npoint, K, M, out_dim) 36 | Pre-computed point features to be aggregated. 37 | 38 | Returns: 39 | torch.Tensor: (B, npoint, K, out_dim), the aggregated features. 40 | """ 41 | B, npoint, K, M = scores.size() 42 | scores = scores.view(B, npoint, K, 1, M) 43 | output = torch.matmul(scores, point_features).view(B, npoint, K, -1) 44 | return output 45 | 46 | 47 | def assign_kernel_withoutk(features, kernels, M): 48 | """Pre-compute features with weight matrices in weight bank. This function 49 | is used before cuda op assign_score_withk in CUDA version PAConv. 
50 | 51 | Args: 52 | features (torch.Tensor): (B, in_dim, N), input features of all points. 53 | `N` is the number of points in current point cloud. 54 | kernels (torch.Tensor): (2 * in_dim, M * out_dim), weight matrices in 55 | the weight bank, transformed from (M, 2 * in_dim, out_dim). 56 | `2 * in_dim` is because the input features are concatenation of 57 | (point_features - center_features, point_features). 58 | M (int): Number of weight matrices in the weight bank. 59 | 60 | Returns: 61 | Tuple[torch.Tensor]: both of shape (B, N, M, out_dim): 62 | 63 | - point_features: Pre-computed features for points. 64 | - center_features: Pre-computed features for centers. 65 | """ 66 | B, in_dim, N = features.size() 67 | feat_trans = features.permute(0, 2, 1) # [B, N, in_dim] 68 | out_feat_half1 = torch.matmul(feat_trans, kernels[:in_dim]).view( 69 | B, N, M, -1) # [B, N, M, out_dim] 70 | out_feat_half2 = torch.matmul(feat_trans, kernels[in_dim:]).view( 71 | B, N, M, -1) # [B, N, M, out_dim] 72 | 73 | # TODO: why this hard-coded if condition? 74 | # when the network input is only xyz without additional features 75 | # xyz will be used as features, so that features.size(1) == 3 % 2 != 0 76 | # we need to compensate center_features because otherwise 77 | # `point_features - center_features` will result in all zeros? 78 | if features.size(1) % 2 != 0: 79 | out_feat_half_coord = torch.matmul( 80 | feat_trans[:, :, :3], # [B, N, 3] 81 | kernels[in_dim:in_dim + 3]).view(B, N, M, -1) # [B, N, M, out_dim] 82 | else: 83 | out_feat_half_coord = torch.zeros_like(out_feat_half2) 84 | 85 | point_features = out_feat_half1 + out_feat_half2 86 | center_features = out_feat_half1 + out_feat_half_coord 87 | return point_features, center_features 88 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_sa_module 3 | from .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG, 4 | PAConvSAModule, PAConvSAModuleMSG) 5 | from .point_fp_module import PointFPModule 6 | from .point_sa_module import PointSAModule, PointSAModuleMSG 7 | 8 | __all__ = [ 9 | 'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule', 10 | 'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule', 11 | 'PAConvCUDASAModuleMSG' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry 3 | 4 | SA_MODULES = Registry('point_sa_module') 5 | 6 | 7 | def build_sa_module(cfg, *args, **kwargs): 8 | """Build PointNet2 set abstraction (SA) module. 9 | 10 | Args: 11 | cfg (None or dict): The SA module config, which should contain: 12 | - type (str): Module type. 13 | - module args: Args needed to instantiate an SA module. 14 | args (argument list): Arguments passed to the `__init__` 15 | method of the corresponding module. 16 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 17 | method of the corresponding SA module . 18 | 19 | Returns: 20 | nn.Module: Created SA module. 
21 | """ 22 | if cfg is None: 23 | cfg_ = dict(type='PointSAModule') 24 | else: 25 | if not isinstance(cfg, dict): 26 | raise TypeError('cfg must be a dict') 27 | if 'type' not in cfg: 28 | raise KeyError('the cfg dict must contain the key "type"') 29 | cfg_ = cfg.copy() 30 | 31 | module_type = cfg_.pop('type') 32 | if module_type not in SA_MODULES: 33 | raise KeyError(f'Unrecognized module type {module_type}') 34 | else: 35 | sa_module = SA_MODULES.get(module_type) 36 | 37 | module = sa_module(*args, **kwargs, **cfg_) 38 | 39 | return module 40 | -------------------------------------------------------------------------------- /mmdet3d/ops/pointnet_modules/point_fp_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List 3 | 4 | import torch 5 | from mmcv.cnn import ConvModule 6 | from mmcv.ops import three_interpolate, three_nn 7 | from mmcv.runner import BaseModule, force_fp32 8 | from torch import nn as nn 9 | 10 | 11 | class PointFPModule(BaseModule): 12 | """Point feature propagation module used in PointNets. 13 | 14 | Propagate the features from one set to another. 15 | 16 | Args: 17 | mlp_channels (list[int]): List of mlp channels. 18 | norm_cfg (dict, optional): Type of normalization method. 19 | Default: dict(type='BN2d'). 20 | """ 21 | 22 | def __init__(self, 23 | mlp_channels: List[int], 24 | norm_cfg: dict = dict(type='BN2d'), 25 | init_cfg=None): 26 | super().__init__(init_cfg=init_cfg) 27 | self.fp16_enabled = False 28 | self.mlps = nn.Sequential() 29 | for i in range(len(mlp_channels) - 1): 30 | self.mlps.add_module( 31 | f'layer{i}', 32 | ConvModule( 33 | mlp_channels[i], 34 | mlp_channels[i + 1], 35 | kernel_size=(1, 1), 36 | stride=(1, 1), 37 | conv_cfg=dict(type='Conv2d'), 38 | norm_cfg=norm_cfg)) 39 | 40 | @force_fp32() 41 | def forward(self, target: torch.Tensor, source: torch.Tensor, 42 | target_feats: torch.Tensor, 43 | source_feats: torch.Tensor) -> torch.Tensor: 44 | """forward. 45 | 46 | Args: 47 | target (Tensor): (B, n, 3) tensor of the xyz positions of 48 | the target features. 49 | source (Tensor): (B, m, 3) tensor of the xyz positions of 50 | the source features. 51 | target_feats (Tensor): (B, C1, n) tensor of the features to be 52 | propagated to. 53 | source_feats (Tensor): (B, C2, m) tensor of features 54 | to be propagated. 55 | 56 | Return: 57 | Tensor: (B, M, N) M = mlp[-1], tensor of the target features. 58 | """ 59 | if source is not None: 60 | dist, idx = three_nn(target, source) 61 | dist_reciprocal = 1.0 / (dist + 1e-8) 62 | norm = torch.sum(dist_reciprocal, dim=2, keepdim=True) 63 | weight = dist_reciprocal / norm 64 | 65 | interpolated_feats = three_interpolate(source_feats, idx, weight) 66 | else: 67 | interpolated_feats = source_feats.expand(*source_feats.size()[0:2], 68 | target.size(1)) 69 | 70 | if target_feats is not None: 71 | new_features = torch.cat([interpolated_feats, target_feats], 72 | dim=1) # (B, C2 + C1, n) 73 | else: 74 | new_features = interpolated_feats 75 | 76 | new_features = new_features.unsqueeze(-1) 77 | new_features = self.mlps(new_features) 78 | 79 | return new_features.squeeze(-1) 80 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .overwrite_spconv.write_spconv2 import register_spconv2 3 | 4 | try: 5 | import spconv 6 | except ImportError: 7 | IS_SPCONV2_AVAILABLE = False 8 | else: 9 | if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0': 10 | IS_SPCONV2_AVAILABLE = register_spconv2() 11 | else: 12 | IS_SPCONV2_AVAILABLE = False 13 | 14 | __all__ = ['IS_SPCONV2_AVAILABLE'] 15 | -------------------------------------------------------------------------------- /mmdet3d/ops/spconv/overwrite_spconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .write_spconv2 import register_spconv2 3 | 4 | __all__ = ['register_spconv2'] 5 | -------------------------------------------------------------------------------- /mmdet3d/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg, print_log 3 | 4 | from .collect_env import collect_env 5 | from .compat_cfg import compat_cfg 6 | from .logger import get_root_logger 7 | from .misc import find_latest_checkpoint 8 | from .setup_env import setup_multi_processes 9 | 10 | __all__ = [ 11 | 'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env', 12 | 'print_log', 'setup_multi_processes', 'find_latest_checkpoint', 13 | 'compat_cfg' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet3d/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmdet 6 | import mmdet3d 7 | import mmseg 8 | from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE 9 | 10 | 11 | def collect_env(): 12 | """Collect the information of the running environments.""" 13 | env_info = collect_base_env() 14 | env_info['MMDetection'] = mmdet.__version__ 15 | env_info['MMSegmentation'] = mmseg.__version__ 16 | env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7] 17 | env_info['spconv2.0'] = IS_SPCONV2_AVAILABLE 18 | return env_info 19 | 20 | 21 | if __name__ == '__main__': 22 | for name, val in collect_env().items(): 23 | print(f'{name}: {val}') 24 | -------------------------------------------------------------------------------- /mmdet3d/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | from mmcv.utils import get_logger 5 | 6 | 7 | def get_root_logger(log_file=None, log_level=logging.INFO, name='mmdet3d'): 8 | """Get root logger and add a keyword filter to it. 9 | 10 | The logger will be initialized if it has not been initialized. By default a 11 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 12 | also be added. The name of the root logger is the top-level package name, 13 | e.g., "mmdet3d". 14 | 15 | Args: 16 | log_file (str, optional): File path of log. Defaults to None. 17 | log_level (int, optional): The level of logger. 18 | Defaults to logging.INFO. 19 | name (str, optional): The name of the root logger, also used as a 20 | filter keyword. Defaults to 'mmdet3d'. 
21 | 22 | Returns: 23 | :obj:`logging.Logger`: The obtained logger 24 | """ 25 | logger = get_logger(name=name, log_file=log_file, log_level=log_level) 26 | 27 | # add a logging filter 28 | logging_filter = logging.Filter(name) 29 | logging_filter.filter = lambda record: record.find(name) != -1 30 | 31 | return logger 32 | -------------------------------------------------------------------------------- /mmdet3d/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import glob 3 | import os.path as osp 4 | import warnings 5 | 6 | 7 | def find_latest_checkpoint(path, suffix='pth'): 8 | """Find the latest checkpoint from the working directory. This function is 9 | copied from mmdetection. 10 | 11 | Args: 12 | path(str): The path to find checkpoints. 13 | suffix(str): File extension. 14 | Defaults to pth. 15 | 16 | Returns: 17 | latest_path(str | None): File path of the latest checkpoint. 18 | References: 19 | .. [1] https://github.com/microsoft/SoftTeacher 20 | /blob/main/ssod/utils/patch.py 21 | """ 22 | if not osp.exists(path): 23 | warnings.warn('The path of checkpoints does not exist.') 24 | return None 25 | if osp.exists(osp.join(path, f'latest.{suffix}')): 26 | return osp.join(path, f'latest.{suffix}') 27 | 28 | checkpoints = glob.glob(osp.join(path, f'*.{suffix}')) 29 | if len(checkpoints) == 0: 30 | warnings.warn('There are no checkpoints in the path.') 31 | return None 32 | latest = -1 33 | latest_path = None 34 | for checkpoint in checkpoints: 35 | count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0]) 36 | if count > latest: 37 | latest = count 38 | latest_path = checkpoint 39 | return latest_path 40 | -------------------------------------------------------------------------------- /mmdet3d/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import platform 4 | import warnings 5 | 6 | import cv2 7 | from torch import multiprocessing as mp 8 | 9 | 10 | def setup_multi_processes(cfg): 11 | """Setup multi-processing environment variables.""" 12 | # set multi-process start method as `fork` to speed up the training 13 | if platform.system() != 'Windows': 14 | mp_start_method = cfg.get('mp_start_method', 'fork') 15 | current_method = mp.get_start_method(allow_none=True) 16 | if current_method is not None and current_method != mp_start_method: 17 | warnings.warn( 18 | f'Multi-processing start method `{mp_start_method}` is ' 19 | f'different from the previous setting `{current_method}`.' 20 | f'It will be force set to `{mp_start_method}`. 
You can change ' 21 | f'this behavior by changing `mp_start_method` in your config.') 22 | mp.set_start_method(mp_start_method, force=True) 23 | 24 | # disable opencv multithreading to avoid system being overloaded 25 | opencv_num_threads = cfg.get('opencv_num_threads', 0) 26 | cv2.setNumThreads(opencv_num_threads) 27 | 28 | # setup OMP threads 29 | # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py # noqa 30 | workers_per_gpu = cfg.data.get('workers_per_gpu', 1) 31 | if 'train_dataloader' in cfg.data: 32 | workers_per_gpu = \ 33 | max(cfg.data.train_dataloader.get('workers_per_gpu', 1), 34 | workers_per_gpu) 35 | 36 | if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 37 | omp_num_threads = 1 38 | warnings.warn( 39 | f'Setting OMP_NUM_THREADS environment variable for each process ' 40 | f'to be {omp_num_threads} in default, to avoid your system being ' 41 | f'overloaded, please further tune the variable for optimal ' 42 | f'performance in your application as needed.') 43 | os.environ['OMP_NUM_THREADS'] = str(omp_num_threads) 44 | 45 | # setup MKL threads 46 | if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1: 47 | mkl_num_threads = 1 48 | warnings.warn( 49 | f'Setting MKL_NUM_THREADS environment variable for each process ' 50 | f'to be {mkl_num_threads} in default, to avoid your system being ' 51 | f'overloaded, please further tune the variable for optimal ' 52 | f'performance in your application as needed.') 53 | os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads) 54 | -------------------------------------------------------------------------------- /mmdet3d/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
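setup_multi_processes above reads only a handful of keys from the config. A minimal sketch of a config that exercises all of them (the values are illustrative, not repository defaults):

from mmcv import Config
from mmdet3d.utils import setup_multi_processes

cfg = Config(dict(
    mp_start_method='fork',        # start method on non-Windows platforms
    opencv_num_threads=0,          # forwarded to cv2.setNumThreads
    data=dict(workers_per_gpu=4),  # >1 triggers the OMP/MKL single-thread defaults
))
setup_multi_processes(cfg)  # sets OMP_NUM_THREADS / MKL_NUM_THREADS only if unset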
2 | 3 | __version__ = '1.0.0rc4' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/overview.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boschresearch/OccFlowNet/27e102e467b771651977e69d3bc0b10177ff6779/requirements/build.txt -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils==0.16.0 2 | m2r 3 | mistune==0.8.4 4 | myst-parser 5 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 6 | sphinx==4.0.2 7 | sphinx-copybutton 8 | sphinx_markdown_tables 9 | -------------------------------------------------------------------------------- /requirements/mminstall.txt: -------------------------------------------------------------------------------- 1 | mmcv-full>=1.4.8,<=1.6.0 2 | mmdet>=2.24.0,<=3.0.0 3 | mmsegmentation>=0.20.0,<=1.0.0 4 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | spconv 3 | waymo-open-dataset-tf-2-1-0==1.2.0 4 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv>=1.4.8 2 | mmdet>=2.24.0 3 | mmsegmentation>=0.20.1 4 | torch 5 | torchvision 6 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | lyft_dataset_sdk 2 | networkx>=2.2,<2.3 3 | numba==0.53.0 4 | nuscenes-devkit 5 | numpy==1.23.5 6 | nerfacc==0.5.3 7 | ninja==1.11.1 8 | torch_efficient_distloss==0.1.3 9 | open3d==0.18.0 10 | moviepy 11 | plyfile 12 | scikit-image 13 | # by default we also use tensorboard to log results 14 | tensorboard 15 | trimesh>=2.35.39,<2.35.40 16 | yapf==0.40.1 -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
7 | kwarray 8 | pytest 9 | pytest-cov 10 | pytest-runner 11 | ubelt 12 | xdoctest >= 0.10.0 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [yapf] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | 6 | [isort] 7 | line_length = 79 8 | multi_line_output = 0 9 | extra_standard_library = setuptools 10 | known_first_party = mmdet,mmseg,mmdet3d 11 | known_third_party = cv2,imageio,indoor3d_util,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,pytorch_sphinx_theme,recommonmark,requests,scannet_utils,scipy,seaborn,shapely,skimage,sphinx,tensorflow,terminaltables,torch,trimesh,ts,waymo_open_dataset 12 | no_lines_before = STDLIB,LOCALFOLDER 13 | default_section = THIRDPARTY 14 | 15 | [codespell] 16 | ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD 17 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29522} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29520} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | --------------------------------------------------------------------------------
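Both launcher scripts take their positional arguments first and forward everything after them to train.py (${@:3}) and test.py (${@:4}) respectively. Typical invocations (the config, checkpoint path, and GPU count below are illustrative):

# Hypothetical examples; substitute your own config, checkpoint, and GPU count.
bash tools/dist_train.sh configs/occflownet/occflownet_stbase_2d.py 8
bash tools/dist_test.sh configs/occflownet/occflownet_stbase_2d.py work_dirs/occflownet_stbase_2d/latest.pth 8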