├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST ├── README.md ├── coperception ├── README.txt ├── __init__.py ├── configs │ ├── Config.py │ ├── ConfigGlobal.py │ └── __init__.py ├── datasets │ ├── MbbSampler.py │ ├── NuscenesDataset.py │ ├── V2XSimDet.py │ ├── V2XSimSeg.py │ └── __init__.py ├── models │ ├── __init__.py │ ├── det │ │ ├── AgentWiseWeightedFusion.py │ │ ├── CatFusion.py │ │ ├── DiscoNet.py │ │ ├── FaFNet.py │ │ ├── MaxFusion.py │ │ ├── MeanFusion.py │ │ ├── SumFusion.py │ │ ├── TeacherNet.py │ │ ├── V2VNet.py │ │ ├── When2com.py │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── Backbone.py │ │ │ └── __init__.py │ │ └── base │ │ │ ├── DetModelBase.py │ │ │ ├── FusionBase.py │ │ │ ├── IntermediateModelBase.py │ │ │ ├── NonIntermediateModelBase.py │ │ │ └── __init__.py │ └── seg │ │ ├── AgentWiseWeightedFusion.py │ │ ├── CatFusion.py │ │ ├── DiscoNet.py │ │ ├── FusionBase.py │ │ ├── MaxFusion.py │ │ ├── MeanFusion.py │ │ ├── SegModelBase.py │ │ ├── SumFusion.py │ │ ├── UNet.py │ │ ├── V2VNet.py │ │ ├── When2Com_UNet.py │ │ └── __init__.py └── utils │ ├── AverageMeter.py │ ├── CoDetModule.py │ ├── SegMetrics.py │ ├── SegModule.py │ ├── __init__.py │ ├── convolutional_rnn │ ├── __init__.py │ ├── functional.py │ ├── module.py │ └── utils.py │ ├── data_util.py │ ├── detection_util.py │ ├── loss.py │ ├── mapping.cpython-37m-x86_64-linux-gnu.so │ ├── mapping.py │ ├── mbb_util.py │ ├── mean_ap.py │ ├── min_norm_solvers.py │ ├── nuscenes_pc_util.py │ ├── obj_util.py │ └── postprocess.py ├── environment.yml ├── requirements.txt ├── setup.py └── tools ├── det ├── Makefile ├── README.md ├── compute_mbb_covar.py ├── create_data_det.py ├── test_codet.py ├── test_mbb.py ├── train_codet.py └── train_mbb.py └── utils ├── move_scenes.py ├── test_scenes.txt ├── train_scenes.txt └── val_scenes.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | dist/ 3 | site/ 4 | *.egg-info/ 5 | logs 6 | .ipynb_checkpoints 7 | check_* 8 | check 9 | err_logs 10 | v2x-sim* 11 | .DS_Store -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: stable 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://gitlab.com/pycqa/flake8 8 | rev: 3.7.9 9 | hooks: 10 | - id: flake8 -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 1 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.9" 13 | 14 | mkdocs: 15 | configuration: mkdocs.yml 16 | 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.cfg 3 | setup.py 4 | coperception/__init__.py 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Uncertainty Quantification of Collaborative Detection for Self-Driving (ICRA 2023) 2 | [Sanbao Su](https://sanbaosu.netlify.app/), [Yiming Li](https://roboticsyimingli.github.io), [Sihong He](https://scholar.google.com/citations?hl=en&user=jLLDCeoAAAAJ), [Songyang Han](https://songyanghan.com/), [Chen Feng](https://scholar.google.com/citations?user=YeG8ZM0AAAAJ&hl=en), [Caiwen Ding](https://scholar.google.com/citations?hl=en&user=7hR0r_EAAAAJ), [Fei Miao](http://feimiao.org/index.html) 3 | 4 | Implementation of paper "Uncertainty Quantification of Collaborative Detection for Self-Driving" [paper](https://arxiv.org/abs/2209.08162), [website](https://coperception.github.io/double-m-quantification/) 5 | 6 | ![main](https://github.com/coperception/double-m-quantification/blob/gh-pages/static/images/main.png) 7 | 8 | ## Abstract: 9 | 10 | Sharing information between connected and autonomous vehicles (CAVs) fundamentally improves the performance of collaborative object detection for self-driving. However, CAVs still have uncertainties on object detection due to practical challenges, which will affect the later modules in self-driving such as planning and control. Hence, uncertainty quantification is crucial for safety-critical systems such as CAVs. Our work is the first to estimate the uncertainty of collaborative object detection. We propose a novel uncertainty quantification method, called Double-M Quantification, which tailors a moving block bootstrap (MBB) algorithm with direct modeling of the multivariant Gaussian distribution of each corner of the bounding box. Our method captures both the epistemic uncertainty and aleatoric uncertainty with one inference based on the offline Double-M training process. And it can be used with different collaborative object detectors. 
Through experiments on a comprehensive collaborative perception dataset for CAVs, we show that our Double-M method achieves up to a 4.09x improvement in uncertainty score and up to a 3.13% improvement in accuracy, compared with state-of-the-art uncertainty quantification methods. The results also validate that sharing information between CAVs benefits the system by both improving accuracy and reducing uncertainty. 11 | 12 | ## Install: 13 | 1. Clone this repository. 14 | 2. `cd` into the cloned repository. 15 | 3. Install the `coperception` package with pip: 16 | ```bash 17 | pip install -e . 18 | ``` 19 | ## Getting started: 20 | Please refer to our docs website for detailed documentation of the models: https://coperception.readthedocs.io/en/latest/ 21 | Installation: 22 | - [Installation documentation](https://coperception.readthedocs.io/en/latest/getting_started/installation/) 23 | 24 | Download dataset: 25 | - [V2X-Sim](https://coperception.readthedocs.io/en/latest/datasets/v2x_sim/) 26 | 27 | ## Training 28 | 29 | ```bash 30 | cd ./tools/det/ 31 | ``` 32 | 33 | ### Pretrain stage: 34 | 35 | Train benchmark detectors: 36 | - Lowerbound / Upperbound 37 | ```bash 38 | CUDA_VISIBLE_DEVICES=0 make train com=upperbound loss_type=corner_loss logpath=check/check_loss_base nepoch=60 39 | CUDA_VISIBLE_DEVICES=0 make train com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_base nepoch=80 40 | ``` 41 | 42 | - DiscoNet 43 | ```bash 44 | CUDA_VISIBLE_DEVICES=0 make train_disco_no_rsu loss_type=corner_loss logpath=check/check_loss_base nepoch=60 45 | CUDA_VISIBLE_DEVICES=0 make train_disco_no_rsu loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_base nepoch=80 46 | ``` 47 | 48 | ### Train stage: 49 | 50 | Train benchmark detectors: 51 | - Lowerbound / Upperbound 52 | ```bash 53 | CUDA_VISIBLE_DEVICES=0 make mbb_train com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 54 | ``` 55 | 56 | - DiscoNet 57 | ```bash 58 | CUDA_VISIBLE_DEVICES=0 make mbb_train_disco_no_rsu loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 59 | ``` 60 | 61 | Compute the covariance for MBB: 62 | ```bash 63 | CUDA_VISIBLE_DEVICES=0 make mbb_test_no_rsu com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 64 | CUDA_VISIBLE_DEVICES=0 make compute_mbb_covar com=upperbound logpath=check/check_loss_corner_pair_ind 65 | ``` 66 | 67 | ## Test: 68 | 69 | ### Test stage: 70 | 71 | 72 | Test benchmark detectors: 73 | - Lowerbound / Upperbound / DiscoNet 74 | ```bash 75 | CUDA_VISIBLE_DEVICES=0 make test_no_rsu com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 76 | ``` 77 | 78 | ## Related works: 79 | - [coperception Github repo](https://github.com/coperception/coperception) 80 | 81 | ## Related papers: 82 | Double-M Quantification: 83 | ```bibtex 84 | @inproceedings{Su2022uncertainty, 85 | author = {Su, Sanbao and Li, Yiming and He, Sihong and Han, Songyang and Feng, Chen and Ding, Caiwen and Miao, Fei}, 86 | title = {Uncertainty Quantification of Collaborative Detection for Self-Driving}, 87 | year={2023}, 88 | booktitle={IEEE International Conference on Robotics and Automation (ICRA)} 89 | } 90 | ``` 91 | 92 | V2X-Sim dataset: 93 | ```bibtex 94 | @article{Li_2021_RAL, 95 | title = {V2X-Sim: A Virtual Collaborative Perception Dataset and Benchmark for Autonomous Driving}, 96 | author = {Li, Yiming and Ma, Dekun and An, Ziyan and Wang,
Zixun and Zhong, Yiqi and Chen, Siheng and Feng, Chen}, 97 | booktitle = {IEEE Robotics and Automation Letters}, 98 | year = {2022} 99 | } 100 | ``` 101 | -------------------------------------------------------------------------------- /coperception/README.txt: -------------------------------------------------------------------------------- 1 | A library for multi-agent collaborative perception. 2 | See https://github.com/coperception/coperception for details. -------------------------------------------------------------------------------- /coperception/__init__.py: -------------------------------------------------------------------------------- 1 | from .configs import * 2 | from .utils import * 3 | from .models import * 4 | from .datasets import * 5 | -------------------------------------------------------------------------------- /coperception/configs/Config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | class Config(object): 6 | """The config class""" 7 | 8 | def __init__( 9 | self, 10 | split, 11 | binary=True, 12 | only_det=True, 13 | code_type="faf", 14 | loss_type="faf_loss", 15 | savepath="", 16 | root="", 17 | is_cross_road=False, 18 | use_vis=False, 19 | ): 20 | # for segmentaion task only 21 | # ========================= 22 | self.num_class = 8 23 | self.in_channels = 13 24 | self.nepoch = 10 25 | 26 | self.class_to_rgb = { 27 | 0: [255, 255, 255], # Unlabeled 28 | 1: [71, 141, 230], # Vehicles 29 | 2: [122, 217, 209], # Sidewalk 30 | 3: [145, 171, 100], # Ground / Terrain 31 | 4: [231, 136, 101], # Road / Traffic light / Pole 32 | 5: [142, 80, 204], # Buildings 33 | 6: [224, 8, 50], # Pedestrian 34 | 7: [106, 142, 34] # Vegetation 35 | # 7: [102, 102, 156], # Walls 36 | # 0: [55, 90, 80], # Other 37 | } 38 | 39 | # Remap pixel values given by carla 40 | self.classes_remap = { 41 | 0: 0, # Unlabeled (so that we don't forget this class) 42 | 10: 1, # Vehicles 43 | 8: 2, # Sidewalk 44 | 14: 3, # Ground (non-drivable) 45 | 22: 3, # Terrain (non-drivable) 46 | 7: 4, # Road 47 | 6: 4, # Road line 48 | 18: 4, # Traffic light 49 | 5: 4, # Pole 50 | 1: 5, # Building 51 | 4: 6, # Pedestrian 52 | 9: 7, # Vegetation 53 | } 54 | 55 | self.class_idx_to_name = { 56 | 0: "Unlabeled", 57 | 1: "Vehicles", 58 | 2: "Sidewalk", 59 | 3: "Ground & Terrain", 60 | 4: "Road", 61 | 5: "Buildings", 62 | 6: "Pedestrian", 63 | 7: "Vegetation", 64 | } 65 | # ========================= 66 | 67 | self.device = None 68 | self.split = split 69 | self.savepath = savepath 70 | self.binary = binary 71 | self.only_det = only_det 72 | self.code_type = code_type 73 | self.loss_type = loss_type # corner_loss faf_loss kl_loss_center kl_loss_center_add, kl_loss_corner, kl_loss_center_ind, kl_loss_center_offset_ind, kl_loss_corner_pair_ind 74 | self.covar_length = 21 # number of variables in the covariance matrix, decomposition matrix for covariance matrix, for multivariate Gaussian of (x,y,w,h,sin,cos), it should be 21 75 | if self.loss_type == "kl_loss_corner": 76 | self.covar_length = 8 77 | elif self.loss_type == "kl_loss_center_ind" or self.loss_type == "kl_loss_center_offset_ind": 78 | self.covar_length = 6 79 | elif self.loss_type == "kl_loss_corner_pair_ind": 80 | self.covar_length = 12 81 | self.covar_matrix_size = 6 # one dimensional size of the covariance matrix 82 | self.loss_loc_weight = 0.1 83 | self.loc_det_weight = 0.1 84 | self.regMeanCovToge = False # always be False 85 | if self.loss_type == "kl_loss_corner_all": 86 
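# --- Added note (not part of the original Config.py; illustrative only) ---
# covar_length counts the free entries of a lower-triangular factor L of the
# covariance matrix (Sigma = L @ L^T). For the n = 6 box variables
# (x, y, w, h, sin, cos) that is n * (n + 1) / 2 = 6 * 7 / 2 = 21. The
# smaller values (6, 8, 12) appear to come from the same counting under the
# independence assumptions of the corresponding loss types. A minimal sketch
# of rebuilding Sigma from such a flat vector (tril_from_flat is a
# hypothetical helper, not used anywhere in this repo):
#
#     import torch
#     def tril_from_flat(flat, n=6):
#         rows, cols = torch.tril_indices(n, n)
#         L = torch.zeros(n, n)
#         L[rows, cols] = flat        # flat holds n * (n + 1) // 2 values
#         return L @ L.T              # positive semi-definite covariance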
| self.regMeanCovToge = True 87 | 88 | # The specifications for BEV maps 89 | self.voxel_size = (0.25, 0.25, 0.4) 90 | #self.voxel_size = (0.375, 0.375, 0.4) 91 | self.area_extents = ( 92 | np.array([[-32.0, 32.0], [-32.0, 32.0], [-8.0, -3.0]]) 93 | #np.array([[-48.0, 48.0], [-48.0, 48.0], [-8.0, -3.0]]) 94 | if is_cross_road 95 | else np.array([[-32.0, 32.0], [-32.0, 32.0], [-3.0, 2.0]]) 96 | #else np.array([[-48.0, 48.0], [-48.0, 48.0], [-3.0, 2.0]]) 97 | ) 98 | self.is_cross_road = is_cross_road 99 | self.past_frame_skip = 3 # when generating the BEV maps, how many history frames need to be skipped 100 | self.future_frame_skip = ( 101 | 0 # when generating the BEV maps, how many future frames need to be skipped 102 | ) 103 | self.num_past_frames_for_bev_seq = ( 104 | 1 # the number of past frames for BEV map sequence 105 | ) 106 | self.num_past_pcs = 1 # duplicate self.num_past_frames_for_bev_seq 107 | 108 | self.map_dims = [ 109 | math.ceil( 110 | (self.area_extents[0][1] - self.area_extents[0][0]) / self.voxel_size[0] 111 | ), 112 | math.ceil( 113 | (self.area_extents[1][1] - self.area_extents[1][0]) / self.voxel_size[1] 114 | ), 115 | math.ceil( 116 | (self.area_extents[2][1] - self.area_extents[2][0]) / self.voxel_size[2] 117 | ), 118 | ] 119 | self.only_det = True 120 | self.root = root 121 | # debug Data: 122 | self.code_type = "faf" 123 | self.pred_type = "motion" 124 | # debug Loss 125 | # self.loss_type = "corner_loss" 126 | # debug MGDA 127 | self.MGDA = False 128 | # debug when2com 129 | self.MIMO = True 130 | # debug Motion Classification 131 | self.motion_state = False 132 | self.static_thre = 0.2 # speed lower bound 133 | 134 | # debug use_vis 135 | self.use_vis = use_vis 136 | self.use_map = False 137 | 138 | # The specifications for object detection encode 139 | if self.code_type in ["corner_1", "corner_2"]: 140 | self.box_code_size = 8 # (\delta{x1},\delta{y1},\delta{x2},\delta{y2},\delta{x3},\delta{y3},\delta{x4},\delta{y4}) 141 | elif self.code_type in ["corner_3"]: 142 | self.box_code_size = 10 143 | elif self.code_type[0] == "f": 144 | self.box_code_size = 6 # (x,y,w,h,sin,cos) 145 | else: 146 | print(code_type, " code type is not implemented yet!") 147 | exit() 148 | 149 | self.pred_len = ( 150 | 1 # the number of frames for prediction, including the current frame 151 | ) 152 | 153 | # anchor size: (w,h,angle) (according to nuscenes w < h) 154 | if not self.binary: 155 | self.anchor_size = np.asarray( 156 | [ 157 | [2.0, 4.0, 0], 158 | [2.0, 4.0, math.pi / 2.0], 159 | [1.0, 1.0, 0], 160 | [1.0, 2.0, 0.0], 161 | [1.0, 2.0, math.pi / 2.0], 162 | [3.0, 12.0, 0.0], 163 | [3.0, 12.0, math.pi / 2.0], 164 | ] 165 | ) 166 | else: 167 | self.anchor_size = np.asarray( 168 | [ 169 | [2.0, 4.0, 0], 170 | [2.0, 4.0, math.pi / 2.0], 171 | [2.0, 4.0, -math.pi / 4.0], 172 | [3.0, 12.0, 0], 173 | [3.0, 12.0, math.pi / 2.0], 174 | [3.0, 12.0, -math.pi / 4.0], 175 | ] 176 | ) 177 | 178 | self.category_threshold = [0.4, 0.4, 0.25, 0.25, 0.4] 179 | self.class_map = { 180 | "vehicle.car": 1, 181 | "vehicle.emergency.police": 1, 182 | "vehicle.bicycle": 3, 183 | "vehicle.motorcycle": 3, 184 | "vehicle.bus.rigid": 2, 185 | } 186 | 187 | if self.binary: 188 | self.category_num = 2 189 | else: 190 | self.category_num = len(self.category_threshold) 191 | self.print_feq = 100 192 | if self.split == "train": 193 | self.num_keyframe_skipped = ( 194 | 0 # The number of keyframes we will skip when dumping the data 195 | ) 196 | self.nsweeps_back = 1 # Number of frames back to the history 
(including the current timestamp) 197 | self.nsweeps_forward = 0 # Number of frames into the future (does not include the current timestamp) 198 | self.skip_frame = ( 199 | 0 # The number of frames skipped for the adjacent sequence 200 | ) 201 | self.num_adj_seqs = ( 202 | 1 # number of adjacent sequences, among which the time gap is \delta t 203 | ) 204 | else: 205 | self.num_keyframe_skipped = 0 206 | self.nsweeps_back = 1 # Setting this to 30 (for training) or 25 (for testing) allows conducting ablation studies on frame numbers 207 | self.nsweeps_forward = 0 208 | self.skip_frame = 0 209 | self.num_adj_seqs = 1 210 | -------------------------------------------------------------------------------- /coperception/configs/ConfigGlobal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | class ConfigGlobal(object): 6 | def __init__( 7 | self, 8 | split, 9 | binary=True, 10 | only_det=True, 11 | code_type="faf", 12 | loss_type="faf_loss", 13 | savepath="", 14 | root="", 15 | ): 16 | 17 | self.device = None 18 | self.split = split 19 | self.savepath = savepath 20 | self.binary = binary 21 | self.only_det = only_det 22 | self.code_type = code_type 23 | self.loss_type = loss_type # corner_loss faf_loss 24 | 25 | # The specifications for BEV maps 26 | self.voxel_size = (0.25, 0.25, 0.4) 27 | self.area_extents = np.array([[-96.0, 96.0], [-96.0, 96.0], [-3.0, 2.0]]) 28 | self.past_frame_skip = 0 # when generating the BEV maps, how many history frames need to be skipped 29 | self.future_frame_skip = ( 30 | 0 # when generating the BEV maps, how many future frames need to be skipped 31 | ) 32 | self.num_past_frames_for_bev_seq = ( 33 | 1 # the number of past frames for BEV map sequence 34 | ) 35 | self.num_past_pcs = 4 # duplicate self.num_past_frames_for_bev_seq 36 | 37 | self.map_dims = [ 38 | math.ceil( 39 | (self.area_extents[0][1] - self.area_extents[0][0]) / self.voxel_size[0] 40 | ), 41 | math.ceil( 42 | (self.area_extents[1][1] - self.area_extents[1][0]) / self.voxel_size[1] 43 | ), 44 | math.ceil( 45 | (self.area_extents[2][1] - self.area_extents[2][0]) / self.voxel_size[2] 46 | ), 47 | ] 48 | self.only_det = True 49 | self.root = root 50 | 51 | # debug Data: 52 | self.code_type = "faf" 53 | self.pred_type = "motion" 54 | # debug Loss 55 | self.loss_type = "corner_loss" 56 | 57 | # debug MGDA 58 | self.MGDA = False 59 | # debug when2com 60 | self.MIMO = False 61 | # debug Motion Classification 62 | self.motion_state = False 63 | self.static_thre = 0.2 # speed lower bound 64 | 65 | # debug use_vis 66 | self.use_vis = True 67 | self.use_map = False 68 | 69 | # The specifications for object detection encode 70 | if self.code_type in ["corner_1", "corner_2"]: 71 | self.box_code_size = 8 # (\delta{x1},\delta{y1},\delta{x2},\delta{y2},\delta{x3},\delta{y3},\delta{x4},\delta{y4}) 72 | elif self.code_type in ["corner_3"]: 73 | self.box_code_size = 10 74 | elif self.code_type[0] == "f": 75 | self.box_code_size = 6 # (x,y,w,h,sin,cos) 76 | else: 77 | print(code_type, " code type is not implemented yet!") 78 | exit() 79 | 80 | self.pred_len = ( 81 | 1 # the number of frames for prediction, including the current frame 82 | ) 83 | 84 | # anchor size: (w,h,angle) (according to nuscenes w < h) 85 | if not self.binary: 86 | self.anchor_size = np.asarray( 87 | [ 88 | [2.0, 4.0, 0], 89 | [2.0, 4.0, math.pi / 2.0], 90 | [1.0, 1.0, 0], 91 | [1.0, 2.0, 0.0], 92 | [1.0, 2.0, math.pi / 2.0], 93 | [3.0, 12.0, 0.0], 94 | [3.0, 12.0, 
math.pi / 2.0], 95 | ] 96 | ) 97 | else: 98 | self.anchor_size = np.asarray( 99 | [ 100 | [2.0, 4.0, 0], 101 | [2.0, 4.0, math.pi / 2.0], 102 | [2.0, 4.0, -math.pi / 4.0], 103 | [3.0, 12.0, 0], 104 | [3.0, 12.0, math.pi / 2.0], 105 | [3.0, 12.0, -math.pi / 4.0], 106 | ] 107 | ) 108 | 109 | self.category_threshold = [0.4, 0.4, 0.25, 0.25, 0.4] 110 | self.class_map = { 111 | "vehicle.audi.a2": 1, 112 | "vehicle.audi.etron": 1, 113 | "vehicle.audi.tt": 1, 114 | "vehicle.bmw.grandtourer": 1, 115 | "vehicle.bmw.isetta": 1, 116 | "vehicle.chevrolet.impala": 1, 117 | "vehicle.citroen.c3": 1, 118 | "vehicle.dodge_charger.police": 1, 119 | "vehicle.jeep.wrangler_rubicon": 1, 120 | "vehicle.lincoln.mkz2017": 1, 121 | "vehicle.mercedes-benz.coupe": 1, 122 | "vehicle.mini.cooperst": 1, 123 | "vehicle.mustang.mustang": 1, 124 | "vehicle.nissan.micra": 1, 125 | "vehicle.nissan.patrol": 1, 126 | "vehicle.seat.leon": 1, 127 | "vehicle.tesla.cybertruck": 1, 128 | "vehicle.tesla.model3": 1, 129 | "vehicle.toyota.prius": 1, 130 | "vehicle.volkswagen.t2": 1, 131 | "vehicle.carlamotors.carlacola": 1, 132 | "human.pedestrian": 2, 133 | "vehicle.bh.crossbike": 3, 134 | "vehicle.diamondback.century": 3, 135 | "vehicle.gazelle.omafiets": 3, 136 | "vehicle.harley-davidson.low_rider": 3, 137 | "vehicle.kawasaki.ninja": 3, 138 | "vehicle.yamaha.yzf": 3, 139 | } # background: 0, other: 4 140 | # self.class_map = {'vehicle.car': 1, 'vehicle.truck': 1, 'vehicle.bus': 1, 'human.pedestrian': 2, 'vehicle.bicycle': 3, 'vehicle.motorcycle': 3} # background: 0, other: 4 141 | if self.binary: 142 | self.category_num = 2 143 | else: 144 | self.category_num = len(self.category_threshold) 145 | self.print_feq = 100 146 | if self.split == "train": 147 | self.num_keyframe_skipped = ( 148 | 0 # The number of keyframes we will skip when dumping the data 149 | ) 150 | self.nsweeps_back = 1 # Number of frames back to the history (including the current timestamp) 151 | self.nsweeps_forward = 0 # Number of frames into the future (does not include the current timestamp) 152 | self.skip_frame = ( 153 | 0 # The number of frames skipped for the adjacent sequence 154 | ) 155 | self.num_adj_seqs = ( 156 | 1 # number of adjacent sequences, among which the time gap is \delta t 157 | ) 158 | else: 159 | self.num_keyframe_skipped = 0 160 | self.nsweeps_back = 1 # Setting this to 30 (for training) or 25 (for testing) allows conducting ablation studies on frame numbers 161 | self.nsweeps_forward = 0 162 | self.skip_frame = 0 163 | self.num_adj_seqs = 1 164 | -------------------------------------------------------------------------------- /coperception/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .Config import Config 2 | from .ConfigGlobal import ConfigGlobal 3 | -------------------------------------------------------------------------------- /coperception/datasets/MbbSampler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import torch 5 | from coperception.utils.obj_util import * 6 | from coperception.datasets import V2XSimDet 7 | from typing import Iterator, Optional, Sequence, List, TypeVar, Generic, Sized 8 | from torch.utils.data import Sampler 9 | 10 | class MbbSampler(Sampler[int]): 11 | r"""Samples elements sequentially, always in the same order. 
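    (Added clarification; not in the original docstring.) The summary line
    above appears to have been copied from ``SequentialSampler``: ``__iter__``
    below actually permutes candidate start frames at random and yields
    ``block_len`` consecutive frame indices per start, producing the
    contiguous temporal blocks required by the moving block bootstrap (MBB).

    Illustrative usage (``block_len=10`` is an arbitrary example value; the
    dataset is assumed to expose ``num_sample_seqs`` and ``scene_len`` like
    ``V2XSimDet``)::

        sampler = MbbSampler(train_dataset, block_len=10)
        loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, sampler=sampler)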
12 | 13 | Args: 14 | data_source (Dataset): dataset to sample from 15 | """ 16 | data_source: Sized 17 | 18 | def __init__(self, data_source: Sized, block_len: int) -> None: 19 | self.data_source = data_source 20 | self.frame_len = data_source.num_sample_seqs 21 | self.scene_len = data_source.scene_len 22 | self.block_len = block_len 23 | self.frame_pre_scene = self.frame_len // self.scene_len 24 | self.iter_len = len(self.data_source) // self.block_len * self.block_len 25 | 26 | def __iter__(self) -> Iterator[int]: 27 | self.iter_list = [] 28 | perm = torch.randperm(len(self.data_source)) 29 | require_range = self.frame_pre_scene - self.block_len 30 | for idx in perm: 31 | if idx % self.frame_pre_scene > require_range: 32 | continue 33 | if len(self.iter_list) > self.iter_len: 34 | break 35 | self.iter_list.extend(range(idx, idx + self.block_len)) 36 | return iter(self.iter_list) 37 | 38 | def __len__(self) -> int: 39 | return self.iter_len -------------------------------------------------------------------------------- /coperception/datasets/NuscenesDataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from multiprocessing import Manager 4 | 5 | import numpy as np 6 | from torch.utils.data import Dataset 7 | from coperception.utils.obj_util import init_anchors_no_check 8 | 9 | 10 | class NuscenesDataset(Dataset): 11 | def __init__( 12 | self, dataset_root=None, config=None, split=None, cache_size=10000, val=False 13 | ): 14 | """ 15 | This dataloader loads single sequence for a keyframe, and is not designed for computing the 16 | spatio-temporal consistency losses. It supports train, val and test splits. 17 | 18 | dataset_root: Data path to the preprocessed sparse nuScenes data (for training) 19 | split: [train/val/test] 20 | future_frame_skip: Specify to skip how many future frames 21 | voxel_size: The lattice resolution. Should be consistent with the preprocessed data 22 | area_extents: The area extents of the processed LiDAR data. Should be consistent with the preprocessed data 23 | category_num: The number of object categories (including the background) 24 | cache_size: The cache size for storing parts of data in the memory (for reducing the IO cost) 25 | """ 26 | if split is None: 27 | self.split = config.split 28 | else: 29 | self.split = split 30 | self.voxel_size = config.voxel_size 31 | self.area_extents = config.area_extents 32 | self.category_num = config.category_num 33 | self.future_frame_skip = config.future_frame_skip 34 | self.pred_len = config.pred_len 35 | self.box_code_size = config.box_code_size 36 | self.anchor_size = config.anchor_size 37 | self.val = val 38 | self.only_det = config.only_det 39 | self.binary = config.binary 40 | self.config = config 41 | self.use_vis = config.use_vis 42 | # dataset_root = dataset_root + '/'+split 43 | if dataset_root is None: 44 | raise ValueError( 45 | "The {} dataset root is None. 
Should specify its value.".format( 46 | self.split 47 | ) 48 | ) 49 | self.dataset_root = dataset_root 50 | seq_dirs = [ 51 | os.path.join(self.dataset_root, d) 52 | for d in os.listdir(self.dataset_root) 53 | if os.path.isdir(os.path.join(self.dataset_root, d)) 54 | ] 55 | seq_dirs = sorted(seq_dirs) 56 | self.seq_files = [ 57 | os.path.join(seq_dir, f) 58 | for seq_dir in seq_dirs 59 | for f in os.listdir(seq_dir) 60 | if os.path.isfile(os.path.join(seq_dir, f)) 61 | ] 62 | 63 | self.num_sample_seqs = len(self.seq_files) 64 | print("The number of {} sequences: {}".format(self.split, self.num_sample_seqs)) 65 | 66 | """ 67 | # For training, the size of dataset should be 17065 * 2; for validation: 1623; for testing: 4309 68 | if split == 'train' and self.num_sample_seqs != 17065 * 2: 69 | warnings.warn(">> The size of training dataset is not 17065 * 2.\n") 70 | elif split == 'val' and self.num_sample_seqs != 1623: 71 | warnings.warn(">> The size of validation dataset is not 1719.\n") 72 | elif split == 'test' and self.num_sample_seqs != 4309: 73 | warnings.warn('>> The size of test dataset is not 4309.\n') 74 | """ 75 | 76 | # object information 77 | self.anchors_map = init_anchors_no_check( 78 | self.area_extents, self.voxel_size, self.box_code_size, self.anchor_size 79 | ) 80 | self.map_dims = [ 81 | int( 82 | (self.area_extents[0][1] - self.area_extents[0][0]) / self.voxel_size[0] 83 | ), 84 | int( 85 | (self.area_extents[1][1] - self.area_extents[1][0]) / self.voxel_size[1] 86 | ), 87 | ] 88 | self.reg_target_shape = ( 89 | self.map_dims[0], 90 | self.map_dims[1], 91 | len(self.anchor_size), 92 | self.pred_len, 93 | self.box_code_size, 94 | ) 95 | self.label_shape = (self.map_dims[0], self.map_dims[1], len(self.anchor_size)) 96 | self.label_one_hot_shape = ( 97 | self.map_dims[0], 98 | self.map_dims[1], 99 | len(self.anchor_size), 100 | self.category_num, 101 | ) 102 | self.dims = config.map_dims 103 | self.num_past_pcs = config.num_past_pcs 104 | manager = Manager() 105 | self.cache = manager.dict() 106 | self.cache_size = cache_size if split == "train" else 0 107 | # self.cache_size = cache_size 108 | 109 | def __len__(self): 110 | return self.num_sample_seqs 111 | 112 | def get_one_hot(self, label, category_num): 113 | one_hot_label = np.zeros((label.shape[0], category_num)) 114 | for i in range(label.shape[0]): 115 | one_hot_label[i][label[i]] = 1 116 | 117 | return one_hot_label 118 | 119 | def __getitem__(self, idx): 120 | if idx in self.cache: 121 | gt_dict = self.cache[idx] 122 | else: 123 | seq_file = self.seq_files[idx] 124 | gt_data_handle = np.load(seq_file, allow_pickle=True) 125 | gt_dict = gt_data_handle.item() 126 | if len(self.cache) < self.cache_size: 127 | self.cache[idx] = gt_dict 128 | 129 | allocation_mask = gt_dict["allocation_mask"].astype(bool) 130 | reg_loss_mask = gt_dict["reg_loss_mask"].astype(bool) 131 | gt_max_iou = gt_dict["gt_max_iou"] 132 | motion_one_hot = np.zeros(5) 133 | motion_mask = np.zeros(5) 134 | 135 | # load regression target 136 | reg_target_sparse = gt_dict["reg_target_sparse"] 137 | # need to be modified Yiqi , only use reg_target and allocation_map 138 | reg_target = np.zeros(self.reg_target_shape).astype(reg_target_sparse.dtype) 139 | 140 | reg_target[allocation_mask] = reg_target_sparse 141 | reg_target[np.bitwise_not(reg_loss_mask)] = 0 142 | label_sparse = gt_dict["label_sparse"] 143 | 144 | one_hot_label_sparse = self.get_one_hot(label_sparse, self.category_num) 145 | label_one_hot = np.zeros(self.label_one_hot_shape) 146 | 
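        # (Descriptive comments added for clarity; not in the original source.)
        # The preprocessed GT stores labels only for the anchors that were
        # actually allocated, so the dense one-hot map is rebuilt here:
        # initialise every anchor as background (class 0), then scatter the
        # sparse one-hot labels back through allocation_mask -- the same
        # sparse-to-dense pattern used for reg_target above.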
label_one_hot[:, :, :, 0] = 1 147 | label_one_hot[allocation_mask] = one_hot_label_sparse 148 | 149 | if self.config.motion_state: 150 | motion_sparse = gt_dict["motion_state"] 151 | motion_one_hot_label_sparse = self.get_one_hot(motion_sparse, 3) 152 | motion_one_hot = np.zeros(self.label_one_hot_shape[:-1] + (3,)) 153 | motion_one_hot[:, :, :, 0] = 1 154 | motion_one_hot[allocation_mask] = motion_one_hot_label_sparse 155 | motion_mask = motion_one_hot[:, :, :, 2] == 1 156 | 157 | if self.only_det: 158 | reg_target = reg_target[:, :, :, :1] 159 | reg_loss_mask = reg_loss_mask[:, :, :, :1] 160 | 161 | # only center for pred 162 | 163 | elif self.config.pred_type in ["motion", "center"]: 164 | reg_loss_mask = np.expand_dims(reg_loss_mask, axis=-1) 165 | reg_loss_mask = np.repeat(reg_loss_mask, self.box_code_size, axis=-1) 166 | reg_loss_mask[:, :, :, 1:, 2:] = False 167 | 168 | if self.config.use_map: 169 | if ("map_allocation_0" in gt_dict.keys()) or ( 170 | "map_allocation" in gt_dict.keys() 171 | ): 172 | semantic_maps = [] 173 | for m_id in range(self.config.map_channel): 174 | map_alloc = gt_dict["map_allocation_" + str(m_id)] 175 | map_sparse = gt_dict["map_sparse_" + str(m_id)] 176 | recover = np.zeros(tuple(self.config.map_dims[:2])) 177 | recover[map_alloc] = map_sparse 178 | recover = np.rot90(recover, 3) 179 | # recover_map = cv2.resize(recover,(self.config.map_dims[0],self.config.map_dims[1])) 180 | semantic_maps.append(recover) 181 | semantic_maps = np.asarray(semantic_maps) 182 | else: 183 | semantic_maps = np.zeros(0) 184 | """ 185 | if self.binary: 186 | reg_target = np.concatenate([reg_target[:,:,:2],reg_target[:,:,5:]],axis=2) 187 | reg_loss_mask = np.concatenate([reg_loss_mask[:,:,:2],reg_loss_mask[:,:,5:]],axis=2) 188 | label_one_hot = np.concatenate([label_one_hot[:,:,:2],label_one_hot[:,:,5:]],axis=2) 189 | 190 | """ 191 | padded_voxel_points = list() 192 | 193 | for i in range(self.num_past_pcs): 194 | indices = gt_dict["voxel_indices_" + str(i)] 195 | curr_voxels = np.zeros(self.dims, dtype=bool) 196 | curr_voxels[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 197 | curr_voxels = np.rot90(curr_voxels, 3) 198 | padded_voxel_points.append(curr_voxels) 199 | padded_voxel_points = np.stack(padded_voxel_points, 0).astype(np.float32) 200 | anchors_map = self.anchors_map 201 | """ 202 | if self.binary: 203 | anchors_map = np.concatenate([anchors_map[:,:,:2],anchors_map[:,:,5:]],axis=2) 204 | """ 205 | if self.config.use_vis: 206 | vis_maps = np.zeros( 207 | ( 208 | self.num_past_pcs, 209 | self.config.map_dims[-1], 210 | self.config.map_dims[0], 211 | self.config.map_dims[1], 212 | ) 213 | ) 214 | vis_free_indices = gt_dict["vis_free_indices"] 215 | vis_occupy_indices = gt_dict["vis_occupy_indices"] 216 | vis_maps[ 217 | vis_occupy_indices[0, :], 218 | vis_occupy_indices[1, :], 219 | vis_occupy_indices[2, :], 220 | vis_occupy_indices[3, :], 221 | ] = math.log(0.7 / (1 - 0.7)) 222 | vis_maps[ 223 | vis_free_indices[0, :], 224 | vis_free_indices[1, :], 225 | vis_free_indices[2, :], 226 | vis_free_indices[3, :], 227 | ] = math.log(0.4 / (1 - 0.4)) 228 | vis_maps = np.swapaxes(vis_maps, 2, 3) 229 | vis_maps = np.transpose(vis_maps, (0, 2, 3, 1)) 230 | for v_id in range(vis_maps.shape[0]): 231 | vis_maps[v_id] = np.rot90(vis_maps[v_id], 3) 232 | vis_maps = vis_maps[-1] 233 | 234 | else: 235 | vis_maps = np.zeros(0) 236 | 237 | padded_voxel_points = padded_voxel_points.astype(np.float32) 238 | label_one_hot = label_one_hot.astype(np.float32) 239 | reg_target = 
reg_target.astype(np.float32) 240 | anchors_map = anchors_map.astype(np.float32) 241 | motion_one_hot = motion_one_hot.astype(np.float32) 242 | semantic_maps = semantic_maps.astype(np.float32) 243 | vis_maps = vis_maps.astype(np.float32) 244 | 245 | if self.val: 246 | return ( 247 | padded_voxel_points, 248 | label_one_hot, 249 | reg_target, 250 | reg_loss_mask, 251 | anchors_map, 252 | motion_one_hot, 253 | motion_mask, 254 | vis_maps, 255 | [{"gt_box": gt_max_iou}], 256 | [seq_file], 257 | ) 258 | else: 259 | return ( 260 | padded_voxel_points, 261 | label_one_hot, 262 | reg_target, 263 | reg_loss_mask, 264 | anchors_map, 265 | motion_one_hot, 266 | motion_mask, 267 | vis_maps, 268 | ) 269 | -------------------------------------------------------------------------------- /coperception/datasets/V2XSimSeg.py: -------------------------------------------------------------------------------- 1 | import os 2 | from multiprocessing import Manager 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import torchvision.transforms as transforms 8 | import torchvision.transforms.functional as TF 9 | from torch.utils.data import Dataset 10 | 11 | 12 | class V2XSimSeg(Dataset): 13 | def __init__( 14 | self, 15 | dataset_roots=None, 16 | config=None, 17 | split=None, 18 | cache_size=1000, 19 | val=False, 20 | com=False, 21 | bound=None, 22 | kd_flag=False, 23 | rsu=False, 24 | ): 25 | """ 26 | This dataloader loads single sequence for a keyframe, and is not designed for computing the 27 | spatio-temporal consistency losses. It supports train, val and test splits. 28 | 29 | dataset_root: Data path to the preprocessed sparse nuScenes data (for training) 30 | split: [train/val/test] 31 | future_frame_skip: Specify to skip how many future frames 32 | voxel_size: The lattice resolution. Should be consistent with the preprocessed data 33 | area_extents: The area extents of the processed LiDAR data. Should be consistent with the preprocessed data 34 | category_num: The number of object categories (including the background) 35 | cache_size: The cache size for storing parts of data in the memory (for reducing the IO cost) 36 | """ 37 | if split is None: 38 | self.split = config.split 39 | else: 40 | self.split = split 41 | self.voxel_size = config.voxel_size 42 | self.area_extents = config.area_extents 43 | self.pred_len = config.pred_len 44 | self.val = val 45 | self.config = config 46 | self.use_vis = config.use_vis 47 | self.com = com 48 | self.bound = bound 49 | self.kd_flag = kd_flag 50 | self.rsu = rsu 51 | 52 | if dataset_roots is None: 53 | raise ValueError( 54 | "The {} dataset root is None. 
Should specify its value.".format( 55 | self.split 56 | ) 57 | ) 58 | self.dataset_roots = dataset_roots 59 | self.seq_files = [] 60 | self.seq_scenes = [] 61 | for dataset_root in self.dataset_roots: 62 | # sort directories 63 | dir_list = [d.split("_") for d in os.listdir(dataset_root)] 64 | dir_list.sort(key=lambda x: (int(x[0]), int(x[1]))) 65 | self.seq_scenes.append( 66 | [int(s[0]) for s in dir_list] 67 | ) # which scene this frame belongs to (required for visualization) 68 | dir_list = ["_".join(x) for x in dir_list] 69 | 70 | seq_dirs = [ 71 | os.path.join(dataset_root, d) 72 | for d in dir_list 73 | if os.path.isdir(os.path.join(dataset_root, d)) 74 | ] 75 | 76 | self.seq_files.append( 77 | [ 78 | os.path.join(seq_dir, f) 79 | for seq_dir in seq_dirs 80 | for f in os.listdir(seq_dir) 81 | if os.path.isfile(os.path.join(seq_dir, f)) 82 | ] 83 | ) 84 | 85 | self.num_agent = len(self.dataset_roots) 86 | 87 | self.num_sample_seqs = len(self.seq_files[0]) 88 | print("The number of {} sequences: {}".format(self.split, self.num_sample_seqs)) 89 | # object information 90 | self.dims = config.map_dims 91 | self.num_past_pcs = config.num_past_pcs 92 | manager = Manager() 93 | self.cache = [manager.dict() for i in range(self.num_agent)] 94 | self.cache_size = cache_size if split == "train" else 0 95 | 96 | self.transform = Transform(self.split) 97 | 98 | def __len__(self): 99 | return self.num_sample_seqs 100 | 101 | def get_one_hot(self, label, category_num): 102 | one_hot_label = np.zeros((label.shape[0], category_num)) 103 | for i in range(label.shape[0]): 104 | one_hot_label[i][label[i]] = 1 105 | 106 | return one_hot_label 107 | 108 | def get_seginfo_from_single_agent(self, agent_id, idx): 109 | empty_flag = False 110 | if idx in self.cache[agent_id]: 111 | gt_dict = self.cache[agent_id][idx] 112 | else: 113 | seq_file = self.seq_files[agent_id][idx] 114 | gt_data_handle = np.load(seq_file, allow_pickle=True) 115 | if gt_data_handle == 0: 116 | empty_flag = True 117 | if self.com != 'lowerbound' and self.com != 'upperbound': 118 | return ( 119 | torch.zeros((256, 256, 13)).bool(), 120 | torch.zeros((256, 256, 13)).bool(), 121 | torch.zeros((256, 256)).int(), 122 | torch.zeros((self.num_agent, 4, 4)), 123 | 0, 124 | 0, 125 | ) 126 | else: 127 | return ( 128 | torch.zeros((256, 256, 13)).bool(), 129 | torch.zeros((256, 256, 13)).bool(), 130 | torch.zeros((256, 256)).int(), 131 | ) 132 | else: 133 | gt_dict = gt_data_handle.item() 134 | if len(self.cache[agent_id]) < self.cache_size: 135 | self.cache[agent_id][idx] = gt_dict 136 | 137 | if not empty_flag: 138 | bev_seg = gt_dict["bev_seg"].astype(np.int32) 139 | 140 | padded_voxel_points = list() 141 | 142 | # if self.bound == 'lowerbound': 143 | for i in range(self.num_past_pcs): 144 | indices = gt_dict["voxel_indices_" + str(i)] 145 | curr_voxels = np.zeros(self.dims, dtype=bool) 146 | curr_voxels[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 147 | 148 | curr_voxels = np.rot90(curr_voxels, 3) 149 | # curr_voxels = np.rot90(np.fliplr(curr_voxels), 3) 150 | bev_seg = np.rot90(bev_seg, 1) # to align with voxel 151 | 152 | padded_voxel_points.append(curr_voxels) 153 | padded_voxel_points = np.stack(padded_voxel_points, 0) 154 | padded_voxel_points = np.squeeze(padded_voxel_points, 0) 155 | 156 | padded_voxel_points_teacher = list() 157 | # if self.bound == 'upperbound' or self.kd_flag: 158 | if self.rsu: 159 | indices_teacher = gt_dict["voxel_indices_teacher"] 160 | else: 161 | indices_teacher = 
gt_dict["voxel_indices_teacher_no_cross_road"] 162 | 163 | curr_voxels_teacher = np.zeros(self.dims, dtype=bool) 164 | curr_voxels_teacher[ 165 | indices_teacher[:, 0], indices_teacher[:, 1], indices_teacher[:, 2] 166 | ] = 1 167 | curr_voxels_teacher = np.rot90(curr_voxels_teacher, 3) 168 | padded_voxel_points_teacher.append(curr_voxels_teacher) 169 | padded_voxel_points_teacher = np.stack(padded_voxel_points_teacher, 0) 170 | padded_voxel_points_teacher = np.squeeze(padded_voxel_points_teacher, 0) 171 | 172 | if self.com != 'lowerbound' and self.com != 'upperbound': 173 | if self.rsu: 174 | trans_matrices = gt_dict["trans_matrices"] 175 | else: 176 | trans_matrices = gt_dict["trans_matrices_no_cross_road"] 177 | 178 | target_agent_id = gt_dict["target_agent_id"] 179 | num_sensor = gt_dict["num_sensor"] 180 | 181 | return ( 182 | torch.from_numpy(padded_voxel_points), 183 | torch.from_numpy(padded_voxel_points_teacher), 184 | torch.from_numpy(bev_seg.copy()), 185 | torch.from_numpy(trans_matrices.copy()), 186 | target_agent_id, 187 | num_sensor, 188 | ) 189 | else: 190 | return ( 191 | torch.from_numpy(padded_voxel_points), 192 | torch.from_numpy(padded_voxel_points_teacher), 193 | torch.from_numpy(bev_seg.copy()), 194 | ) 195 | 196 | def __getitem__(self, idx): 197 | res = [] 198 | for i in range(self.num_agent): 199 | res.append(self.get_seginfo_from_single_agent(i, idx)) 200 | return res 201 | 202 | 203 | class Transform: 204 | def __init__(self, split): 205 | self.totensor = transforms.ToTensor() 206 | self.resize = transforms.Resize((256, 256)) 207 | self.split = split 208 | 209 | def __call__(self, img, label): 210 | img = self.totensor(img.copy()) 211 | label = self.totensor(label.copy()) 212 | 213 | if self.split != "train": 214 | return img.permute(1, 2, 0).float(), label.squeeze(0).int() 215 | 216 | crop = transforms.RandomResizedCrop(256) 217 | params = crop.get_params(img, scale=(0.08, 1.0), ratio=(0.75, 1.33)) 218 | img = TF.crop(img, *params) 219 | label = TF.crop(label, *params) 220 | 221 | if np.random.random() > 0.5: 222 | img = TF.hflip(img) 223 | label = TF.hflip(label) 224 | 225 | if np.random.random() > 0.5: 226 | img = TF.vflip(img) 227 | label = TF.vflip(label) 228 | 229 | img = self.resize(img) 230 | label = cv2.resize( 231 | label.squeeze(0).numpy(), dsize=(256, 256), interpolation=cv2.INTER_NEAREST 232 | ) # Resize provided by pytorch will have some random noise 233 | # return img.permute(1, 2, 0).float(), label.squeeze(0).int() 234 | return img.permute(1, 2, 0).float(), label 235 | -------------------------------------------------------------------------------- /coperception/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .V2XSimDet import V2XSimDet 2 | from .V2XSimSeg import V2XSimSeg 3 | from .NuscenesDataset import NuscenesDataset 4 | from .MbbSampler import MbbSampler -------------------------------------------------------------------------------- /coperception/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /coperception/models/det/AgentWiseWeightedFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from coperception.models.det.base import FusionBase 5 | 6 | 7 | class AgentWiseWeightedFusion(FusionBase): 8 | """Agent-wise weighted fusion. Used as a lower-bound in the DiscoNet fusion.""" 9 | 10 | def __init__( 11 | self, 12 | config, 13 | layer=3, 14 | in_channels=13, 15 | kd_flag=True, 16 | num_agent=5, 17 | compress_level=0, 18 | only_v2i=False, 19 | ): 20 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 21 | self.agent_weighted_fusion = AgentWeightedFusion() 22 | 23 | def fusion(self): 24 | agent_weight_list = list() 25 | for k in range(self.num_agent): 26 | cat_feat = torch.cat([self.tg_agent, self.neighbor_feat_list[k]], dim=0) 27 | cat_feat = cat_feat.unsqueeze(0) 28 | agent_weight = self.agent_weighted_fusion(cat_feat) 29 | agent_weight_list.append(agent_weight) 30 | 31 | soft_agent_weight_list = torch.squeeze( 32 | F.softmax(torch.tensor(agent_weight_list).unsqueeze(0), dim=1) 33 | ) 34 | 35 | agent_wise_weight_feat = 0 36 | for k in range(self.num_agent): 37 | agent_wise_weight_feat = ( 38 | agent_wise_weight_feat 39 | + soft_agent_weight_list[k] * self.neighbor_feat_list[k] 40 | ) 41 | 42 | return agent_wise_weight_feat 43 | 44 | 45 | class AgentWeightedFusion(nn.Module): 46 | def __init__(self): 47 | super(AgentWeightedFusion, self).__init__() 48 | 49 | self.conv1_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) 50 | self.bn1_1 = nn.BatchNorm2d(128) 51 | 52 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 53 | self.bn1_2 = nn.BatchNorm2d(32) 54 | 55 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 56 | self.bn1_3 = nn.BatchNorm2d(8) 57 | 58 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 59 | 60 | # self.conv1_1 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0) 61 | # self.bn1_1 = nn.BatchNorm2d(1) 62 | self.conv1_5 = nn.Conv2d(1, 1, kernel_size=32, stride=1, padding=0) 63 | # # self.bn1_2 = nn.BatchNorm2d(1) 64 | 65 | def forward(self, x): 66 | # x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 67 | # x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 68 | # x_1 = F.sigmoid(self.conv1_2(x_1)) 69 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 70 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 71 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 72 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 73 | x_1 = F.relu(self.conv1_4(x_1)) 74 | x_1 = F.relu(self.conv1_5(x_1)) 75 | 76 | return x_1 77 | -------------------------------------------------------------------------------- /coperception/models/det/CatFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from coperception.models.det.base import FusionBase 5 | 6 | 7 | class CatFusion(FusionBase): 8 | """Concatenate fusion. 
Used as a lower-bound in the DiscoNet paper.""" 9 | 10 | def __init__( 11 | self, 12 | config, 13 | layer=3, 14 | in_channels=13, 15 | kd_flag=True, 16 | num_agent=5, 17 | compress_level=0, 18 | only_v2i=False, 19 | ): 20 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 21 | self._modulation_layer_3 = ModulationLayer3() 22 | 23 | def fusion(self): 24 | mean_feat = torch.mean(torch.stack(self.neighbor_feat_list), dim=0) # [c, h, w] 25 | cat_feat = torch.cat([self.tg_agent, mean_feat], dim=0) 26 | cat_feat = cat_feat.unsqueeze(0) # [1, 1, c, h, w] 27 | return self._modulation_layer_3(cat_feat) 28 | 29 | 30 | class ModulationLayer3(nn.Module): 31 | def __init__(self): 32 | super(ModulationLayer3, self).__init__() 33 | 34 | self._conv1_1 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) 35 | self._bn1_1 = nn.BatchNorm2d(256) 36 | 37 | def forward(self, x): 38 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 39 | x_1 = F.relu(self._bn1_1(self._conv1_1(x))) 40 | 41 | return x_1 42 | -------------------------------------------------------------------------------- /coperception/models/det/DiscoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from coperception.models.det.base import IntermediateModelBase 5 | 6 | 7 | class DiscoNet(IntermediateModelBase): 8 | """DiscoNet. 9 | 10 | https://github.com/ai4ce/DiscoNet 11 | 12 | Args: 13 | config (object): The config object. 14 | layer (int, optional): Collaborate on which layer. Defaults to 3. 15 | in_channels (int, optional): The input channels. Defaults to 13. 16 | kd_flag (bool, optional): Whether to use knowledge distillation. Defaults to True. 17 | num_agent (int, optional): The number of agents (including RSU). Defaults to 5. 18 | 19 | """ 20 | 21 | def __init__(self, config, layer=3, in_channels=13, kd_flag=True, num_agent=5, compress_level=0, only_v2i=False): 22 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 23 | if self.layer == 3: 24 | self.pixel_weighted_fusion = PixelWeightedFusionSoftmax(256) 25 | elif self.layer == 2: 26 | self.pixel_weighted_fusion = PixelWeightedFusionSoftmax(128) 27 | 28 | def forward(self, bevs, trans_matrices, num_agent_tensor, batch_size=1): 29 | """Forward pass. 30 | 31 | Args: 32 | bevs (tensor): BEV data 33 | trans_matrices (tensor): Matrix for transforming features among agents. 34 | num_agent_tensor (tensor): Number of agents to communicate for each agent. 35 | batch_size (int, optional): The batch size. Defaults to 1. 36 | 37 | Returns: 38 | If kd_flag is set: the detection result, all decoded layers, and the fused feature maps. 39 | Otherwise: the detection result and a list of per-agent fusion weights. 
40 | """ 41 | 42 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 43 | encoded_layers = self.u_encoder(bevs) 44 | device = bevs.device 45 | 46 | feat_maps, size = super().get_feature_maps_and_size(encoded_layers) 47 | 48 | feat_list = super().build_feature_list(batch_size, feat_maps) 49 | 50 | local_com_mat = super().build_local_communication_matrix( 51 | feat_list 52 | ) # [2 5 512 16 16] [batch, agent, channel, height, width] 53 | local_com_mat_update = super().build_local_communication_matrix( 54 | feat_list 55 | ) # to avoid the inplace operation 56 | 57 | save_agent_weight_list = list() 58 | 59 | for b in range(batch_size): 60 | num_agent = num_agent_tensor[b, 0] 61 | for i in range(num_agent): 62 | tg_agent = local_com_mat[b, i] 63 | all_warp = trans_matrices[b, i] # transformation [2 5 5 4 4] 64 | 65 | self.neighbor_feat_list = list() 66 | self.neighbor_feat_list.append(tg_agent) 67 | 68 | if super().outage(): 69 | agent_wise_weight_feat = self.neighbor_feat_list[0] 70 | else: 71 | super().build_neighbors_feature_list( 72 | b, 73 | i, 74 | all_warp, 75 | num_agent, 76 | local_com_mat, 77 | device, 78 | size, 79 | trans_matrices, 80 | ) 81 | 82 | # agent-wise weighted fusion 83 | tmp_agent_weight_list = list() 84 | sum_weight = 0 85 | nb_len = len(self.neighbor_feat_list) 86 | for k in range(nb_len): 87 | cat_feat = torch.cat( 88 | [tg_agent, self.neighbor_feat_list[k]], dim=0 89 | ) 90 | cat_feat = cat_feat.unsqueeze(0) 91 | agent_weight = torch.squeeze( 92 | self.pixel_weighted_fusion(cat_feat) 93 | ) 94 | tmp_agent_weight_list.append(torch.exp(agent_weight)) 95 | sum_weight = sum_weight + torch.exp(agent_weight) 96 | 97 | agent_weight_list = list() 98 | for k in range(nb_len): 99 | agent_weight = torch.div(tmp_agent_weight_list[k], sum_weight) 100 | agent_weight.expand([256, -1, -1]) 101 | agent_weight_list.append(agent_weight) 102 | 103 | agent_wise_weight_feat = 0 104 | for k in range(nb_len): 105 | agent_wise_weight_feat = ( 106 | agent_wise_weight_feat 107 | + agent_weight_list[k] * self.neighbor_feat_list[k] 108 | ) 109 | 110 | # feature update 111 | local_com_mat_update[b, i] = agent_wise_weight_feat 112 | 113 | save_agent_weight_list.append(agent_weight_list) 114 | 115 | # weighted feature maps is passed to decoder 116 | feat_fuse_mat = super().agents_to_batch(local_com_mat_update) 117 | 118 | decoded_layers = super().get_decoded_layers( 119 | encoded_layers, feat_fuse_mat, batch_size 120 | ) 121 | x = decoded_layers[0] 122 | 123 | cls_preds, loc_preds, result = super().get_cls_loc_result(x) 124 | 125 | if self.kd_flag == 1: 126 | return (result, *decoded_layers, feat_fuse_mat) 127 | else: 128 | # return result 129 | return result, save_agent_weight_list 130 | 131 | 132 | class PixelWeightedFusionSoftmax(nn.Module): 133 | def __init__(self, channel): 134 | super(PixelWeightedFusionSoftmax, self).__init__() 135 | 136 | self.conv1_1 = nn.Conv2d(channel * 2, 128, kernel_size=1, stride=1, padding=0) 137 | self.bn1_1 = nn.BatchNorm2d(128) 138 | 139 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 140 | self.bn1_2 = nn.BatchNorm2d(32) 141 | 142 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 143 | self.bn1_3 = nn.BatchNorm2d(8) 144 | 145 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 146 | # self.bn1_4 = nn.BatchNorm2d(1) 147 | 148 | def forward(self, x): 149 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 150 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 151 | x_1 = 
F.relu(self.bn1_2(self.conv1_2(x_1))) 152 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 153 | x_1 = F.relu(self.conv1_4(x_1)) 154 | 155 | return x_1 156 | -------------------------------------------------------------------------------- /coperception/models/det/FaFNet.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.base import NonIntermediateModelBase 2 | 3 | 4 | class FaFNet(NonIntermediateModelBase): 5 | """The model of early fusion. Used as lower-bound and upper-bound depending on the input features (fused or not). 6 | 7 | https://arxiv.org/pdf/2012.12395.pdf 8 | 9 | Args: 10 | config (object): The Config object. 11 | layer (int, optional): Collaborate on which layer. Defaults to 3. 12 | in_channels (int, optional): The input channels. Defaults to 13. 13 | kd_flag (bool, optional): Whether to use knowledge distillation (for DiscoNet to ues). Defaults to True. 14 | num_agent (int, optional): The number of agents (including RSU). Defaults to 5. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | config, 20 | layer=3, 21 | in_channels=13, 22 | kd_flag=True, 23 | num_agent=5, 24 | compress_level=0, 25 | ): 26 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level) 27 | 28 | def forward(self, bevs, maps=None, vis=None, batch_size=None): 29 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 30 | 31 | x_8, x_7, x_6, x_5, x_3, x_2 = self.stpn(bevs) 32 | x = x_8 33 | 34 | cls_preds, loc_preds, result = super().get_cls_loc_result(x) 35 | 36 | if self.kd_flag == 1: 37 | return result, x_8, x_7, x_6, x_5, x_3 38 | else: 39 | return result 40 | -------------------------------------------------------------------------------- /coperception/models/det/MaxFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from coperception.models.det.base import FusionBase 3 | 4 | 5 | class MaxFusion(FusionBase): 6 | "Maximum fusion. Used as a lower-bound in the DiscoNet fusion." 7 | 8 | def __init__( 9 | self, 10 | config, 11 | layer=3, 12 | in_channels=13, 13 | kd_flag=True, 14 | num_agent=5, 15 | compress_level=0, 16 | only_v2i=False, 17 | ): 18 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 19 | 20 | def fusion(self): 21 | return torch.max(torch.stack(self.neighbor_feat_list), dim=0).values 22 | -------------------------------------------------------------------------------- /coperception/models/det/MeanFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from coperception.models.det.base import FusionBase 3 | 4 | 5 | class MeanFusion(FusionBase): 6 | "Mean fusion. Used as a lower-bound in the DiscoNet fusion." 7 | 8 | def __init__(self, config, layer=3, in_channels=13, kd_flag=True, num_agent=5, compress_level=0, only_v2i=False): 9 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 10 | 11 | def fusion(self): 12 | return torch.mean(torch.stack(self.neighbor_feat_list), dim=0) 13 | -------------------------------------------------------------------------------- /coperception/models/det/SumFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from coperception.models.det.base import FusionBase 3 | 4 | 5 | class SumFusion(FusionBase): 6 | """Sum fusion. 
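Fuses by taking an element-wise sum of the ego feature and the warped neighbor features.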
Used as a lower-bound in the DiscoNet fusion.""" 7 | 8 | def __init__( 9 | self, 10 | config, 11 | layer=3, 12 | in_channels=13, 13 | kd_flag=True, 14 | num_agent=5, 15 | compress_level=0, 16 | only_v2i=False, 17 | ): 18 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 19 | 20 | def fusion(self): 21 | return torch.sum(torch.stack(self.neighbor_feat_list), dim=0) 22 | -------------------------------------------------------------------------------- /coperception/models/det/TeacherNet.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.base import NonIntermediateModelBase 2 | 3 | 4 | class TeacherNet(NonIntermediateModelBase): 5 | """The teacher net for knowledged distillation in DiscoNet.""" 6 | 7 | def __init__(self, config): 8 | super(TeacherNet, self).__init__(config, compress_level=0) 9 | 10 | def forward(self, bevs, maps=None, vis=None): 11 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 12 | # vis = vis.permute(0, 3, 1, 2) 13 | return self.stpn(bevs) 14 | -------------------------------------------------------------------------------- /coperception/models/det/V2VNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import coperception.utils.convolutional_rnn as convrnn 3 | from coperception.models.det.base import IntermediateModelBase 4 | import torch.nn.functional as F 5 | 6 | 7 | class V2VNet(IntermediateModelBase): 8 | """V2V Net 9 | 10 | https://arxiv.org/abs/2008.07519 11 | 12 | """ 13 | 14 | def __init__( 15 | self, 16 | config, 17 | gnn_iter_times, 18 | layer, 19 | layer_channel, 20 | in_channels=13, 21 | num_agent=5, 22 | compress_level=0, 23 | only_v2i=False, 24 | ): 25 | super().__init__( 26 | config, 27 | layer, 28 | in_channels, 29 | num_agent=num_agent, 30 | compress_level=compress_level, 31 | only_v2i=only_v2i, 32 | ) 33 | 34 | self.layer_channel = layer_channel 35 | self.gnn_iter_num = gnn_iter_times 36 | self.convgru = convrnn.Conv2dGRU( 37 | in_channels=self.layer_channel * 2, 38 | out_channels=self.layer_channel, 39 | kernel_size=3, 40 | num_layers=1, 41 | bidirectional=False, 42 | dilation=1, 43 | stride=1, 44 | ) 45 | self.compress_level = compress_level 46 | 47 | def forward(self, bevs, trans_matrices, num_agent_tensor, batch_size=1): 48 | # trans_matrices [batch 5 5 4 4] 49 | # num_agent_tensor, shape: [batch, num_agent]; how many non-empty agent in this scene 50 | 51 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 52 | encoded_layers = self.u_encoder(bevs) 53 | device = bevs.device 54 | 55 | feat_maps, size = super().get_feature_maps_and_size(encoded_layers) 56 | # get feat maps for each agent [10 512 16 16] -> [2 5 512 16 16] 57 | feat_list = super().build_feature_list(batch_size, feat_maps) 58 | 59 | local_com_mat = super().build_local_communication_matrix( 60 | feat_list 61 | ) # [2 5 512 16 16] [batch, agent, channel, height, width] 62 | local_com_mat_update = super().build_local_communication_matrix( 63 | feat_list 64 | ) # to avoid the inplace operation 65 | 66 | for b in range(batch_size): 67 | num_agent = num_agent_tensor[b, 0] 68 | 69 | agent_feat_list = list() 70 | for nb in range(self.agent_num): 71 | agent_feat_list.append(local_com_mat[b, nb]) 72 | 73 | for _ in range(self.gnn_iter_num): 74 | 75 | updated_feats_list = [] 76 | 77 | for i in range(num_agent): 78 | self.neighbor_feat_list = [] 79 | all_warp = trans_matrices[b, i] # transformation [2 5 5 4 4] 80 | 
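                    # Message-passing step for agent i: if the agent suffers a
                    # communication outage, its own feature is kept unchanged.
                    # Otherwise each neighbor's feature map is warped into agent
                    # i's coordinate frame, the warped maps are averaged,
                    # concatenated with agent i's current feature, and one
                    # ConvGRU step produces the updated feature for the next
                    # GNN iteration.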
81 | if super().outage(): 82 | updated_feats_list.append(agent_feat_list[i]) 83 | 84 | else: 85 | super().build_neighbors_feature_list( 86 | b, 87 | i, 88 | all_warp, 89 | num_agent, 90 | local_com_mat, 91 | device, 92 | size, 93 | trans_matrices, 94 | ) 95 | 96 | mean_feat = torch.mean( 97 | torch.stack(self.neighbor_feat_list), dim=0 98 | ) # [c, h, w] 99 | cat_feat = torch.cat([agent_feat_list[i], mean_feat], dim=0) 100 | cat_feat = cat_feat.unsqueeze(0).unsqueeze(0) # [1, 1, c, h, w] 101 | updated_feat, _ = self.convgru(cat_feat, None) 102 | updated_feat = torch.squeeze( 103 | torch.squeeze(updated_feat, 0), 0 104 | ) # [c, h, w] 105 | updated_feats_list.append(updated_feat) 106 | 107 | agent_feat_list = updated_feats_list 108 | 109 | for k in range(num_agent): 110 | local_com_mat_update[b, k] = agent_feat_list[k] 111 | 112 | feat_maps = super().agents_to_batch(local_com_mat_update) 113 | 114 | decoded_layers = super().get_decoded_layers( 115 | encoded_layers, feat_maps, batch_size 116 | ) 117 | x = decoded_layers[0] 118 | 119 | cls_pred, loc_preds, result = super().get_cls_loc_result(x) 120 | return result 121 | -------------------------------------------------------------------------------- /coperception/models/det/__init__.py: -------------------------------------------------------------------------------- 1 | from .DiscoNet import DiscoNet 2 | from .V2VNet import V2VNet 3 | from .When2com import When2com 4 | from .SumFusion import SumFusion 5 | from .MeanFusion import MeanFusion 6 | from .MaxFusion import MaxFusion 7 | from .CatFusion import CatFusion 8 | from .AgentWiseWeightedFusion import AgentWiseWeightedFusion 9 | from .TeacherNet import TeacherNet 10 | from .FaFNet import FaFNet 11 | -------------------------------------------------------------------------------- /coperception/models/det/backbone/Backbone.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | import torch 4 | 5 | 6 | class Backbone(nn.Module): 7 | """The backbone class that contains encode and decode function""" 8 | 9 | def __init__(self, height_feat_size, compress_level=0): 10 | super().__init__() 11 | self.conv_pre_1 = nn.Conv2d( 12 | height_feat_size, 32, kernel_size=3, stride=1, padding=1 13 | ) 14 | self.conv_pre_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) 15 | self.bn_pre_1 = nn.BatchNorm2d(32) 16 | self.bn_pre_2 = nn.BatchNorm2d(32) 17 | 18 | self.conv3d_1 = Conv3D( 19 | 64, 64, kernel_size=(1, 1, 1), stride=1, padding=(0, 0, 0) 20 | ) 21 | self.conv3d_2 = Conv3D( 22 | 128, 128, kernel_size=(1, 1, 1), stride=1, padding=(0, 0, 0) 23 | ) 24 | 25 | self.conv1_1 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1) 26 | self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) 27 | 28 | self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1) 29 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 30 | 31 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) 32 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 33 | 34 | self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1) 35 | self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 36 | 37 | self.conv5_1 = nn.Conv2d(512 + 256, 256, kernel_size=3, stride=1, padding=1) 38 | self.conv5_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 39 | 40 | self.conv6_1 = nn.Conv2d(256 + 128, 128, kernel_size=3, stride=1, padding=1) 41 
| self.conv6_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 42 | 43 | self.conv7_1 = nn.Conv2d(128 + 64, 64, kernel_size=3, stride=1, padding=1) 44 | self.conv7_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) 45 | 46 | self.conv8_1 = nn.Conv2d(64 + 32, 32, kernel_size=3, stride=1, padding=1) 47 | self.conv8_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) 48 | 49 | self.bn1_1 = nn.BatchNorm2d(64) 50 | self.bn1_2 = nn.BatchNorm2d(64) 51 | 52 | self.bn2_1 = nn.BatchNorm2d(128) 53 | self.bn2_2 = nn.BatchNorm2d(128) 54 | 55 | self.bn3_1 = nn.BatchNorm2d(256) 56 | self.bn3_2 = nn.BatchNorm2d(256) 57 | 58 | self.bn4_1 = nn.BatchNorm2d(512) 59 | self.bn4_2 = nn.BatchNorm2d(512) 60 | 61 | self.bn5_1 = nn.BatchNorm2d(256) 62 | self.bn5_2 = nn.BatchNorm2d(256) 63 | 64 | self.bn6_1 = nn.BatchNorm2d(128) 65 | self.bn6_2 = nn.BatchNorm2d(128) 66 | 67 | self.bn7_1 = nn.BatchNorm2d(64) 68 | self.bn7_2 = nn.BatchNorm2d(64) 69 | 70 | self.bn8_1 = nn.BatchNorm2d(32) 71 | self.bn8_2 = nn.BatchNorm2d(32) 72 | 73 | self.compress_level = compress_level 74 | if compress_level > 0: 75 | assert compress_level <= 8 76 | compress_channel_num = 256 // (2**compress_level) 77 | 78 | # currently only support compress/decompress at layer x_3 79 | self.com_compresser = nn.Conv2d( 80 | 256, compress_channel_num, kernel_size=1, stride=1 81 | ) 82 | self.bn_compress = nn.BatchNorm2d(compress_channel_num) 83 | 84 | self.com_decompresser = nn.Conv2d( 85 | compress_channel_num, 256, kernel_size=1, stride=1 86 | ) 87 | self.bn_decompress = nn.BatchNorm2d(256) 88 | 89 | def encode(self, x): 90 | """Encode the input BEV features. 91 | 92 | Args: 93 | x (tensor): the input BEV features. 94 | 95 | Returns: 96 | A list that contains all the encoded layers. 97 | """ 98 | batch, seq, z, h, w = x.size() 99 | 100 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 101 | x = x.to(torch.float) 102 | x = F.relu(self.bn_pre_1(self.conv_pre_1(x))) 103 | x = F.relu(self.bn_pre_2(self.conv_pre_2(x))) 104 | 105 | # -------------------------------- Encoder Path -------------------------------- 106 | # -- STC block 1 107 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 108 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 109 | 110 | x_1 = x_1.view( 111 | batch, -1, x_1.size(1), x_1.size(2), x_1.size(3) 112 | ).contiguous() # (batch, seq, c, h, w) 113 | x_1 = self.conv3d_1(x_1) 114 | x_1 = x_1.view( 115 | -1, x_1.size(2), x_1.size(3), x_1.size(4) 116 | ).contiguous() # (batch * seq, c, h, w) 117 | 118 | # -- STC block 2 119 | x_2 = F.relu(self.bn2_1(self.conv2_1(x_1))) 120 | x_2 = F.relu(self.bn2_2(self.conv2_2(x_2))) 121 | 122 | x_2 = x_2.view( 123 | batch, -1, x_2.size(1), x_2.size(2), x_2.size(3) 124 | ).contiguous() # (batch, seq, c, h, w) 125 | x_2 = self.conv3d_2(x_2) 126 | x_2 = x_2.view( 127 | -1, x_2.size(2), x_2.size(3), x_2.size(4) 128 | ).contiguous() # (batch * seq, c, h, w), seq = 1 129 | 130 | # -- STC block 3 131 | x_3 = F.relu(self.bn3_1(self.conv3_1(x_2))) 132 | x_3 = F.relu(self.bn3_2(self.conv3_2(x_3))) 133 | 134 | # -- STC block 4 135 | x_4 = F.relu(self.bn4_1(self.conv4_1(x_3))) 136 | x_4 = F.relu(self.bn4_2(self.conv4_2(x_4))) 137 | 138 | # compress x_3 (the layer that agents communicates on) 139 | if self.compress_level > 0: 140 | x_3 = F.relu(self.bn_compress(self.com_compresser(x_3))) 141 | x_3 = F.relu(self.bn_decompress(self.com_decompresser(x_3))) 142 | 143 | return [x, x_1, x_2, x_3, x_4] 144 | 145 | def decode( 146 | self, 147 | x, 148 | x_1, 149 | x_2, 150 | x_3, 151 | x_4, 152 | batch, 153 | 
kd_flag=False, 154 | requires_adaptive_max_pool3d=False, 155 | ): 156 | """Decode the input features. 157 | 158 | Args: 159 | x (tensor): layer-0 features. 160 | x_1 (tensor): layer-1 features. 161 | x_2 (tensor): layer-2 features. 162 | x_3 (tensor): layer-3 features. 163 | x_4 (tensor): layer-4 featuers. 164 | batch (int): The batch size. 165 | kd_flag (bool, optional): Required to be true for DiscoNet. Defaults to False. 166 | requires_adaptive_max_pool3d (bool, optional): If set to true, use adaptive max pooling 3d. Defaults to False. 167 | 168 | Returns: 169 | if kd_flag is true, return a list of output from layer-8 to layer-5 170 | else return a list of a single element: the output after passing through the decoder 171 | """ 172 | # -------------------------------- Decoder Path -------------------------------- 173 | x_5 = F.relu( 174 | self.bn5_1( 175 | self.conv5_1( 176 | torch.cat((F.interpolate(x_4, scale_factor=(2, 2)), x_3), dim=1) 177 | ) 178 | ) 179 | ) 180 | x_5 = F.relu(self.bn5_2(self.conv5_2(x_5))) 181 | 182 | x_2 = x_2.view(batch, -1, x_2.size(1), x_2.size(2), x_2.size(3)) 183 | x_2 = x_2.permute(0, 2, 1, 3, 4).contiguous() 184 | x_2 = ( 185 | F.adaptive_max_pool3d(x_2, (1, None, None)) 186 | if requires_adaptive_max_pool3d 187 | else x_2 188 | ) 189 | x_2 = x_2.permute(0, 2, 1, 3, 4).contiguous() 190 | x_2 = x_2.view(-1, x_2.size(2), x_2.size(3), x_2.size(4)).contiguous() 191 | 192 | x_6 = F.relu( 193 | self.bn6_1( 194 | self.conv6_1( 195 | torch.cat((F.interpolate(x_5, scale_factor=(2, 2)), x_2), dim=1) 196 | ) 197 | ) 198 | ) 199 | x_6 = F.relu(self.bn6_2(self.conv6_2(x_6))) 200 | 201 | x_1 = x_1.view(batch, -1, x_1.size(1), x_1.size(2), x_1.size(3)) 202 | x_1 = x_1.permute(0, 2, 1, 3, 4).contiguous() 203 | x_1 = ( 204 | F.adaptive_max_pool3d(x_1, (1, None, None)) 205 | if requires_adaptive_max_pool3d 206 | else x_1 207 | ) 208 | x_1 = x_1.permute(0, 2, 1, 3, 4).contiguous() 209 | x_1 = x_1.view(-1, x_1.size(2), x_1.size(3), x_1.size(4)).contiguous() 210 | 211 | x_7 = F.relu( 212 | self.bn7_1( 213 | self.conv7_1( 214 | torch.cat((F.interpolate(x_6, scale_factor=(2, 2)), x_1), dim=1) 215 | ) 216 | ) 217 | ) 218 | x_7 = F.relu(self.bn7_2(self.conv7_2(x_7))) 219 | 220 | x = x.view(batch, -1, x.size(1), x.size(2), x.size(3)) 221 | x = x.permute(0, 2, 1, 3, 4).contiguous() 222 | x = ( 223 | F.adaptive_max_pool3d(x, (1, None, None)) 224 | if requires_adaptive_max_pool3d 225 | else x 226 | ) 227 | x = x.permute(0, 2, 1, 3, 4).contiguous() 228 | x = x.view(-1, x.size(2), x.size(3), x.size(4)).contiguous() 229 | 230 | x_8 = F.relu( 231 | self.bn8_1( 232 | self.conv8_1( 233 | torch.cat((F.interpolate(x_7, scale_factor=(2, 2)), x), dim=1) 234 | ) 235 | ) 236 | ) 237 | res_x = F.relu(self.bn8_2(self.conv8_2(x_8))) 238 | 239 | if kd_flag: 240 | return [res_x, x_7, x_6, x_5] 241 | else: 242 | return [res_x] 243 | 244 | 245 | class STPN_KD(Backbone): 246 | """Used by non-intermediate models. Pass the output from encoder directly to decoder.""" 247 | 248 | def __init__(self, height_feat_size=13, compress_level=0): 249 | super().__init__(height_feat_size, compress_level) 250 | 251 | def forward(self, x): 252 | batch, seq, z, h, w = x.size() 253 | encoded_layers = super().encode(x) 254 | decoded_layers = super().decode( 255 | *encoded_layers, batch, kd_flag=True, requires_adaptive_max_pool3d=True 256 | ) 257 | return (*decoded_layers, encoded_layers[3], encoded_layers[4]) 258 | 259 | 260 | class LidarEncoder(Backbone): 261 | """The encoder class. 
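Returns the encoded feature maps [x, x_1, x_2, x_3, x_4] from Backbone.encode.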
Encodes input features in forward pass.""" 262 | 263 | def __init__(self, height_feat_size=13, compress_level=0): 264 | super().__init__(height_feat_size, compress_level) 265 | 266 | def forward(self, x): 267 | return super().encode(x) 268 | 269 | 270 | class LidarDecoder(Backbone): 271 | """The decoder class. Decodes input features in forward pass.""" 272 | 273 | def __init__(self, height_feat_size=13): 274 | super().__init__(height_feat_size) 275 | 276 | def forward(self, x, x_1, x_2, x_3, x_4, batch, kd_flag=False): 277 | return super().decode(x, x_1, x_2, x_3, x_4, batch, kd_flag) 278 | 279 | 280 | class Conv3D(nn.Module): 281 | """3D cnn used in the encoder.""" 282 | 283 | def __init__(self, in_channel, out_channel, kernel_size, stride, padding): 284 | super(Conv3D, self).__init__() 285 | self.conv3d = nn.Conv3d( 286 | in_channel, 287 | out_channel, 288 | kernel_size=kernel_size, 289 | stride=stride, 290 | padding=padding, 291 | ) 292 | self.bn3d = nn.BatchNorm3d(out_channel) 293 | 294 | def forward(self, x): 295 | # input x: (batch, seq, c, h, w) 296 | x = x.permute(0, 2, 1, 3, 4).contiguous() # (batch, c, seq_len, h, w) 297 | x = F.relu(self.bn3d(self.conv3d(x))) 298 | x = x.permute(0, 2, 1, 3, 4).contiguous() # (batch, seq_len, c, h, w) 299 | 300 | return x 301 | 302 | 303 | """""" """""" """""" """ 304 | Added by Yiming 305 | 306 | """ """""" """""" """""" 307 | 308 | 309 | class Conv2DBatchNormRelu(nn.Module): 310 | def __init__( 311 | self, 312 | in_channels, 313 | n_filters, 314 | k_size, 315 | stride, 316 | padding, 317 | bias=True, 318 | dilation=1, 319 | is_batchnorm=True, 320 | ): 321 | super(Conv2DBatchNormRelu, self).__init__() 322 | 323 | conv_mod = nn.Conv2d( 324 | int(in_channels), 325 | int(n_filters), 326 | kernel_size=k_size, 327 | padding=padding, 328 | stride=stride, 329 | bias=bias, 330 | dilation=dilation, 331 | ) 332 | 333 | if is_batchnorm: 334 | self.cbr_unit = nn.Sequential( 335 | conv_mod, nn.BatchNorm2d(int(n_filters)), nn.ReLU(inplace=True) 336 | ) 337 | else: 338 | self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=True)) 339 | 340 | def forward(self, inputs): 341 | outputs = self.cbr_unit(inputs) 342 | return outputs 343 | 344 | 345 | class Sparsemax(nn.Module): 346 | """Sparsemax function.""" 347 | 348 | def __init__(self, dim=None): 349 | """Initialize sparsemax activation 350 | 351 | Args: 352 | dim (int, optional): The dimension over which to apply the sparsemax function. 353 | """ 354 | super(Sparsemax, self).__init__() 355 | 356 | self.dim = -1 if dim is None else dim 357 | 358 | def forward(self, input): 359 | """Forward function. 360 | Args: 361 | input (torch.Tensor): Input tensor. First dimension should be the batch size 362 | Returns: 363 | torch.Tensor: [batch_size x number_of_logits] Output tensor 364 | """ 365 | # Sparsemax currently only handles 2-dim tensors, 366 | # so we reshape and reshape back after sparsemax 367 | original_size = input.size() 368 | input = input.view(-1, input.size(self.dim)) 369 | 370 | dim = 1 371 | number_of_logits = input.size(dim) 372 | 373 | # Translate input by max for numerical stability 374 | input = input - torch.max(input, dim=dim, keepdim=True)[0].expand_as(input) 375 | 376 | # Sort input in descending order. 
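        # The lines below implement the sparsemax projection onto the simplex:
        # sort z in descending order, find the largest support size k such that
        # 1 + k * z_(k) > sum_{j <= k} z_(j), set the threshold
        # tau = (sum of the top-k entries - 1) / k, and output max(z - tau, 0).
        # For example, z = [1.0, 0.8, 0.1] gives k = 2, tau = 0.4, output [0.6, 0.4, 0.0].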
377 | # (NOTE: Can be replaced with linear time selection method described here: 378 | # http://stanford.edu/~jduchi/projects/DuchiShSiCh08.html) 379 | zs = torch.sort(input=input, dim=dim, descending=True)[0] 380 | range = torch.range(start=1, end=number_of_logits, device=input.device).view( 381 | 1, -1 382 | ) 383 | range = range.expand_as(zs) 384 | 385 | # Determine sparsity of projection 386 | bound = 1 + range * zs 387 | cumulative_sum_zs = torch.cumsum(zs, dim) 388 | is_gt = torch.gt(bound, cumulative_sum_zs).type(input.type()) 389 | k = torch.max(is_gt * range, dim, keepdim=True)[0] 390 | 391 | # Compute threshold function 392 | zs_sparse = is_gt * zs 393 | 394 | # Compute taus 395 | taus = (torch.sum(zs_sparse, dim, keepdim=True) - 1) / k 396 | taus = taus.expand_as(input) 397 | 398 | # Sparsemax 399 | self.output = torch.max(torch.zeros_like(input), input - taus) 400 | 401 | output = self.output.view(original_size) 402 | 403 | return output 404 | 405 | def backward(self, grad_output): 406 | """Backward function.""" 407 | dim = 1 408 | 409 | nonzeros = torch.ne(self.output, 0) 410 | sum = torch.sum(grad_output * nonzeros, dim=dim) / torch.sum(nonzeros, dim=dim) 411 | self.grad_input = nonzeros * (grad_output - sum.expand_as(grad_output)) 412 | 413 | return self.grad_input 414 | -------------------------------------------------------------------------------- /coperception/models/det/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .Backbone import Backbone 2 | -------------------------------------------------------------------------------- /coperception/models/det/base/FusionBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.base.IntermediateModelBase import IntermediateModelBase 2 | 3 | 4 | class FusionBase(IntermediateModelBase): 5 | def __init__( 6 | self, 7 | config, 8 | layer=3, 9 | in_channels=13, 10 | kd_flag=True, 11 | num_agent=5, 12 | compress_level=0, 13 | only_v2i=False, 14 | ): 15 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 16 | self.num_agent = 0 17 | 18 | def fusion(self): 19 | raise NotImplementedError( 20 | "Please implement this method for specific fusion strategies" 21 | ) 22 | 23 | def forward(self, bevs, trans_matrices, num_agent_tensor, batch_size=1): 24 | 25 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 26 | encoded_layers = self.u_encoder(bevs) 27 | device = bevs.device 28 | 29 | feat_maps, size = super().get_feature_maps_and_size(encoded_layers) 30 | 31 | feat_list = super().build_feature_list(batch_size, feat_maps) 32 | 33 | local_com_mat = super().build_local_communication_matrix( 34 | feat_list 35 | ) # [2 5 512 16 16] [batch, agent, channel, height, width] 36 | local_com_mat_update = super().build_local_communication_matrix( 37 | feat_list 38 | ) # to avoid the inplace operation 39 | 40 | for b in range(batch_size): 41 | self.num_agent = num_agent_tensor[b, 0] 42 | for i in range(self.num_agent): 43 | self.tg_agent = local_com_mat[b, i] 44 | self.neighbor_feat_list = [] 45 | self.neighbor_feat_list.append(self.tg_agent) 46 | all_warp = trans_matrices[b, i] # transformation [2 5 5 4 4] 47 | 48 | super().build_neighbors_feature_list( 49 | b, 50 | i, 51 | all_warp, 52 | self.num_agent, 53 | local_com_mat, 54 | device, 55 | size, 56 | trans_matrices, 57 | ) 58 | 59 | # feature update 60 | local_com_mat_update[b, i] = self.fusion() 61 | 62 | # weighted feature 
maps is passed to decoder 63 | feat_fuse_mat = super().agents_to_batch(local_com_mat_update) 64 | 65 | decoded_layers = super().get_decoded_layers( 66 | encoded_layers, feat_fuse_mat, batch_size 67 | ) 68 | x = decoded_layers[0] 69 | 70 | cls_preds, loc_preds, result = super().get_cls_loc_result(x) 71 | 72 | if self.kd_flag == 1: 73 | return (result, *decoded_layers, feat_fuse_mat) 74 | else: 75 | return result 76 | -------------------------------------------------------------------------------- /coperception/models/det/base/IntermediateModelBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.backbone.Backbone import * 2 | from coperception.models.det.base.DetModelBase import DetModelBase 3 | 4 | 5 | class IntermediateModelBase(DetModelBase): 6 | """Abstract class. The parent class for all intermediate models. 7 | 8 | Attributes: 9 | u_encoder (nn.Module): The feature encoder. 10 | decoder (nn.Module): The feature decoder. 11 | """ 12 | 13 | def __init__( 14 | self, 15 | config, 16 | layer=3, 17 | in_channels=13, 18 | kd_flag=True, 19 | num_agent=5, 20 | compress_level=0, 21 | only_v2i=False, 22 | ): 23 | super().__init__(config, layer, in_channels, kd_flag, num_agent=num_agent, only_v2i=only_v2i) 24 | self.u_encoder = LidarEncoder(in_channels, compress_level) 25 | self.decoder = LidarDecoder(height_feat_size=in_channels) 26 | -------------------------------------------------------------------------------- /coperception/models/det/base/NonIntermediateModelBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.backbone.Backbone import * 2 | from coperception.models.det.base.DetModelBase import DetModelBase 3 | 4 | 5 | class NonIntermediateModelBase(DetModelBase): 6 | """Abstract class. The parent class for non-intermediate models. 7 | 8 | Attributes: 9 | stpn (nn.Module): Pass the features through encoder, then decoder. 
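Unlike the intermediate models, no feature-level collaboration happens inside the network; encoded features go straight to the decoder.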
10 | """ 11 | 12 | def __init__( 13 | self, 14 | config, 15 | layer=3, 16 | in_channels=13, 17 | kd_flag=True, 18 | num_agent=5, 19 | compress_level=0, 20 | ): 21 | super(NonIntermediateModelBase, self).__init__( 22 | config, layer, in_channels, kd_flag, num_agent 23 | ) 24 | self.stpn = STPN_KD(config.map_dims[2], compress_level) 25 | -------------------------------------------------------------------------------- /coperception/models/det/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .DetModelBase import DetModelBase 2 | from .FusionBase import FusionBase 3 | from .IntermediateModelBase import IntermediateModelBase 4 | from .NonIntermediateModelBase import NonIntermediateModelBase 5 | -------------------------------------------------------------------------------- /coperception/models/seg/AgentWiseWeightedFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from coperception.models.seg.FusionBase import FusionBase 6 | 7 | 8 | class AgentWiseWeightedFusion(FusionBase): 9 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.agent_weighted_fusion = AgentWeightedFusion() 14 | 15 | def fusion(self): 16 | agent_weight_list = list() 17 | for k in range(self.com_num_agent): 18 | cat_feat = torch.cat([self.tg_agent, self.neighbor_feat_list[k]], dim=0) 19 | cat_feat = cat_feat.unsqueeze(0) 20 | agent_weight = self.agent_weighted_fusion(cat_feat) 21 | agent_weight_list.append(agent_weight) 22 | 23 | soft_agent_weight_list = torch.squeeze( 24 | F.softmax(torch.tensor(agent_weight_list).unsqueeze(0), dim=1) 25 | ) 26 | 27 | agent_wise_weight_feat = 0 28 | for k in range(self.com_num_agent): 29 | agent_wise_weight_feat = ( 30 | agent_wise_weight_feat 31 | + soft_agent_weight_list[k] * self.neighbor_feat_list[k] 32 | ) 33 | 34 | return agent_wise_weight_feat 35 | 36 | 37 | # FIXME: Change size 38 | class AgentWeightedFusion(nn.Module): 39 | def __init__(self): 40 | super(AgentWeightedFusion, self).__init__() 41 | 42 | # self.conv1_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) 43 | # self.bn1_1 = nn.BatchNorm2d(128) 44 | # 45 | # self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 46 | # self.bn1_2 = nn.BatchNorm2d(32) 47 | # 48 | # self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 49 | # self.bn1_3 = nn.BatchNorm2d(8) 50 | # 51 | # self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 52 | # 53 | # # self.conv1_1 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0) 54 | # # self.bn1_1 = nn.BatchNorm2d(1) 55 | # self.conv1_5 = nn.Conv2d(1, 1, kernel_size=32, stride=1, padding=0) 56 | # # # self.bn1_2 = nn.BatchNorm2d(1) 57 | 58 | self.conv1_1 = nn.Conv2d(1024, 128, kernel_size=1, stride=1, padding=0) 59 | self.bn1_1 = nn.BatchNorm2d(128) 60 | 61 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 62 | self.bn1_2 = nn.BatchNorm2d(32) 63 | 64 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 65 | self.bn1_3 = nn.BatchNorm2d(8) 66 | 67 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 68 | 69 | self.conv1_5 = nn.Conv2d(1, 1, kernel_size=32, stride=1, padding=0) 70 | 71 | def forward(self, x): 72 | x = x.view(-1, x.size(-3), 
x.size(-2), x.size(-1)) 73 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 74 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 75 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 76 | x_1 = F.relu(self.conv1_4(x_1)) 77 | x_1 = F.relu(self.conv1_5(x_1)) 78 | 79 | return x_1 80 | -------------------------------------------------------------------------------- /coperception/models/seg/CatFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from coperception.models.seg.FusionBase import FusionBase 6 | 7 | 8 | class CatFusion(FusionBase): 9 | def __init__(self, n_channels, n_classes, num_agent, compress_level, only_v2i): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.modulation_layer_3 = ModulationLayer3() 14 | 15 | def fusion(self): 16 | mean_feat = torch.mean(torch.stack(self.neighbor_feat_list), dim=0) # [c, h, w] 17 | cat_feat = torch.cat([self.tg_agent, mean_feat], dim=0) 18 | cat_feat = cat_feat.unsqueeze(0) # [1, 1, c, h, w] 19 | return self.modulation_layer_3(cat_feat) 20 | 21 | 22 | # FIXME: Change size 23 | class ModulationLayer3(nn.Module): 24 | def __init__(self): 25 | super(ModulationLayer3, self).__init__() 26 | 27 | self.conv1_1 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0) 28 | self.bn1_1 = nn.BatchNorm2d(512) 29 | 30 | def forward(self, x): 31 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 32 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 33 | 34 | return x_1 35 | -------------------------------------------------------------------------------- /coperception/models/seg/DiscoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from coperception.models.seg.FusionBase import FusionBase 6 | 7 | 8 | class DiscoNet(FusionBase): 9 | def __init__( 10 | self, n_channels, n_classes, num_agent, kd_flag=True, compress_level=0, only_v2i=False 11 | ): 12 | super().__init__( 13 | n_channels, 14 | n_classes, 15 | num_agent, 16 | kd_flag=kd_flag, 17 | compress_level=compress_level, 18 | only_v2i=only_v2i, 19 | ) 20 | self.pixel_weighted_fusion = PixelWeightedFusionSoftmax(512) 21 | 22 | # def forward(self, x, trans_matrices, num_agent_tensor): 23 | # device = x.device 24 | # x1 = self.inc(x) 25 | # x2 = self.down1(x1) 26 | # x3 = self.down2(x2) 27 | # x4 = self.down3(x3) # b 512 32 32 28 | # size = (1, 512, 32, 32) 29 | 30 | # batch_size = x.size(0) // self.num_agent 31 | # feat_map, feat_list = super().build_feat_map_and_feat_list(x4, batch_size) 32 | 33 | # local_com_mat = torch.cat(tuple(feat_list), 1) 34 | # local_com_mat_update = torch.cat(tuple(feat_list), 1) 35 | 36 | # for b in range(batch_size): 37 | # self.com_num_agent = num_agent_tensor[b, 0] 38 | 39 | # agent_feat_list = list() 40 | # for nb in range(self.num_agent): 41 | # agent_feat_list.append(local_com_mat[b, nb]) 42 | 43 | # for i in range(self.num_agent): 44 | # self.tg_agent = local_com_mat[b, i] 45 | # all_warp = trans_matrices[b, i] 46 | 47 | # self.neighbor_feat_list = list() 48 | # self.neighbor_feat_list.append(self.tg_agent) 49 | 50 | # for j in range(self.num_agent): 51 | # if j != i: 52 | # self.neighbor_feat_list.append( 53 | # super().feature_transformation( 54 | # b, j, local_com_mat, all_warp, device, size 55 | # ) 56 | # ) 57 | 58 | # local_com_mat_update[b, i] = 
self.fusion() 59 | 60 | # feat_list = [] 61 | # for i in range(self.num_agent): 62 | # feat_list.append(local_com_mat_update[:, i, :, :, :]) 63 | # feat_mat = torch.cat(feat_list, 0) 64 | 65 | # x5 = self.down4(feat_mat) 66 | # x6 = self.up1(x5, feat_mat) 67 | # x7 = self.up2(x6, x3) 68 | # x8 = self.up3(x7, x2) 69 | # x9 = self.up4(x8, x1) 70 | # logits = self.outc(x9) 71 | 72 | # if self.kd_flag: 73 | # return logits, x9, x8, x7, x6, x5, feat_mat 74 | # else: 75 | # return logits 76 | 77 | def fusion(self): 78 | tmp_agent_weight_list = list() 79 | sum_weight = 0 80 | nb_len = len(self.neighbor_feat_list) 81 | for k in range(nb_len): 82 | cat_feat = torch.cat([self.tg_agent, self.neighbor_feat_list[k]], dim=0) 83 | cat_feat = cat_feat.unsqueeze(0) 84 | agent_weight = torch.squeeze(self.pixel_weighted_fusion(cat_feat)) 85 | tmp_agent_weight_list.append(torch.exp(agent_weight)) 86 | sum_weight = sum_weight + torch.exp(agent_weight) 87 | 88 | agent_weight_list = list() 89 | for k in range(nb_len): 90 | agent_weight = torch.div(tmp_agent_weight_list[k], sum_weight) 91 | agent_weight.expand([256, -1, -1]) 92 | agent_weight_list.append(agent_weight) 93 | 94 | agent_wise_weight_feat = 0 95 | for k in range(nb_len): 96 | agent_wise_weight_feat = ( 97 | agent_wise_weight_feat 98 | + agent_weight_list[k] * self.neighbor_feat_list[k] 99 | ) 100 | 101 | return agent_wise_weight_feat 102 | 103 | 104 | class PixelWeightedFusionSoftmax(nn.Module): 105 | def __init__(self, channel): 106 | super(PixelWeightedFusionSoftmax, self).__init__() 107 | 108 | self.conv1_1 = nn.Conv2d(channel * 2, 128, kernel_size=1, stride=1, padding=0) 109 | self.bn1_1 = nn.BatchNorm2d(128) 110 | 111 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 112 | self.bn1_2 = nn.BatchNorm2d(32) 113 | 114 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 115 | self.bn1_3 = nn.BatchNorm2d(8) 116 | 117 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 118 | 119 | def forward(self, x): 120 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 121 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 122 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 123 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 124 | x_1 = F.relu(self.conv1_4(x_1)) 125 | 126 | return x_1 127 | -------------------------------------------------------------------------------- /coperception/models/seg/FusionBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.seg.SegModelBase import SegModelBase 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class FusionBase(SegModelBase): 7 | def __init__( 8 | self, n_channels, n_classes, num_agent=5, kd_flag=False, compress_level=0, only_v2i=False 9 | ): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.neighbor_feat_list = None 14 | self.tg_agent = None 15 | self.current_num_agent = None 16 | self.kd_flag = kd_flag 17 | self.only_v2i = only_v2i 18 | 19 | def fusion(self): 20 | raise NotImplementedError( 21 | "Please implement this method for specific fusion strategies" 22 | ) 23 | 24 | def forward(self, x, trans_matrices, num_agent_tensor): 25 | x1 = self.inc(x) 26 | x2 = self.down1(x1) 27 | x3 = self.down2(x2) 28 | x4 = self.down3(x3) # b 512 32 32 29 | size = (1, 512, 32, 32) 30 | 31 | if self.compress_level > 0: 32 | x4 = F.relu(self.bn_compress(self.com_compresser(x4))) 33 | x4 = 
F.relu(self.bn_decompress(self.com_decompresser(x4))) 34 | 35 | batch_size = x.size(0) // self.num_agent 36 | feat_list = super().build_feat_list(x4, batch_size) 37 | 38 | local_com_mat = torch.cat(tuple(feat_list), 1) 39 | local_com_mat_update = torch.cat(tuple(feat_list), 1) 40 | 41 | for b in range(batch_size): 42 | self.com_num_agent = num_agent_tensor[b, 0] 43 | 44 | agent_feat_list = list() 45 | for nb in range(self.com_num_agent): 46 | agent_feat_list.append(local_com_mat[b, nb]) 47 | 48 | for i in range(self.com_num_agent): 49 | self.tg_agent = local_com_mat[b, i] 50 | 51 | self.neighbor_feat_list = list() 52 | self.neighbor_feat_list.append(self.tg_agent) 53 | 54 | for j in range(self.com_num_agent): 55 | if j != i: 56 | if self.only_v2i and i != 0 and j != 0: 57 | continue 58 | 59 | self.neighbor_feat_list.append( 60 | super().feature_transformation( 61 | b, 62 | j, 63 | i, 64 | local_com_mat, 65 | size, 66 | trans_matrices, 67 | ) 68 | ) 69 | 70 | local_com_mat_update[b, i] = self.fusion() 71 | 72 | feat_mat = super().agents_to_batch(local_com_mat_update) 73 | 74 | x5 = self.down4(feat_mat) 75 | x6 = self.up1(x5, feat_mat) 76 | x7 = self.up2(x6, x3) 77 | x8 = self.up3(x7, x2) 78 | x9 = self.up4(x8, x1) 79 | logits = self.outc(x9) 80 | 81 | if self.kd_flag: 82 | return logits, x9, x8, x7, x6, x5, feat_mat 83 | else: 84 | return logits 85 | -------------------------------------------------------------------------------- /coperception/models/seg/MaxFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from coperception.models.seg.FusionBase import FusionBase 4 | 5 | 6 | class MaxFusion(FusionBase): 7 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 8 | super().__init__( 9 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 10 | ) 11 | 12 | def fusion(self): 13 | return torch.max(torch.stack(self.neighbor_feat_list), dim=0).values 14 | -------------------------------------------------------------------------------- /coperception/models/seg/MeanFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from coperception.models.seg.FusionBase import FusionBase 4 | 5 | 6 | class MeanFusion(FusionBase): 7 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 8 | super().__init__( 9 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 10 | ) 11 | 12 | def fusion(self): 13 | return torch.mean(torch.stack(self.neighbor_feat_list), dim=0) 14 | -------------------------------------------------------------------------------- /coperception/models/seg/SegModelBase.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SegModelBase(nn.Module): 7 | def __init__( 8 | self, n_channels, n_classes, bilinear=True, num_agent=5, compress_level=0, only_v2i=False 9 | ): 10 | super().__init__() 11 | self.n_channels = n_channels 12 | self.n_classes = n_classes 13 | self.bilinear = bilinear 14 | self.num_agent = num_agent 15 | self.only_v2i = only_v2i 16 | 17 | self.inc = DoubleConv(n_channels, 64) 18 | self.down1 = Down(64, 128) 19 | self.down2 = Down(128, 256) 20 | self.down3 = Down(256, 512) 21 | factor = 2 if bilinear else 1 22 | self.down4 = Down(512, 1024 // factor) 23 | self.up1 = Up(1024, 512 
// factor, bilinear) 24 | self.up2 = Up(512, 256 // factor, bilinear) 25 | self.up3 = Up(256, 128 // factor, bilinear) 26 | self.up4 = Up(128, 64, bilinear) 27 | self.outc = OutConv(64, n_classes) 28 | 29 | self.compress_level = compress_level 30 | if compress_level > 0: 31 | assert compress_level <= 9 32 | feat_map_channel_num = 512 33 | compress_channel_num = feat_map_channel_num // (2**compress_level) 34 | 35 | self.com_compresser = nn.Conv2d( 36 | feat_map_channel_num, compress_channel_num, kernel_size=1, stride=1 37 | ) 38 | self.bn_compress = nn.BatchNorm2d(compress_channel_num) 39 | 40 | self.com_decompresser = nn.Conv2d( 41 | compress_channel_num, feat_map_channel_num, kernel_size=1, stride=1 42 | ) 43 | self.bn_decompress = nn.BatchNorm2d(feat_map_channel_num) 44 | 45 | def build_feat_list(self, feat_maps, batch_size): 46 | feat_maps = torch.flip(feat_maps, (2,)) 47 | 48 | tmp_feat_map = {} 49 | feat_list = [] 50 | for i in range(self.num_agent): 51 | tmp_feat_map[i] = torch.unsqueeze( 52 | feat_maps[batch_size * i : batch_size * (i + 1)], 1 53 | ) 54 | feat_list.append(tmp_feat_map[i]) 55 | 56 | return feat_list 57 | 58 | @staticmethod 59 | def feature_transformation(b, j, agent_idx, local_com_mat, size, trans_matrices): 60 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 61 | nb_agent = torch.unsqueeze(local_com_mat[b, j], 0) 62 | 63 | tfm_ji = trans_matrices[b, j, agent_idx] 64 | M = ( 65 | torch.hstack((tfm_ji[:2, :2], -tfm_ji[:2, 3:4])).float().unsqueeze(0) 66 | ) # [1,2,3] 67 | M = M.to(device) 68 | 69 | mask = torch.tensor([[[1, 1, 4 / 128], [1, 1, 4 / 128]]], device=M.device) 70 | 71 | M *= mask 72 | 73 | grid = F.affine_grid(M, size=torch.Size(size)) 74 | warp_feat = F.grid_sample(nb_agent, grid).squeeze() 75 | return warp_feat 76 | 77 | def agents_to_batch(self, feats): 78 | feat_list = [] 79 | for i in range(self.num_agent): 80 | feat_list.append(feats[:, i, :, :, :]) 81 | feat_mat = torch.cat(feat_list, 0) 82 | 83 | feat_mat = torch.flip(feat_mat, (2,)) 84 | 85 | return feat_mat 86 | 87 | 88 | ################## 89 | # Unet # ref: https://github.com/milesial/Pytorch-UNet 90 | ################## 91 | class DoubleConv(nn.Module): 92 | def __init__(self, in_channels, out_channels, mid_channels=None): 93 | super().__init__() 94 | if not mid_channels: 95 | mid_channels = out_channels 96 | self.double_conv = nn.Sequential( 97 | nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1), 98 | nn.BatchNorm2d(mid_channels), 99 | nn.ReLU(inplace=True), 100 | nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1), 101 | nn.BatchNorm2d(out_channels), 102 | nn.ReLU(inplace=True), 103 | ) 104 | 105 | def forward(self, x): 106 | return self.double_conv(x) 107 | 108 | 109 | class Down(nn.Module): 110 | def __init__(self, in_channels, out_channels): 111 | super().__init__() 112 | self.maxpool_conv = nn.Sequential( 113 | nn.MaxPool2d(2), 114 | DoubleConv(in_channels, out_channels), 115 | ) 116 | 117 | def forward(self, x): 118 | return self.maxpool_conv(x) 119 | 120 | 121 | class Up(nn.Module): 122 | def __init__(self, in_channels, out_channels, bilinear=True): 123 | super().__init__() 124 | if bilinear: 125 | self.up = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) 126 | self.conv = DoubleConv(in_channels, out_channels, in_channels // 2) 127 | else: 128 | self.up = nn.ConvTranspose2d( 129 | in_channels, in_channels // 2, kernel_size=2, stride=2 130 | ) 131 | self.conv = DoubleConv(in_channels, out_channels) 132 | 133 | def 
forward(self, x1, x2): 134 | x1 = self.up(x1) 135 | diff_y = x2.size()[2] - x1.size()[2] 136 | diff_x = x2.size()[3] - x1.size()[3] 137 | 138 | x1 = F.pad( 139 | x1, [diff_x // 2, diff_x - diff_x // 2, diff_y // 2, diff_y - diff_y // 2] 140 | ) 141 | x = torch.cat([x2, x1], dim=1) 142 | return self.conv(x) 143 | 144 | 145 | class OutConv(nn.Module): 146 | def __init__(self, in_channels, out_channels): 147 | super(OutConv, self).__init__() 148 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) 149 | 150 | def forward(self, x): 151 | return self.conv(x) 152 | -------------------------------------------------------------------------------- /coperception/models/seg/SumFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from coperception.models.seg.FusionBase import FusionBase 4 | 5 | 6 | class SumFusion(FusionBase): 7 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 8 | super().__init__( 9 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 10 | ) 11 | 12 | def fusion(self): 13 | return torch.sum(torch.stack(self.neighbor_feat_list), dim=0) 14 | -------------------------------------------------------------------------------- /coperception/models/seg/UNet.py: -------------------------------------------------------------------------------- 1 | from coperception.models.seg.SegModelBase import SegModelBase 2 | import torch.nn.functional as F 3 | 4 | 5 | class UNet(SegModelBase): 6 | def __init__( 7 | self, 8 | n_channels, 9 | n_classes, 10 | bilinear=True, 11 | num_agent=5, 12 | kd_flag=False, 13 | compress_level=0, 14 | ): 15 | super().__init__( 16 | n_channels, 17 | n_classes, 18 | bilinear, 19 | num_agent=num_agent, 20 | compress_level=compress_level, 21 | ) 22 | self.kd_flag = kd_flag 23 | 24 | def forward(self, x): 25 | x1 = self.inc(x) 26 | x2 = self.down1(x1) 27 | x3 = self.down2(x2) 28 | x4 = self.down3(x3) 29 | 30 | if self.compress_level > 0: 31 | x4 = F.relu(self.bn_compress(self.com_compresser(x4))) 32 | x4 = F.relu(self.bn_decompress(self.com_decompresser(x4))) 33 | 34 | x5 = self.down4(x4) 35 | x6 = self.up1(x5, x4) 36 | x7 = self.up2(x6, x3) 37 | x8 = self.up3(x7, x2) 38 | x9 = self.up4(x8, x1) 39 | logits = self.outc(x9) 40 | 41 | if self.kd_flag: 42 | return logits, x9, x8, x7, x6, x5, x4 43 | else: 44 | return logits 45 | -------------------------------------------------------------------------------- /coperception/models/seg/V2VNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import coperception.utils.convolutional_rnn as convrnn 4 | from coperception.models.seg.SegModelBase import SegModelBase 5 | import torch.nn.functional as F 6 | 7 | 8 | class V2VNet(SegModelBase): 9 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.layer_channel = 512 14 | self.gnn_iter_num = 1 15 | self.convgru = convrnn.Conv2dGRU( 16 | in_channels=self.layer_channel * 2, 17 | out_channels=self.layer_channel, 18 | kernel_size=3, 19 | num_layers=1, 20 | bidirectional=False, 21 | dilation=1, 22 | stride=1, 23 | ) 24 | 25 | def forward(self, x, trans_matrices, num_agent_tensor): 26 | x1 = self.inc(x) 27 | x2 = self.down1(x1) 28 | x3 = self.down2(x2) 29 | x4 = self.down3(x3) # b 512 32 32 30 | size = (1, 
512, 32, 32) 31 | 32 | if self.compress_level > 0: 33 | x4 = F.relu(self.bn_compress(self.com_compresser(x4))) 34 | x4 = F.relu(self.bn_decompress(self.com_decompresser(x4))) 35 | 36 | batch_size = x.size(0) // self.num_agent 37 | feat_list = super().build_feat_list(x4, batch_size) 38 | 39 | local_com_mat = torch.cat(tuple(feat_list), 1) 40 | local_com_mat_update = torch.cat(tuple(feat_list), 1) 41 | 42 | for b in range(batch_size): 43 | com_num_agent = num_agent_tensor[b, 0] 44 | 45 | agent_feat_list = list() 46 | for nb in range(self.num_agent): 47 | agent_feat_list.append(local_com_mat[b, nb]) 48 | 49 | for _ in range(self.gnn_iter_num): 50 | updated_feats_list = list() 51 | 52 | for i in range(com_num_agent): 53 | tg_agent = local_com_mat[b, i] 54 | 55 | neighbor_feat_list = list() 56 | neighbor_feat_list.append(tg_agent) 57 | 58 | for j in range(com_num_agent): 59 | if j != i: 60 | if self.only_v2i and i != 0 and j != 0: 61 | continue 62 | 63 | neighbor_feat_list.append( 64 | super().feature_transformation( 65 | b, 66 | j, 67 | i, 68 | local_com_mat, 69 | size, 70 | trans_matrices, 71 | ) 72 | ) 73 | 74 | mean_feat = torch.mean(torch.stack(neighbor_feat_list), dim=0) 75 | cat_feat = torch.cat([agent_feat_list[i], mean_feat], dim=0) 76 | cat_feat = cat_feat.unsqueeze(0).unsqueeze(0) 77 | updated_feat, _ = self.convgru(cat_feat, None) 78 | updated_feat = torch.squeeze(torch.squeeze(updated_feat, 0), 0) 79 | updated_feats_list.append(updated_feat) 80 | agent_feat_list = updated_feats_list 81 | for k in range(com_num_agent): 82 | local_com_mat_update[b, k] = agent_feat_list[k] 83 | 84 | feat_mat = super().agents_to_batch(local_com_mat_update) 85 | 86 | x5 = self.down4(feat_mat) 87 | x = self.up1(x5, feat_mat) 88 | x = self.up2(x, x3) 89 | x = self.up3(x, x2) 90 | x = self.up4(x, x1) 91 | logits = self.outc(x) 92 | return logits 93 | -------------------------------------------------------------------------------- /coperception/models/seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .SegModelBase import SegModelBase 2 | from .V2VNet import V2VNet 3 | from .When2Com_UNet import When2Com_UNet 4 | from .UNet import UNet 5 | from .FusionBase import FusionBase 6 | from .MeanFusion import MeanFusion 7 | from .MaxFusion import MaxFusion 8 | from .SumFusion import SumFusion 9 | from .CatFusion import CatFusion 10 | from .AgentWiseWeightedFusion import AgentWiseWeightedFusion 11 | from .DiscoNet import DiscoNet 12 | -------------------------------------------------------------------------------- /coperception/utils/AverageMeter.py: -------------------------------------------------------------------------------- 1 | class AverageMeter: 2 | def __init__(self, name, fmt=":f"): 3 | self.name = name 4 | self.fmt = fmt 5 | self.reset() 6 | self.val = 0 7 | self.avg = 0 8 | self.sum = 0 9 | self.count = 0 10 | 11 | def reset(self): 12 | self.val = 0 13 | self.avg = 0 14 | self.sum = 0 15 | self.count = 0 16 | 17 | def update(self, val, n=1): 18 | self.val = val 19 | self.sum += val * n 20 | self.count += n 21 | self.avg = self.sum / self.count 22 | 23 | def __str__(self): 24 | fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" 25 | return fmtstr.format(**self.__dict__) 26 | -------------------------------------------------------------------------------- /coperception/utils/SegMetrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from coperception.configs import Config 
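# Illustrative usage sketch (not part of the original module); the class count
# below is an assumed placeholder and should come from the project's Config:
#
#     iou = ComputeIoU(num_class=8)        # accumulates a confusion matrix
#     for pred, label in data_loader:      # pred: torch tensor, label: np.ndarray,
#         iou(pred, label)                 #   both shaped [N, H, W]
#     per_class = iou.get_ious()           # dict: class name -> IoU
#     miou = iou.get_miou(ignore=None)     # mean IoU over all classes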
4 | 5 | 6 | def fast_hist(a, b, n): 7 | """ 8 | Return a histogram that's the confusion matrix of a and b 9 | :param a: np.ndarray with shape (HxW,) 10 | :param b: np.ndarray with shape (HxW,) 11 | :param n: num of classes 12 | :return: np.ndarray with shape (n, n) 13 | """ 14 | k = (a >= 0) & (a < n) 15 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n) 16 | 17 | 18 | def per_class_iu(hist): 19 | """ 20 | Calculate the IoU(Intersection over Union) for each class 21 | :param hist: np.ndarray with shape (n, n) 22 | :return: np.ndarray with shape (n,) 23 | """ 24 | np.seterr(divide="ignore", invalid="ignore") 25 | res = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 26 | np.seterr(divide="warn", invalid="warn") 27 | res[np.isnan(res)] = 0.0 28 | return res 29 | 30 | 31 | class ComputeIoU(object): 32 | """ 33 | IoU: Intersection over Union 34 | """ 35 | 36 | def __init__(self, num_class): 37 | self.num_class = num_class 38 | self.cfsmatrix = np.zeros( 39 | (self.num_class, self.num_class), dtype="uint64" 40 | ) # confusion matrix 41 | self.ious = dict() 42 | self.config = Config(None) 43 | 44 | def get_cfsmatrix(self): 45 | return self.cfsmatrix 46 | 47 | def get_ious(self): 48 | ious_by_class = per_class_iu(self.cfsmatrix) 49 | self.ious = { 50 | self.config.class_idx_to_name[idx]: ious_by_class[idx] 51 | for idx in range(self.num_class) 52 | } 53 | 54 | return self.ious 55 | 56 | def get_miou(self, ignore=None): 57 | self.get_ious() 58 | total_iou = 0 59 | count = 0 60 | for key, value in self.ious.items(): 61 | if ( 62 | isinstance(ignore, list) 63 | and key in ignore 64 | or isinstance(ignore, int) 65 | and key == ignore 66 | ): 67 | continue 68 | total_iou += value 69 | count += 1 70 | return total_iou / count 71 | 72 | def __call__(self, pred, label): 73 | """ 74 | :param pred: [N, H, W] 75 | :param label: [N, H, W} 76 | Channel == 1 77 | """ 78 | 79 | pred = pred.cpu().numpy() 80 | # label = label.cpu().numpy() 81 | 82 | assert pred.shape == label.shape 83 | 84 | self.cfsmatrix += fast_hist( 85 | pred.reshape(-1), label.reshape(-1), self.num_class 86 | ).astype("uint64") 87 | -------------------------------------------------------------------------------- /coperception/utils/SegModule.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | import torch 4 | from coperception.utils.detection_util import * 5 | 6 | 7 | class SegModule(object): 8 | def __init__(self, model, teacher, config, optimizer, kd_flag): 9 | self.config = config 10 | self.model = model 11 | self.optimizer = optimizer 12 | self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( 13 | optimizer, T_max=self.config.nepoch 14 | ) 15 | # self.scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 100, 150, 200], gamma=0.5) 16 | self.criterion = nn.CrossEntropyLoss() 17 | self.teacher = teacher 18 | if kd_flag: 19 | for k, v in self.teacher.named_parameters(): 20 | v.requires_grad = False # fix parameters 21 | 22 | self.kd_flag = kd_flag 23 | 24 | self.com = config.com 25 | 26 | def resume(self, path): 27 | def map_func(storage, location): 28 | return storage.cuda() 29 | 30 | if os.path.isfile(path): 31 | if rank == 0: 32 | print("=> loading checkpoint '{}'".format(path)) 33 | 34 | checkpoint = torch.load(path, map_location=map_func) 35 | self.model.load_state_dict(checkpoint["state_dict"], strict=False) 36 | 37 | ckpt_keys = set(checkpoint["state_dict"].keys()) 38 
| own_keys = set(model.state_dict().keys()) 39 | missing_keys = own_keys - ckpt_keys 40 | for k in missing_keys: 41 | print("caution: missing keys from checkpoint {}: {}".format(path, k)) 42 | else: 43 | print("=> no checkpoint found at '{}'".format(path)) 44 | 45 | def step(self, data, num_agent, batch_size, loss=True): 46 | bev = data["bev_seq"] 47 | labels = data["labels"] 48 | self.optimizer.zero_grad() 49 | bev = bev.permute(0, 3, 1, 2).contiguous() 50 | 51 | if not self.com: 52 | filtered_bev = [] 53 | filtered_label = [] 54 | for i in range(bev.size(0)): 55 | if torch.sum(bev[i]) > 1e-4: 56 | filtered_bev.append(bev[i]) 57 | filtered_label.append(labels[i]) 58 | bev = torch.stack(filtered_bev, 0) 59 | labels = torch.stack(filtered_label, 0) 60 | 61 | if self.kd_flag: 62 | data["bev_seq_teacher"] = ( 63 | data["bev_seq_teacher"].permute(0, 3, 1, 2).contiguous() 64 | ) 65 | 66 | if self.com: 67 | if self.kd_flag: 68 | pred, x9, x8, x7, x6, x5, fused_layer = self.model( 69 | bev, data["trans_matrices"], data["num_sensor"] 70 | ) 71 | elif self.config.flag.startswith("when2com") or self.config.flag.startswith( 72 | "who2com" 73 | ): 74 | if self.config.split == "train": 75 | pred = self.model( 76 | bev, data["trans_matrices"], data["num_sensor"], training=True 77 | ) 78 | else: 79 | pred = self.model( 80 | bev, 81 | data["trans_matrices"], 82 | data["num_sensor"], 83 | inference=self.config.inference, 84 | training=False, 85 | ) 86 | else: 87 | pred = self.model(bev, data["trans_matrices"], data["num_sensor"]) 88 | else: 89 | pred = self.model(bev) 90 | 91 | if self.com: 92 | filtered_pred = [] 93 | filtered_label = [] 94 | for i in range(bev.size(0)): 95 | if torch.sum(bev[i]) > 1e-4: 96 | filtered_pred.append(pred[i]) 97 | filtered_label.append(labels[i]) 98 | pred = torch.stack(filtered_pred, 0) 99 | labels = torch.stack(filtered_label, 0) 100 | if not loss: 101 | return pred, labels 102 | 103 | kd_loss = ( 104 | self.get_kd_loss(batch_size, data, fused_layer, num_agent, x5, x6, x7) 105 | if self.kd_flag 106 | else 0 107 | ) 108 | loss = self.criterion(pred, labels.long()) + kd_loss 109 | 110 | if isinstance(self.criterion, nn.DataParallel): 111 | loss = loss.mean() 112 | 113 | loss_data = loss.data.item() 114 | if np.isnan(loss_data): 115 | raise ValueError("loss is nan while training") 116 | 117 | loss.backward() 118 | self.optimizer.step() 119 | 120 | return pred, loss_data 121 | 122 | def get_kd_loss(self, batch_size, data, fused_layer, num_agent, x5, x6, x7): 123 | if not self.kd_flag: 124 | return 0 125 | 126 | bev_seq_teacher = data["bev_seq_teacher"].type(torch.cuda.FloatTensor) 127 | kd_weight = data["kd_weight"] 128 | ( 129 | logit_teacher, 130 | x9_teacher, 131 | x8_teacher, 132 | x7_teacher, 133 | x6_teacher, 134 | x5_teacher, 135 | x4_teacher, 136 | ) = self.teacher(bev_seq_teacher) 137 | kl_loss_mean = nn.KLDivLoss(size_average=True, reduce=True) 138 | 139 | target_x5 = x5_teacher.permute(0, 2, 3, 1).reshape( 140 | num_agent * batch_size * 16 * 16, -1 141 | ) 142 | student_x5 = x5.permute(0, 2, 3, 1).reshape( 143 | num_agent * batch_size * 16 * 16, -1 144 | ) 145 | kd_loss_x5 = kl_loss_mean( 146 | F.log_softmax(student_x5, dim=1), F.softmax(target_x5, dim=1) 147 | ) 148 | 149 | target_x6 = x6_teacher.permute(0, 2, 3, 1).reshape( 150 | num_agent * batch_size * 32 * 32, -1 151 | ) 152 | student_x6 = x6.permute(0, 2, 3, 1).reshape( 153 | num_agent * batch_size * 32 * 32, -1 154 | ) 155 | kd_loss_x6 = kl_loss_mean( 156 | F.log_softmax(student_x6, dim=1), 
F.softmax(target_x6, dim=1) 157 | ) 158 | 159 | target_x7 = x7_teacher.permute(0, 2, 3, 1).reshape( 160 | num_agent * batch_size * 64 * 64, -1 161 | ) 162 | student_x7 = x7.permute(0, 2, 3, 1).reshape( 163 | num_agent * batch_size * 64 * 64, -1 164 | ) 165 | kd_loss_x7 = kl_loss_mean( 166 | F.log_softmax(student_x7, dim=1), F.softmax(target_x7, dim=1) 167 | ) 168 | 169 | target_x4 = x4_teacher.permute(0, 2, 3, 1).reshape( 170 | num_agent * batch_size * 32 * 32, -1 171 | ) 172 | student_x4 = fused_layer.permute(0, 2, 3, 1).reshape( 173 | num_agent * batch_size * 32 * 32, -1 174 | ) 175 | kd_loss_fused_layer = kl_loss_mean( 176 | F.log_softmax(student_x4, dim=1), F.softmax(target_x4, dim=1) 177 | ) 178 | 179 | return kd_weight * (kd_loss_x5 + kd_loss_x6 + kd_loss_x7 + kd_loss_fused_layer) 180 | -------------------------------------------------------------------------------- /coperception/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | from .AverageMeter import AverageMeter 3 | -------------------------------------------------------------------------------- /coperception/utils/convolutional_rnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .module import Conv1dRNN 2 | from .module import Conv1dLSTM 3 | from .module import Conv1dPeepholeLSTM 4 | from .module import Conv1dGRU 5 | 6 | from .module import Conv2dRNN 7 | from .module import Conv2dLSTM 8 | from .module import Conv2dPeepholeLSTM 9 | from .module import Conv2dGRU 10 | 11 | from .module import Conv3dRNN 12 | from .module import Conv3dLSTM 13 | from .module import Conv3dPeepholeLSTM 14 | from .module import Conv3dGRU 15 | 16 | from .module import Conv1dRNNCell 17 | from .module import Conv1dLSTMCell 18 | from .module import Conv1dPeepholeLSTMCell 19 | from .module import Conv1dGRUCell 20 | 21 | from .module import Conv2dRNNCell 22 | from .module import Conv2dLSTMCell 23 | from .module import Conv2dPeepholeLSTMCell 24 | from .module import Conv2dGRUCell 25 | 26 | from .module import Conv3dRNNCell 27 | from .module import Conv3dLSTMCell 28 | from .module import Conv3dPeepholeLSTMCell 29 | from .module import Conv3dGRUCell 30 | -------------------------------------------------------------------------------- /coperception/utils/convolutional_rnn/functional.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | try: 7 | # pytorch<=0.4.1 8 | from torch.nn._functions.thnn import rnnFusedPointwise as fusedBackend 9 | except ImportError: 10 | fusedBackend = None 11 | 12 | from .utils import _single, _pair, _triple 13 | 14 | 15 | def RNNReLUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 16 | """Copied from torch.nn._functions.rnn and modified""" 17 | if linear_func is None: 18 | linear_func = F.linear 19 | hy = F.relu(linear_func(input, w_ih, b_ih) + linear_func(hidden, w_hh, b_hh)) 20 | return hy 21 | 22 | 23 | def RNNTanhCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 24 | """Copied from torch.nn._functions.rnn and modified""" 25 | if linear_func is None: 26 | linear_func = F.linear 27 | hy = torch.tanh(linear_func(input, w_ih, b_ih) + linear_func(hidden, w_hh, b_hh)) 28 | return hy 29 | 30 | 31 | def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 32 | """Copied from torch.nn._functions.rnn and modified""" 
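    # Standard LSTM gate math, computed below:
    #   i = sigmoid(W_i x + U_i h),  f = sigmoid(W_f x + U_f h)
    #   g = tanh(W_g x + U_g h),     o = sigmoid(W_o x + U_o h)
    #   c' = f * c + i * g,          h' = o * tanh(c')
    # `linear_func` defaults to F.linear; AutogradConvRNN passes in a
    # same-padding convolution (ConvNdWithSamePadding), which turns this
    # plain LSTM cell into a convolutional one.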
33 | if linear_func is None: 34 | linear_func = F.linear 35 | if input.is_cuda and linear_func is F.linear and fusedBackend is not None: 36 | igates = linear_func(input, w_ih) 37 | hgates = linear_func(hidden[0], w_hh) 38 | state = fusedBackend.LSTMFused.apply 39 | return ( 40 | state(igates, hgates, hidden[1]) 41 | if b_ih is None 42 | else state(igates, hgates, hidden[1], b_ih, b_hh) 43 | ) 44 | 45 | hx, cx = hidden 46 | gates = linear_func(input, w_ih, b_ih) + linear_func(hx, w_hh, b_hh) 47 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) 48 | 49 | ingate = torch.sigmoid(ingate) 50 | forgetgate = torch.sigmoid(forgetgate) 51 | cellgate = torch.tanh(cellgate) 52 | outgate = torch.sigmoid(outgate) 53 | 54 | cy = (forgetgate * cx) + (ingate * cellgate) 55 | hy = outgate * torch.tanh(cy) 56 | 57 | return hy, cy 58 | 59 | 60 | def PeepholeLSTMCell( 61 | input, hidden, w_ih, w_hh, w_pi, w_pf, w_po, b_ih=None, b_hh=None, linear_func=None 62 | ): 63 | if linear_func is None: 64 | linear_func = F.linear 65 | hx, cx = hidden 66 | gates = linear_func(input, w_ih, b_ih) + linear_func(hx, w_hh, b_hh) 67 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) 68 | 69 | ingate += linear_func(cx, w_pi) 70 | forgetgate += linear_func(cx, w_pf) 71 | ingate = torch.sigmoid(ingate) 72 | forgetgate = torch.sigmoid(forgetgate) 73 | cellgate = torch.tanh(cellgate) 74 | 75 | cy = (forgetgate * cx) + (ingate * cellgate) 76 | outgate += linear_func(cy, w_po) 77 | outgate = torch.sigmoid(outgate) 78 | 79 | hy = outgate * torch.tanh(cy) 80 | 81 | return hy, cy 82 | 83 | 84 | def GRUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 85 | """Copied from torch.nn._functions.rnn and modified""" 86 | if linear_func is None: 87 | linear_func = F.linear 88 | if input.is_cuda and linear_func is F.linear and fusedBackend is not None: 89 | gi = linear_func(input, w_ih) 90 | gh = linear_func(hidden, w_hh) 91 | state = fusedBackend.GRUFused.apply 92 | return ( 93 | state(gi, gh, hidden) if b_ih is None else state(gi, gh, hidden, b_ih, b_hh) 94 | ) 95 | gi = linear_func(input, w_ih, b_ih) 96 | gh = linear_func(hidden, w_hh, b_hh) 97 | i_r, i_i, i_n = gi.chunk(3, 1) 98 | h_r, h_i, h_n = gh.chunk(3, 1) 99 | 100 | resetgate = torch.sigmoid(i_r + h_r) 101 | inputgate = torch.sigmoid(i_i + h_i) 102 | newgate = torch.tanh(i_n + resetgate * h_n) 103 | hy = newgate + inputgate * (hidden - newgate) 104 | 105 | return hy 106 | 107 | 108 | def StackedRNN(inners, num_layers, lstm=False, dropout=0, train=True): 109 | """Copied from torch.nn._functions.rnn and modified""" 110 | 111 | num_directions = len(inners) 112 | total_layers = num_layers * num_directions 113 | 114 | def forward(input, hidden, weight, batch_sizes): 115 | assert len(weight) == total_layers 116 | next_hidden = [] 117 | ch_dim = input.dim() - weight[0][0].dim() + 1 118 | 119 | if lstm: 120 | hidden = list(zip(*hidden)) 121 | 122 | for i in range(num_layers): 123 | all_output = [] 124 | for j, inner in enumerate(inners): 125 | l = i * num_directions + j 126 | 127 | hy, output = inner(input, hidden[l], weight[l], batch_sizes) 128 | next_hidden.append(hy) 129 | all_output.append(output) 130 | 131 | input = torch.cat(all_output, ch_dim) 132 | 133 | if dropout != 0 and i < num_layers - 1: 134 | input = F.dropout(input, p=dropout, training=train, inplace=False) 135 | 136 | if lstm: 137 | next_h, next_c = zip(*next_hidden) 138 | next_hidden = ( 139 | torch.cat(next_h, 0).view(total_layers, *next_h[0].size()), 140 | torch.cat(next_c, 
0).view(total_layers, *next_c[0].size()), 141 | ) 142 | else: 143 | next_hidden = torch.cat(next_hidden, 0).view( 144 | total_layers, *next_hidden[0].size() 145 | ) 146 | 147 | return next_hidden, input 148 | 149 | return forward 150 | 151 | 152 | def Recurrent(inner, reverse=False): 153 | """Copied from torch.nn._functions.rnn without any modification""" 154 | 155 | def forward(input, hidden, weight, batch_sizes): 156 | output = [] 157 | steps = range(input.size(0) - 1, -1, -1) if reverse else range(input.size(0)) 158 | for i in steps: 159 | hidden = inner(input[i], hidden, *weight) 160 | # hack to handle LSTM 161 | output.append(hidden[0] if isinstance(hidden, tuple) else hidden) 162 | 163 | if reverse: 164 | output.reverse() 165 | output = torch.cat(output, 0).view(input.size(0), *output[0].size()) 166 | 167 | return hidden, output 168 | 169 | return forward 170 | 171 | 172 | def variable_recurrent_factory(inner, reverse=False): 173 | """Copied from torch.nn._functions.rnn without any modification""" 174 | if reverse: 175 | return VariableRecurrentReverse(inner) 176 | else: 177 | return VariableRecurrent(inner) 178 | 179 | 180 | def VariableRecurrent(inner): 181 | """Copied from torch.nn._functions.rnn without any modification""" 182 | 183 | def forward(input, hidden, weight, batch_sizes): 184 | output = [] 185 | input_offset = 0 186 | last_batch_size = batch_sizes[0] 187 | hiddens = [] 188 | flat_hidden = not isinstance(hidden, tuple) 189 | if flat_hidden: 190 | hidden = (hidden,) 191 | for batch_size in batch_sizes: 192 | step_input = input[input_offset : input_offset + batch_size] 193 | input_offset += batch_size 194 | 195 | dec = last_batch_size - batch_size 196 | if dec > 0: 197 | hiddens.append(tuple(h[-dec:] for h in hidden)) 198 | hidden = tuple(h[:-dec] for h in hidden) 199 | last_batch_size = batch_size 200 | 201 | if flat_hidden: 202 | hidden = (inner(step_input, hidden[0], *weight),) 203 | else: 204 | hidden = inner(step_input, hidden, *weight) 205 | 206 | output.append(hidden[0]) 207 | hiddens.append(hidden) 208 | hiddens.reverse() 209 | 210 | hidden = tuple(torch.cat(h, 0) for h in zip(*hiddens)) 211 | assert hidden[0].size(0) == batch_sizes[0] 212 | if flat_hidden: 213 | hidden = hidden[0] 214 | output = torch.cat(output, 0) 215 | 216 | return hidden, output 217 | 218 | return forward 219 | 220 | 221 | def VariableRecurrentReverse(inner): 222 | """Copied from torch.nn._functions.rnn without any modification""" 223 | 224 | def forward(input, hidden, weight, batch_sizes): 225 | output = [] 226 | input_offset = input.size(0) 227 | last_batch_size = batch_sizes[-1] 228 | initial_hidden = hidden 229 | flat_hidden = not isinstance(hidden, tuple) 230 | if flat_hidden: 231 | hidden = (hidden,) 232 | initial_hidden = (initial_hidden,) 233 | hidden = tuple(h[: batch_sizes[-1]] for h in hidden) 234 | for i in reversed(range(len(batch_sizes))): 235 | batch_size = batch_sizes[i] 236 | inc = batch_size - last_batch_size 237 | if inc > 0: 238 | hidden = tuple( 239 | torch.cat((h, ih[last_batch_size:batch_size]), 0) 240 | for h, ih in zip(hidden, initial_hidden) 241 | ) 242 | last_batch_size = batch_size 243 | step_input = input[input_offset - batch_size : input_offset] 244 | input_offset -= batch_size 245 | 246 | if flat_hidden: 247 | hidden = (inner(step_input, hidden[0], *weight),) 248 | else: 249 | hidden = inner(step_input, hidden, *weight) 250 | output.append(hidden[0]) 251 | 252 | output.reverse() 253 | output = torch.cat(output, 0) 254 | if flat_hidden: 255 | hidden = hidden[0] 
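        # Non-LSTM cells carry a single hidden tensor; it was wrapped in a
        # 1-tuple above for uniform handling and unwrapped again just before
        # returning.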
256 | return hidden, output 257 | 258 | return forward 259 | 260 | 261 | def ConvNdWithSamePadding(convndim=2, stride=1, dilation=1, groups=1): 262 | def forward(input, w, b=None): 263 | if convndim == 1: 264 | ntuple = _single 265 | elif convndim == 2: 266 | ntuple = _pair 267 | elif convndim == 3: 268 | ntuple = _triple 269 | else: 270 | raise ValueError("convndim must be 1, 2, or 3, but got {}".format(convndim)) 271 | 272 | if input.dim() != convndim + 2: 273 | raise RuntimeError( 274 | "Input dim must be {}, bot got {}".format(convndim + 2, input.dim()) 275 | ) 276 | if w.dim() != convndim + 2: 277 | raise RuntimeError("w must be {}, bot got {}".format(convndim + 2, w.dim())) 278 | 279 | insize = input.shape[2:] 280 | kernel_size = w.shape[2:] 281 | _stride = ntuple(stride) 282 | _dilation = ntuple(dilation) 283 | 284 | ps = [ 285 | (i + 1 - h + s * (h - 1) + d * (k - 1)) // 2 286 | for h, k, s, d in list(zip(insize, kernel_size, _stride, _dilation))[::-1] 287 | for i in range(2) 288 | ] 289 | # Padding to make the output shape to have the same shape as the input 290 | input = F.pad(input, ps, "constant", 0) 291 | return getattr(F, "conv{}d".format(convndim))( 292 | input, 293 | w, 294 | b, 295 | stride=_stride, 296 | padding=ntuple(0), 297 | dilation=_dilation, 298 | groups=groups, 299 | ) 300 | 301 | return forward 302 | 303 | 304 | def _conv_cell_helper(mode, convndim=2, stride=1, dilation=1, groups=1): 305 | linear_func = ConvNdWithSamePadding( 306 | convndim=convndim, stride=stride, dilation=dilation, groups=groups 307 | ) 308 | 309 | if mode == "RNN_RELU": 310 | cell = partial(RNNReLUCell, linear_func=linear_func) 311 | elif mode == "RNN_TANH": 312 | cell = partial(RNNTanhCell, linear_func=linear_func) 313 | elif mode == "LSTM": 314 | cell = partial(LSTMCell, linear_func=linear_func) 315 | elif mode == "GRU": 316 | cell = partial(GRUCell, linear_func=linear_func) 317 | elif mode == "PeepholeLSTM": 318 | cell = partial(PeepholeLSTMCell, linear_func=linear_func) 319 | else: 320 | raise Exception("Unknown mode: {}".format(mode)) 321 | return cell 322 | 323 | 324 | def AutogradConvRNN( 325 | mode, 326 | num_layers=1, 327 | batch_first=False, 328 | dropout=0, 329 | train=True, 330 | bidirectional=False, 331 | variable_length=False, 332 | convndim=2, 333 | stride=1, 334 | dilation=1, 335 | groups=1, 336 | ): 337 | """Copied from torch.nn._functions.rnn and modified""" 338 | cell = _conv_cell_helper( 339 | mode, convndim=convndim, stride=stride, dilation=dilation, groups=groups 340 | ) 341 | 342 | rec_factory = variable_recurrent_factory if variable_length else Recurrent 343 | 344 | if bidirectional: 345 | layer = (rec_factory(cell), rec_factory(cell, reverse=True)) 346 | else: 347 | layer = (rec_factory(cell),) 348 | 349 | func = StackedRNN( 350 | layer, 351 | num_layers, 352 | (mode in ("LSTM", "PeepholeLSTM")), 353 | dropout=dropout, 354 | train=train, 355 | ) 356 | 357 | def forward(input, weight, hidden, batch_sizes): 358 | if batch_first and batch_sizes is None: 359 | input = input.transpose(0, 1) 360 | 361 | nexth, output = func(input, hidden, weight, batch_sizes) 362 | 363 | if batch_first and batch_sizes is None: 364 | output = output.transpose(0, 1) 365 | 366 | return output, nexth 367 | 368 | return forward 369 | -------------------------------------------------------------------------------- /coperception/utils/convolutional_rnn/utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from itertools import repeat 3 
| 4 | 5 | """ Copied from torch.nn.modules.utils """ 6 | 7 | 8 | def _ntuple(n): 9 | def parse(x): 10 | if isinstance(x, collections.Iterable): 11 | return x 12 | return tuple(repeat(x, n)) 13 | 14 | return parse 15 | 16 | 17 | _single = _ntuple(1) 18 | _pair = _ntuple(2) 19 | _triple = _ntuple(3) 20 | _quadruple = _ntuple(4) 21 | -------------------------------------------------------------------------------- /coperception/utils/mapping.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coperception/double-m-quantification/aa5dda688302135ef95818f1757864652bab83b4/coperception/utils/mapping.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /coperception/utils/mapping.py: -------------------------------------------------------------------------------- 1 | def __bootstrap__(): 2 | global __bootstrap__, __loader__, __file__ 3 | import sys, pkg_resources, imp 4 | 5 | __file__ = pkg_resources.resource_filename( 6 | __name__, "mapping.cpython-37m-x86_64-linux-gnu.so" 7 | ) 8 | __loader__ = None 9 | del __bootstrap__, __loader__ 10 | imp.load_dynamic(__name__, __file__) 11 | 12 | 13 | __bootstrap__() 14 | -------------------------------------------------------------------------------- /coperception/utils/mbb_util.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | import torch.optim as optim 5 | from torch.utils.data import DataLoader 6 | from tqdm import tqdm 7 | from copy import deepcopy 8 | 9 | from coperception.datasets import V2XSimDet, MbbSampler 10 | from coperception.configs import Config, ConfigGlobal 11 | from coperception.utils.CoDetModule import * 12 | from coperception.utils.loss import * 13 | from coperception.models.det import * 14 | from coperception.utils import AverageMeter 15 | from coperception.utils.data_util import apply_pose_noise 16 | from coperception.utils.mean_ap import eval_map, get_residual_error_and_cov 17 | 18 | import glob 19 | import os 20 | 21 | def check_folder(folder_path): 22 | if not os.path.exists(folder_path): 23 | os.mkdir(folder_path) 24 | return folder_path 25 | 26 | def test_model(fafmodule, validation_data_loader, flag, device, config, epoch, args): 27 | fafmodule.model.eval() 28 | num_agent = args.num_agent 29 | apply_late_fusion = args.apply_late_fusion 30 | agent_idx_range = range(num_agent) if args.rsu else range(1, num_agent) 31 | save_epoch_path = check_folder(os.path.join(args.test_store, str(epoch))) 32 | save_fig_path = [ 33 | check_folder(os.path.join(save_epoch_path, f"vis{i}")) for i in agent_idx_range 34 | ] 35 | tracking_path = [ 36 | check_folder(os.path.join(save_epoch_path, f"result{i}")) 37 | for i in agent_idx_range 38 | ] 39 | 40 | # for local and global mAP evaluation 41 | det_results_local = [[] for i in agent_idx_range] 42 | annotations_local = [[] for i in agent_idx_range] 43 | 44 | if not args.rsu: 45 | num_agent -= 1 46 | tracking_file = [set()] * num_agent 47 | 48 | for cnt, sample in enumerate(validation_data_loader): 49 | t = time.time() 50 | ( 51 | padded_voxel_point_list, 52 | padded_voxel_points_teacher_list, 53 | label_one_hot_list, 54 | reg_target_list, 55 | reg_loss_mask_list, 56 | anchors_map_list, 57 | vis_maps_list, 58 | gt_max_iou, 59 | filenames, 60 | target_agent_id_list, 61 | num_agent_list, 62 | trans_matrices_list, 63 | ) = zip(*sample) 64 | 65 | print(filenames) 66 | 67 | 
filename0 = filenames[0] 68 | trans_matrices = torch.stack(tuple(trans_matrices_list), 1) 69 | target_agent_ids = torch.stack(tuple(target_agent_id_list), 1) 70 | num_all_agents = torch.stack(tuple(num_agent_list), 1) 71 | 72 | # add pose noise 73 | if args.pose_noise > 0: 74 | apply_pose_noise(args.pose_noise, trans_matrices) 75 | 76 | if not args.rsu: 77 | num_all_agents -= 1 78 | 79 | if flag == "upperbound": 80 | padded_voxel_points = torch.cat(tuple(padded_voxel_points_teacher_list), 0) 81 | else: 82 | padded_voxel_points = torch.cat(tuple(padded_voxel_point_list), 0) 83 | 84 | label_one_hot = torch.cat(tuple(label_one_hot_list), 0) 85 | reg_target = torch.cat(tuple(reg_target_list), 0) 86 | reg_loss_mask = torch.cat(tuple(reg_loss_mask_list), 0) 87 | anchors_map = torch.cat(tuple(anchors_map_list), 0) 88 | vis_maps = torch.cat(tuple(vis_maps_list), 0) 89 | 90 | data = { 91 | "bev_seq": padded_voxel_points.to(device), 92 | "labels": label_one_hot.to(device), 93 | "reg_targets": reg_target.to(device), 94 | "anchors": anchors_map.to(device), 95 | "vis_maps": vis_maps.to(device), 96 | "reg_loss_mask": reg_loss_mask.to(device).type(dtype=torch.bool), 97 | "target_agent_ids": target_agent_ids.to(device), 98 | "num_agent": num_all_agents.to(device), 99 | "trans_matrices": trans_matrices.to(device), 100 | } 101 | 102 | if flag == "lowerbound_box_com": 103 | loss, cls_loss, loc_loss, result = fafmodule.predict_all_with_box_com( 104 | data, data["trans_matrices"] 105 | ) 106 | elif flag == "disco": 107 | ( 108 | loss, 109 | cls_loss, 110 | loc_loss, 111 | result, 112 | save_agent_weight_list, 113 | ) = fafmodule.predict_all(data, 1, num_agent=num_agent) 114 | else: 115 | loss, cls_loss, loc_loss, result = fafmodule.predict_all( 116 | data, 1, num_agent=num_agent 117 | ) 118 | 119 | box_color_map = ["red", "yellow", "blue", "purple", "black", "orange"] 120 | 121 | # If has RSU, do not count RSU's output into evaluation 122 | eval_start_idx = 1 if args.rsu else 0 123 | 124 | # local qualitative evaluation 125 | for k in range(eval_start_idx, num_agent): 126 | box_colors = None 127 | if apply_late_fusion == 1 and len(result[k]) != 0: 128 | pred_restore = result[k][0][0][0]["pred"] 129 | score_restore = result[k][0][0][0]["score"] 130 | selected_idx_restore = result[k][0][0][0]["selected_idx"] 131 | 132 | data_agents = { 133 | "bev_seq": torch.unsqueeze(padded_voxel_points[k, :, :, :, :], 1), 134 | "reg_targets": torch.unsqueeze(reg_target[k, :, :, :, :, :], 0), 135 | "anchors": torch.unsqueeze(anchors_map[k, :, :, :, :], 0), 136 | } 137 | temp = gt_max_iou[k] 138 | 139 | if len(temp[0]["gt_box"]) == 0: 140 | data_agents["gt_max_iou"] = [] 141 | else: 142 | data_agents["gt_max_iou"] = temp[0]["gt_box"][0, :, :] 143 | 144 | # late fusion 145 | if apply_late_fusion == 1 and len(result[k]) != 0: 146 | box_colors = late_fusion( 147 | k, num_agent, result, trans_matrices, box_color_map 148 | ) 149 | 150 | result_temp = result[k] 151 | 152 | temp = { 153 | "bev_seq": data_agents["bev_seq"][0, -1].cpu().numpy(), 154 | "result": [] if len(result_temp) == 0 else result_temp[0][0], 155 | "reg_targets": data_agents["reg_targets"].cpu().numpy()[0], 156 | "anchors_map": data_agents["anchors"].cpu().numpy()[0], 157 | "gt_max_iou": data_agents["gt_max_iou"], 158 | } 159 | det_results_local[k], annotations_local[k], det_results_frame, annotations_frame = cal_local_mAP( 160 | config, temp, det_results_local[k], annotations_local[k], True 161 | ) 162 | 163 | filename = str(filename0[0][0]) 164 | cut = 
filename[filename.rfind("agent") + 7 :] 165 | seq_name = cut[: cut.rfind("_")] 166 | idx = cut[cut.rfind("_") + 1 : cut.rfind("/")] 167 | seq_save = os.path.join(save_fig_path[k], seq_name) 168 | check_folder(seq_save) 169 | idx_save = str(idx) + ".png" 170 | #temp_ = deepcopy(temp) 171 | if args.visualization: 172 | visualization( 173 | config, 174 | temp, 175 | box_colors, 176 | box_color_map, 177 | apply_late_fusion, 178 | os.path.join(seq_save, idx_save), 179 | ) 180 | 181 | # restore data before late-fusion 182 | if apply_late_fusion == 1 and len(result[k]) != 0: 183 | result[k][0][0][0]["pred"] = pred_restore 184 | result[k][0][0][0]["score"] = score_restore 185 | result[k][0][0][0]["selected_idx"] = selected_idx_restore 186 | 187 | print("Validation scene {}, at frame {}".format(seq_name, idx)) 188 | print("Takes {} s\n".format(str(time.time() - t))) 189 | 190 | log_file_path = os.path.join(args.test_store, "log_test.txt") 191 | if os.path.exists(log_file_path): 192 | log_file = open(log_file_path, "a") 193 | else: 194 | log_file = open(log_file_path, "w") 195 | 196 | def print_and_write_log(log_str): 197 | print(log_str) 198 | log_file.write(log_str + "\n") 199 | 200 | # local mAP evaluation 201 | det_results_all_local = [] 202 | annotations_all_local = [] 203 | mean_ap_5 = [] 204 | mean_ap_7 = [] 205 | mean_ap_all = [] 206 | for k in range(eval_start_idx, num_agent): 207 | if type(det_results_local[k]) != list or len(det_results_local[k]) == 0: 208 | continue 209 | 210 | print_and_write_log("Local mAP@0.5 from agent {}".format(k)) 211 | mean_ap, _ = eval_map( 212 | det_results_local[k], 213 | annotations_local[k], 214 | scale_ranges=None, 215 | iou_thr=0.5, 216 | dataset=None, 217 | logger=None, 218 | ) 219 | mean_ap_5.append(mean_ap) 220 | print_and_write_log("Local mAP@0.7 from agent {}".format(k)) 221 | 222 | mean_ap, _ = eval_map( 223 | det_results_local[k], 224 | annotations_local[k], 225 | scale_ranges=None, 226 | iou_thr=0.7, 227 | dataset=None, 228 | logger=None, 229 | ) 230 | mean_ap_7.append(mean_ap) 231 | 232 | det_results_all_local += det_results_local[k] 233 | annotations_all_local += annotations_local[k] 234 | 235 | npy_frame_file = os.path.join(save_epoch_path, "all_data.npy") 236 | det_res = {"det_results_frame": det_results_all_local, "annotations_frame": annotations_all_local} 237 | np.save(npy_frame_file, det_res) 238 | mean_ap_local_average, _ = eval_map( 239 | det_results_all_local, 240 | annotations_all_local, 241 | scale_ranges=None, 242 | iou_thr=0.5, 243 | dataset=None, 244 | logger=None, 245 | ) 246 | mean_ap_all.append(mean_ap_local_average) 247 | 248 | mean_ap_local_average, _ = eval_map( 249 | det_results_all_local, 250 | annotations_all_local, 251 | scale_ranges=None, 252 | iou_thr=0.7, 253 | dataset=None, 254 | logger=None, 255 | ) 256 | mean_ap_all.append(mean_ap_local_average) 257 | mean_ap_agents = [] 258 | mean_ap_agents.append(mean_ap_5) 259 | mean_ap_agents.append(mean_ap_7) 260 | 261 | print_and_write_log( 262 | "Quantitative evaluation results of model, at epoch {}".format( 263 | epoch 264 | ) 265 | ) 266 | 267 | print_and_write_log( 268 | "average local mAP@0.5 is {} and average local mAP@0.7 is {}".format( 269 | mean_ap_all[0], mean_ap_all[1] 270 | ) 271 | ) 272 | 273 | def computer_mbb_covar(args): 274 | start_epoch = 0 275 | end_epoch = args.nepoch 276 | res_diff = [] 277 | all_predicted_covariance = [] 278 | covar_flag = False 279 | iou_thr = 0.5 280 | for epoch in range(start_epoch, end_epoch+1): 281 | data_path = args.test_store + 
"/{}".format(epoch) +"/all_data.npy" 282 | print("Load data from {}".format(data_path)) 283 | data = np.load(data_path, allow_pickle=True) 284 | det_results_all_local = data.item()['det_results_frame'] 285 | annotations_all_local = data.item()['annotations_frame'] 286 | res_diff_one_epoch, predicted_covar = get_residual_error_and_cov(det_results_all_local, annotations_all_local, scale_ranges=None, iou_thr=iou_thr) 287 | res_diff.extend(res_diff_one_epoch) 288 | if predicted_covar != None: 289 | all_predicted_covariance.extend(predicted_covar) 290 | covar_flag = True 291 | print("Number of corners of all bounding box: {}".format(len(res_diff[epoch]))) 292 | res_diff_np = np.array(res_diff[0]) 293 | if covar_flag: 294 | all_predicted_covariance_np = np.array(all_predicted_covariance[0]) 295 | for i in range(1, len(res_diff)): 296 | res_diff_np = np.concatenate((res_diff_np, res_diff[i])) 297 | if covar_flag: 298 | all_predicted_covariance_np = np.concatenate((all_predicted_covariance_np, all_predicted_covariance[i])) 299 | print(res_diff_np.shape) 300 | print("covariance matrix for residual error:") 301 | covar_e = np.cov(res_diff_np.T) 302 | print(covar_e) 303 | save_data = {"covar_e":covar_e} 304 | if covar_flag: 305 | print(all_predicted_covariance_np.shape) 306 | print("mean of predicted covariance matrix:") 307 | covar_a = np.mean(all_predicted_covariance_np, axis=0) 308 | print(covar_a) 309 | save_data['covar_a'] = covar_a 310 | save_data_path = args.test_store + "/mbb_covar.npy" 311 | np.save(save_data_path, save_data) 312 | print("Save computed covariance in {}".format(save_data_path)) -------------------------------------------------------------------------------- /coperception/utils/min_norm_solvers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Mitsubishi Electric Research Laboratories (MERL). All rights reserved. The software, documentation and/or data in this file is provided on an "as is" basis, and MERL has no obligations to provide maintenance, support, updates, enhancements or modifications. MERL specifically disclaims any warranties, including, but not limited to, the implied warranties of merchantability and fitness for any particular purpose. In no event shall MERL be liable to any party for direct, indirect, special, incidental, or consequential damages, including lost profits, arising out of the use of this software and its documentation, even if MERL has been advised of the possibility of such damages. As more fully described in the license agreement that was required in order to download this software, documentation and/or data, permission to use, copy and modify this software without fee is granted, but only for educational, research and non-commercial purposes. 
2 | 3 | # Original code from https://github.com/intel-isl/MultiObjectiveOptimization 4 | 5 | ############################################################################# 6 | # Note: The functions in this file require PyTorch 1.1 # 7 | ############################################################################# 8 | 9 | import numpy as np 10 | import torch 11 | 12 | 13 | class MinNormSolver: 14 | MAX_ITER = 250 15 | STOP_CRIT = 1e-5 16 | 17 | def _min_norm_element_from2(v1v1, v1v2, v2v2): 18 | """ 19 | Analytical solution for min_{c} |cx_1 + (1-c)x_2|_2^2 20 | d is the distance (objective) optimzed 21 | v1v1 = 22 | v1v2 = 23 | v2v2 = 24 | """ 25 | if v1v2 >= v1v1: 26 | # Case: Fig 1, third column 27 | gamma = 0.999 28 | cost = v1v1 29 | return gamma, cost 30 | if v1v2 >= v2v2: 31 | # Case: Fig 1, first column 32 | gamma = 0.001 33 | cost = v2v2 34 | return gamma, cost 35 | # Case: Fig 1, second column 36 | gamma = -1.0 * ((v1v2 - v2v2) / (v1v1 + v2v2 - 2 * v1v2)) 37 | cost = v2v2 + gamma * (v1v2 - v2v2) 38 | return gamma, cost 39 | 40 | def _min_norm_2d(vecs, dps): 41 | """ 42 | Find the minimum norm solution as combination of two points 43 | This is correct only in 2D 44 | ie. min_c |\sum c_i x_i|_2^2 st. \sum c_i = 1 , 1 >= c_1 >= 0 for all i, c_i + c_j = 1.0 for some i, j 45 | """ 46 | dmin = 1e8 47 | for i in range(len(vecs)): 48 | for j in range(i + 1, len(vecs)): 49 | if (i, j) not in dps: 50 | dps[(i, j)] = 0.0 51 | for k in range(len(vecs[i])): 52 | dps[(i, j)] += torch.sum(vecs[i][k] * vecs[j][k]).item() 53 | dps[(j, i)] = dps[(i, j)] 54 | if (i, i) not in dps: 55 | dps[(i, i)] = 0.0 56 | for k in range(len(vecs[i])): 57 | dps[(i, i)] += torch.sum(vecs[i][k] * vecs[i][k]).item() 58 | if (j, j) not in dps: 59 | dps[(j, j)] = 0.0 60 | for k in range(len(vecs[i])): 61 | dps[(j, j)] += torch.sum(vecs[j][k] * vecs[j][k]).item() 62 | c, d = MinNormSolver._min_norm_element_from2( 63 | dps[(i, i)], dps[(i, j)], dps[(j, j)] 64 | ) 65 | if d < dmin: 66 | dmin = d 67 | sol = [(i, j), c, d] 68 | return sol, dps 69 | 70 | def _projection2simplex(y): 71 | """ 72 | Given y, it solves argmin_z |y-z|_2 st \sum z = 1 , 1 >= z_i >= 0 for all i 73 | """ 74 | m = len(y) 75 | sorted_y = np.flip(np.sort(y), axis=0) 76 | tmpsum = 0.0 77 | tmax_f = (np.sum(y) - 1.0) / m 78 | for i in range(m - 1): 79 | tmpsum += sorted_y[i] 80 | tmax = (tmpsum - 1) / (i + 1.0) 81 | if tmax > sorted_y[i + 1]: 82 | tmax_f = tmax 83 | break 84 | return np.maximum(y - tmax_f, np.zeros(y.shape)) 85 | 86 | def _next_point(cur_val, grad, n): 87 | proj_grad = grad - (np.sum(grad) / n) 88 | tm1 = -1.0 * cur_val[proj_grad < 0] / proj_grad[proj_grad < 0] 89 | tm2 = (1.0 - cur_val[proj_grad > 0]) / (proj_grad[proj_grad > 0]) 90 | 91 | # skippers = np.sum(tm1 < 1e-7) + np.sum(tm2 < 1e-7) 92 | t = 1 93 | if len(tm1[tm1 > 1e-7]) > 0: 94 | t = np.min(tm1[tm1 > 1e-7]) 95 | if len(tm2[tm2 > 1e-7]) > 0: 96 | t = min(t, np.min(tm2[tm2 > 1e-7])) 97 | 98 | next_point = proj_grad * t + cur_val 99 | next_point = MinNormSolver._projection2simplex(next_point) 100 | return next_point 101 | 102 | def find_min_norm_element(vecs): 103 | """ 104 | Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull 105 | as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
106 | It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) 107 | Hence, we find the best 2-task solution, and then run the projected gradient descent until convergence 108 | """ 109 | # Solution lying at the combination of two points 110 | dps = {} 111 | init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) 112 | 113 | n = len(vecs) 114 | sol_vec = np.zeros(n) 115 | sol_vec[init_sol[0][0]] = init_sol[1] 116 | sol_vec[init_sol[0][1]] = 1 - init_sol[1] 117 | 118 | if n < 3: 119 | # This is optimal for n=2, so return the solution 120 | return sol_vec, init_sol[2] 121 | 122 | iter_count = 0 123 | 124 | grad_mat = np.zeros((n, n)) 125 | for i in range(n): 126 | for j in range(n): 127 | grad_mat[i, j] = dps[(i, j)] 128 | 129 | while iter_count < MinNormSolver.MAX_ITER: 130 | grad_dir = -1.0 * np.dot(grad_mat, sol_vec) 131 | new_point = MinNormSolver._next_point(sol_vec, grad_dir, n) 132 | # Re-compute the inner products for line search 133 | v1v1 = 0.0 134 | v1v2 = 0.0 135 | v2v2 = 0.0 136 | for i in range(n): 137 | for j in range(n): 138 | v1v1 += sol_vec[i] * sol_vec[j] * dps[(i, j)] 139 | v1v2 += sol_vec[i] * new_point[j] * dps[(i, j)] 140 | v2v2 += new_point[i] * new_point[j] * dps[(i, j)] 141 | nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) 142 | new_sol_vec = nc * sol_vec + (1 - nc) * new_point 143 | change = new_sol_vec - sol_vec 144 | if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: 145 | return sol_vec, nd 146 | sol_vec = new_sol_vec 147 | 148 | def find_min_norm_element_FW(vecs): 149 | """ 150 | Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull 151 | as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
152 | It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) 153 | Hence, we find the best 2-task solution, and then run the Frank Wolfe until convergence 154 | """ 155 | # Solution lying at the combination of two points 156 | dps = {} 157 | init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) 158 | 159 | n = len(vecs) 160 | sol_vec = np.zeros(n) 161 | sol_vec[init_sol[0][0]] = init_sol[1] 162 | sol_vec[init_sol[0][1]] = 1 - init_sol[1] 163 | 164 | if n < 3: 165 | # This is optimal for n=2, so return the solution 166 | return sol_vec, init_sol[2] 167 | 168 | iter_count = 0 169 | 170 | grad_mat = np.zeros((n, n)) 171 | for i in range(n): 172 | for j in range(n): 173 | grad_mat[i, j] = dps[(i, j)] 174 | 175 | while iter_count < MinNormSolver.MAX_ITER: 176 | t_iter = np.argmin(np.dot(grad_mat, sol_vec)) 177 | 178 | v1v1 = np.dot(sol_vec, np.dot(grad_mat, sol_vec)) 179 | v1v2 = np.dot(sol_vec, grad_mat[:, t_iter]) 180 | v2v2 = grad_mat[t_iter, t_iter] 181 | 182 | nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) 183 | new_sol_vec = nc * sol_vec 184 | new_sol_vec[t_iter] += 1 - nc 185 | 186 | change = new_sol_vec - sol_vec 187 | if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: 188 | return sol_vec, nd 189 | sol_vec = new_sol_vec 190 | 191 | 192 | def gradient_normalizers(grads, losses, normalization_type): 193 | gn = {} 194 | if normalization_type == "l2": 195 | for t in grads: 196 | gn[t] = np.sqrt(np.sum([gr.pow(2).sum().item() for gr in grads[t]])) 197 | elif normalization_type == "loss": 198 | for t in grads: 199 | gn[t] = losses[t] 200 | elif normalization_type == "loss+": 201 | for t in grads: 202 | gn[t] = losses[t] * np.sqrt( 203 | np.sum([gr.pow(2).sum().item() for gr in grads[t]]) 204 | ) 205 | elif normalization_type == "none": 206 | for t in grads: 207 | gn[t] = 1.0 208 | else: 209 | print("ERROR: Invalid Normalization Type") 210 | return gn 211 | -------------------------------------------------------------------------------- /coperception/utils/postprocess.py: -------------------------------------------------------------------------------- 1 | """ 2 | Non Max Suppression 3 | IOU, Recall, Precision, Find overlap and Average Precisions 4 | Source Code is adapted from github.com/matterport/MaskRCNN 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from shapely.geometry import Polygon 10 | 11 | 12 | def convert_format(boxes_array): 13 | """ 14 | :param array: an array of shape [# bboxs, 4, 2] 15 | :return: a shapely.geometry.Polygon object 16 | """ 17 | 18 | polygons = [ 19 | Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in boxes_array 20 | ] 21 | return np.array(polygons) 22 | 23 | 24 | def compute_overlaps(boxes1, boxes2): 25 | """Computes IoU overlaps between two sets of boxes. 26 | boxes1, boxes2: a np array of boxes 27 | For better performance, pass the largest set first and the smaller second. 28 | :return: a matrix of overlaps [boxes1 count, boxes2 count] 29 | """ 30 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 31 | # Each cell contains the IoU value. 32 | 33 | boxes1 = convert_format(boxes1) 34 | boxes2 = convert_format(boxes2) 35 | overlaps = np.zeros((len(boxes1), len(boxes2))) 36 | for i in range(overlaps.shape[1]): 37 | box2 = boxes2[i] 38 | overlaps[:, i] = compute_iou(box2, boxes1) 39 | return overlaps 40 | 41 | 42 | def compute_iou(box, boxes): 43 | """Calculates IoU of the given box with the array of the given boxes. 
44 | box: a polygon 45 | boxes: a vector of polygons 46 | Note: the areas are passed in rather than calculated here for 47 | efficiency. Calculate once in the caller to avoid duplicate work. 48 | """ 49 | # Calculate intersection areas 50 | iou = [box.intersection(b).area / box.union(b).area for b in boxes] 51 | 52 | return np.array(iou, dtype=np.float32) 53 | 54 | 55 | def compute_recall(pred_boxes, gt_boxes, iou): 56 | """Compute the recall at the given IoU threshold. It's an indication 57 | of how many GT boxes were found by the given prediction boxes. 58 | pred_boxes: a list of predicted Polygons of size N 59 | gt_boxes: a list of ground truth Polygons of size N 60 | """ 61 | # Measure overlaps 62 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 63 | iou_max = np.max(overlaps, axis=1) 64 | iou_argmax = np.argmax(overlaps, axis=1) 65 | positive_ids = np.where(iou_max >= iou)[0] 66 | matched_gt_boxes = iou_argmax[positive_ids] 67 | 68 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 69 | return recall, positive_ids 70 | 71 | 72 | def non_max_suppression(boxes, scores, threshold): 73 | """Performs non-maximum suppression and returns indices of kept boxes. 74 | scores: 1-D array of box scores. 75 | threshold: Float. IoU threshold to use for filtering. 76 | return an numpy array of the positions of picks 77 | """ 78 | assert boxes.shape[0] > 0 79 | if boxes.dtype.kind != "f": 80 | boxes = boxes.astype(np.float32) 81 | 82 | # Get indicies of boxes sorted by scores (highest first) 83 | # ixs = scores.argsort()[::-1][:top] 84 | fil_id = np.where(scores > 0.7)[0] 85 | ixs_sort = scores[fil_id].argsort()[::-1] 86 | # print(fil_id) 87 | ixs = [] 88 | for i in range(len(fil_id)): 89 | ixs.append(fil_id[ixs_sort[i]]) 90 | 91 | polygons = convert_format(boxes[ixs]) 92 | iter_ixs = [ii for ii in range(len(polygons))] 93 | 94 | pick = [] 95 | # print('ori: ',len(ixs)) 96 | while len(iter_ixs) > 0: 97 | # Pick top box and add its index to the list 98 | i = iter_ixs[0] 99 | pick.append(ixs[i]) 100 | # Compute IoU of the picked box with the rest 101 | iou = compute_iou(polygons[i], polygons[iter_ixs[1:]]) 102 | # Identify boxes with IoU over the threshold. This 103 | # returns indices into ixs[1:], so add 1 to get 104 | # indices into ixs. 105 | 106 | remove_ixs = np.where(iou > threshold)[0] + 1 107 | 108 | # Remove indices of the picked and overlapped boxes. 109 | iter_ixs = np.delete(iter_ixs, remove_ixs) 110 | iter_ixs = np.delete(iter_ixs, 0) 111 | 112 | print("selected: ", len(pick)) 113 | return np.array(pick, dtype=np.int32) 114 | 115 | 116 | def filter_pred(config, pred): 117 | if len(pred.size()) == 4: 118 | if pred.size(0) == 1: 119 | pred.squeeze_(0) 120 | else: 121 | raise ValueError("Tensor dimension is not right") 122 | 123 | cls_pred = pred[0, ...] 
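    # Channel 0 of `pred` is the classification score map; channels 7-14 hold
    # the 8 regression values per location (4 box corners x (x, y)), which are
    # gathered below and pruned with NMS.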
124 | activation = cls_pred > config["cls_threshold"] 125 | num_boxes = int(activation.sum()) 126 | 127 | if num_boxes == 0: 128 | # print("No bounding box found") 129 | return [], [] 130 | 131 | corners = torch.zeros((num_boxes, 8)) 132 | for i in range(7, 15): 133 | corners[:, i - 7] = torch.masked_select(pred[i, ...], activation) 134 | corners = corners.view(-1, 4, 2).numpy() 135 | scores = torch.masked_select(cls_pred, activation).cpu().numpy() 136 | 137 | # NMS 138 | selected_ids = non_max_suppression(corners, scores, config["nms_iou_threshold"]) 139 | corners = corners[selected_ids] 140 | scores = scores[selected_ids] 141 | 142 | return corners, scores 143 | 144 | 145 | def compute_ap_range( 146 | gt_box, 147 | gt_class_id, 148 | pred_box, 149 | pred_class_id, 150 | pred_score, 151 | iou_thresholds=None, 152 | verbose=1, 153 | ): 154 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 155 | # Default is 0.5 to 0.95 with increments of 0.05 156 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 157 | 158 | # Compute AP over range of IoU thresholds 159 | AP = [] 160 | for iou_threshold in iou_thresholds: 161 | ap, precisions, recalls, overlaps = compute_ap( 162 | gt_box, 163 | gt_class_id, 164 | pred_box, 165 | pred_class_id, 166 | pred_score, 167 | iou_threshold=iou_threshold, 168 | ) 169 | if verbose: 170 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 171 | AP.append(ap) 172 | AP = np.array(AP).mean() 173 | if verbose: 174 | print( 175 | "AP @{:.2f}-{:.2f}:\t {:.3f}".format( 176 | iou_thresholds[0], iou_thresholds[-1], AP 177 | ) 178 | ) 179 | return AP 180 | 181 | 182 | def compute_ap(pred_match, num_gt, num_pred): 183 | 184 | assert num_gt != 0 185 | assert num_pred != 0 186 | tp = (pred_match > -1).sum() 187 | # Compute precision and recall at each prediction box step 188 | precisions = np.cumsum(pred_match > -1) / (np.arange(num_pred) + 1) 189 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / num_gt 190 | 191 | # Pad with start and end values to simplify the math 192 | precisions = np.concatenate([[0], precisions, [0]]) 193 | recalls = np.concatenate([[0], recalls, [1]]) 194 | 195 | # Ensure precision values decrease but don't increase. This way, the 196 | # precision value at each recall threshold is the maximum it can be 197 | # for all following recall thresholds, as specified by the VOC paper. 198 | for i in range(len(precisions) - 2, -1, -1): 199 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 200 | 201 | # Compute mean AP over recall range 202 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 203 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * precisions[indices]) 204 | precision = tp / num_pred 205 | recall = tp / num_gt 206 | return mAP, precisions, recalls, precision, recall 207 | 208 | 209 | def compute_matches( 210 | gt_boxes, pred_boxes, pred_scores, iou_threshold=0.5, score_threshold=0.0 211 | ): 212 | """Finds matches between prediction and ground truth instances. 213 | Returns: 214 | gt_match: 1-D array. For each GT box it has the index of the matched 215 | predicted box. 216 | pred_match: 1-D array. For each predicted box, it has the index of 217 | the matched ground truth box. 218 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
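        Unmatched entries in gt_match and pred_match are left as -1.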
219 | """ 220 | 221 | if len(pred_scores) == 0: 222 | return -1 * np.ones([gt_boxes.shape[0]]), np.array([]), np.array([]) 223 | 224 | gt_class_ids = np.ones(len(gt_boxes), dtype=int) 225 | pred_class_ids = np.ones(len(pred_scores), dtype=int) 226 | 227 | # Sort predictions by score from high to low 228 | indices = np.argsort(pred_scores)[::-1] 229 | pred_boxes = pred_boxes[indices] 230 | pred_class_ids = pred_class_ids[indices] 231 | pred_scores = pred_scores[indices] 232 | 233 | # Compute IoU overlaps [pred_boxes, gt_boxes] 234 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 235 | 236 | # Loop through predictions and find matching ground truth boxes 237 | match_count = 0 238 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 239 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 240 | for i in range(len(pred_boxes)): 241 | # Find best matching ground truth box 242 | # 1. Sort matches by score 243 | sorted_ixs = np.argsort(overlaps[i])[::-1] 244 | # 2. Remove low scores 245 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 246 | if low_score_idx.size > 0: 247 | sorted_ixs = sorted_ixs[: low_score_idx[0]] 248 | # 3. Find the match 249 | for j in sorted_ixs: 250 | # If ground truth box is already matched, go to next one 251 | if gt_match[j] > 0: 252 | continue 253 | # If we reach IoU smaller than the threshold, end the loop 254 | iou = overlaps[i, j] 255 | if iou < iou_threshold: 256 | break 257 | # Do we have a match? 258 | if pred_class_ids[i] == gt_class_ids[j]: 259 | match_count += 1 260 | gt_match[j] = i 261 | pred_match[i] = j 262 | break 263 | 264 | return gt_match, pred_match, overlaps 265 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: coperception 2 | 3 | dependencies: 4 | - python==3.7 5 | - pip 6 | - pip: 7 | - numpy 8 | - torch 9 | - opencv-python 10 | - torchvision 11 | - typing 12 | - nuscenes-devkit==1.0.9 13 | - pyquaternion 14 | - numba 15 | - matplotlib 16 | - mmcv 17 | - terminaltables 18 | - shapely -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | opencv-python 4 | torchvision 5 | typing 6 | nuscenes-devkit==1.0.9 7 | pyquaternion 8 | numba 9 | matplotlib 10 | mmcv 11 | terminaltables 12 | shapely -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from distutils.core import setup 3 | import setuptools 4 | 5 | # The directory containing this file 6 | HERE = pathlib.Path(__file__).parent 7 | 8 | # The text of the README file 9 | README = (HERE / "README.md").read_text() 10 | 11 | setup( 12 | name="coperception", 13 | version="0.0.10", 14 | package_data={ 15 | "": ["*.so"], 16 | }, 17 | packages=setuptools.find_packages(), 18 | license="apache-2.0", # Chose a license from here: https://help.github.com/articles/licensing-a-repository 19 | description="A library for collaborative perception.", 20 | author="AI4CE Lab @NYU", 21 | author_email="dm4524@nyu.edu", 22 | url="https://ai4ce.github.io/", 23 | download_url="https://github.com/coperception/coperception/archive/refs/tags/v0.0.1-alpha.tar.gz", 24 | keywords=[ 25 | "computer-vision", 26 | "deep-learning", 27 | "autonomous-driving", 28 | "collaborative-learning", 
29 | "knowledge-distillation", 30 | "communication-networks", 31 | "multi-agent-learning", 32 | "multi-agent-system", 33 | "3d-object-detection", 34 | "graph-learning", 35 | "point-cloud-processing", 36 | "v2x-communication", 37 | "multi-agent-perception", 38 | "3d-scene-understanding", 39 | ], # Keywords that define your package best 40 | install_requires=[ 41 | "numpy", 42 | "torch", 43 | "opencv-python", 44 | "torchvision", 45 | "typing", 46 | "nuscenes-devkit", 47 | "pyquaternion", 48 | "numba", 49 | "matplotlib", 50 | "mmcv", 51 | "terminaltables", 52 | "shapely", 53 | "seaborn", 54 | ], 55 | classifiers=[ 56 | "Development Status :: 3 - Alpha", # Chose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package 57 | "Intended Audience :: Developers", # Define that your audience are developers 58 | "Topic :: Software Development :: Libraries", 59 | "License :: OSI Approved :: Apache Software License", 60 | "Programming Language :: Python :: 3.7", # Specify which pyhton versions that you want to support 61 | ], 62 | ) 63 | -------------------------------------------------------------------------------- /tools/det/Makefile: -------------------------------------------------------------------------------- 1 | # Path to the original V2X-Sim dataset 2 | original_data_path := /data/v2x-sim 3 | # Where to save the created data 4 | #V2X-Sim-det V2X-Sim-demo 5 | create_data_save_path := /data/v2x-sim-nyu/V2X-Sim-det 6 | # Index of the begining scene 7 | scene_begin := 0 8 | # Index of the ending scene + 1 9 | scene_end := 100 10 | # Index of the start agent 11 | from_agent := 0 12 | # Index of the end agent + 1 13 | to_agent := 6 14 | 15 | training_script := train_codet.py 16 | # Path to the created training data 17 | training_data := $(create_data_save_path)/train 18 | 19 | testing_script := test_codet.py 20 | # Path to the test/val data 21 | testing_data := $(create_data_save_path)/test 22 | val_data := $(create_data_save_path)/val 23 | # [lowerbound / upperbound / v2v / disco / when2com / max / mean / sum / agent] 24 | com := disco 25 | batch_size := 1 26 | # [train / test / val] 27 | split := train 28 | # Where to store the logs 29 | logpath := check/check_loss_base 30 | testlogpath := check/test_loss_corner_pair_ind 31 | # Train for how many epochs 32 | nepoch := 25 33 | # loss type: corner_loss faf_loss kl_loss_center kl_loss_center_add, kl_loss_corner, kl_loss_center_ind, kl_loss_center_offset_ind, kl_loss_corner_pair_ind 34 | loss_type = kl_loss_corner_pair_ind 35 | # If given, the model will resume from its most recent (by modification time) check point 36 | auto_resume_path := $(logpath) 37 | # experiment name 38 | exp_name = test_mbb_base 39 | # compress_level 40 | compress_level := 0 41 | # 1: apply late fusion. 0: no late fusion 42 | apply_late_fusion := 0 43 | # 1: do visualizaton. 
0: no visualization 44 | visualization := 0 45 | # pose_noise in meters 46 | pose_noise := 0 47 | # only apply v2i communication 48 | only_v2i := 0 49 | # 0: no RSU, 1: RSU 50 | rsu := 0 51 | # Whether to use pose info for When2com 52 | warp_flag := 0 53 | # Used when testing when2com / who2com 54 | # when2com: activated, who2com: argmax_test 55 | inference := activated 56 | #block len for MBB method 57 | block_len := 16 58 | 59 | create_data: 60 | python create_data_det.py \ 61 | --root $(original_data_path) \ 62 | --split $(split) \ 63 | --scene_begin $(scene_begin) \ 64 | --scene_end $(scene_end) \ 65 | --savepath $(create_data_save_path) \ 66 | --from_agent $(from_agent) \ 67 | --to_agent $(to_agent) 68 | 69 | train: 70 | python $(training_script) \ 71 | --data $(training_data) \ 72 | --com $(com) \ 73 | --log \ 74 | --batch_size $(batch_size) \ 75 | --auto_resume_path $(auto_resume_path) \ 76 | --nepoch $(nepoch) \ 77 | --logpath $(logpath) \ 78 | --warp_flag $(warp_flag) \ 79 | --rsu $(rsu) \ 80 | --compress_level $(compress_level) \ 81 | --pose_noise $(pose_noise) \ 82 | --only_v2i $(only_v2i) \ 83 | --loss_type $(loss_type) \ 84 | --exp_name $(exp_name) 85 | 86 | 87 | train_disco: 88 | python $(training_script) \ 89 | --data $(training_data) \ 90 | --com disco \ 91 | --log --batch $(batch_size) \ 92 | --kd_flag 1 \ 93 | --resume_teacher $(logpath)/upperbound/with_rsu/epoch_$(nepoch).pth \ 94 | --auto_resume_path $(auto_resume_path) \ 95 | --logpath $(logpath) \ 96 | --nepoch $(nepoch) \ 97 | --compress_level $(compress_level) \ 98 | --pose_noise $(pose_noise) \ 99 | --only_v2i $(only_v2i) \ 100 | -- rsu 1 \ 101 | --loss_type $(loss_type) 102 | 103 | train_disco_no_rsu: 104 | python $(training_script) \ 105 | --data $(training_data) \ 106 | --com disco \ 107 | --log --batch $(batch_size) \ 108 | --kd_flag 1 \ 109 | --resume_teacher check/check_loss_base/upperbound/no_rsu/epoch_100.pth \ 110 | --auto_resume_path $(auto_resume_path) \ 111 | --logpath $(logpath) \ 112 | --nepoch $(nepoch) \ 113 | --compress_level $(compress_level) \ 114 | --pose_noise $(pose_noise) \ 115 | --only_v2i $(only_v2i) \ 116 | --rsu 0 \ 117 | --loss_type $(loss_type) \ 118 | --exp_name $(exp_name) 119 | 120 | mbb_train: 121 | python train_mbb.py \ 122 | --data $(training_data) \ 123 | --com $(com) \ 124 | --log \ 125 | --batch_size $(batch_size) \ 126 | --auto_resume_path $(auto_resume_path) \ 127 | --nepoch $(nepoch) \ 128 | --logpath $(logpath) \ 129 | --warp_flag $(warp_flag) \ 130 | --rsu $(rsu) \ 131 | --compress_level $(compress_level) \ 132 | --pose_noise $(pose_noise) \ 133 | --only_v2i $(only_v2i) \ 134 | --block_len $(block_len) \ 135 | --init_resume_path check/check_loss_base/$(com)/no_rsu/epoch_80.pth \ 136 | --test_store $(logpath)/$(com)/no_rsu \ 137 | --test_data $(create_data_save_path) \ 138 | --loss_type $(loss_type) \ 139 | --exp_name $(exp_name) \ 140 | --test 141 | 142 | mbb_train_disco_no_rsu: 143 | python train_mbb.py \ 144 | --data $(training_data) \ 145 | --com disco \ 146 | --log --batch $(batch_size) \ 147 | --kd_flag 1 \ 148 | --resume_teacher check/check_loss_base/upperbound/no_rsu/epoch_100.pth \ 149 | --auto_resume_path $(auto_resume_path) \ 150 | --logpath $(logpath) \ 151 | --nepoch $(nepoch) \ 152 | --compress_level $(compress_level) \ 153 | --pose_noise $(pose_noise) \ 154 | --only_v2i $(only_v2i) \ 155 | --rsu 0 \ 156 | --block_len $(block_len) \ 157 | --init_resume_path check/check_loss_base/disco/no_rsu/epoch_80.pth \ 158 | --test_store $(logpath)/disco/no_rsu \ 159 | 
--test_data $(create_data_save_path) \ 160 | --loss_type $(loss_type) \ 161 | --exp_name $(exp_name) \ 162 | --test 163 | 164 | test: 165 | python $(testing_script) \ 166 | --data $(testing_data) \ 167 | --com $(com) \ 168 | --resume $(logpath)/$(com)/with_rsu/epoch_$(nepoch).pth \ 169 | --tracking \ 170 | --logpath $(logpath) \ 171 | --apply_late_fusion $(apply_late_fusion) \ 172 | --visualization $(visualization) \ 173 | --inference $(inference) \ 174 | --warp_flag $(warp_flag) \ 175 | --rsu 1 \ 176 | --covar_path $(logpath)/${com}/no_rsu/mbb_covar.npy \ 177 | --loss_type $(loss_type) 178 | 179 | test_no_rsu: 180 | python $(testing_script) \ 181 | --data $(testing_data) \ 182 | --com $(com) \ 183 | --resume $(logpath)/$(com)/no_rsu/epoch_$(nepoch).pth \ 184 | --logpath $(logpath) \ 185 | --apply_late_fusion $(apply_late_fusion) \ 186 | --visualization $(visualization) \ 187 | --inference $(inference) \ 188 | --warp_flag $(warp_flag) \ 189 | --rsu 0 \ 190 | --covar_path $(logpath)/${com}/no_rsu/mbb_covar.npy \ 191 | --loss_type $(loss_type) 192 | 193 | mbb_test_no_rsu: 194 | python test_mbb.py \ 195 | --data $(val_data) \ 196 | --com $(com) \ 197 | --resume $(logpath)/$(com)/no_rsu \ 198 | --logpath $(logpath)/${com}/no_rsu \ 199 | --apply_late_fusion $(apply_late_fusion) \ 200 | --visualization $(visualization) \ 201 | --inference $(inference) \ 202 | --warp_flag $(warp_flag) \ 203 | --rsu 0 \ 204 | --test_store $(logpath)/$(com)/no_rsu \ 205 | --init_resume_path check/check_loss_base/$(com)/no_rsu/epoch_80.pth \ 206 | --nepoch ${nepoch} \ 207 | --loss_type $(loss_type) 208 | 209 | compute_mbb_covar: 210 | python compute_mbb_covar.py \ 211 | --mbb_path $(logpath)/${com}/no_rsu \ 212 | --min_epoch 0 \ 213 | --max_epoch $(nepoch) -------------------------------------------------------------------------------- /tools/det/README.md: -------------------------------------------------------------------------------- 1 | # Detection benchmark on V2X-Sim 2 | 3 | We implement lowerbound, upperbound, when2com, who2com, V2VNet, and DiscoNet as our benchmark detectors. Please see our paper for more details. 4 | 5 | ## Preparation 6 | 7 | - Download the V2X-Sim dataset from our [website](https://ai4ce.github.io/V2X-Sim/index.html) 8 | - Run the command below to generate the preprocessed data 9 | ```bash 10 | make create_data 11 | ``` 12 | - You might want to consult `./Makefile` for all the arguments you can pass in (see the example invocations after the Training section) 13 | 14 | 15 | ## Training 16 | 17 | Train benchmark detectors: 18 | - Lowerbound / Upperbound / V2VNet / When2Com 19 | ```bash 20 | make train com=[lowerbound/upperbound/v2v/when2com] rsu=[0/1] 21 | ``` 22 | 23 | - DiscoNet 24 | ```bash 25 | # DiscoNet 26 | make train_disco 27 | 28 | # DiscoNet with no cross road (RSU) data 29 | make train_disco_no_rsu 30 | ``` 31 | 32 | - When2com_warp 33 | ```bash 34 | # When2com_warp 35 | make train com=when2com warp_flag=1 rsu=[0/1] 36 | ``` 37 | 38 | - Note: Who2com is trained the same way as When2com; they only differ in inference. 
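Every variable defined at the top of `./Makefile` (dataset paths, `com`, `batch_size`, `nepoch`, `rsu`, `logpath`, and so on) can be overridden on the `make` command line, since command-line assignments take precedence over the defaults in the Makefile. A minimal sketch, assuming placeholder dataset and log paths that you should replace with your own:

```bash
# Preprocess the raw V2X-Sim dataset into the detection format
# (original_data_path and create_data_save_path below are placeholders)
make create_data original_data_path=/data/v2x-sim \
    create_data_save_path=/data/V2X-Sim-det split=train

# Train V2VNet with RSU data for 25 epochs, logging under a custom directory
make train com=v2v rsu=1 nepoch=25 batch_size=1 logpath=check/check_loss_base
```

Keeping experiment-specific values on the command line like this lets you leave the checked-in Makefile defaults untouched.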
39 | 40 | ## Evaluation 41 | 42 | Evaluate benchmark detectors: 43 | 44 | - Lowerbound / Upperbound / V2VNet / When2Com / Who2Com 45 | ```bash 46 | # with RSU 47 | make test com=[lowerbound/upperbound/v2v/when2com/who2com] 48 | 49 | # no RSU 50 | make test_no_rsu com=[lowerbound/upperbound/v2v/when2com/who2com] 51 | ``` 52 | 53 | - When2com 54 | ```bash 55 | # with RSU 56 | make test com=when2com inference=activated warp_flag=[0/1] 57 | 58 | # no RSU 59 | make test_no_rsu com=when2com inference=activated warp_flag=[0/1] 60 | ``` 61 | 62 | - Who2com 63 | ```bash 64 | # with RSU 65 | make test com=who2com inference=argmax_test warp_flag=[0/1] 66 | 67 | # no RSU 68 | make test_no_rsu com=who2com inference=argmax_test warp_flag=[0/1] 69 | ``` 70 | 71 | 72 | ## Results 73 | | **Method** | **AP@0.5 w/o RSU** | AP@0.5 w/ RSU | **Δ** | AP@0.7 w/o RSU | **AP@0.7 w/ RSU** | Δ | 74 | | :-----------: | :----------------: | :-----------: | :---: | :------------: | :---------------: | :---: | 75 | | Lower-bound | 49.90 | 46.96 | -2.94 | 44.21 | 42.33 | -1.88 | 76 | | Co-lower-bound | 43.99 | 42.98 | -1.01 | 39.10 | 38.26 | -0.84 | 77 | | When2com | 44.02 | 46.39 | +2.37 | 39.89 | 40.32 | +0.43 | 78 | | When2com* | 45.35 | 48.28 | +2.93 | 40.45 | 41.43 | +0.68 | 79 | | Who2com | 44.02 | 46.39 | +2.37 | 39.89 | 40.32 | +0.43 | 80 | | Who2com* | 45.35 | 48.28 | +2.93 | 40.45 | 41.13 | +0.68 | 81 | | V2VNet | 68.35 | 72.08 | +3.73 | 62.83 | 65.85 | +3.02 | 82 | | DiscoNet | 69.03 | 72.87 | +3.84 | 63.44 | 66.40 | +2.96 | 83 | | Upper-bound | 70.43 | 77.08 | +6.65 | 67.04 | 72.57 | +5.53 | 84 | 85 | -------------------------------------------------------------------------------- /tools/det/compute_mbb_covar.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from copy import deepcopy 4 | 5 | from coperception.utils.CoDetModule import * 6 | from coperception.utils.loss import * 7 | from coperception.utils.mean_ap import eval_map, eval_nll, get_residual_error_and_cov 8 | 9 | def main(args): 10 | start_epoch = args.min_epoch 11 | end_epoch = args.max_epoch 12 | res_diff = [] 13 | all_predicted_covariance = [] 14 | covar_flag = False 15 | iou_thr = 0.5 16 | for epoch in range(start_epoch, end_epoch + 1): 17 | data_path = args.mbb_path + "/{}".format(epoch) + "/all_data.npy" 18 | print("Load data from {}".format(data_path)) 19 | data = np.load(data_path, allow_pickle=True) 20 | det_results_all_local = data.item()['det_results_frame'] 21 | annotations_all_local = data.item()['annotations_frame'] 22 | res_diff_one_epoch, predicted_covar = get_residual_error_and_cov(det_results_all_local, annotations_all_local, scale_ranges=None, iou_thr=iou_thr) 23 | res_diff.extend(res_diff_one_epoch) 24 | if predicted_covar is not None: 25 | all_predicted_covariance.extend(predicted_covar) 26 | covar_flag = True 27 | print("Number of corners of all bounding boxes: {}".format(len(res_diff[epoch]))) 28 | res_diff_np = np.array(res_diff[0]) 29 | if covar_flag: 30 | all_predicted_covariance_np = np.array(all_predicted_covariance[0]) 31 | for i in range(1, len(res_diff)): 32 | res_diff_np = np.concatenate((res_diff_np, res_diff[i])) 33 | if covar_flag: 34 | all_predicted_covariance_np = np.concatenate((all_predicted_covariance_np, all_predicted_covariance[i])) 35 | print(res_diff_np.shape) 36 | print("covariance matrix for residual error:") 37 | covar_e = np.cov(res_diff_np.T) 38 | print(covar_e) 39 | save_data = {"covar_e": covar_e} 40 | if covar_flag: 41 | print(all_predicted_covariance_np.shape) 42 | print("mean 
of predicted covariance matrix:") 43 | covar_a = np.mean(all_predicted_covariance_np, axis=0) 44 | print(covar_a) 45 | save_data['covar_a'] = covar_a 46 | save_data_path = args.mbb_path + "/mbb_covar.npy" 47 | np.save(save_data_path, save_data) 48 | print("Saved computed covariance to {}".format(save_data_path)) 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("--min_epoch", default=0, type=int, help="Minimum epoch we consider") 53 | parser.add_argument("--max_epoch", default=25, type=int, help="Maximum epoch we consider") 54 | parser.add_argument("--nworker", default=1, type=int, help="Number of workers") 55 | parser.add_argument( 56 | "--mbb_path", 57 | default="", 58 | type=str, 59 | help="The path to the several MBB models", 60 | ) 61 | 62 | torch.multiprocessing.set_sharing_strategy("file_system") 63 | args = parser.parse_args() 64 | print(args) 65 | main(args) -------------------------------------------------------------------------------- /tools/det/test_mbb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from copy import deepcopy 4 | 5 | import seaborn as sns 6 | import torch.optim as optim 7 | from torch.utils.data import DataLoader 8 | 9 | from coperception.datasets import V2XSimDet 10 | from coperception.configs import Config, ConfigGlobal 11 | from coperception.utils.CoDetModule import * 12 | from coperception.utils.loss import * 13 | from coperception.utils.mean_ap import eval_map 14 | from coperception.models.det import * 15 | from coperception.utils.detection_util import late_fusion 16 | from coperception.utils.data_util import apply_pose_noise 17 | from coperception.utils.mbb_util import test_model 18 | import socket 19 | 20 | 21 | def check_folder(folder_path): 22 | if not os.path.exists(folder_path): 23 | os.mkdir(folder_path) 24 | return folder_path 25 | 26 | @torch.no_grad() 27 | def main(args): 28 | config = Config("train", binary=True, only_det=True, loss_type=args.loss_type) 29 | config_global = ConfigGlobal("train", binary=True, only_det=True, loss_type=args.loss_type) 30 | 31 | need_log = args.log 32 | num_workers = args.nworker 33 | apply_late_fusion = args.apply_late_fusion 34 | pose_noise = args.pose_noise 35 | compress_level = args.compress_level 36 | only_v2i = args.only_v2i 37 | 38 | # Specify the GPU device 39 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 40 | device_num = torch.cuda.device_count() 41 | print("device number", device_num) 42 | 43 | config.inference = args.inference 44 | if args.com == "upperbound": 45 | flag = "upperbound" 46 | elif args.com == "when2com": 47 | flag = "when2com" 48 | if args.inference == "argmax_test": 49 | flag = "who2com" 50 | if args.warp_flag: 51 | flag = flag + "_warp" 52 | elif args.com in {"v2v", "disco", "sum", "mean", "max", "cat", "agent"}: 53 | flag = args.com 54 | elif args.com == "lowerbound": 55 | flag = "lowerbound" 56 | if args.box_com: 57 | flag += "_box_com" 58 | else: 59 | raise ValueError(f"com: {args.com} is not supported") 60 | 61 | print("flag", flag) 62 | config.flag = flag 63 | config.split = "test" 64 | 65 | num_agent = args.num_agent 66 | # agent0 is the RSU 67 | agent_idx_range = range(num_agent) if args.rsu else range(1, num_agent) 68 | validation_dataset = V2XSimDet( 69 | dataset_roots=[f"{args.data}/agent{i}" for i in agent_idx_range], 70 | config=config, 71 | config_global=config_global, 72 | split="val", 73 | val=True, 74 | bound="upperbound" if 
args.com == "upperbound" else "lowerbound", 75 | kd_flag=args.kd_flag, 76 | rsu=args.rsu, 77 | ) 78 | validation_data_loader = DataLoader( 79 | validation_dataset, batch_size=1, shuffle=False, num_workers=num_workers 80 | ) 81 | print("Validation dataset size:", len(validation_dataset)) 82 | 83 | if not args.rsu: 84 | num_agent -= 1 85 | 86 | if flag == "upperbound" or flag.startswith("lowerbound"): 87 | model = FaFNet( 88 | config, layer=args.layer, kd_flag=args.kd_flag, num_agent=num_agent 89 | ) 90 | elif flag.startswith("when2com") or flag.startswith("who2com"): 91 | # model = PixelwiseWeightedFusionSoftmax(config, layer=args.layer) 92 | model = When2com( 93 | config, 94 | layer=args.layer, 95 | warp_flag=args.warp_flag, 96 | num_agent=num_agent, 97 | compress_level=compress_level, 98 | only_v2i=only_v2i, 99 | ) 100 | elif args.com == "disco": 101 | model = DiscoNet( 102 | config, 103 | layer=args.layer, 104 | kd_flag=args.kd_flag, 105 | num_agent=num_agent, 106 | compress_level=compress_level, 107 | only_v2i=only_v2i, 108 | ) 109 | elif args.com == "sum": 110 | model = SumFusion( 111 | config, 112 | layer=args.layer, 113 | kd_flag=args.kd_flag, 114 | num_agent=num_agent, 115 | compress_level=compress_level, 116 | only_v2i=only_v2i, 117 | ) 118 | elif args.com == "mean": 119 | model = MeanFusion( 120 | config, 121 | layer=args.layer, 122 | kd_flag=args.kd_flag, 123 | num_agent=num_agent, 124 | compress_level=compress_level, 125 | only_v2i=only_v2i, 126 | ) 127 | elif args.com == "max": 128 | model = MaxFusion( 129 | config, 130 | layer=args.layer, 131 | kd_flag=args.kd_flag, 132 | num_agent=num_agent, 133 | compress_level=compress_level, 134 | only_v2i=only_v2i, 135 | ) 136 | elif args.com == "cat": 137 | model = CatFusion( 138 | config, 139 | layer=args.layer, 140 | kd_flag=args.kd_flag, 141 | num_agent=num_agent, 142 | compress_level=compress_level, 143 | only_v2i=only_v2i, 144 | ) 145 | elif args.com == "agent": 146 | model = AgentWiseWeightedFusion( 147 | config, 148 | layer=args.layer, 149 | kd_flag=args.kd_flag, 150 | num_agent=num_agent, 151 | compress_level=compress_level, 152 | only_v2i=only_v2i, 153 | ) 154 | elif args.com == "v2v": 155 | model = V2VNet( 156 | config, 157 | gnn_iter_times=args.gnn_iter_times, 158 | layer=args.layer, 159 | layer_channel=256, 160 | num_agent=num_agent, 161 | compress_level=compress_level, 162 | only_v2i=only_v2i, 163 | ) 164 | 165 | model = nn.DataParallel(model) 166 | model = model.to(device) 167 | optimizer = optim.Adam(model.parameters(), lr=0.001) 168 | criterion = { 169 | "cls": SoftmaxFocalClassificationLoss(), 170 | "loc": WeightedSmoothL1LocalizationLoss(), 171 | } 172 | 173 | fafmodule = FaFModule(model, model, config, optimizer, criterion, args.kd_flag) 174 | 175 | model_save_path = args.resume[: args.resume.rfind("/")] 176 | 177 | if args.inference == "argmax_test": 178 | model_save_path = model_save_path.replace("when2com", "who2com") 179 | 180 | os.makedirs(model_save_path, exist_ok=True) 181 | log_file_name = os.path.join(model_save_path, "log.txt") 182 | saver = open(log_file_name, "a") 183 | saver.write("GPU number: {}\n".format(torch.cuda.device_count())) 184 | saver.flush() 185 | 186 | # Logging the details for this experiment 187 | saver.write("command line: {}\n".format(" ".join(sys.argv[1:]))) 188 | saver.write(args.__repr__() + "\n\n") 189 | saver.flush() 190 | 191 | for epoch in range(args.nepoch+1): 192 | if epoch == 0: 193 | checkpoint_path = args.init_resume_path 194 | else: 195 | checkpoint_path = 
os.path.join(args.resume, f"epoch_{epoch}.pth") 196 | checkpoint = torch.load(checkpoint_path, map_location="cpu") 197 | fafmodule.model.load_state_dict(checkpoint["model_state_dict"]) 198 | fafmodule.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) 199 | fafmodule.scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) 200 | print("Loaded model from {} (epoch {})".format(checkpoint_path, epoch)) 201 | test_model(fafmodule, validation_data_loader, flag, device, config, epoch, args) 202 | 203 | 204 | if __name__ == "__main__": 205 | parser = argparse.ArgumentParser() 206 | parser.add_argument( 207 | "-d", 208 | "--data", 209 | default=None, 210 | type=str, 211 | help="The path to the preprocessed sparse BEV training data", 212 | ) 213 | parser.add_argument("--nepoch", default=100, type=int, help="Number of epochs") 214 | parser.add_argument("--nworker", default=1, type=int, help="Number of workers") 215 | parser.add_argument("--lr", default=0.001, type=float, help="Initial learning rate") 216 | parser.add_argument("--log", action="store_true", help="Whether to log") 217 | parser.add_argument("--logpath", default="", help="The path to the output log file") 218 | parser.add_argument( 219 | "--resume", 220 | default="", 221 | type=str, 222 | help="The path to the saved model that is loaded to resume training", 223 | ) 224 | parser.add_argument( 225 | "--resume_teacher", 226 | default="", 227 | type=str, 228 | help="The path to the saved teacher model that is loaded to resume training", 229 | ) 230 | parser.add_argument( 231 | "--layer", 232 | default=3, 233 | type=int, 234 | help="Which layer to communicate in the single-layer com mode", 235 | ) 236 | parser.add_argument( 237 | "--warp_flag", default=0, type=int, help="Whether to use pose info for When2com" 238 | ) 239 | parser.add_argument( 240 | "--kd_flag", 241 | default=0, 242 | type=int, 243 | help="Whether to enable distillation (only DiscoNet uses 1)", 244 | ) 245 | parser.add_argument("--kd_weight", default=100000, type=int, help="KD loss weight") 246 | parser.add_argument( 247 | "--gnn_iter_times", 248 | default=3, 249 | type=int, 250 | help="Number of message passing iterations for V2VNet", 251 | ) 252 | parser.add_argument( 253 | "--visualization", type=int, default=0, help="Visualize validation result" 254 | ) 255 | parser.add_argument( 256 | "--com", 257 | default="", 258 | type=str, 259 | help="lowerbound/upperbound/disco/when2com/v2v/sum/mean/max/cat/agent", 260 | ) 261 | parser.add_argument("--inference", type=str) 262 | parser.add_argument("--tracking", action="store_true") 263 | parser.add_argument("--box_com", action="store_true") 264 | parser.add_argument("--rsu", default=0, type=int, help="0: no RSU, 1: RSU") 265 | # scene_batch => batch size in each scene 266 | parser.add_argument( 267 | "--num_agent", default=6, type=int, help="The total number of agents" 268 | ) 269 | parser.add_argument( 270 | "--apply_late_fusion", 271 | default=0, 272 | type=int, 273 | help="1: apply late fusion. 
0: no late fusion", 274 | ) 275 | parser.add_argument( 276 | "--compress_level", 277 | default=0, 278 | type=int, 279 | help="Compress the communication layer channels by 2**x times in encoder", 280 | ) 281 | parser.add_argument( 282 | "--pose_noise", 283 | default=0, 284 | type=float, 285 | help="draw noise from normal distribution with given mean (in meters), apply to transformation matrix.", 286 | ) 287 | parser.add_argument( 288 | "--only_v2i", 289 | default=0, 290 | type=int, 291 | help="1: only v2i, 0: v2v and v2i", 292 | ) 293 | parser.add_argument( 294 | "--test_store", 295 | default="", 296 | type=str, 297 | help="The path to store the output of testing", 298 | ) 299 | parser.add_argument( 300 | "--init_resume_path", 301 | default="", 302 | type=str, 303 | help="The path to reload the initial pth", 304 | ) 305 | parser.add_argument( 306 | "--loss_type", 307 | default="corner_loss", 308 | type=str, 309 | help="corner_loss faf_loss kl_loss_center kl_loss_center_add, kl_loss_corner, kl_loss_center_ind, kl_loss_center_offset_ind, kl_loss_corner_pair_ind", 310 | ) 311 | torch.multiprocessing.set_sharing_strategy("file_system") 312 | args = parser.parse_args() 313 | print(args) 314 | main(args) 315 | -------------------------------------------------------------------------------- /tools/utils/move_scenes.py: -------------------------------------------------------------------------------- 1 | # !!! WARNING !!! 2 | # Some of the "0.npy" files inside the folder of each scene might be moved out of the folder, and the folder will be disappeared. 3 | # E.g. We encountered this on scene 45_30 4 | # Please check manually for whether some scenes have this problem. 5 | import os 6 | import shutil 7 | 8 | scene_file = 'test_scenes.txt' 9 | train_scene_file = open(scene_file, 'r') 10 | 11 | train_idxs = set() 12 | for line in train_scene_file: 13 | line = line.strip() 14 | train_idxs.add(int(line)) 15 | 16 | from_loc = '/scratch/dm4524/data/V2X-Sim-det/all' 17 | to_loc = '/scratch/dm4524/data/V2X-Sim-det/test' 18 | 19 | for agent_dir in os.listdir(from_loc): 20 | to_dir = os.path.join(to_loc, agent_dir) 21 | agent_dir = os.path.join(from_loc, agent_dir) 22 | for f in os.listdir(agent_dir): 23 | scene_file_path = os.path.join(agent_dir, f) 24 | scene_idx = int(f.split('_')[0]) 25 | if scene_idx in train_idxs: 26 | shutil.move(scene_file_path, to_dir) -------------------------------------------------------------------------------- /tools/utils/test_scenes.txt: -------------------------------------------------------------------------------- 1 | 5 2 | 8 3 | 19 4 | 27 5 | 28 6 | 29 7 | 91 8 | 92 9 | 96 10 | 97 -------------------------------------------------------------------------------- /tools/utils/train_scenes.txt: -------------------------------------------------------------------------------- 1 | 82 2 | 25 3 | 95 4 | 0 5 | 2 6 | 6 7 | 7 8 | 9 9 | 10 10 | 11 11 | 12 12 | 13 13 | 14 14 | 15 15 | 16 16 | 17 17 | 18 18 | 20 19 | 21 20 | 22 21 | 23 22 | 24 23 | 26 24 | 30 25 | 31 26 | 32 27 | 33 28 | 34 29 | 35 30 | 36 31 | 37 32 | 38 33 | 39 34 | 40 35 | 41 36 | 42 37 | 43 38 | 44 39 | 45 40 | 46 41 | 47 42 | 48 43 | 49 44 | 50 45 | 51 46 | 52 47 | 53 48 | 54 49 | 55 50 | 56 51 | 57 52 | 58 53 | 59 54 | 60 55 | 61 56 | 62 57 | 64 58 | 66 59 | 67 60 | 69 61 | 70 62 | 71 63 | 72 64 | 73 65 | 74 66 | 75 67 | 77 68 | 80 69 | 81 70 | 83 71 | 85 72 | 86 73 | 87 74 | 88 75 | 89 76 | 90 77 | 93 78 | 94 79 | 98 80 | 99 -------------------------------------------------------------------------------- 
/tools/utils/val_scenes.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 3 3 | 4 4 | 63 5 | 65 6 | 68 7 | 76 8 | 78 9 | 79 10 | 84 --------------------------------------------------------------------------------