├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST ├── README.md ├── coperception ├── README.txt ├── __init__.py ├── configs │ ├── Config.py │ ├── ConfigGlobal.py │ └── __init__.py ├── datasets │ ├── MbbSampler.py │ ├── NuscenesDataset.py │ ├── V2XSimDet.py │ ├── V2XSimSeg.py │ └── __init__.py ├── models │ ├── __init__.py │ ├── det │ │ ├── AgentWiseWeightedFusion.py │ │ ├── CatFusion.py │ │ ├── DiscoNet.py │ │ ├── FaFNet.py │ │ ├── MaxFusion.py │ │ ├── MeanFusion.py │ │ ├── SumFusion.py │ │ ├── TeacherNet.py │ │ ├── V2VNet.py │ │ ├── When2com.py │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── Backbone.py │ │ │ └── __init__.py │ │ └── base │ │ │ ├── DetModelBase.py │ │ │ ├── FusionBase.py │ │ │ ├── IntermediateModelBase.py │ │ │ ├── NonIntermediateModelBase.py │ │ │ └── __init__.py │ └── seg │ │ ├── AgentWiseWeightedFusion.py │ │ ├── CatFusion.py │ │ ├── DiscoNet.py │ │ ├── FusionBase.py │ │ ├── MaxFusion.py │ │ ├── MeanFusion.py │ │ ├── SegModelBase.py │ │ ├── SumFusion.py │ │ ├── UNet.py │ │ ├── V2VNet.py │ │ ├── When2Com_UNet.py │ │ └── __init__.py └── utils │ ├── AverageMeter.py │ ├── CoDetModule.py │ ├── SegMetrics.py │ ├── SegModule.py │ ├── __init__.py │ ├── convolutional_rnn │ ├── __init__.py │ ├── functional.py │ ├── module.py │ └── utils.py │ ├── data_util.py │ ├── detection_util.py │ ├── loss.py │ ├── mapping.cpython-37m-x86_64-linux-gnu.so │ ├── mapping.py │ ├── mbb_util.py │ ├── mean_ap.py │ ├── min_norm_solvers.py │ ├── nuscenes_pc_util.py │ ├── obj_util.py │ └── postprocess.py ├── environment.yml ├── requirements.txt ├── setup.py └── tools ├── det ├── Makefile ├── README.md ├── compute_mbb_covar.py ├── create_data_det.py ├── test_codet.py ├── test_mbb.py ├── train_codet.py └── train_mbb.py └── utils ├── move_scenes.py ├── test_scenes.txt ├── train_scenes.txt └── val_scenes.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | dist/ 3 | site/ 4 | *.egg-info/ 5 | logs 6 | .ipynb_checkpoints 7 | check_* 8 | check 9 | err_logs 10 | v2x-sim* 11 | .DS_Store -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: stable 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://gitlab.com/pycqa/flake8 8 | rev: 3.7.9 9 | hooks: 10 | - id: flake8 -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 1 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.9" 13 | 14 | mkdocs: 15 | configuration: mkdocs.yml 16 | 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.cfg 3 | setup.py 4 | coperception/__init__.py 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Uncertainty Quantification of Collaborative Detection for Self-Driving (ICRA 2023) 2 | [Sanbao Su](https://sanbaosu.netlify.app/), [Yiming Li](https://roboticsyimingli.github.io), [Sihong He](https://scholar.google.com/citations?hl=en&user=jLLDCeoAAAAJ), [Songyang Han](https://songyanghan.com/), [Chen Feng](https://scholar.google.com/citations?user=YeG8ZM0AAAAJ&hl=en), [Caiwen Ding](https://scholar.google.com/citations?hl=en&user=7hR0r_EAAAAJ), [Fei Miao](http://feimiao.org/index.html) 3 | 4 | Implementation of paper "Uncertainty Quantification of Collaborative Detection for Self-Driving" [paper](https://arxiv.org/abs/2209.08162), [website](https://coperception.github.io/double-m-quantification/) 5 | 6 | ![main](https://github.com/coperception/double-m-quantification/blob/gh-pages/static/images/main.png) 7 | 8 | ## Abstract: 9 | 10 | Sharing information between connected and autonomous vehicles (CAVs) fundamentally improves the performance of collaborative object detection for self-driving. However, CAVs still have uncertainties on object detection due to practical challenges, which will affect the later modules in self-driving such as planning and control. Hence, uncertainty quantification is crucial for safety-critical systems such as CAVs. Our work is the first to estimate the uncertainty of collaborative object detection. We propose a novel uncertainty quantification method, called Double-M Quantification, which tailors a moving block bootstrap (MBB) algorithm with direct modeling of the multivariant Gaussian distribution of each corner of the bounding box. Our method captures both the epistemic uncertainty and aleatoric uncertainty with one inference based on the offline Double-M training process. And it can be used with different collaborative object detectors. 
Through experiments on a comprehensive collaborative perception dataset for CAVs, we show that our Double-M method achieves up to a 4.09x improvement in uncertainty score and up to a 3.13% improvement in accuracy, compared with state-of-the-art uncertainty quantification methods. The results also validate that sharing information between CAVs benefits the system by both improving accuracy and reducing uncertainty. 11 | 12 | ## Install: 13 | 1. Clone this repository. 14 | 2. `cd` into the cloned repository. 15 | 3. Install the `coperception` package with pip: 16 | ```bash 17 | pip install -e . 18 | ``` 19 | ## Getting started: 20 | Please refer to our docs website for detailed documentation of the models: https://coperception.readthedocs.io/en/latest/ 21 | Installation: 22 | - [Installation documentation](https://coperception.readthedocs.io/en/latest/getting_started/installation/) 23 | 24 | Download dataset: 25 | - [V2X-Sim](https://coperception.readthedocs.io/en/latest/datasets/v2x_sim/) 26 | 27 | ## Training 28 | 29 | ```bash 30 | cd ./tools/det/ 31 | ``` 32 | 33 | ### Pretrain stage: 34 | 35 | Train benchmark detectors: 36 | - Lowerbound / Upperbound 37 | ```bash 38 | CUDA_VISIBLE_DEVICES=0 make train com=upperbound loss_type=corner_loss logpath=check/check_loss_base nepoch=60 39 | CUDA_VISIBLE_DEVICES=0 make train com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_base nepoch=80 40 | ``` 41 | 42 | - DiscoNet 43 | ```bash 44 | CUDA_VISIBLE_DEVICES=0 make train_disco_no_rsu loss_type=corner_loss logpath=check/check_loss_base nepoch=60 45 | CUDA_VISIBLE_DEVICES=0 make train_disco_no_rsu loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_base nepoch=80 46 | ``` 47 | 48 | ### Train stage: 49 | 50 | Train benchmark detectors: 51 | - Lowerbound / Upperbound 52 | ```bash 53 | CUDA_VISIBLE_DEVICES=0 make mbb_train com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 54 | ``` 55 | 56 | - DiscoNet 57 | ```bash 58 | CUDA_VISIBLE_DEVICES=0 make mbb_train_disco_no_rsu loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 59 | ``` 60 | 61 | Compute the covariance for MBB: 62 | ```bash 63 | CUDA_VISIBLE_DEVICES=0 make mbb_test_no_rsu com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 64 | CUDA_VISIBLE_DEVICES=0 make compute_mbb_covar com=upperbound logpath=check/check_loss_corner_pair_ind 65 | ``` 66 | 67 | ## Test: 68 | 69 | ### Test stage: 70 | 71 | 72 | Test benchmark detectors: 73 | - Lowerbound / Upperbound / DiscoNet 74 | ```bash 75 | CUDA_VISIBLE_DEVICES=0 make test_no_rsu com=upperbound loss_type=kl_loss_corner_pair_ind logpath=check/check_loss_corner_pair_ind nepoch=25 76 | ``` 77 | 78 | ## Related works: 79 | - [coperception Github repo](https://github.com/coperception/coperception) 80 | 81 | ## Related papers: 82 | Double-M Quantification: 83 | ```bibtex 84 | @inproceedings{Su2022uncertainty, 85 | author = {Su, Sanbao and Li, Yiming and He, Sihong and Han, Songyang and Feng, Chen and Ding, Caiwen and Miao, Fei}, 86 | title = {Uncertainty Quantification of Collaborative Detection for Self-Driving}, 87 | year={2023}, 88 | booktitle={IEEE International Conference on Robotics and Automation (ICRA)} 89 | } 90 | ``` 91 | 92 | V2X-Sim dataset: 93 | ```bibtex 94 | @article{Li_2021_RAL, 95 | title = {V2X-Sim: A Virtual Collaborative Perception Dataset and Benchmark for Autonomous Driving}, 96 | author = {Li, Yiming and Ma, Dekun and An, Ziyan and Wang,
Zixun and Zhong, Yiqi and Chen, Siheng and Feng, Chen}, 97 | booktitle = {IEEE Robotics and Automation Letters}, 98 | year = {2022} 99 | } 100 | ``` 101 | -------------------------------------------------------------------------------- /coperception/README.txt: -------------------------------------------------------------------------------- 1 | A library for multi-agent collaborative perception. 2 | See https://github.com/coperception/coperception for details. -------------------------------------------------------------------------------- /coperception/__init__.py: -------------------------------------------------------------------------------- 1 | from .configs import * 2 | from .utils import * 3 | from .models import * 4 | from .datasets import * 5 | -------------------------------------------------------------------------------- /coperception/configs/Config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | class Config(object): 6 | """The config class""" 7 | 8 | def __init__( 9 | self, 10 | split, 11 | binary=True, 12 | only_det=True, 13 | code_type="faf", 14 | loss_type="faf_loss", 15 | savepath="", 16 | root="", 17 | is_cross_road=False, 18 | use_vis=False, 19 | ): 20 | # for segmentaion task only 21 | # ========================= 22 | self.num_class = 8 23 | self.in_channels = 13 24 | self.nepoch = 10 25 | 26 | self.class_to_rgb = { 27 | 0: [255, 255, 255], # Unlabeled 28 | 1: [71, 141, 230], # Vehicles 29 | 2: [122, 217, 209], # Sidewalk 30 | 3: [145, 171, 100], # Ground / Terrain 31 | 4: [231, 136, 101], # Road / Traffic light / Pole 32 | 5: [142, 80, 204], # Buildings 33 | 6: [224, 8, 50], # Pedestrian 34 | 7: [106, 142, 34] # Vegetation 35 | # 7: [102, 102, 156], # Walls 36 | # 0: [55, 90, 80], # Other 37 | } 38 | 39 | # Remap pixel values given by carla 40 | self.classes_remap = { 41 | 0: 0, # Unlabeled (so that we don't forget this class) 42 | 10: 1, # Vehicles 43 | 8: 2, # Sidewalk 44 | 14: 3, # Ground (non-drivable) 45 | 22: 3, # Terrain (non-drivable) 46 | 7: 4, # Road 47 | 6: 4, # Road line 48 | 18: 4, # Traffic light 49 | 5: 4, # Pole 50 | 1: 5, # Building 51 | 4: 6, # Pedestrian 52 | 9: 7, # Vegetation 53 | } 54 | 55 | self.class_idx_to_name = { 56 | 0: "Unlabeled", 57 | 1: "Vehicles", 58 | 2: "Sidewalk", 59 | 3: "Ground & Terrain", 60 | 4: "Road", 61 | 5: "Buildings", 62 | 6: "Pedestrian", 63 | 7: "Vegetation", 64 | } 65 | # ========================= 66 | 67 | self.device = None 68 | self.split = split 69 | self.savepath = savepath 70 | self.binary = binary 71 | self.only_det = only_det 72 | self.code_type = code_type 73 | self.loss_type = loss_type # corner_loss faf_loss kl_loss_center kl_loss_center_add, kl_loss_corner, kl_loss_center_ind, kl_loss_center_offset_ind, kl_loss_corner_pair_ind 74 | self.covar_length = 21 # number of variables in the covariance matrix, decomposition matrix for covariance matrix, for multivariate Gaussian of (x,y,w,h,sin,cos), it should be 21 75 | if self.loss_type == "kl_loss_corner": 76 | self.covar_length = 8 77 | elif self.loss_type == "kl_loss_center_ind" or self.loss_type == "kl_loss_center_offset_ind": 78 | self.covar_length = 6 79 | elif self.loss_type == "kl_loss_corner_pair_ind": 80 | self.covar_length = 12 81 | self.covar_matrix_size = 6 # one dimensional size of the covariance matrix 82 | self.loss_loc_weight = 0.1 83 | self.loc_det_weight = 0.1 84 | self.regMeanCovToge = False # always be False 85 | if self.loss_type == "kl_loss_corner_all": 86 
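# --- Added note (not part of the original Config.py; illustrative only) ---
# covar_length counts the free entries of a lower-triangular factor L of the
# covariance matrix (Sigma = L @ L^T). For the n = 6 box variables
# (x, y, w, h, sin, cos) that is n * (n + 1) / 2 = 6 * 7 / 2 = 21. The
# smaller values (6, 8, 12) appear to come from the same counting under the
# independence assumptions of the corresponding loss types. A minimal sketch
# of rebuilding Sigma from such a flat vector (tril_from_flat is a
# hypothetical helper, not used anywhere in this repo):
#
#     import torch
#     def tril_from_flat(flat, n=6):
#         rows, cols = torch.tril_indices(n, n)
#         L = torch.zeros(n, n)
#         L[rows, cols] = flat        # flat holds n * (n + 1) // 2 values
#         return L @ L.T              # positive semi-definite covariance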
| self.regMeanCovToge = True 87 | 88 | # The specifications for BEV maps 89 | self.voxel_size = (0.25, 0.25, 0.4) 90 | #self.voxel_size = (0.375, 0.375, 0.4) 91 | self.area_extents = ( 92 | np.array([[-32.0, 32.0], [-32.0, 32.0], [-8.0, -3.0]]) 93 | #np.array([[-48.0, 48.0], [-48.0, 48.0], [-8.0, -3.0]]) 94 | if is_cross_road 95 | else np.array([[-32.0, 32.0], [-32.0, 32.0], [-3.0, 2.0]]) 96 | #else np.array([[-48.0, 48.0], [-48.0, 48.0], [-3.0, 2.0]]) 97 | ) 98 | self.is_cross_road = is_cross_road 99 | self.past_frame_skip = 3 # when generating the BEV maps, how many history frames need to be skipped 100 | self.future_frame_skip = ( 101 | 0 # when generating the BEV maps, how many future frames need to be skipped 102 | ) 103 | self.num_past_frames_for_bev_seq = ( 104 | 1 # the number of past frames for BEV map sequence 105 | ) 106 | self.num_past_pcs = 1 # duplicate self.num_past_frames_for_bev_seq 107 | 108 | self.map_dims = [ 109 | math.ceil( 110 | (self.area_extents[0][1] - self.area_extents[0][0]) / self.voxel_size[0] 111 | ), 112 | math.ceil( 113 | (self.area_extents[1][1] - self.area_extents[1][0]) / self.voxel_size[1] 114 | ), 115 | math.ceil( 116 | (self.area_extents[2][1] - self.area_extents[2][0]) / self.voxel_size[2] 117 | ), 118 | ] 119 | self.only_det = True 120 | self.root = root 121 | # debug Data: 122 | self.code_type = "faf" 123 | self.pred_type = "motion" 124 | # debug Loss 125 | # self.loss_type = "corner_loss" 126 | # debug MGDA 127 | self.MGDA = False 128 | # debug when2com 129 | self.MIMO = True 130 | # debug Motion Classification 131 | self.motion_state = False 132 | self.static_thre = 0.2 # speed lower bound 133 | 134 | # debug use_vis 135 | self.use_vis = use_vis 136 | self.use_map = False 137 | 138 | # The specifications for object detection encode 139 | if self.code_type in ["corner_1", "corner_2"]: 140 | self.box_code_size = 8 # (\delta{x1},\delta{y1},\delta{x2},\delta{y2},\delta{x3},\delta{y3},\delta{x4},\delta{y4}) 141 | elif self.code_type in ["corner_3"]: 142 | self.box_code_size = 10 143 | elif self.code_type[0] == "f": 144 | self.box_code_size = 6 # (x,y,w,h,sin,cos) 145 | else: 146 | print(code_type, " code type is not implemented yet!") 147 | exit() 148 | 149 | self.pred_len = ( 150 | 1 # the number of frames for prediction, including the current frame 151 | ) 152 | 153 | # anchor size: (w,h,angle) (according to nuscenes w < h) 154 | if not self.binary: 155 | self.anchor_size = np.asarray( 156 | [ 157 | [2.0, 4.0, 0], 158 | [2.0, 4.0, math.pi / 2.0], 159 | [1.0, 1.0, 0], 160 | [1.0, 2.0, 0.0], 161 | [1.0, 2.0, math.pi / 2.0], 162 | [3.0, 12.0, 0.0], 163 | [3.0, 12.0, math.pi / 2.0], 164 | ] 165 | ) 166 | else: 167 | self.anchor_size = np.asarray( 168 | [ 169 | [2.0, 4.0, 0], 170 | [2.0, 4.0, math.pi / 2.0], 171 | [2.0, 4.0, -math.pi / 4.0], 172 | [3.0, 12.0, 0], 173 | [3.0, 12.0, math.pi / 2.0], 174 | [3.0, 12.0, -math.pi / 4.0], 175 | ] 176 | ) 177 | 178 | self.category_threshold = [0.4, 0.4, 0.25, 0.25, 0.4] 179 | self.class_map = { 180 | "vehicle.car": 1, 181 | "vehicle.emergency.police": 1, 182 | "vehicle.bicycle": 3, 183 | "vehicle.motorcycle": 3, 184 | "vehicle.bus.rigid": 2, 185 | } 186 | 187 | if self.binary: 188 | self.category_num = 2 189 | else: 190 | self.category_num = len(self.category_threshold) 191 | self.print_feq = 100 192 | if self.split == "train": 193 | self.num_keyframe_skipped = ( 194 | 0 # The number of keyframes we will skip when dumping the data 195 | ) 196 | self.nsweeps_back = 1 # Number of frames back to the history 
(including the current timestamp) 197 | self.nsweeps_forward = 0 # Number of frames into the future (does not include the current timestamp) 198 | self.skip_frame = ( 199 | 0 # The number of frames skipped for the adjacent sequence 200 | ) 201 | self.num_adj_seqs = ( 202 | 1 # number of adjacent sequences, among which the time gap is \delta t 203 | ) 204 | else: 205 | self.num_keyframe_skipped = 0 206 | self.nsweeps_back = 1 # Setting this to 30 (for training) or 25 (for testing) allows conducting ablation studies on frame numbers 207 | self.nsweeps_forward = 0 208 | self.skip_frame = 0 209 | self.num_adj_seqs = 1 210 | -------------------------------------------------------------------------------- /coperception/configs/ConfigGlobal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | class ConfigGlobal(object): 6 | def __init__( 7 | self, 8 | split, 9 | binary=True, 10 | only_det=True, 11 | code_type="faf", 12 | loss_type="faf_loss", 13 | savepath="", 14 | root="", 15 | ): 16 | 17 | self.device = None 18 | self.split = split 19 | self.savepath = savepath 20 | self.binary = binary 21 | self.only_det = only_det 22 | self.code_type = code_type 23 | self.loss_type = loss_type # corner_loss faf_loss 24 | 25 | # The specifications for BEV maps 26 | self.voxel_size = (0.25, 0.25, 0.4) 27 | self.area_extents = np.array([[-96.0, 96.0], [-96.0, 96.0], [-3.0, 2.0]]) 28 | self.past_frame_skip = 0 # when generating the BEV maps, how many history frames need to be skipped 29 | self.future_frame_skip = ( 30 | 0 # when generating the BEV maps, how many future frames need to be skipped 31 | ) 32 | self.num_past_frames_for_bev_seq = ( 33 | 1 # the number of past frames for BEV map sequence 34 | ) 35 | self.num_past_pcs = 4 # duplicate self.num_past_frames_for_bev_seq 36 | 37 | self.map_dims = [ 38 | math.ceil( 39 | (self.area_extents[0][1] - self.area_extents[0][0]) / self.voxel_size[0] 40 | ), 41 | math.ceil( 42 | (self.area_extents[1][1] - self.area_extents[1][0]) / self.voxel_size[1] 43 | ), 44 | math.ceil( 45 | (self.area_extents[2][1] - self.area_extents[2][0]) / self.voxel_size[2] 46 | ), 47 | ] 48 | self.only_det = True 49 | self.root = root 50 | 51 | # debug Data: 52 | self.code_type = "faf" 53 | self.pred_type = "motion" 54 | # debug Loss 55 | self.loss_type = "corner_loss" 56 | 57 | # debug MGDA 58 | self.MGDA = False 59 | # debug when2com 60 | self.MIMO = False 61 | # debug Motion Classification 62 | self.motion_state = False 63 | self.static_thre = 0.2 # speed lower bound 64 | 65 | # debug use_vis 66 | self.use_vis = True 67 | self.use_map = False 68 | 69 | # The specifications for object detection encode 70 | if self.code_type in ["corner_1", "corner_2"]: 71 | self.box_code_size = 8 # (\delta{x1},\delta{y1},\delta{x2},\delta{y2},\delta{x3},\delta{y3},\delta{x4},\delta{y4}) 72 | elif self.code_type in ["corner_3"]: 73 | self.box_code_size = 10 74 | elif self.code_type[0] == "f": 75 | self.box_code_size = 6 # (x,y,w,h,sin,cos) 76 | else: 77 | print(code_type, " code type is not implemented yet!") 78 | exit() 79 | 80 | self.pred_len = ( 81 | 1 # the number of frames for prediction, including the current frame 82 | ) 83 | 84 | # anchor size: (w,h,angle) (according to nuscenes w < h) 85 | if not self.binary: 86 | self.anchor_size = np.asarray( 87 | [ 88 | [2.0, 4.0, 0], 89 | [2.0, 4.0, math.pi / 2.0], 90 | [1.0, 1.0, 0], 91 | [1.0, 2.0, 0.0], 92 | [1.0, 2.0, math.pi / 2.0], 93 | [3.0, 12.0, 0.0], 94 | [3.0, 12.0, 
math.pi / 2.0], 95 | ] 96 | ) 97 | else: 98 | self.anchor_size = np.asarray( 99 | [ 100 | [2.0, 4.0, 0], 101 | [2.0, 4.0, math.pi / 2.0], 102 | [2.0, 4.0, -math.pi / 4.0], 103 | [3.0, 12.0, 0], 104 | [3.0, 12.0, math.pi / 2.0], 105 | [3.0, 12.0, -math.pi / 4.0], 106 | ] 107 | ) 108 | 109 | self.category_threshold = [0.4, 0.4, 0.25, 0.25, 0.4] 110 | self.class_map = { 111 | "vehicle.audi.a2": 1, 112 | "vehicle.audi.etron": 1, 113 | "vehicle.audi.tt": 1, 114 | "vehicle.bmw.grandtourer": 1, 115 | "vehicle.bmw.isetta": 1, 116 | "vehicle.chevrolet.impala": 1, 117 | "vehicle.citroen.c3": 1, 118 | "vehicle.dodge_charger.police": 1, 119 | "vehicle.jeep.wrangler_rubicon": 1, 120 | "vehicle.lincoln.mkz2017": 1, 121 | "vehicle.mercedes-benz.coupe": 1, 122 | "vehicle.mini.cooperst": 1, 123 | "vehicle.mustang.mustang": 1, 124 | "vehicle.nissan.micra": 1, 125 | "vehicle.nissan.patrol": 1, 126 | "vehicle.seat.leon": 1, 127 | "vehicle.tesla.cybertruck": 1, 128 | "vehicle.tesla.model3": 1, 129 | "vehicle.toyota.prius": 1, 130 | "vehicle.volkswagen.t2": 1, 131 | "vehicle.carlamotors.carlacola": 1, 132 | "human.pedestrian": 2, 133 | "vehicle.bh.crossbike": 3, 134 | "vehicle.diamondback.century": 3, 135 | "vehicle.gazelle.omafiets": 3, 136 | "vehicle.harley-davidson.low_rider": 3, 137 | "vehicle.kawasaki.ninja": 3, 138 | "vehicle.yamaha.yzf": 3, 139 | } # background: 0, other: 4 140 | # self.class_map = {'vehicle.car': 1, 'vehicle.truck': 1, 'vehicle.bus': 1, 'human.pedestrian': 2, 'vehicle.bicycle': 3, 'vehicle.motorcycle': 3} # background: 0, other: 4 141 | if self.binary: 142 | self.category_num = 2 143 | else: 144 | self.category_num = len(self.category_threshold) 145 | self.print_feq = 100 146 | if self.split == "train": 147 | self.num_keyframe_skipped = ( 148 | 0 # The number of keyframes we will skip when dumping the data 149 | ) 150 | self.nsweeps_back = 1 # Number of frames back to the history (including the current timestamp) 151 | self.nsweeps_forward = 0 # Number of frames into the future (does not include the current timestamp) 152 | self.skip_frame = ( 153 | 0 # The number of frames skipped for the adjacent sequence 154 | ) 155 | self.num_adj_seqs = ( 156 | 1 # number of adjacent sequences, among which the time gap is \delta t 157 | ) 158 | else: 159 | self.num_keyframe_skipped = 0 160 | self.nsweeps_back = 1 # Setting this to 30 (for training) or 25 (for testing) allows conducting ablation studies on frame numbers 161 | self.nsweeps_forward = 0 162 | self.skip_frame = 0 163 | self.num_adj_seqs = 1 164 | -------------------------------------------------------------------------------- /coperception/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .Config import Config 2 | from .ConfigGlobal import ConfigGlobal 3 | -------------------------------------------------------------------------------- /coperception/datasets/MbbSampler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import torch 5 | from coperception.utils.obj_util import * 6 | from coperception.datasets import V2XSimDet 7 | from typing import Iterator, Optional, Sequence, List, TypeVar, Generic, Sized 8 | from torch.utils.data import Sampler 9 | 10 | class MbbSampler(Sampler[int]): 11 | r"""Samples elements sequentially, always in the same order. 
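    (Added clarification; not in the original docstring.) The summary line
    above appears to have been copied from ``SequentialSampler``: ``__iter__``
    below actually permutes candidate start frames at random and yields
    ``block_len`` consecutive frame indices per start, producing the
    contiguous temporal blocks required by the moving block bootstrap (MBB).

    Illustrative usage (``block_len=10`` is an arbitrary example value; the
    dataset is assumed to expose ``num_sample_seqs`` and ``scene_len`` like
    ``V2XSimDet``)::

        sampler = MbbSampler(train_dataset, block_len=10)
        loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, sampler=sampler)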
12 | 13 | Args: 14 | data_source (Dataset): dataset to sample from 15 | """ 16 | data_source: Sized 17 | 18 | def __init__(self, data_source: Sized, block_len: int) -> None: 19 | self.data_source = data_source 20 | self.frame_len = data_source.num_sample_seqs 21 | self.scene_len = data_source.scene_len 22 | self.block_len = block_len 23 | self.frame_pre_scene = self.frame_len // self.scene_len 24 | self.iter_len = len(self.data_source) // self.block_len * self.block_len 25 | 26 | def __iter__(self) -> Iterator[int]: 27 | self.iter_list = [] 28 | perm = torch.randperm(len(self.data_source)) 29 | require_range = self.frame_pre_scene - self.block_len 30 | for idx in perm: 31 | if idx % self.frame_pre_scene > require_range: 32 | continue 33 | if len(self.iter_list) > self.iter_len: 34 | break 35 | self.iter_list.extend(range(idx, idx + self.block_len)) 36 | return iter(self.iter_list) 37 | 38 | def __len__(self) -> int: 39 | return self.iter_len -------------------------------------------------------------------------------- /coperception/datasets/NuscenesDataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from multiprocessing import Manager 4 | 5 | import numpy as np 6 | from torch.utils.data import Dataset 7 | from coperception.utils.obj_util import init_anchors_no_check 8 | 9 | 10 | class NuscenesDataset(Dataset): 11 | def __init__( 12 | self, dataset_root=None, config=None, split=None, cache_size=10000, val=False 13 | ): 14 | """ 15 | This dataloader loads single sequence for a keyframe, and is not designed for computing the 16 | spatio-temporal consistency losses. It supports train, val and test splits. 17 | 18 | dataset_root: Data path to the preprocessed sparse nuScenes data (for training) 19 | split: [train/val/test] 20 | future_frame_skip: Specify to skip how many future frames 21 | voxel_size: The lattice resolution. Should be consistent with the preprocessed data 22 | area_extents: The area extents of the processed LiDAR data. Should be consistent with the preprocessed data 23 | category_num: The number of object categories (including the background) 24 | cache_size: The cache size for storing parts of data in the memory (for reducing the IO cost) 25 | """ 26 | if split is None: 27 | self.split = config.split 28 | else: 29 | self.split = split 30 | self.voxel_size = config.voxel_size 31 | self.area_extents = config.area_extents 32 | self.category_num = config.category_num 33 | self.future_frame_skip = config.future_frame_skip 34 | self.pred_len = config.pred_len 35 | self.box_code_size = config.box_code_size 36 | self.anchor_size = config.anchor_size 37 | self.val = val 38 | self.only_det = config.only_det 39 | self.binary = config.binary 40 | self.config = config 41 | self.use_vis = config.use_vis 42 | # dataset_root = dataset_root + '/'+split 43 | if dataset_root is None: 44 | raise ValueError( 45 | "The {} dataset root is None. 
Should specify its value.".format( 46 | self.split 47 | ) 48 | ) 49 | self.dataset_root = dataset_root 50 | seq_dirs = [ 51 | os.path.join(self.dataset_root, d) 52 | for d in os.listdir(self.dataset_root) 53 | if os.path.isdir(os.path.join(self.dataset_root, d)) 54 | ] 55 | seq_dirs = sorted(seq_dirs) 56 | self.seq_files = [ 57 | os.path.join(seq_dir, f) 58 | for seq_dir in seq_dirs 59 | for f in os.listdir(seq_dir) 60 | if os.path.isfile(os.path.join(seq_dir, f)) 61 | ] 62 | 63 | self.num_sample_seqs = len(self.seq_files) 64 | print("The number of {} sequences: {}".format(self.split, self.num_sample_seqs)) 65 | 66 | """ 67 | # For training, the size of dataset should be 17065 * 2; for validation: 1623; for testing: 4309 68 | if split == 'train' and self.num_sample_seqs != 17065 * 2: 69 | warnings.warn(">> The size of training dataset is not 17065 * 2.\n") 70 | elif split == 'val' and self.num_sample_seqs != 1623: 71 | warnings.warn(">> The size of validation dataset is not 1719.\n") 72 | elif split == 'test' and self.num_sample_seqs != 4309: 73 | warnings.warn('>> The size of test dataset is not 4309.\n') 74 | """ 75 | 76 | # object information 77 | self.anchors_map = init_anchors_no_check( 78 | self.area_extents, self.voxel_size, self.box_code_size, self.anchor_size 79 | ) 80 | self.map_dims = [ 81 | int( 82 | (self.area_extents[0][1] - self.area_extents[0][0]) / self.voxel_size[0] 83 | ), 84 | int( 85 | (self.area_extents[1][1] - self.area_extents[1][0]) / self.voxel_size[1] 86 | ), 87 | ] 88 | self.reg_target_shape = ( 89 | self.map_dims[0], 90 | self.map_dims[1], 91 | len(self.anchor_size), 92 | self.pred_len, 93 | self.box_code_size, 94 | ) 95 | self.label_shape = (self.map_dims[0], self.map_dims[1], len(self.anchor_size)) 96 | self.label_one_hot_shape = ( 97 | self.map_dims[0], 98 | self.map_dims[1], 99 | len(self.anchor_size), 100 | self.category_num, 101 | ) 102 | self.dims = config.map_dims 103 | self.num_past_pcs = config.num_past_pcs 104 | manager = Manager() 105 | self.cache = manager.dict() 106 | self.cache_size = cache_size if split == "train" else 0 107 | # self.cache_size = cache_size 108 | 109 | def __len__(self): 110 | return self.num_sample_seqs 111 | 112 | def get_one_hot(self, label, category_num): 113 | one_hot_label = np.zeros((label.shape[0], category_num)) 114 | for i in range(label.shape[0]): 115 | one_hot_label[i][label[i]] = 1 116 | 117 | return one_hot_label 118 | 119 | def __getitem__(self, idx): 120 | if idx in self.cache: 121 | gt_dict = self.cache[idx] 122 | else: 123 | seq_file = self.seq_files[idx] 124 | gt_data_handle = np.load(seq_file, allow_pickle=True) 125 | gt_dict = gt_data_handle.item() 126 | if len(self.cache) < self.cache_size: 127 | self.cache[idx] = gt_dict 128 | 129 | allocation_mask = gt_dict["allocation_mask"].astype(bool) 130 | reg_loss_mask = gt_dict["reg_loss_mask"].astype(bool) 131 | gt_max_iou = gt_dict["gt_max_iou"] 132 | motion_one_hot = np.zeros(5) 133 | motion_mask = np.zeros(5) 134 | 135 | # load regression target 136 | reg_target_sparse = gt_dict["reg_target_sparse"] 137 | # need to be modified Yiqi , only use reg_target and allocation_map 138 | reg_target = np.zeros(self.reg_target_shape).astype(reg_target_sparse.dtype) 139 | 140 | reg_target[allocation_mask] = reg_target_sparse 141 | reg_target[np.bitwise_not(reg_loss_mask)] = 0 142 | label_sparse = gt_dict["label_sparse"] 143 | 144 | one_hot_label_sparse = self.get_one_hot(label_sparse, self.category_num) 145 | label_one_hot = np.zeros(self.label_one_hot_shape) 146 | 
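        # (Descriptive comments added for clarity; not in the original source.)
        # The preprocessed GT stores labels only for the anchors that were
        # actually allocated, so the dense one-hot map is rebuilt here:
        # initialise every anchor as background (class 0), then scatter the
        # sparse one-hot labels back through allocation_mask -- the same
        # sparse-to-dense pattern used for reg_target above.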
label_one_hot[:, :, :, 0] = 1 147 | label_one_hot[allocation_mask] = one_hot_label_sparse 148 | 149 | if self.config.motion_state: 150 | motion_sparse = gt_dict["motion_state"] 151 | motion_one_hot_label_sparse = self.get_one_hot(motion_sparse, 3) 152 | motion_one_hot = np.zeros(self.label_one_hot_shape[:-1] + (3,)) 153 | motion_one_hot[:, :, :, 0] = 1 154 | motion_one_hot[allocation_mask] = motion_one_hot_label_sparse 155 | motion_mask = motion_one_hot[:, :, :, 2] == 1 156 | 157 | if self.only_det: 158 | reg_target = reg_target[:, :, :, :1] 159 | reg_loss_mask = reg_loss_mask[:, :, :, :1] 160 | 161 | # only center for pred 162 | 163 | elif self.config.pred_type in ["motion", "center"]: 164 | reg_loss_mask = np.expand_dims(reg_loss_mask, axis=-1) 165 | reg_loss_mask = np.repeat(reg_loss_mask, self.box_code_size, axis=-1) 166 | reg_loss_mask[:, :, :, 1:, 2:] = False 167 | 168 | if self.config.use_map: 169 | if ("map_allocation_0" in gt_dict.keys()) or ( 170 | "map_allocation" in gt_dict.keys() 171 | ): 172 | semantic_maps = [] 173 | for m_id in range(self.config.map_channel): 174 | map_alloc = gt_dict["map_allocation_" + str(m_id)] 175 | map_sparse = gt_dict["map_sparse_" + str(m_id)] 176 | recover = np.zeros(tuple(self.config.map_dims[:2])) 177 | recover[map_alloc] = map_sparse 178 | recover = np.rot90(recover, 3) 179 | # recover_map = cv2.resize(recover,(self.config.map_dims[0],self.config.map_dims[1])) 180 | semantic_maps.append(recover) 181 | semantic_maps = np.asarray(semantic_maps) 182 | else: 183 | semantic_maps = np.zeros(0) 184 | """ 185 | if self.binary: 186 | reg_target = np.concatenate([reg_target[:,:,:2],reg_target[:,:,5:]],axis=2) 187 | reg_loss_mask = np.concatenate([reg_loss_mask[:,:,:2],reg_loss_mask[:,:,5:]],axis=2) 188 | label_one_hot = np.concatenate([label_one_hot[:,:,:2],label_one_hot[:,:,5:]],axis=2) 189 | 190 | """ 191 | padded_voxel_points = list() 192 | 193 | for i in range(self.num_past_pcs): 194 | indices = gt_dict["voxel_indices_" + str(i)] 195 | curr_voxels = np.zeros(self.dims, dtype=bool) 196 | curr_voxels[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 197 | curr_voxels = np.rot90(curr_voxels, 3) 198 | padded_voxel_points.append(curr_voxels) 199 | padded_voxel_points = np.stack(padded_voxel_points, 0).astype(np.float32) 200 | anchors_map = self.anchors_map 201 | """ 202 | if self.binary: 203 | anchors_map = np.concatenate([anchors_map[:,:,:2],anchors_map[:,:,5:]],axis=2) 204 | """ 205 | if self.config.use_vis: 206 | vis_maps = np.zeros( 207 | ( 208 | self.num_past_pcs, 209 | self.config.map_dims[-1], 210 | self.config.map_dims[0], 211 | self.config.map_dims[1], 212 | ) 213 | ) 214 | vis_free_indices = gt_dict["vis_free_indices"] 215 | vis_occupy_indices = gt_dict["vis_occupy_indices"] 216 | vis_maps[ 217 | vis_occupy_indices[0, :], 218 | vis_occupy_indices[1, :], 219 | vis_occupy_indices[2, :], 220 | vis_occupy_indices[3, :], 221 | ] = math.log(0.7 / (1 - 0.7)) 222 | vis_maps[ 223 | vis_free_indices[0, :], 224 | vis_free_indices[1, :], 225 | vis_free_indices[2, :], 226 | vis_free_indices[3, :], 227 | ] = math.log(0.4 / (1 - 0.4)) 228 | vis_maps = np.swapaxes(vis_maps, 2, 3) 229 | vis_maps = np.transpose(vis_maps, (0, 2, 3, 1)) 230 | for v_id in range(vis_maps.shape[0]): 231 | vis_maps[v_id] = np.rot90(vis_maps[v_id], 3) 232 | vis_maps = vis_maps[-1] 233 | 234 | else: 235 | vis_maps = np.zeros(0) 236 | 237 | padded_voxel_points = padded_voxel_points.astype(np.float32) 238 | label_one_hot = label_one_hot.astype(np.float32) 239 | reg_target = 
reg_target.astype(np.float32) 240 | anchors_map = anchors_map.astype(np.float32) 241 | motion_one_hot = motion_one_hot.astype(np.float32) 242 | semantic_maps = semantic_maps.astype(np.float32) 243 | vis_maps = vis_maps.astype(np.float32) 244 | 245 | if self.val: 246 | return ( 247 | padded_voxel_points, 248 | label_one_hot, 249 | reg_target, 250 | reg_loss_mask, 251 | anchors_map, 252 | motion_one_hot, 253 | motion_mask, 254 | vis_maps, 255 | [{"gt_box": gt_max_iou}], 256 | [seq_file], 257 | ) 258 | else: 259 | return ( 260 | padded_voxel_points, 261 | label_one_hot, 262 | reg_target, 263 | reg_loss_mask, 264 | anchors_map, 265 | motion_one_hot, 266 | motion_mask, 267 | vis_maps, 268 | ) 269 | -------------------------------------------------------------------------------- /coperception/datasets/V2XSimSeg.py: -------------------------------------------------------------------------------- 1 | import os 2 | from multiprocessing import Manager 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import torchvision.transforms as transforms 8 | import torchvision.transforms.functional as TF 9 | from torch.utils.data import Dataset 10 | 11 | 12 | class V2XSimSeg(Dataset): 13 | def __init__( 14 | self, 15 | dataset_roots=None, 16 | config=None, 17 | split=None, 18 | cache_size=1000, 19 | val=False, 20 | com=False, 21 | bound=None, 22 | kd_flag=False, 23 | rsu=False, 24 | ): 25 | """ 26 | This dataloader loads single sequence for a keyframe, and is not designed for computing the 27 | spatio-temporal consistency losses. It supports train, val and test splits. 28 | 29 | dataset_root: Data path to the preprocessed sparse nuScenes data (for training) 30 | split: [train/val/test] 31 | future_frame_skip: Specify to skip how many future frames 32 | voxel_size: The lattice resolution. Should be consistent with the preprocessed data 33 | area_extents: The area extents of the processed LiDAR data. Should be consistent with the preprocessed data 34 | category_num: The number of object categories (including the background) 35 | cache_size: The cache size for storing parts of data in the memory (for reducing the IO cost) 36 | """ 37 | if split is None: 38 | self.split = config.split 39 | else: 40 | self.split = split 41 | self.voxel_size = config.voxel_size 42 | self.area_extents = config.area_extents 43 | self.pred_len = config.pred_len 44 | self.val = val 45 | self.config = config 46 | self.use_vis = config.use_vis 47 | self.com = com 48 | self.bound = bound 49 | self.kd_flag = kd_flag 50 | self.rsu = rsu 51 | 52 | if dataset_roots is None: 53 | raise ValueError( 54 | "The {} dataset root is None. 
Should specify its value.".format( 55 | self.split 56 | ) 57 | ) 58 | self.dataset_roots = dataset_roots 59 | self.seq_files = [] 60 | self.seq_scenes = [] 61 | for dataset_root in self.dataset_roots: 62 | # sort directories 63 | dir_list = [d.split("_") for d in os.listdir(dataset_root)] 64 | dir_list.sort(key=lambda x: (int(x[0]), int(x[1]))) 65 | self.seq_scenes.append( 66 | [int(s[0]) for s in dir_list] 67 | ) # which scene this frame belongs to (required for visualization) 68 | dir_list = ["_".join(x) for x in dir_list] 69 | 70 | seq_dirs = [ 71 | os.path.join(dataset_root, d) 72 | for d in dir_list 73 | if os.path.isdir(os.path.join(dataset_root, d)) 74 | ] 75 | 76 | self.seq_files.append( 77 | [ 78 | os.path.join(seq_dir, f) 79 | for seq_dir in seq_dirs 80 | for f in os.listdir(seq_dir) 81 | if os.path.isfile(os.path.join(seq_dir, f)) 82 | ] 83 | ) 84 | 85 | self.num_agent = len(self.dataset_roots) 86 | 87 | self.num_sample_seqs = len(self.seq_files[0]) 88 | print("The number of {} sequences: {}".format(self.split, self.num_sample_seqs)) 89 | # object information 90 | self.dims = config.map_dims 91 | self.num_past_pcs = config.num_past_pcs 92 | manager = Manager() 93 | self.cache = [manager.dict() for i in range(self.num_agent)] 94 | self.cache_size = cache_size if split == "train" else 0 95 | 96 | self.transform = Transform(self.split) 97 | 98 | def __len__(self): 99 | return self.num_sample_seqs 100 | 101 | def get_one_hot(self, label, category_num): 102 | one_hot_label = np.zeros((label.shape[0], category_num)) 103 | for i in range(label.shape[0]): 104 | one_hot_label[i][label[i]] = 1 105 | 106 | return one_hot_label 107 | 108 | def get_seginfo_from_single_agent(self, agent_id, idx): 109 | empty_flag = False 110 | if idx in self.cache[agent_id]: 111 | gt_dict = self.cache[agent_id][idx] 112 | else: 113 | seq_file = self.seq_files[agent_id][idx] 114 | gt_data_handle = np.load(seq_file, allow_pickle=True) 115 | if gt_data_handle == 0: 116 | empty_flag = True 117 | if self.com != 'lowerbound' and self.com != 'upperbound': 118 | return ( 119 | torch.zeros((256, 256, 13)).bool(), 120 | torch.zeros((256, 256, 13)).bool(), 121 | torch.zeros((256, 256)).int(), 122 | torch.zeros((self.num_agent, 4, 4)), 123 | 0, 124 | 0, 125 | ) 126 | else: 127 | return ( 128 | torch.zeros((256, 256, 13)).bool(), 129 | torch.zeros((256, 256, 13)).bool(), 130 | torch.zeros((256, 256)).int(), 131 | ) 132 | else: 133 | gt_dict = gt_data_handle.item() 134 | if len(self.cache[agent_id]) < self.cache_size: 135 | self.cache[agent_id][idx] = gt_dict 136 | 137 | if not empty_flag: 138 | bev_seg = gt_dict["bev_seg"].astype(np.int32) 139 | 140 | padded_voxel_points = list() 141 | 142 | # if self.bound == 'lowerbound': 143 | for i in range(self.num_past_pcs): 144 | indices = gt_dict["voxel_indices_" + str(i)] 145 | curr_voxels = np.zeros(self.dims, dtype=bool) 146 | curr_voxels[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 147 | 148 | curr_voxels = np.rot90(curr_voxels, 3) 149 | # curr_voxels = np.rot90(np.fliplr(curr_voxels), 3) 150 | bev_seg = np.rot90(bev_seg, 1) # to align with voxel 151 | 152 | padded_voxel_points.append(curr_voxels) 153 | padded_voxel_points = np.stack(padded_voxel_points, 0) 154 | padded_voxel_points = np.squeeze(padded_voxel_points, 0) 155 | 156 | padded_voxel_points_teacher = list() 157 | # if self.bound == 'upperbound' or self.kd_flag: 158 | if self.rsu: 159 | indices_teacher = gt_dict["voxel_indices_teacher"] 160 | else: 161 | indices_teacher = 
gt_dict["voxel_indices_teacher_no_cross_road"] 162 | 163 | curr_voxels_teacher = np.zeros(self.dims, dtype=bool) 164 | curr_voxels_teacher[ 165 | indices_teacher[:, 0], indices_teacher[:, 1], indices_teacher[:, 2] 166 | ] = 1 167 | curr_voxels_teacher = np.rot90(curr_voxels_teacher, 3) 168 | padded_voxel_points_teacher.append(curr_voxels_teacher) 169 | padded_voxel_points_teacher = np.stack(padded_voxel_points_teacher, 0) 170 | padded_voxel_points_teacher = np.squeeze(padded_voxel_points_teacher, 0) 171 | 172 | if self.com != 'lowerbound' and self.com != 'upperbound': 173 | if self.rsu: 174 | trans_matrices = gt_dict["trans_matrices"] 175 | else: 176 | trans_matrices = gt_dict["trans_matrices_no_cross_road"] 177 | 178 | target_agent_id = gt_dict["target_agent_id"] 179 | num_sensor = gt_dict["num_sensor"] 180 | 181 | return ( 182 | torch.from_numpy(padded_voxel_points), 183 | torch.from_numpy(padded_voxel_points_teacher), 184 | torch.from_numpy(bev_seg.copy()), 185 | torch.from_numpy(trans_matrices.copy()), 186 | target_agent_id, 187 | num_sensor, 188 | ) 189 | else: 190 | return ( 191 | torch.from_numpy(padded_voxel_points), 192 | torch.from_numpy(padded_voxel_points_teacher), 193 | torch.from_numpy(bev_seg.copy()), 194 | ) 195 | 196 | def __getitem__(self, idx): 197 | res = [] 198 | for i in range(self.num_agent): 199 | res.append(self.get_seginfo_from_single_agent(i, idx)) 200 | return res 201 | 202 | 203 | class Transform: 204 | def __init__(self, split): 205 | self.totensor = transforms.ToTensor() 206 | self.resize = transforms.Resize((256, 256)) 207 | self.split = split 208 | 209 | def __call__(self, img, label): 210 | img = self.totensor(img.copy()) 211 | label = self.totensor(label.copy()) 212 | 213 | if self.split != "train": 214 | return img.permute(1, 2, 0).float(), label.squeeze(0).int() 215 | 216 | crop = transforms.RandomResizedCrop(256) 217 | params = crop.get_params(img, scale=(0.08, 1.0), ratio=(0.75, 1.33)) 218 | img = TF.crop(img, *params) 219 | label = TF.crop(label, *params) 220 | 221 | if np.random.random() > 0.5: 222 | img = TF.hflip(img) 223 | label = TF.hflip(label) 224 | 225 | if np.random.random() > 0.5: 226 | img = TF.vflip(img) 227 | label = TF.vflip(label) 228 | 229 | img = self.resize(img) 230 | label = cv2.resize( 231 | label.squeeze(0).numpy(), dsize=(256, 256), interpolation=cv2.INTER_NEAREST 232 | ) # Resize provided by pytorch will have some random noise 233 | # return img.permute(1, 2, 0).float(), label.squeeze(0).int() 234 | return img.permute(1, 2, 0).float(), label 235 | -------------------------------------------------------------------------------- /coperception/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .V2XSimDet import V2XSimDet 2 | from .V2XSimSeg import V2XSimSeg 3 | from .NuscenesDataset import NuscenesDataset 4 | from .MbbSampler import MbbSampler -------------------------------------------------------------------------------- /coperception/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /coperception/models/det/AgentWiseWeightedFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from coperception.models.det.base import FusionBase 5 | 6 | 7 | class AgentWiseWeightedFusion(FusionBase): 8 | """Agent-wise weighted fusion. Used as a lower-bound in the DiscoNet fusion.""" 9 | 10 | def __init__( 11 | self, 12 | config, 13 | layer=3, 14 | in_channels=13, 15 | kd_flag=True, 16 | num_agent=5, 17 | compress_level=0, 18 | only_v2i=False, 19 | ): 20 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 21 | self.agent_weighted_fusion = AgentWeightedFusion() 22 | 23 | def fusion(self): 24 | agent_weight_list = list() 25 | for k in range(self.num_agent): 26 | cat_feat = torch.cat([self.tg_agent, self.neighbor_feat_list[k]], dim=0) 27 | cat_feat = cat_feat.unsqueeze(0) 28 | agent_weight = self.agent_weighted_fusion(cat_feat) 29 | agent_weight_list.append(agent_weight) 30 | 31 | soft_agent_weight_list = torch.squeeze( 32 | F.softmax(torch.tensor(agent_weight_list).unsqueeze(0), dim=1) 33 | ) 34 | 35 | agent_wise_weight_feat = 0 36 | for k in range(self.num_agent): 37 | agent_wise_weight_feat = ( 38 | agent_wise_weight_feat 39 | + soft_agent_weight_list[k] * self.neighbor_feat_list[k] 40 | ) 41 | 42 | return agent_wise_weight_feat 43 | 44 | 45 | class AgentWeightedFusion(nn.Module): 46 | def __init__(self): 47 | super(AgentWeightedFusion, self).__init__() 48 | 49 | self.conv1_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) 50 | self.bn1_1 = nn.BatchNorm2d(128) 51 | 52 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 53 | self.bn1_2 = nn.BatchNorm2d(32) 54 | 55 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 56 | self.bn1_3 = nn.BatchNorm2d(8) 57 | 58 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 59 | 60 | # self.conv1_1 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0) 61 | # self.bn1_1 = nn.BatchNorm2d(1) 62 | self.conv1_5 = nn.Conv2d(1, 1, kernel_size=32, stride=1, padding=0) 63 | # # self.bn1_2 = nn.BatchNorm2d(1) 64 | 65 | def forward(self, x): 66 | # x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 67 | # x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 68 | # x_1 = F.sigmoid(self.conv1_2(x_1)) 69 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 70 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 71 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 72 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 73 | x_1 = F.relu(self.conv1_4(x_1)) 74 | x_1 = F.relu(self.conv1_5(x_1)) 75 | 76 | return x_1 77 | -------------------------------------------------------------------------------- /coperception/models/det/CatFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from coperception.models.det.base import FusionBase 5 | 6 | 7 | class CatFusion(FusionBase): 8 | """Concatenate fusion. 
Used as a lower-bound in the DiscoNet paper.""" 9 | 10 | def __init__( 11 | self, 12 | config, 13 | layer=3, 14 | in_channels=13, 15 | kd_flag=True, 16 | num_agent=5, 17 | compress_level=0, 18 | only_v2i=False, 19 | ): 20 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 21 | self._modulation_layer_3 = ModulationLayer3() 22 | 23 | def fusion(self): 24 | mean_feat = torch.mean(torch.stack(self.neighbor_feat_list), dim=0) # [c, h, w] 25 | cat_feat = torch.cat([self.tg_agent, mean_feat], dim=0) 26 | cat_feat = cat_feat.unsqueeze(0) # [1, 1, c, h, w] 27 | return self._modulation_layer_3(cat_feat) 28 | 29 | 30 | class ModulationLayer3(nn.Module): 31 | def __init__(self): 32 | super(ModulationLayer3, self).__init__() 33 | 34 | self._conv1_1 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) 35 | self._bn1_1 = nn.BatchNorm2d(256) 36 | 37 | def forward(self, x): 38 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 39 | x_1 = F.relu(self._bn1_1(self._conv1_1(x))) 40 | 41 | return x_1 42 | -------------------------------------------------------------------------------- /coperception/models/det/DiscoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from coperception.models.det.base import IntermediateModelBase 5 | 6 | 7 | class DiscoNet(IntermediateModelBase): 8 | """DiscoNet. 9 | 10 | https://github.com/ai4ce/DiscoNet 11 | 12 | Args: 13 | config (object): The config object. 14 | layer (int, optional): Collaborate on which layer. Defaults to 3. 15 | in_channels (int, optional): The input channels. Defaults to 13. 16 | kd_flag (bool, optional): Whether to use knowledge distillation. Defaults to True. 17 | num_agent (int, optional): The number of agents (including RSU). Defaults to 5. 18 | 19 | """ 20 | 21 | def __init__(self, config, layer=3, in_channels=13, kd_flag=True, num_agent=5, compress_level=0, only_v2i=False): 22 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 23 | if self.layer == 3: 24 | self.pixel_weighted_fusion = PixelWeightedFusionSoftmax(256) 25 | elif self.layer == 2: 26 | self.pixel_weighted_fusion = PixelWeightedFusionSoftmax(128) 27 | 28 | def forward(self, bevs, trans_matrices, num_agent_tensor, batch_size=1): 29 | """Forward pass. 30 | 31 | Args: 32 | bevs (tensor): BEV data 33 | trans_matrices (tensor): Matrix for transforming features among agents. 34 | num_agent_tensor (tensor): Number of agents to communicate for each agent. 35 | batch_size (int, optional): The batch size. Defaults to 1. 36 | 37 | Returns: 38 | If kd_flag is set: the detection result, all decoded layers, and the fused feature maps. 39 | Otherwise: the detection result and a list of per-agent fusion weights. 
40 | """ 41 | 42 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 43 | encoded_layers = self.u_encoder(bevs) 44 | device = bevs.device 45 | 46 | feat_maps, size = super().get_feature_maps_and_size(encoded_layers) 47 | 48 | feat_list = super().build_feature_list(batch_size, feat_maps) 49 | 50 | local_com_mat = super().build_local_communication_matrix( 51 | feat_list 52 | ) # [2 5 512 16 16] [batch, agent, channel, height, width] 53 | local_com_mat_update = super().build_local_communication_matrix( 54 | feat_list 55 | ) # to avoid the inplace operation 56 | 57 | save_agent_weight_list = list() 58 | 59 | for b in range(batch_size): 60 | num_agent = num_agent_tensor[b, 0] 61 | for i in range(num_agent): 62 | tg_agent = local_com_mat[b, i] 63 | all_warp = trans_matrices[b, i] # transformation [2 5 5 4 4] 64 | 65 | self.neighbor_feat_list = list() 66 | self.neighbor_feat_list.append(tg_agent) 67 | 68 | if super().outage(): 69 | agent_wise_weight_feat = self.neighbor_feat_list[0] 70 | else: 71 | super().build_neighbors_feature_list( 72 | b, 73 | i, 74 | all_warp, 75 | num_agent, 76 | local_com_mat, 77 | device, 78 | size, 79 | trans_matrices, 80 | ) 81 | 82 | # agent-wise weighted fusion 83 | tmp_agent_weight_list = list() 84 | sum_weight = 0 85 | nb_len = len(self.neighbor_feat_list) 86 | for k in range(nb_len): 87 | cat_feat = torch.cat( 88 | [tg_agent, self.neighbor_feat_list[k]], dim=0 89 | ) 90 | cat_feat = cat_feat.unsqueeze(0) 91 | agent_weight = torch.squeeze( 92 | self.pixel_weighted_fusion(cat_feat) 93 | ) 94 | tmp_agent_weight_list.append(torch.exp(agent_weight)) 95 | sum_weight = sum_weight + torch.exp(agent_weight) 96 | 97 | agent_weight_list = list() 98 | for k in range(nb_len): 99 | agent_weight = torch.div(tmp_agent_weight_list[k], sum_weight) 100 | agent_weight.expand([256, -1, -1]) 101 | agent_weight_list.append(agent_weight) 102 | 103 | agent_wise_weight_feat = 0 104 | for k in range(nb_len): 105 | agent_wise_weight_feat = ( 106 | agent_wise_weight_feat 107 | + agent_weight_list[k] * self.neighbor_feat_list[k] 108 | ) 109 | 110 | # feature update 111 | local_com_mat_update[b, i] = agent_wise_weight_feat 112 | 113 | save_agent_weight_list.append(agent_weight_list) 114 | 115 | # weighted feature maps is passed to decoder 116 | feat_fuse_mat = super().agents_to_batch(local_com_mat_update) 117 | 118 | decoded_layers = super().get_decoded_layers( 119 | encoded_layers, feat_fuse_mat, batch_size 120 | ) 121 | x = decoded_layers[0] 122 | 123 | cls_preds, loc_preds, result = super().get_cls_loc_result(x) 124 | 125 | if self.kd_flag == 1: 126 | return (result, *decoded_layers, feat_fuse_mat) 127 | else: 128 | # return result 129 | return result, save_agent_weight_list 130 | 131 | 132 | class PixelWeightedFusionSoftmax(nn.Module): 133 | def __init__(self, channel): 134 | super(PixelWeightedFusionSoftmax, self).__init__() 135 | 136 | self.conv1_1 = nn.Conv2d(channel * 2, 128, kernel_size=1, stride=1, padding=0) 137 | self.bn1_1 = nn.BatchNorm2d(128) 138 | 139 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 140 | self.bn1_2 = nn.BatchNorm2d(32) 141 | 142 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 143 | self.bn1_3 = nn.BatchNorm2d(8) 144 | 145 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 146 | # self.bn1_4 = nn.BatchNorm2d(1) 147 | 148 | def forward(self, x): 149 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 150 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 151 | x_1 = 
F.relu(self.bn1_2(self.conv1_2(x_1))) 152 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 153 | x_1 = F.relu(self.conv1_4(x_1)) 154 | 155 | return x_1 156 | -------------------------------------------------------------------------------- /coperception/models/det/FaFNet.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.base import NonIntermediateModelBase 2 | 3 | 4 | class FaFNet(NonIntermediateModelBase): 5 | """The model of early fusion. Used as lower-bound and upper-bound depending on the input features (fused or not). 6 | 7 | https://arxiv.org/pdf/2012.12395.pdf 8 | 9 | Args: 10 | config (object): The Config object. 11 | layer (int, optional): Collaborate on which layer. Defaults to 3. 12 | in_channels (int, optional): The input channels. Defaults to 13. 13 | kd_flag (bool, optional): Whether to use knowledge distillation (for DiscoNet to ues). Defaults to True. 14 | num_agent (int, optional): The number of agents (including RSU). Defaults to 5. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | config, 20 | layer=3, 21 | in_channels=13, 22 | kd_flag=True, 23 | num_agent=5, 24 | compress_level=0, 25 | ): 26 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level) 27 | 28 | def forward(self, bevs, maps=None, vis=None, batch_size=None): 29 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 30 | 31 | x_8, x_7, x_6, x_5, x_3, x_2 = self.stpn(bevs) 32 | x = x_8 33 | 34 | cls_preds, loc_preds, result = super().get_cls_loc_result(x) 35 | 36 | if self.kd_flag == 1: 37 | return result, x_8, x_7, x_6, x_5, x_3 38 | else: 39 | return result 40 | -------------------------------------------------------------------------------- /coperception/models/det/MaxFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from coperception.models.det.base import FusionBase 3 | 4 | 5 | class MaxFusion(FusionBase): 6 | "Maximum fusion. Used as a lower-bound in the DiscoNet fusion." 7 | 8 | def __init__( 9 | self, 10 | config, 11 | layer=3, 12 | in_channels=13, 13 | kd_flag=True, 14 | num_agent=5, 15 | compress_level=0, 16 | only_v2i=False, 17 | ): 18 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 19 | 20 | def fusion(self): 21 | return torch.max(torch.stack(self.neighbor_feat_list), dim=0).values 22 | -------------------------------------------------------------------------------- /coperception/models/det/MeanFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from coperception.models.det.base import FusionBase 3 | 4 | 5 | class MeanFusion(FusionBase): 6 | "Mean fusion. Used as a lower-bound in the DiscoNet fusion." 7 | 8 | def __init__(self, config, layer=3, in_channels=13, kd_flag=True, num_agent=5, compress_level=0, only_v2i=False): 9 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 10 | 11 | def fusion(self): 12 | return torch.mean(torch.stack(self.neighbor_feat_list), dim=0) 13 | -------------------------------------------------------------------------------- /coperception/models/det/SumFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from coperception.models.det.base import FusionBase 3 | 4 | 5 | class SumFusion(FusionBase): 6 | """Sum fusion. 
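Fuses by taking an element-wise sum of the ego feature and the warped neighbor features.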
Used as a lower-bound in the DiscoNet fusion.""" 7 | 8 | def __init__( 9 | self, 10 | config, 11 | layer=3, 12 | in_channels=13, 13 | kd_flag=True, 14 | num_agent=5, 15 | compress_level=0, 16 | only_v2i=False, 17 | ): 18 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 19 | 20 | def fusion(self): 21 | return torch.sum(torch.stack(self.neighbor_feat_list), dim=0) 22 | -------------------------------------------------------------------------------- /coperception/models/det/TeacherNet.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.base import NonIntermediateModelBase 2 | 3 | 4 | class TeacherNet(NonIntermediateModelBase): 5 | """The teacher net for knowledged distillation in DiscoNet.""" 6 | 7 | def __init__(self, config): 8 | super(TeacherNet, self).__init__(config, compress_level=0) 9 | 10 | def forward(self, bevs, maps=None, vis=None): 11 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 12 | # vis = vis.permute(0, 3, 1, 2) 13 | return self.stpn(bevs) 14 | -------------------------------------------------------------------------------- /coperception/models/det/V2VNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import coperception.utils.convolutional_rnn as convrnn 3 | from coperception.models.det.base import IntermediateModelBase 4 | import torch.nn.functional as F 5 | 6 | 7 | class V2VNet(IntermediateModelBase): 8 | """V2V Net 9 | 10 | https://arxiv.org/abs/2008.07519 11 | 12 | """ 13 | 14 | def __init__( 15 | self, 16 | config, 17 | gnn_iter_times, 18 | layer, 19 | layer_channel, 20 | in_channels=13, 21 | num_agent=5, 22 | compress_level=0, 23 | only_v2i=False, 24 | ): 25 | super().__init__( 26 | config, 27 | layer, 28 | in_channels, 29 | num_agent=num_agent, 30 | compress_level=compress_level, 31 | only_v2i=only_v2i, 32 | ) 33 | 34 | self.layer_channel = layer_channel 35 | self.gnn_iter_num = gnn_iter_times 36 | self.convgru = convrnn.Conv2dGRU( 37 | in_channels=self.layer_channel * 2, 38 | out_channels=self.layer_channel, 39 | kernel_size=3, 40 | num_layers=1, 41 | bidirectional=False, 42 | dilation=1, 43 | stride=1, 44 | ) 45 | self.compress_level = compress_level 46 | 47 | def forward(self, bevs, trans_matrices, num_agent_tensor, batch_size=1): 48 | # trans_matrices [batch 5 5 4 4] 49 | # num_agent_tensor, shape: [batch, num_agent]; how many non-empty agent in this scene 50 | 51 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 52 | encoded_layers = self.u_encoder(bevs) 53 | device = bevs.device 54 | 55 | feat_maps, size = super().get_feature_maps_and_size(encoded_layers) 56 | # get feat maps for each agent [10 512 16 16] -> [2 5 512 16 16] 57 | feat_list = super().build_feature_list(batch_size, feat_maps) 58 | 59 | local_com_mat = super().build_local_communication_matrix( 60 | feat_list 61 | ) # [2 5 512 16 16] [batch, agent, channel, height, width] 62 | local_com_mat_update = super().build_local_communication_matrix( 63 | feat_list 64 | ) # to avoid the inplace operation 65 | 66 | for b in range(batch_size): 67 | num_agent = num_agent_tensor[b, 0] 68 | 69 | agent_feat_list = list() 70 | for nb in range(self.agent_num): 71 | agent_feat_list.append(local_com_mat[b, nb]) 72 | 73 | for _ in range(self.gnn_iter_num): 74 | 75 | updated_feats_list = [] 76 | 77 | for i in range(num_agent): 78 | self.neighbor_feat_list = [] 79 | all_warp = trans_matrices[b, i] # transformation [2 5 5 4 4] 80 | 
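                    # Message-passing step for agent i: if the agent suffers a
                    # communication outage, its own feature is kept unchanged.
                    # Otherwise each neighbor's feature map is warped into agent
                    # i's coordinate frame, the warped maps are averaged,
                    # concatenated with agent i's current feature, and one
                    # ConvGRU step produces the updated feature for the next
                    # GNN iteration.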
81 | if super().outage(): 82 | updated_feats_list.append(agent_feat_list[i]) 83 | 84 | else: 85 | super().build_neighbors_feature_list( 86 | b, 87 | i, 88 | all_warp, 89 | num_agent, 90 | local_com_mat, 91 | device, 92 | size, 93 | trans_matrices, 94 | ) 95 | 96 | mean_feat = torch.mean( 97 | torch.stack(self.neighbor_feat_list), dim=0 98 | ) # [c, h, w] 99 | cat_feat = torch.cat([agent_feat_list[i], mean_feat], dim=0) 100 | cat_feat = cat_feat.unsqueeze(0).unsqueeze(0) # [1, 1, c, h, w] 101 | updated_feat, _ = self.convgru(cat_feat, None) 102 | updated_feat = torch.squeeze( 103 | torch.squeeze(updated_feat, 0), 0 104 | ) # [c, h, w] 105 | updated_feats_list.append(updated_feat) 106 | 107 | agent_feat_list = updated_feats_list 108 | 109 | for k in range(num_agent): 110 | local_com_mat_update[b, k] = agent_feat_list[k] 111 | 112 | feat_maps = super().agents_to_batch(local_com_mat_update) 113 | 114 | decoded_layers = super().get_decoded_layers( 115 | encoded_layers, feat_maps, batch_size 116 | ) 117 | x = decoded_layers[0] 118 | 119 | cls_pred, loc_preds, result = super().get_cls_loc_result(x) 120 | return result 121 | -------------------------------------------------------------------------------- /coperception/models/det/__init__.py: -------------------------------------------------------------------------------- 1 | from .DiscoNet import DiscoNet 2 | from .V2VNet import V2VNet 3 | from .When2com import When2com 4 | from .SumFusion import SumFusion 5 | from .MeanFusion import MeanFusion 6 | from .MaxFusion import MaxFusion 7 | from .CatFusion import CatFusion 8 | from .AgentWiseWeightedFusion import AgentWiseWeightedFusion 9 | from .TeacherNet import TeacherNet 10 | from .FaFNet import FaFNet 11 | -------------------------------------------------------------------------------- /coperception/models/det/backbone/Backbone.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | import torch 4 | 5 | 6 | class Backbone(nn.Module): 7 | """The backbone class that contains encode and decode function""" 8 | 9 | def __init__(self, height_feat_size, compress_level=0): 10 | super().__init__() 11 | self.conv_pre_1 = nn.Conv2d( 12 | height_feat_size, 32, kernel_size=3, stride=1, padding=1 13 | ) 14 | self.conv_pre_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) 15 | self.bn_pre_1 = nn.BatchNorm2d(32) 16 | self.bn_pre_2 = nn.BatchNorm2d(32) 17 | 18 | self.conv3d_1 = Conv3D( 19 | 64, 64, kernel_size=(1, 1, 1), stride=1, padding=(0, 0, 0) 20 | ) 21 | self.conv3d_2 = Conv3D( 22 | 128, 128, kernel_size=(1, 1, 1), stride=1, padding=(0, 0, 0) 23 | ) 24 | 25 | self.conv1_1 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1) 26 | self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) 27 | 28 | self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1) 29 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 30 | 31 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) 32 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 33 | 34 | self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1) 35 | self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 36 | 37 | self.conv5_1 = nn.Conv2d(512 + 256, 256, kernel_size=3, stride=1, padding=1) 38 | self.conv5_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 39 | 40 | self.conv6_1 = nn.Conv2d(256 + 128, 128, kernel_size=3, stride=1, padding=1) 41 
| self.conv6_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 42 | 43 | self.conv7_1 = nn.Conv2d(128 + 64, 64, kernel_size=3, stride=1, padding=1) 44 | self.conv7_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) 45 | 46 | self.conv8_1 = nn.Conv2d(64 + 32, 32, kernel_size=3, stride=1, padding=1) 47 | self.conv8_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) 48 | 49 | self.bn1_1 = nn.BatchNorm2d(64) 50 | self.bn1_2 = nn.BatchNorm2d(64) 51 | 52 | self.bn2_1 = nn.BatchNorm2d(128) 53 | self.bn2_2 = nn.BatchNorm2d(128) 54 | 55 | self.bn3_1 = nn.BatchNorm2d(256) 56 | self.bn3_2 = nn.BatchNorm2d(256) 57 | 58 | self.bn4_1 = nn.BatchNorm2d(512) 59 | self.bn4_2 = nn.BatchNorm2d(512) 60 | 61 | self.bn5_1 = nn.BatchNorm2d(256) 62 | self.bn5_2 = nn.BatchNorm2d(256) 63 | 64 | self.bn6_1 = nn.BatchNorm2d(128) 65 | self.bn6_2 = nn.BatchNorm2d(128) 66 | 67 | self.bn7_1 = nn.BatchNorm2d(64) 68 | self.bn7_2 = nn.BatchNorm2d(64) 69 | 70 | self.bn8_1 = nn.BatchNorm2d(32) 71 | self.bn8_2 = nn.BatchNorm2d(32) 72 | 73 | self.compress_level = compress_level 74 | if compress_level > 0: 75 | assert compress_level <= 8 76 | compress_channel_num = 256 // (2**compress_level) 77 | 78 | # currently only support compress/decompress at layer x_3 79 | self.com_compresser = nn.Conv2d( 80 | 256, compress_channel_num, kernel_size=1, stride=1 81 | ) 82 | self.bn_compress = nn.BatchNorm2d(compress_channel_num) 83 | 84 | self.com_decompresser = nn.Conv2d( 85 | compress_channel_num, 256, kernel_size=1, stride=1 86 | ) 87 | self.bn_decompress = nn.BatchNorm2d(256) 88 | 89 | def encode(self, x): 90 | """Encode the input BEV features. 91 | 92 | Args: 93 | x (tensor): the input BEV features. 94 | 95 | Returns: 96 | A list that contains all the encoded layers. 97 | """ 98 | batch, seq, z, h, w = x.size() 99 | 100 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 101 | x = x.to(torch.float) 102 | x = F.relu(self.bn_pre_1(self.conv_pre_1(x))) 103 | x = F.relu(self.bn_pre_2(self.conv_pre_2(x))) 104 | 105 | # -------------------------------- Encoder Path -------------------------------- 106 | # -- STC block 1 107 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 108 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 109 | 110 | x_1 = x_1.view( 111 | batch, -1, x_1.size(1), x_1.size(2), x_1.size(3) 112 | ).contiguous() # (batch, seq, c, h, w) 113 | x_1 = self.conv3d_1(x_1) 114 | x_1 = x_1.view( 115 | -1, x_1.size(2), x_1.size(3), x_1.size(4) 116 | ).contiguous() # (batch * seq, c, h, w) 117 | 118 | # -- STC block 2 119 | x_2 = F.relu(self.bn2_1(self.conv2_1(x_1))) 120 | x_2 = F.relu(self.bn2_2(self.conv2_2(x_2))) 121 | 122 | x_2 = x_2.view( 123 | batch, -1, x_2.size(1), x_2.size(2), x_2.size(3) 124 | ).contiguous() # (batch, seq, c, h, w) 125 | x_2 = self.conv3d_2(x_2) 126 | x_2 = x_2.view( 127 | -1, x_2.size(2), x_2.size(3), x_2.size(4) 128 | ).contiguous() # (batch * seq, c, h, w), seq = 1 129 | 130 | # -- STC block 3 131 | x_3 = F.relu(self.bn3_1(self.conv3_1(x_2))) 132 | x_3 = F.relu(self.bn3_2(self.conv3_2(x_3))) 133 | 134 | # -- STC block 4 135 | x_4 = F.relu(self.bn4_1(self.conv4_1(x_3))) 136 | x_4 = F.relu(self.bn4_2(self.conv4_2(x_4))) 137 | 138 | # compress x_3 (the layer that agents communicates on) 139 | if self.compress_level > 0: 140 | x_3 = F.relu(self.bn_compress(self.com_compresser(x_3))) 141 | x_3 = F.relu(self.bn_decompress(self.com_decompresser(x_3))) 142 | 143 | return [x, x_1, x_2, x_3, x_4] 144 | 145 | def decode( 146 | self, 147 | x, 148 | x_1, 149 | x_2, 150 | x_3, 151 | x_4, 152 | batch, 153 | 
kd_flag=False, 154 | requires_adaptive_max_pool3d=False, 155 | ): 156 | """Decode the input features. 157 | 158 | Args: 159 | x (tensor): layer-0 features. 160 | x_1 (tensor): layer-1 features. 161 | x_2 (tensor): layer-2 features. 162 | x_3 (tensor): layer-3 features. 163 | x_4 (tensor): layer-4 featuers. 164 | batch (int): The batch size. 165 | kd_flag (bool, optional): Required to be true for DiscoNet. Defaults to False. 166 | requires_adaptive_max_pool3d (bool, optional): If set to true, use adaptive max pooling 3d. Defaults to False. 167 | 168 | Returns: 169 | if kd_flag is true, return a list of output from layer-8 to layer-5 170 | else return a list of a single element: the output after passing through the decoder 171 | """ 172 | # -------------------------------- Decoder Path -------------------------------- 173 | x_5 = F.relu( 174 | self.bn5_1( 175 | self.conv5_1( 176 | torch.cat((F.interpolate(x_4, scale_factor=(2, 2)), x_3), dim=1) 177 | ) 178 | ) 179 | ) 180 | x_5 = F.relu(self.bn5_2(self.conv5_2(x_5))) 181 | 182 | x_2 = x_2.view(batch, -1, x_2.size(1), x_2.size(2), x_2.size(3)) 183 | x_2 = x_2.permute(0, 2, 1, 3, 4).contiguous() 184 | x_2 = ( 185 | F.adaptive_max_pool3d(x_2, (1, None, None)) 186 | if requires_adaptive_max_pool3d 187 | else x_2 188 | ) 189 | x_2 = x_2.permute(0, 2, 1, 3, 4).contiguous() 190 | x_2 = x_2.view(-1, x_2.size(2), x_2.size(3), x_2.size(4)).contiguous() 191 | 192 | x_6 = F.relu( 193 | self.bn6_1( 194 | self.conv6_1( 195 | torch.cat((F.interpolate(x_5, scale_factor=(2, 2)), x_2), dim=1) 196 | ) 197 | ) 198 | ) 199 | x_6 = F.relu(self.bn6_2(self.conv6_2(x_6))) 200 | 201 | x_1 = x_1.view(batch, -1, x_1.size(1), x_1.size(2), x_1.size(3)) 202 | x_1 = x_1.permute(0, 2, 1, 3, 4).contiguous() 203 | x_1 = ( 204 | F.adaptive_max_pool3d(x_1, (1, None, None)) 205 | if requires_adaptive_max_pool3d 206 | else x_1 207 | ) 208 | x_1 = x_1.permute(0, 2, 1, 3, 4).contiguous() 209 | x_1 = x_1.view(-1, x_1.size(2), x_1.size(3), x_1.size(4)).contiguous() 210 | 211 | x_7 = F.relu( 212 | self.bn7_1( 213 | self.conv7_1( 214 | torch.cat((F.interpolate(x_6, scale_factor=(2, 2)), x_1), dim=1) 215 | ) 216 | ) 217 | ) 218 | x_7 = F.relu(self.bn7_2(self.conv7_2(x_7))) 219 | 220 | x = x.view(batch, -1, x.size(1), x.size(2), x.size(3)) 221 | x = x.permute(0, 2, 1, 3, 4).contiguous() 222 | x = ( 223 | F.adaptive_max_pool3d(x, (1, None, None)) 224 | if requires_adaptive_max_pool3d 225 | else x 226 | ) 227 | x = x.permute(0, 2, 1, 3, 4).contiguous() 228 | x = x.view(-1, x.size(2), x.size(3), x.size(4)).contiguous() 229 | 230 | x_8 = F.relu( 231 | self.bn8_1( 232 | self.conv8_1( 233 | torch.cat((F.interpolate(x_7, scale_factor=(2, 2)), x), dim=1) 234 | ) 235 | ) 236 | ) 237 | res_x = F.relu(self.bn8_2(self.conv8_2(x_8))) 238 | 239 | if kd_flag: 240 | return [res_x, x_7, x_6, x_5] 241 | else: 242 | return [res_x] 243 | 244 | 245 | class STPN_KD(Backbone): 246 | """Used by non-intermediate models. Pass the output from encoder directly to decoder.""" 247 | 248 | def __init__(self, height_feat_size=13, compress_level=0): 249 | super().__init__(height_feat_size, compress_level) 250 | 251 | def forward(self, x): 252 | batch, seq, z, h, w = x.size() 253 | encoded_layers = super().encode(x) 254 | decoded_layers = super().decode( 255 | *encoded_layers, batch, kd_flag=True, requires_adaptive_max_pool3d=True 256 | ) 257 | return (*decoded_layers, encoded_layers[3], encoded_layers[4]) 258 | 259 | 260 | class LidarEncoder(Backbone): 261 | """The encoder class. 
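Returns the encoded feature maps [x, x_1, x_2, x_3, x_4] from Backbone.encode.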
Encodes input features in forward pass.""" 262 | 263 | def __init__(self, height_feat_size=13, compress_level=0): 264 | super().__init__(height_feat_size, compress_level) 265 | 266 | def forward(self, x): 267 | return super().encode(x) 268 | 269 | 270 | class LidarDecoder(Backbone): 271 | """The decoder class. Decodes input features in forward pass.""" 272 | 273 | def __init__(self, height_feat_size=13): 274 | super().__init__(height_feat_size) 275 | 276 | def forward(self, x, x_1, x_2, x_3, x_4, batch, kd_flag=False): 277 | return super().decode(x, x_1, x_2, x_3, x_4, batch, kd_flag) 278 | 279 | 280 | class Conv3D(nn.Module): 281 | """3D cnn used in the encoder.""" 282 | 283 | def __init__(self, in_channel, out_channel, kernel_size, stride, padding): 284 | super(Conv3D, self).__init__() 285 | self.conv3d = nn.Conv3d( 286 | in_channel, 287 | out_channel, 288 | kernel_size=kernel_size, 289 | stride=stride, 290 | padding=padding, 291 | ) 292 | self.bn3d = nn.BatchNorm3d(out_channel) 293 | 294 | def forward(self, x): 295 | # input x: (batch, seq, c, h, w) 296 | x = x.permute(0, 2, 1, 3, 4).contiguous() # (batch, c, seq_len, h, w) 297 | x = F.relu(self.bn3d(self.conv3d(x))) 298 | x = x.permute(0, 2, 1, 3, 4).contiguous() # (batch, seq_len, c, h, w) 299 | 300 | return x 301 | 302 | 303 | """""" """""" """""" """ 304 | Added by Yiming 305 | 306 | """ """""" """""" """""" 307 | 308 | 309 | class Conv2DBatchNormRelu(nn.Module): 310 | def __init__( 311 | self, 312 | in_channels, 313 | n_filters, 314 | k_size, 315 | stride, 316 | padding, 317 | bias=True, 318 | dilation=1, 319 | is_batchnorm=True, 320 | ): 321 | super(Conv2DBatchNormRelu, self).__init__() 322 | 323 | conv_mod = nn.Conv2d( 324 | int(in_channels), 325 | int(n_filters), 326 | kernel_size=k_size, 327 | padding=padding, 328 | stride=stride, 329 | bias=bias, 330 | dilation=dilation, 331 | ) 332 | 333 | if is_batchnorm: 334 | self.cbr_unit = nn.Sequential( 335 | conv_mod, nn.BatchNorm2d(int(n_filters)), nn.ReLU(inplace=True) 336 | ) 337 | else: 338 | self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=True)) 339 | 340 | def forward(self, inputs): 341 | outputs = self.cbr_unit(inputs) 342 | return outputs 343 | 344 | 345 | class Sparsemax(nn.Module): 346 | """Sparsemax function.""" 347 | 348 | def __init__(self, dim=None): 349 | """Initialize sparsemax activation 350 | 351 | Args: 352 | dim (int, optional): The dimension over which to apply the sparsemax function. 353 | """ 354 | super(Sparsemax, self).__init__() 355 | 356 | self.dim = -1 if dim is None else dim 357 | 358 | def forward(self, input): 359 | """Forward function. 360 | Args: 361 | input (torch.Tensor): Input tensor. First dimension should be the batch size 362 | Returns: 363 | torch.Tensor: [batch_size x number_of_logits] Output tensor 364 | """ 365 | # Sparsemax currently only handles 2-dim tensors, 366 | # so we reshape and reshape back after sparsemax 367 | original_size = input.size() 368 | input = input.view(-1, input.size(self.dim)) 369 | 370 | dim = 1 371 | number_of_logits = input.size(dim) 372 | 373 | # Translate input by max for numerical stability 374 | input = input - torch.max(input, dim=dim, keepdim=True)[0].expand_as(input) 375 | 376 | # Sort input in descending order. 
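        # The lines below implement the sparsemax projection onto the simplex:
        # sort z in descending order, find the largest support size k such that
        # 1 + k * z_(k) > sum_{j <= k} z_(j), set the threshold
        # tau = (sum of the top-k entries - 1) / k, and output max(z - tau, 0).
        # For example, z = [1.0, 0.8, 0.1] gives k = 2, tau = 0.4, output [0.6, 0.4, 0.0].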
377 | # (NOTE: Can be replaced with linear time selection method described here: 378 | # http://stanford.edu/~jduchi/projects/DuchiShSiCh08.html) 379 | zs = torch.sort(input=input, dim=dim, descending=True)[0] 380 | range = torch.range(start=1, end=number_of_logits, device=input.device).view( 381 | 1, -1 382 | ) 383 | range = range.expand_as(zs) 384 | 385 | # Determine sparsity of projection 386 | bound = 1 + range * zs 387 | cumulative_sum_zs = torch.cumsum(zs, dim) 388 | is_gt = torch.gt(bound, cumulative_sum_zs).type(input.type()) 389 | k = torch.max(is_gt * range, dim, keepdim=True)[0] 390 | 391 | # Compute threshold function 392 | zs_sparse = is_gt * zs 393 | 394 | # Compute taus 395 | taus = (torch.sum(zs_sparse, dim, keepdim=True) - 1) / k 396 | taus = taus.expand_as(input) 397 | 398 | # Sparsemax 399 | self.output = torch.max(torch.zeros_like(input), input - taus) 400 | 401 | output = self.output.view(original_size) 402 | 403 | return output 404 | 405 | def backward(self, grad_output): 406 | """Backward function.""" 407 | dim = 1 408 | 409 | nonzeros = torch.ne(self.output, 0) 410 | sum = torch.sum(grad_output * nonzeros, dim=dim) / torch.sum(nonzeros, dim=dim) 411 | self.grad_input = nonzeros * (grad_output - sum.expand_as(grad_output)) 412 | 413 | return self.grad_input 414 | -------------------------------------------------------------------------------- /coperception/models/det/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .Backbone import Backbone 2 | -------------------------------------------------------------------------------- /coperception/models/det/base/FusionBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.base.IntermediateModelBase import IntermediateModelBase 2 | 3 | 4 | class FusionBase(IntermediateModelBase): 5 | def __init__( 6 | self, 7 | config, 8 | layer=3, 9 | in_channels=13, 10 | kd_flag=True, 11 | num_agent=5, 12 | compress_level=0, 13 | only_v2i=False, 14 | ): 15 | super().__init__(config, layer, in_channels, kd_flag, num_agent, compress_level, only_v2i) 16 | self.num_agent = 0 17 | 18 | def fusion(self): 19 | raise NotImplementedError( 20 | "Please implement this method for specific fusion strategies" 21 | ) 22 | 23 | def forward(self, bevs, trans_matrices, num_agent_tensor, batch_size=1): 24 | 25 | bevs = bevs.permute(0, 1, 4, 2, 3) # (Batch, seq, z, h, w) 26 | encoded_layers = self.u_encoder(bevs) 27 | device = bevs.device 28 | 29 | feat_maps, size = super().get_feature_maps_and_size(encoded_layers) 30 | 31 | feat_list = super().build_feature_list(batch_size, feat_maps) 32 | 33 | local_com_mat = super().build_local_communication_matrix( 34 | feat_list 35 | ) # [2 5 512 16 16] [batch, agent, channel, height, width] 36 | local_com_mat_update = super().build_local_communication_matrix( 37 | feat_list 38 | ) # to avoid the inplace operation 39 | 40 | for b in range(batch_size): 41 | self.num_agent = num_agent_tensor[b, 0] 42 | for i in range(self.num_agent): 43 | self.tg_agent = local_com_mat[b, i] 44 | self.neighbor_feat_list = [] 45 | self.neighbor_feat_list.append(self.tg_agent) 46 | all_warp = trans_matrices[b, i] # transformation [2 5 5 4 4] 47 | 48 | super().build_neighbors_feature_list( 49 | b, 50 | i, 51 | all_warp, 52 | self.num_agent, 53 | local_com_mat, 54 | device, 55 | size, 56 | trans_matrices, 57 | ) 58 | 59 | # feature update 60 | local_com_mat_update[b, i] = self.fusion() 61 | 62 | # weighted feature 
maps is passed to decoder 63 | feat_fuse_mat = super().agents_to_batch(local_com_mat_update) 64 | 65 | decoded_layers = super().get_decoded_layers( 66 | encoded_layers, feat_fuse_mat, batch_size 67 | ) 68 | x = decoded_layers[0] 69 | 70 | cls_preds, loc_preds, result = super().get_cls_loc_result(x) 71 | 72 | if self.kd_flag == 1: 73 | return (result, *decoded_layers, feat_fuse_mat) 74 | else: 75 | return result 76 | -------------------------------------------------------------------------------- /coperception/models/det/base/IntermediateModelBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.backbone.Backbone import * 2 | from coperception.models.det.base.DetModelBase import DetModelBase 3 | 4 | 5 | class IntermediateModelBase(DetModelBase): 6 | """Abstract class. The parent class for all intermediate models. 7 | 8 | Attributes: 9 | u_encoder (nn.Module): The feature encoder. 10 | decoder (nn.Module): The feature decoder. 11 | """ 12 | 13 | def __init__( 14 | self, 15 | config, 16 | layer=3, 17 | in_channels=13, 18 | kd_flag=True, 19 | num_agent=5, 20 | compress_level=0, 21 | only_v2i=False, 22 | ): 23 | super().__init__(config, layer, in_channels, kd_flag, num_agent=num_agent, only_v2i=only_v2i) 24 | self.u_encoder = LidarEncoder(in_channels, compress_level) 25 | self.decoder = LidarDecoder(height_feat_size=in_channels) 26 | -------------------------------------------------------------------------------- /coperception/models/det/base/NonIntermediateModelBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.det.backbone.Backbone import * 2 | from coperception.models.det.base.DetModelBase import DetModelBase 3 | 4 | 5 | class NonIntermediateModelBase(DetModelBase): 6 | """Abstract class. The parent class for non-intermediate models. 7 | 8 | Attributes: 9 | stpn (nn.Module): Pass the features through encoder, then decoder. 
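Unlike the intermediate models, no feature-level collaboration happens inside the network; encoded features go straight to the decoder.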
10 | """ 11 | 12 | def __init__( 13 | self, 14 | config, 15 | layer=3, 16 | in_channels=13, 17 | kd_flag=True, 18 | num_agent=5, 19 | compress_level=0, 20 | ): 21 | super(NonIntermediateModelBase, self).__init__( 22 | config, layer, in_channels, kd_flag, num_agent 23 | ) 24 | self.stpn = STPN_KD(config.map_dims[2], compress_level) 25 | -------------------------------------------------------------------------------- /coperception/models/det/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .DetModelBase import DetModelBase 2 | from .FusionBase import FusionBase 3 | from .IntermediateModelBase import IntermediateModelBase 4 | from .NonIntermediateModelBase import NonIntermediateModelBase 5 | -------------------------------------------------------------------------------- /coperception/models/seg/AgentWiseWeightedFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from coperception.models.seg.FusionBase import FusionBase 6 | 7 | 8 | class AgentWiseWeightedFusion(FusionBase): 9 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.agent_weighted_fusion = AgentWeightedFusion() 14 | 15 | def fusion(self): 16 | agent_weight_list = list() 17 | for k in range(self.com_num_agent): 18 | cat_feat = torch.cat([self.tg_agent, self.neighbor_feat_list[k]], dim=0) 19 | cat_feat = cat_feat.unsqueeze(0) 20 | agent_weight = self.agent_weighted_fusion(cat_feat) 21 | agent_weight_list.append(agent_weight) 22 | 23 | soft_agent_weight_list = torch.squeeze( 24 | F.softmax(torch.tensor(agent_weight_list).unsqueeze(0), dim=1) 25 | ) 26 | 27 | agent_wise_weight_feat = 0 28 | for k in range(self.com_num_agent): 29 | agent_wise_weight_feat = ( 30 | agent_wise_weight_feat 31 | + soft_agent_weight_list[k] * self.neighbor_feat_list[k] 32 | ) 33 | 34 | return agent_wise_weight_feat 35 | 36 | 37 | # FIXME: Change size 38 | class AgentWeightedFusion(nn.Module): 39 | def __init__(self): 40 | super(AgentWeightedFusion, self).__init__() 41 | 42 | # self.conv1_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) 43 | # self.bn1_1 = nn.BatchNorm2d(128) 44 | # 45 | # self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 46 | # self.bn1_2 = nn.BatchNorm2d(32) 47 | # 48 | # self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 49 | # self.bn1_3 = nn.BatchNorm2d(8) 50 | # 51 | # self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 52 | # 53 | # # self.conv1_1 = nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0) 54 | # # self.bn1_1 = nn.BatchNorm2d(1) 55 | # self.conv1_5 = nn.Conv2d(1, 1, kernel_size=32, stride=1, padding=0) 56 | # # # self.bn1_2 = nn.BatchNorm2d(1) 57 | 58 | self.conv1_1 = nn.Conv2d(1024, 128, kernel_size=1, stride=1, padding=0) 59 | self.bn1_1 = nn.BatchNorm2d(128) 60 | 61 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 62 | self.bn1_2 = nn.BatchNorm2d(32) 63 | 64 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 65 | self.bn1_3 = nn.BatchNorm2d(8) 66 | 67 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 68 | 69 | self.conv1_5 = nn.Conv2d(1, 1, kernel_size=32, stride=1, padding=0) 70 | 71 | def forward(self, x): 72 | x = x.view(-1, x.size(-3), 
x.size(-2), x.size(-1)) 73 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 74 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 75 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 76 | x_1 = F.relu(self.conv1_4(x_1)) 77 | x_1 = F.relu(self.conv1_5(x_1)) 78 | 79 | return x_1 80 | -------------------------------------------------------------------------------- /coperception/models/seg/CatFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from coperception.models.seg.FusionBase import FusionBase 6 | 7 | 8 | class CatFusion(FusionBase): 9 | def __init__(self, n_channels, n_classes, num_agent, compress_level, only_v2i): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.modulation_layer_3 = ModulationLayer3() 14 | 15 | def fusion(self): 16 | mean_feat = torch.mean(torch.stack(self.neighbor_feat_list), dim=0) # [c, h, w] 17 | cat_feat = torch.cat([self.tg_agent, mean_feat], dim=0) 18 | cat_feat = cat_feat.unsqueeze(0) # [1, 1, c, h, w] 19 | return self.modulation_layer_3(cat_feat) 20 | 21 | 22 | # FIXME: Change size 23 | class ModulationLayer3(nn.Module): 24 | def __init__(self): 25 | super(ModulationLayer3, self).__init__() 26 | 27 | self.conv1_1 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0) 28 | self.bn1_1 = nn.BatchNorm2d(512) 29 | 30 | def forward(self, x): 31 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 32 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 33 | 34 | return x_1 35 | -------------------------------------------------------------------------------- /coperception/models/seg/DiscoNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from coperception.models.seg.FusionBase import FusionBase 6 | 7 | 8 | class DiscoNet(FusionBase): 9 | def __init__( 10 | self, n_channels, n_classes, num_agent, kd_flag=True, compress_level=0, only_v2i=False 11 | ): 12 | super().__init__( 13 | n_channels, 14 | n_classes, 15 | num_agent, 16 | kd_flag=kd_flag, 17 | compress_level=compress_level, 18 | only_v2i=only_v2i, 19 | ) 20 | self.pixel_weighted_fusion = PixelWeightedFusionSoftmax(512) 21 | 22 | # def forward(self, x, trans_matrices, num_agent_tensor): 23 | # device = x.device 24 | # x1 = self.inc(x) 25 | # x2 = self.down1(x1) 26 | # x3 = self.down2(x2) 27 | # x4 = self.down3(x3) # b 512 32 32 28 | # size = (1, 512, 32, 32) 29 | 30 | # batch_size = x.size(0) // self.num_agent 31 | # feat_map, feat_list = super().build_feat_map_and_feat_list(x4, batch_size) 32 | 33 | # local_com_mat = torch.cat(tuple(feat_list), 1) 34 | # local_com_mat_update = torch.cat(tuple(feat_list), 1) 35 | 36 | # for b in range(batch_size): 37 | # self.com_num_agent = num_agent_tensor[b, 0] 38 | 39 | # agent_feat_list = list() 40 | # for nb in range(self.num_agent): 41 | # agent_feat_list.append(local_com_mat[b, nb]) 42 | 43 | # for i in range(self.num_agent): 44 | # self.tg_agent = local_com_mat[b, i] 45 | # all_warp = trans_matrices[b, i] 46 | 47 | # self.neighbor_feat_list = list() 48 | # self.neighbor_feat_list.append(self.tg_agent) 49 | 50 | # for j in range(self.num_agent): 51 | # if j != i: 52 | # self.neighbor_feat_list.append( 53 | # super().feature_transformation( 54 | # b, j, local_com_mat, all_warp, device, size 55 | # ) 56 | # ) 57 | 58 | # local_com_mat_update[b, i] = 
self.fusion() 59 | 60 | # feat_list = [] 61 | # for i in range(self.num_agent): 62 | # feat_list.append(local_com_mat_update[:, i, :, :, :]) 63 | # feat_mat = torch.cat(feat_list, 0) 64 | 65 | # x5 = self.down4(feat_mat) 66 | # x6 = self.up1(x5, feat_mat) 67 | # x7 = self.up2(x6, x3) 68 | # x8 = self.up3(x7, x2) 69 | # x9 = self.up4(x8, x1) 70 | # logits = self.outc(x9) 71 | 72 | # if self.kd_flag: 73 | # return logits, x9, x8, x7, x6, x5, feat_mat 74 | # else: 75 | # return logits 76 | 77 | def fusion(self): 78 | tmp_agent_weight_list = list() 79 | sum_weight = 0 80 | nb_len = len(self.neighbor_feat_list) 81 | for k in range(nb_len): 82 | cat_feat = torch.cat([self.tg_agent, self.neighbor_feat_list[k]], dim=0) 83 | cat_feat = cat_feat.unsqueeze(0) 84 | agent_weight = torch.squeeze(self.pixel_weighted_fusion(cat_feat)) 85 | tmp_agent_weight_list.append(torch.exp(agent_weight)) 86 | sum_weight = sum_weight + torch.exp(agent_weight) 87 | 88 | agent_weight_list = list() 89 | for k in range(nb_len): 90 | agent_weight = torch.div(tmp_agent_weight_list[k], sum_weight) 91 | agent_weight.expand([256, -1, -1]) 92 | agent_weight_list.append(agent_weight) 93 | 94 | agent_wise_weight_feat = 0 95 | for k in range(nb_len): 96 | agent_wise_weight_feat = ( 97 | agent_wise_weight_feat 98 | + agent_weight_list[k] * self.neighbor_feat_list[k] 99 | ) 100 | 101 | return agent_wise_weight_feat 102 | 103 | 104 | class PixelWeightedFusionSoftmax(nn.Module): 105 | def __init__(self, channel): 106 | super(PixelWeightedFusionSoftmax, self).__init__() 107 | 108 | self.conv1_1 = nn.Conv2d(channel * 2, 128, kernel_size=1, stride=1, padding=0) 109 | self.bn1_1 = nn.BatchNorm2d(128) 110 | 111 | self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) 112 | self.bn1_2 = nn.BatchNorm2d(32) 113 | 114 | self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) 115 | self.bn1_3 = nn.BatchNorm2d(8) 116 | 117 | self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) 118 | 119 | def forward(self, x): 120 | x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) 121 | x_1 = F.relu(self.bn1_1(self.conv1_1(x))) 122 | x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) 123 | x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) 124 | x_1 = F.relu(self.conv1_4(x_1)) 125 | 126 | return x_1 127 | -------------------------------------------------------------------------------- /coperception/models/seg/FusionBase.py: -------------------------------------------------------------------------------- 1 | from coperception.models.seg.SegModelBase import SegModelBase 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class FusionBase(SegModelBase): 7 | def __init__( 8 | self, n_channels, n_classes, num_agent=5, kd_flag=False, compress_level=0, only_v2i=False 9 | ): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.neighbor_feat_list = None 14 | self.tg_agent = None 15 | self.current_num_agent = None 16 | self.kd_flag = kd_flag 17 | self.only_v2i = only_v2i 18 | 19 | def fusion(self): 20 | raise NotImplementedError( 21 | "Please implement this method for specific fusion strategies" 22 | ) 23 | 24 | def forward(self, x, trans_matrices, num_agent_tensor): 25 | x1 = self.inc(x) 26 | x2 = self.down1(x1) 27 | x3 = self.down2(x2) 28 | x4 = self.down3(x3) # b 512 32 32 29 | size = (1, 512, 32, 32) 30 | 31 | if self.compress_level > 0: 32 | x4 = F.relu(self.bn_compress(self.com_compresser(x4))) 33 | x4 = 
F.relu(self.bn_decompress(self.com_decompresser(x4))) 34 | 35 | batch_size = x.size(0) // self.num_agent 36 | feat_list = super().build_feat_list(x4, batch_size) 37 | 38 | local_com_mat = torch.cat(tuple(feat_list), 1) 39 | local_com_mat_update = torch.cat(tuple(feat_list), 1) 40 | 41 | for b in range(batch_size): 42 | self.com_num_agent = num_agent_tensor[b, 0] 43 | 44 | agent_feat_list = list() 45 | for nb in range(self.com_num_agent): 46 | agent_feat_list.append(local_com_mat[b, nb]) 47 | 48 | for i in range(self.com_num_agent): 49 | self.tg_agent = local_com_mat[b, i] 50 | 51 | self.neighbor_feat_list = list() 52 | self.neighbor_feat_list.append(self.tg_agent) 53 | 54 | for j in range(self.com_num_agent): 55 | if j != i: 56 | if self.only_v2i and i != 0 and j != 0: 57 | continue 58 | 59 | self.neighbor_feat_list.append( 60 | super().feature_transformation( 61 | b, 62 | j, 63 | i, 64 | local_com_mat, 65 | size, 66 | trans_matrices, 67 | ) 68 | ) 69 | 70 | local_com_mat_update[b, i] = self.fusion() 71 | 72 | feat_mat = super().agents_to_batch(local_com_mat_update) 73 | 74 | x5 = self.down4(feat_mat) 75 | x6 = self.up1(x5, feat_mat) 76 | x7 = self.up2(x6, x3) 77 | x8 = self.up3(x7, x2) 78 | x9 = self.up4(x8, x1) 79 | logits = self.outc(x9) 80 | 81 | if self.kd_flag: 82 | return logits, x9, x8, x7, x6, x5, feat_mat 83 | else: 84 | return logits 85 | -------------------------------------------------------------------------------- /coperception/models/seg/MaxFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from coperception.models.seg.FusionBase import FusionBase 4 | 5 | 6 | class MaxFusion(FusionBase): 7 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 8 | super().__init__( 9 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 10 | ) 11 | 12 | def fusion(self): 13 | return torch.max(torch.stack(self.neighbor_feat_list), dim=0).values 14 | -------------------------------------------------------------------------------- /coperception/models/seg/MeanFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from coperception.models.seg.FusionBase import FusionBase 4 | 5 | 6 | class MeanFusion(FusionBase): 7 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 8 | super().__init__( 9 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 10 | ) 11 | 12 | def fusion(self): 13 | return torch.mean(torch.stack(self.neighbor_feat_list), dim=0) 14 | -------------------------------------------------------------------------------- /coperception/models/seg/SegModelBase.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SegModelBase(nn.Module): 7 | def __init__( 8 | self, n_channels, n_classes, bilinear=True, num_agent=5, compress_level=0, only_v2i=False 9 | ): 10 | super().__init__() 11 | self.n_channels = n_channels 12 | self.n_classes = n_classes 13 | self.bilinear = bilinear 14 | self.num_agent = num_agent 15 | self.only_v2i = only_v2i 16 | 17 | self.inc = DoubleConv(n_channels, 64) 18 | self.down1 = Down(64, 128) 19 | self.down2 = Down(128, 256) 20 | self.down3 = Down(256, 512) 21 | factor = 2 if bilinear else 1 22 | self.down4 = Down(512, 1024 // factor) 23 | self.up1 = Up(1024, 512 
// factor, bilinear) 24 | self.up2 = Up(512, 256 // factor, bilinear) 25 | self.up3 = Up(256, 128 // factor, bilinear) 26 | self.up4 = Up(128, 64, bilinear) 27 | self.outc = OutConv(64, n_classes) 28 | 29 | self.compress_level = compress_level 30 | if compress_level > 0: 31 | assert compress_level <= 9 32 | feat_map_channel_num = 512 33 | compress_channel_num = feat_map_channel_num // (2**compress_level) 34 | 35 | self.com_compresser = nn.Conv2d( 36 | feat_map_channel_num, compress_channel_num, kernel_size=1, stride=1 37 | ) 38 | self.bn_compress = nn.BatchNorm2d(compress_channel_num) 39 | 40 | self.com_decompresser = nn.Conv2d( 41 | compress_channel_num, feat_map_channel_num, kernel_size=1, stride=1 42 | ) 43 | self.bn_decompress = nn.BatchNorm2d(feat_map_channel_num) 44 | 45 | def build_feat_list(self, feat_maps, batch_size): 46 | feat_maps = torch.flip(feat_maps, (2,)) 47 | 48 | tmp_feat_map = {} 49 | feat_list = [] 50 | for i in range(self.num_agent): 51 | tmp_feat_map[i] = torch.unsqueeze( 52 | feat_maps[batch_size * i : batch_size * (i + 1)], 1 53 | ) 54 | feat_list.append(tmp_feat_map[i]) 55 | 56 | return feat_list 57 | 58 | @staticmethod 59 | def feature_transformation(b, j, agent_idx, local_com_mat, size, trans_matrices): 60 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 61 | nb_agent = torch.unsqueeze(local_com_mat[b, j], 0) 62 | 63 | tfm_ji = trans_matrices[b, j, agent_idx] 64 | M = ( 65 | torch.hstack((tfm_ji[:2, :2], -tfm_ji[:2, 3:4])).float().unsqueeze(0) 66 | ) # [1,2,3] 67 | M = M.to(device) 68 | 69 | mask = torch.tensor([[[1, 1, 4 / 128], [1, 1, 4 / 128]]], device=M.device) 70 | 71 | M *= mask 72 | 73 | grid = F.affine_grid(M, size=torch.Size(size)) 74 | warp_feat = F.grid_sample(nb_agent, grid).squeeze() 75 | return warp_feat 76 | 77 | def agents_to_batch(self, feats): 78 | feat_list = [] 79 | for i in range(self.num_agent): 80 | feat_list.append(feats[:, i, :, :, :]) 81 | feat_mat = torch.cat(feat_list, 0) 82 | 83 | feat_mat = torch.flip(feat_mat, (2,)) 84 | 85 | return feat_mat 86 | 87 | 88 | ################## 89 | # Unet # ref: https://github.com/milesial/Pytorch-UNet 90 | ################## 91 | class DoubleConv(nn.Module): 92 | def __init__(self, in_channels, out_channels, mid_channels=None): 93 | super().__init__() 94 | if not mid_channels: 95 | mid_channels = out_channels 96 | self.double_conv = nn.Sequential( 97 | nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1), 98 | nn.BatchNorm2d(mid_channels), 99 | nn.ReLU(inplace=True), 100 | nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1), 101 | nn.BatchNorm2d(out_channels), 102 | nn.ReLU(inplace=True), 103 | ) 104 | 105 | def forward(self, x): 106 | return self.double_conv(x) 107 | 108 | 109 | class Down(nn.Module): 110 | def __init__(self, in_channels, out_channels): 111 | super().__init__() 112 | self.maxpool_conv = nn.Sequential( 113 | nn.MaxPool2d(2), 114 | DoubleConv(in_channels, out_channels), 115 | ) 116 | 117 | def forward(self, x): 118 | return self.maxpool_conv(x) 119 | 120 | 121 | class Up(nn.Module): 122 | def __init__(self, in_channels, out_channels, bilinear=True): 123 | super().__init__() 124 | if bilinear: 125 | self.up = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) 126 | self.conv = DoubleConv(in_channels, out_channels, in_channels // 2) 127 | else: 128 | self.up = nn.ConvTranspose2d( 129 | in_channels, in_channels // 2, kernel_size=2, stride=2 130 | ) 131 | self.conv = DoubleConv(in_channels, out_channels) 132 | 133 | def 
forward(self, x1, x2): 134 | x1 = self.up(x1) 135 | diff_y = x2.size()[2] - x1.size()[2] 136 | diff_x = x2.size()[3] - x1.size()[3] 137 | 138 | x1 = F.pad( 139 | x1, [diff_x // 2, diff_x - diff_x // 2, diff_y // 2, diff_y - diff_y // 2] 140 | ) 141 | x = torch.cat([x2, x1], dim=1) 142 | return self.conv(x) 143 | 144 | 145 | class OutConv(nn.Module): 146 | def __init__(self, in_channels, out_channels): 147 | super(OutConv, self).__init__() 148 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) 149 | 150 | def forward(self, x): 151 | return self.conv(x) 152 | -------------------------------------------------------------------------------- /coperception/models/seg/SumFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from coperception.models.seg.FusionBase import FusionBase 4 | 5 | 6 | class SumFusion(FusionBase): 7 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 8 | super().__init__( 9 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 10 | ) 11 | 12 | def fusion(self): 13 | return torch.sum(torch.stack(self.neighbor_feat_list), dim=0) 14 | -------------------------------------------------------------------------------- /coperception/models/seg/UNet.py: -------------------------------------------------------------------------------- 1 | from coperception.models.seg.SegModelBase import SegModelBase 2 | import torch.nn.functional as F 3 | 4 | 5 | class UNet(SegModelBase): 6 | def __init__( 7 | self, 8 | n_channels, 9 | n_classes, 10 | bilinear=True, 11 | num_agent=5, 12 | kd_flag=False, 13 | compress_level=0, 14 | ): 15 | super().__init__( 16 | n_channels, 17 | n_classes, 18 | bilinear, 19 | num_agent=num_agent, 20 | compress_level=compress_level, 21 | ) 22 | self.kd_flag = kd_flag 23 | 24 | def forward(self, x): 25 | x1 = self.inc(x) 26 | x2 = self.down1(x1) 27 | x3 = self.down2(x2) 28 | x4 = self.down3(x3) 29 | 30 | if self.compress_level > 0: 31 | x4 = F.relu(self.bn_compress(self.com_compresser(x4))) 32 | x4 = F.relu(self.bn_decompress(self.com_decompresser(x4))) 33 | 34 | x5 = self.down4(x4) 35 | x6 = self.up1(x5, x4) 36 | x7 = self.up2(x6, x3) 37 | x8 = self.up3(x7, x2) 38 | x9 = self.up4(x8, x1) 39 | logits = self.outc(x9) 40 | 41 | if self.kd_flag: 42 | return logits, x9, x8, x7, x6, x5, x4 43 | else: 44 | return logits 45 | -------------------------------------------------------------------------------- /coperception/models/seg/V2VNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import coperception.utils.convolutional_rnn as convrnn 4 | from coperception.models.seg.SegModelBase import SegModelBase 5 | import torch.nn.functional as F 6 | 7 | 8 | class V2VNet(SegModelBase): 9 | def __init__(self, n_channels, n_classes, num_agent=5, compress_level=0, only_v2i=False): 10 | super().__init__( 11 | n_channels, n_classes, num_agent=num_agent, compress_level=compress_level, only_v2i=only_v2i 12 | ) 13 | self.layer_channel = 512 14 | self.gnn_iter_num = 1 15 | self.convgru = convrnn.Conv2dGRU( 16 | in_channels=self.layer_channel * 2, 17 | out_channels=self.layer_channel, 18 | kernel_size=3, 19 | num_layers=1, 20 | bidirectional=False, 21 | dilation=1, 22 | stride=1, 23 | ) 24 | 25 | def forward(self, x, trans_matrices, num_agent_tensor): 26 | x1 = self.inc(x) 27 | x2 = self.down1(x1) 28 | x3 = self.down2(x2) 29 | x4 = self.down3(x3) # b 512 32 32 30 | size = (1, 
512, 32, 32) 31 | 32 | if self.compress_level > 0: 33 | x4 = F.relu(self.bn_compress(self.com_compresser(x4))) 34 | x4 = F.relu(self.bn_decompress(self.com_decompresser(x4))) 35 | 36 | batch_size = x.size(0) // self.num_agent 37 | feat_list = super().build_feat_list(x4, batch_size) 38 | 39 | local_com_mat = torch.cat(tuple(feat_list), 1) 40 | local_com_mat_update = torch.cat(tuple(feat_list), 1) 41 | 42 | for b in range(batch_size): 43 | com_num_agent = num_agent_tensor[b, 0] 44 | 45 | agent_feat_list = list() 46 | for nb in range(self.num_agent): 47 | agent_feat_list.append(local_com_mat[b, nb]) 48 | 49 | for _ in range(self.gnn_iter_num): 50 | updated_feats_list = list() 51 | 52 | for i in range(com_num_agent): 53 | tg_agent = local_com_mat[b, i] 54 | 55 | neighbor_feat_list = list() 56 | neighbor_feat_list.append(tg_agent) 57 | 58 | for j in range(com_num_agent): 59 | if j != i: 60 | if self.only_v2i and i != 0 and j != 0: 61 | continue 62 | 63 | neighbor_feat_list.append( 64 | super().feature_transformation( 65 | b, 66 | j, 67 | i, 68 | local_com_mat, 69 | size, 70 | trans_matrices, 71 | ) 72 | ) 73 | 74 | mean_feat = torch.mean(torch.stack(neighbor_feat_list), dim=0) 75 | cat_feat = torch.cat([agent_feat_list[i], mean_feat], dim=0) 76 | cat_feat = cat_feat.unsqueeze(0).unsqueeze(0) 77 | updated_feat, _ = self.convgru(cat_feat, None) 78 | updated_feat = torch.squeeze(torch.squeeze(updated_feat, 0), 0) 79 | updated_feats_list.append(updated_feat) 80 | agent_feat_list = updated_feats_list 81 | for k in range(com_num_agent): 82 | local_com_mat_update[b, k] = agent_feat_list[k] 83 | 84 | feat_mat = super().agents_to_batch(local_com_mat_update) 85 | 86 | x5 = self.down4(feat_mat) 87 | x = self.up1(x5, feat_mat) 88 | x = self.up2(x, x3) 89 | x = self.up3(x, x2) 90 | x = self.up4(x, x1) 91 | logits = self.outc(x) 92 | return logits 93 | -------------------------------------------------------------------------------- /coperception/models/seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .SegModelBase import SegModelBase 2 | from .V2VNet import V2VNet 3 | from .When2Com_UNet import When2Com_UNet 4 | from .UNet import UNet 5 | from .FusionBase import FusionBase 6 | from .MeanFusion import MeanFusion 7 | from .MaxFusion import MaxFusion 8 | from .SumFusion import SumFusion 9 | from .CatFusion import CatFusion 10 | from .AgentWiseWeightedFusion import AgentWiseWeightedFusion 11 | from .DiscoNet import DiscoNet 12 | -------------------------------------------------------------------------------- /coperception/utils/AverageMeter.py: -------------------------------------------------------------------------------- 1 | class AverageMeter: 2 | def __init__(self, name, fmt=":f"): 3 | self.name = name 4 | self.fmt = fmt 5 | self.reset() 6 | self.val = 0 7 | self.avg = 0 8 | self.sum = 0 9 | self.count = 0 10 | 11 | def reset(self): 12 | self.val = 0 13 | self.avg = 0 14 | self.sum = 0 15 | self.count = 0 16 | 17 | def update(self, val, n=1): 18 | self.val = val 19 | self.sum += val * n 20 | self.count += n 21 | self.avg = self.sum / self.count 22 | 23 | def __str__(self): 24 | fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" 25 | return fmtstr.format(**self.__dict__) 26 | -------------------------------------------------------------------------------- /coperception/utils/SegMetrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from coperception.configs import Config 
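# Illustrative usage sketch (not part of the original module); the class count
# below is an assumed placeholder and should come from the project's Config:
#
#     iou = ComputeIoU(num_class=8)        # accumulates a confusion matrix
#     for pred, label in data_loader:      # pred: torch tensor, label: np.ndarray,
#         iou(pred, label)                 #   both shaped [N, H, W]
#     per_class = iou.get_ious()           # dict: class name -> IoU
#     miou = iou.get_miou(ignore=None)     # mean IoU over all classes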
4 | 5 | 6 | def fast_hist(a, b, n): 7 | """ 8 | Return a histogram that's the confusion matrix of a and b 9 | :param a: np.ndarray with shape (HxW,) 10 | :param b: np.ndarray with shape (HxW,) 11 | :param n: num of classes 12 | :return: np.ndarray with shape (n, n) 13 | """ 14 | k = (a >= 0) & (a < n) 15 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n) 16 | 17 | 18 | def per_class_iu(hist): 19 | """ 20 | Calculate the IoU(Intersection over Union) for each class 21 | :param hist: np.ndarray with shape (n, n) 22 | :return: np.ndarray with shape (n,) 23 | """ 24 | np.seterr(divide="ignore", invalid="ignore") 25 | res = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 26 | np.seterr(divide="warn", invalid="warn") 27 | res[np.isnan(res)] = 0.0 28 | return res 29 | 30 | 31 | class ComputeIoU(object): 32 | """ 33 | IoU: Intersection over Union 34 | """ 35 | 36 | def __init__(self, num_class): 37 | self.num_class = num_class 38 | self.cfsmatrix = np.zeros( 39 | (self.num_class, self.num_class), dtype="uint64" 40 | ) # confusion matrix 41 | self.ious = dict() 42 | self.config = Config(None) 43 | 44 | def get_cfsmatrix(self): 45 | return self.cfsmatrix 46 | 47 | def get_ious(self): 48 | ious_by_class = per_class_iu(self.cfsmatrix) 49 | self.ious = { 50 | self.config.class_idx_to_name[idx]: ious_by_class[idx] 51 | for idx in range(self.num_class) 52 | } 53 | 54 | return self.ious 55 | 56 | def get_miou(self, ignore=None): 57 | self.get_ious() 58 | total_iou = 0 59 | count = 0 60 | for key, value in self.ious.items(): 61 | if ( 62 | isinstance(ignore, list) 63 | and key in ignore 64 | or isinstance(ignore, int) 65 | and key == ignore 66 | ): 67 | continue 68 | total_iou += value 69 | count += 1 70 | return total_iou / count 71 | 72 | def __call__(self, pred, label): 73 | """ 74 | :param pred: [N, H, W] 75 | :param label: [N, H, W} 76 | Channel == 1 77 | """ 78 | 79 | pred = pred.cpu().numpy() 80 | # label = label.cpu().numpy() 81 | 82 | assert pred.shape == label.shape 83 | 84 | self.cfsmatrix += fast_hist( 85 | pred.reshape(-1), label.reshape(-1), self.num_class 86 | ).astype("uint64") 87 | -------------------------------------------------------------------------------- /coperception/utils/SegModule.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | import torch 4 | from coperception.utils.detection_util import * 5 | 6 | 7 | class SegModule(object): 8 | def __init__(self, model, teacher, config, optimizer, kd_flag): 9 | self.config = config 10 | self.model = model 11 | self.optimizer = optimizer 12 | self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( 13 | optimizer, T_max=self.config.nepoch 14 | ) 15 | # self.scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 100, 150, 200], gamma=0.5) 16 | self.criterion = nn.CrossEntropyLoss() 17 | self.teacher = teacher 18 | if kd_flag: 19 | for k, v in self.teacher.named_parameters(): 20 | v.requires_grad = False # fix parameters 21 | 22 | self.kd_flag = kd_flag 23 | 24 | self.com = config.com 25 | 26 | def resume(self, path): 27 | def map_func(storage, location): 28 | return storage.cuda() 29 | 30 | if os.path.isfile(path): 31 | if rank == 0: 32 | print("=> loading checkpoint '{}'".format(path)) 33 | 34 | checkpoint = torch.load(path, map_location=map_func) 35 | self.model.load_state_dict(checkpoint["state_dict"], strict=False) 36 | 37 | ckpt_keys = set(checkpoint["state_dict"].keys()) 38 
| own_keys = set(model.state_dict().keys()) 39 | missing_keys = own_keys - ckpt_keys 40 | for k in missing_keys: 41 | print("caution: missing keys from checkpoint {}: {}".format(path, k)) 42 | else: 43 | print("=> no checkpoint found at '{}'".format(path)) 44 | 45 | def step(self, data, num_agent, batch_size, loss=True): 46 | bev = data["bev_seq"] 47 | labels = data["labels"] 48 | self.optimizer.zero_grad() 49 | bev = bev.permute(0, 3, 1, 2).contiguous() 50 | 51 | if not self.com: 52 | filtered_bev = [] 53 | filtered_label = [] 54 | for i in range(bev.size(0)): 55 | if torch.sum(bev[i]) > 1e-4: 56 | filtered_bev.append(bev[i]) 57 | filtered_label.append(labels[i]) 58 | bev = torch.stack(filtered_bev, 0) 59 | labels = torch.stack(filtered_label, 0) 60 | 61 | if self.kd_flag: 62 | data["bev_seq_teacher"] = ( 63 | data["bev_seq_teacher"].permute(0, 3, 1, 2).contiguous() 64 | ) 65 | 66 | if self.com: 67 | if self.kd_flag: 68 | pred, x9, x8, x7, x6, x5, fused_layer = self.model( 69 | bev, data["trans_matrices"], data["num_sensor"] 70 | ) 71 | elif self.config.flag.startswith("when2com") or self.config.flag.startswith( 72 | "who2com" 73 | ): 74 | if self.config.split == "train": 75 | pred = self.model( 76 | bev, data["trans_matrices"], data["num_sensor"], training=True 77 | ) 78 | else: 79 | pred = self.model( 80 | bev, 81 | data["trans_matrices"], 82 | data["num_sensor"], 83 | inference=self.config.inference, 84 | training=False, 85 | ) 86 | else: 87 | pred = self.model(bev, data["trans_matrices"], data["num_sensor"]) 88 | else: 89 | pred = self.model(bev) 90 | 91 | if self.com: 92 | filtered_pred = [] 93 | filtered_label = [] 94 | for i in range(bev.size(0)): 95 | if torch.sum(bev[i]) > 1e-4: 96 | filtered_pred.append(pred[i]) 97 | filtered_label.append(labels[i]) 98 | pred = torch.stack(filtered_pred, 0) 99 | labels = torch.stack(filtered_label, 0) 100 | if not loss: 101 | return pred, labels 102 | 103 | kd_loss = ( 104 | self.get_kd_loss(batch_size, data, fused_layer, num_agent, x5, x6, x7) 105 | if self.kd_flag 106 | else 0 107 | ) 108 | loss = self.criterion(pred, labels.long()) + kd_loss 109 | 110 | if isinstance(self.criterion, nn.DataParallel): 111 | loss = loss.mean() 112 | 113 | loss_data = loss.data.item() 114 | if np.isnan(loss_data): 115 | raise ValueError("loss is nan while training") 116 | 117 | loss.backward() 118 | self.optimizer.step() 119 | 120 | return pred, loss_data 121 | 122 | def get_kd_loss(self, batch_size, data, fused_layer, num_agent, x5, x6, x7): 123 | if not self.kd_flag: 124 | return 0 125 | 126 | bev_seq_teacher = data["bev_seq_teacher"].type(torch.cuda.FloatTensor) 127 | kd_weight = data["kd_weight"] 128 | ( 129 | logit_teacher, 130 | x9_teacher, 131 | x8_teacher, 132 | x7_teacher, 133 | x6_teacher, 134 | x5_teacher, 135 | x4_teacher, 136 | ) = self.teacher(bev_seq_teacher) 137 | kl_loss_mean = nn.KLDivLoss(size_average=True, reduce=True) 138 | 139 | target_x5 = x5_teacher.permute(0, 2, 3, 1).reshape( 140 | num_agent * batch_size * 16 * 16, -1 141 | ) 142 | student_x5 = x5.permute(0, 2, 3, 1).reshape( 143 | num_agent * batch_size * 16 * 16, -1 144 | ) 145 | kd_loss_x5 = kl_loss_mean( 146 | F.log_softmax(student_x5, dim=1), F.softmax(target_x5, dim=1) 147 | ) 148 | 149 | target_x6 = x6_teacher.permute(0, 2, 3, 1).reshape( 150 | num_agent * batch_size * 32 * 32, -1 151 | ) 152 | student_x6 = x6.permute(0, 2, 3, 1).reshape( 153 | num_agent * batch_size * 32 * 32, -1 154 | ) 155 | kd_loss_x6 = kl_loss_mean( 156 | F.log_softmax(student_x6, dim=1), 
F.softmax(target_x6, dim=1) 157 | ) 158 | 159 | target_x7 = x7_teacher.permute(0, 2, 3, 1).reshape( 160 | num_agent * batch_size * 64 * 64, -1 161 | ) 162 | student_x7 = x7.permute(0, 2, 3, 1).reshape( 163 | num_agent * batch_size * 64 * 64, -1 164 | ) 165 | kd_loss_x7 = kl_loss_mean( 166 | F.log_softmax(student_x7, dim=1), F.softmax(target_x7, dim=1) 167 | ) 168 | 169 | target_x4 = x4_teacher.permute(0, 2, 3, 1).reshape( 170 | num_agent * batch_size * 32 * 32, -1 171 | ) 172 | student_x4 = fused_layer.permute(0, 2, 3, 1).reshape( 173 | num_agent * batch_size * 32 * 32, -1 174 | ) 175 | kd_loss_fused_layer = kl_loss_mean( 176 | F.log_softmax(student_x4, dim=1), F.softmax(target_x4, dim=1) 177 | ) 178 | 179 | return kd_weight * (kd_loss_x5 + kd_loss_x6 + kd_loss_x7 + kd_loss_fused_layer) 180 | -------------------------------------------------------------------------------- /coperception/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | from .AverageMeter import AverageMeter 3 | -------------------------------------------------------------------------------- /coperception/utils/convolutional_rnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .module import Conv1dRNN 2 | from .module import Conv1dLSTM 3 | from .module import Conv1dPeepholeLSTM 4 | from .module import Conv1dGRU 5 | 6 | from .module import Conv2dRNN 7 | from .module import Conv2dLSTM 8 | from .module import Conv2dPeepholeLSTM 9 | from .module import Conv2dGRU 10 | 11 | from .module import Conv3dRNN 12 | from .module import Conv3dLSTM 13 | from .module import Conv3dPeepholeLSTM 14 | from .module import Conv3dGRU 15 | 16 | from .module import Conv1dRNNCell 17 | from .module import Conv1dLSTMCell 18 | from .module import Conv1dPeepholeLSTMCell 19 | from .module import Conv1dGRUCell 20 | 21 | from .module import Conv2dRNNCell 22 | from .module import Conv2dLSTMCell 23 | from .module import Conv2dPeepholeLSTMCell 24 | from .module import Conv2dGRUCell 25 | 26 | from .module import Conv3dRNNCell 27 | from .module import Conv3dLSTMCell 28 | from .module import Conv3dPeepholeLSTMCell 29 | from .module import Conv3dGRUCell 30 | -------------------------------------------------------------------------------- /coperception/utils/convolutional_rnn/functional.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | try: 7 | # pytorch<=0.4.1 8 | from torch.nn._functions.thnn import rnnFusedPointwise as fusedBackend 9 | except ImportError: 10 | fusedBackend = None 11 | 12 | from .utils import _single, _pair, _triple 13 | 14 | 15 | def RNNReLUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 16 | """Copied from torch.nn._functions.rnn and modified""" 17 | if linear_func is None: 18 | linear_func = F.linear 19 | hy = F.relu(linear_func(input, w_ih, b_ih) + linear_func(hidden, w_hh, b_hh)) 20 | return hy 21 | 22 | 23 | def RNNTanhCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 24 | """Copied from torch.nn._functions.rnn and modified""" 25 | if linear_func is None: 26 | linear_func = F.linear 27 | hy = torch.tanh(linear_func(input, w_ih, b_ih) + linear_func(hidden, w_hh, b_hh)) 28 | return hy 29 | 30 | 31 | def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 32 | """Copied from torch.nn._functions.rnn and modified""" 
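    # Standard LSTM gate math, computed below:
    #   i = sigmoid(W_i x + U_i h),  f = sigmoid(W_f x + U_f h)
    #   g = tanh(W_g x + U_g h),     o = sigmoid(W_o x + U_o h)
    #   c' = f * c + i * g,          h' = o * tanh(c')
    # `linear_func` defaults to F.linear; AutogradConvRNN passes in a
    # same-padding convolution (ConvNdWithSamePadding), which turns this
    # plain LSTM cell into a convolutional one.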
33 | if linear_func is None: 34 | linear_func = F.linear 35 | if input.is_cuda and linear_func is F.linear and fusedBackend is not None: 36 | igates = linear_func(input, w_ih) 37 | hgates = linear_func(hidden[0], w_hh) 38 | state = fusedBackend.LSTMFused.apply 39 | return ( 40 | state(igates, hgates, hidden[1]) 41 | if b_ih is None 42 | else state(igates, hgates, hidden[1], b_ih, b_hh) 43 | ) 44 | 45 | hx, cx = hidden 46 | gates = linear_func(input, w_ih, b_ih) + linear_func(hx, w_hh, b_hh) 47 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) 48 | 49 | ingate = torch.sigmoid(ingate) 50 | forgetgate = torch.sigmoid(forgetgate) 51 | cellgate = torch.tanh(cellgate) 52 | outgate = torch.sigmoid(outgate) 53 | 54 | cy = (forgetgate * cx) + (ingate * cellgate) 55 | hy = outgate * torch.tanh(cy) 56 | 57 | return hy, cy 58 | 59 | 60 | def PeepholeLSTMCell( 61 | input, hidden, w_ih, w_hh, w_pi, w_pf, w_po, b_ih=None, b_hh=None, linear_func=None 62 | ): 63 | if linear_func is None: 64 | linear_func = F.linear 65 | hx, cx = hidden 66 | gates = linear_func(input, w_ih, b_ih) + linear_func(hx, w_hh, b_hh) 67 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) 68 | 69 | ingate += linear_func(cx, w_pi) 70 | forgetgate += linear_func(cx, w_pf) 71 | ingate = torch.sigmoid(ingate) 72 | forgetgate = torch.sigmoid(forgetgate) 73 | cellgate = torch.tanh(cellgate) 74 | 75 | cy = (forgetgate * cx) + (ingate * cellgate) 76 | outgate += linear_func(cy, w_po) 77 | outgate = torch.sigmoid(outgate) 78 | 79 | hy = outgate * torch.tanh(cy) 80 | 81 | return hy, cy 82 | 83 | 84 | def GRUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None, linear_func=None): 85 | """Copied from torch.nn._functions.rnn and modified""" 86 | if linear_func is None: 87 | linear_func = F.linear 88 | if input.is_cuda and linear_func is F.linear and fusedBackend is not None: 89 | gi = linear_func(input, w_ih) 90 | gh = linear_func(hidden, w_hh) 91 | state = fusedBackend.GRUFused.apply 92 | return ( 93 | state(gi, gh, hidden) if b_ih is None else state(gi, gh, hidden, b_ih, b_hh) 94 | ) 95 | gi = linear_func(input, w_ih, b_ih) 96 | gh = linear_func(hidden, w_hh, b_hh) 97 | i_r, i_i, i_n = gi.chunk(3, 1) 98 | h_r, h_i, h_n = gh.chunk(3, 1) 99 | 100 | resetgate = torch.sigmoid(i_r + h_r) 101 | inputgate = torch.sigmoid(i_i + h_i) 102 | newgate = torch.tanh(i_n + resetgate * h_n) 103 | hy = newgate + inputgate * (hidden - newgate) 104 | 105 | return hy 106 | 107 | 108 | def StackedRNN(inners, num_layers, lstm=False, dropout=0, train=True): 109 | """Copied from torch.nn._functions.rnn and modified""" 110 | 111 | num_directions = len(inners) 112 | total_layers = num_layers * num_directions 113 | 114 | def forward(input, hidden, weight, batch_sizes): 115 | assert len(weight) == total_layers 116 | next_hidden = [] 117 | ch_dim = input.dim() - weight[0][0].dim() + 1 118 | 119 | if lstm: 120 | hidden = list(zip(*hidden)) 121 | 122 | for i in range(num_layers): 123 | all_output = [] 124 | for j, inner in enumerate(inners): 125 | l = i * num_directions + j 126 | 127 | hy, output = inner(input, hidden[l], weight[l], batch_sizes) 128 | next_hidden.append(hy) 129 | all_output.append(output) 130 | 131 | input = torch.cat(all_output, ch_dim) 132 | 133 | if dropout != 0 and i < num_layers - 1: 134 | input = F.dropout(input, p=dropout, training=train, inplace=False) 135 | 136 | if lstm: 137 | next_h, next_c = zip(*next_hidden) 138 | next_hidden = ( 139 | torch.cat(next_h, 0).view(total_layers, *next_h[0].size()), 140 | torch.cat(next_c, 
0).view(total_layers, *next_c[0].size()), 141 | ) 142 | else: 143 | next_hidden = torch.cat(next_hidden, 0).view( 144 | total_layers, *next_hidden[0].size() 145 | ) 146 | 147 | return next_hidden, input 148 | 149 | return forward 150 | 151 | 152 | def Recurrent(inner, reverse=False): 153 | """Copied from torch.nn._functions.rnn without any modification""" 154 | 155 | def forward(input, hidden, weight, batch_sizes): 156 | output = [] 157 | steps = range(input.size(0) - 1, -1, -1) if reverse else range(input.size(0)) 158 | for i in steps: 159 | hidden = inner(input[i], hidden, *weight) 160 | # hack to handle LSTM 161 | output.append(hidden[0] if isinstance(hidden, tuple) else hidden) 162 | 163 | if reverse: 164 | output.reverse() 165 | output = torch.cat(output, 0).view(input.size(0), *output[0].size()) 166 | 167 | return hidden, output 168 | 169 | return forward 170 | 171 | 172 | def variable_recurrent_factory(inner, reverse=False): 173 | """Copied from torch.nn._functions.rnn without any modification""" 174 | if reverse: 175 | return VariableRecurrentReverse(inner) 176 | else: 177 | return VariableRecurrent(inner) 178 | 179 | 180 | def VariableRecurrent(inner): 181 | """Copied from torch.nn._functions.rnn without any modification""" 182 | 183 | def forward(input, hidden, weight, batch_sizes): 184 | output = [] 185 | input_offset = 0 186 | last_batch_size = batch_sizes[0] 187 | hiddens = [] 188 | flat_hidden = not isinstance(hidden, tuple) 189 | if flat_hidden: 190 | hidden = (hidden,) 191 | for batch_size in batch_sizes: 192 | step_input = input[input_offset : input_offset + batch_size] 193 | input_offset += batch_size 194 | 195 | dec = last_batch_size - batch_size 196 | if dec > 0: 197 | hiddens.append(tuple(h[-dec:] for h in hidden)) 198 | hidden = tuple(h[:-dec] for h in hidden) 199 | last_batch_size = batch_size 200 | 201 | if flat_hidden: 202 | hidden = (inner(step_input, hidden[0], *weight),) 203 | else: 204 | hidden = inner(step_input, hidden, *weight) 205 | 206 | output.append(hidden[0]) 207 | hiddens.append(hidden) 208 | hiddens.reverse() 209 | 210 | hidden = tuple(torch.cat(h, 0) for h in zip(*hiddens)) 211 | assert hidden[0].size(0) == batch_sizes[0] 212 | if flat_hidden: 213 | hidden = hidden[0] 214 | output = torch.cat(output, 0) 215 | 216 | return hidden, output 217 | 218 | return forward 219 | 220 | 221 | def VariableRecurrentReverse(inner): 222 | """Copied from torch.nn._functions.rnn without any modification""" 223 | 224 | def forward(input, hidden, weight, batch_sizes): 225 | output = [] 226 | input_offset = input.size(0) 227 | last_batch_size = batch_sizes[-1] 228 | initial_hidden = hidden 229 | flat_hidden = not isinstance(hidden, tuple) 230 | if flat_hidden: 231 | hidden = (hidden,) 232 | initial_hidden = (initial_hidden,) 233 | hidden = tuple(h[: batch_sizes[-1]] for h in hidden) 234 | for i in reversed(range(len(batch_sizes))): 235 | batch_size = batch_sizes[i] 236 | inc = batch_size - last_batch_size 237 | if inc > 0: 238 | hidden = tuple( 239 | torch.cat((h, ih[last_batch_size:batch_size]), 0) 240 | for h, ih in zip(hidden, initial_hidden) 241 | ) 242 | last_batch_size = batch_size 243 | step_input = input[input_offset - batch_size : input_offset] 244 | input_offset -= batch_size 245 | 246 | if flat_hidden: 247 | hidden = (inner(step_input, hidden[0], *weight),) 248 | else: 249 | hidden = inner(step_input, hidden, *weight) 250 | output.append(hidden[0]) 251 | 252 | output.reverse() 253 | output = torch.cat(output, 0) 254 | if flat_hidden: 255 | hidden = hidden[0] 
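        # Non-LSTM cells carry a single hidden tensor; it was wrapped in a
        # 1-tuple above for uniform handling and unwrapped again just before
        # returning.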
256 | return hidden, output 257 | 258 | return forward 259 | 260 | 261 | def ConvNdWithSamePadding(convndim=2, stride=1, dilation=1, groups=1): 262 | def forward(input, w, b=None): 263 | if convndim == 1: 264 | ntuple = _single 265 | elif convndim == 2: 266 | ntuple = _pair 267 | elif convndim == 3: 268 | ntuple = _triple 269 | else: 270 | raise ValueError("convndim must be 1, 2, or 3, but got {}".format(convndim)) 271 | 272 | if input.dim() != convndim + 2: 273 | raise RuntimeError( 274 | "Input dim must be {}, bot got {}".format(convndim + 2, input.dim()) 275 | ) 276 | if w.dim() != convndim + 2: 277 | raise RuntimeError("w must be {}, bot got {}".format(convndim + 2, w.dim())) 278 | 279 | insize = input.shape[2:] 280 | kernel_size = w.shape[2:] 281 | _stride = ntuple(stride) 282 | _dilation = ntuple(dilation) 283 | 284 | ps = [ 285 | (i + 1 - h + s * (h - 1) + d * (k - 1)) // 2 286 | for h, k, s, d in list(zip(insize, kernel_size, _stride, _dilation))[::-1] 287 | for i in range(2) 288 | ] 289 | # Padding to make the output shape to have the same shape as the input 290 | input = F.pad(input, ps, "constant", 0) 291 | return getattr(F, "conv{}d".format(convndim))( 292 | input, 293 | w, 294 | b, 295 | stride=_stride, 296 | padding=ntuple(0), 297 | dilation=_dilation, 298 | groups=groups, 299 | ) 300 | 301 | return forward 302 | 303 | 304 | def _conv_cell_helper(mode, convndim=2, stride=1, dilation=1, groups=1): 305 | linear_func = ConvNdWithSamePadding( 306 | convndim=convndim, stride=stride, dilation=dilation, groups=groups 307 | ) 308 | 309 | if mode == "RNN_RELU": 310 | cell = partial(RNNReLUCell, linear_func=linear_func) 311 | elif mode == "RNN_TANH": 312 | cell = partial(RNNTanhCell, linear_func=linear_func) 313 | elif mode == "LSTM": 314 | cell = partial(LSTMCell, linear_func=linear_func) 315 | elif mode == "GRU": 316 | cell = partial(GRUCell, linear_func=linear_func) 317 | elif mode == "PeepholeLSTM": 318 | cell = partial(PeepholeLSTMCell, linear_func=linear_func) 319 | else: 320 | raise Exception("Unknown mode: {}".format(mode)) 321 | return cell 322 | 323 | 324 | def AutogradConvRNN( 325 | mode, 326 | num_layers=1, 327 | batch_first=False, 328 | dropout=0, 329 | train=True, 330 | bidirectional=False, 331 | variable_length=False, 332 | convndim=2, 333 | stride=1, 334 | dilation=1, 335 | groups=1, 336 | ): 337 | """Copied from torch.nn._functions.rnn and modified""" 338 | cell = _conv_cell_helper( 339 | mode, convndim=convndim, stride=stride, dilation=dilation, groups=groups 340 | ) 341 | 342 | rec_factory = variable_recurrent_factory if variable_length else Recurrent 343 | 344 | if bidirectional: 345 | layer = (rec_factory(cell), rec_factory(cell, reverse=True)) 346 | else: 347 | layer = (rec_factory(cell),) 348 | 349 | func = StackedRNN( 350 | layer, 351 | num_layers, 352 | (mode in ("LSTM", "PeepholeLSTM")), 353 | dropout=dropout, 354 | train=train, 355 | ) 356 | 357 | def forward(input, weight, hidden, batch_sizes): 358 | if batch_first and batch_sizes is None: 359 | input = input.transpose(0, 1) 360 | 361 | nexth, output = func(input, hidden, weight, batch_sizes) 362 | 363 | if batch_first and batch_sizes is None: 364 | output = output.transpose(0, 1) 365 | 366 | return output, nexth 367 | 368 | return forward 369 | -------------------------------------------------------------------------------- /coperception/utils/convolutional_rnn/utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from itertools import repeat 3 
| 4 | 5 | """ Copied from torch.nn.modules.utils """ 6 | 7 | 8 | def _ntuple(n): 9 | def parse(x): 10 | if isinstance(x, collections.Iterable): 11 | return x 12 | return tuple(repeat(x, n)) 13 | 14 | return parse 15 | 16 | 17 | _single = _ntuple(1) 18 | _pair = _ntuple(2) 19 | _triple = _ntuple(3) 20 | _quadruple = _ntuple(4) 21 | -------------------------------------------------------------------------------- /coperception/utils/mapping.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coperception/double-m-quantification/aa5dda688302135ef95818f1757864652bab83b4/coperception/utils/mapping.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /coperception/utils/mapping.py: -------------------------------------------------------------------------------- 1 | def __bootstrap__(): 2 | global __bootstrap__, __loader__, __file__ 3 | import sys, pkg_resources, imp 4 | 5 | __file__ = pkg_resources.resource_filename( 6 | __name__, "mapping.cpython-37m-x86_64-linux-gnu.so" 7 | ) 8 | __loader__ = None 9 | del __bootstrap__, __loader__ 10 | imp.load_dynamic(__name__, __file__) 11 | 12 | 13 | __bootstrap__() 14 | -------------------------------------------------------------------------------- /coperception/utils/mbb_util.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | import torch.optim as optim 5 | from torch.utils.data import DataLoader 6 | from tqdm import tqdm 7 | from copy import deepcopy 8 | 9 | from coperception.datasets import V2XSimDet, MbbSampler 10 | from coperception.configs import Config, ConfigGlobal 11 | from coperception.utils.CoDetModule import * 12 | from coperception.utils.loss import * 13 | from coperception.models.det import * 14 | from coperception.utils import AverageMeter 15 | from coperception.utils.data_util import apply_pose_noise 16 | from coperception.utils.mean_ap import eval_map, get_residual_error_and_cov 17 | 18 | import glob 19 | import os 20 | 21 | def check_folder(folder_path): 22 | if not os.path.exists(folder_path): 23 | os.mkdir(folder_path) 24 | return folder_path 25 | 26 | def test_model(fafmodule, validation_data_loader, flag, device, config, epoch, args): 27 | fafmodule.model.eval() 28 | num_agent = args.num_agent 29 | apply_late_fusion = args.apply_late_fusion 30 | agent_idx_range = range(num_agent) if args.rsu else range(1, num_agent) 31 | save_epoch_path = check_folder(os.path.join(args.test_store, str(epoch))) 32 | save_fig_path = [ 33 | check_folder(os.path.join(save_epoch_path, f"vis{i}")) for i in agent_idx_range 34 | ] 35 | tracking_path = [ 36 | check_folder(os.path.join(save_epoch_path, f"result{i}")) 37 | for i in agent_idx_range 38 | ] 39 | 40 | # for local and global mAP evaluation 41 | det_results_local = [[] for i in agent_idx_range] 42 | annotations_local = [[] for i in agent_idx_range] 43 | 44 | if not args.rsu: 45 | num_agent -= 1 46 | tracking_file = [set()] * num_agent 47 | 48 | for cnt, sample in enumerate(validation_data_loader): 49 | t = time.time() 50 | ( 51 | padded_voxel_point_list, 52 | padded_voxel_points_teacher_list, 53 | label_one_hot_list, 54 | reg_target_list, 55 | reg_loss_mask_list, 56 | anchors_map_list, 57 | vis_maps_list, 58 | gt_max_iou, 59 | filenames, 60 | target_agent_id_list, 61 | num_agent_list, 62 | trans_matrices_list, 63 | ) = zip(*sample) 64 | 65 | print(filenames) 66 | 67 | 
filename0 = filenames[0] 68 | trans_matrices = torch.stack(tuple(trans_matrices_list), 1) 69 | target_agent_ids = torch.stack(tuple(target_agent_id_list), 1) 70 | num_all_agents = torch.stack(tuple(num_agent_list), 1) 71 | 72 | # add pose noise 73 | if args.pose_noise > 0: 74 | apply_pose_noise(args.pose_noise, trans_matrices) 75 | 76 | if not args.rsu: 77 | num_all_agents -= 1 78 | 79 | if flag == "upperbound": 80 | padded_voxel_points = torch.cat(tuple(padded_voxel_points_teacher_list), 0) 81 | else: 82 | padded_voxel_points = torch.cat(tuple(padded_voxel_point_list), 0) 83 | 84 | label_one_hot = torch.cat(tuple(label_one_hot_list), 0) 85 | reg_target = torch.cat(tuple(reg_target_list), 0) 86 | reg_loss_mask = torch.cat(tuple(reg_loss_mask_list), 0) 87 | anchors_map = torch.cat(tuple(anchors_map_list), 0) 88 | vis_maps = torch.cat(tuple(vis_maps_list), 0) 89 | 90 | data = { 91 | "bev_seq": padded_voxel_points.to(device), 92 | "labels": label_one_hot.to(device), 93 | "reg_targets": reg_target.to(device), 94 | "anchors": anchors_map.to(device), 95 | "vis_maps": vis_maps.to(device), 96 | "reg_loss_mask": reg_loss_mask.to(device).type(dtype=torch.bool), 97 | "target_agent_ids": target_agent_ids.to(device), 98 | "num_agent": num_all_agents.to(device), 99 | "trans_matrices": trans_matrices.to(device), 100 | } 101 | 102 | if flag == "lowerbound_box_com": 103 | loss, cls_loss, loc_loss, result = fafmodule.predict_all_with_box_com( 104 | data, data["trans_matrices"] 105 | ) 106 | elif flag == "disco": 107 | ( 108 | loss, 109 | cls_loss, 110 | loc_loss, 111 | result, 112 | save_agent_weight_list, 113 | ) = fafmodule.predict_all(data, 1, num_agent=num_agent) 114 | else: 115 | loss, cls_loss, loc_loss, result = fafmodule.predict_all( 116 | data, 1, num_agent=num_agent 117 | ) 118 | 119 | box_color_map = ["red", "yellow", "blue", "purple", "black", "orange"] 120 | 121 | # If has RSU, do not count RSU's output into evaluation 122 | eval_start_idx = 1 if args.rsu else 0 123 | 124 | # local qualitative evaluation 125 | for k in range(eval_start_idx, num_agent): 126 | box_colors = None 127 | if apply_late_fusion == 1 and len(result[k]) != 0: 128 | pred_restore = result[k][0][0][0]["pred"] 129 | score_restore = result[k][0][0][0]["score"] 130 | selected_idx_restore = result[k][0][0][0]["selected_idx"] 131 | 132 | data_agents = { 133 | "bev_seq": torch.unsqueeze(padded_voxel_points[k, :, :, :, :], 1), 134 | "reg_targets": torch.unsqueeze(reg_target[k, :, :, :, :, :], 0), 135 | "anchors": torch.unsqueeze(anchors_map[k, :, :, :, :], 0), 136 | } 137 | temp = gt_max_iou[k] 138 | 139 | if len(temp[0]["gt_box"]) == 0: 140 | data_agents["gt_max_iou"] = [] 141 | else: 142 | data_agents["gt_max_iou"] = temp[0]["gt_box"][0, :, :] 143 | 144 | # late fusion 145 | if apply_late_fusion == 1 and len(result[k]) != 0: 146 | box_colors = late_fusion( 147 | k, num_agent, result, trans_matrices, box_color_map 148 | ) 149 | 150 | result_temp = result[k] 151 | 152 | temp = { 153 | "bev_seq": data_agents["bev_seq"][0, -1].cpu().numpy(), 154 | "result": [] if len(result_temp) == 0 else result_temp[0][0], 155 | "reg_targets": data_agents["reg_targets"].cpu().numpy()[0], 156 | "anchors_map": data_agents["anchors"].cpu().numpy()[0], 157 | "gt_max_iou": data_agents["gt_max_iou"], 158 | } 159 | det_results_local[k], annotations_local[k], det_results_frame, annotations_frame = cal_local_mAP( 160 | config, temp, det_results_local[k], annotations_local[k], True 161 | ) 162 | 163 | filename = str(filename0[0][0]) 164 | cut = 
filename[filename.rfind("agent") + 7 :] 165 | seq_name = cut[: cut.rfind("_")] 166 | idx = cut[cut.rfind("_") + 1 : cut.rfind("/")] 167 | seq_save = os.path.join(save_fig_path[k], seq_name) 168 | check_folder(seq_save) 169 | idx_save = str(idx) + ".png" 170 | #temp_ = deepcopy(temp) 171 | if args.visualization: 172 | visualization( 173 | config, 174 | temp, 175 | box_colors, 176 | box_color_map, 177 | apply_late_fusion, 178 | os.path.join(seq_save, idx_save), 179 | ) 180 | 181 | # restore data before late-fusion 182 | if apply_late_fusion == 1 and len(result[k]) != 0: 183 | result[k][0][0][0]["pred"] = pred_restore 184 | result[k][0][0][0]["score"] = score_restore 185 | result[k][0][0][0]["selected_idx"] = selected_idx_restore 186 | 187 | print("Validation scene {}, at frame {}".format(seq_name, idx)) 188 | print("Takes {} s\n".format(str(time.time() - t))) 189 | 190 | log_file_path = os.path.join(args.test_store, "log_test.txt") 191 | if os.path.exists(log_file_path): 192 | log_file = open(log_file_path, "a") 193 | else: 194 | log_file = open(log_file_path, "w") 195 | 196 | def print_and_write_log(log_str): 197 | print(log_str) 198 | log_file.write(log_str + "\n") 199 | 200 | # local mAP evaluation 201 | det_results_all_local = [] 202 | annotations_all_local = [] 203 | mean_ap_5 = [] 204 | mean_ap_7 = [] 205 | mean_ap_all = [] 206 | for k in range(eval_start_idx, num_agent): 207 | if type(det_results_local[k]) != list or len(det_results_local[k]) == 0: 208 | continue 209 | 210 | print_and_write_log("Local mAP@0.5 from agent {}".format(k)) 211 | mean_ap, _ = eval_map( 212 | det_results_local[k], 213 | annotations_local[k], 214 | scale_ranges=None, 215 | iou_thr=0.5, 216 | dataset=None, 217 | logger=None, 218 | ) 219 | mean_ap_5.append(mean_ap) 220 | print_and_write_log("Local mAP@0.7 from agent {}".format(k)) 221 | 222 | mean_ap, _ = eval_map( 223 | det_results_local[k], 224 | annotations_local[k], 225 | scale_ranges=None, 226 | iou_thr=0.7, 227 | dataset=None, 228 | logger=None, 229 | ) 230 | mean_ap_7.append(mean_ap) 231 | 232 | det_results_all_local += det_results_local[k] 233 | annotations_all_local += annotations_local[k] 234 | 235 | npy_frame_file = os.path.join(save_epoch_path, "all_data.npy") 236 | det_res = {"det_results_frame": det_results_all_local, "annotations_frame": annotations_all_local} 237 | np.save(npy_frame_file, det_res) 238 | mean_ap_local_average, _ = eval_map( 239 | det_results_all_local, 240 | annotations_all_local, 241 | scale_ranges=None, 242 | iou_thr=0.5, 243 | dataset=None, 244 | logger=None, 245 | ) 246 | mean_ap_all.append(mean_ap_local_average) 247 | 248 | mean_ap_local_average, _ = eval_map( 249 | det_results_all_local, 250 | annotations_all_local, 251 | scale_ranges=None, 252 | iou_thr=0.7, 253 | dataset=None, 254 | logger=None, 255 | ) 256 | mean_ap_all.append(mean_ap_local_average) 257 | mean_ap_agents = [] 258 | mean_ap_agents.append(mean_ap_5) 259 | mean_ap_agents.append(mean_ap_7) 260 | 261 | print_and_write_log( 262 | "Quantitative evaluation results of model, at epoch {}".format( 263 | epoch 264 | ) 265 | ) 266 | 267 | print_and_write_log( 268 | "average local mAP@0.5 is {} and average local mAP@0.7 is {}".format( 269 | mean_ap_all[0], mean_ap_all[1] 270 | ) 271 | ) 272 | 273 | def computer_mbb_covar(args): 274 | start_epoch = 0 275 | end_epoch = args.nepoch 276 | res_diff = [] 277 | all_predicted_covariance = [] 278 | covar_flag = False 279 | iou_thr = 0.5 280 | for epoch in range(start_epoch, end_epoch+1): 281 | data_path = args.test_store + 
"/{}".format(epoch) +"/all_data.npy" 282 | print("Load data from {}".format(data_path)) 283 | data = np.load(data_path, allow_pickle=True) 284 | det_results_all_local = data.item()['det_results_frame'] 285 | annotations_all_local = data.item()['annotations_frame'] 286 | res_diff_one_epoch, predicted_covar = get_residual_error_and_cov(det_results_all_local, annotations_all_local, scale_ranges=None, iou_thr=iou_thr) 287 | res_diff.extend(res_diff_one_epoch) 288 | if predicted_covar != None: 289 | all_predicted_covariance.extend(predicted_covar) 290 | covar_flag = True 291 | print("Number of corners of all bounding box: {}".format(len(res_diff[epoch]))) 292 | res_diff_np = np.array(res_diff[0]) 293 | if covar_flag: 294 | all_predicted_covariance_np = np.array(all_predicted_covariance[0]) 295 | for i in range(1, len(res_diff)): 296 | res_diff_np = np.concatenate((res_diff_np, res_diff[i])) 297 | if covar_flag: 298 | all_predicted_covariance_np = np.concatenate((all_predicted_covariance_np, all_predicted_covariance[i])) 299 | print(res_diff_np.shape) 300 | print("covariance matrix for residual error:") 301 | covar_e = np.cov(res_diff_np.T) 302 | print(covar_e) 303 | save_data = {"covar_e":covar_e} 304 | if covar_flag: 305 | print(all_predicted_covariance_np.shape) 306 | print("mean of predicted covariance matrix:") 307 | covar_a = np.mean(all_predicted_covariance_np, axis=0) 308 | print(covar_a) 309 | save_data['covar_a'] = covar_a 310 | save_data_path = args.test_store + "/mbb_covar.npy" 311 | np.save(save_data_path, save_data) 312 | print("Save computed covariance in {}".format(save_data_path)) -------------------------------------------------------------------------------- /coperception/utils/min_norm_solvers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Mitsubishi Electric Research Laboratories (MERL). All rights reserved. The software, documentation and/or data in this file is provided on an "as is" basis, and MERL has no obligations to provide maintenance, support, updates, enhancements or modifications. MERL specifically disclaims any warranties, including, but not limited to, the implied warranties of merchantability and fitness for any particular purpose. In no event shall MERL be liable to any party for direct, indirect, special, incidental, or consequential damages, including lost profits, arising out of the use of this software and its documentation, even if MERL has been advised of the possibility of such damages. As more fully described in the license agreement that was required in order to download this software, documentation and/or data, permission to use, copy and modify this software without fee is granted, but only for educational, research and non-commercial purposes. 
2 | 3 | # Original code from https://github.com/intel-isl/MultiObjectiveOptimization 4 | 5 | ############################################################################# 6 | # Note: The functions in this file require PyTorch 1.1 # 7 | ############################################################################# 8 | 9 | import numpy as np 10 | import torch 11 | 12 | 13 | class MinNormSolver: 14 | MAX_ITER = 250 15 | STOP_CRIT = 1e-5 16 | 17 | def _min_norm_element_from2(v1v1, v1v2, v2v2): 18 | """ 19 | Analytical solution for min_{c} |cx_1 + (1-c)x_2|_2^2 20 | d is the distance (objective) optimzed 21 | v1v1 = 22 | v1v2 = 23 | v2v2 = 24 | """ 25 | if v1v2 >= v1v1: 26 | # Case: Fig 1, third column 27 | gamma = 0.999 28 | cost = v1v1 29 | return gamma, cost 30 | if v1v2 >= v2v2: 31 | # Case: Fig 1, first column 32 | gamma = 0.001 33 | cost = v2v2 34 | return gamma, cost 35 | # Case: Fig 1, second column 36 | gamma = -1.0 * ((v1v2 - v2v2) / (v1v1 + v2v2 - 2 * v1v2)) 37 | cost = v2v2 + gamma * (v1v2 - v2v2) 38 | return gamma, cost 39 | 40 | def _min_norm_2d(vecs, dps): 41 | """ 42 | Find the minimum norm solution as combination of two points 43 | This is correct only in 2D 44 | ie. min_c |\sum c_i x_i|_2^2 st. \sum c_i = 1 , 1 >= c_1 >= 0 for all i, c_i + c_j = 1.0 for some i, j 45 | """ 46 | dmin = 1e8 47 | for i in range(len(vecs)): 48 | for j in range(i + 1, len(vecs)): 49 | if (i, j) not in dps: 50 | dps[(i, j)] = 0.0 51 | for k in range(len(vecs[i])): 52 | dps[(i, j)] += torch.sum(vecs[i][k] * vecs[j][k]).item() 53 | dps[(j, i)] = dps[(i, j)] 54 | if (i, i) not in dps: 55 | dps[(i, i)] = 0.0 56 | for k in range(len(vecs[i])): 57 | dps[(i, i)] += torch.sum(vecs[i][k] * vecs[i][k]).item() 58 | if (j, j) not in dps: 59 | dps[(j, j)] = 0.0 60 | for k in range(len(vecs[i])): 61 | dps[(j, j)] += torch.sum(vecs[j][k] * vecs[j][k]).item() 62 | c, d = MinNormSolver._min_norm_element_from2( 63 | dps[(i, i)], dps[(i, j)], dps[(j, j)] 64 | ) 65 | if d < dmin: 66 | dmin = d 67 | sol = [(i, j), c, d] 68 | return sol, dps 69 | 70 | def _projection2simplex(y): 71 | """ 72 | Given y, it solves argmin_z |y-z|_2 st \sum z = 1 , 1 >= z_i >= 0 for all i 73 | """ 74 | m = len(y) 75 | sorted_y = np.flip(np.sort(y), axis=0) 76 | tmpsum = 0.0 77 | tmax_f = (np.sum(y) - 1.0) / m 78 | for i in range(m - 1): 79 | tmpsum += sorted_y[i] 80 | tmax = (tmpsum - 1) / (i + 1.0) 81 | if tmax > sorted_y[i + 1]: 82 | tmax_f = tmax 83 | break 84 | return np.maximum(y - tmax_f, np.zeros(y.shape)) 85 | 86 | def _next_point(cur_val, grad, n): 87 | proj_grad = grad - (np.sum(grad) / n) 88 | tm1 = -1.0 * cur_val[proj_grad < 0] / proj_grad[proj_grad < 0] 89 | tm2 = (1.0 - cur_val[proj_grad > 0]) / (proj_grad[proj_grad > 0]) 90 | 91 | # skippers = np.sum(tm1 < 1e-7) + np.sum(tm2 < 1e-7) 92 | t = 1 93 | if len(tm1[tm1 > 1e-7]) > 0: 94 | t = np.min(tm1[tm1 > 1e-7]) 95 | if len(tm2[tm2 > 1e-7]) > 0: 96 | t = min(t, np.min(tm2[tm2 > 1e-7])) 97 | 98 | next_point = proj_grad * t + cur_val 99 | next_point = MinNormSolver._projection2simplex(next_point) 100 | return next_point 101 | 102 | def find_min_norm_element(vecs): 103 | """ 104 | Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull 105 | as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
106 | It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) 107 | Hence, we find the best 2-task solution, and then run the projected gradient descent until convergence 108 | """ 109 | # Solution lying at the combination of two points 110 | dps = {} 111 | init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) 112 | 113 | n = len(vecs) 114 | sol_vec = np.zeros(n) 115 | sol_vec[init_sol[0][0]] = init_sol[1] 116 | sol_vec[init_sol[0][1]] = 1 - init_sol[1] 117 | 118 | if n < 3: 119 | # This is optimal for n=2, so return the solution 120 | return sol_vec, init_sol[2] 121 | 122 | iter_count = 0 123 | 124 | grad_mat = np.zeros((n, n)) 125 | for i in range(n): 126 | for j in range(n): 127 | grad_mat[i, j] = dps[(i, j)] 128 | 129 | while iter_count < MinNormSolver.MAX_ITER: 130 | grad_dir = -1.0 * np.dot(grad_mat, sol_vec) 131 | new_point = MinNormSolver._next_point(sol_vec, grad_dir, n) 132 | # Re-compute the inner products for line search 133 | v1v1 = 0.0 134 | v1v2 = 0.0 135 | v2v2 = 0.0 136 | for i in range(n): 137 | for j in range(n): 138 | v1v1 += sol_vec[i] * sol_vec[j] * dps[(i, j)] 139 | v1v2 += sol_vec[i] * new_point[j] * dps[(i, j)] 140 | v2v2 += new_point[i] * new_point[j] * dps[(i, j)] 141 | nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) 142 | new_sol_vec = nc * sol_vec + (1 - nc) * new_point 143 | change = new_sol_vec - sol_vec 144 | if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: 145 | return sol_vec, nd 146 | sol_vec = new_sol_vec 147 | 148 | def find_min_norm_element_FW(vecs): 149 | """ 150 | Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull 151 | as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
152 | It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) 153 | Hence, we find the best 2-task solution, and then run the Frank Wolfe until convergence 154 | """ 155 | # Solution lying at the combination of two points 156 | dps = {} 157 | init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) 158 | 159 | n = len(vecs) 160 | sol_vec = np.zeros(n) 161 | sol_vec[init_sol[0][0]] = init_sol[1] 162 | sol_vec[init_sol[0][1]] = 1 - init_sol[1] 163 | 164 | if n < 3: 165 | # This is optimal for n=2, so return the solution 166 | return sol_vec, init_sol[2] 167 | 168 | iter_count = 0 169 | 170 | grad_mat = np.zeros((n, n)) 171 | for i in range(n): 172 | for j in range(n): 173 | grad_mat[i, j] = dps[(i, j)] 174 | 175 | while iter_count < MinNormSolver.MAX_ITER: 176 | t_iter = np.argmin(np.dot(grad_mat, sol_vec)) 177 | 178 | v1v1 = np.dot(sol_vec, np.dot(grad_mat, sol_vec)) 179 | v1v2 = np.dot(sol_vec, grad_mat[:, t_iter]) 180 | v2v2 = grad_mat[t_iter, t_iter] 181 | 182 | nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) 183 | new_sol_vec = nc * sol_vec 184 | new_sol_vec[t_iter] += 1 - nc 185 | 186 | change = new_sol_vec - sol_vec 187 | if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: 188 | return sol_vec, nd 189 | sol_vec = new_sol_vec 190 | 191 | 192 | def gradient_normalizers(grads, losses, normalization_type): 193 | gn = {} 194 | if normalization_type == "l2": 195 | for t in grads: 196 | gn[t] = np.sqrt(np.sum([gr.pow(2).sum().item() for gr in grads[t]])) 197 | elif normalization_type == "loss": 198 | for t in grads: 199 | gn[t] = losses[t] 200 | elif normalization_type == "loss+": 201 | for t in grads: 202 | gn[t] = losses[t] * np.sqrt( 203 | np.sum([gr.pow(2).sum().item() for gr in grads[t]]) 204 | ) 205 | elif normalization_type == "none": 206 | for t in grads: 207 | gn[t] = 1.0 208 | else: 209 | print("ERROR: Invalid Normalization Type") 210 | return gn 211 | -------------------------------------------------------------------------------- /coperception/utils/postprocess.py: -------------------------------------------------------------------------------- 1 | """ 2 | Non Max Suppression 3 | IOU, Recall, Precision, Find overlap and Average Precisions 4 | Source Code is adapted from github.com/matterport/MaskRCNN 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from shapely.geometry import Polygon 10 | 11 | 12 | def convert_format(boxes_array): 13 | """ 14 | :param array: an array of shape [# bboxs, 4, 2] 15 | :return: a shapely.geometry.Polygon object 16 | """ 17 | 18 | polygons = [ 19 | Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in boxes_array 20 | ] 21 | return np.array(polygons) 22 | 23 | 24 | def compute_overlaps(boxes1, boxes2): 25 | """Computes IoU overlaps between two sets of boxes. 26 | boxes1, boxes2: a np array of boxes 27 | For better performance, pass the largest set first and the smaller second. 28 | :return: a matrix of overlaps [boxes1 count, boxes2 count] 29 | """ 30 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 31 | # Each cell contains the IoU value. 32 | 33 | boxes1 = convert_format(boxes1) 34 | boxes2 = convert_format(boxes2) 35 | overlaps = np.zeros((len(boxes1), len(boxes2))) 36 | for i in range(overlaps.shape[1]): 37 | box2 = boxes2[i] 38 | overlaps[:, i] = compute_iou(box2, boxes1) 39 | return overlaps 40 | 41 | 42 | def compute_iou(box, boxes): 43 | """Calculates IoU of the given box with the array of the given boxes. 
44 | box: a polygon 45 | boxes: a vector of polygons 46 | Note: the areas are passed in rather than calculated here for 47 | efficiency. Calculate once in the caller to avoid duplicate work. 48 | """ 49 | # Calculate intersection areas 50 | iou = [box.intersection(b).area / box.union(b).area for b in boxes] 51 | 52 | return np.array(iou, dtype=np.float32) 53 | 54 | 55 | def compute_recall(pred_boxes, gt_boxes, iou): 56 | """Compute the recall at the given IoU threshold. It's an indication 57 | of how many GT boxes were found by the given prediction boxes. 58 | pred_boxes: a list of predicted Polygons of size N 59 | gt_boxes: a list of ground truth Polygons of size N 60 | """ 61 | # Measure overlaps 62 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 63 | iou_max = np.max(overlaps, axis=1) 64 | iou_argmax = np.argmax(overlaps, axis=1) 65 | positive_ids = np.where(iou_max >= iou)[0] 66 | matched_gt_boxes = iou_argmax[positive_ids] 67 | 68 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 69 | return recall, positive_ids 70 | 71 | 72 | def non_max_suppression(boxes, scores, threshold): 73 | """Performs non-maximum suppression and returns indices of kept boxes. 74 | scores: 1-D array of box scores. 75 | threshold: Float. IoU threshold to use for filtering. 76 | return an numpy array of the positions of picks 77 | """ 78 | assert boxes.shape[0] > 0 79 | if boxes.dtype.kind != "f": 80 | boxes = boxes.astype(np.float32) 81 | 82 | # Get indicies of boxes sorted by scores (highest first) 83 | # ixs = scores.argsort()[::-1][:top] 84 | fil_id = np.where(scores > 0.7)[0] 85 | ixs_sort = scores[fil_id].argsort()[::-1] 86 | # print(fil_id) 87 | ixs = [] 88 | for i in range(len(fil_id)): 89 | ixs.append(fil_id[ixs_sort[i]]) 90 | 91 | polygons = convert_format(boxes[ixs]) 92 | iter_ixs = [ii for ii in range(len(polygons))] 93 | 94 | pick = [] 95 | # print('ori: ',len(ixs)) 96 | while len(iter_ixs) > 0: 97 | # Pick top box and add its index to the list 98 | i = iter_ixs[0] 99 | pick.append(ixs[i]) 100 | # Compute IoU of the picked box with the rest 101 | iou = compute_iou(polygons[i], polygons[iter_ixs[1:]]) 102 | # Identify boxes with IoU over the threshold. This 103 | # returns indices into ixs[1:], so add 1 to get 104 | # indices into ixs. 105 | 106 | remove_ixs = np.where(iou > threshold)[0] + 1 107 | 108 | # Remove indices of the picked and overlapped boxes. 109 | iter_ixs = np.delete(iter_ixs, remove_ixs) 110 | iter_ixs = np.delete(iter_ixs, 0) 111 | 112 | print("selected: ", len(pick)) 113 | return np.array(pick, dtype=np.int32) 114 | 115 | 116 | def filter_pred(config, pred): 117 | if len(pred.size()) == 4: 118 | if pred.size(0) == 1: 119 | pred.squeeze_(0) 120 | else: 121 | raise ValueError("Tensor dimension is not right") 122 | 123 | cls_pred = pred[0, ...] 
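    # Channel 0 of `pred` is the classification score map; channels 7-14 hold
    # the 8 regression values per location (4 box corners x (x, y)), which are
    # gathered below and pruned with NMS.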
124 | activation = cls_pred > config["cls_threshold"] 125 | num_boxes = int(activation.sum()) 126 | 127 | if num_boxes == 0: 128 | # print("No bounding box found") 129 | return [], [] 130 | 131 | corners = torch.zeros((num_boxes, 8)) 132 | for i in range(7, 15): 133 | corners[:, i - 7] = torch.masked_select(pred[i, ...], activation) 134 | corners = corners.view(-1, 4, 2).numpy() 135 | scores = torch.masked_select(cls_pred, activation).cpu().numpy() 136 | 137 | # NMS 138 | selected_ids = non_max_suppression(corners, scores, config["nms_iou_threshold"]) 139 | corners = corners[selected_ids] 140 | scores = scores[selected_ids] 141 | 142 | return corners, scores 143 | 144 | 145 | def compute_ap_range( 146 | gt_box, 147 | gt_class_id, 148 | pred_box, 149 | pred_class_id, 150 | pred_score, 151 | iou_thresholds=None, 152 | verbose=1, 153 | ): 154 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 155 | # Default is 0.5 to 0.95 with increments of 0.05 156 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 157 | 158 | # Compute AP over range of IoU thresholds 159 | AP = [] 160 | for iou_threshold in iou_thresholds: 161 | ap, precisions, recalls, overlaps = compute_ap( 162 | gt_box, 163 | gt_class_id, 164 | pred_box, 165 | pred_class_id, 166 | pred_score, 167 | iou_threshold=iou_threshold, 168 | ) 169 | if verbose: 170 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 171 | AP.append(ap) 172 | AP = np.array(AP).mean() 173 | if verbose: 174 | print( 175 | "AP @{:.2f}-{:.2f}:\t {:.3f}".format( 176 | iou_thresholds[0], iou_thresholds[-1], AP 177 | ) 178 | ) 179 | return AP 180 | 181 | 182 | def compute_ap(pred_match, num_gt, num_pred): 183 | 184 | assert num_gt != 0 185 | assert num_pred != 0 186 | tp = (pred_match > -1).sum() 187 | # Compute precision and recall at each prediction box step 188 | precisions = np.cumsum(pred_match > -1) / (np.arange(num_pred) + 1) 189 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / num_gt 190 | 191 | # Pad with start and end values to simplify the math 192 | precisions = np.concatenate([[0], precisions, [0]]) 193 | recalls = np.concatenate([[0], recalls, [1]]) 194 | 195 | # Ensure precision values decrease but don't increase. This way, the 196 | # precision value at each recall threshold is the maximum it can be 197 | # for all following recall thresholds, as specified by the VOC paper. 198 | for i in range(len(precisions) - 2, -1, -1): 199 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 200 | 201 | # Compute mean AP over recall range 202 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 203 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * precisions[indices]) 204 | precision = tp / num_pred 205 | recall = tp / num_gt 206 | return mAP, precisions, recalls, precision, recall 207 | 208 | 209 | def compute_matches( 210 | gt_boxes, pred_boxes, pred_scores, iou_threshold=0.5, score_threshold=0.0 211 | ): 212 | """Finds matches between prediction and ground truth instances. 213 | Returns: 214 | gt_match: 1-D array. For each GT box it has the index of the matched 215 | predicted box. 216 | pred_match: 1-D array. For each predicted box, it has the index of 217 | the matched ground truth box. 218 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
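        Unmatched entries in gt_match and pred_match are left as -1.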
219 | """ 220 | 221 | if len(pred_scores) == 0: 222 | return -1 * np.ones([gt_boxes.shape[0]]), np.array([]), np.array([]) 223 | 224 | gt_class_ids = np.ones(len(gt_boxes), dtype=int) 225 | pred_class_ids = np.ones(len(pred_scores), dtype=int) 226 | 227 | # Sort predictions by score from high to low 228 | indices = np.argsort(pred_scores)[::-1] 229 | pred_boxes = pred_boxes[indices] 230 | pred_class_ids = pred_class_ids[indices] 231 | pred_scores = pred_scores[indices] 232 | 233 | # Compute IoU overlaps [pred_boxes, gt_boxes] 234 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 235 | 236 | # Loop through predictions and find matching ground truth boxes 237 | match_count = 0 238 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 239 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 240 | for i in range(len(pred_boxes)): 241 | # Find best matching ground truth box 242 | # 1. Sort matches by score 243 | sorted_ixs = np.argsort(overlaps[i])[::-1] 244 | # 2. Remove low scores 245 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 246 | if low_score_idx.size > 0: 247 | sorted_ixs = sorted_ixs[: low_score_idx[0]] 248 | # 3. Find the match 249 | for j in sorted_ixs: 250 | # If ground truth box is already matched, go to next one 251 | if gt_match[j] > 0: 252 | continue 253 | # If we reach IoU smaller than the threshold, end the loop 254 | iou = overlaps[i, j] 255 | if iou < iou_threshold: 256 | break 257 | # Do we have a match? 258 | if pred_class_ids[i] == gt_class_ids[j]: 259 | match_count += 1 260 | gt_match[j] = i 261 | pred_match[i] = j 262 | break 263 | 264 | return gt_match, pred_match, overlaps 265 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: coperception 2 | 3 | dependencies: 4 | - python==3.7 5 | - pip 6 | - pip: 7 | - numpy 8 | - torch 9 | - opencv-python 10 | - torchvision 11 | - typing 12 | - nuscenes-devkit==1.0.9 13 | - pyquaternion 14 | - numba 15 | - matplotlib 16 | - mmcv 17 | - terminaltables 18 | - shapely -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | opencv-python 4 | torchvision 5 | typing 6 | nuscenes-devkit==1.0.9 7 | pyquaternion 8 | numba 9 | matplotlib 10 | mmcv 11 | terminaltables 12 | shapely -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from distutils.core import setup 3 | import setuptools 4 | 5 | # The directory containing this file 6 | HERE = pathlib.Path(__file__).parent 7 | 8 | # The text of the README file 9 | README = (HERE / "README.md").read_text() 10 | 11 | setup( 12 | name="coperception", 13 | version="0.0.10", 14 | package_data={ 15 | "": ["*.so"], 16 | }, 17 | packages=setuptools.find_packages(), 18 | license="apache-2.0", # Chose a license from here: https://help.github.com/articles/licensing-a-repository 19 | description="A library for collaborative perception.", 20 | author="AI4CE Lab @NYU", 21 | author_email="dm4524@nyu.edu", 22 | url="https://ai4ce.github.io/", 23 | download_url="https://github.com/coperception/coperception/archive/refs/tags/v0.0.1-alpha.tar.gz", 24 | keywords=[ 25 | "computer-vision", 26 | "deep-learning", 27 | "autonomous-driving", 28 | "collaborative-learning", 
29 | "knowledge-distillation", 30 | "communication-networks", 31 | "multi-agent-learning", 32 | "multi-agent-system", 33 | "3d-object-detection", 34 | "graph-learning", 35 | "point-cloud-processing", 36 | "v2x-communication", 37 | "multi-agent-perception", 38 | "3d-scene-understanding", 39 | ], # Keywords that define your package best 40 | install_requires=[ 41 | "numpy", 42 | "torch", 43 | "opencv-python", 44 | "torchvision", 45 | "typing", 46 | "nuscenes-devkit", 47 | "pyquaternion", 48 | "numba", 49 | "matplotlib", 50 | "mmcv", 51 | "terminaltables", 52 | "shapely", 53 | "seaborn", 54 | ], 55 | classifiers=[ 56 | "Development Status :: 3 - Alpha", # Chose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package 57 | "Intended Audience :: Developers", # Define that your audience are developers 58 | "Topic :: Software Development :: Libraries", 59 | "License :: OSI Approved :: Apache Software License", 60 | "Programming Language :: Python :: 3.7", # Specify which pyhton versions that you want to support 61 | ], 62 | ) 63 | -------------------------------------------------------------------------------- /tools/det/Makefile: -------------------------------------------------------------------------------- 1 | # Path to the original V2X-Sim dataset 2 | original_data_path := /data/v2x-sim 3 | # Where to save the created data 4 | #V2X-Sim-det V2X-Sim-demo 5 | create_data_save_path := /data/v2x-sim-nyu/V2X-Sim-det 6 | # Index of the begining scene 7 | scene_begin := 0 8 | # Index of the ending scene + 1 9 | scene_end := 100 10 | # Index of the start agent 11 | from_agent := 0 12 | # Index of the end agent + 1 13 | to_agent := 6 14 | 15 | training_script := train_codet.py 16 | # Path to the created training data 17 | training_data := $(create_data_save_path)/train 18 | 19 | testing_script := test_codet.py 20 | # Path to the test/val data 21 | testing_data := $(create_data_save_path)/test 22 | val_data := $(create_data_save_path)/val 23 | # [lowerbound / upperbound / v2v / disco / when2com / max / mean / sum / agent] 24 | com := disco 25 | batch_size := 1 26 | # [train / test / val] 27 | split := train 28 | # Where to store the logs 29 | logpath := check/check_loss_base 30 | testlogpath := check/test_loss_corner_pair_ind 31 | # Train for how many epochs 32 | nepoch := 25 33 | # loss type: corner_loss faf_loss kl_loss_center kl_loss_center_add, kl_loss_corner, kl_loss_center_ind, kl_loss_center_offset_ind, kl_loss_corner_pair_ind 34 | loss_type = kl_loss_corner_pair_ind 35 | # If given, the model will resume from its most recent (by modification time) check point 36 | auto_resume_path := $(logpath) 37 | # experiment name 38 | exp_name = test_mbb_base 39 | # compress_level 40 | compress_level := 0 41 | # 1: apply late fusion. 0: no late fusion 42 | apply_late_fusion := 0 43 | # 1: do visualizaton. 
0: no visualization 44 | visualization := 0 45 | # pose_noise in meters 46 | pose_noise := 0 47 | # only apply v2i communication 48 | only_v2i := 0 49 | # 0: no RSU, 1: RSU 50 | rsu := 0 51 | # Whether to use pose info for When2com 52 | warp_flag := 0 53 | # Used when testing when2com / who2com 54 | # when2com: activated, who2com: argmax_test 55 | inference := activated 56 | #block len for MBB method 57 | block_len := 16 58 | 59 | create_data: 60 | python create_data_det.py \ 61 | --root $(original_data_path) \ 62 | --split $(split) \ 63 | --scene_begin $(scene_begin) \ 64 | --scene_end $(scene_end) \ 65 | --savepath $(create_data_save_path) \ 66 | --from_agent $(from_agent) \ 67 | --to_agent $(to_agent) 68 | 69 | train: 70 | python $(training_script) \ 71 | --data $(training_data) \ 72 | --com $(com) \ 73 | --log \ 74 | --batch_size $(batch_size) \ 75 | --auto_resume_path $(auto_resume_path) \ 76 | --nepoch $(nepoch) \ 77 | --logpath $(logpath) \ 78 | --warp_flag $(warp_flag) \ 79 | --rsu $(rsu) \ 80 | --compress_level $(compress_level) \ 81 | --pose_noise $(pose_noise) \ 82 | --only_v2i $(only_v2i) \ 83 | --loss_type $(loss_type) \ 84 | --exp_name $(exp_name) 85 | 86 | 87 | train_disco: 88 | python $(training_script) \ 89 | --data $(training_data) \ 90 | --com disco \ 91 | --log --batch $(batch_size) \ 92 | --kd_flag 1 \ 93 | --resume_teacher $(logpath)/upperbound/with_rsu/epoch_$(nepoch).pth \ 94 | --auto_resume_path $(auto_resume_path) \ 95 | --logpath $(logpath) \ 96 | --nepoch $(nepoch) \ 97 | --compress_level $(compress_level) \ 98 | --pose_noise $(pose_noise) \ 99 | --only_v2i $(only_v2i) \ 100 | -- rsu 1 \ 101 | --loss_type $(loss_type) 102 | 103 | train_disco_no_rsu: 104 | python $(training_script) \ 105 | --data $(training_data) \ 106 | --com disco \ 107 | --log --batch $(batch_size) \ 108 | --kd_flag 1 \ 109 | --resume_teacher check/check_loss_base/upperbound/no_rsu/epoch_100.pth \ 110 | --auto_resume_path $(auto_resume_path) \ 111 | --logpath $(logpath) \ 112 | --nepoch $(nepoch) \ 113 | --compress_level $(compress_level) \ 114 | --pose_noise $(pose_noise) \ 115 | --only_v2i $(only_v2i) \ 116 | --rsu 0 \ 117 | --loss_type $(loss_type) \ 118 | --exp_name $(exp_name) 119 | 120 | mbb_train: 121 | python train_mbb.py \ 122 | --data $(training_data) \ 123 | --com $(com) \ 124 | --log \ 125 | --batch_size $(batch_size) \ 126 | --auto_resume_path $(auto_resume_path) \ 127 | --nepoch $(nepoch) \ 128 | --logpath $(logpath) \ 129 | --warp_flag $(warp_flag) \ 130 | --rsu $(rsu) \ 131 | --compress_level $(compress_level) \ 132 | --pose_noise $(pose_noise) \ 133 | --only_v2i $(only_v2i) \ 134 | --block_len $(block_len) \ 135 | --init_resume_path check/check_loss_base/$(com)/no_rsu/epoch_80.pth \ 136 | --test_store $(logpath)/$(com)/no_rsu \ 137 | --test_data $(create_data_save_path) \ 138 | --loss_type $(loss_type) \ 139 | --exp_name $(exp_name) \ 140 | --test 141 | 142 | mbb_train_disco_no_rsu: 143 | python train_mbb.py \ 144 | --data $(training_data) \ 145 | --com disco \ 146 | --log --batch $(batch_size) \ 147 | --kd_flag 1 \ 148 | --resume_teacher check/check_loss_base/upperbound/no_rsu/epoch_100.pth \ 149 | --auto_resume_path $(auto_resume_path) \ 150 | --logpath $(logpath) \ 151 | --nepoch $(nepoch) \ 152 | --compress_level $(compress_level) \ 153 | --pose_noise $(pose_noise) \ 154 | --only_v2i $(only_v2i) \ 155 | --rsu 0 \ 156 | --block_len $(block_len) \ 157 | --init_resume_path check/check_loss_base/disco/no_rsu/epoch_80.pth \ 158 | --test_store $(logpath)/disco/no_rsu \ 159 | 
--test_data $(create_data_save_path) \ 160 | --loss_type $(loss_type) \ 161 | --exp_name $(exp_name) \ 162 | --test 163 | 164 | test: 165 | python $(testing_script) \ 166 | --data $(testing_data) \ 167 | --com $(com) \ 168 | --resume $(logpath)/$(com)/with_rsu/epoch_$(nepoch).pth \ 169 | --tracking \ 170 | --logpath $(logpath) \ 171 | --apply_late_fusion $(apply_late_fusion) \ 172 | --visualization $(visualization) \ 173 | --inference $(inference) \ 174 | --warp_flag $(warp_flag) \ 175 | --rsu 1 \ 176 | --covar_path $(logpath)/${com}/no_rsu/mbb_covar.npy \ 177 | --loss_type $(loss_type) 178 | 179 | test_no_rsu: 180 | python $(testing_script) \ 181 | --data $(testing_data) \ 182 | --com $(com) \ 183 | --resume $(logpath)/$(com)/no_rsu/epoch_$(nepoch).pth \ 184 | --logpath $(logpath) \ 185 | --apply_late_fusion $(apply_late_fusion) \ 186 | --visualization $(visualization) \ 187 | --inference $(inference) \ 188 | --warp_flag $(warp_flag) \ 189 | --rsu 0 \ 190 | --covar_path $(logpath)/${com}/no_rsu/mbb_covar.npy \ 191 | --loss_type $(loss_type) 192 | 193 | mbb_test_no_rsu: 194 | python test_mbb.py \ 195 | --data $(val_data) \ 196 | --com $(com) \ 197 | --resume $(logpath)/$(com)/no_rsu \ 198 | --logpath $(logpath)/${com}/no_rsu \ 199 | --apply_late_fusion $(apply_late_fusion) \ 200 | --visualization $(visualization) \ 201 | --inference $(inference) \ 202 | --warp_flag $(warp_flag) \ 203 | --rsu 0 \ 204 | --test_store $(logpath)/$(com)/no_rsu \ 205 | --init_resume_path check/check_loss_base/$(com)/no_rsu/epoch_80.pth \ 206 | --nepoch ${nepoch} \ 207 | --loss_type $(loss_type) 208 | 209 | compute_mbb_covar: 210 | python compute_mbb_covar.py \ 211 | --mbb_path $(logpath)/${com}/no_rsu \ 212 | --min_epoch 0 \ 213 | --max_epoch $(nepoch) -------------------------------------------------------------------------------- /tools/det/README.md: -------------------------------------------------------------------------------- 1 | # Detection benchmark on V2X-Sim 2 | 3 | We implement lowerbound, upperbound, when2com, who2com, V2VNet, and DiscoNet as our benchmark detectors. Please see our paper for more details. 4 | 5 | ## Preparation 6 | 7 | - Download the V2X-Sim dataset from our [website](https://ai4ce.github.io/V2X-Sim/index.html) 8 | - Run the command below to generate the preprocessed data 9 | ```bash 10 | make create_data 11 | ``` 12 | - You might want to consult `./Makefile` for all the arguments you can pass in (see the example invocations after the Training section) 13 | 14 | 15 | ## Training 16 | 17 | Train benchmark detectors: 18 | - Lowerbound / Upperbound / V2VNet / When2Com 19 | ```bash 20 | make train com=[lowerbound/upperbound/v2v/when2com] rsu=[0/1] 21 | ``` 22 | 23 | - DiscoNet 24 | ```bash 25 | # DiscoNet 26 | make train_disco 27 | 28 | # DiscoNet with no cross road (RSU) data 29 | make train_disco_no_rsu 30 | ``` 31 | 32 | - When2com_warp 33 | ```bash 34 | # When2com_warp 35 | make train com=when2com warp_flag=1 rsu=[0/1] 36 | ``` 37 | 38 | - Note: Who2com is trained the same way as When2com; they only differ in inference. 
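Every variable defined at the top of `./Makefile` (dataset paths, `com`, `batch_size`, `nepoch`, `rsu`, `logpath`, and so on) can be overridden on the `make` command line, since command-line assignments take precedence over the defaults in the Makefile. A minimal sketch, assuming placeholder dataset and log paths that you should replace with your own:

```bash
# Preprocess the raw V2X-Sim dataset into the detection format
# (original_data_path and create_data_save_path below are placeholders)
make create_data original_data_path=/data/v2x-sim \
    create_data_save_path=/data/V2X-Sim-det split=train

# Train V2VNet with RSU data for 25 epochs, logging under a custom directory
make train com=v2v rsu=1 nepoch=25 batch_size=1 logpath=check/check_loss_base
```

Keeping experiment-specific values on the command line like this lets you leave the checked-in Makefile defaults untouched.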
39 | 40 | ## Evaluation 41 | 42 | Evaluate benchmark detectors: 43 | 44 | - Lowerbound / Upperbound / V2VNet / When2Com / Who2Com 45 | ```bash 46 | # with RSU 47 | make test com=[lowerbound/upperbound/v2v/when2com/who2com] 48 | 49 | # no RSU 50 | make test_no_rsu com=[lowerbound/upperbound/v2v/when2com/who2com] 51 | ``` 52 | 53 | - When2com 54 | ```bash 55 | # with RSU 56 | make test com=when2com inference=activated warp_flag=[0/1] 57 | 58 | # no RSU 59 | make test_no_rsu com=when2com inference=activated warp_flag=[0/1] 60 | ``` 61 | 62 | - Who2com 63 | ```bash 64 | # with RSU 65 | make test com=who2com inference=argmax_test warp_flag=[0/1] 66 | 67 | # no RSU 68 | make test_no_rsu com=who2com inference=argmax_test warp_flag=[0/1] 69 | ``` 70 | 71 | 72 | ## Results 73 | | **Method** | **AP@0.5 w/o RSU** | AP@0.5 w/ RSU | **Δ** | AP@0.7 w/o RSU | **AP@0.7 w/ RSU** | Δ | 74 | | :-----------: | :----------------: | :-----------: | :---: | :------------: | :---------------: | :---: | 75 | | Lower-bound | 49.90 | 46.96 | -2.94 | 44.21 | 42.33 | -1.88 | 76 | | Co-lower-bound | 43.99 | 42.98 | -1.01 | 39.10 | 38.26 | -0.84 | 77 | | When2com | 44.02 | 46.39 | +2.37 | 39.89 | 40.32 | +0.43 | 78 | | When2com* | 45.35 | 48.28 | +2.93 | 40.45 | 41.43 | +0.68 | 79 | | Who2com | 44.02 | 46.39 | +2.37 | 39.89 | 40.32 | +0.43 | 80 | | Who2com* | 45.35 | 48.28 | +2.93 | 40.45 | 41.13 | +0.68 | 81 | | V2VNet | 68.35 | 72.08 | +3.73 | 62.83 | 65.85 | +3.02 | 82 | | DiscoNet | 69.03 | 72.87 | +3.84 | 63.44 | 66.40 | +2.96 | 83 | | Upper-bound | 70.43 | 77.08 | +6.65 | 67.04 | 72.57 | +5.53 | 84 | 85 | -------------------------------------------------------------------------------- /tools/det/compute_mbb_covar.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from copy import deepcopy 4 | 5 | from coperception.utils.CoDetModule import * 6 | from coperception.utils.loss import * 7 | from coperception.utils.mean_ap import eval_map, eval_nll, get_residual_error_and_cov 8 | 9 | def main(args): 10 | start_epoch = args.min_epoch 11 | end_epoch = args.max_epoch 12 | res_diff = [] 13 | all_predicted_covariance = [] 14 | covar_flag = False 15 | iou_thr = 0.5 16 | for epoch in range(start_epoch, end_epoch + 1): 17 | data_path = args.mbb_path + "/{}".format(epoch) + "/all_data.npy" 18 | print("Load data from {}".format(data_path)) 19 | data = np.load(data_path, allow_pickle=True) 20 | det_results_all_local = data.item()['det_results_frame'] 21 | annotations_all_local = data.item()['annotations_frame'] 22 | res_diff_one_epoch, predicted_covar = get_residual_error_and_cov(det_results_all_local, annotations_all_local, scale_ranges=None, iou_thr=iou_thr) 23 | res_diff.extend(res_diff_one_epoch) 24 | if predicted_covar is not None: 25 | all_predicted_covariance.extend(predicted_covar) 26 | covar_flag = True 27 | print("Number of corners of all bounding boxes: {}".format(len(res_diff[epoch]))) 28 | res_diff_np = np.array(res_diff[0]) 29 | if covar_flag: 30 | all_predicted_covariance_np = np.array(all_predicted_covariance[0]) 31 | for i in range(1, len(res_diff)): 32 | res_diff_np = np.concatenate((res_diff_np, res_diff[i])) 33 | if covar_flag: 34 | all_predicted_covariance_np = np.concatenate((all_predicted_covariance_np, all_predicted_covariance[i])) 35 | print(res_diff_np.shape) 36 | print("covariance matrix for residual error:") 37 | covar_e = np.cov(res_diff_np.T) 38 | print(covar_e) 39 | save_data = {"covar_e": covar_e} 40 | if covar_flag: 41 | print(all_predicted_covariance_np.shape) 42 | print("mean 
of predicted covariance matrix:") 43 | covar_a = np.mean(all_predicted_covariance_np, axis=0) 44 | print(covar_a) 45 | save_data['covar_a'] = covar_a 46 | save_data_path = args.mbb_path + "/mbb_covar.npy" 47 | np.save(save_data_path, save_data) 48 | print("Saved computed covariance to {}".format(save_data_path)) 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("--min_epoch", default=0, type=int, help="Minimum epoch we consider") 53 | parser.add_argument("--max_epoch", default=25, type=int, help="Maximum epoch we consider") 54 | parser.add_argument("--nworker", default=1, type=int, help="Number of workers") 55 | parser.add_argument( 56 | "--mbb_path", 57 | default="", 58 | type=str, 59 | help="The path to the several MBB models", 60 | ) 61 | 62 | torch.multiprocessing.set_sharing_strategy("file_system") 63 | args = parser.parse_args() 64 | print(args) 65 | main(args) -------------------------------------------------------------------------------- /tools/det/test_mbb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from copy import deepcopy 4 | 5 | import seaborn as sns 6 | import torch.optim as optim 7 | from torch.utils.data import DataLoader 8 | 9 | from coperception.datasets import V2XSimDet 10 | from coperception.configs import Config, ConfigGlobal 11 | from coperception.utils.CoDetModule import * 12 | from coperception.utils.loss import * 13 | from coperception.utils.mean_ap import eval_map 14 | from coperception.models.det import * 15 | from coperception.utils.detection_util import late_fusion 16 | from coperception.utils.data_util import apply_pose_noise 17 | from coperception.utils.mbb_util import test_model 18 | import socket 19 | 20 | 21 | def check_folder(folder_path): 22 | if not os.path.exists(folder_path): 23 | os.mkdir(folder_path) 24 | return folder_path 25 | 26 | @torch.no_grad() 27 | def main(args): 28 | config = Config("train", binary=True, only_det=True, loss_type=args.loss_type) 29 | config_global = ConfigGlobal("train", binary=True, only_det=True, loss_type=args.loss_type) 30 | 31 | need_log = args.log 32 | num_workers = args.nworker 33 | apply_late_fusion = args.apply_late_fusion 34 | pose_noise = args.pose_noise 35 | compress_level = args.compress_level 36 | only_v2i = args.only_v2i 37 | 38 | # Specify the GPU device 39 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 40 | device_num = torch.cuda.device_count() 41 | print("device number", device_num) 42 | 43 | config.inference = args.inference 44 | if args.com == "upperbound": 45 | flag = "upperbound" 46 | elif args.com == "when2com": 47 | flag = "when2com" 48 | if args.inference == "argmax_test": 49 | flag = "who2com" 50 | if args.warp_flag: 51 | flag = flag + "_warp" 52 | elif args.com in {"v2v", "disco", "sum", "mean", "max", "cat", "agent"}: 53 | flag = args.com 54 | elif args.com == "lowerbound": 55 | flag = "lowerbound" 56 | if args.box_com: 57 | flag += "_box_com" 58 | else: 59 | raise ValueError(f"com: {args.com} is not supported") 60 | 61 | print("flag", flag) 62 | config.flag = flag 63 | config.split = "test" 64 | 65 | num_agent = args.num_agent 66 | # agent0 is the RSU 67 | agent_idx_range = range(num_agent) if args.rsu else range(1, num_agent) 68 | validation_dataset = V2XSimDet( 69 | dataset_roots=[f"{args.data}/agent{i}" for i in agent_idx_range], 70 | config=config, 71 | config_global=config_global, 72 | split="val", 73 | val=True, 74 | bound="upperbound" if 
args.com == "upperbound" else "lowerbound", 75 | kd_flag=args.kd_flag, 76 | rsu=args.rsu, 77 | ) 78 | validation_data_loader = DataLoader( 79 | validation_dataset, batch_size=1, shuffle=False, num_workers=num_workers 80 | ) 81 | print("Validation dataset size:", len(validation_dataset)) 82 | 83 | if not args.rsu: 84 | num_agent -= 1 85 | 86 | if flag == "upperbound" or flag.startswith("lowerbound"): 87 | model = FaFNet( 88 | config, layer=args.layer, kd_flag=args.kd_flag, num_agent=num_agent 89 | ) 90 | elif flag.startswith("when2com") or flag.startswith("who2com"): 91 | # model = PixelwiseWeightedFusionSoftmax(config, layer=args.layer) 92 | model = When2com( 93 | config, 94 | layer=args.layer, 95 | warp_flag=args.warp_flag, 96 | num_agent=num_agent, 97 | compress_level=compress_level, 98 | only_v2i=only_v2i, 99 | ) 100 | elif args.com == "disco": 101 | model = DiscoNet( 102 | config, 103 | layer=args.layer, 104 | kd_flag=args.kd_flag, 105 | num_agent=num_agent, 106 | compress_level=compress_level, 107 | only_v2i=only_v2i, 108 | ) 109 | elif args.com == "sum": 110 | model = SumFusion( 111 | config, 112 | layer=args.layer, 113 | kd_flag=args.kd_flag, 114 | num_agent=num_agent, 115 | compress_level=compress_level, 116 | only_v2i=only_v2i, 117 | ) 118 | elif args.com == "mean": 119 | model = MeanFusion( 120 | config, 121 | layer=args.layer, 122 | kd_flag=args.kd_flag, 123 | num_agent=num_agent, 124 | compress_level=compress_level, 125 | only_v2i=only_v2i, 126 | ) 127 | elif args.com == "max": 128 | model = MaxFusion( 129 | config, 130 | layer=args.layer, 131 | kd_flag=args.kd_flag, 132 | num_agent=num_agent, 133 | compress_level=compress_level, 134 | only_v2i=only_v2i, 135 | ) 136 | elif args.com == "cat": 137 | model = CatFusion( 138 | config, 139 | layer=args.layer, 140 | kd_flag=args.kd_flag, 141 | num_agent=num_agent, 142 | compress_level=compress_level, 143 | only_v2i=only_v2i, 144 | ) 145 | elif args.com == "agent": 146 | model = AgentWiseWeightedFusion( 147 | config, 148 | layer=args.layer, 149 | kd_flag=args.kd_flag, 150 | num_agent=num_agent, 151 | compress_level=compress_level, 152 | only_v2i=only_v2i, 153 | ) 154 | elif args.com == "v2v": 155 | model = V2VNet( 156 | config, 157 | gnn_iter_times=args.gnn_iter_times, 158 | layer=args.layer, 159 | layer_channel=256, 160 | num_agent=num_agent, 161 | compress_level=compress_level, 162 | only_v2i=only_v2i, 163 | ) 164 | 165 | model = nn.DataParallel(model) 166 | model = model.to(device) 167 | optimizer = optim.Adam(model.parameters(), lr=0.001) 168 | criterion = { 169 | "cls": SoftmaxFocalClassificationLoss(), 170 | "loc": WeightedSmoothL1LocalizationLoss(), 171 | } 172 | 173 | fafmodule = FaFModule(model, model, config, optimizer, criterion, args.kd_flag) 174 | 175 | model_save_path = args.resume[: args.resume.rfind("/")] 176 | 177 | if args.inference == "argmax_test": 178 | model_save_path = model_save_path.replace("when2com", "who2com") 179 | 180 | os.makedirs(model_save_path, exist_ok=True) 181 | log_file_name = os.path.join(model_save_path, "log.txt") 182 | saver = open(log_file_name, "a") 183 | saver.write("GPU number: {}\n".format(torch.cuda.device_count())) 184 | saver.flush() 185 | 186 | # Logging the details for this experiment 187 | saver.write("command line: {}\n".format(" ".join(sys.argv[1:]))) 188 | saver.write(args.__repr__() + "\n\n") 189 | saver.flush() 190 | 191 | for epoch in range(args.nepoch+1): 192 | if epoch == 0: 193 | checkpoint_path = args.init_resume_path 194 | else: 195 | checkpoint_path = 
os.path.join(args.resume, f"epoch_{epoch}.pth") 196 | checkpoint = torch.load(checkpoint_path, map_location="cpu") 197 | fafmodule.model.load_state_dict(checkpoint["model_state_dict"]) 198 | fafmodule.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) 199 | fafmodule.scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) 200 | print("Loaded model from {} (epoch {})".format(checkpoint_path, epoch)) 201 | test_model(fafmodule, validation_data_loader, flag, device, config, epoch, args) 202 | 203 | 204 | if __name__ == "__main__": 205 | parser = argparse.ArgumentParser() 206 | parser.add_argument( 207 | "-d", 208 | "--data", 209 | default=None, 210 | type=str, 211 | help="The path to the preprocessed sparse BEV training data", 212 | ) 213 | parser.add_argument("--nepoch", default=100, type=int, help="Number of epochs") 214 | parser.add_argument("--nworker", default=1, type=int, help="Number of workers") 215 | parser.add_argument("--lr", default=0.001, type=float, help="Initial learning rate") 216 | parser.add_argument("--log", action="store_true", help="Whether to log") 217 | parser.add_argument("--logpath", default="", help="The path to the output log file") 218 | parser.add_argument( 219 | "--resume", 220 | default="", 221 | type=str, 222 | help="The path to the saved model that is loaded to resume training", 223 | ) 224 | parser.add_argument( 225 | "--resume_teacher", 226 | default="", 227 | type=str, 228 | help="The path to the saved teacher model that is loaded to resume training", 229 | ) 230 | parser.add_argument( 231 | "--layer", 232 | default=3, 233 | type=int, 234 | help="Which layer to communicate in the single-layer com mode", 235 | ) 236 | parser.add_argument( 237 | "--warp_flag", default=0, type=int, help="Whether to use pose info for When2com" 238 | ) 239 | parser.add_argument( 240 | "--kd_flag", 241 | default=0, 242 | type=int, 243 | help="Whether to enable distillation (only DiscoNet uses 1)", 244 | ) 245 | parser.add_argument("--kd_weight", default=100000, type=int, help="KD loss weight") 246 | parser.add_argument( 247 | "--gnn_iter_times", 248 | default=3, 249 | type=int, 250 | help="Number of message passing iterations for V2VNet", 251 | ) 252 | parser.add_argument( 253 | "--visualization", type=int, default=0, help="Visualize validation result" 254 | ) 255 | parser.add_argument( 256 | "--com", 257 | default="", 258 | type=str, 259 | help="lowerbound/upperbound/disco/when2com/v2v/sum/mean/max/cat/agent", 260 | ) 261 | parser.add_argument("--inference", type=str) 262 | parser.add_argument("--tracking", action="store_true") 263 | parser.add_argument("--box_com", action="store_true") 264 | parser.add_argument("--rsu", default=0, type=int, help="0: no RSU, 1: RSU") 265 | # scene_batch => batch size in each scene 266 | parser.add_argument( 267 | "--num_agent", default=6, type=int, help="The total number of agents" 268 | ) 269 | parser.add_argument( 270 | "--apply_late_fusion", 271 | default=0, 272 | type=int, 273 | help="1: apply late fusion. 
0: no late fusion", 274 | ) 275 | parser.add_argument( 276 | "--compress_level", 277 | default=0, 278 | type=int, 279 | help="Compress the communication layer channels by 2**x times in encoder", 280 | ) 281 | parser.add_argument( 282 | "--pose_noise", 283 | default=0, 284 | type=float, 285 | help="draw noise from normal distribution with given mean (in meters), apply to transformation matrix.", 286 | ) 287 | parser.add_argument( 288 | "--only_v2i", 289 | default=0, 290 | type=int, 291 | help="1: only v2i, 0: v2v and v2i", 292 | ) 293 | parser.add_argument( 294 | "--test_store", 295 | default="", 296 | type=str, 297 | help="The path to store the output of testing", 298 | ) 299 | parser.add_argument( 300 | "--init_resume_path", 301 | default="", 302 | type=str, 303 | help="The path to reload the initial pth", 304 | ) 305 | parser.add_argument( 306 | "--loss_type", 307 | default="corner_loss", 308 | type=str, 309 | help="corner_loss faf_loss kl_loss_center kl_loss_center_add, kl_loss_corner, kl_loss_center_ind, kl_loss_center_offset_ind, kl_loss_corner_pair_ind", 310 | ) 311 | torch.multiprocessing.set_sharing_strategy("file_system") 312 | args = parser.parse_args() 313 | print(args) 314 | main(args) 315 | -------------------------------------------------------------------------------- /tools/utils/move_scenes.py: -------------------------------------------------------------------------------- 1 | # !!! WARNING !!! 2 | # Some of the "0.npy" files inside the folder of each scene might be moved out of the folder, and the folder will be disappeared. 3 | # E.g. We encountered this on scene 45_30 4 | # Please check manually for whether some scenes have this problem. 5 | import os 6 | import shutil 7 | 8 | scene_file = 'test_scenes.txt' 9 | train_scene_file = open(scene_file, 'r') 10 | 11 | train_idxs = set() 12 | for line in train_scene_file: 13 | line = line.strip() 14 | train_idxs.add(int(line)) 15 | 16 | from_loc = '/scratch/dm4524/data/V2X-Sim-det/all' 17 | to_loc = '/scratch/dm4524/data/V2X-Sim-det/test' 18 | 19 | for agent_dir in os.listdir(from_loc): 20 | to_dir = os.path.join(to_loc, agent_dir) 21 | agent_dir = os.path.join(from_loc, agent_dir) 22 | for f in os.listdir(agent_dir): 23 | scene_file_path = os.path.join(agent_dir, f) 24 | scene_idx = int(f.split('_')[0]) 25 | if scene_idx in train_idxs: 26 | shutil.move(scene_file_path, to_dir) -------------------------------------------------------------------------------- /tools/utils/test_scenes.txt: -------------------------------------------------------------------------------- 1 | 5 2 | 8 3 | 19 4 | 27 5 | 28 6 | 29 7 | 91 8 | 92 9 | 96 10 | 97 -------------------------------------------------------------------------------- /tools/utils/train_scenes.txt: -------------------------------------------------------------------------------- 1 | 82 2 | 25 3 | 95 4 | 0 5 | 2 6 | 6 7 | 7 8 | 9 9 | 10 10 | 11 11 | 12 12 | 13 13 | 14 14 | 15 15 | 16 16 | 17 17 | 18 18 | 20 19 | 21 20 | 22 21 | 23 22 | 24 23 | 26 24 | 30 25 | 31 26 | 32 27 | 33 28 | 34 29 | 35 30 | 36 31 | 37 32 | 38 33 | 39 34 | 40 35 | 41 36 | 42 37 | 43 38 | 44 39 | 45 40 | 46 41 | 47 42 | 48 43 | 49 44 | 50 45 | 51 46 | 52 47 | 53 48 | 54 49 | 55 50 | 56 51 | 57 52 | 58 53 | 59 54 | 60 55 | 61 56 | 62 57 | 64 58 | 66 59 | 67 60 | 69 61 | 70 62 | 71 63 | 72 64 | 73 65 | 74 66 | 75 67 | 77 68 | 80 69 | 81 70 | 83 71 | 85 72 | 86 73 | 87 74 | 88 75 | 89 76 | 90 77 | 93 78 | 94 79 | 98 80 | 99 -------------------------------------------------------------------------------- 
/tools/utils/val_scenes.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 3 3 | 4 4 | 63 5 | 65 6 | 68 7 | 76 8 | 78 9 | 79 10 | 84 --------------------------------------------------------------------------------