├── .github
    └── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── figs
    ├── lane_segment.jpg
    └── pipeline.png
├── projects
    ├── __init__.py
    ├── bevformer
    │   ├── __init__.py
    │   └── modules
    │   │   ├── __init__.py
    │   │   ├── custom_base_transformer_layer.py
    │   │   ├── decoder.py
    │   │   ├── encoder.py
    │   │   ├── multi_scale_deformable_attn_function.py
    │   │   ├── spatial_cross_attention.py
    │   │   └── temporal_self_attention.py
    ├── configs
    │   ├── lanesegnet_r50_8x1_24e_olv2_subset_A.py
    │   └── lanesegnet_r50_8x1_24e_olv2_subset_A_mapele_bucket.py
    └── lanesegnet
    │   ├── __init__.py
    │   ├── core
    │       ├── __init__.py
    │       ├── lane
    │       │   ├── __init__.py
    │       │   ├── area_coder.py
    │       │   ├── lane_coder.py
    │       │   ├── lane_hungarian_assigner.py
    │       │   ├── match_cost.py
    │       │   └── util.py
    │       └── visualizer
    │       │   ├── __init__.py
    │       │   └── lane_segment.py
    │   ├── datasets
    │       ├── __init__.py
    │       ├── openlanev2_evaluate_custom.py
    │       ├── openlanev2_subset_A_lanesegnet_dataset.py
    │       ├── openlanev2_subset_A_mapele_bucket_dataset.py
    │       └── pipelines
    │       │   ├── __init__.py
    │       │   ├── formating.py
    │       │   ├── loading.py
    │       │   ├── transform_3d.py
    │       │   └── transform_3d_lane.py
    │   ├── models
    │       ├── __init__.py
    │       ├── dense_heads
    │       │   ├── __init__.py
    │       │   ├── deformable_detr_head.py
    │       │   ├── laneseg_head.py
    │       │   └── relationship_head.py
    │       ├── detectors
    │       │   ├── __init__.py
    │       │   ├── lanesegnet.py
    │       │   └── lanesegnet_mapele_bucket.py
    │       └── modules
    │       │   ├── __init__.py
    │       │   ├── bevformer_constructer.py
    │       │   ├── lane_attention.py
    │       │   ├── laneseg_decoder.py
    │       │   └── laneseg_transformer.py
    │   ├── thirdparty
    │       ├── __init__.py
    │       ├── map_loss.py
    │       ├── maptr_assigner.py
    │       ├── maptr_decoder.py
    │       ├── maptr_head.py
    │       └── maptr_transformer.py
    │   └── utils
    │       ├── __init__.py
    │       └── builder.py
├── requirements.txt
└── tools
    ├── data_process.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── test.py
    └── train.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: [OpenDriveLab] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | # Custom
133 | ckpts
134 | data
135 | work_dirs
136 | .vscode
137 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | 
  3 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving
  4 | 
  5 | [![arXiv](https://img.shields.io/badge/arXiv-2312.16108-479ee2.svg)](https://arxiv.org/abs/2312.16108)
  6 | [![OpenLane-V2](https://img.shields.io/badge/GitHub-OpenLane--V2-blueviolet.svg)](https://github.com/OpenDriveLab/OpenLane-V2)
  7 | [![LICENSE](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](./LICENSE)
  8 | 
  9 | ![lanesegment](figs/lane_segment.jpg "Diagram of Lane Segment")
 10 | 
 11 | </div>
 12 | 
 13 | > - :mailbox_with_mail: Primary contact: [Tianyu Li](https://scholar.google.com/citations?user=X6vTmEMAAAAJ) ( litianyu@opendrivelab.com )
 14 | > - [arXiv paper](https://arxiv.org/abs/2312.16108) | [OpenReview](https://openreview.net/forum?id=LsURkIPYR5), ICLR 2024
 15 | > - [Blog (Zhihu)](https://zhuanlan.zhihu.com/p/678596087) | [Slides TODO]()
 16 | 
 17 | ## Highlights
 18 | 
 19 | :fire: We advocate **Lane Segment** as a map learning paradigm that seamlessly incorporates both map :motorway: geometry and :spider_web: topology information.
 20 | 
 21 | :checkered_flag: **Lane Segment** and **`OpenLane-V2 Map Element Bucket`** serve as a track in the **`CVPR 2024 Autonomous Grand Challenge`**.
 22 | 
 23 | > - Official website: :globe_with_meridians: [AGC2024 Mapless Driving](https://opendrivelab.com/challenge2024/#mapless_driving)
 24 | > - Evaluation server: :hugs: [Hugging Face](https://huggingface.co/spaces/AGC2024/mapless-driving-2024)
 25 | 
 26 | This repository can be used as a starting point for Mapless Driving track.
 27 | 
 28 | 
 29 | 
 30 | ## News
 31 | - **`[2024/3]`** We released a multi-head version of LaneSegNet for the OpenLane-V2 Map Element Bucket!
 32 |   - Refer to this [config](projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A_mapele_bucket.py) file to train the baseline!
 33 |   - Please use the `*_ls.pkl` files generated by [OpenLane-V2](https://github.com/OpenDriveLab/OpenLane-V2/tree/master/data#preprocess) preprocess scripts!
 34 |   - You can use `./tools/dist_test.sh {GPUs} --format-only` to generate the `submission.pkl` now!
 35 | - **`[2023/12]`** LaneSegNet [paper](https://arxiv.org/abs/2312.16108) is available on arXiv. Code is also released!
 36 | 
 37 | ---
 38 | 
 39 | ![method](figs/pipeline.png "Pipeline of LaneSegNet")
 40 | 
 41 | <div align="center">
 42 | <b>Overall pipeline of LaneSegNet</b>
 43 | </div>
 44 | 
 45 | ## Table of Contents
 46 | - [Model Zoo](#model-zoo)
 47 | - [Prerequisites](#prerequisites)
 48 | - [Installation](#installation)
 49 | - [Prepare Dataset](#prepare-dataset)
 50 | - [Train and Evaluate](#train-and-evaluate)
 51 | - [License and Citation](#license-and-citation)
 52 | 
 53 | ## Model Zoo
 54 | 
 55 | > [!NOTE]
 56 | > The evaluation results below are based on OpenLane-V2 devkit `v2.1.0`. In this version, we have addressed a loophole in the TOP metric, which caused the TOP<sub>lsls</sub> value to be significantly higher than what was reported in the paper.  
 57 | > For more details please see issue [#76](https://github.com/OpenDriveLab/OpenLane-V2/issues/76) of OpenLane-V2.
 58 | 
 59 | ### Performance in LaneSegNet paper
 60 | 
 61 | |   Model    | Epoch |  mAP  | TOP<sub>lsls</sub> | Memory | Config | Download |
 62 | | :--------: | :---: | :---: | :----------------: | :----: | :----: | :------: |
 63 | | LaneSegNet | 24 | 33.5 | 25.4 | 9.4G | [config](projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py) | [ckpt](https://huggingface.co/OpenDriveLab/lanesegnet_r50_8x1_24e_olv2_subset_A/resolve/main/lanesegnet_r50_8x1_24e_olv2_subset_A.pth) / [log](https://huggingface.co/OpenDriveLab/lanesegnet_r50_8x1_24e_olv2_subset_A/resolve/main/20231225_213951.log) |
 64 | 
 65 | > The mean AP is between lane segment and pedestrian crossing.
 66 | 
 67 | ### Performance on OpenLane-V2 Map Element Bucket
 68 | 
 69 | | Model | Epoch | DET<sub>ls</sub> | DET<sub>a</sub> | DET<sub>t</sub> | TOP<sub>lsls</sub> | TOP<sub>lste</sub> | Config |
 70 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
 71 | | LaneSegNet-meb | 24 | 27.8 | 23.8 | 36.9 | 24.1 | 21.3 | [config](projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A_mapele_bucket.py) |
 72 | 
 73 | > This is a naive multi-branch model for the Map Element Bucket.  
 74 | > The pedestrian and road boundary are detected by an additional MapTR head. The traffic element are detected by a Deformable DETR head. The hyper-parameters are roughly set.
 75 | 
 76 | ## Prerequisites
 77 | 
 78 | - Linux
 79 | - Python 3.8.x
 80 | - NVIDIA GPU + CUDA 11.1
 81 | - PyTorch 1.9.1
 82 | 
 83 | ## Installation
 84 | 
 85 | We recommend using [conda](https://docs.conda.io/en/latest/miniconda.html) to run the code.
 86 | ```bash
 87 | conda create -n lanesegnet python=3.8 -y
 88 | conda activate lanesegnet
 89 | 
 90 | # (optional) If you have CUDA installed on your computer, skip this step.
 91 | conda install cudatoolkit=11.1.1 -c conda-forge
 92 | 
 93 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
 94 | ```
 95 | 
 96 | Install mm-series packages.
 97 | ```bash
 98 | pip install mmcv-full==1.5.2 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
 99 | pip install mmdet==2.26.0
100 | pip install mmsegmentation==0.29.1
101 | pip install mmdet3d==1.0.0rc6
102 | ```
103 | 
104 | Install other required packages.
105 | ```bash
106 | pip install -r requirements.txt
107 | ```
108 | 
109 | ## Prepare Dataset
110 | 
111 | Following [OpenLane-V2 repo](https://github.com/OpenDriveLab/OpenLane-V2/blob/v2.1.0/data) to download the **Image** and the **Map Element Bucket** data. Run the following script to collect data for this repo. 
112 | 
113 | > [!IMPORTANT]
114 | > 
115 | > :exclamation: Please note that the script for generating LaneSegNet data is not the same as the OpenLane-V2 Map Element Bucket. The `*_lanesegnet.pkl` is not the same as the `*_ls.pkl`.
116 | > 
117 | > :bell: The `Map Element Bucket` has been updated as of October 2023. Please ensure you download the most recent data.
118 | 
119 | ```bash
120 | cd LaneSegNet
121 | mkdir data
122 | 
123 | ln -s {Path to OpenLane-V2 repo}/data/OpenLane-V2 ./data/
124 | python ./tools/data_process.py
125 | ```
126 | 
127 | After setup, the hierarchy of folder `data` is described below:
128 | ```
129 | data/OpenLane-V2
130 | ├── train
131 | |   └── ...
132 | ├── val
133 | |   └── ...
134 | ├── test
135 | |   └── ...
136 | ├── data_dict_subset_A_train_lanesegnet.pkl
137 | ├── data_dict_subset_A_val_lanesegnet.pkl
138 | ├── ...
139 | ```
140 | 
141 | ## Train and Evaluate
142 | 
143 | ### Train
144 | 
145 | We recommend using 8 GPUs for training. If a different number of GPUs is utilized, you can enhance performance by configuring the `--autoscale-lr` option. The training logs will be saved to `work_dirs/lanesegnet`.
146 | 
147 | ```bash
148 | cd LaneSegNet
149 | mkdir -p work_dirs/lanesegnet
150 | 
151 | ./tools/dist_train.sh 8 [--autoscale-lr]
152 | ```
153 | 
154 | ### Evaluate
155 | You can set `--show` to visualize the results.
156 | 
157 | ```bash
158 | ./tools/dist_test.sh 8 [--show]
159 | ```
160 | 
161 | ## License and Citation
162 | All assets and code are under the [Apache 2.0 license](./LICENSE) unless specified otherwise.
163 | 
164 | If this work is helpful for your research, please consider citing the following BibTeX entry.
165 | 
166 | ``` bibtex
167 | @inproceedings{li2023lanesegnet,
168 |   title={LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving},
169 |   author={Li, Tianyu and Jia, Peijin and Wang, Bangjun and Chen, Li and Jiang, Kun and Yan, Junchi and Li, Hongyang},
170 |   booktitle={ICLR},
171 |   year={2024}
172 | }
173 | 
174 | @inproceedings{wang2023openlanev2,
175 |   title={OpenLane-V2: A Topology Reasoning Benchmark for Unified 3D HD Mapping}, 
176 |   author={Wang, Huijie and Li, Tianyu and Li, Yang and Chen, Li and Sima, Chonghao and Liu, Zhenbo and Wang, Bangjun and Jia, Peijin and Wang, Yuting and Jiang, Shengyin and Wen, Feng and Xu, Hang and Luo, Ping and Yan, Junchi and Zhang, Wei and Li, Hongyang},
177 |   booktitle={NeurIPS},
178 |   year={2023}
179 | }
180 | ```
181 | 
182 | ## Related resources
183 | 
184 | We acknowledge all the open-source contributors for the following projects to make this work possible:
185 | 
186 | - [Openlane-V2](https://github.com/OpenDriveLab/OpenLane-V2)
187 | - [BEVFormer](https://github.com/fundamentalvision/BEVFormer)
188 | - [TopoNet](https://github.com/OpenDriveLab/TopoNet)
189 | - [PersFormer](https://github.com/OpenDriveLab/PersFormer_3DLane)
190 | - [OpenLane](https://github.com/OpenDriveLab/OpenLane)
191 | - [MapTR](https://github.com/hustvl/MapTR)
192 | 


--------------------------------------------------------------------------------
/figs/lane_segment.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/LaneSegNet/699e5862ba2c173490b7e1f47b06184be8b7306e/figs/lane_segment.jpg


--------------------------------------------------------------------------------
/figs/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/LaneSegNet/699e5862ba2c173490b7e1f47b06184be8b7306e/figs/pipeline.png


--------------------------------------------------------------------------------
/projects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/LaneSegNet/699e5862ba2c173490b7e1f47b06184be8b7306e/projects/__init__.py


--------------------------------------------------------------------------------
/projects/bevformer/__init__.py:
--------------------------------------------------------------------------------
1 | from .modules import *
2 | 


--------------------------------------------------------------------------------
/projects/bevformer/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D
2 | from .temporal_self_attention import TemporalSelfAttention
3 | from .encoder import BEVFormerEncoder, BEVFormerLayer
4 | from .decoder import DetectionTransformerDecoder
5 | 


--------------------------------------------------------------------------------
/projects/bevformer/modules/custom_base_transformer_layer.py:
--------------------------------------------------------------------------------
  1 | # ---------------------------------------------
  2 | # Copyright (c) OpenMMLab. All rights reserved.
  3 | # ---------------------------------------------
  4 | #  Modified by Zhiqi Li
  5 | # ---------------------------------------------
  6 | 
  7 | import copy
  8 | import warnings
  9 | 
 10 | import torch
 11 | import torch.nn as nn
 12 | 
 13 | from mmcv import ConfigDict, deprecated_api_warning
 14 | from mmcv.cnn import Linear, build_activation_layer, build_norm_layer
 15 | from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
 16 | 
 17 | from mmcv.cnn.bricks.registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,
 18 |                                       TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)
 19 | 
 20 | # Avoid BC-breaking of importing MultiScaleDeformableAttention from this file
 21 | try:
 22 |     from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention  # noqa F401
 23 |     warnings.warn(
 24 |         ImportWarning(
 25 |             '``MultiScaleDeformableAttention`` has been moved to '
 26 |             '``mmcv.ops.multi_scale_deform_attn``, please change original path '  # noqa E501
 27 |             '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` '  # noqa E501
 28 |             'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` '  # noqa E501
 29 |         ))
 30 | except ImportError:
 31 |     warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from '
 32 |                   '``mmcv.ops.multi_scale_deform_attn``, '
 33 |                   'You should install ``mmcv-full`` if you need this module. ')
 34 | from mmcv.cnn.bricks.transformer import build_feedforward_network, build_attention
 35 | 
 36 | 
 37 | @TRANSFORMER_LAYER.register_module()
 38 | class MyCustomBaseTransformerLayer(BaseModule):
 39 |     """Base `TransformerLayer` for vision transformer.
 40 |     It can be built from `mmcv.ConfigDict` and support more flexible
 41 |     customization, for example, using any number of `FFN or LN ` and
 42 |     use different kinds of `attention` by specifying a list of `ConfigDict`
 43 |     named `attn_cfgs`. It is worth mentioning that it supports `prenorm`
 44 |     when you specifying `norm` as the first element of `operation_order`.
 45 |     More details about the `prenorm`: `On Layer Normalization in the
 46 |     Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .
 47 |     Args:
 48 |         attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
 49 |             Configs for `self_attention` or `cross_attention` modules,
 50 |             The order of the configs in the list should be consistent with
 51 |             corresponding attentions in operation_order.
 52 |             If it is a dict, all of the attention modules in operation_order
 53 |             will be built with this config. Default: None.
 54 |         ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
 55 |             Configs for FFN, The order of the configs in the list should be
 56 |             consistent with corresponding ffn in operation_order.
 57 |             If it is a dict, all of the attention modules in operation_order
 58 |             will be built with this config.
 59 |         operation_order (tuple[str]): The execution order of operation
 60 |             in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').
 61 |             Support `prenorm` when you specifying first element as `norm`.
 62 |             Default：None.
 63 |         norm_cfg (dict): Config dict for normalization layer.
 64 |             Default: dict(type='LN').
 65 |         init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
 66 |             Default: None.
 67 |         batch_first (bool): Key, Query and Value are shape
 68 |             of (batch, n, embed_dim)
 69 |             or (n, batch, embed_dim). Default to False.
 70 |     """
 71 | 
 72 |     def __init__(self,
 73 |                  attn_cfgs=None,
 74 |                  ffn_cfgs=dict(
 75 |                      type='FFN',
 76 |                      embed_dims=256,
 77 |                      feedforward_channels=1024,
 78 |                      num_fcs=2,
 79 |                      ffn_drop=0.,
 80 |                      act_cfg=dict(type='ReLU', inplace=True),
 81 |                  ),
 82 |                  operation_order=None,
 83 |                  norm_cfg=dict(type='LN'),
 84 |                  init_cfg=None,
 85 |                  batch_first=True,
 86 |                  **kwargs):
 87 | 
 88 |         deprecated_args = dict(
 89 |             feedforward_channels='feedforward_channels',
 90 |             ffn_dropout='ffn_drop',
 91 |             ffn_num_fcs='num_fcs')
 92 |         for ori_name, new_name in deprecated_args.items():
 93 |             if ori_name in kwargs:
 94 |                 warnings.warn(
 95 |                     f'The arguments `{ori_name}` in BaseTransformerLayer '
 96 |                     f'has been deprecated, now you should set `{new_name}` '
 97 |                     f'and other FFN related arguments '
 98 |                     f'to a dict named `ffn_cfgs`. ')
 99 |                 ffn_cfgs[new_name] = kwargs[ori_name]
100 | 
101 |         super(MyCustomBaseTransformerLayer, self).__init__(init_cfg)
102 | 
103 |         self.batch_first = batch_first
104 | 
105 |         assert set(operation_order) & set(
106 |             ['self_attn', 'norm', 'ffn', 'cross_attn']) == \
107 |             set(operation_order), f'The operation_order of' \
108 |             f' {self.__class__.__name__} should ' \
109 |             f'contains all four operation type ' \
110 |             f"{['self_attn', 'norm', 'ffn', 'cross_attn']}"
111 | 
112 |         num_attn = operation_order.count('self_attn') + operation_order.count(
113 |             'cross_attn')
114 |         if isinstance(attn_cfgs, dict):
115 |             attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
116 |         else:
117 |             assert num_attn == len(attn_cfgs), f'The length ' \
118 |                 f'of attn_cfg {num_attn} is ' \
119 |                 f'not consistent with the number of attention' \
120 |                 f'in operation_order {operation_order}.'
121 | 
122 |         self.num_attn = num_attn
123 |         self.operation_order = operation_order
124 |         self.norm_cfg = norm_cfg
125 |         self.pre_norm = operation_order[0] == 'norm'
126 |         self.attentions = ModuleList()
127 | 
128 |         index = 0
129 |         for operation_name in operation_order:
130 |             if operation_name in ['self_attn', 'cross_attn']:
131 |                 if 'batch_first' in attn_cfgs[index]:
132 |                     assert self.batch_first == attn_cfgs[index]['batch_first']
133 |                 else:
134 |                     attn_cfgs[index]['batch_first'] = self.batch_first
135 |                 attention = build_attention(attn_cfgs[index])
136 |                 # Some custom attentions used as `self_attn`
137 |                 # or `cross_attn` can have different behavior.
138 |                 attention.operation_name = operation_name
139 |                 self.attentions.append(attention)
140 |                 index += 1
141 | 
142 |         self.embed_dims = self.attentions[0].embed_dims
143 | 
144 |         self.ffns = ModuleList()
145 |         num_ffns = operation_order.count('ffn')
146 |         if isinstance(ffn_cfgs, dict):
147 |             ffn_cfgs = ConfigDict(ffn_cfgs)
148 |         if isinstance(ffn_cfgs, dict):
149 |             ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
150 |         assert len(ffn_cfgs) == num_ffns
151 |         for ffn_index in range(num_ffns):
152 |             if 'embed_dims' not in ffn_cfgs[ffn_index]:
153 |                 ffn_cfgs['embed_dims'] = self.embed_dims
154 |             else:
155 |                 assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
156 | 
157 |             self.ffns.append(
158 |                 build_feedforward_network(ffn_cfgs[ffn_index]))
159 | 
160 |         self.norms = ModuleList()
161 |         num_norms = operation_order.count('norm')
162 |         for _ in range(num_norms):
163 |             self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])
164 | 
165 |     def forward(self,
166 |                 query,
167 |                 key=None,
168 |                 value=None,
169 |                 query_pos=None,
170 |                 key_pos=None,
171 |                 attn_masks=None,
172 |                 query_key_padding_mask=None,
173 |                 key_padding_mask=None,
174 |                 **kwargs):
175 |         """Forward function for `TransformerDecoderLayer`.
176 |         **kwargs contains some specific arguments of attentions.
177 |         Args:
178 |             query (Tensor): The input query with shape
179 |                 [num_queries, bs, embed_dims] if
180 |                 self.batch_first is False, else
181 |                 [bs, num_queries embed_dims].
182 |             key (Tensor): The key tensor with shape [num_keys, bs,
183 |                 embed_dims] if self.batch_first is False, else
184 |                 [bs, num_keys, embed_dims] .
185 |             value (Tensor): The value tensor with same shape as `key`.
186 |             query_pos (Tensor): The positional encoding for `query`.
187 |                 Default: None.
188 |             key_pos (Tensor): The positional encoding for `key`.
189 |                 Default: None.
190 |             attn_masks (List[Tensor] | None): 2D Tensor used in
191 |                 calculation of corresponding attention. The length of
192 |                 it should equal to the number of `attention` in
193 |                 `operation_order`. Default: None.
194 |             query_key_padding_mask (Tensor): ByteTensor for `query`, with
195 |                 shape [bs, num_queries]. Only used in `self_attn` layer.
196 |                 Defaults to None.
197 |             key_padding_mask (Tensor): ByteTensor for `query`, with
198 |                 shape [bs, num_keys]. Default: None.
199 |         Returns:
200 |             Tensor: forwarded results with shape [num_queries, bs, embed_dims].
201 |         """
202 | 
203 |         norm_index = 0
204 |         attn_index = 0
205 |         ffn_index = 0
206 |         identity = query
207 |         if attn_masks is None:
208 |             attn_masks = [None for _ in range(self.num_attn)]
209 |         elif isinstance(attn_masks, torch.Tensor):
210 |             attn_masks = [
211 |                 copy.deepcopy(attn_masks) for _ in range(self.num_attn)
212 |             ]
213 |             warnings.warn(f'Use same attn_mask in all attentions in '
214 |                           f'{self.__class__.__name__} ')
215 |         else:
216 |             assert len(attn_masks) == self.num_attn, f'The length of ' \
217 |                 f'attn_masks {len(attn_masks)} must be equal ' \
218 |                 f'to the number of attention in ' \
219 |                 f'operation_order {self.num_attn}'
220 | 
221 |         for layer in self.operation_order:
222 |             if layer == 'self_attn':
223 |                 temp_key = temp_value = query
224 |                 query = self.attentions[attn_index](
225 |                     query,
226 |                     temp_key,
227 |                     temp_value,
228 |                     identity if self.pre_norm else None,
229 |                     query_pos=query_pos,
230 |                     key_pos=query_pos,
231 |                     attn_mask=attn_masks[attn_index],
232 |                     key_padding_mask=query_key_padding_mask,
233 |                     **kwargs)
234 |                 attn_index += 1
235 |                 identity = query
236 | 
237 |             elif layer == 'norm':
238 |                 query = self.norms[norm_index](query)
239 |                 norm_index += 1
240 | 
241 |             elif layer == 'cross_attn':
242 |                 query = self.attentions[attn_index](
243 |                     query,
244 |                     key,
245 |                     value,
246 |                     identity if self.pre_norm else None,
247 |                     query_pos=query_pos,
248 |                     key_pos=key_pos,
249 |                     attn_mask=attn_masks[attn_index],
250 |                     key_padding_mask=key_padding_mask,
251 |                     **kwargs)
252 |                 attn_index += 1
253 |                 identity = query
254 | 
255 |             elif layer == 'ffn':
256 |                 query = self.ffns[ffn_index](
257 |                     query, identity if self.pre_norm else None)
258 |                 ffn_index += 1
259 | 
260 |         return query
261 | 


--------------------------------------------------------------------------------
/projects/bevformer/modules/multi_scale_deformable_attn_function.py:
--------------------------------------------------------------------------------
  1 | # ---------------------------------------------
  2 | # Copyright (c) OpenMMLab. All rights reserved.
  3 | # ---------------------------------------------
  4 | #  Modified by Zhiqi Li
  5 | # ---------------------------------------------
  6 | 
  7 | import torch
  8 | from torch.cuda.amp import custom_bwd, custom_fwd
  9 | from torch.autograd.function import Function, once_differentiable
 10 | from mmcv.utils import ext_loader
 11 | ext_module = ext_loader.load_ext(
 12 |     '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
 13 | 
 14 | 
 15 | class MultiScaleDeformableAttnFunction_fp16(Function):
 16 | 
 17 |     @staticmethod
 18 |     @custom_fwd(cast_inputs=torch.float16)
 19 |     def forward(ctx, value, value_spatial_shapes, value_level_start_index,
 20 |                 sampling_locations, attention_weights, im2col_step):
 21 |         """GPU version of multi-scale deformable attention.
 22 | 
 23 |         Args:
 24 |             value (Tensor): The value has shape
 25 |                 (bs, num_keys, mum_heads, embed_dims//num_heads)
 26 |             value_spatial_shapes (Tensor): Spatial shape of
 27 |                 each feature map, has shape (num_levels, 2),
 28 |                 last dimension 2 represent (h, w)
 29 |             sampling_locations (Tensor): The location of sampling points,
 30 |                 has shape
 31 |                 (bs ,num_queries, num_heads, num_levels, num_points, 2),
 32 |                 the last dimension 2 represent (x, y).
 33 |             attention_weights (Tensor): The weight of sampling points used
 34 |                 when calculate the attention, has shape
 35 |                 (bs ,num_queries, num_heads, num_levels, num_points),
 36 |             im2col_step (Tensor): The step used in image to column.
 37 | 
 38 |         Returns:
 39 |             Tensor: has shape (bs, num_queries, embed_dims)
 40 |         """
 41 |         ctx.im2col_step = im2col_step
 42 |         output = ext_module.ms_deform_attn_forward(
 43 |             value,
 44 |             value_spatial_shapes,
 45 |             value_level_start_index,
 46 |             sampling_locations,
 47 |             attention_weights,
 48 |             im2col_step=ctx.im2col_step)
 49 |         ctx.save_for_backward(value, value_spatial_shapes,
 50 |                               value_level_start_index, sampling_locations,
 51 |                               attention_weights)
 52 |         return output
 53 | 
 54 |     @staticmethod
 55 |     @once_differentiable
 56 |     @custom_bwd
 57 |     def backward(ctx, grad_output):
 58 |         """GPU version of backward function.
 59 | 
 60 |         Args:
 61 |             grad_output (Tensor): Gradient
 62 |                 of output tensor of forward.
 63 | 
 64 |         Returns:
 65 |              Tuple[Tensor]: Gradient
 66 |                 of input tensors in forward.
 67 |         """
 68 |         value, value_spatial_shapes, value_level_start_index, \
 69 |             sampling_locations, attention_weights = ctx.saved_tensors
 70 |         grad_value = torch.zeros_like(value)
 71 |         grad_sampling_loc = torch.zeros_like(sampling_locations)
 72 |         grad_attn_weight = torch.zeros_like(attention_weights)
 73 | 
 74 |         ext_module.ms_deform_attn_backward(
 75 |             value,
 76 |             value_spatial_shapes,
 77 |             value_level_start_index,
 78 |             sampling_locations,
 79 |             attention_weights,
 80 |             grad_output.contiguous(),
 81 |             grad_value,
 82 |             grad_sampling_loc,
 83 |             grad_attn_weight,
 84 |             im2col_step=ctx.im2col_step)
 85 | 
 86 |         return grad_value, None, None, \
 87 |             grad_sampling_loc, grad_attn_weight, None
 88 | 
 89 | 
 90 | class MultiScaleDeformableAttnFunction_fp32(Function):
 91 | 
 92 |     @staticmethod
 93 |     @custom_fwd(cast_inputs=torch.float32)
 94 |     def forward(ctx, value, value_spatial_shapes, value_level_start_index,
 95 |                 sampling_locations, attention_weights, im2col_step):
 96 |         """GPU version of multi-scale deformable attention.
 97 | 
 98 |         Args:
 99 |             value (Tensor): The value has shape
100 |                 (bs, num_keys, mum_heads, embed_dims//num_heads)
101 |             value_spatial_shapes (Tensor): Spatial shape of
102 |                 each feature map, has shape (num_levels, 2),
103 |                 last dimension 2 represent (h, w)
104 |             sampling_locations (Tensor): The location of sampling points,
105 |                 has shape
106 |                 (bs ,num_queries, num_heads, num_levels, num_points, 2),
107 |                 the last dimension 2 represent (x, y).
108 |             attention_weights (Tensor): The weight of sampling points used
109 |                 when calculate the attention, has shape
110 |                 (bs ,num_queries, num_heads, num_levels, num_points),
111 |             im2col_step (Tensor): The step used in image to column.
112 | 
113 |         Returns:
114 |             Tensor: has shape (bs, num_queries, embed_dims)
115 |         """
116 | 
117 |         ctx.im2col_step = im2col_step
118 |         output = ext_module.ms_deform_attn_forward(
119 |             value,
120 |             value_spatial_shapes,
121 |             value_level_start_index,
122 |             sampling_locations,
123 |             attention_weights,
124 |             im2col_step=ctx.im2col_step)
125 |         ctx.save_for_backward(value, value_spatial_shapes,
126 |                               value_level_start_index, sampling_locations,
127 |                               attention_weights)
128 |         return output
129 | 
130 |     @staticmethod
131 |     @once_differentiable
132 |     @custom_bwd
133 |     def backward(ctx, grad_output):
134 |         """GPU version of backward function.
135 | 
136 |         Args:
137 |             grad_output (Tensor): Gradient
138 |                 of output tensor of forward.
139 | 
140 |         Returns:
141 |              Tuple[Tensor]: Gradient
142 |                 of input tensors in forward.
143 |         """
144 |         value, value_spatial_shapes, value_level_start_index, \
145 |             sampling_locations, attention_weights = ctx.saved_tensors
146 |         grad_value = torch.zeros_like(value)
147 |         grad_sampling_loc = torch.zeros_like(sampling_locations)
148 |         grad_attn_weight = torch.zeros_like(attention_weights)
149 | 
150 |         ext_module.ms_deform_attn_backward(
151 |             value,
152 |             value_spatial_shapes,
153 |             value_level_start_index,
154 |             sampling_locations,
155 |             attention_weights,
156 |             grad_output.contiguous(),
157 |             grad_value,
158 |             grad_sampling_loc,
159 |             grad_attn_weight,
160 |             im2col_step=ctx.im2col_step)
161 | 
162 |         return grad_value, None, None, \
163 |             grad_sampling_loc, grad_attn_weight, None
164 | 


--------------------------------------------------------------------------------
/projects/bevformer/modules/temporal_self_attention.py:
--------------------------------------------------------------------------------
  1 | # ---------------------------------------------
  2 | # Copyright (c) OpenMMLab. All rights reserved.
  3 | # ---------------------------------------------
  4 | #  Modified by Zhiqi Li
  5 | # ---------------------------------------------
  6 | 
  7 | from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32
  8 | from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
  9 | import warnings
 10 | import torch
 11 | import torch.nn as nn
 12 | from mmcv.cnn import xavier_init, constant_init
 13 | from mmcv.cnn.bricks.registry import ATTENTION
 14 | import math
 15 | from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
 16 | from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,
 17 |                         to_2tuple)
 18 | 
 19 | from mmcv.utils import ext_loader
 20 | ext_module = ext_loader.load_ext(
 21 |     '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
 22 | 
 23 | 
 24 | @ATTENTION.register_module()
 25 | class TemporalSelfAttention(BaseModule):
 26 |     """An attention module used in BEVFormer based on Deformable-Detr.
 27 | 
 28 |     `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
 29 |     <https://arxiv.org/pdf/2010.04159.pdf>`_.
 30 | 
 31 |     Args:
 32 |         embed_dims (int): The embedding dimension of Attention.
 33 |             Default: 256.
 34 |         num_heads (int): Parallel attention heads. Default: 64.
 35 |         num_levels (int): The number of feature map used in
 36 |             Attention. Default: 4.
 37 |         num_points (int): The number of sampling points for
 38 |             each query in each head. Default: 4.
 39 |         im2col_step (int): The step used in image_to_column.
 40 |             Default: 64.
 41 |         dropout (float): A Dropout layer on `inp_identity`.
 42 |             Default: 0.1.
 43 |         batch_first (bool): Key, Query and Value are shape of
 44 |             (batch, n, embed_dim)
 45 |             or (n, batch, embed_dim). Default to True.
 46 |         norm_cfg (dict): Config dict for normalization layer.
 47 |             Default: None.
 48 |         init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
 49 |             Default: None.
 50 |         num_bev_queue (int): In this version, we only use one history BEV and one currenct BEV.
 51 |          the length of BEV queue is 2.
 52 |     """
 53 | 
 54 |     def __init__(self,
 55 |                  embed_dims=256,
 56 |                  num_heads=8,
 57 |                  num_levels=4,
 58 |                  num_points=4,
 59 |                  num_bev_queue=2,
 60 |                  im2col_step=64,
 61 |                  dropout=0.1,
 62 |                  batch_first=True,
 63 |                  norm_cfg=None,
 64 |                  init_cfg=None):
 65 | 
 66 |         super().__init__(init_cfg)
 67 |         if embed_dims % num_heads != 0:
 68 |             raise ValueError(f'embed_dims must be divisible by num_heads, '
 69 |                              f'but got {embed_dims} and {num_heads}')
 70 |         dim_per_head = embed_dims // num_heads
 71 |         self.norm_cfg = norm_cfg
 72 |         self.dropout = nn.Dropout(dropout)
 73 |         self.batch_first = batch_first
 74 |         self.fp16_enabled = False
 75 | 
 76 |         # you'd better set dim_per_head to a power of 2
 77 |         # which is more efficient in the CUDA implementation
 78 |         def _is_power_of_2(n):
 79 |             if (not isinstance(n, int)) or (n < 0):
 80 |                 raise ValueError(
 81 |                     'invalid input for _is_power_of_2: {} (type: {})'.format(
 82 |                         n, type(n)))
 83 |             return (n & (n - 1) == 0) and n != 0
 84 | 
 85 |         if not _is_power_of_2(dim_per_head):
 86 |             warnings.warn(
 87 |                 "You'd better set embed_dims in "
 88 |                 'MultiScaleDeformAttention to make '
 89 |                 'the dimension of each attention head a power of 2 '
 90 |                 'which is more efficient in our CUDA implementation.')
 91 | 
 92 |         self.im2col_step = im2col_step
 93 |         self.embed_dims = embed_dims
 94 |         self.num_levels = num_levels
 95 |         self.num_heads = num_heads
 96 |         self.num_points = num_points
 97 |         self.num_bev_queue = num_bev_queue
 98 |         self.sampling_offsets = nn.Linear(
 99 |             embed_dims*self.num_bev_queue, num_bev_queue*num_heads * num_levels * num_points * 2)
100 |         self.attention_weights = nn.Linear(embed_dims*self.num_bev_queue,
101 |                                            num_bev_queue*num_heads * num_levels * num_points)
102 |         self.value_proj = nn.Linear(embed_dims, embed_dims)
103 |         self.output_proj = nn.Linear(embed_dims, embed_dims)
104 |         self.init_weights()
105 | 
106 |     def init_weights(self):
107 |         """Default initialization for Parameters of Module."""
108 |         constant_init(self.sampling_offsets, 0.)
109 |         thetas = torch.arange(
110 |             self.num_heads,
111 |             dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
112 |         grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
113 |         grid_init = (grid_init /
114 |                      grid_init.abs().max(-1, keepdim=True)[0]).view(
115 |             self.num_heads, 1, 1,
116 |             2).repeat(1, self.num_levels*self.num_bev_queue, self.num_points, 1)
117 | 
118 |         for i in range(self.num_points):
119 |             grid_init[:, :, i, :] *= i + 1
120 | 
121 |         self.sampling_offsets.bias.data = grid_init.view(-1)
122 |         constant_init(self.attention_weights, val=0., bias=0.)
123 |         xavier_init(self.value_proj, distribution='uniform', bias=0.)
124 |         xavier_init(self.output_proj, distribution='uniform', bias=0.)
125 |         self._is_init = True
126 | 
127 |     def forward(self,
128 |                 query,
129 |                 key=None,
130 |                 value=None,
131 |                 identity=None,
132 |                 query_pos=None,
133 |                 key_padding_mask=None,
134 |                 reference_points=None,
135 |                 spatial_shapes=None,
136 |                 level_start_index=None,
137 |                 flag='decoder',
138 | 
139 |                 **kwargs):
140 |         """Forward Function of MultiScaleDeformAttention.
141 | 
142 |         Args:
143 |             query (Tensor): Query of Transformer with shape
144 |                 (num_query, bs, embed_dims).
145 |             key (Tensor): The key tensor with shape
146 |                 `(num_key, bs, embed_dims)`.
147 |             value (Tensor): The value tensor with shape
148 |                 `(num_key, bs, embed_dims)`.
149 |             identity (Tensor): The tensor used for addition, with the
150 |                 same shape as `query`. Default None. If None,
151 |                 `query` will be used.
152 |             query_pos (Tensor): The positional encoding for `query`.
153 |                 Default: None.
154 |             key_pos (Tensor): The positional encoding for `key`. Default
155 |                 None.
156 |             reference_points (Tensor):  The normalized reference
157 |                 points with shape (bs, num_query, num_levels, 2),
158 |                 all elements is range in [0, 1], top-left (0,0),
159 |                 bottom-right (1, 1), including padding area.
160 |                 or (N, Length_{query}, num_levels, 4), add
161 |                 additional two dimensions is (w, h) to
162 |                 form reference boxes.
163 |             key_padding_mask (Tensor): ByteTensor for `query`, with
164 |                 shape [bs, num_key].
165 |             spatial_shapes (Tensor): Spatial shape of features in
166 |                 different levels. With shape (num_levels, 2),
167 |                 last dimension represents (h, w).
168 |             level_start_index (Tensor): The start index of each level.
169 |                 A tensor has shape ``(num_levels, )`` and can be represented
170 |                 as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
171 | 
172 |         Returns:
173 |              Tensor: forwarded results with shape [num_query, bs, embed_dims].
174 |         """
175 | 
176 |         if value is None:
177 |             assert self.batch_first
178 |             bs, len_bev, c = query.shape
179 |             value = torch.stack([query, query], 1).reshape(bs*2, len_bev, c)
180 | 
181 |             # value = torch.cat([query, query], 0)
182 | 
183 |         if identity is None:
184 |             identity = query
185 |         if query_pos is not None:
186 |             query = query + query_pos
187 |         if not self.batch_first:
188 |             # change to (bs, num_query ,embed_dims)
189 |             query = query.permute(1, 0, 2)
190 |             value = value.permute(1, 0, 2)
191 |         bs,  num_query, embed_dims = query.shape
192 |         _, num_value, _ = value.shape
193 |         assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
194 |         assert self.num_bev_queue == 2
195 | 
196 |         query = torch.cat([value[:bs], query], -1)
197 |         value = self.value_proj(value)
198 | 
199 |         if key_padding_mask is not None:
200 |             value = value.masked_fill(key_padding_mask[..., None], 0.0)
201 | 
202 |         value = value.reshape(bs*self.num_bev_queue,
203 |                               num_value, self.num_heads, -1)
204 | 
205 |         sampling_offsets = self.sampling_offsets(query)
206 |         sampling_offsets = sampling_offsets.view(
207 |             bs, num_query, self.num_heads,  self.num_bev_queue, self.num_levels, self.num_points, 2)
208 |         attention_weights = self.attention_weights(query).view(
209 |             bs, num_query,  self.num_heads, self.num_bev_queue, self.num_levels * self.num_points)
210 |         attention_weights = attention_weights.softmax(-1)
211 | 
212 |         attention_weights = attention_weights.view(bs, num_query,
213 |                                                    self.num_heads,
214 |                                                    self.num_bev_queue,
215 |                                                    self.num_levels,
216 |                                                    self.num_points)
217 | 
218 |         attention_weights = attention_weights.permute(0, 3, 1, 2, 4, 5)\
219 |             .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points).contiguous()
220 |         sampling_offsets = sampling_offsets.permute(0, 3, 1, 2, 4, 5, 6)\
221 |             .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points, 2)
222 | 
223 |         if reference_points.shape[-1] == 2:
224 |             offset_normalizer = torch.stack(
225 |                 [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
226 |             sampling_locations = reference_points[:, :, None, :, None, :] \
227 |                 + sampling_offsets \
228 |                 / offset_normalizer[None, None, None, :, None, :]
229 | 
230 |         elif reference_points.shape[-1] == 4:
231 |             sampling_locations = reference_points[:, :, None, :, None, :2] \
232 |                 + sampling_offsets / self.num_points \
233 |                 * reference_points[:, :, None, :, None, 2:] \
234 |                 * 0.5
235 |         else:
236 |             raise ValueError(
237 |                 f'Last dim of reference_points must be'
238 |                 f' 2 or 4, but get {reference_points.shape[-1]} instead.')
239 |         if torch.cuda.is_available() and value.is_cuda:
240 | 
241 |             # using fp16 deformable attention is unstable because it performs many sum operations
242 |             if value.dtype == torch.float16:
243 |                 MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
244 |             else:
245 |                 MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
246 |             output = MultiScaleDeformableAttnFunction.apply(
247 |                 value, spatial_shapes, level_start_index, sampling_locations,
248 |                 attention_weights, self.im2col_step)
249 |         else:
250 | 
251 |             output = multi_scale_deformable_attn_pytorch(
252 |                 value, spatial_shapes, sampling_locations, attention_weights)
253 | 
254 |         # output shape (bs*num_bev_queue, num_query, embed_dims)
255 |         # (bs*num_bev_queue, num_query, embed_dims)-> (num_query, embed_dims, bs*num_bev_queue)
256 |         output = output.permute(1, 2, 0)
257 | 
258 |         # fuse history value and current value
259 |         # (num_query, embed_dims, bs*num_bev_queue)-> (num_query, embed_dims, bs, num_bev_queue)
260 |         output = output.view(num_query, embed_dims, bs, self.num_bev_queue)
261 |         output = output.mean(-1)
262 | 
263 |         # (num_query, embed_dims, bs)-> (bs, num_query, embed_dims)
264 |         output = output.permute(2, 0, 1)
265 | 
266 |         output = self.output_proj(output)
267 | 
268 |         if not self.batch_first:
269 |             output = output.permute(1, 0, 2)
270 | 
271 |         return self.dropout(output) + identity
272 | 


--------------------------------------------------------------------------------
/projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py:
--------------------------------------------------------------------------------
  1 | _base_ = []
  2 | custom_imports = dict(imports=['projects.bevformer', 'projects.lanesegnet'])
  3 | 
  4 | # If point cloud range is changed, the models should also change their point
  5 | # cloud range accordingly
  6 | point_cloud_range = [-51.2, -25.6, -2.3, 51.2, 25.6, 1.7]
  7 | 
  8 | img_norm_cfg = dict(
  9 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 10 | 
 11 | class_names = ['lane_segment', 'ped_crossing']
 12 | class_nums = len(class_names)
 13 | 
 14 | input_modality = dict(
 15 |     use_lidar=False,
 16 |     use_camera=True,
 17 |     use_radar=False,
 18 |     use_map=False,
 19 |     use_external=False)
 20 | num_cams = 7
 21 | pts_dim = 3
 22 | 
 23 | dataset_type = 'OpenLaneV2_subset_A_LaneSegNet_Dataset'
 24 | data_root = 'data/OpenLane-V2/'
 25 | 
 26 | para_method = 'fix_pts_interp'
 27 | method_para = dict(n_points=10)
 28 | code_size = 3 * method_para['n_points'] * pts_dim
 29 | 
 30 | _dim_ = 256
 31 | _pos_dim_ = _dim_//2
 32 | _ffn_dim_ = _dim_*2
 33 | _ffn_cfg_ = dict(
 34 |     type='FFN',
 35 |     embed_dims=_dim_,
 36 |     feedforward_channels=_ffn_dim_,
 37 |     num_fcs=2,
 38 |     ffn_drop=0.1,
 39 |     act_cfg=dict(type='ReLU', inplace=True),
 40 | ),
 41 | 
 42 | _num_levels_ = 4
 43 | bev_h_ = 100
 44 | bev_w_ = 200
 45 | 
 46 | model = dict(
 47 |     type='LaneSegNet',
 48 |     img_backbone=dict(
 49 |         type='ResNet',
 50 |         depth=50,
 51 |         num_stages=4,
 52 |         out_indices=(1, 2, 3),
 53 |         frozen_stages=1,
 54 |         norm_cfg=dict(type='BN', requires_grad=False),
 55 |         norm_eval=True,
 56 |         style='pytorch',
 57 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 58 |     img_neck=dict(
 59 |         type='FPN',
 60 |         in_channels=[512, 1024, 2048],
 61 |         out_channels=_dim_,
 62 |         start_level=0,
 63 |         add_extra_convs='on_output',
 64 |         num_outs=_num_levels_,
 65 |         relu_before_extra_convs=True),
 66 |     bev_constructor=dict(
 67 |         type='BEVFormerConstructer',
 68 |         num_feature_levels=_num_levels_,
 69 |         num_cams=num_cams,
 70 |         embed_dims=_dim_,
 71 |         rotate_prev_bev=True,
 72 |         use_shift=True,
 73 |         use_can_bus=True,
 74 |         pc_range=point_cloud_range,
 75 |         bev_h=bev_h_,
 76 |         bev_w=bev_w_,
 77 |         rotate_center=[bev_h_//2, bev_w_//2],
 78 |         encoder=dict(
 79 |             type='BEVFormerEncoder',
 80 |             num_layers=3,
 81 |             pc_range=point_cloud_range,
 82 |             num_points_in_pillar=4,
 83 |             return_intermediate=False,
 84 |             transformerlayers=dict(
 85 |                 type='BEVFormerLayer',
 86 |                 attn_cfgs=[
 87 |                     dict(
 88 |                         type='TemporalSelfAttention',
 89 |                         embed_dims=_dim_,
 90 |                         num_levels=1),
 91 |                     dict(
 92 |                         type='SpatialCrossAttention',
 93 |                         embed_dims=_dim_,
 94 |                         num_cams=num_cams,
 95 |                         pc_range=point_cloud_range,
 96 |                         deformable_attention=dict(
 97 |                             type='MSDeformableAttention3D',
 98 |                             embed_dims=_dim_,
 99 |                             num_points=8,
100 |                             num_levels=_num_levels_)
101 |                     )
102 |                 ],
103 |                 ffn_cfgs=_ffn_cfg_,
104 |                 operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
105 |                                  'ffn', 'norm'))),
106 |         positional_encoding=dict(
107 |             type='LearnedPositionalEncoding',
108 |             num_feats=_pos_dim_,
109 |             row_num_embed=bev_h_,
110 |             col_num_embed=bev_w_),
111 |     ),
112 |     lane_head=dict(
113 |         type='LaneSegHead',
114 |         num_classes=class_nums,
115 |         num_lane_type_classes=3,
116 |         in_channels=_dim_,
117 |         num_query=200,
118 |         bev_h=bev_h_,
119 |         bev_w=bev_w_,
120 |         pc_range=point_cloud_range,
121 |         pts_dim=pts_dim,
122 |         sync_cls_avg_factor=False,
123 |         with_box_refine=True,
124 |         code_size=code_size,
125 |         code_weights= [1.0 for i in range(code_size)],
126 |         transformer=dict(
127 |             type='LaneSegNetTransformer',
128 |             embed_dims=_dim_,
129 |             points_num=method_para['n_points'],
130 |             pts_dim=pts_dim,
131 |             decoder=dict(
132 |                 type='LaneSegNetDecoder',
133 |                 num_layers=6,
134 |                 return_intermediate=True,
135 |                 pc_range=point_cloud_range,
136 |                 pts_dim=pts_dim,
137 |                 transformerlayers=dict(
138 |                     type='CustomDetrTransformerDecoderLayer',
139 |                     attn_cfgs=[
140 |                         dict(
141 |                             type='MultiheadAttention',
142 |                             embed_dims=_dim_,
143 |                             num_heads=8,
144 |                             dropout=0.1),
145 |                          dict(
146 |                             type='LaneAttention',
147 |                             embed_dims=_dim_,
148 |                             num_heads=8,
149 |                             num_points=32,
150 |                             num_levels=1),
151 |                     ],
152 |                     ffn_cfgs=_ffn_cfg_,
153 |                     operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
154 |                                      'ffn', 'norm')))),
155 |         bbox_coder=dict(type='LaneSegmentPseudoCoder'),
156 |         loss_cls=dict(
157 |             type='FocalLoss',
158 |             use_sigmoid=True,
159 |             gamma=2.0,
160 |             alpha=0.25,
161 |             loss_weight=1.5),
162 |         loss_bbox=dict(type='L1Loss', loss_weight=0.025),
163 |         loss_lane_type=dict(
164 |             type='CrossEntropyLoss',
165 |             use_sigmoid=True,
166 |             loss_weight=0.1),
167 |         loss_mask=dict(
168 |             type='CrossEntropyLoss',
169 |             use_sigmoid=True,
170 |             reduction='mean',
171 |             loss_weight=3.0),
172 |         loss_dice=dict(
173 |             type='DiceLoss',
174 |             use_sigmoid=True,
175 |             activate=True,
176 |             reduction='mean',
177 |             naive_dice=True,
178 |             eps=1.0,
179 |             loss_weight=3.0)),
180 |     lclc_head=dict(
181 |         type='RelationshipHead',
182 |         in_channels_o1=_dim_,
183 |         in_channels_o2=_dim_,
184 |         shared_param=False,
185 |         loss_rel=dict(
186 |             type='FocalLoss',
187 |             use_sigmoid=True,
188 |             gamma=2.0,
189 |             alpha=0.25,
190 |             loss_weight=5)),
191 |     # model training and testing settings
192 |     train_cfg=dict(
193 |         lane=dict(
194 |             assigner=dict(
195 |                 type='LaneSegmentHungarianAssigner3D',
196 |                 cls_cost=dict(type='FocalLossCost', weight=1.5),
197 |                 reg_cost=dict(type='LaneL1Cost', weight=0.025),
198 |                 mask_cost=dict(type='CrossEntropyLossCost', weight=3.0, use_sigmoid=True),
199 |                 dice_cost=dict(type='DiceCost', weight=3.0, pred_act=False, eps=1.0),
200 |                 pc_range=point_cloud_range))))
201 | 
202 | train_pipeline = [
203 |     dict(type='CustomLoadMultiViewImageFromFiles', to_float32=True),
204 |     dict(type='LoadAnnotations3DLaneSegment',
205 |          with_lane_3d=True, with_lane_label_3d=True, with_lane_adj=True, with_lane_type=True,
206 |          with_bbox=False, with_label=False, with_lane_lste_adj=False),
207 |     dict(type='PhotoMetricDistortionMultiViewImage'),
208 |     dict(type='CropFrontViewImageForAv2'),
209 |     dict(type='RandomScaleImageMultiViewImage', scales=[0.5]),
210 |     dict(type='NormalizeMultiviewImage', **img_norm_cfg),
211 |     dict(type='PadMultiViewImageSame2Max', size_divisor=32),
212 |     dict(type='GridMaskMultiViewImage'),
213 |     dict(type='LaneSegmentParameterize3D', method=para_method, method_para=method_para),
214 |     dict(type='GenerateLaneSegmentMask', points_num=method_para['n_points'], bev_h=bev_h_, bev_w=bev_w_),
215 |     dict(type='CustomFormatBundle3DLane', class_names=class_names),
216 |     dict(type='CustomCollect3D', keys=[
217 |         'img', 'gt_lanes_3d', 'gt_lane_labels_3d', 'gt_lane_adj',
218 |         'gt_instance_masks', 'gt_lane_left_type', 'gt_lane_right_type'])
219 | ]
220 | 
221 | test_pipeline = [
222 |     dict(type='CustomLoadMultiViewImageFromFiles', to_float32=True),
223 |     dict(type='CropFrontViewImageForAv2'),
224 |     dict(type='RandomScaleImageMultiViewImage', scales=[0.5]),
225 |     dict(type='NormalizeMultiviewImage', **img_norm_cfg),
226 |     dict(type='PadMultiViewImageSame2Max', size_divisor=32),
227 |     dict(type='CustomFormatBundle3DLane', class_names=class_names),
228 |     dict(type='CustomCollect3D', keys=['img'])
229 | ]
230 | 
231 | data = dict(
232 |     samples_per_gpu=1,
233 |     workers_per_gpu=8,
234 |     train=dict(
235 |         type=dataset_type,
236 |         data_root=data_root,
237 |         ann_file=data_root + 'data_dict_subset_A_train_lanesegnet.pkl',
238 |         pipeline=train_pipeline,
239 |         classes=class_names,
240 |         modality=input_modality,
241 |         split='train',
242 |         filter_map_change=True,
243 |         points_num=method_para['n_points'],
244 |         test_mode=False),
245 |     val=dict(
246 |         type=dataset_type,
247 |         data_root=data_root,
248 |         ann_file=data_root + 'data_dict_subset_A_val_lanesegnet.pkl',
249 |         pipeline=test_pipeline,
250 |         classes=class_names,
251 |         modality=input_modality,
252 |         split='val',
253 |         points_num=method_para['n_points'],
254 |         test_mode=True),
255 |     test=dict(
256 |         type=dataset_type,
257 |         data_root=data_root,
258 |         ann_file=data_root + 'data_dict_subset_A_val_lanesegnet.pkl',
259 |         pipeline=test_pipeline,
260 |         classes=class_names,
261 |         modality=input_modality,
262 |         split='val',
263 |         points_num=method_para['n_points'],
264 |         test_mode=True)
265 | )
266 | 
267 | optimizer = dict(
268 |     type='AdamW',
269 |     lr=2e-4,
270 |     paramwise_cfg=dict(
271 |         custom_keys={
272 |             'img_backbone': dict(lr_mult=0.1),
273 |         }),
274 |     weight_decay=0.01)
275 | 
276 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
277 | # learning policy
278 | lr_config = dict(
279 |     policy='CosineAnnealing',
280 |     warmup='linear',
281 |     warmup_iters=500,
282 |     warmup_ratio=1.0 / 3,
283 |     min_lr_ratio=1e-3)
284 | total_epochs = 24
285 | evaluation = dict(interval=24, pipeline=test_pipeline)
286 | 
287 | runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)
288 | log_config = dict(
289 |     interval=50,
290 |     hooks=[
291 |         dict(type='TextLoggerHook'),
292 |         dict(type='TensorboardLoggerHook')
293 |     ])
294 | 
295 | checkpoint_config = dict(interval=1, max_keep_ckpts=1)
296 | 
297 | dist_params = dict(backend='nccl')
298 | log_level = 'INFO'
299 | work_dir = None
300 | load_from = None
301 | resume_from = None
302 | workflow = [('train', 1)]
303 | 
304 | # NOTE: `auto_scale_lr` is for automatically scaling LR,
305 | # base_batch_size = (8 GPUs) x (1 samples per GPU)
306 | auto_scale_lr = dict(base_batch_size=8)
307 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets import *
2 | from .models import *
3 | from .core import *
4 | from .utils import *
5 | from .thirdparty import *
6 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .lane import *
2 | from .visualizer import *
3 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/lane/__init__.py:
--------------------------------------------------------------------------------
1 | from .lane_hungarian_assigner import LaneSegmentHungarianAssigner3D
2 | from .lane_coder import LaneSegmentPseudoCoder
3 | from .match_cost import LaneL1Cost
4 | from .area_coder import AreaPseudoCoder
5 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/lane/area_coder.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core.bbox import BaseBBoxCoder
 2 | from mmdet.core.bbox.builder import BBOX_CODERS
 3 | 
 4 | @BBOX_CODERS.register_module()
 5 | class AreaPseudoCoder(BaseBBoxCoder):
 6 |     def encode(self):
 7 |         pass
 8 | 
 9 |     def decode_single(self, cls_scores, area_preds):
10 | 
11 |         cls_scores = cls_scores.sigmoid()
12 |         scores, labels = cls_scores.max(-1)
13 | 
14 |         predictions_dict = {
15 |             'areas': area_preds.detach().cpu().numpy(),
16 |             'scores': scores.detach().cpu().numpy(),
17 |             'labels': labels.detach().cpu().numpy(),
18 |         }
19 | 
20 |         return predictions_dict
21 | 
22 |     def decode(self, preds_dicts):
23 |         all_cls_scores = preds_dicts['all_cls_scores'][-1]
24 |         all_pts_preds = preds_dicts['all_pts_preds'][-1]
25 |         batch_size = all_cls_scores.size()[0]
26 |         predictions_list = []
27 |         for i in range(batch_size):
28 |             predictions_list.append(self.decode_single(all_cls_scores[i], all_pts_preds[i]))
29 |         return predictions_list
30 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/lane/lane_coder.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | from mmdet.core.bbox import BaseBBoxCoder
10 | from mmdet.core.bbox.builder import BBOX_CODERS
11 | from .util import denormalize_3dlane
12 | 
13 | 
14 | @BBOX_CODERS.register_module()
15 | class LaneSegmentPseudoCoder(BaseBBoxCoder):
16 | 
17 |     def __init__(self, denormalize=False):
18 |         self.denormalize = denormalize
19 | 
20 |     def encode(self):
21 |         pass
22 | 
23 |     def decode_single(self, cls_scores, lane_preds, lane_left_type_scores, lane_right_type_scores):
24 |         """Decode bboxes.
25 |         Args:
26 |             cls_scores (Tensor): Outputs from the classification head, \
27 |                 shape [num_query, cls_out_channels]. Note \
28 |                 cls_out_channels should includes background.
29 |             lane_preds (Tensor): Outputs from the regression \
30 |                 head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
31 |                 Shape [num_query, 9].
32 |         Returns:
33 |             list[dict]: Decoded boxes.
34 |         """
35 | 
36 |         cls_scores = cls_scores.sigmoid()
37 |         scores, labels = cls_scores.max(-1)
38 |         left_type_scores, left_type_labels = lane_left_type_scores.sigmoid().max(-1)
39 |         right_type_scores, right_type_labels = lane_right_type_scores.sigmoid().max(-1)
40 |         if self.denormalize:
41 |             final_lane_preds = denormalize_3dlane(lane_preds, self.pc_range)
42 |         else:
43 |             final_lane_preds = lane_preds
44 | 
45 |         predictions_dict = {
46 |             'lane3d': final_lane_preds.detach().cpu().numpy(),
47 |             'scores': scores.detach().cpu().numpy(),
48 |             'labels': labels.detach().cpu().numpy(),
49 |             'left_type_scores': left_type_scores.detach().cpu().numpy(),
50 |             'left_type_labels': left_type_labels.detach().cpu().numpy(),
51 |             'right_type_scores': right_type_scores.detach().cpu().numpy(),
52 |             'right_type_labels': right_type_labels.detach().cpu().numpy()
53 |         }
54 | 
55 |         return predictions_dict
56 | 
57 |     def decode(self, preds_dicts):
58 | 
59 |         all_cls_scores = preds_dicts['all_cls_scores'][-1]
60 |         all_lanes_preds = preds_dicts['all_lanes_preds'][-1]
61 |         all_lanes_left_types = preds_dicts['all_lanes_left_type'][-1]
62 |         all_lanes_right_types = preds_dicts['all_lanes_right_type'][-1]
63 |         
64 |         batch_size = all_cls_scores.size()[0]
65 |         predictions_list = []
66 |         for i in range(batch_size):
67 |             predictions_list.append(self.decode_single(
68 |                 all_cls_scores[i], all_lanes_preds[i], all_lanes_left_types[i], all_lanes_right_types[i]))
69 |         return predictions_list
70 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/lane/lane_hungarian_assigner.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import torch
  8 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS
  9 | from mmdet.core.bbox.assigners import AssignResult
 10 | from mmdet.core.bbox.assigners import BaseAssigner
 11 | from mmdet.core.bbox.match_costs import build_match_cost
 12 | from mmdet.models.utils.transformer import inverse_sigmoid
 13 | from .util import normalize_3dlane
 14 | 
 15 | try:
 16 |     from scipy.optimize import linear_sum_assignment
 17 | except ImportError:
 18 |     linear_sum_assignment = None
 19 | 
 20 | 
 21 | @BBOX_ASSIGNERS.register_module()
 22 | class LaneSegmentHungarianAssigner3D(BaseAssigner):
 23 | 
 24 |     def __init__(self,
 25 |                  cls_cost=dict(type='ClassificationCost', weight=1.),
 26 |                  reg_cost=dict(type='BBoxL1Cost', weight=1.0),
 27 |                  mask_cost=dict(type='CrossEntropyLossCost', weight=1.0),
 28 |                  dice_cost=dict(type='DiceCost', weight=1.0),
 29 |                  normalize_gt=False,
 30 |                  pc_range=None):
 31 |         self.cls_cost = build_match_cost(cls_cost)
 32 |         self.reg_cost = build_match_cost(reg_cost)
 33 |         self.mask_cost = build_match_cost(mask_cost)
 34 |         self.dice_cost = build_match_cost(dice_cost)
 35 |         self.normalize_gt = normalize_gt
 36 |         self.pc_range = pc_range
 37 | 
 38 |     def assign(self,
 39 |                lanes_pred,
 40 |                mask_pred,
 41 |                cls_pred,
 42 |                gt_lanes,
 43 |                gt_mask,
 44 |                gt_labels):
 45 | 
 46 |         num_gts, num_bboxes = gt_lanes.size(0), lanes_pred.size(0)
 47 |         # 1. assign -1 by default
 48 |         assigned_gt_inds = lanes_pred.new_full((num_bboxes, ),
 49 |                                               -1,
 50 |                                               dtype=torch.long)
 51 |         assigned_labels = lanes_pred.new_full((num_bboxes, ),
 52 |                                              -1,
 53 |                                              dtype=torch.long)
 54 |         if num_gts == 0 or num_bboxes == 0:
 55 |             # No ground truth or boxes, return empty assignment
 56 |             if num_gts == 0:
 57 |                 # No ground truth, assign all to background
 58 |                 assigned_gt_inds[:] = 0
 59 |             return AssignResult(
 60 |                 num_gts, assigned_gt_inds, None, labels=assigned_labels)
 61 | 
 62 |         # 2. compute the weighted costs
 63 |         # classification and bboxcost.
 64 |         cls_cost = self.cls_cost(cls_pred, gt_labels)
 65 | 
 66 |         if self.normalize_gt:
 67 |             normalized_gt_lanes = normalize_3dlane(gt_lanes, self.pc_range)
 68 |         else:
 69 |             normalized_gt_lanes = gt_lanes
 70 | 
 71 |         # regression L1 cost    
 72 |         reg_cost = self.reg_cost(lanes_pred, normalized_gt_lanes)
 73 | 
 74 |         # mask cost
 75 |         mask_cost = self.mask_cost(mask_pred, gt_mask)
 76 | 
 77 |         # dice cost
 78 |         dice_cost = self.dice_cost(mask_pred, gt_mask)
 79 | 
 80 |         # weighted sum of above two costs
 81 |         cost = cls_cost + reg_cost + mask_cost + dice_cost
 82 | 
 83 |         # 3. do Hungarian matching on CPU using linear_sum_assignment
 84 |         cost = cost.detach().cpu()
 85 |         if linear_sum_assignment is None:
 86 |             raise ImportError('Please run "pip install scipy" '
 87 |                               'to install scipy first.')
 88 |         matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
 89 |         matched_row_inds = torch.from_numpy(matched_row_inds).to(
 90 |             lanes_pred.device)
 91 |         matched_col_inds = torch.from_numpy(matched_col_inds).to(
 92 |             lanes_pred.device)
 93 | 
 94 |         # 4. assign backgrounds and foregrounds
 95 |         # assign all indices to backgrounds first
 96 |         assigned_gt_inds[:] = 0
 97 | 
 98 |         # assign foregrounds based on matching results
 99 |         assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
100 |         assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
101 | 
102 |         return AssignResult(
103 |             num_gts, assigned_gt_inds, None, labels=assigned_labels)
104 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/lane/match_cost.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import torch
 8 | from mmdet.core.bbox.match_costs.builder import MATCH_COST
 9 | 
10 | 
11 | @MATCH_COST.register_module()
12 | class LaneL1Cost(object):
13 | 
14 |     def __init__(self, weight=1.):
15 |         self.weight = weight
16 | 
17 |     def __call__(self, lane_pred, gt_lanes):
18 |         lane_cost = torch.cdist(lane_pred, gt_lanes, p=1)
19 |         return lane_cost * self.weight
20 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/lane/util.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | from shapely.geometry import LineString
10 | 
11 | def normalize_3dlane(lanes, pc_range):
12 |     normalized_lanes = lanes.clone()
13 |     normalized_lanes[..., 0::3] = (lanes[..., 0::3] - pc_range[0]) / (pc_range[3] - pc_range[0])
14 |     normalized_lanes[..., 1::3] = (lanes[..., 1::3] - pc_range[1]) / (pc_range[4] - pc_range[1])
15 |     normalized_lanes[..., 2::3] = (lanes[..., 2::3] - pc_range[2]) / (pc_range[5] - pc_range[2])
16 |     normalized_lanes = torch.clamp(normalized_lanes, 0, 1)
17 | 
18 |     return normalized_lanes
19 | 
20 | def denormalize_3dlane(normalized_lanes, pc_range):
21 |     lanes = normalized_lanes.clone()
22 |     lanes[..., 0::3] = (normalized_lanes[..., 0::3] * (pc_range[3] - pc_range[0]) + pc_range[0])
23 |     lanes[..., 1::3] = (normalized_lanes[..., 1::3] * (pc_range[4] - pc_range[1]) + pc_range[1])
24 |     lanes[..., 2::3] = (normalized_lanes[..., 2::3] * (pc_range[5] - pc_range[2]) + pc_range[2])
25 |     return lanes
26 | 
27 | def fix_pts_interpolate(lane, n_points):
28 |     ls = LineString(lane)
29 |     distances = np.linspace(0, ls.length, n_points)
30 |     lane = np.array([ls.interpolate(distance).coords[0] for distance in distances])
31 |     return lane
32 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/visualizer/__init__.py:
--------------------------------------------------------------------------------
1 | from .lane_segment import draw_annotation_bev
2 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/core/visualizer/lane_segment.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import numpy as np
 8 | import matplotlib.pyplot as plt
 9 | 
10 | COLOR_DICT = {  # RGB [0, 1]
11 |     'centerline': np.array([243, 90, 2]) / 255,
12 |     'laneline': np.array([0, 32, 127]) / 255,
13 |     'ped_crossing': np.array([255, 192, 0]) / 255,
14 |     'road_boundary': np.array([220, 30, 0]) / 255,
15 | }
16 | LINE_PARAM = {
17 |     0: {'color': COLOR_DICT['laneline'], 'alpha': 0.3, 'linestyle': ':'},       # none
18 |     1: {'color': COLOR_DICT['laneline'], 'alpha': 0.75, 'linestyle': 'solid'},  # solid
19 |     2: {'color': COLOR_DICT['laneline'], 'alpha': 0.75, 'linestyle': '--'},     # dashed
20 |     'ped_crossing': {'color': COLOR_DICT['ped_crossing'], 'alpha': 1, 'linestyle': 'solid'},
21 |     'road_boundary': {'color': COLOR_DICT['road_boundary'], 'alpha': 1, 'linestyle': 'solid'}
22 | }
23 | BEV_RANGE = [-50, 50, -25, 25]
24 | 
25 | def _draw_centerline(ax, lane_centerline):
26 |     points = np.asarray(lane_centerline['points'])
27 |     color = COLOR_DICT['centerline']
28 |     # draw line
29 |     ax.plot(points[:, 1], points[:, 0], color=color, alpha=1.0, linewidth=0.6)
30 |     # draw start and end vertex
31 |     ax.scatter(points[[0, -1], 1], points[[0, -1], 0], color=color, s=1)
32 |     # draw arrow
33 |     ax.annotate('', xy=(points[-1, 1], points[-1, 0]),
34 |                 xytext=(points[-2, 1], points[-2, 0]),
35 |                 arrowprops=dict(arrowstyle='->', lw=0.6, color=color))
36 | 
37 | def _draw_line(ax, line):
38 |     points = np.asarray(line['points'])
39 |     config = LINE_PARAM[line['linetype']]
40 |     ax.plot(points[:, 1], points[:, 0], linewidth=0.6, **config)
41 | 
42 | def _draw_lane_segment(ax, lane_segment, with_centerline, with_laneline):
43 |     if with_centerline:
44 |         _draw_centerline(ax, {'points': lane_segment['centerline']})
45 |     if with_laneline:
46 |         _draw_line(ax, {'points': lane_segment['left_laneline'], 'linetype': lane_segment['left_laneline_type']})
47 |         _draw_line(ax, {'points': lane_segment['right_laneline'], 'linetype': lane_segment['right_laneline_type']})
48 | 
49 | def _draw_area(ax, area):
50 |     if area['category'] == 1:  # ped crossing with lane segment style.
51 |         _draw_line(ax, {'points': area['points'], 'linetype': 'ped_crossing'})
52 |     elif area['category'] == 2:  # road boundary
53 |         _draw_line(ax, {'points': area['points'], 'linetype': 'road_boundary'})
54 | 
55 | def draw_annotation_bev(annotation, with_centerline=True, with_laneline=True, with_area=True):
56 | 
57 |     fig, ax = plt.figure(figsize=(2, 4), dpi=200), plt.gca()
58 |     ax.set_aspect('equal')
59 |     ax.set_ylim([BEV_RANGE[0], BEV_RANGE[1]])
60 |     ax.set_xlim([BEV_RANGE[2], BEV_RANGE[3]])
61 |     ax.invert_xaxis()
62 |     ax.grid(False)
63 |     ax.axis('off')
64 |     ax.set_facecolor('white')
65 |     fig.tight_layout(pad=0.2)
66 | 
67 |     for lane_segment in annotation['lane_segment']:
68 |         _draw_lane_segment(ax, lane_segment, with_centerline, with_laneline)
69 |     if with_area:
70 |         for area in annotation['area']:
71 |             _draw_area(ax, area)
72 | 
73 |     fig.canvas.draw()
74 |     data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
75 |     data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
76 |     plt.close(fig)
77 |     return data
78 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipelines import *
2 | from .openlanev2_subset_A_lanesegnet_dataset import OpenLaneV2_subset_A_LaneSegNet_Dataset
3 | from .openlanev2_subset_A_mapele_bucket_dataset import OpenLaneV2_subset_A_MapElementBucket_Dataset
4 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/openlanev2_evaluate_custom.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import numpy as np
  8 | from tqdm import tqdm
  9 | from openlanev2.lanesegment.io import io
 10 | from openlanev2.lanesegment.evaluation.distance import (pairwise, area_distance,
 11 |                                                         lane_segment_distance, lane_segment_distance_c)
 12 | from openlanev2.lanesegment.evaluation.evaluate import (THRESHOLDS_LANESEG, THRESHOLDS_AREA,
 13 |                                                         _mAP_over_threshold, _mAP_topology_lsls)
 14 | 
 15 | 
 16 | def lanesegnet_evaluate(ground_truth, predictions, verbose=True):
 17 | 
 18 |     if isinstance(ground_truth, str):
 19 |         ground_truth = io.pickle_load(ground_truth)
 20 | 
 21 |     if predictions is None:
 22 |         preds = {}
 23 |         print('\nDummy evaluation on ground truth.\n')
 24 |     else:
 25 |         if isinstance(predictions, str):
 26 |             predictions = io.pickle_load(predictions)
 27 |         predictions = predictions['results']
 28 | 
 29 |     gts = {}
 30 |     preds = {}
 31 |     for token in ground_truth.keys():
 32 |         gts[token] = ground_truth[token]['annotation']
 33 |         if predictions is None:
 34 |             preds[token] = gts[token]
 35 |             for i, _ in enumerate(preds[token]['lane_segment']):
 36 |                 preds[token]['lane_segment'][i]['confidence'] = np.float32(1)
 37 |             for i, _ in enumerate(preds[token]['area']):
 38 |                 preds[token]['area'][i]['confidence'] = np.float32(1)
 39 |             for i, _ in enumerate(preds[token]['traffic_element']):
 40 |                 preds[token]['traffic_element'][i]['confidence'] = np.float32(1)
 41 |         else:
 42 |             preds[token] = predictions[token]['predictions']
 43 | 
 44 |     assert set(gts.keys()) == set(preds.keys()), '#frame differs'
 45 | 
 46 |     """
 47 |         calculate distances between gts and preds    
 48 |     """
 49 | 
 50 |     distance_matrices = {
 51 |         'laneseg': {},
 52 |         'area': {},
 53 |     }
 54 | 
 55 |     for token in tqdm(gts.keys(), desc='calculating distances:', ncols=80, disable=not verbose):
 56 | 
 57 |         mask = pairwise(
 58 |             [gt for gt in gts[token]['lane_segment']],
 59 |             [pred for pred in preds[token]['lane_segment']],
 60 |             lane_segment_distance_c,
 61 |             relax=True,
 62 |         ) < THRESHOLDS_LANESEG[-1]
 63 | 
 64 |         distance_matrices['laneseg'][token] = pairwise(
 65 |             [gt for gt in gts[token]['lane_segment']],
 66 |             [pred for pred in preds[token]['lane_segment']],
 67 |             lane_segment_distance,
 68 |             mask=mask,
 69 |             relax=True,
 70 |         )
 71 | 
 72 |         distance_matrices['area'][token] = pairwise(
 73 |             [gt for gt in gts[token]['area']],
 74 |             [pred for pred in preds[token]['area']],
 75 |             area_distance,
 76 |         )
 77 | 
 78 |     """
 79 |         evaluate
 80 |     """
 81 | 
 82 |     metrics = {
 83 |         'mAP': 0
 84 |     }
 85 | 
 86 |     metrics['AP_ls'] = _mAP_over_threshold(
 87 |         gts=gts, 
 88 |         preds=preds, 
 89 |         distance_matrices=distance_matrices['laneseg'], 
 90 |         distance_thresholds=THRESHOLDS_LANESEG,
 91 |         object_type='lane_segment',
 92 |         filter=lambda _: True,
 93 |         inject=True, # save tp for eval on graph
 94 |     ).mean()
 95 | 
 96 |     metrics['AP_ped'] = _mAP_over_threshold(
 97 |         gts=gts, 
 98 |         preds=preds, 
 99 |         distance_matrices=distance_matrices['area'], 
100 |         distance_thresholds=THRESHOLDS_AREA, 
101 |         object_type='area',
102 |         filter=lambda x: x['category'] == 1,
103 |         inject=False,
104 |     ).mean()
105 | 
106 |     metrics['TOP_lsls'] = _mAP_topology_lsls(gts, preds, THRESHOLDS_LANESEG)
107 | 
108 |     metrics['mAP'] = (metrics['AP_ls'] + metrics['AP_ped']) / 2
109 | 
110 |     return metrics
111 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/openlanev2_subset_A_mapele_bucket_dataset.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import os
  8 | import random
  9 | import copy
 10 | import tqdm
 11 | 
 12 | import numpy as np
 13 | import torch
 14 | import mmcv
 15 | import cv2
 16 | 
 17 | import shapely
 18 | from shapely.geometry import LineString
 19 | from mmdet.datasets import DATASETS
 20 | 
 21 | from openlanev2.lanesegment.evaluation import evaluate as openlanev2_evaluate
 22 | from .openlanev2_subset_A_lanesegnet_dataset import OpenLaneV2_subset_A_LaneSegNet_Dataset
 23 | from ..core.lane.util import fix_pts_interpolate
 24 | from ..core.visualizer.lane_segment import draw_annotation_bev
 25 | 
 26 | 
 27 | @DATASETS.register_module()
 28 | class OpenLaneV2_subset_A_MapElementBucket_Dataset(OpenLaneV2_subset_A_LaneSegNet_Dataset):
 29 | 
 30 |     def get_ann_info(self, index):
 31 |         """Get annotation info according to the given index.
 32 | 
 33 |         Args:
 34 |             index (int): Index of the annotation data to get.
 35 | 
 36 |         Returns:
 37 |             dict: annotation information
 38 |         """
 39 |         info = self.data_infos[index]
 40 |         ann_info = info['annotation']
 41 | 
 42 |         gt_lanes = []
 43 |         gt_lane_labels_3d = []
 44 |         gt_lane_left_type = []
 45 |         gt_lane_right_type = []
 46 | 
 47 |         for idx, lane in enumerate(ann_info['lane_segment']):
 48 |             centerline = lane['centerline']
 49 |             LineString_lane = LineString(centerline)
 50 |             left_boundary = lane['left_laneline']
 51 |             LineString_left_boundary = LineString(left_boundary)
 52 |             right_boundary = lane['right_laneline']
 53 |             LineString_right_boundary = LineString(right_boundary)
 54 |             gt_lanes.append([LineString_lane, LineString_left_boundary, LineString_right_boundary])
 55 |             gt_lane_labels_3d.append(0)
 56 |             gt_lane_left_type.append(lane['left_laneline_type'])
 57 |             gt_lane_right_type.append(lane['right_laneline_type'])
 58 | 
 59 |         topology_lsls = np.array(ann_info['topology_lsls'], dtype=np.float32)
 60 | 
 61 |         te_bboxes = np.array([np.array(sign['points'], dtype=np.float32).flatten() for sign in ann_info['traffic_element']])
 62 |         te_labels = np.array([sign['attribute'] for sign in ann_info['traffic_element']], dtype=np.int64)
 63 |         if len(te_bboxes) == 0:
 64 |             te_bboxes = np.zeros((0, 4), dtype=np.float32)
 65 |             te_labels = np.zeros((0, ), dtype=np.int64)
 66 | 
 67 |         topology_lste = np.array(ann_info['topology_lste'], dtype=np.float32)
 68 | 
 69 |         gt_areas_3d = []
 70 |         gt_area_labels_3d = []
 71 |         for area in ann_info['area']:
 72 |             gt_areas_3d.append(fix_pts_interpolate(area['points'], 20))
 73 |             gt_area_labels_3d.append(area['category'] - 1)
 74 | 
 75 |         annos = dict(
 76 |             gt_lanes_3d = gt_lanes,
 77 |             gt_lane_labels_3d = gt_lane_labels_3d,
 78 |             gt_lane_adj = topology_lsls,
 79 |             bboxes = te_bboxes,
 80 |             labels = te_labels,
 81 |             gt_lane_lste_adj = topology_lste,
 82 |             gt_lane_left_type = gt_lane_left_type,
 83 |             gt_lane_right_type = gt_lane_right_type,
 84 |             gt_areas_3d = gt_areas_3d,
 85 |             gt_area_labels_3d = gt_area_labels_3d
 86 |         )
 87 |         return annos
 88 | 
 89 |     def format_openlanev2_gt(self):
 90 |         gt_dict = {}
 91 |         for idx in range(len(self.data_infos)):
 92 |             info = copy.deepcopy(self.data_infos[idx])
 93 |             key = (self.split, info['segment_id'], str(info['timestamp']))
 94 |             areas = []
 95 |             for area in info['annotation']['area']:
 96 |                     points = area['points']
 97 |                     if len(points) != 20:
 98 |                         points = fix_pts_interpolate(points, 20)
 99 |                     area['points'] = points
100 |                     areas.append(area)
101 |             info['annotation']['area'] = areas
102 |             gt_dict[key] = info
103 |         return gt_dict
104 | 
105 |     def format_results(self, results, out_dir=None, logger=None, **kwargs):
106 |         if out_dir is not None:
107 |             logger.info(f'Starting format results...')
108 |             data_type = np.float16
109 |         else:
110 |             data_type = np.float32
111 | 
112 |         pred_dict = {}
113 |         pred_dict['method'] = 'LaneSegNet'
114 |         pred_dict['team'] = 'dummy'
115 |         pred_dict['authors'] = []
116 |         pred_dict['e-mail'] = 'dummy'
117 |         pred_dict['institution / company'] = 'OpenDriveLab'
118 |         pred_dict['country / region'] = 'CN'
119 |         pred_dict['results'] = {}
120 |         for idx, result in enumerate(tqdm.tqdm(results, ncols=80, desc='Formatting results')):
121 |             info = self.data_infos[idx]
122 |             key = (self.split, info['segment_id'], str(info['timestamp']))
123 | 
124 |             pred_info = dict(
125 |                 lane_segment = [],
126 |                 area = [],
127 |                 traffic_element = [],
128 |                 topology_lsls = None,
129 |                 topology_lste = None
130 |             )
131 | 
132 |             if result['lane_results'] is not None:
133 |                 lane_results = result['lane_results']
134 |                 scores = lane_results[1]
135 |                 valid_indices = np.argsort(-scores)
136 |                 lanes = lane_results[0][valid_indices]
137 |                 labels = lane_results[2][valid_indices]
138 |                 scores = scores[valid_indices]
139 |                 lanes = lanes.reshape(-1, lanes.shape[-1] // 3, 3)
140 | 
141 |                 # left_type_scores = lane_results[3][valid_indices]
142 |                 # right_type_scores = lane_results[5][valid_indices]
143 |                 left_type_labels = lane_results[4][valid_indices]
144 |                 right_type_labels = lane_results[6][valid_indices]
145 | 
146 |                 for pred_idx, (lane, score, label) in enumerate(zip(lanes, scores, labels)):
147 |                     pred_lane_segment = {}
148 |                     pred_lane_segment['id'] = 20000 + pred_idx
149 |                     pred_lane_segment['centerline'] = fix_pts_interpolate(lane[:self.points_num], 10).astype(data_type)
150 |                     pred_lane_segment['left_laneline'] = fix_pts_interpolate(lane[self.points_num:self.points_num * 2], 10).astype(data_type)
151 |                     pred_lane_segment['right_laneline'] = fix_pts_interpolate(lane[self.points_num * 2:], 10).astype(data_type)
152 |                     pred_lane_segment['left_laneline_type'] = left_type_labels[pred_idx]
153 |                     pred_lane_segment['right_laneline_type'] = right_type_labels[pred_idx]
154 |                     pred_lane_segment['confidence'] = score.item()
155 |                     pred_info['lane_segment'].append(pred_lane_segment)
156 | 
157 |             if result['area_results'] is not None:
158 |                 area_results = result['area_results']
159 |                 scores = area_results[1]
160 |                 area_valid_indices = np.argsort(-scores)
161 |                 areas = area_results[0][area_valid_indices]
162 |                 labels = area_results[2][area_valid_indices]
163 |                 scores = scores[area_valid_indices]
164 |                 for pred_idx, (area, score, label) in enumerate(zip(areas, scores, labels)):
165 |                     pred_area = {}
166 |                     pred_area['id'] = 30000 + pred_idx
167 |                     pred_area['points'] = fix_pts_interpolate(area, 20).astype(data_type)
168 |                     pred_area['category'] = label + 1
169 |                     pred_area['confidence'] = score.item()
170 |                     pred_info['area'].append(pred_area)
171 | 
172 |             if result['bbox_results'] is not None:
173 |                 te_results = result['bbox_results']
174 |                 scores = te_results[1]
175 |                 te_valid_indices = np.argsort(-scores)
176 |                 tes = te_results[0][te_valid_indices]
177 |                 scores = scores[te_valid_indices]
178 |                 class_idxs = te_results[2][te_valid_indices]
179 |                 for pred_idx, (te, score, class_idx) in enumerate(zip(tes, scores, class_idxs)):
180 |                     te_info = dict(
181 |                         id = 10000 + pred_idx,
182 |                         category = 1 if class_idx < 4 else 2,
183 |                         attribute = class_idx,
184 |                         points = te.reshape(2, 2).astype(data_type),
185 |                         confidence = score
186 |                     )
187 |                     pred_info['traffic_element'].append(te_info)
188 | 
189 |             if result['lsls_results'] is not None:
190 |                 pred_info['topology_lsls'] = result['lsls_results'].astype(np.float32)[valid_indices][:, valid_indices]
191 |             else:
192 |                 pred_info['topology_lsls'] = np.zeros((len(pred_info['lane_segment']), len(pred_info['lane_segment'])), dtype=np.float32)
193 | 
194 |             if result['lste_results'] is not None:
195 |                 topology_lste = result['lste_results'].astype(np.float32)[valid_indices]
196 |                 pred_info['topology_lste'] = topology_lste
197 |             else:
198 |                 pred_info['topology_lste'] = np.zeros((len(pred_info['lane_segment']), len(pred_info['traffic_element'])), dtype=np.float32)
199 | 
200 |             pred_dict['results'][key] = dict(predictions=pred_info)
201 | 
202 |         if out_dir is not None:
203 |             logger.info(f'Saving results to {out_dir}...')
204 |             mmcv.dump(pred_dict, os.path.join(out_dir, 'submission.pkl'))
205 | 
206 |         return pred_dict
207 | 
208 |     def evaluate(self, results, logger=None, show=False, out_dir=None, **kwargs):
209 |         """Evaluation in Openlane-V2 subset_A dataset.
210 | 
211 |         Args:
212 |             results (list): Testing results of the dataset.
213 |             logger (logging.Logger | str | None): Logger used for printing
214 |                 related information during evaluation. Default: None.
215 |             show (bool): Whether to visualize the results.
216 |             out_dir (str): Path of directory to save the results.
217 | 
218 |         Returns:
219 |             dict: Evaluation results for evaluation metric.
220 |         """
221 |         if show:
222 |             assert out_dir, 'Expect out_dir when show is set.'
223 |             logger.info(f'Visualizing results at {out_dir}...')
224 |             self.show(results, out_dir)
225 |             logger.info(f'Visualize done.')
226 | 
227 |         logger.info(f'Starting format results...')
228 |         gt_dict = self.format_openlanev2_gt()
229 |         pred_dict = self.format_results(results, logger=logger)
230 | 
231 |         logger.info(f'Starting openlanev2 evaluate...')
232 |         metric_results = openlanev2_evaluate(gt_dict, pred_dict)['OpenLane-V2 UniScore']
233 |         return metric_results
234 | 
235 | 
236 |     def show(self, results, out_dir, score_thr=0.3, show_num=20, **kwargs):
237 |         """Show the results.
238 | 
239 |         Args:
240 |             results (list[dict]): Testing results of the dataset.
241 |             out_dir (str): Path of directory to save the results.
242 |             score_thr (float): The threshold of score.
243 |             show_num (int): The number of images to be shown.
244 |         """
245 |         for idx, result in enumerate(results):
246 |             if idx % 5 != 0:
247 |                 continue
248 |             if idx // 5 > show_num:
249 |                 break
250 | 
251 |             info = self.data_infos[idx]
252 | 
253 |             pred_result = self.format_results([result])
254 |             pred_result = list(pred_result['results'].values())[0]['predictions']
255 |             pred_result = self._filter_by_confidence(pred_result, score_thr)
256 | 
257 |             pv_imgs = []
258 |             for cam_name, cam_info in info['sensor'].items():
259 |                 image_path = os.path.join(self.data_root, cam_info['image_path'])
260 |                 image_pv = mmcv.imread(image_path)
261 |                 pv_imgs.append(image_pv)
262 | 
263 |             surround_img = self._render_surround_img(pv_imgs)
264 |             output_path = os.path.join(out_dir, f'{info["segment_id"]}/{info["timestamp"]}/surround.jpg')
265 |             mmcv.imwrite(surround_img, output_path)
266 | 
267 |             conn_img_gt = draw_annotation_bev(info['annotation'])
268 |             conn_img_pred = draw_annotation_bev(pred_result)
269 |             divider = np.ones((conn_img_gt.shape[0], 7, 3), dtype=np.uint8) * 128
270 |             conn_img = np.concatenate([conn_img_gt, divider, conn_img_pred], axis=1)[..., ::-1]
271 | 
272 |             output_path = os.path.join(out_dir, f'{info["segment_id"]}/{info["timestamp"]}/bev.jpg')
273 |             mmcv.imwrite(conn_img, output_path)
274 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
 1 | from .transform_3d import (
 2 |     PadMultiViewImage, NormalizeMultiviewImage,
 3 |     PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage,
 4 |     GridMaskMultiViewImage, CropFrontViewImageForAv2)
 5 | from .transform_3d_lane import LaneSegmentParameterize3D, GenerateLaneSegmentMask
 6 | from .formating import CustomFormatBundle3DLane
 7 | from .loading import CustomLoadMultiViewImageFromFiles, LoadAnnotations3DLaneSegment
 8 | 
 9 | __all__ = [
10 |     'PadMultiViewImage', 'NormalizeMultiviewImage', 
11 |     'PhotoMetricDistortionMultiViewImage', 'CustomCollect3D', 'RandomScaleImageMultiViewImage',
12 |     'GridMaskMultiViewImage', 'CropFrontViewImageForAv2',
13 |     'LaneSegmentParameterize3D', 'GenerateLaneSegmentMask',
14 |     'CustomFormatBundle3DLane',
15 |     'CustomLoadMultiViewImageFromFiles', 'LoadAnnotations3DLaneSegment'
16 | ]
17 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/pipelines/formating.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import numpy as np
 8 | from mmcv.parallel import DataContainer as DC
 9 | 
10 | from mmdet.datasets.builder import PIPELINES
11 | from mmdet.datasets.pipelines import to_tensor
12 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D
13 | 
14 | 
15 | @PIPELINES.register_module()
16 | class CustomFormatBundle3DLane(DefaultFormatBundle3D):
17 |     """Custom formatting bundle for 3D Lane.
18 |     """
19 | 
20 |     def __init__(self, class_names, **kwargs):
21 |         super(CustomFormatBundle3DLane, self).__init__(class_names, **kwargs)
22 | 
23 |     def __call__(self, results):
24 |         """Call function to transform and format common fields in results.
25 | 
26 |         Args:
27 |             results (dict): Result dict contains the data to convert.
28 | 
29 |         Returns:
30 |             dict: The result dict contains the data that is formatted with
31 |                 default bundle.
32 |         """
33 |         if 'gt_lanes_3d' in results:
34 |             results['gt_lanes_3d'] = DC(
35 |                 to_tensor(results['gt_lanes_3d']))
36 |         if 'gt_lane_labels_3d' in results:
37 |             results['gt_lane_labels_3d'] = DC(
38 |                 to_tensor(results['gt_lane_labels_3d']))
39 |         if 'gt_lane_adj' in results:
40 |             results['gt_lane_adj'] = DC(
41 |                 to_tensor(results['gt_lane_adj']))
42 |         if 'gt_lane_lste_adj' in results:
43 |             results['gt_lane_lste_adj'] = DC(
44 |                 to_tensor(results['gt_lane_lste_adj']))
45 |         if 'gt_lane_left_type' in results:
46 |             results['gt_lane_left_type'] = DC(
47 |                 to_tensor(results['gt_lane_left_type']))  
48 |         if 'gt_lane_right_type' in results:
49 |             results['gt_lane_right_type'] = DC(
50 |                 to_tensor(results['gt_lane_right_type']))  
51 |         if 'gt_instance_masks' in results:
52 |             results['gt_instance_masks'] = DC(
53 |                 to_tensor(results['gt_instance_masks']))
54 |         if 'gt_areas_3d' in results:
55 |             results['gt_areas_3d'] = DC(
56 |                 to_tensor(results['gt_areas_3d']))
57 |         if 'gt_area_labels_3d' in results:
58 |             results['gt_area_labels_3d'] = DC(
59 |                 to_tensor(results['gt_area_labels_3d']))
60 | 
61 |         results = super(CustomFormatBundle3DLane, self).__call__(results)
62 |         return results
63 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/pipelines/loading.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import numpy as np
  8 | import mmcv
  9 | from mmdet.datasets.builder import PIPELINES
 10 | from mmdet3d.datasets.pipelines import LoadAnnotations3D
 11 | 
 12 | 
 13 | @PIPELINES.register_module()
 14 | class CustomLoadMultiViewImageFromFiles(object):
 15 | 
 16 |     def __init__(self, to_float32=False, color_type='unchanged'):
 17 |         self.to_float32 = to_float32
 18 |         self.color_type = color_type
 19 | 
 20 |     def __call__(self, results):
 21 |         filename = results['img_filename']
 22 |         # img is of shape (h, w, c, num_views)
 23 |         img = [mmcv.imread(name, self.color_type) for name in filename]
 24 |         if self.to_float32:
 25 |             img = [_.astype(np.float32) for _ in img]
 26 |         results['filename'] = filename
 27 |         results['img'] = img
 28 |         results['img_shape'] = [img_.shape for img_ in img]
 29 |         results['ori_shape'] = [img_.shape for img_ in img]
 30 |         # Set initial values for default meta_keys
 31 |         results['pad_shape'] = [img_.shape for img_ in img]
 32 |         results['crop_shape'] = [np.zeros(2) for img_ in img]
 33 |         results['scale_factor'] = [1.0 for img_ in img]
 34 |         num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2]
 35 |         results['img_norm_cfg'] = dict(
 36 |             mean=np.zeros(num_channels, dtype=np.float32),
 37 |             std=np.ones(num_channels, dtype=np.float32),
 38 |             to_rgb=False)
 39 |         return results
 40 | 
 41 |     def __repr__(self):
 42 |         """str: Return a string that describes the module."""
 43 |         repr_str = self.__class__.__name__
 44 |         repr_str += f'(to_float32={self.to_float32}, '
 45 |         repr_str += f"color_type='{self.color_type}')"
 46 |         return repr_str
 47 | 
 48 | 
 49 | @PIPELINES.register_module()
 50 | class LoadAnnotations3DLaneSegment(LoadAnnotations3D):
 51 | 
 52 |     def __init__(self,
 53 |                  with_lane_3d=True,
 54 |                  with_lane_label_3d=True,
 55 |                  with_lane_adj=True,
 56 |                  with_lane_lste_adj=False,
 57 |                  with_lane_type=False,
 58 |                  with_bbox_3d=False,
 59 |                  with_label_3d=False,
 60 |                  with_area=False,
 61 |                  **kwargs):
 62 |         super().__init__(with_bbox_3d, with_label_3d, **kwargs)
 63 |         self.with_lane_3d = with_lane_3d
 64 |         self.with_lane_label_3d = with_lane_label_3d
 65 |         self.with_lane_type = with_lane_type
 66 |         self.with_lane_adj = with_lane_adj
 67 |         self.with_lane_lste_adj = with_lane_lste_adj
 68 |         self.with_area = with_area
 69 | 
 70 |     def _load_lanes_3d(self, results):
 71 |         results['gt_lanes_3d'] = results['ann_info']['gt_lanes_3d']
 72 |         if self.with_lane_label_3d:
 73 |             results['gt_lane_labels_3d'] = results['ann_info']['gt_lane_labels_3d']
 74 |         if self.with_lane_adj:
 75 |             results['gt_lane_adj'] = results['ann_info']['gt_lane_adj']
 76 |         if self.with_lane_lste_adj:
 77 |             results['gt_lane_lste_adj'] = results['ann_info']['gt_lane_lste_adj']
 78 |         if self.with_lane_type:
 79 |             results['gt_lane_left_type'] = results['ann_info']['gt_lane_left_type']
 80 |             results['gt_lane_right_type'] = results['ann_info']['gt_lane_right_type']
 81 |         if self.with_area:
 82 |             results['gt_areas_3d'] = results['ann_info']['gt_areas_3d']
 83 |             results['gt_area_labels_3d'] = results['ann_info']['gt_area_labels_3d']
 84 |         return results
 85 | 
 86 |     def __call__(self, results):
 87 |         results = super().__call__(results)
 88 |         if self.with_lane_3d:
 89 |             results = self._load_lanes_3d(results)
 90 |         return results
 91 | 
 92 |     def __repr__(self):
 93 |         """str: Return a string that describes the module."""
 94 |         indent_str = '    '
 95 |         repr_str = super().__repr__()
 96 |         repr_str += f'{indent_str}with_lane_3d={self.with_lane_3d}, '
 97 |         repr_str += f'{indent_str}with_lane_lable_3d={self.with_lane_label_3d}, '
 98 |         repr_str += f'{indent_str}with_lane_adj={self.with_lane_adj}, '
 99 |         repr_str += f'{indent_str}with_lane_lste_adj={self.with_lane_lste_adj}, '
100 |         repr_str += f'{indent_str}with_lane_type={self.with_lane_type}, '
101 |         repr_str += f'{indent_str}with_area={self.with_area}, '
102 |         return repr_str
103 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/datasets/pipelines/transform_3d_lane.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import numpy as np
 8 | import cv2
 9 | from mmdet.datasets.builder import PIPELINES
10 | from shapely.geometry import LineString
11 | 
12 | 
13 | @PIPELINES.register_module()
14 | class LaneSegmentParameterize3D(object):
15 | 
16 |     def __init__(self, method, method_para):
17 |         method_list = ['fix_pts_interp']
18 |         self.method = method
19 |         if not self.method in method_list:
20 |             raise Exception("Not implemented!")
21 |         self.method_para = method_para
22 | 
23 |     def __call__(self, results):
24 |         lanes = results['gt_lanes_3d']
25 |         para_lanes = getattr(self, self.method)(lanes, **self.method_para)
26 |         results['gt_lanes_3d'] = para_lanes
27 |         return results
28 | 
29 |     def fix_pts_interp(self, input_data, n_points=11):
30 | 
31 |         lane_list = []
32 |         for lane in input_data:
33 |             ls = lane[1]
34 |             distances = np.linspace(0, ls.length, n_points)
35 |             left_line = np.array([ls.interpolate(distance).coords[0] for distance in distances])
36 | 
37 |             ls = lane[2]
38 |             distances = np.linspace(0, ls.length, n_points)
39 |             right_line = np.array([ls.interpolate(distance).coords[0] for distance in distances])
40 | 
41 |             centerline = (left_line + right_line) / 2.0
42 | 
43 |             line = np.concatenate([centerline.flatten(), left_line.flatten(), right_line.flatten()])
44 |             lane_list.append(line)
45 | 
46 |         return np.array(lane_list, dtype=np.float32)
47 | 
48 | @PIPELINES.register_module()
49 | class GenerateLaneSegmentMask(object):
50 |     """Generate mask ground truth for segmentation head
51 |     Args:
52 |         results (dict): Result dict from loading pipeline.
53 |     Returns:
54 |         dict: Instance mask gt is added into result dict.
55 |     """
56 |     def __init__(self, points_num, map_size=[-50, -25, 50, 25], bev_h=100, bev_w=200) -> None:
57 |         self.points_num = points_num
58 |         self.map_size = map_size  # [min_x, min_y, max_x, max_y]
59 |         self.bev_h = bev_h
60 |         self.bev_w = bev_w
61 | 
62 |     def __call__(self,results):
63 |         results = self._generate_lanesegment_instance_mask(results)
64 |         return results
65 | 
66 |     def _generate_lanesegment_instance_mask(self, results):
67 |         gt_lanes = np.array(results['gt_lanes_3d']).reshape(-1, 3, self.points_num, 3)
68 |         gt_left_lines = gt_lanes[:, 1]
69 |         gt_right_lines = gt_lanes[:, 2]
70 | 
71 |         origin = np.array([self.bev_w // 2, self.bev_h // 2])
72 |         scale = np.array([self.bev_w / (self.map_size[2] - self.map_size[0]), self.bev_h / (self.map_size[3] - self.map_size[1])])
73 | 
74 |         inst_masks = []
75 |         for idx, (left_line, right_line) in enumerate(zip(gt_left_lines, gt_right_lines)):
76 | 
77 |             segment_boundary = np.concatenate((left_line, right_line[::-1], left_line[0:1]), axis=0)
78 |             mask = np.zeros((self.bev_h, self.bev_w), dtype=np.uint8)
79 | 
80 |             draw_coor = (segment_boundary[:, :2] * scale + origin).astype(np.int32)
81 |             mask = cv2.fillPoly(mask, [draw_coor], 255)
82 |             bitMask = (mask / 255)
83 |             inst_masks.append(bitMask)
84 | 
85 |         results['gt_instance_masks'] = inst_masks
86 | 
87 |         return results
88 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .detectors import *
2 | from .dense_heads import *
3 | from .modules import *
4 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .deformable_detr_head import CustomDeformableDETRHead
2 | from .relationship_head import RelationshipHead
3 | from .laneseg_head import LaneSegHead
4 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/dense_heads/relationship_head.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import numpy as np
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.nn.functional as F
 11 | 
 12 | from mmcv.utils import TORCH_VERSION, digit_version
 13 | from mmdet.models.builder import HEADS, build_loss
 14 | from mmcv.runner import BaseModule
 15 | 
 16 | class MLP(nn.Module):
 17 | 
 18 |     def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
 19 |         super().__init__()
 20 |         self.num_layers = num_layers
 21 |         h = [hidden_dim] * (num_layers - 1)
 22 |         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
 23 | 
 24 |     def forward(self, x):
 25 |         for i, layer in enumerate(self.layers):
 26 |             x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
 27 |         return x
 28 | 
 29 | @HEADS.register_module()
 30 | class RelationshipHead(BaseModule):
 31 | 
 32 |     def __init__(self,
 33 |                  in_channels_o1,
 34 |                  in_channels_o2=None,
 35 |                  shared_param=True,
 36 |                  loss_rel=dict(
 37 |                     type='FocalLoss',
 38 |                     use_sigmoid=True,
 39 |                     gamma=2.0,
 40 |                     alpha=0.25),
 41 |                  init_cfg=None):
 42 |         super().__init__()
 43 | 
 44 |         self.MLP_o1 = MLP(in_channels_o1, in_channels_o1, 128, 3)
 45 |         self.shared_param = shared_param
 46 |         if shared_param:
 47 |             self.MLP_o2 = self.MLP_o1
 48 |         else:
 49 |             self.MLP_o2 = MLP(in_channels_o2, in_channels_o2, 128, 3)
 50 |         self.classifier = MLP(256, 256, 1, 3)
 51 |         self.loss_rel = build_loss(loss_rel)
 52 | 
 53 |     def forward_train(self, o1_feats, o1_assign_results, o2_feats, o2_assign_results, gt_adj):
 54 |         rel_pred = self.forward(o1_feats, o2_feats)
 55 |         losses = self.loss(rel_pred, gt_adj, o1_assign_results, o2_assign_results)
 56 |         return losses
 57 | 
 58 |     def get_relationship(self, o1_feats, o2_feats):
 59 |         rel_pred = self.forward(o1_feats, o2_feats)
 60 |         rel_results = rel_pred.squeeze(-1).sigmoid()
 61 |         rel_results = [_ for _ in rel_results]
 62 |         return rel_results
 63 | 
 64 |     def forward(self, o1_feats, o2_feats):
 65 |         # feats: D, B, num_query, num_embedding
 66 |         o1_embeds = self.MLP_o1(o1_feats[-1])
 67 |         o2_embeds = self.MLP_o2(o2_feats[-1])
 68 | 
 69 |         num_query_o1 = o1_embeds.size(1)
 70 |         num_query_o2 = o2_embeds.size(1)
 71 |         o1_tensor = o1_embeds.unsqueeze(2).repeat(1, 1, num_query_o2, 1)
 72 |         o2_tensor = o2_embeds.unsqueeze(1).repeat(1, num_query_o1, 1, 1)
 73 | 
 74 |         relationship_tensor = torch.cat([o1_tensor, o2_tensor], dim=-1)
 75 |         relationship_pred = self.classifier(relationship_tensor)
 76 | 
 77 |         return relationship_pred
 78 | 
 79 |     def loss(self, rel_preds, gt_adjs, o1_assign_results, o2_assign_results):
 80 |         B, num_query_o1, num_query_o2, _ = rel_preds.size()
 81 |         o1_assign = o1_assign_results[-1]
 82 |         o1_pos_inds = o1_assign['pos_inds']
 83 |         o1_pos_assigned_gt_inds = o1_assign['pos_assigned_gt_inds']
 84 | 
 85 |         if self.shared_param:
 86 |             o2_assign = o1_assign
 87 |             o2_pos_inds = o1_pos_inds
 88 |             o2_pos_assigned_gt_inds = o1_pos_assigned_gt_inds
 89 |         else:
 90 |             o2_assign = o2_assign_results[-1]
 91 |             o2_pos_inds = o2_assign['pos_inds']
 92 |             o2_pos_assigned_gt_inds = o2_assign['pos_assigned_gt_inds']
 93 | 
 94 |         targets = []
 95 |         masked_rel_preds = []
 96 |         for i in range(B):
 97 |             gt_adj = gt_adjs[i]
 98 |             len_o1 = gt_adj.size(0)
 99 |             len_o2 = gt_adj.size(1)
100 |             o1_pos_mask = o1_pos_assigned_gt_inds[i] < len_o1
101 |             o2_pos_mask = o2_pos_assigned_gt_inds[i] < len_o2
102 | 
103 |             masked_rel_pred = rel_preds[i][o1_pos_inds[i]][:, o2_pos_inds[i]][o1_pos_mask][:, o2_pos_mask]
104 |             masked_rel_preds.append(masked_rel_pred.view(-1, 1))
105 | 
106 |             target = gt_adj[o1_pos_assigned_gt_inds[i][o1_pos_mask]][:, o2_pos_assigned_gt_inds[i][o2_pos_mask]]
107 |             targets.append(1 - target.view(-1).long())
108 | 
109 |         targets = torch.cat(targets, dim=0)
110 |         rel_preds = torch.cat(masked_rel_preds, dim=0)
111 |         if torch.numel(targets) == 0:
112 |             return dict(loss_rel=rel_preds.sum() * 0)
113 | 
114 |         loss_rel = self.loss_rel(rel_preds, targets, avg_factor=targets.sum())
115 | 
116 |         if digit_version(TORCH_VERSION) >= digit_version('1.8'):
117 |             loss_rel = torch.nan_to_num(loss_rel)
118 | 
119 |         return dict(loss_rel=loss_rel)
120 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .lanesegnet import LaneSegNet
2 | from .lanesegnet_mapele_bucket import LaneSegNetMapElementBucket
3 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/detectors/lanesegnet.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import copy
  8 | import numpy as np
  9 | import torch
 10 | 
 11 | from mmcv.runner import force_fp32, auto_fp16
 12 | from mmdet.models import DETECTORS
 13 | from mmdet.models.builder import build_head
 14 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector
 15 | 
 16 | from ...utils.builder import build_bev_constructor
 17 | 
 18 | 
 19 | @DETECTORS.register_module()
 20 | class LaneSegNet(MVXTwoStageDetector):
 21 | 
 22 |     def __init__(self,
 23 |                  bev_constructor=None,
 24 |                  lane_head=None,
 25 |                  lclc_head=None,
 26 |                  bbox_head=None,
 27 |                  lcte_head=None,
 28 |                  video_test_mode=False,
 29 |                  **kwargs):
 30 | 
 31 |         super(LaneSegNet, self).__init__(**kwargs)
 32 | 
 33 |         if bev_constructor is not None:
 34 |             self.bev_constructor = build_bev_constructor(bev_constructor)
 35 | 
 36 |         if lane_head is not None:
 37 |             lane_head.update(train_cfg=self.train_cfg.lane)
 38 |             self.pts_bbox_head = build_head(lane_head)
 39 |         else:
 40 |             self.pts_bbox_head = None
 41 |         
 42 |         if lclc_head is not None:
 43 |             self.lclc_head = build_head(lclc_head)
 44 |         else:
 45 |             self.lclc_head = None
 46 | 
 47 |         if bbox_head is not None:
 48 |             bbox_head.update(train_cfg=self.train_cfg.bbox)
 49 |             self.bbox_head = build_head(bbox_head)
 50 |         else:
 51 |             self.bbox_head = None
 52 | 
 53 |         if lcte_head is not None:
 54 |             self.lcte_head = build_head(lcte_head)
 55 |         else:
 56 |             self.lcte_head = None
 57 | 
 58 |         self.fp16_enabled = False
 59 | 
 60 |         # temporal
 61 |         self.video_test_mode = video_test_mode
 62 |         self.prev_frame_info = {
 63 |             'prev_bev': None,
 64 |             'scene_token': None,
 65 |             'prev_pos': 0,
 66 |             'prev_angle': 0,
 67 |         }
 68 | 
 69 |     def extract_img_feat(self, img, img_metas, len_queue=None):
 70 |         """Extract features of images."""
 71 |         B = img.size(0)
 72 |         if img is not None:
 73 | 
 74 |             if img.dim() == 5 and img.size(0) == 1:
 75 |                 img.squeeze_()
 76 |             elif img.dim() == 5 and img.size(0) > 1:
 77 |                 B, N, C, H, W = img.size()
 78 |                 img = img.reshape(B * N, C, H, W)
 79 |             img_feats = self.img_backbone(img)
 80 | 
 81 |             if isinstance(img_feats, dict):
 82 |                 img_feats = list(img_feats.values())
 83 |         else:
 84 |             return None
 85 |         if self.with_img_neck:
 86 |             img_feats = self.img_neck(img_feats)
 87 | 
 88 |         img_feats_reshaped = []
 89 |         for img_feat in img_feats:
 90 |             BN, C, H, W = img_feat.size()
 91 |             if len_queue is not None:
 92 |                 img_feats_reshaped.append(img_feat.view(int(B/len_queue), len_queue, int(BN / B), C, H, W))
 93 |             else:
 94 |                 img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W))
 95 |         return img_feats_reshaped
 96 | 
 97 |     @auto_fp16(apply_to=('img'))
 98 |     def extract_feat(self, img, img_metas=None, len_queue=None):
 99 |         img_feats = self.extract_img_feat(img, img_metas, len_queue=len_queue)
100 |         return img_feats
101 | 
102 |     def forward_dummy(self, img):
103 |         dummy_metas = None
104 |         return self.forward_test(img=img, img_metas=[[dummy_metas]])
105 | 
106 |     def forward(self, return_loss=True, **kwargs):
107 |         if return_loss:
108 |             return self.forward_train(**kwargs)
109 |         else:
110 |             return self.forward_test(**kwargs)
111 |     
112 |     def obtain_history_bev(self, imgs_queue, img_metas_list):
113 |         """Obtain history BEV features iteratively. To save GPU memory, gradients are not calculated.
114 |         """
115 |         self.eval()
116 | 
117 |         with torch.no_grad():
118 |             prev_bev = None
119 |             bs, len_queue, num_cams, C, H, W = imgs_queue.shape
120 |             imgs_queue = imgs_queue.reshape(bs*len_queue, num_cams, C, H, W)
121 |             img_feats_list = self.extract_feat(img=imgs_queue, len_queue=len_queue)
122 |             for i in range(len_queue):
123 |                 img_metas = [each[i] for each in img_metas_list]
124 |                 img_feats = [each_scale[:, i] for each_scale in img_feats_list]
125 |                 prev_bev = self.bev_constructor(img_feats, img_metas, prev_bev)
126 |             self.train()
127 |             return prev_bev
128 | 
129 |     @auto_fp16(apply_to=('img', 'points'))
130 |     def forward_train(self,
131 |                       img=None,
132 |                       img_metas=None,
133 |                       gt_lanes_3d=None,
134 |                       gt_lane_labels_3d=None,
135 |                       gt_lane_adj=None,
136 |                       gt_lane_left_type=None,
137 |                       gt_lane_right_type=None,
138 |                       gt_labels=None,
139 |                       gt_bboxes=None,
140 |                       gt_instance_masks=None,
141 |                       gt_bboxes_ignore=None,
142 |                       ):
143 | 
144 |         len_queue = img.size(1)
145 |         prev_img = img[:, :-1, ...]
146 |         img = img[:, -1, ...]
147 | 
148 |         if self.video_test_mode:
149 |             prev_img_metas = copy.deepcopy(img_metas)
150 |             prev_bev = self.obtain_history_bev(prev_img, prev_img_metas)
151 |         else:
152 |             prev_bev = None
153 | 
154 |         img_metas = [each[len_queue-1] for each in img_metas]
155 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
156 |         bev_feats = self.bev_constructor(img_feats, img_metas, prev_bev)
157 | 
158 |         losses = dict()
159 |         outs = self.pts_bbox_head(img_feats, bev_feats, img_metas)
160 |         loss_inputs = [outs, gt_lanes_3d, gt_lane_labels_3d, gt_instance_masks, gt_lane_left_type, gt_lane_right_type]
161 |         lane_losses, lane_assign_result = self.pts_bbox_head.loss(*loss_inputs, img_metas=img_metas)
162 |         for loss in lane_losses:
163 |             losses['lane_head.' + loss] = lane_losses[loss]
164 |         lane_feats = outs['history_states']
165 | 
166 |         if self.lclc_head is not None:
167 |             lclc_losses = self.lclc_head.forward_train(lane_feats, lane_assign_result, lane_feats, lane_assign_result, gt_lane_adj)
168 |             for loss in lclc_losses:
169 |                 losses['lclc_head.' + loss] = lclc_losses[loss]
170 | 
171 |         return losses
172 | 
173 |     def forward_test(self, img_metas, img=None, **kwargs):
174 |         for var, name in [(img_metas, 'img_metas')]:
175 |             if not isinstance(var, list):
176 |                 raise TypeError('{} must be a list, but got {}'.format(
177 |                     name, type(var)))
178 |         img = [img] if img is None else img
179 | 
180 |         if img_metas[0]['scene_token'] != self.prev_frame_info['scene_token']:
181 |             # the first sample of each scene is truncated
182 |             self.prev_frame_info['prev_bev'] = None
183 |         # update idx
184 |         self.prev_frame_info['scene_token'] = img_metas[0]['scene_token']
185 | 
186 |         # do not use temporal information
187 |         if not self.video_test_mode:
188 |             self.prev_frame_info['prev_bev'] = None
189 | 
190 |         # Get the delta of ego position and angle between two timestamps.
191 |         tmp_pos = copy.deepcopy(img_metas[0]['can_bus'][:3])
192 |         tmp_angle = copy.deepcopy(img_metas[0]['can_bus'][-1])
193 |         if self.prev_frame_info['prev_bev'] is not None:
194 |             img_metas[0]['can_bus'][:3] -= self.prev_frame_info['prev_pos']
195 |             img_metas[0]['can_bus'][-1] -= self.prev_frame_info['prev_angle']
196 |         else:
197 |             img_metas[0]['can_bus'][-1] = 0
198 |             img_metas[0]['can_bus'][:3] = 0
199 | 
200 |         new_prev_bev, results_list = self.simple_test(
201 |             img_metas, img, prev_bev=self.prev_frame_info['prev_bev'], **kwargs)
202 |         # During inference, we save the BEV features and ego motion of each timestamp.
203 |         self.prev_frame_info['prev_pos'] = tmp_pos
204 |         self.prev_frame_info['prev_angle'] = tmp_angle
205 |         self.prev_frame_info['prev_bev'] = new_prev_bev
206 |         return results_list
207 | 
208 |     def simple_test_pts(self, x, img_metas, img=None, prev_bev=None, rescale=False):
209 |         """Test function"""
210 |         batchsize = len(img_metas)
211 | 
212 |         bev_feats = self.bev_constructor(x, img_metas, prev_bev)
213 |         outs = self.pts_bbox_head(x, bev_feats, img_metas)
214 | 
215 |         lane_results = self.pts_bbox_head.get_lanes(
216 |             outs, img_metas, rescale=rescale)
217 | 
218 |         if self.lclc_head is not None:
219 |             lane_feats = outs['history_states']
220 |             lsls_results = self.lclc_head.get_relationship(lane_feats, lane_feats)
221 |             lsls_results = [result.detach().cpu().numpy() for result in lsls_results]
222 |         else:
223 |             lsls_results = [None for _ in range(batchsize)]
224 | 
225 |         return bev_feats, lane_results, lsls_results
226 | 
227 |     def simple_test(self, img_metas, img=None, prev_bev=None, rescale=False):
228 |         """Test function without augmentaiton."""
229 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
230 | 
231 |         results_list = [dict() for i in range(len(img_metas))]
232 |         new_prev_bev, lane_results, lsls_results = self.simple_test_pts(
233 |             img_feats, img_metas, img, prev_bev, rescale=rescale)
234 |         for result_dict, lane, lsls in zip(results_list, lane_results, lsls_results):
235 |             result_dict['lane_results'] = lane
236 |             result_dict['bbox_results'] = None
237 |             result_dict['lsls_results'] = lsls
238 |             result_dict['lste_results'] = None
239 | 
240 |         return new_prev_bev, results_list
241 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/detectors/lanesegnet_mapele_bucket.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import copy
  8 | import numpy as np
  9 | import torch
 10 | 
 11 | from mmcv.runner import force_fp32, auto_fp16
 12 | from mmdet.models import DETECTORS
 13 | from mmdet.models.builder import build_head
 14 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector
 15 | 
 16 | from ...utils.builder import build_bev_constructor
 17 | 
 18 | 
 19 | @DETECTORS.register_module()
 20 | class LaneSegNetMapElementBucket(MVXTwoStageDetector):
 21 | 
 22 |     def __init__(self,
 23 |                  bev_constructor=None,
 24 |                  lane_head=None,
 25 |                  lsls_head=None,
 26 |                  bbox_head=None,
 27 |                  lste_head=None,
 28 |                  area_head=None,
 29 |                  video_test_mode=False,
 30 |                  **kwargs):
 31 | 
 32 |         super(LaneSegNetMapElementBucket, self).__init__(**kwargs)
 33 | 
 34 |         if bev_constructor is not None:
 35 |             self.bev_constructor = build_bev_constructor(bev_constructor)
 36 | 
 37 |         if lane_head is not None:
 38 |             lane_head.update(train_cfg=self.train_cfg.lane)
 39 |             self.pts_bbox_head = build_head(lane_head)
 40 |         else:
 41 |             self.pts_bbox_head = None
 42 |         
 43 |         if lsls_head is not None:
 44 |             self.lsls_head = build_head(lsls_head)
 45 |         else:
 46 |             self.lsls_head = None
 47 | 
 48 |         if bbox_head is not None:
 49 |             bbox_head.update(train_cfg=self.train_cfg.bbox)
 50 |             self.bbox_head = build_head(bbox_head)
 51 |         else:
 52 |             self.bbox_head = None
 53 | 
 54 |         if lste_head is not None:
 55 |             self.lste_head = build_head(lste_head)
 56 |         else:
 57 |             self.lste_head = None
 58 |  
 59 |         if area_head is not None:
 60 |             area_head.update(train_cfg=self.train_cfg.area)
 61 |             self.area_head = build_head(area_head)
 62 |         else:
 63 |             self.area_head = None
 64 | 
 65 |         self.fp16_enabled = False
 66 | 
 67 |         # temporal
 68 |         self.video_test_mode = video_test_mode
 69 |         self.prev_frame_info = {
 70 |             'prev_bev': None,
 71 |             'scene_token': None,
 72 |             'prev_pos': 0,
 73 |             'prev_angle': 0,
 74 |         }
 75 | 
 76 |     def extract_img_feat(self, img, img_metas, len_queue=None):
 77 |         """Extract features of images."""
 78 |         B = img.size(0)
 79 |         if img is not None:
 80 | 
 81 |             if img.dim() == 5 and img.size(0) == 1:
 82 |                 img.squeeze_()
 83 |             elif img.dim() == 5 and img.size(0) > 1:
 84 |                 B, N, C, H, W = img.size()
 85 |                 img = img.reshape(B * N, C, H, W)
 86 |             img_feats = self.img_backbone(img)
 87 | 
 88 |             if isinstance(img_feats, dict):
 89 |                 img_feats = list(img_feats.values())
 90 |         else:
 91 |             return None
 92 |         if self.with_img_neck:
 93 |             img_feats = self.img_neck(img_feats)
 94 | 
 95 |         img_feats_reshaped = []
 96 |         for img_feat in img_feats:
 97 |             BN, C, H, W = img_feat.size()
 98 |             if len_queue is not None:
 99 |                 img_feats_reshaped.append(img_feat.view(int(B/len_queue), len_queue, int(BN / B), C, H, W))
100 |             else:
101 |                 img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W))
102 |         return img_feats_reshaped
103 | 
104 |     @auto_fp16(apply_to=('img',))
105 |     def extract_feat(self, img, img_metas=None, len_queue=None):
106 |         img_feats = self.extract_img_feat(img, img_metas, len_queue=len_queue)
107 |         return img_feats
108 | 
109 |     def forward_dummy(self, img):
110 |         dummy_metas = None
111 |         return self.forward_test(img=img, img_metas=[[dummy_metas]])
112 | 
113 |     def forward(self, return_loss=True, **kwargs):
114 |         if return_loss:
115 |             return self.forward_train(**kwargs)
116 |         else:
117 |             return self.forward_test(**kwargs)
118 | 
119 |     def obtain_history_bev(self, imgs_queue, img_metas_list):
120 |         """Obtain history BEV features iteratively. To save GPU memory, gradients are not calculated.
121 |         """
122 |         self.eval()
123 | 
124 |         with torch.no_grad():
125 |             prev_bev = None
126 |             bs, len_queue, num_cams, C, H, W = imgs_queue.shape
127 |             imgs_queue = imgs_queue.reshape(bs*len_queue, num_cams, C, H, W)
128 |             img_feats_list = self.extract_feat(img=imgs_queue, len_queue=len_queue)
129 |             for i in range(len_queue):
130 |                 img_metas = [each[i] for each in img_metas_list]
131 |                 img_feats = [each_scale[:, i] for each_scale in img_feats_list]
132 |                 prev_bev = self.bev_constructor(img_feats, img_metas, prev_bev)
133 |             self.train()
134 |             return prev_bev
135 | 
136 |     @auto_fp16(apply_to=('img',))
137 |     def forward_train(self,
138 |                       img=None,
139 |                       img_metas=None,
140 |                       gt_lanes_3d=None,
141 |                       gt_lane_labels_3d=None,
142 |                       gt_lane_adj=None,
143 |                       gt_lane_left_type=None,
144 |                       gt_lane_right_type=None,
145 |                       gt_labels=None,
146 |                       gt_bboxes=None,
147 |                       gt_lane_lste_adj=None,
148 |                       gt_areas_3d=None,
149 |                       gt_area_labels_3d=None,
150 |                       gt_instance_masks=None,
151 |                       gt_bboxes_ignore=None,
152 |                       ):
153 | 
154 |         len_queue = img.size(1)
155 |         prev_img = img[:, :-1, ...]
156 |         img = img[:, -1, ...]
157 | 
158 |         if self.video_test_mode:
159 |             prev_img_metas = copy.deepcopy(img_metas)
160 |             prev_bev = self.obtain_history_bev(prev_img, prev_img_metas)
161 |         else:
162 |             prev_bev = None
163 | 
164 |         img_metas = [each[len_queue-1] for each in img_metas]
165 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
166 |         bev_feats = self.bev_constructor(img_feats, img_metas, prev_bev)
167 | 
168 |         losses = dict()
169 |         outs = self.pts_bbox_head(img_feats, bev_feats, img_metas)
170 |         loss_inputs = [outs, gt_lanes_3d, gt_lane_labels_3d, gt_instance_masks, gt_lane_left_type, gt_lane_right_type]
171 |         lane_losses, lane_assign_result = self.pts_bbox_head.loss(*loss_inputs, img_metas=img_metas)
172 |         for loss in lane_losses:
173 |             losses['lane_head.' + loss] = lane_losses[loss]
174 |         lane_feats = outs['history_states']
175 | 
176 |         if self.lsls_head is not None:
177 |             lsls_losses = self.lsls_head.forward_train(lane_feats, lane_assign_result, lane_feats, lane_assign_result, gt_lane_adj)
178 |             for loss in lsls_losses:
179 |                 losses['lsls_head.' + loss] = lsls_losses[loss]
180 | 
181 |         if self.bbox_head is not None:
182 |             front_view_img_feats = [lvl[:, 0] for lvl in img_feats]
183 |             batch_input_shape = tuple(img[0, 0].size()[-2:])
184 |             bbox_img_metas = []
185 |             for img_meta in img_metas:
186 |                 bbox_img_metas.append(
187 |                     dict(
188 |                         batch_input_shape=batch_input_shape,
189 |                         img_shape=img_meta['img_shape'][0],
190 |                         scale_factor=img_meta['scale_factor'][0],
191 |                         crop_shape=img_meta['crop_shape'][0]))
192 |                 img_meta['batch_input_shape'] = batch_input_shape
193 | 
194 |             te_losses = {}
195 |             bbox_outs = self.bbox_head(front_view_img_feats, bbox_img_metas)
196 |             bbox_losses, te_assign_result = self.bbox_head.loss(bbox_outs, gt_bboxes, gt_labels, bbox_img_metas, gt_bboxes_ignore)
197 |             for loss in bbox_losses:
198 |                 te_losses['bbox_head.' + loss] = bbox_losses[loss]
199 | 
200 |             if self.lste_head is not None:
201 |                 te_feats = bbox_outs['history_states']
202 |                 lste_losses = self.lste_head.forward_train(lane_feats, lane_assign_result, te_feats, te_assign_result, gt_lane_lste_adj)
203 |                 for loss in lste_losses:
204 |                     te_losses['lste_head.' + loss] = lste_losses[loss]
205 |         
206 |             num_gt_bboxes = sum([len(gt) for gt in gt_labels])
207 |             if num_gt_bboxes == 0:
208 |                 for loss in te_losses:
209 |                     te_losses[loss] *= 0
210 | 
211 |             losses.update(te_losses)
212 |         
213 |         if self.area_head is not None:
214 |             outs = self.area_head(img_feats, bev_feats, img_metas)
215 |             loss_inputs = [outs, gt_areas_3d, gt_area_labels_3d]
216 |             area_losses, _ = self.area_head.loss(*loss_inputs, img_metas=img_metas)
217 |             for loss in area_losses:
218 |                 losses['area_head.' + loss] = area_losses[loss]
219 | 
220 |         return losses
221 | 
222 |     def forward_test(self, img_metas, img=None, **kwargs):
223 |         for var, name in [(img_metas, 'img_metas')]:
224 |             if not isinstance(var, list):
225 |                 raise TypeError('{} must be a list, but got {}'.format(
226 |                     name, type(var)))
227 |         img = [img] if img is None else img
228 | 
229 |         if img_metas[0]['scene_token'] != self.prev_frame_info['scene_token']:
230 |             # the first sample of each scene is truncated
231 |             self.prev_frame_info['prev_bev'] = None
232 |         # update idx
233 |         self.prev_frame_info['scene_token'] = img_metas[0]['scene_token']
234 | 
235 |         # do not use temporal information
236 |         if not self.video_test_mode:
237 |             self.prev_frame_info['prev_bev'] = None
238 | 
239 |         # Get the delta of ego position and angle between two timestamps.
240 |         tmp_pos = copy.deepcopy(img_metas[0]['can_bus'][:3])
241 |         tmp_angle = copy.deepcopy(img_metas[0]['can_bus'][-1])
242 |         if self.prev_frame_info['prev_bev'] is not None:
243 |             img_metas[0]['can_bus'][:3] -= self.prev_frame_info['prev_pos']
244 |             img_metas[0]['can_bus'][-1] -= self.prev_frame_info['prev_angle']
245 |         else:
246 |             img_metas[0]['can_bus'][-1] = 0
247 |             img_metas[0]['can_bus'][:3] = 0
248 | 
249 |         new_prev_bev, results_list = self.simple_test(
250 |             img_metas, img, prev_bev=self.prev_frame_info['prev_bev'], **kwargs)
251 |         # During inference, we save the BEV features and ego motion of each timestamp.
252 |         self.prev_frame_info['prev_pos'] = tmp_pos
253 |         self.prev_frame_info['prev_angle'] = tmp_angle
254 |         self.prev_frame_info['prev_bev'] = new_prev_bev
255 |         return results_list
256 | 
257 |     def simple_test_pts(self, x, img_metas, img=None, prev_bev=None, rescale=False):
258 |         """Test function"""
259 |         batchsize = len(img_metas)
260 | 
261 |         bev_feats = self.bev_constructor(x, img_metas, prev_bev)
262 |         outs = self.pts_bbox_head(x, bev_feats, img_metas)
263 | 
264 |         lane_results = self.pts_bbox_head.get_lanes(
265 |             outs, img_metas, rescale=rescale)
266 | 
267 |         if self.lsls_head is not None:
268 |             lane_feats = outs['history_states']
269 |             lsls_results = self.lsls_head.get_relationship(lane_feats, lane_feats)
270 |             lsls_results = [result.detach().cpu().numpy() for result in lsls_results]
271 |         else:
272 |             lsls_results = [None for _ in range(batchsize)]
273 | 
274 |         if self.bbox_head is not None:
275 |             front_view_img_feats = [lvl[:, 0] for lvl in x]
276 |             batch_input_shape = tuple(img[0, 0].size()[-2:])
277 |             bbox_img_metas = []
278 |             for img_meta in img_metas:
279 |                 bbox_img_metas.append(
280 |                     dict(
281 |                         batch_input_shape=batch_input_shape,
282 |                         img_shape=img_meta['img_shape'][0],
283 |                         scale_factor=img_meta['scale_factor'][0],
284 |                         crop_shape=img_meta['crop_shape'][0]))
285 |                 img_meta['batch_input_shape'] = batch_input_shape
286 |             bbox_outs = self.bbox_head(front_view_img_feats, bbox_img_metas)
287 |             bbox_results = self.bbox_head.get_bboxes(bbox_outs, bbox_img_metas, rescale=rescale)
288 |         else:
289 |             bbox_results = [None for _ in range(batchsize)]
290 |         
291 |         if self.bbox_head is not None and self.lste_head is not None:
292 |             te_feats = bbox_outs['history_states']
293 |             lste_results = self.lste_head.get_relationship(lane_feats, te_feats)
294 |             lste_results = [result.detach().cpu().numpy() for result in lste_results]
295 |         else:
296 |             lste_results = [None for _ in range(batchsize)]
297 |         
298 |         if self.area_head is not None:
299 |             outs = self.area_head(x, bev_feats, img_metas)
300 |             area_results = self.area_head.get_areas(
301 |                 outs, img_metas, rescale=rescale)
302 | 
303 |         return bev_feats, lane_results, lsls_results, bbox_results, lste_results, area_results
304 | 
305 |     def simple_test(self, img_metas, img=None, prev_bev=None, rescale=False):
306 |         """Test function without augmentaiton."""
307 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
308 | 
309 |         results_list = [dict() for i in range(len(img_metas))]
310 |         new_prev_bev, lane_results, lsls_results, bbox_results, lste_results, area_results = self.simple_test_pts(
311 |             img_feats, img_metas, img, prev_bev, rescale=rescale)
312 |         for result_dict, lane, lsls, bbox, lste, area in zip(
313 |             results_list, lane_results, lsls_results, bbox_results, lste_results, area_results):
314 |             result_dict['lane_results'] = lane
315 |             result_dict['lsls_results'] = lsls
316 |             result_dict['bbox_results'] = bbox
317 |             result_dict['lste_results'] = lste
318 |             result_dict['area_results'] = area
319 |         return new_prev_bev, results_list
320 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .bevformer_constructer import BEVFormerConstructer
2 | from .laneseg_transformer import LaneSegNetTransformer
3 | from .laneseg_decoder import LaneSegNetDecoder
4 | from .lane_attention import LaneAttention
5 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/modules/bevformer_constructer.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import numpy as np
  8 | import torch
  9 | import torch.nn as nn
 10 | from torch.nn.init import normal_
 11 | from torchvision.transforms.functional import rotate
 12 | 
 13 | from mmcv.cnn import xavier_init
 14 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence, build_positional_encoding
 15 | from mmcv.runner.base_module import BaseModule
 16 | 
 17 | from ...utils.builder import BEV_CONSTRUCTOR
 18 | from projects.bevformer.modules.temporal_self_attention import TemporalSelfAttention
 19 | from projects.bevformer.modules.spatial_cross_attention import MSDeformableAttention3D
 20 | from projects.bevformer.modules.decoder import CustomMSDeformableAttention
 21 | 
 22 | 
 23 | @BEV_CONSTRUCTOR.register_module()
 24 | class BEVFormerConstructer(BaseModule):
 25 |     """Implements the BEVFormer BEV Constructer.
 26 |     Args:
 27 |         as_two_stage (bool): Generate query from encoder features.
 28 |             Default: False.
 29 |         num_feature_levels (int): Number of feature maps from FPN:
 30 |             Default: 4.
 31 |         two_stage_num_proposals (int): Number of proposals when set
 32 |             `as_two_stage` as True. Default: 300.
 33 |     """
 34 | 
 35 |     def __init__(self,
 36 |                  num_feature_levels=4,
 37 |                  num_cams=6,
 38 |                  embed_dims=256,
 39 |                  rotate_prev_bev=True,
 40 |                  use_shift=True,
 41 |                  use_can_bus=True,
 42 |                  can_bus_norm=True,
 43 |                  use_cams_embeds=True,
 44 |                  pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
 45 |                  bev_h=200,
 46 |                  bev_w=200,
 47 |                  rotate_center=[100, 100],
 48 |                  encoder=None,
 49 |                  positional_encoding=None,
 50 |                  **kwargs):
 51 |         super(BEVFormerConstructer, self).__init__(**kwargs)
 52 |         self.embed_dims = embed_dims
 53 |         self.num_feature_levels = num_feature_levels
 54 |         self.num_cams = num_cams
 55 |         self.fp16_enabled = False
 56 | 
 57 |         self.rotate_prev_bev = rotate_prev_bev
 58 |         self.use_shift = use_shift
 59 |         self.use_can_bus = use_can_bus
 60 |         self.can_bus_norm = can_bus_norm
 61 |         self.use_cams_embeds = use_cams_embeds
 62 |         self.encoder = build_transformer_layer_sequence(encoder)
 63 |         self.positional_encoding = build_positional_encoding(positional_encoding)
 64 | 
 65 |         self.pc_range = pc_range
 66 |         self.real_w = self.pc_range[3] - self.pc_range[0]
 67 |         self.real_h = self.pc_range[4] - self.pc_range[1]
 68 |         self.bev_h = bev_h
 69 |         self.bev_w = bev_w
 70 |         self.rotate_center = rotate_center
 71 | 
 72 |         self.init_layers()
 73 | 
 74 |     def init_layers(self):
 75 |         self.bev_embedding = nn.Embedding(
 76 |                 self.bev_h * self.bev_w, self.embed_dims)
 77 |         self.level_embeds = nn.Parameter(torch.Tensor(
 78 |             self.num_feature_levels, self.embed_dims))
 79 |         self.cams_embeds = nn.Parameter(
 80 |             torch.Tensor(self.num_cams, self.embed_dims))
 81 |         self.can_bus_mlp = nn.Sequential(
 82 |             nn.Linear(18, self.embed_dims // 2),
 83 |             nn.ReLU(inplace=True),
 84 |             nn.Linear(self.embed_dims // 2, self.embed_dims),
 85 |             nn.ReLU(inplace=True),
 86 |         )
 87 |         if self.can_bus_norm:
 88 |             self.can_bus_mlp.add_module('norm', nn.LayerNorm(self.embed_dims))
 89 |  
 90 |     def init_weights(self):
 91 |         """Initialize the transformer weights."""
 92 |         for p in self.parameters():
 93 |             if p.dim() > 1:
 94 |                 nn.init.xavier_uniform_(p)
 95 |         for m in self.modules():
 96 |             if isinstance(m, MSDeformableAttention3D) or isinstance(m, TemporalSelfAttention) \
 97 |                     or isinstance(m, CustomMSDeformableAttention):
 98 |                 try:
 99 |                     m.init_weight()
100 |                 except AttributeError:
101 |                     m.init_weights()
102 |         normal_(self.level_embeds)
103 |         normal_(self.cams_embeds)
104 |         xavier_init(self.can_bus_mlp, distribution='uniform', bias=0.)
105 | 
106 |     # @auto_fp16(apply_to=('mlvl_feats', 'prev_bev'))
107 |     def forward(self, mlvl_feats, img_metas, prev_bev=None, **kwargs):
108 |         """
109 |         obtain bev features.
110 |         """
111 |         bs, num_cam, _, _, _ = mlvl_feats[0].shape
112 |         dtype = mlvl_feats[0].dtype
113 | 
114 |         bev_queries = self.bev_embedding.weight.to(dtype)
115 |         bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1)
116 | 
117 |         bev_mask = torch.zeros((bs, self.bev_h, self.bev_w),
118 |                                device=bev_queries.device).to(dtype)
119 |         bev_pos = self.positional_encoding(bev_mask).to(dtype)
120 |         bev_pos = bev_pos.flatten(2).permute(2, 0, 1)
121 | 
122 |         # BEVFormer assumes the coords are x-right and y-forward for the nuScenes lidar
123 |         # but OpenLane-V2's coords are x-forward and y-left
124 |         # here is a fix for any lidar coords, the shift is calculated by the rotation matrix
125 |         delta_global = np.array([each['can_bus'][:3] for each in img_metas])
126 |         lidar2global_rotation = np.array([each['lidar2global_rotation'] for each in img_metas])
127 |         delta_lidar = []
128 |         for i in range(bs):
129 |             delta_lidar.append(np.linalg.inv(lidar2global_rotation[i]) @ delta_global[i])
130 |         delta_lidar = np.array(delta_lidar)
131 |         shift_y = delta_lidar[:, 1] / self.real_h
132 |         shift_x = delta_lidar[:, 0] / self.real_w
133 |         shift_y = shift_y * self.use_shift
134 |         shift_x = shift_x * self.use_shift
135 |         shift = bev_queries.new_tensor([shift_x, shift_y]).permute(1, 0)  # xy, bs -> bs, xy
136 | 
137 |         if prev_bev is not None:
138 |             if prev_bev.shape[1] == self.bev_h * self.bev_w:
139 |                 prev_bev = prev_bev.permute(1, 0, 2)
140 |             if self.rotate_prev_bev:
141 |                 for i in range(bs):
142 |                     # num_prev_bev = prev_bev.size(1)
143 |                     rotation_angle = img_metas[i]['can_bus'][-1]
144 |                     tmp_prev_bev = prev_bev[:, i].reshape(
145 |                         self.bev_h, self.bev_w, -1).permute(2, 0, 1)
146 |                     tmp_prev_bev = rotate(tmp_prev_bev, rotation_angle,
147 |                                           center=self.rotate_center)
148 |                     tmp_prev_bev = tmp_prev_bev.permute(1, 2, 0).reshape(
149 |                         self.bev_h * self.bev_w, 1, -1)
150 |                     prev_bev[:, i] = tmp_prev_bev[:, 0]
151 | 
152 |         # add can bus signals
153 |         can_bus = bev_queries.new_tensor(
154 |             [each['can_bus'] for each in img_metas])  # [:, :]
155 |         can_bus = self.can_bus_mlp(can_bus)[None, :, :]
156 |         bev_queries = bev_queries + can_bus * self.use_can_bus
157 | 
158 |         feat_flatten = []
159 |         spatial_shapes = []
160 |         for lvl, feat in enumerate(mlvl_feats):
161 |             bs, num_cam, c, h, w = feat.shape
162 |             spatial_shape = (h, w)
163 |             feat = feat.flatten(3).permute(1, 0, 3, 2)
164 |             if self.use_cams_embeds:
165 |                 feat = feat + self.cams_embeds[:, None, None, :].to(feat.dtype)
166 |             feat = feat + self.level_embeds[None,
167 |                                             None, lvl:lvl + 1, :].to(feat.dtype)
168 |             spatial_shapes.append(spatial_shape)
169 |             feat_flatten.append(feat)
170 | 
171 |         feat_flatten = torch.cat(feat_flatten, 2)
172 |         spatial_shapes = torch.as_tensor(
173 |             spatial_shapes, dtype=torch.long, device=bev_pos.device)
174 |         level_start_index = torch.cat((spatial_shapes.new_zeros(
175 |             (1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
176 | 
177 |         feat_flatten = feat_flatten.permute(
178 |             0, 2, 1, 3)  # (num_cam, H*W, bs, embed_dims)
179 | 
180 |         bev_embed = self.encoder(
181 |             bev_queries,
182 |             feat_flatten,
183 |             feat_flatten,
184 |             bev_h=self.bev_h,
185 |             bev_w=self.bev_w,
186 |             bev_pos=bev_pos,
187 |             spatial_shapes=spatial_shapes,
188 |             level_start_index=level_start_index,
189 |             prev_bev=prev_bev,
190 |             shift=shift,
191 |             img_metas=img_metas,
192 |             **kwargs
193 |         )
194 | 
195 |         return bev_embed
196 | 
197 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/modules/lane_attention.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import warnings
  8 | import math
  9 | import numpy as np
 10 | import torch
 11 | import torch.nn as nn
 12 | import torch.nn.functional as F
 13 | import mmcv
 14 | from mmcv.cnn import xavier_init, constant_init
 15 | from mmcv.cnn.bricks.registry import ATTENTION
 16 | from mmcv.runner.base_module import BaseModule
 17 | from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
 18 | from projects.bevformer.modules.multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32
 19 | 
 20 | 
 21 | @ATTENTION.register_module()
 22 | class LaneAttention(BaseModule):
 23 | 
 24 |     def __init__(self,
 25 |                  embed_dims=256,
 26 |                  num_heads=8,
 27 |                  num_levels=1,
 28 |                  num_points=32,
 29 |                  im2col_step=64,
 30 |                  dropout=0.1,
 31 |                  batch_first=False,
 32 |                  norm_cfg=None,
 33 |                  init_cfg=None):
 34 |         super().__init__(init_cfg)
 35 |         if embed_dims % num_heads != 0:
 36 |             raise ValueError(f'embed_dims must be divisible by num_heads, '
 37 |                              f'but got {embed_dims} and {num_heads}')
 38 |         dim_per_head = embed_dims // num_heads
 39 |         self.norm_cfg = norm_cfg
 40 |         self.dropout = nn.Dropout(dropout)
 41 |         self.batch_first = batch_first
 42 |         self.fp16_enabled = False
 43 | 
 44 |         # you'd better set dim_per_head to a power of 2
 45 |         # which is more efficient in the CUDA implementation
 46 |         def _is_power_of_2(n):
 47 |             if (not isinstance(n, int)) or (n < 0):
 48 |                 raise ValueError(
 49 |                     'invalid input for _is_power_of_2: {} (type: {})'.format(
 50 |                         n, type(n)))
 51 |             return (n & (n - 1) == 0) and n != 0
 52 | 
 53 |         if not _is_power_of_2(dim_per_head):
 54 |             warnings.warn(
 55 |                 "You'd better set embed_dims in "
 56 |                 'MultiScaleDeformAttention to make '
 57 |                 'the dimension of each attention head a power of 2 '
 58 |                 'which is more efficient in our CUDA implementation.')
 59 | 
 60 |         self.im2col_step = im2col_step
 61 |         self.embed_dims = embed_dims
 62 |         self.num_levels = num_levels
 63 |         self.num_heads = num_heads
 64 |         self.num_points = num_points
 65 |         self.sampling_offsets = nn.Linear(
 66 |             embed_dims, num_heads * num_levels * num_points * 2)
 67 |         self.attention_weights = nn.Linear(embed_dims,
 68 |                                            num_heads * num_levels * num_points)
 69 |         self.value_proj = nn.Linear(embed_dims, embed_dims)
 70 |         self.output_proj = nn.Linear(embed_dims, embed_dims)
 71 |         self.init_weights()
 72 | 
 73 |     def init_weights(self):
 74 |         """Default initialization for Parameters of Module."""
 75 |         constant_init(self.sampling_offsets, 0.)
 76 |         thetas = torch.arange(
 77 |             8, dtype=torch.float32) * (2.0 * math.pi / 8)
 78 |         grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
 79 |         grid_init = (grid_init /
 80 |                      grid_init.abs().max(-1, keepdim=True)[0]).view(
 81 |             1, 1, 8, 1, 2).repeat(self.num_heads, self.num_levels, 1, self.num_points // 8, 1)
 82 |         for i in range(self.num_points // 8):
 83 |             grid_init[:, :, :, i, :] *= i + 1
 84 |         grid_init = grid_init.view(self.num_heads, self.num_levels, self.num_points, 2)
 85 | 
 86 |         self.sampling_offsets.bias.data = grid_init.view(-1)
 87 |         constant_init(self.attention_weights, val=0., bias=0.)
 88 |         xavier_init(self.value_proj, distribution='uniform', bias=0.)
 89 |         xavier_init(self.output_proj, distribution='uniform', bias=0.)
 90 |         self._is_init = True
 91 | 
 92 |     def forward(self,
 93 |                 query,
 94 |                 key=None,
 95 |                 value=None,
 96 |                 identity=None,
 97 |                 query_pos=None,
 98 |                 key_padding_mask=None,
 99 |                 reference_points=None,
100 |                 spatial_shapes=None,
101 |                 level_start_index=None,
102 |                 flag='decoder',
103 |                 **kwargs):
104 | 
105 |         if value is None:
106 |             value = query
107 | 
108 |         if identity is None:
109 |             identity = query
110 |         if query_pos is not None:
111 |             query = query + query_pos
112 |         if not self.batch_first:
113 |             # change to (bs, num_query ,embed_dims)
114 |             query = query.permute(1, 0, 2)
115 |             value = value.permute(1, 0, 2)
116 | 
117 |         bs, num_query, _ = query.shape
118 |         bs, num_value, _ = value.shape
119 |         assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
120 | 
121 |         value = self.value_proj(value)
122 |         if key_padding_mask is not None:
123 |             value = value.masked_fill(key_padding_mask[..., None], 0.0)
124 |         value = value.view(bs, num_value, self.num_heads, -1)
125 | 
126 |         sampling_offsets = self.sampling_offsets(query).view(
127 |             bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
128 |         attention_weights = self.attention_weights(query).view(
129 |             bs, num_query, self.num_heads, self.num_levels * self.num_points)
130 |         attention_weights = attention_weights.softmax(-1)
131 | 
132 |         attention_weights = attention_weights.view(bs, num_query,
133 |                                                    self.num_heads,
134 |                                                    self.num_levels,
135 |                                                    self.num_points)
136 |         if reference_points.shape[-1] == 2:
137 |             reference_points = reference_points.permute(0, 1, 3, 2, 4)
138 |             offset_normalizer = torch.stack(
139 |                 [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
140 |             sampling_locations = reference_points[:, :, :, :, None, :] \
141 |                 + sampling_offsets \
142 |                 / offset_normalizer[None, None, None, :, None, :]
143 |         else:
144 |             raise ValueError(
145 |                 f'Last dim of reference_points must be'
146 |                 f' 2, but get {reference_points.shape[-1]} instead.')
147 |         if torch.cuda.is_available() and value.is_cuda:
148 | 
149 |             # using fp16 deformable attention is unstable because it performs many sum operations
150 |             if value.dtype == torch.float16:
151 |                 MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
152 |             else:
153 |                 MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
154 |             output = MultiScaleDeformableAttnFunction.apply(
155 |                 value, spatial_shapes, level_start_index, sampling_locations,
156 |                 attention_weights, self.im2col_step)
157 |         else:
158 |             output = multi_scale_deformable_attn_pytorch(
159 |                 value, spatial_shapes, sampling_locations, attention_weights)
160 | 
161 |         output = self.output_proj(output)
162 | 
163 |         if not self.batch_first:
164 |             # (num_query, bs ,embed_dims)
165 |             output = output.permute(1, 0, 2)
166 | 
167 |         return self.dropout(output) + identity
168 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/modules/laneseg_decoder.py:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------------#
  2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
  3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
  4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
  5 | #---------------------------------------------------------------------------------------#
  6 | 
  7 | import torch
  8 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE, TRANSFORMER_LAYER
  9 | from mmcv.cnn.bricks.transformer import TransformerLayerSequence, BaseTransformerLayer
 10 | from mmdet.models.utils.transformer import inverse_sigmoid
 11 | 
 12 | 
 13 | @TRANSFORMER_LAYER_SEQUENCE.register_module()
 14 | class LaneSegNetDecoder(TransformerLayerSequence):
 15 | 
 16 |     def __init__(self, 
 17 |                  *args, 
 18 |                  return_intermediate=False, 
 19 |                  pc_range=None, 
 20 |                  sample_idx=[0, 3, 6, 9], # num_ref_pts = len(sample_idx) * 2
 21 |                  pts_dim=3, 
 22 |                  **kwargs):
 23 |         super(LaneSegNetDecoder, self).__init__(*args, **kwargs)
 24 |         self.return_intermediate = return_intermediate
 25 |         self.fp16_enabled = False
 26 |         self.pc_range = pc_range
 27 |         self.sample_idx = sample_idx
 28 |         self.pts_dim = pts_dim
 29 | 
 30 |     def forward(self,
 31 |                 query,
 32 |                 *args,
 33 |                 reference_points=None,
 34 |                 reg_branches=None,
 35 |                 key_padding_mask=None,
 36 |                 **kwargs):
 37 | 
 38 |         output = query
 39 |         intermediate = []
 40 |         intermediate_reference_points = []
 41 |         lane_ref_points = reference_points[:, :, self.sample_idx * 2, :]
 42 |         for lid, layer in enumerate(self.layers):
 43 |             # BS NUM_QUERY NUM_LEVEL NUM_REFPTS 3
 44 |             reference_points_input = lane_ref_points[..., :2].unsqueeze(2)
 45 |             output = layer(
 46 |                 output,
 47 |                 *args,
 48 |                 reference_points=reference_points_input,
 49 |                 key_padding_mask=key_padding_mask,
 50 |                 **kwargs)
 51 |             output = output.permute(1, 0, 2)
 52 | 
 53 |             if reg_branches is not None:
 54 |                 reg_center = reg_branches[0]
 55 |                 reg_offset = reg_branches[1]
 56 | 
 57 |                 tmp = reg_center[lid](output)
 58 |                 bs, num_query, _ = tmp.shape
 59 |                 tmp = tmp.view(bs, num_query, -1, self.pts_dim)
 60 | 
 61 |                 assert reference_points.shape[-1] == self.pts_dim
 62 | 
 63 |                 tmp = tmp + inverse_sigmoid(reference_points)
 64 |                 tmp = tmp.sigmoid()
 65 |                 reference_points = tmp.detach()
 66 | 
 67 |                 coord = tmp.clone()
 68 |                 coord[..., 0] = coord[..., 0] * (self.pc_range[3] - self.pc_range[0]) + self.pc_range[0]
 69 |                 coord[..., 1] = coord[..., 1] * (self.pc_range[4] - self.pc_range[1]) + self.pc_range[1]
 70 |                 if self.pts_dim == 3:
 71 |                     coord[..., 2] = coord[..., 2] * (self.pc_range[5] - self.pc_range[2]) + self.pc_range[2]
 72 |                 centerline = coord.view(bs, num_query, -1).contiguous()
 73 | 
 74 |                 offset = reg_offset[lid](output)
 75 |                 left_laneline = centerline + offset
 76 |                 right_laneline = centerline - offset
 77 |                 left_laneline = left_laneline.view(bs, num_query, -1, self.pts_dim)[:, :, self.sample_idx, :]
 78 |                 right_laneline = right_laneline.view(bs, num_query, -1, self.pts_dim)[:, :, self.sample_idx, :]
 79 | 
 80 |                 lane_ref_points = torch.cat([left_laneline, right_laneline], axis=-2).contiguous().detach()
 81 | 
 82 |                 lane_ref_points[..., 0] = (lane_ref_points[..., 0] - self.pc_range[0]) / (self.pc_range[3] - self.pc_range[0])
 83 |                 lane_ref_points[..., 1] = (lane_ref_points[..., 1] - self.pc_range[1]) / (self.pc_range[4] - self.pc_range[1])
 84 |                 if self.pts_dim == 3:
 85 |                     lane_ref_points[..., 2] = (lane_ref_points[..., 2] - self.pc_range[2]) / (self.pc_range[5] - self.pc_range[2])
 86 | 
 87 |             output = output.permute(1, 0, 2)
 88 |             if self.return_intermediate:
 89 |                 intermediate.append(output)
 90 |                 intermediate_reference_points.append(reference_points)
 91 | 
 92 |         if self.return_intermediate:
 93 |             return torch.stack(intermediate), torch.stack(
 94 |                 intermediate_reference_points)
 95 | 
 96 |         return output, reference_points
 97 | 
 98 | 
 99 | @TRANSFORMER_LAYER.register_module()
100 | class CustomDetrTransformerDecoderLayer(BaseTransformerLayer):
101 | 
102 |     def __init__(self,
103 |                  attn_cfgs,
104 |                  ffn_cfgs,
105 |                  operation_order=None,
106 |                  norm_cfg=dict(type='LN'),
107 |                  **kwargs):
108 |         super(CustomDetrTransformerDecoderLayer, self).__init__(
109 |             attn_cfgs=attn_cfgs,
110 |             ffn_cfgs=ffn_cfgs,
111 |             operation_order=operation_order,
112 |             norm_cfg=norm_cfg,
113 |             **kwargs)
114 |         assert len(operation_order) == 6
115 |         assert set(operation_order) == set(
116 |             ['self_attn', 'norm', 'cross_attn', 'ffn'])
117 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/models/modules/laneseg_transformer.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | import torch.nn as nn
10 | from mmcv.cnn import xavier_init
11 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence
12 | from mmcv.runner import auto_fp16, force_fp32
13 | from mmcv.runner.base_module import BaseModule
14 | from mmdet.models.utils.builder import TRANSFORMER
15 | from .lane_attention import LaneAttention
16 | 
17 | 
18 | @TRANSFORMER.register_module()
19 | class LaneSegNetTransformer(BaseModule):
20 | 
21 |     def __init__(self,
22 |                  decoder=None,
23 |                  embed_dims=256,
24 |                  points_num=1,
25 |                  pts_dim=3,
26 |                  **kwargs):
27 |         super(LaneSegNetTransformer, self).__init__(**kwargs)
28 |         self.decoder = build_transformer_layer_sequence(decoder)
29 |         self.embed_dims = embed_dims
30 |         self.points_num = points_num
31 |         self.pts_dim = pts_dim
32 |         self.fp16_enabled = False
33 |         self.init_layers()
34 | 
35 |     def init_layers(self):
36 |         self.reference_points = nn.Linear(self.embed_dims, self.pts_dim)
37 | 
38 |     def init_weights(self):
39 |         for p in self.parameters():
40 |             if p.dim() > 1:
41 |                 nn.init.xavier_uniform_(p)
42 |         for m in self.modules():
43 |             if isinstance(m, LaneAttention):
44 |                 m.init_weights()
45 |         xavier_init(self.reference_points, distribution='uniform', bias=0.)
46 | 
47 | 
48 |     @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos'))
49 |     def forward(self,
50 |                 mlvl_feats,
51 |                 bev_embed,
52 |                 object_query_embed,
53 |                 bev_h,
54 |                 bev_w,
55 |                 reg_branches=None,
56 |                 cls_branches=None,
57 |                 **kwargs):
58 | 
59 |         bs = mlvl_feats[0].size(0)
60 |         query_pos, query = torch.split(
61 |             object_query_embed, self.embed_dims, dim=1)
62 |         query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1)
63 |         query = query.unsqueeze(0).expand(bs, -1, -1)
64 |         reference_points = self.reference_points(query_pos)
65 | 
66 |         # ident init: repeat reference points to num points
67 |         reference_points = reference_points.repeat(1, 1, self.points_num)
68 |         reference_points = reference_points.sigmoid()
69 |         bs, num_qeury, _ = reference_points.shape
70 |         reference_points = reference_points.view(bs, num_qeury, self.points_num, self.pts_dim)
71 | 
72 |         init_reference_out = reference_points
73 | 
74 |         query = query.permute(1, 0, 2)
75 |         query_pos = query_pos.permute(1, 0, 2)
76 |         bev_embed = bev_embed.permute(1, 0, 2)
77 |         inter_states, inter_references = self.decoder(
78 |             query=query,
79 |             key=None,
80 |             value=bev_embed,
81 |             query_pos=query_pos,
82 |             reference_points=reference_points,
83 |             reg_branches=reg_branches,
84 |             cls_branches=cls_branches,
85 |             spatial_shapes=torch.tensor([[bev_h, bev_w]], device=query.device),
86 |             level_start_index=torch.tensor([0], device=query.device),
87 |             **kwargs)
88 | 
89 |         inter_references_out = inter_references
90 | 
91 |         return inter_states, init_reference_out, inter_references_out
92 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/thirdparty/__init__.py:
--------------------------------------------------------------------------------
1 | from .maptr_head import MapTRHead
2 | from .maptr_transformer import MapTRTransformerDecoderOnly
3 | from .maptr_decoder import MapTRDecoder
4 | from .maptr_assigner import MapTRAssigner
5 | from .map_loss import PtsDirCosLoss
6 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/thirdparty/map_loss.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import functools
  3 | import torch
  4 | from torch import nn as nn
  5 | import mmcv
  6 | from mmdet.models.builder import LOSSES
  7 | 
  8 | 
  9 | @mmcv.jit(derivate=True, coderize=True)
 10 | def custom_weight_dir_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
 11 |     """Apply element-wise weight and reduce loss.
 12 | 
 13 |     Args:
 14 |         loss (Tensor): num_sample, num_dir
 15 |         weight (Tensor): Element-wise weights.
 16 |         reduction (str): Same as built-in losses of PyTorch.
 17 |         avg_factor (float): Average factor when computing the mean of losses.
 18 | 
 19 |     Returns:
 20 |         Tensor: Processed loss values.
 21 |     """
 22 |     # if weight is specified, apply element-wise weight
 23 |     if weight is not None:
 24 |         loss = loss * weight
 25 | 
 26 |     # if avg_factor is not specified, just reduce the loss
 27 |     if avg_factor is None:
 28 |         raise ValueError('avg_factor should not be none for OrderedPtsL1Loss')
 29 |         # loss = reduce_loss(loss, reduction)
 30 |     else:
 31 |         # if reduction is mean, then average the loss by avg_factor
 32 |         if reduction == 'mean':
 33 |             loss = loss.sum()
 34 |             loss = loss / avg_factor
 35 |         # if reduction is 'none', then do nothing, otherwise raise an error
 36 |         elif reduction != 'none':
 37 |             raise ValueError('avg_factor can not be used with reduction="sum"')
 38 |     return loss
 39 | 
 40 | def custom_weighted_dir_loss(loss_func):
 41 |     """Create a weighted version of a given loss function.
 42 | 
 43 |     To use this decorator, the loss function must have the signature like
 44 |     `loss_func(pred, target, **kwargs)`. The function only needs to compute
 45 |     element-wise loss without any reduction. This decorator will add weight
 46 |     and reduction arguments to the function. The decorated function will have
 47 |     the signature like `loss_func(pred, target, weight=None, reduction='mean',
 48 |     avg_factor=None, **kwargs)`.
 49 | 
 50 |     :Example:
 51 | 
 52 |     >>> import torch
 53 |     >>> @weighted_loss
 54 |     >>> def l1_loss(pred, target):
 55 |     >>>     return (pred - target).abs()
 56 | 
 57 |     >>> pred = torch.Tensor([0, 2, 3])
 58 |     >>> target = torch.Tensor([1, 1, 1])
 59 |     >>> weight = torch.Tensor([1, 0, 1])
 60 | 
 61 |     >>> l1_loss(pred, target)
 62 |     tensor(1.3333)
 63 |     >>> l1_loss(pred, target, weight)
 64 |     tensor(1.)
 65 |     >>> l1_loss(pred, target, reduction='none')
 66 |     tensor([1., 1., 2.])
 67 |     >>> l1_loss(pred, target, weight, avg_factor=2)
 68 |     tensor(1.5000)
 69 |     """
 70 | 
 71 |     @functools.wraps(loss_func)
 72 |     def wrapper(pred,
 73 |                 target,
 74 |                 weight=None,
 75 |                 reduction='mean',
 76 |                 avg_factor=None,
 77 |                 **kwargs):
 78 |         # get element-wise loss
 79 |         loss = loss_func(pred, target, **kwargs)
 80 |         loss = custom_weight_dir_reduce_loss(loss, weight, reduction, avg_factor)
 81 |         return loss
 82 | 
 83 |     return wrapper
 84 | 
 85 | @mmcv.jit(derivate=True, coderize=True)
 86 | @custom_weighted_dir_loss
 87 | def pts_dir_cos_loss(pred, target):
 88 |     """ Dir cosine similiarity loss
 89 |     pred (torch.Tensor): shape [num_samples, num_dir, num_coords]
 90 |     target (torch.Tensor): shape [num_samples, num_dir, num_coords]
 91 | 
 92 |     """
 93 |     if target.numel() == 0:
 94 |         return pred.sum() * 0
 95 |     # import pdb;pdb.set_trace()
 96 |     num_samples, num_dir, num_coords = pred.shape
 97 |     loss_func = torch.nn.CosineEmbeddingLoss(reduction='none')
 98 |     tgt_param = target.new_ones((num_samples, num_dir))
 99 |     tgt_param = tgt_param.flatten(0)
100 |     loss = loss_func(pred.flatten(0,1), target.flatten(0,1), tgt_param)
101 |     loss = loss.view(num_samples, num_dir)
102 |     return loss
103 | 
104 | 
105 | @LOSSES.register_module()
106 | class PtsDirCosLoss(nn.Module):
107 |     """L1 loss.
108 | 
109 |     Args:
110 |         reduction (str, optional): The method to reduce the loss.
111 |             Options are "none", "mean" and "sum".
112 |         loss_weight (float, optional): The weight of loss.
113 |     """
114 | 
115 |     def __init__(self, reduction='mean', loss_weight=1.0):
116 |         super(PtsDirCosLoss, self).__init__()
117 |         self.reduction = reduction
118 |         self.loss_weight = loss_weight
119 | 
120 |     def forward(self,
121 |                 pred,
122 |                 target,
123 |                 weight=None,
124 |                 avg_factor=None,
125 |                 reduction_override=None):
126 |         """Forward function.
127 | 
128 |         Args:
129 |             pred (torch.Tensor): The prediction.
130 |             target (torch.Tensor): The learning target of the prediction.
131 |             weight (torch.Tensor, optional): The weight of loss for each
132 |                 prediction. Defaults to None.
133 |             avg_factor (int, optional): Average factor that is used to average
134 |                 the loss. Defaults to None.
135 |             reduction_override (str, optional): The reduction method used to
136 |                 override the original reduction method of the loss.
137 |                 Defaults to None.
138 |         """
139 |         assert reduction_override in (None, 'none', 'mean', 'sum')
140 |         reduction = (
141 |             reduction_override if reduction_override else self.reduction)
142 | 
143 |         loss_dir = self.loss_weight * pts_dir_cos_loss(
144 |             pred, target, weight, reduction=reduction, avg_factor=avg_factor)
145 |         return loss_dir
146 | 
147 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/thirdparty/maptr_assigner.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS
 3 | from mmdet.core.bbox.assigners import AssignResult
 4 | from mmdet.core.bbox.assigners import BaseAssigner
 5 | from mmdet.core.bbox.match_costs import build_match_cost
 6 | import torch.nn.functional as F
 7 | 
 8 | from ..core.lane.util import normalize_3dlane
 9 | 
10 | try:
11 |     from scipy.optimize import linear_sum_assignment
12 | except ImportError:
13 |     linear_sum_assignment = None
14 | 
15 | 
16 | @BBOX_ASSIGNERS.register_module()
17 | class MapTRAssigner(BaseAssigner):
18 | 
19 |     def __init__(self,
20 |                  cls_cost=dict(type='ClassificationCost', weight=1.),
21 |                  reg_cost=dict(type='BBoxL1Cost', weight=1.0),
22 |                  iou_cost=dict(type='IoUCost', weight=0.0),
23 |                  normalize_gt=False,
24 |                  pc_range=None):
25 |         self.cls_cost = build_match_cost(cls_cost)
26 |         self.reg_cost = build_match_cost(reg_cost)
27 |         self.iou_cost = build_match_cost(iou_cost)
28 |         self.pc_range = pc_range
29 |         self.normalize_gt = normalize_gt
30 | 
31 |     def assign(self,
32 |                lanes_pred,
33 |                cls_pred,
34 |                gt_lanes,
35 |                gt_labels,
36 |                gt_bboxes_ignore=None,
37 |                eps=1e-7):
38 | 
39 |         assert gt_bboxes_ignore is None, \
40 |             'Only case when gt_bboxes_ignore is None is supported.'
41 |         num_gts, num_bboxes = gt_lanes.size(0), lanes_pred.size(0)
42 |         # 1. assign -1 by default
43 |         assigned_gt_inds = lanes_pred.new_full((num_bboxes, ),
44 |                                               -1,
45 |                                               dtype=torch.long)
46 |         assigned_labels = lanes_pred.new_full((num_bboxes, ),
47 |                                              -1,
48 |                                              dtype=torch.long)
49 |         if num_gts == 0 or num_bboxes == 0:
50 |             # No ground truth or boxes, return empty assignment
51 |             if num_gts == 0:
52 |                 # No ground truth, assign all to background
53 |                 assigned_gt_inds[:] = 0
54 |             return AssignResult(
55 |                 num_gts, assigned_gt_inds, None, labels=assigned_labels)
56 | 
57 |         # 2. compute the weighted costs
58 |         # classification and bboxcost.
59 |         cls_cost = self.cls_cost(cls_pred, gt_labels)
60 |         
61 |         # regression L1 cost
62 |         assert gt_lanes.size(-1) == lanes_pred.size(-1)
63 | 
64 | 
65 |         num_orders = gt_lanes.size(1)
66 |         gt_lanes = gt_lanes.view(num_gts * num_orders, -1)
67 | 
68 |         reg_cost_ordered = self.reg_cost(lanes_pred.view(num_bboxes, -1), gt_lanes)
69 |         reg_cost_ordered = reg_cost_ordered.view(num_bboxes, num_gts, num_orders)
70 |         reg_cost, order_index = torch.min(reg_cost_ordered, 2)
71 | 
72 |         cost = cls_cost + reg_cost
73 | 
74 |         # 3. do Hungarian matching on CPU using linear_sum_assignment
75 |         cost = cost.detach().cpu()
76 |         if linear_sum_assignment is None:
77 |             raise ImportError('Please run "pip install scipy" '
78 |                               'to install scipy first.')
79 |         matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
80 |         matched_row_inds = torch.from_numpy(matched_row_inds).to(
81 |             lanes_pred.device)
82 |         matched_col_inds = torch.from_numpy(matched_col_inds).to(
83 |             lanes_pred.device)
84 | 
85 |         # 4. assign backgrounds and foregrounds
86 |         # assign all indices to backgrounds first
87 |         assigned_gt_inds[:] = 0
88 |         # assign foregrounds based on matching results
89 |         assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
90 |         assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
91 | 
92 |         return AssignResult(
93 |             num_gts, assigned_gt_inds, None, labels=assigned_labels), order_index
94 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/thirdparty/maptr_decoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE
 3 | from mmcv.cnn.bricks.transformer import TransformerLayerSequence
 4 | from mmdet.models.utils.transformer import inverse_sigmoid
 5 | 
 6 | 
 7 | @TRANSFORMER_LAYER_SEQUENCE.register_module()
 8 | class MapTRDecoder(TransformerLayerSequence):
 9 |     """Implements the decoder in DETR3D transformer.
10 |     Args:
11 |         return_intermediate (bool): Whether to return intermediate outputs.
12 |         coder_norm_cfg (dict): Config of last normalization layer. Default：
13 |             `LN`.
14 |     """
15 | 
16 |     def __init__(self, 
17 |                  *args, 
18 |                  return_intermediate=False, 
19 |                  pts_dim=3,
20 |                  **kwargs):
21 |         super(MapTRDecoder, self).__init__(*args, **kwargs)
22 |         self.return_intermediate = return_intermediate
23 |         self.fp16_enabled = False
24 |         self.pts_dim = pts_dim
25 | 
26 |     def forward(self,
27 |                 query,
28 |                 *args,
29 |                 reference_points=None,
30 |                 reg_branches=None,
31 |                 key_padding_mask=None,
32 |                 **kwargs):
33 | 
34 |         output = query
35 |         intermediate = []
36 |         intermediate_reference_points = []
37 |         for lid, layer in enumerate(self.layers):
38 |             # BS NUM_QUERY NUM_LEVEL 2/3
39 |             reference_points_input = reference_points[..., :2].unsqueeze(
40 |                 2) 
41 |             output = layer(
42 |                 output,
43 |                 *args,
44 |                 reference_points=reference_points_input,
45 |                 key_padding_mask=key_padding_mask,
46 |                 **kwargs)
47 |             output = output.permute(1, 0, 2)
48 | 
49 |             if reg_branches is not None:
50 |                 tmp = reg_branches[lid](output)
51 | 
52 |                 assert reference_points.shape[-1] == self.pts_dim
53 | 
54 |                 new_reference_points = torch.zeros_like(reference_points)
55 |                 new_reference_points[..., :self.pts_dim] = tmp[
56 |                     ..., :self.pts_dim] + inverse_sigmoid(reference_points[..., :self.pts_dim])
57 |                 new_reference_points = new_reference_points.sigmoid()
58 | 
59 |                 reference_points = new_reference_points.detach()
60 | 
61 |             output = output.permute(1, 0, 2)
62 |             if self.return_intermediate:
63 |                 intermediate.append(output)
64 |                 intermediate_reference_points.append(reference_points)
65 | 
66 |         if self.return_intermediate:
67 |             return torch.stack(intermediate), torch.stack(
68 |                 intermediate_reference_points)
69 | 
70 |         return output, reference_points
71 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/thirdparty/maptr_transformer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn as nn
 4 | from mmcv.cnn import xavier_init
 5 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence
 6 | from mmcv.runner import auto_fp16, force_fp32
 7 | from mmcv.runner.base_module import BaseModule
 8 | from mmdet.models.utils.builder import TRANSFORMER
 9 | from projects.bevformer.modules.decoder import CustomMSDeformableAttention
10 | 
11 | @TRANSFORMER.register_module()
12 | class MapTRTransformerDecoderOnly(BaseModule):
13 | 
14 |     def __init__(self,
15 |                  decoder=None,
16 |                  embed_dims=256,
17 |                  pts_dim=3,
18 |                  **kwargs):
19 |         super(MapTRTransformerDecoderOnly, self).__init__(**kwargs)
20 |         self.decoder = build_transformer_layer_sequence(decoder)
21 |         self.embed_dims = embed_dims
22 |         self.pts_dim = pts_dim
23 |         self.fp16_enabled = False
24 |         self.init_layers()
25 | 
26 |     def init_layers(self):
27 |         self.reference_points = nn.Linear(self.embed_dims, self.pts_dim)
28 | 
29 |     def init_weights(self):
30 |         for p in self.parameters():
31 |             if p.dim() > 1:
32 |                 nn.init.xavier_uniform_(p)
33 |         for m in self.modules():
34 |             if isinstance(m, CustomMSDeformableAttention):
35 |                 m.init_weights()
36 |         xavier_init(self.reference_points, distribution='uniform', bias=0.)
37 | 
38 | 
39 |     @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos'))
40 |     def forward(self,
41 |                 mlvl_feats,
42 |                 bev_embed,
43 |                 object_query_embed,
44 |                 bev_h,
45 |                 bev_w,
46 |                 reg_branches=None,
47 |                 cls_branches=None,
48 |                 **kwargs):
49 | 
50 |         bs = mlvl_feats[0].size(0)
51 |         query_pos, query = torch.split(
52 |             object_query_embed, self.embed_dims, dim=1)
53 |         query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1)
54 |         query = query.unsqueeze(0).expand(bs, -1, -1)
55 |         reference_points = self.reference_points(query_pos)
56 | 
57 |         reference_points = reference_points.sigmoid()
58 |         init_reference_out = reference_points
59 | 
60 |         query = query.permute(1, 0, 2)
61 |         query_pos = query_pos.permute(1, 0, 2)
62 |         bev_embed = bev_embed.permute(1, 0, 2)
63 |         inter_states, inter_references = self.decoder(
64 |             query=query,
65 |             key=None,
66 |             value=bev_embed,
67 |             query_pos=query_pos,
68 |             reference_points=reference_points,
69 |             reg_branches=reg_branches,
70 |             cls_branches=cls_branches,
71 |             spatial_shapes=torch.tensor([[bev_h, bev_w]], device=query.device),
72 |             level_start_index=torch.tensor([0], device=query.device),
73 |             **kwargs)
74 | 
75 |         inter_references_out = inter_references
76 | 
77 |         return inter_states, init_reference_out, inter_references_out
78 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_bev_constructor
2 | 


--------------------------------------------------------------------------------
/projects/lanesegnet/utils/builder.py:
--------------------------------------------------------------------------------
 1 | #---------------------------------------------------------------------------------------#
 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving          #
 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet                               #
 4 | # Copyright (c) OpenDriveLab. All rights reserved.                                      #
 5 | #---------------------------------------------------------------------------------------#
 6 | 
 7 | import torch.nn as nn
 8 | from mmcv.utils import Registry, build_from_cfg
 9 | 
10 | BEV_CONSTRUCTOR = Registry('BEV Constructor')
11 | 
12 | def build_bev_constructor(cfg, default_args=None):
13 |     """Builder for BEV Constructor."""
14 |     return build_from_cfg(cfg, BEV_CONSTRUCTOR, default_args)
15 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | similaritymeasures==0.6.0
2 | numpy==1.22.4
3 | scipy==1.8.0
4 | ortools==9.2.9972
5 | setuptools==59.5.0
6 | openlanev2==2.1.0
7 | 


--------------------------------------------------------------------------------
/tools/data_process.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from tqdm import tqdm
 3 | from shapely.geometry import LineString
 4 | from openlanev2.lanesegment.io import io
 5 | 
 6 | """
 7 | This script is used to collect the data from the original OpenLane-V2 dataset.
 8 | The results will be saved in OpenLane-V2 folder.
 9 | The main difference between this script and the original one is that we don't interpolate the points for ped crossing and road bouadary.
10 | """
11 | 
12 | def _fix_pts_interpolate(curve, n_points):
13 |     ls = LineString(curve)
14 |     distances = np.linspace(0, ls.length, n_points)
15 |     curve = np.array([ls.interpolate(distance).coords[0] for distance in distances], dtype=np.float32)
16 |     return curve
17 | 
18 | def collect(root_path : str, data_dict : dict, collection : str, n_points : dict) -> None:
19 | 
20 |     data_list = [(split, segment_id, timestamp.split('.')[0]) \
21 |         for split, segment_ids in data_dict.items() \
22 |             for segment_id, timestamps in segment_ids.items() \
23 |                 for timestamp in timestamps
24 |     ]
25 |     meta = {}
26 |     for split, segment_id, timestamp in tqdm(data_list, desc=f'collecting {collection}', ncols=100):
27 |         identifier = (split, segment_id, timestamp)
28 |         frame = io.json_load(f'{root_path}/{split}/{segment_id}/info/{timestamp}-ls.json')
29 | 
30 |         for k, v in frame['pose'].items():
31 |             frame['pose'][k] = np.array(v, dtype=np.float64)
32 |         for camera in frame['sensor'].keys():
33 |             for para in ['intrinsic', 'extrinsic']:
34 |                 for k, v in frame['sensor'][camera][para].items():
35 |                     frame['sensor'][camera][para][k] = np.array(v, dtype=np.float64)
36 | 
37 |         if 'annotation' not in frame:
38 |             meta[identifier] = frame
39 |             continue
40 | 
41 |         # NOTE: We don't interpolate the points for ped crossing and road bouadary.
42 |         for i, area in enumerate(frame['annotation']['area']):
43 |             frame['annotation']['area'][i]['points'] = np.array(area['points'], dtype=np.float32)
44 |         for i, lane_segment in enumerate(frame['annotation']['lane_segment']):
45 |             frame['annotation']['lane_segment'][i]['centerline'] = _fix_pts_interpolate(np.array(lane_segment['centerline']), n_points['centerline'])
46 |             frame['annotation']['lane_segment'][i]['left_laneline'] = _fix_pts_interpolate(np.array(lane_segment['left_laneline']), n_points['left_laneline'])
47 |             frame['annotation']['lane_segment'][i]['right_laneline'] = _fix_pts_interpolate(np.array(lane_segment['right_laneline']), n_points['right_laneline'])
48 |         for i, traffic_element in enumerate(frame['annotation']['traffic_element']):
49 |             frame['annotation']['traffic_element'][i]['points'] = np.array(traffic_element['points'], dtype=np.float32)
50 |         frame['annotation']['topology_lsls'] = np.array(frame['annotation']['topology_lsls'], dtype=np.int8)
51 |         frame['annotation']['topology_lste'] = np.array(frame['annotation']['topology_lste'], dtype=np.int8)
52 |         meta[identifier] = frame
53 | 
54 |     io.pickle_dump(f'{root_path}/{collection}.pkl', meta)
55 | 
56 | if __name__ == '__main__':
57 |     root_path = 'data/OpenLane-V2'
58 |     file = f'{root_path}/data_dict_subset_A.json'
59 |     subset = 'data_dict_subset_A'
60 |     for split, segments in io.json_load(file).items():
61 |         collect(
62 |             root_path,
63 |             {split: segments},
64 |             f'{subset}_{split}_lanesegnet',
65 |             n_points={
66 |                 'centerline': 10,
67 |                 'left_laneline': 10,
68 |                 'right_laneline': 10
69 |             },
70 |         )
71 | 


--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -x
 3 | 
 4 | timestamp=`date +"%y%m%d.%H%M%S"`
 5 | 
 6 | WORK_DIR=work_dirs/lanesegnet
 7 | CONFIG=projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py
 8 | 
 9 | CHECKPOINT=${WORK_DIR}/latest.pth
10 | 
11 | GPUS=$1
12 | PORT=${PORT:-28510}
13 | 
14 | python -m torch.distributed.run --nproc_per_node=$GPUS --master_port=$PORT \
15 |     tools/test.py $CONFIG $CHECKPOINT --launcher pytorch \
16 |     --out-dir ${WORK_DIR}/test --eval openlane_v2 ${@:2} \
17 |     2>&1 | tee ${WORK_DIR}/test.${timestamp}.log
18 | 


--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -x
 3 | 
 4 | timestamp=`date +"%y%m%d.%H%M%S"`
 5 | 
 6 | WORK_DIR=work_dirs/lanesegnet
 7 | CONFIG=projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py
 8 | 
 9 | GPUS=$1
10 | PORT=${PORT:-28510}
11 | 
12 | python -m torch.distributed.run --nproc_per_node=$GPUS --master_port=$PORT \
13 |     tools/train.py $CONFIG --launcher pytorch --work-dir ${WORK_DIR} ${@:2} \
14 |     2>&1 | tee ${WORK_DIR}/train.${timestamp}.log
15 | 


--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
  1 | # ---------------------------------------------
  2 | # Copyright (c) OpenMMLab. All rights reserved.
  3 | # ---------------------------------------------
  4 | #  Modified by Tianyu Li
  5 | # ---------------------------------------------
  6 | import argparse
  7 | import os
  8 | import os.path as osp
  9 | import time
 10 | import warnings
 11 | 
 12 | import torch
 13 | import mmcv
 14 | from mmcv import Config, DictAction
 15 | from mmcv.cnn import fuse_conv_bn
 16 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
 17 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
 18 |                          wrap_fp16_model)
 19 | from mmcv.utils import get_logger
 20 | from mmdet.apis import multi_gpu_test, set_random_seed
 21 | from mmdet.datasets import replace_ImageToTensor
 22 | from mmdet3d.apis import single_gpu_test
 23 | from mmdet3d.datasets import build_dataloader, build_dataset
 24 | from mmdet3d.models import build_model
 25 | 
 26 | try:
 27 |     # If mmdet version > 2.20.0, setup_multi_processes would be imported and
 28 |     # used from mmdet instead of mmdet3d.
 29 |     from mmdet.utils import setup_multi_processes
 30 | except ImportError:
 31 |     from mmdet3d.utils import setup_multi_processes
 32 | 
 33 | def parse_args():
 34 |     parser = argparse.ArgumentParser(
 35 |         description='MMDet test (and eval) a model')
 36 |     parser.add_argument('config', help='test config file path')
 37 |     parser.add_argument('checkpoint', help='checkpoint file')
 38 |     parser.add_argument('--out',
 39 |         action='store_true',
 40 |         help='output model output file in pickle format')
 41 |     parser.add_argument('--input',
 42 |         action='store_true'
 43 |     )
 44 |     parser.add_argument(
 45 |         '--fuse-conv-bn',
 46 |         action='store_true',
 47 |         help='Whether to fuse conv and bn, this will slightly increase'
 48 |         'the inference speed')
 49 |     parser.add_argument(
 50 |         '--format-only',
 51 |         action='store_true',
 52 |         help='Format the output results without perform evaluation. It is'
 53 |         'useful when you want to format the result to a specific format and '
 54 |         'submit it to the test server')
 55 |     parser.add_argument(
 56 |         '--eval',
 57 |         type=str,
 58 |         nargs='+',
 59 |         help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
 60 |         ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
 61 |     parser.add_argument('--show', action='store_true', help='show results')
 62 |     parser.add_argument(
 63 |         '--out-dir', help='directory where results will be saved')
 64 |     parser.add_argument(
 65 |         '--gpu-collect',
 66 |         action='store_true',
 67 |         help='whether to use gpu to collect results.')
 68 |     parser.add_argument('--seed', type=int, default=0, help='random seed')
 69 |     parser.add_argument(
 70 |         '--deterministic',
 71 |         action='store_true',
 72 |         help='whether to set deterministic options for CUDNN backend.')
 73 |     parser.add_argument(
 74 |         '--cfg-options',
 75 |         nargs='+',
 76 |         action=DictAction,
 77 |         help='override some settings in the used config, the key-value pair '
 78 |         'in xxx=yyy format will be merged into config file. If the value to '
 79 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 80 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 81 |         'Note that the quotation marks are necessary and that no white space '
 82 |         'is allowed.')
 83 |     parser.add_argument(
 84 |         '--options',
 85 |         nargs='+',
 86 |         action=DictAction,
 87 |         help='custom options for evaluation, the key-value pair in xxx=yyy '
 88 |         'format will be kwargs for dataset.evaluate() function (deprecate), '
 89 |         'change to --eval-options instead.')
 90 |     parser.add_argument(
 91 |         '--eval-options',
 92 |         nargs='+',
 93 |         action=DictAction,
 94 |         help='custom options for evaluation, the key-value pair in xxx=yyy '
 95 |         'format will be kwargs for dataset.evaluate() function')
 96 |     parser.add_argument(
 97 |         '--launcher',
 98 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 99 |         default='none',
100 |         help='job launcher')
101 |     parser.add_argument('--local_rank', type=int, default=0)
102 |     args = parser.parse_args()
103 |     if 'LOCAL_RANK' not in os.environ:
104 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
105 | 
106 |     if args.options and args.eval_options:
107 |         raise ValueError(
108 |             '--options and --eval-options cannot be both specified, '
109 |             '--options is deprecated in favor of --eval-options')
110 |     if args.options:
111 |         warnings.warn('--options is deprecated in favor of --eval-options')
112 |         args.eval_options = args.options
113 |     return args
114 | 
115 | 
116 | def main():
117 |     args = parse_args()
118 | 
119 |     assert args.out or args.eval or args.format_only or args.show \
120 |         ('Please specify at least one operation (save/eval/format/show the '
121 |          'results / save the results) with the argument "--out", "--eval"'
122 |          ', "--format-only" or "--show"')
123 | 
124 |     if args.eval and args.format_only:
125 |         print('[WARNING] --eval and --format_only should not be both specified!')
126 |         print('[WARNING] ignored --eval option')
127 |         assert args.out_dir is not None, 'Please specify out_dir when format results'
128 | 
129 |     cfg = Config.fromfile(args.config)
130 |     if args.cfg_options is not None:
131 |         cfg.merge_from_dict(args.cfg_options)
132 | 
133 |     # set multi-process settings
134 |     setup_multi_processes(cfg)
135 | 
136 |     # set cudnn_benchmark
137 |     if cfg.get('cudnn_benchmark', False):
138 |         torch.backends.cudnn.benchmark = True
139 | 
140 |     cfg.model.pretrained = None
141 |     # in case the test dataset is concatenated
142 |     samples_per_gpu = 1
143 |     if isinstance(cfg.data.test, dict):
144 |         cfg.data.test.test_mode = True
145 |         samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
146 |         if samples_per_gpu > 1:
147 |             # Replace 'ImageToTensor' to 'DefaultFormatBundle'
148 |             cfg.data.test.pipeline = replace_ImageToTensor(
149 |                 cfg.data.test.pipeline)
150 |     elif isinstance(cfg.data.test, list):
151 |         for ds_cfg in cfg.data.test:
152 |             ds_cfg.test_mode = True
153 |         samples_per_gpu = max(
154 |             [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
155 |         if samples_per_gpu > 1:
156 |             for ds_cfg in cfg.data.test:
157 |                 ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
158 | 
159 |     # init distributed env first, since logger depends on the dist info.
160 |     if args.launcher == 'none':
161 |         distributed = False
162 |     else:
163 |         distributed = True
164 |         init_dist(args.launcher, **cfg.dist_params)
165 | 
166 |     logger = get_logger(name='mmdet', log_level=cfg.log_level)
167 |     mmcv.mkdir_or_exist(osp.abspath(args.out_dir))
168 |     # set random seeds
169 |     if args.seed is not None:
170 |         set_random_seed(args.seed, deterministic=args.deterministic)
171 | 
172 |     # build the dataloader
173 |     dataset = build_dataset(cfg.data.test)
174 | 
175 |     if args.input:
176 |         logger.info(f'Loading model outputs from results.pkl')
177 |         outputs = mmcv.load(os.path.join(args.out_dir, 'results.pkl'))
178 |     else:
179 |         data_loader = build_dataloader(
180 |             dataset,
181 |             samples_per_gpu=samples_per_gpu,
182 |             workers_per_gpu=cfg.data.workers_per_gpu,
183 |             dist=distributed,
184 |             shuffle=False,
185 |         )
186 | 
187 |         # build the model and load checkpoint
188 |         # cfg.model.train_cfg = None
189 |         model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
190 |         fp16_cfg = cfg.get('fp16', None)
191 |         if fp16_cfg is not None:
192 |             wrap_fp16_model(model)
193 |         checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
194 |         if args.fuse_conv_bn:
195 |             model = fuse_conv_bn(model)
196 |         # old versions did not save class info in checkpoints, this walkaround is
197 |         # for backward compatibility
198 |         if 'CLASSES' in checkpoint.get('meta', {}):
199 |             model.CLASSES = checkpoint['meta']['CLASSES']
200 |         else:
201 |             model.CLASSES = dataset.CLASSES
202 |         # palette for visualization in segmentation tasks
203 |         if 'PALETTE' in checkpoint.get('meta', {}):
204 |             model.PALETTE = checkpoint['meta']['PALETTE']
205 |         elif hasattr(dataset, 'PALETTE'):
206 |             # segmentation dataset has `PALETTE` attribute
207 |             model.PALETTE = dataset.PALETTE
208 | 
209 |         if not distributed:
210 |             model = MMDataParallel(model, device_ids=cfg.gpu_ids)
211 |             outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
212 |         else:
213 |             model = MMDistributedDataParallel(
214 |                 model.cuda(),
215 |                 device_ids=[torch.cuda.current_device()],
216 |                 broadcast_buffers=False)
217 |             outputs = multi_gpu_test(model, data_loader, 
218 |                                      tmpdir=os.path.join(args.out_dir, '.dist_test'), 
219 |                                      gpu_collect=args.gpu_collect)
220 |         print('\n')
221 | 
222 |     rank, _ = get_dist_info()
223 |     if rank == 0:
224 |         if args.out and not args.input:
225 |             logger.info(f'Writing model outputs to results.pkl')
226 |             mmcv.dump(outputs, os.path.join(args.out_dir, 'results.pkl'))
227 | 
228 |         kwargs = {} if args.eval_options is None else args.eval_options
229 |         kwargs['logger'] = logger
230 |         kwargs['show'] = args.show
231 |         kwargs['out_dir'] = args.out_dir
232 | 
233 |         if args.format_only:
234 |             dataset.format_results(outputs, **kwargs)
235 |             return
236 | 
237 |         if args.eval:
238 |             eval_kwargs = cfg.get('evaluation', {}).copy()
239 |             # hard-code way to remove EvalHook args
240 |             for key in [
241 |                 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
242 |                 'rule'
243 |             ]:
244 |                 eval_kwargs.pop(key, None)
245 |             eval_kwargs.update(dict(metric=args.eval, **kwargs))
246 |             eval_kwargs['out_dir'] = os.path.join(args.out_dir, 'vis/')
247 | 
248 |             print(dataset.evaluate(outputs, **eval_kwargs))
249 | 
250 | 
251 | if __name__ == '__main__':
252 |     main()
253 | 


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
  1 | # ---------------------------------------------
  2 | # Copyright (c) OpenMMLab. All rights reserved.
  3 | # ---------------------------------------------
  4 | #  Modified by Tianyu Li
  5 | # ---------------------------------------------
  6 | from __future__ import division
  7 | import argparse
  8 | import copy
  9 | import os
 10 | import time
 11 | import warnings
 12 | from os import path as osp
 13 | 
 14 | import mmcv
 15 | import torch
 16 | import torch.distributed as dist
 17 | from mmcv import Config, DictAction
 18 | from mmcv.runner import get_dist_info, init_dist
 19 | 
 20 | from mmdet import __version__ as mmdet_version
 21 | from mmdet3d import __version__ as mmdet3d_version
 22 | from mmdet3d.apis import init_random_seed, train_model
 23 | from mmdet3d.datasets import build_dataset
 24 | from mmdet3d.models import build_model
 25 | from mmdet3d.utils import collect_env, get_root_logger
 26 | from mmdet.apis import set_random_seed
 27 | from mmseg import __version__ as mmseg_version
 28 | 
 29 | try:
 30 |     # If mmdet version > 2.20.0, setup_multi_processes would be imported and
 31 |     # used from mmdet instead of mmdet3d.
 32 |     from mmdet.utils import setup_multi_processes
 33 | except ImportError:
 34 |     from mmdet3d.utils import setup_multi_processes
 35 | 
 36 | 
 37 | def parse_args():
 38 |     parser = argparse.ArgumentParser(description='Train a detector')
 39 |     parser.add_argument('config', help='train config file path')
 40 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
 41 |     parser.add_argument(
 42 |         '--resume-from', help='the checkpoint file to resume from')
 43 |     parser.add_argument(
 44 |         '--auto-resume',
 45 |         action='store_true',
 46 |         help='resume from the latest checkpoint automatically')
 47 |     parser.add_argument(
 48 |         '--no-validate',
 49 |         action='store_true',
 50 |         help='whether not to evaluate the checkpoint during training')
 51 |     group_gpus = parser.add_mutually_exclusive_group()
 52 |     group_gpus.add_argument(
 53 |         '--gpus',
 54 |         type=int,
 55 |         help='(Deprecated, please use --gpu-id) number of gpus to use '
 56 |         '(only applicable to non-distributed training)')
 57 |     group_gpus.add_argument(
 58 |         '--gpu-ids',
 59 |         type=int,
 60 |         nargs='+',
 61 |         help='(Deprecated, please use --gpu-id) ids of gpus to use '
 62 |         '(only applicable to non-distributed training)')
 63 |     group_gpus.add_argument(
 64 |         '--gpu-id',
 65 |         type=int,
 66 |         default=0,
 67 |         help='number of gpus to use '
 68 |         '(only applicable to non-distributed training)')
 69 |     parser.add_argument('--seed', type=int, default=42, help='random seed')
 70 |     parser.add_argument(
 71 |         '--diff-seed',
 72 |         action='store_true',
 73 |         help='Whether or not set different seeds for different ranks')
 74 |     parser.add_argument(
 75 |         '--deterministic',
 76 |         action='store_true',
 77 |         help='whether to set deterministic options for CUDNN backend.')
 78 |     parser.add_argument(
 79 |         '--options',
 80 |         nargs='+',
 81 |         action=DictAction,
 82 |         help='override some settings in the used config, the key-value pair '
 83 |         'in xxx=yyy format will be merged into config file (deprecate), '
 84 |         'change to --cfg-options instead.')
 85 |     parser.add_argument(
 86 |         '--cfg-options',
 87 |         nargs='+',
 88 |         action=DictAction,
 89 |         help='override some settings in the used config, the key-value pair '
 90 |         'in xxx=yyy format will be merged into config file. If the value to '
 91 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 92 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 93 |         'Note that the quotation marks are necessary and that no white space '
 94 |         'is allowed.')
 95 |     parser.add_argument(
 96 |         '--launcher',
 97 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 98 |         default='none',
 99 |         help='job launcher')
100 |     parser.add_argument('--local_rank', type=int, default=0)
101 |     parser.add_argument(
102 |         '--autoscale-lr',
103 |         action='store_true',
104 |         help='automatically scale lr with the number of gpus')
105 |     args = parser.parse_args()
106 |     if 'LOCAL_RANK' not in os.environ:
107 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
108 | 
109 |     if args.options and args.cfg_options:
110 |         raise ValueError(
111 |             '--options and --cfg-options cannot be both specified, '
112 |             '--options is deprecated in favor of --cfg-options')
113 |     if args.options:
114 |         warnings.warn('--options is deprecated in favor of --cfg-options')
115 |         args.cfg_options = args.options
116 | 
117 |     return args
118 | 
119 | 
120 | def auto_scale_lr(cfg, distributed, logger):
121 |     """Automatically scaling LR according to GPU number and sample per GPU.
122 | 
123 |     Args:
124 |         cfg (config): Training config.
125 |         distributed (bool): Using distributed or not.
126 |         logger (logging.Logger): Logger.
127 |     """
128 |     # Get flag from config
129 |     if ('auto_scale_lr' not in cfg) or \
130 |             (not cfg.auto_scale_lr.get('enable', False)):
131 |         logger.info('Automatic scaling of learning rate (LR)'
132 |                     ' has been disabled.')
133 |         return
134 | 
135 |     # Get base batch size from config
136 |     base_batch_size = cfg.auto_scale_lr.get('base_batch_size', None)
137 |     if base_batch_size is None:
138 |         return
139 | 
140 |     # Get gpu number
141 |     if distributed:
142 |         _, world_size = get_dist_info()
143 |         num_gpus = len(range(world_size))
144 |     else:
145 |         num_gpus = len(cfg.gpu_ids)
146 | 
147 |     # calculate the batch size
148 |     samples_per_gpu = cfg.data.samples_per_gpu
149 |     batch_size = num_gpus * samples_per_gpu
150 |     logger.info(f'Training with {num_gpus} GPU(s) with {samples_per_gpu} '
151 |                 f'samples per GPU. The total batch size is {batch_size}.')
152 | 
153 |     if batch_size != base_batch_size:
154 |         # scale LR with
155 |         # [linear scaling rule](https://arxiv.org/abs/1706.02677)
156 |         scaled_lr = (batch_size / base_batch_size) * cfg.optimizer.lr
157 |         logger.info('LR has been automatically scaled '
158 |                     f'from {cfg.optimizer.lr} to {scaled_lr}')
159 |         cfg.optimizer.lr = scaled_lr
160 |     else:
161 |         logger.info('The batch size match the '
162 |                     f'base batch size: {base_batch_size}, '
163 |                     f'will not scaling the LR ({cfg.optimizer.lr}).')
164 | 
165 | 
166 | def main():
167 |     args = parse_args()
168 | 
169 |     cfg = Config.fromfile(args.config)
170 |     if args.cfg_options is not None:
171 |         cfg.merge_from_dict(args.cfg_options)
172 | 
173 |     # set multi-process settings
174 |     setup_multi_processes(cfg)
175 | 
176 |     # set cudnn_benchmark
177 |     if cfg.get('cudnn_benchmark', False):
178 |         torch.backends.cudnn.benchmark = True
179 | 
180 |     # work_dir is determined in this priority: CLI > segment in file > filename
181 |     if args.work_dir is not None:
182 |         # update configs according to CLI args if args.work_dir is not None
183 |         cfg.work_dir = args.work_dir
184 |     elif cfg.get('work_dir', None) is None:
185 |         # use config filename as default work_dir if cfg.work_dir is None
186 |         cfg.work_dir = osp.join('./work_dirs',
187 |                                 osp.splitext(osp.basename(args.config))[0])
188 |     if args.resume_from is not None:
189 |         cfg.resume_from = args.resume_from
190 | 
191 |     if args.auto_resume:
192 |         cfg.auto_resume = args.auto_resume
193 |         warnings.warn('`--auto-resume` is only supported when mmdet'
194 |                       'version >= 2.20.0 for 3D detection model or'
195 |                       'mmsegmentation version >= 0.21.0 for 3D'
196 |                       'segmentation model')
197 | 
198 |     if args.gpus is not None:
199 |         cfg.gpu_ids = range(1)
200 |         warnings.warn('`--gpus` is deprecated because we only support '
201 |                       'single GPU mode in non-distributed training. '
202 |                       'Use `gpus=1` now.')
203 |     if args.gpu_ids is not None:
204 |         cfg.gpu_ids = args.gpu_ids[0:1]
205 |         warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
206 |                       'Because we only support single GPU mode in '
207 |                       'non-distributed training. Use the first GPU '
208 |                       'in `gpu_ids` now.')
209 |     if args.gpus is None and args.gpu_ids is None:
210 |         cfg.gpu_ids = [args.gpu_id]
211 | 
212 |     if args.autoscale_lr:
213 |         if 'auto_scale_lr' in cfg and \
214 |                 'base_batch_size' in cfg.auto_scale_lr:
215 |             cfg.auto_scale_lr.enable = True
216 |         else:
217 |             warnings.warn('Can not find "auto_scale_lr" or '
218 |                           '"auto_scale_lr.base_batch_size" in your'
219 |                           ' configuration file.')
220 | 
221 |     # init distributed env first, since logger depends on the dist info.
222 |     if args.launcher == 'none':
223 |         distributed = False
224 |     else:
225 |         distributed = True
226 |         init_dist(args.launcher, **cfg.dist_params)
227 |         # re-set gpu_ids with distributed training mode
228 |         _, world_size = get_dist_info()
229 |         cfg.gpu_ids = range(world_size)
230 | 
231 |     # create work_dir
232 |     mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
233 |     # dump config
234 |     cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
235 |     # init the logger before other steps
236 |     timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
237 |     log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
238 |     # specify logger name, if we still use 'mmdet', the output info will be
239 |     # filtered and won't be saved in the log_file
240 |     logger = get_root_logger(
241 |         log_file=log_file, log_level=cfg.log_level, name='mmdet')
242 | 
243 |     # init the meta dict to record some important information such as
244 |     # environment info and seed, which will be logged
245 |     meta = dict()
246 |     # log env info
247 |     env_info_dict = collect_env()
248 |     env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
249 |     dash_line = '-' * 60 + '\n'
250 |     logger.info('Environment info:\n' + dash_line + env_info + '\n' +
251 |                 dash_line)
252 |     meta['env_info'] = env_info
253 |     meta['config'] = cfg.pretty_text
254 | 
255 |     # log some basic info
256 |     logger.info(f'Distributed training: {distributed}')
257 |     logger.info(f'Config:\n{cfg.pretty_text}')
258 | 
259 |     # set random seeds
260 |     seed = init_random_seed(args.seed)
261 |     seed = seed + dist.get_rank() if args.diff_seed else seed
262 |     logger.info(f'Set random seed to {seed}, '
263 |                 f'deterministic: {args.deterministic}')
264 |     set_random_seed(seed, deterministic=args.deterministic)
265 |     cfg.seed = seed
266 |     meta['seed'] = seed
267 |     meta['exp_name'] = osp.basename(args.config)
268 | 
269 |     model = build_model(
270 |         cfg.model,
271 |         train_cfg=cfg.get('train_cfg'),
272 |         test_cfg=cfg.get('test_cfg'))
273 |     model.init_weights()
274 | 
275 |     logger.info(f'Model:\n{model}')
276 |     datasets = [build_dataset(cfg.data.train)]
277 |     if len(cfg.workflow) == 2:
278 |         val_dataset = copy.deepcopy(cfg.data.val)
279 |         # in case we use a dataset wrapper
280 |         if 'dataset' in cfg.data.train:
281 |             val_dataset.pipeline = cfg.data.train.dataset.pipeline
282 |         else:
283 |             val_dataset.pipeline = cfg.data.train.pipeline
284 |         # set test_mode=False here in deep copied config
285 |         # which do not affect AP/AR calculation later
286 |         # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa
287 |         val_dataset.test_mode = False
288 |         datasets.append(build_dataset(val_dataset))
289 |     if cfg.checkpoint_config is not None:
290 |         # save mmdet version, config file content and class names in
291 |         # checkpoints as meta data
292 |         cfg.checkpoint_config.meta = dict(
293 |             mmdet_version=mmdet_version,
294 |             mmseg_version=mmseg_version,
295 |             mmdet3d_version=mmdet3d_version,
296 |             config=cfg.pretty_text,
297 |             CLASSES=datasets[0].CLASSES,
298 |             PALETTE=datasets[0].PALETTE  # for segmentors
299 |             if hasattr(datasets[0], 'PALETTE') else None)
300 |     # add an attribute for visualization convenience
301 |     model.CLASSES = datasets[0].CLASSES
302 |     auto_scale_lr(cfg, distributed=distributed, logger=logger)
303 |     train_model(
304 |         model,
305 |         datasets,
306 |         cfg,
307 |         distributed=distributed,
308 |         validate=(not args.no_validate),
309 |         timestamp=timestamp,
310 |         meta=meta)
311 | 
312 | 
313 | if __name__ == '__main__':
314 |     main()
315 | 


--------------------------------------------------------------------------------