├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── README.md ├── figs ├── lane_segment.jpg └── pipeline.png ├── projects ├── __init__.py ├── bevformer │ ├── __init__.py │ └── modules │ │ ├── __init__.py │ │ ├── custom_base_transformer_layer.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── multi_scale_deformable_attn_function.py │ │ ├── spatial_cross_attention.py │ │ └── temporal_self_attention.py ├── configs │ ├── lanesegnet_r50_8x1_24e_olv2_subset_A.py │ └── lanesegnet_r50_8x1_24e_olv2_subset_A_mapele_bucket.py └── lanesegnet │ ├── __init__.py │ ├── core │ ├── __init__.py │ ├── lane │ │ ├── __init__.py │ │ ├── area_coder.py │ │ ├── lane_coder.py │ │ ├── lane_hungarian_assigner.py │ │ ├── match_cost.py │ │ └── util.py │ └── visualizer │ │ ├── __init__.py │ │ └── lane_segment.py │ ├── datasets │ ├── __init__.py │ ├── openlanev2_evaluate_custom.py │ ├── openlanev2_subset_A_lanesegnet_dataset.py │ ├── openlanev2_subset_A_mapele_bucket_dataset.py │ └── pipelines │ │ ├── __init__.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── transform_3d.py │ │ └── transform_3d_lane.py │ ├── models │ ├── __init__.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── deformable_detr_head.py │ │ ├── laneseg_head.py │ │ └── relationship_head.py │ ├── detectors │ │ ├── __init__.py │ │ ├── lanesegnet.py │ │ └── lanesegnet_mapele_bucket.py │ └── modules │ │ ├── __init__.py │ │ ├── bevformer_constructer.py │ │ ├── lane_attention.py │ │ ├── laneseg_decoder.py │ │ └── laneseg_transformer.py │ ├── thirdparty │ ├── __init__.py │ ├── map_loss.py │ ├── maptr_assigner.py │ ├── maptr_decoder.py │ ├── maptr_head.py │ └── maptr_transformer.py │ └── utils │ ├── __init__.py │ └── builder.py ├── requirements.txt └── tools ├── data_process.py ├── dist_test.sh ├── dist_train.sh ├── test.py └── train.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [OpenDriveLab] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other 
infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # Custom 133 | ckpts 134 | data 135 | work_dirs 136 | .vscode 137 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving 4 | 5 | [![arXiv](https://img.shields.io/badge/arXiv-2312.16108-479ee2.svg)](https://arxiv.org/abs/2312.16108) 6 | [![OpenLane-V2](https://img.shields.io/badge/GitHub-OpenLane--V2-blueviolet.svg)](https://github.com/OpenDriveLab/OpenLane-V2) 7 | [![LICENSE](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](./LICENSE) 8 | 9 | ![lanesegment](figs/lane_segment.jpg "Diagram of Lane Segment") 10 | 11 |
12 | 13 | > - :mailbox_with_mail: Primary contact: [Tianyu Li](https://scholar.google.com/citations?user=X6vTmEMAAAAJ) ( litianyu@opendrivelab.com ) 14 | > - [arXiv paper](https://arxiv.org/abs/2312.16108) | [OpenReview](https://openreview.net/forum?id=LsURkIPYR5), ICLR 2024 15 | > - [Blog (Zhihu)](https://zhuanlan.zhihu.com/p/678596087) | [Slides TODO]() 16 | 17 | ## Highlights 18 | 19 | :fire: We advocate **Lane Segment** as a map learning paradigm that seamlessly incorporates both map :motorway: geometry and :spider_web: topology information. 20 | 21 | :checkered_flag: **Lane Segment** and **`OpenLane-V2 Map Element Bucket`** serve as a track in the **`CVPR 2024 Autonomous Grand Challenge`**. 22 | 23 | > - Official website: :globe_with_meridians: [AGC2024 Mapless Driving](https://opendrivelab.com/challenge2024/#mapless_driving) 24 | > - Evaluation server: :hugs: [Hugging Face](https://huggingface.co/spaces/AGC2024/mapless-driving-2024) 25 | 26 | This repository can be used as a starting point for Mapless Driving track. 27 | 28 | 29 | 30 | ## News 31 | - **`[2024/3]`** We released a multi-head version of LaneSegNet for the OpenLane-V2 Map Element Bucket! 32 | - Refer to this [config](projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A_mapele_bucket.py) file to train the baseline! 33 | - Please use the `*_ls.pkl` files generated by [OpenLane-V2](https://github.com/OpenDriveLab/OpenLane-V2/tree/master/data#preprocess) preprocess scripts! 34 | - You can use `./tools/dist_test.sh {GPUs} --format-only` to generate the `submission.pkl` now! 35 | - **`[2023/12]`** LaneSegNet [paper](https://arxiv.org/abs/2312.16108) is available on arXiv. Code is also released! 36 | 37 | --- 38 | 39 | ![method](figs/pipeline.png "Pipeline of LaneSegNet") 40 | 41 |
42 | Overall pipeline of LaneSegNet 43 |
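To make the lane segment representation concrete, here is an illustrative sketch (the field names are made up for this sketch, not taken from the repo). Following the released config (`method_para['n_points'] = 10`, `pts_dim = 3`), each lane segment bundles three 3-D polylines — a centerline and its left/right boundaries — plus a class label and a line-type label per boundary, which is where the regression target size `code_size = 3 * 10 * 3 = 90` in the config comes from.

```python
import numpy as np

# Illustrative container only -- field names are made up for this sketch.
# The shapes mirror the released config: method_para['n_points'] = 10, pts_dim = 3.
n_points, pts_dim = 10, 3

lane_segment = {
    'centerline':     np.zeros((n_points, pts_dim)),  # 10 x (x, y, z)
    'left_boundary':  np.zeros((n_points, pts_dim)),
    'right_boundary': np.zeros((n_points, pts_dim)),
    'left_type': 0,    # one of the 3 line-type classes in the config
    'right_type': 0,
    'label': 0,        # 'lane_segment' or 'ped_crossing'
}

# Flattened regression target handled by the lane head:
# 3 polylines x 10 points x 3 dims = 90 = code_size in the config.
reg_target = np.concatenate(
    [lane_segment[k].reshape(-1)
     for k in ('centerline', 'left_boundary', 'right_boundary')])
assert reg_target.size == 3 * n_points * pts_dim
```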
44 | 45 | ## Table of Contents 46 | - [Model Zoo](#model-zoo) 47 | - [Prerequisites](#prerequisites) 48 | - [Installation](#installation) 49 | - [Prepare Dataset](#prepare-dataset) 50 | - [Train and Evaluate](#train-and-evaluate) 51 | - [License and Citation](#license-and-citation) 52 | 53 | ## Model Zoo 54 | 55 | > [!NOTE] 56 | > The evaluation results below are based on OpenLane-V2 devkit `v2.1.0`. In this version, we have addressed a loophole in the TOP metric, which caused the TOPlsls value to be significantly higher than what was reported in the paper. 57 | > For more details, please see issue [#76](https://github.com/OpenDriveLab/OpenLane-V2/issues/76) of OpenLane-V2. 58 | 59 | ### Performance in LaneSegNet paper 60 | 61 | | Model | Epoch | mAP | TOPlsls | Memory | Config | Download | 62 | | :--------: | :---: | :---: | :----------------: | :----: | :----: | :------: | 63 | | LaneSegNet | 24 | 33.5 | 25.4 | 9.4G | [config](projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py) | [ckpt](https://huggingface.co/OpenDriveLab/lanesegnet_r50_8x1_24e_olv2_subset_A/resolve/main/lanesegnet_r50_8x1_24e_olv2_subset_A.pth) / [log](https://huggingface.co/OpenDriveLab/lanesegnet_r50_8x1_24e_olv2_subset_A/resolve/main/20231225_213951.log) | 64 | 65 | > The mAP is averaged over the lane segment and pedestrian crossing classes. 66 | 67 | ### Performance on OpenLane-V2 Map Element Bucket 68 | 69 | | Model | Epoch | DETls | DETa | DETt | TOPlsls | TOPlste | Config | 70 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 71 | | LaneSegNet-meb | 24 | 27.8 | 23.8 | 36.9 | 24.1 | 21.3 | [config](projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A_mapele_bucket.py) | 72 | 73 | > This is a naive multi-branch model for the Map Element Bucket. 74 | > The pedestrian crossing and road boundary are detected by an additional MapTR head, and traffic elements are detected by a Deformable DETR head. The hyper-parameters are only roughly tuned. 75 | 76 | ## Prerequisites 77 | 78 | - Linux 79 | - Python 3.8.x 80 | - NVIDIA GPU + CUDA 11.1 81 | - PyTorch 1.9.1 82 | 83 | ## Installation 84 | 85 | We recommend using [conda](https://docs.conda.io/en/latest/miniconda.html) to run the code. 86 | ```bash 87 | conda create -n lanesegnet python=3.8 -y 88 | conda activate lanesegnet 89 | 90 | # (optional) If you have CUDA installed on your computer, skip this step. 91 | conda install cudatoolkit=11.1.1 -c conda-forge 92 | 93 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html 94 | ``` 95 | 96 | Install mm-series packages. 97 | ```bash 98 | pip install mmcv-full==1.5.2 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 99 | pip install mmdet==2.26.0 100 | pip install mmsegmentation==0.29.1 101 | pip install mmdet3d==1.0.0rc6 102 | ``` 103 | 104 | Install other required packages. 105 | ```bash 106 | pip install -r requirements.txt 107 | ``` 108 | 109 | ## Prepare Dataset 110 | 111 | Follow the [OpenLane-V2 repo](https://github.com/OpenDriveLab/OpenLane-V2/blob/v2.1.0/data) to download the **Image** and the **Map Element Bucket** data, then run the following script to collect the data for this repo. 112 | 113 | > [!IMPORTANT] 114 | > 115 | > :exclamation: Please note that the script for generating LaneSegNet data is not the same as the one for the OpenLane-V2 Map Element Bucket. The `*_lanesegnet.pkl` files are not the same as the `*_ls.pkl` files. 116 | > 117 | > :bell: The `Map Element Bucket` has been updated as of October 2023. Please ensure you download the most recent data.
118 | 119 | ```bash 120 | cd LaneSegNet 121 | mkdir data 122 | 123 | ln -s {Path to OpenLane-V2 repo}/data/OpenLane-V2 ./data/ 124 | python ./tools/data_process.py 125 | ``` 126 | 127 | After setup, the hierarchy of the `data` folder is as follows: 128 | ``` 129 | data/OpenLane-V2 130 | ├── train 131 | | └── ... 132 | ├── val 133 | | └── ... 134 | ├── test 135 | | └── ... 136 | ├── data_dict_subset_A_train_lanesegnet.pkl 137 | ├── data_dict_subset_A_val_lanesegnet.pkl 138 | ├── ... 139 | ``` 140 | 141 | ## Train and Evaluate 142 | 143 | ### Train 144 | 145 | We recommend using 8 GPUs for training. If a different number of GPUs is used, add the `--autoscale-lr` option to scale the learning rate with the number of GPUs. The training logs will be saved to `work_dirs/lanesegnet`. 146 | 147 | ```bash 148 | cd LaneSegNet 149 | mkdir -p work_dirs/lanesegnet 150 | 151 | ./tools/dist_train.sh 8 [--autoscale-lr] 152 | ``` 153 | 154 | ### Evaluate 155 | You can set `--show` to visualize the results. 156 | 157 | ```bash 158 | ./tools/dist_test.sh 8 [--show] 159 | ``` 160 | 161 | ## License and Citation 162 | All assets and code are under the [Apache 2.0 license](./LICENSE) unless specified otherwise. 163 | 164 | If this work is helpful for your research, please consider citing the following BibTeX entries. 165 | 166 | ``` bibtex 167 | @inproceedings{li2023lanesegnet, 168 | title={LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving}, 169 | author={Li, Tianyu and Jia, Peijin and Wang, Bangjun and Chen, Li and Jiang, Kun and Yan, Junchi and Li, Hongyang}, 170 | booktitle={ICLR}, 171 | year={2024} 172 | } 173 | 174 | @inproceedings{wang2023openlanev2, 175 | title={OpenLane-V2: A Topology Reasoning Benchmark for Unified 3D HD Mapping}, 176 | author={Wang, Huijie and Li, Tianyu and Li, Yang and Chen, Li and Sima, Chonghao and Liu, Zhenbo and Wang, Bangjun and Jia, Peijin and Wang, Yuting and Jiang, Shengyin and Wen, Feng and Xu, Hang and Luo, Ping and Yan, Junchi and Zhang, Wei and Li, Hongyang}, 177 | booktitle={NeurIPS}, 178 | year={2023} 179 | } 180 | ``` 181 | 182 | ## Related resources 183 | 184 | We acknowledge all the open-source contributors of the following projects for making this work possible: 185 | 186 | - [OpenLane-V2](https://github.com/OpenDriveLab/OpenLane-V2) 187 | - [BEVFormer](https://github.com/fundamentalvision/BEVFormer) 188 | - [TopoNet](https://github.com/OpenDriveLab/TopoNet) 189 | - [PersFormer](https://github.com/OpenDriveLab/PersFormer_3DLane) 190 | - [OpenLane](https://github.com/OpenDriveLab/OpenLane) 191 | - [MapTR](https://github.com/hustvl/MapTR) 192 | -------------------------------------------------------------------------------- /figs/lane_segment.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/LaneSegNet/699e5862ba2c173490b7e1f47b06184be8b7306e/figs/lane_segment.jpg -------------------------------------------------------------------------------- /figs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/LaneSegNet/699e5862ba2c173490b7e1f47b06184be8b7306e/figs/pipeline.png -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/LaneSegNet/699e5862ba2c173490b7e1f47b06184be8b7306e/projects/__init__.py
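Before the `projects/bevformer` modules below, a note on how they are wired in: each block (attention layers, encoder/decoder, heads) is registered into the mmcv registries and is then referenced from the configs purely by its `type` string. The snippet below is a minimal sketch of that pattern, not part of the repo; it assumes the environment from the Installation section and execution from the repo root.

```python
# Minimal sketch of the registry pattern (not part of the repo). Importing the
# package runs the @ATTENTION.register_module() decorators, after which a module
# can be built from a plain config dict by its `type` name -- exactly how the
# config at the end of this listing refers to these classes.
from mmcv.cnn.bricks.transformer import build_attention

import projects.bevformer  # noqa: F401  (triggers the registrations)

attn = build_attention(
    dict(type='TemporalSelfAttention', embed_dims=256, num_levels=1))
print(type(attn).__name__)  # -> TemporalSelfAttention
```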
-------------------------------------------------------------------------------- /projects/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | -------------------------------------------------------------------------------- /projects/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 2 | from .temporal_self_attention import TemporalSelfAttention 3 | from .encoder import BEVFormerEncoder, BEVFormerLayer 4 | from .decoder import DetectionTransformerDecoder 5 | -------------------------------------------------------------------------------- /projects/bevformer/modules/custom_base_transformer_layer.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | import copy 8 | import warnings 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | from mmcv import ConfigDict, deprecated_api_warning 14 | from mmcv.cnn import Linear, build_activation_layer, build_norm_layer 15 | from mmcv.runner.base_module import BaseModule, ModuleList, Sequential 16 | 17 | from mmcv.cnn.bricks.registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING, 18 | TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE) 19 | 20 | # Avoid BC-breaking of importing MultiScaleDeformableAttention from this file 21 | try: 22 | from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401 23 | warnings.warn( 24 | ImportWarning( 25 | '``MultiScaleDeformableAttention`` has been moved to ' 26 | '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 27 | '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 28 | 'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 29 | )) 30 | except ImportError: 31 | warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from ' 32 | '``mmcv.ops.multi_scale_deform_attn``, ' 33 | 'You should install ``mmcv-full`` if you need this module. ') 34 | from mmcv.cnn.bricks.transformer import build_feedforward_network, build_attention 35 | 36 | 37 | @TRANSFORMER_LAYER.register_module() 38 | class MyCustomBaseTransformerLayer(BaseModule): 39 | """Base `TransformerLayer` for vision transformer. 40 | It can be built from `mmcv.ConfigDict` and support more flexible 41 | customization, for example, using any number of `FFN or LN ` and 42 | use different kinds of `attention` by specifying a list of `ConfigDict` 43 | named `attn_cfgs`. It is worth mentioning that it supports `prenorm` 44 | when you specifying `norm` as the first element of `operation_order`. 45 | More details about the `prenorm`: `On Layer Normalization in the 46 | Transformer Architecture `_ . 47 | Args: 48 | attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): 49 | Configs for `self_attention` or `cross_attention` modules, 50 | The order of the configs in the list should be consistent with 51 | corresponding attentions in operation_order. 52 | If it is a dict, all of the attention modules in operation_order 53 | will be built with this config. Default: None. 
54 | ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): 55 | Configs for FFN, The order of the configs in the list should be 56 | consistent with corresponding ffn in operation_order. 57 | If it is a dict, all of the attention modules in operation_order 58 | will be built with this config. 59 | operation_order (tuple[str]): The execution order of operation 60 | in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm'). 61 | Support `prenorm` when you specifying first element as `norm`. 62 | Default:None. 63 | norm_cfg (dict): Config dict for normalization layer. 64 | Default: dict(type='LN'). 65 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 66 | Default: None. 67 | batch_first (bool): Key, Query and Value are shape 68 | of (batch, n, embed_dim) 69 | or (n, batch, embed_dim). Default to False. 70 | """ 71 | 72 | def __init__(self, 73 | attn_cfgs=None, 74 | ffn_cfgs=dict( 75 | type='FFN', 76 | embed_dims=256, 77 | feedforward_channels=1024, 78 | num_fcs=2, 79 | ffn_drop=0., 80 | act_cfg=dict(type='ReLU', inplace=True), 81 | ), 82 | operation_order=None, 83 | norm_cfg=dict(type='LN'), 84 | init_cfg=None, 85 | batch_first=True, 86 | **kwargs): 87 | 88 | deprecated_args = dict( 89 | feedforward_channels='feedforward_channels', 90 | ffn_dropout='ffn_drop', 91 | ffn_num_fcs='num_fcs') 92 | for ori_name, new_name in deprecated_args.items(): 93 | if ori_name in kwargs: 94 | warnings.warn( 95 | f'The arguments `{ori_name}` in BaseTransformerLayer ' 96 | f'has been deprecated, now you should set `{new_name}` ' 97 | f'and other FFN related arguments ' 98 | f'to a dict named `ffn_cfgs`. ') 99 | ffn_cfgs[new_name] = kwargs[ori_name] 100 | 101 | super(MyCustomBaseTransformerLayer, self).__init__(init_cfg) 102 | 103 | self.batch_first = batch_first 104 | 105 | assert set(operation_order) & set( 106 | ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ 107 | set(operation_order), f'The operation_order of' \ 108 | f' {self.__class__.__name__} should ' \ 109 | f'contains all four operation type ' \ 110 | f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" 111 | 112 | num_attn = operation_order.count('self_attn') + operation_order.count( 113 | 'cross_attn') 114 | if isinstance(attn_cfgs, dict): 115 | attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] 116 | else: 117 | assert num_attn == len(attn_cfgs), f'The length ' \ 118 | f'of attn_cfg {num_attn} is ' \ 119 | f'not consistent with the number of attention' \ 120 | f'in operation_order {operation_order}.' 121 | 122 | self.num_attn = num_attn 123 | self.operation_order = operation_order 124 | self.norm_cfg = norm_cfg 125 | self.pre_norm = operation_order[0] == 'norm' 126 | self.attentions = ModuleList() 127 | 128 | index = 0 129 | for operation_name in operation_order: 130 | if operation_name in ['self_attn', 'cross_attn']: 131 | if 'batch_first' in attn_cfgs[index]: 132 | assert self.batch_first == attn_cfgs[index]['batch_first'] 133 | else: 134 | attn_cfgs[index]['batch_first'] = self.batch_first 135 | attention = build_attention(attn_cfgs[index]) 136 | # Some custom attentions used as `self_attn` 137 | # or `cross_attn` can have different behavior. 
138 | attention.operation_name = operation_name 139 | self.attentions.append(attention) 140 | index += 1 141 | 142 | self.embed_dims = self.attentions[0].embed_dims 143 | 144 | self.ffns = ModuleList() 145 | num_ffns = operation_order.count('ffn') 146 | if isinstance(ffn_cfgs, dict): 147 | ffn_cfgs = ConfigDict(ffn_cfgs) 148 | if isinstance(ffn_cfgs, dict): 149 | ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)] 150 | assert len(ffn_cfgs) == num_ffns 151 | for ffn_index in range(num_ffns): 152 | if 'embed_dims' not in ffn_cfgs[ffn_index]: 153 | ffn_cfgs['embed_dims'] = self.embed_dims 154 | else: 155 | assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims 156 | 157 | self.ffns.append( 158 | build_feedforward_network(ffn_cfgs[ffn_index])) 159 | 160 | self.norms = ModuleList() 161 | num_norms = operation_order.count('norm') 162 | for _ in range(num_norms): 163 | self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) 164 | 165 | def forward(self, 166 | query, 167 | key=None, 168 | value=None, 169 | query_pos=None, 170 | key_pos=None, 171 | attn_masks=None, 172 | query_key_padding_mask=None, 173 | key_padding_mask=None, 174 | **kwargs): 175 | """Forward function for `TransformerDecoderLayer`. 176 | **kwargs contains some specific arguments of attentions. 177 | Args: 178 | query (Tensor): The input query with shape 179 | [num_queries, bs, embed_dims] if 180 | self.batch_first is False, else 181 | [bs, num_queries embed_dims]. 182 | key (Tensor): The key tensor with shape [num_keys, bs, 183 | embed_dims] if self.batch_first is False, else 184 | [bs, num_keys, embed_dims] . 185 | value (Tensor): The value tensor with same shape as `key`. 186 | query_pos (Tensor): The positional encoding for `query`. 187 | Default: None. 188 | key_pos (Tensor): The positional encoding for `key`. 189 | Default: None. 190 | attn_masks (List[Tensor] | None): 2D Tensor used in 191 | calculation of corresponding attention. The length of 192 | it should equal to the number of `attention` in 193 | `operation_order`. Default: None. 194 | query_key_padding_mask (Tensor): ByteTensor for `query`, with 195 | shape [bs, num_queries]. Only used in `self_attn` layer. 196 | Defaults to None. 197 | key_padding_mask (Tensor): ByteTensor for `query`, with 198 | shape [bs, num_keys]. Default: None. 199 | Returns: 200 | Tensor: forwarded results with shape [num_queries, bs, embed_dims]. 
201 | """ 202 | 203 | norm_index = 0 204 | attn_index = 0 205 | ffn_index = 0 206 | identity = query 207 | if attn_masks is None: 208 | attn_masks = [None for _ in range(self.num_attn)] 209 | elif isinstance(attn_masks, torch.Tensor): 210 | attn_masks = [ 211 | copy.deepcopy(attn_masks) for _ in range(self.num_attn) 212 | ] 213 | warnings.warn(f'Use same attn_mask in all attentions in ' 214 | f'{self.__class__.__name__} ') 215 | else: 216 | assert len(attn_masks) == self.num_attn, f'The length of ' \ 217 | f'attn_masks {len(attn_masks)} must be equal ' \ 218 | f'to the number of attention in ' \ 219 | f'operation_order {self.num_attn}' 220 | 221 | for layer in self.operation_order: 222 | if layer == 'self_attn': 223 | temp_key = temp_value = query 224 | query = self.attentions[attn_index]( 225 | query, 226 | temp_key, 227 | temp_value, 228 | identity if self.pre_norm else None, 229 | query_pos=query_pos, 230 | key_pos=query_pos, 231 | attn_mask=attn_masks[attn_index], 232 | key_padding_mask=query_key_padding_mask, 233 | **kwargs) 234 | attn_index += 1 235 | identity = query 236 | 237 | elif layer == 'norm': 238 | query = self.norms[norm_index](query) 239 | norm_index += 1 240 | 241 | elif layer == 'cross_attn': 242 | query = self.attentions[attn_index]( 243 | query, 244 | key, 245 | value, 246 | identity if self.pre_norm else None, 247 | query_pos=query_pos, 248 | key_pos=key_pos, 249 | attn_mask=attn_masks[attn_index], 250 | key_padding_mask=key_padding_mask, 251 | **kwargs) 252 | attn_index += 1 253 | identity = query 254 | 255 | elif layer == 'ffn': 256 | query = self.ffns[ffn_index]( 257 | query, identity if self.pre_norm else None) 258 | ffn_index += 1 259 | 260 | return query 261 | -------------------------------------------------------------------------------- /projects/bevformer/modules/multi_scale_deformable_attn_function.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | import torch 8 | from torch.cuda.amp import custom_bwd, custom_fwd 9 | from torch.autograd.function import Function, once_differentiable 10 | from mmcv.utils import ext_loader 11 | ext_module = ext_loader.load_ext( 12 | '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) 13 | 14 | 15 | class MultiScaleDeformableAttnFunction_fp16(Function): 16 | 17 | @staticmethod 18 | @custom_fwd(cast_inputs=torch.float16) 19 | def forward(ctx, value, value_spatial_shapes, value_level_start_index, 20 | sampling_locations, attention_weights, im2col_step): 21 | """GPU version of multi-scale deformable attention. 22 | 23 | Args: 24 | value (Tensor): The value has shape 25 | (bs, num_keys, mum_heads, embed_dims//num_heads) 26 | value_spatial_shapes (Tensor): Spatial shape of 27 | each feature map, has shape (num_levels, 2), 28 | last dimension 2 represent (h, w) 29 | sampling_locations (Tensor): The location of sampling points, 30 | has shape 31 | (bs ,num_queries, num_heads, num_levels, num_points, 2), 32 | the last dimension 2 represent (x, y). 33 | attention_weights (Tensor): The weight of sampling points used 34 | when calculate the attention, has shape 35 | (bs ,num_queries, num_heads, num_levels, num_points), 36 | im2col_step (Tensor): The step used in image to column. 
37 | 38 | Returns: 39 | Tensor: has shape (bs, num_queries, embed_dims) 40 | """ 41 | ctx.im2col_step = im2col_step 42 | output = ext_module.ms_deform_attn_forward( 43 | value, 44 | value_spatial_shapes, 45 | value_level_start_index, 46 | sampling_locations, 47 | attention_weights, 48 | im2col_step=ctx.im2col_step) 49 | ctx.save_for_backward(value, value_spatial_shapes, 50 | value_level_start_index, sampling_locations, 51 | attention_weights) 52 | return output 53 | 54 | @staticmethod 55 | @once_differentiable 56 | @custom_bwd 57 | def backward(ctx, grad_output): 58 | """GPU version of backward function. 59 | 60 | Args: 61 | grad_output (Tensor): Gradient 62 | of output tensor of forward. 63 | 64 | Returns: 65 | Tuple[Tensor]: Gradient 66 | of input tensors in forward. 67 | """ 68 | value, value_spatial_shapes, value_level_start_index, \ 69 | sampling_locations, attention_weights = ctx.saved_tensors 70 | grad_value = torch.zeros_like(value) 71 | grad_sampling_loc = torch.zeros_like(sampling_locations) 72 | grad_attn_weight = torch.zeros_like(attention_weights) 73 | 74 | ext_module.ms_deform_attn_backward( 75 | value, 76 | value_spatial_shapes, 77 | value_level_start_index, 78 | sampling_locations, 79 | attention_weights, 80 | grad_output.contiguous(), 81 | grad_value, 82 | grad_sampling_loc, 83 | grad_attn_weight, 84 | im2col_step=ctx.im2col_step) 85 | 86 | return grad_value, None, None, \ 87 | grad_sampling_loc, grad_attn_weight, None 88 | 89 | 90 | class MultiScaleDeformableAttnFunction_fp32(Function): 91 | 92 | @staticmethod 93 | @custom_fwd(cast_inputs=torch.float32) 94 | def forward(ctx, value, value_spatial_shapes, value_level_start_index, 95 | sampling_locations, attention_weights, im2col_step): 96 | """GPU version of multi-scale deformable attention. 97 | 98 | Args: 99 | value (Tensor): The value has shape 100 | (bs, num_keys, mum_heads, embed_dims//num_heads) 101 | value_spatial_shapes (Tensor): Spatial shape of 102 | each feature map, has shape (num_levels, 2), 103 | last dimension 2 represent (h, w) 104 | sampling_locations (Tensor): The location of sampling points, 105 | has shape 106 | (bs ,num_queries, num_heads, num_levels, num_points, 2), 107 | the last dimension 2 represent (x, y). 108 | attention_weights (Tensor): The weight of sampling points used 109 | when calculate the attention, has shape 110 | (bs ,num_queries, num_heads, num_levels, num_points), 111 | im2col_step (Tensor): The step used in image to column. 112 | 113 | Returns: 114 | Tensor: has shape (bs, num_queries, embed_dims) 115 | """ 116 | 117 | ctx.im2col_step = im2col_step 118 | output = ext_module.ms_deform_attn_forward( 119 | value, 120 | value_spatial_shapes, 121 | value_level_start_index, 122 | sampling_locations, 123 | attention_weights, 124 | im2col_step=ctx.im2col_step) 125 | ctx.save_for_backward(value, value_spatial_shapes, 126 | value_level_start_index, sampling_locations, 127 | attention_weights) 128 | return output 129 | 130 | @staticmethod 131 | @once_differentiable 132 | @custom_bwd 133 | def backward(ctx, grad_output): 134 | """GPU version of backward function. 135 | 136 | Args: 137 | grad_output (Tensor): Gradient 138 | of output tensor of forward. 139 | 140 | Returns: 141 | Tuple[Tensor]: Gradient 142 | of input tensors in forward. 
143 | """ 144 | value, value_spatial_shapes, value_level_start_index, \ 145 | sampling_locations, attention_weights = ctx.saved_tensors 146 | grad_value = torch.zeros_like(value) 147 | grad_sampling_loc = torch.zeros_like(sampling_locations) 148 | grad_attn_weight = torch.zeros_like(attention_weights) 149 | 150 | ext_module.ms_deform_attn_backward( 151 | value, 152 | value_spatial_shapes, 153 | value_level_start_index, 154 | sampling_locations, 155 | attention_weights, 156 | grad_output.contiguous(), 157 | grad_value, 158 | grad_sampling_loc, 159 | grad_attn_weight, 160 | im2col_step=ctx.im2col_step) 161 | 162 | return grad_value, None, None, \ 163 | grad_sampling_loc, grad_attn_weight, None 164 | -------------------------------------------------------------------------------- /projects/bevformer/modules/temporal_self_attention.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32 8 | from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch 9 | import warnings 10 | import torch 11 | import torch.nn as nn 12 | from mmcv.cnn import xavier_init, constant_init 13 | from mmcv.cnn.bricks.registry import ATTENTION 14 | import math 15 | from mmcv.runner.base_module import BaseModule, ModuleList, Sequential 16 | from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning, 17 | to_2tuple) 18 | 19 | from mmcv.utils import ext_loader 20 | ext_module = ext_loader.load_ext( 21 | '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) 22 | 23 | 24 | @ATTENTION.register_module() 25 | class TemporalSelfAttention(BaseModule): 26 | """An attention module used in BEVFormer based on Deformable-Detr. 27 | 28 | `Deformable DETR: Deformable Transformers for End-to-End Object Detection. 29 | `_. 30 | 31 | Args: 32 | embed_dims (int): The embedding dimension of Attention. 33 | Default: 256. 34 | num_heads (int): Parallel attention heads. Default: 64. 35 | num_levels (int): The number of feature map used in 36 | Attention. Default: 4. 37 | num_points (int): The number of sampling points for 38 | each query in each head. Default: 4. 39 | im2col_step (int): The step used in image_to_column. 40 | Default: 64. 41 | dropout (float): A Dropout layer on `inp_identity`. 42 | Default: 0.1. 43 | batch_first (bool): Key, Query and Value are shape of 44 | (batch, n, embed_dim) 45 | or (n, batch, embed_dim). Default to True. 46 | norm_cfg (dict): Config dict for normalization layer. 47 | Default: None. 48 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 49 | Default: None. 50 | num_bev_queue (int): In this version, we only use one history BEV and one currenct BEV. 51 | the length of BEV queue is 2. 
52 | """ 53 | 54 | def __init__(self, 55 | embed_dims=256, 56 | num_heads=8, 57 | num_levels=4, 58 | num_points=4, 59 | num_bev_queue=2, 60 | im2col_step=64, 61 | dropout=0.1, 62 | batch_first=True, 63 | norm_cfg=None, 64 | init_cfg=None): 65 | 66 | super().__init__(init_cfg) 67 | if embed_dims % num_heads != 0: 68 | raise ValueError(f'embed_dims must be divisible by num_heads, ' 69 | f'but got {embed_dims} and {num_heads}') 70 | dim_per_head = embed_dims // num_heads 71 | self.norm_cfg = norm_cfg 72 | self.dropout = nn.Dropout(dropout) 73 | self.batch_first = batch_first 74 | self.fp16_enabled = False 75 | 76 | # you'd better set dim_per_head to a power of 2 77 | # which is more efficient in the CUDA implementation 78 | def _is_power_of_2(n): 79 | if (not isinstance(n, int)) or (n < 0): 80 | raise ValueError( 81 | 'invalid input for _is_power_of_2: {} (type: {})'.format( 82 | n, type(n))) 83 | return (n & (n - 1) == 0) and n != 0 84 | 85 | if not _is_power_of_2(dim_per_head): 86 | warnings.warn( 87 | "You'd better set embed_dims in " 88 | 'MultiScaleDeformAttention to make ' 89 | 'the dimension of each attention head a power of 2 ' 90 | 'which is more efficient in our CUDA implementation.') 91 | 92 | self.im2col_step = im2col_step 93 | self.embed_dims = embed_dims 94 | self.num_levels = num_levels 95 | self.num_heads = num_heads 96 | self.num_points = num_points 97 | self.num_bev_queue = num_bev_queue 98 | self.sampling_offsets = nn.Linear( 99 | embed_dims*self.num_bev_queue, num_bev_queue*num_heads * num_levels * num_points * 2) 100 | self.attention_weights = nn.Linear(embed_dims*self.num_bev_queue, 101 | num_bev_queue*num_heads * num_levels * num_points) 102 | self.value_proj = nn.Linear(embed_dims, embed_dims) 103 | self.output_proj = nn.Linear(embed_dims, embed_dims) 104 | self.init_weights() 105 | 106 | def init_weights(self): 107 | """Default initialization for Parameters of Module.""" 108 | constant_init(self.sampling_offsets, 0.) 109 | thetas = torch.arange( 110 | self.num_heads, 111 | dtype=torch.float32) * (2.0 * math.pi / self.num_heads) 112 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 113 | grid_init = (grid_init / 114 | grid_init.abs().max(-1, keepdim=True)[0]).view( 115 | self.num_heads, 1, 1, 116 | 2).repeat(1, self.num_levels*self.num_bev_queue, self.num_points, 1) 117 | 118 | for i in range(self.num_points): 119 | grid_init[:, :, i, :] *= i + 1 120 | 121 | self.sampling_offsets.bias.data = grid_init.view(-1) 122 | constant_init(self.attention_weights, val=0., bias=0.) 123 | xavier_init(self.value_proj, distribution='uniform', bias=0.) 124 | xavier_init(self.output_proj, distribution='uniform', bias=0.) 125 | self._is_init = True 126 | 127 | def forward(self, 128 | query, 129 | key=None, 130 | value=None, 131 | identity=None, 132 | query_pos=None, 133 | key_padding_mask=None, 134 | reference_points=None, 135 | spatial_shapes=None, 136 | level_start_index=None, 137 | flag='decoder', 138 | 139 | **kwargs): 140 | """Forward Function of MultiScaleDeformAttention. 141 | 142 | Args: 143 | query (Tensor): Query of Transformer with shape 144 | (num_query, bs, embed_dims). 145 | key (Tensor): The key tensor with shape 146 | `(num_key, bs, embed_dims)`. 147 | value (Tensor): The value tensor with shape 148 | `(num_key, bs, embed_dims)`. 149 | identity (Tensor): The tensor used for addition, with the 150 | same shape as `query`. Default None. If None, 151 | `query` will be used. 152 | query_pos (Tensor): The positional encoding for `query`. 153 | Default: None. 
154 | key_pos (Tensor): The positional encoding for `key`. Default 155 | None. 156 | reference_points (Tensor): The normalized reference 157 | points with shape (bs, num_query, num_levels, 2), 158 | all elements is range in [0, 1], top-left (0,0), 159 | bottom-right (1, 1), including padding area. 160 | or (N, Length_{query}, num_levels, 4), add 161 | additional two dimensions is (w, h) to 162 | form reference boxes. 163 | key_padding_mask (Tensor): ByteTensor for `query`, with 164 | shape [bs, num_key]. 165 | spatial_shapes (Tensor): Spatial shape of features in 166 | different levels. With shape (num_levels, 2), 167 | last dimension represents (h, w). 168 | level_start_index (Tensor): The start index of each level. 169 | A tensor has shape ``(num_levels, )`` and can be represented 170 | as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. 171 | 172 | Returns: 173 | Tensor: forwarded results with shape [num_query, bs, embed_dims]. 174 | """ 175 | 176 | if value is None: 177 | assert self.batch_first 178 | bs, len_bev, c = query.shape 179 | value = torch.stack([query, query], 1).reshape(bs*2, len_bev, c) 180 | 181 | # value = torch.cat([query, query], 0) 182 | 183 | if identity is None: 184 | identity = query 185 | if query_pos is not None: 186 | query = query + query_pos 187 | if not self.batch_first: 188 | # change to (bs, num_query ,embed_dims) 189 | query = query.permute(1, 0, 2) 190 | value = value.permute(1, 0, 2) 191 | bs, num_query, embed_dims = query.shape 192 | _, num_value, _ = value.shape 193 | assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value 194 | assert self.num_bev_queue == 2 195 | 196 | query = torch.cat([value[:bs], query], -1) 197 | value = self.value_proj(value) 198 | 199 | if key_padding_mask is not None: 200 | value = value.masked_fill(key_padding_mask[..., None], 0.0) 201 | 202 | value = value.reshape(bs*self.num_bev_queue, 203 | num_value, self.num_heads, -1) 204 | 205 | sampling_offsets = self.sampling_offsets(query) 206 | sampling_offsets = sampling_offsets.view( 207 | bs, num_query, self.num_heads, self.num_bev_queue, self.num_levels, self.num_points, 2) 208 | attention_weights = self.attention_weights(query).view( 209 | bs, num_query, self.num_heads, self.num_bev_queue, self.num_levels * self.num_points) 210 | attention_weights = attention_weights.softmax(-1) 211 | 212 | attention_weights = attention_weights.view(bs, num_query, 213 | self.num_heads, 214 | self.num_bev_queue, 215 | self.num_levels, 216 | self.num_points) 217 | 218 | attention_weights = attention_weights.permute(0, 3, 1, 2, 4, 5)\ 219 | .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points).contiguous() 220 | sampling_offsets = sampling_offsets.permute(0, 3, 1, 2, 4, 5, 6)\ 221 | .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points, 2) 222 | 223 | if reference_points.shape[-1] == 2: 224 | offset_normalizer = torch.stack( 225 | [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) 226 | sampling_locations = reference_points[:, :, None, :, None, :] \ 227 | + sampling_offsets \ 228 | / offset_normalizer[None, None, None, :, None, :] 229 | 230 | elif reference_points.shape[-1] == 4: 231 | sampling_locations = reference_points[:, :, None, :, None, :2] \ 232 | + sampling_offsets / self.num_points \ 233 | * reference_points[:, :, None, :, None, 2:] \ 234 | * 0.5 235 | else: 236 | raise ValueError( 237 | f'Last dim of reference_points must be' 238 | f' 2 or 4, but get {reference_points.shape[-1]} instead.') 239 | if 
torch.cuda.is_available() and value.is_cuda: 240 | 241 | # using fp16 deformable attention is unstable because it performs many sum operations 242 | if value.dtype == torch.float16: 243 | MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32 244 | else: 245 | MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32 246 | output = MultiScaleDeformableAttnFunction.apply( 247 | value, spatial_shapes, level_start_index, sampling_locations, 248 | attention_weights, self.im2col_step) 249 | else: 250 | 251 | output = multi_scale_deformable_attn_pytorch( 252 | value, spatial_shapes, sampling_locations, attention_weights) 253 | 254 | # output shape (bs*num_bev_queue, num_query, embed_dims) 255 | # (bs*num_bev_queue, num_query, embed_dims)-> (num_query, embed_dims, bs*num_bev_queue) 256 | output = output.permute(1, 2, 0) 257 | 258 | # fuse history value and current value 259 | # (num_query, embed_dims, bs*num_bev_queue)-> (num_query, embed_dims, bs, num_bev_queue) 260 | output = output.view(num_query, embed_dims, bs, self.num_bev_queue) 261 | output = output.mean(-1) 262 | 263 | # (num_query, embed_dims, bs)-> (bs, num_query, embed_dims) 264 | output = output.permute(2, 0, 1) 265 | 266 | output = self.output_proj(output) 267 | 268 | if not self.batch_first: 269 | output = output.permute(1, 0, 2) 270 | 271 | return self.dropout(output) + identity 272 | -------------------------------------------------------------------------------- /projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py: -------------------------------------------------------------------------------- 1 | _base_ = [] 2 | custom_imports = dict(imports=['projects.bevformer', 'projects.lanesegnet']) 3 | 4 | # If point cloud range is changed, the models should also change their point 5 | # cloud range accordingly 6 | point_cloud_range = [-51.2, -25.6, -2.3, 51.2, 25.6, 1.7] 7 | 8 | img_norm_cfg = dict( 9 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 10 | 11 | class_names = ['lane_segment', 'ped_crossing'] 12 | class_nums = len(class_names) 13 | 14 | input_modality = dict( 15 | use_lidar=False, 16 | use_camera=True, 17 | use_radar=False, 18 | use_map=False, 19 | use_external=False) 20 | num_cams = 7 21 | pts_dim = 3 22 | 23 | dataset_type = 'OpenLaneV2_subset_A_LaneSegNet_Dataset' 24 | data_root = 'data/OpenLane-V2/' 25 | 26 | para_method = 'fix_pts_interp' 27 | method_para = dict(n_points=10) 28 | code_size = 3 * method_para['n_points'] * pts_dim 29 | 30 | _dim_ = 256 31 | _pos_dim_ = _dim_//2 32 | _ffn_dim_ = _dim_*2 33 | _ffn_cfg_ = dict( 34 | type='FFN', 35 | embed_dims=_dim_, 36 | feedforward_channels=_ffn_dim_, 37 | num_fcs=2, 38 | ffn_drop=0.1, 39 | act_cfg=dict(type='ReLU', inplace=True), 40 | ), 41 | 42 | _num_levels_ = 4 43 | bev_h_ = 100 44 | bev_w_ = 200 45 | 46 | model = dict( 47 | type='LaneSegNet', 48 | img_backbone=dict( 49 | type='ResNet', 50 | depth=50, 51 | num_stages=4, 52 | out_indices=(1, 2, 3), 53 | frozen_stages=1, 54 | norm_cfg=dict(type='BN', requires_grad=False), 55 | norm_eval=True, 56 | style='pytorch', 57 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 58 | img_neck=dict( 59 | type='FPN', 60 | in_channels=[512, 1024, 2048], 61 | out_channels=_dim_, 62 | start_level=0, 63 | add_extra_convs='on_output', 64 | num_outs=_num_levels_, 65 | relu_before_extra_convs=True), 66 | bev_constructor=dict( 67 | type='BEVFormerConstructer', 68 | num_feature_levels=_num_levels_, 69 | num_cams=num_cams, 70 | embed_dims=_dim_, 71 | 
rotate_prev_bev=True, 72 | use_shift=True, 73 | use_can_bus=True, 74 | pc_range=point_cloud_range, 75 | bev_h=bev_h_, 76 | bev_w=bev_w_, 77 | rotate_center=[bev_h_//2, bev_w_//2], 78 | encoder=dict( 79 | type='BEVFormerEncoder', 80 | num_layers=3, 81 | pc_range=point_cloud_range, 82 | num_points_in_pillar=4, 83 | return_intermediate=False, 84 | transformerlayers=dict( 85 | type='BEVFormerLayer', 86 | attn_cfgs=[ 87 | dict( 88 | type='TemporalSelfAttention', 89 | embed_dims=_dim_, 90 | num_levels=1), 91 | dict( 92 | type='SpatialCrossAttention', 93 | embed_dims=_dim_, 94 | num_cams=num_cams, 95 | pc_range=point_cloud_range, 96 | deformable_attention=dict( 97 | type='MSDeformableAttention3D', 98 | embed_dims=_dim_, 99 | num_points=8, 100 | num_levels=_num_levels_) 101 | ) 102 | ], 103 | ffn_cfgs=_ffn_cfg_, 104 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 105 | 'ffn', 'norm'))), 106 | positional_encoding=dict( 107 | type='LearnedPositionalEncoding', 108 | num_feats=_pos_dim_, 109 | row_num_embed=bev_h_, 110 | col_num_embed=bev_w_), 111 | ), 112 | lane_head=dict( 113 | type='LaneSegHead', 114 | num_classes=class_nums, 115 | num_lane_type_classes=3, 116 | in_channels=_dim_, 117 | num_query=200, 118 | bev_h=bev_h_, 119 | bev_w=bev_w_, 120 | pc_range=point_cloud_range, 121 | pts_dim=pts_dim, 122 | sync_cls_avg_factor=False, 123 | with_box_refine=True, 124 | code_size=code_size, 125 | code_weights= [1.0 for i in range(code_size)], 126 | transformer=dict( 127 | type='LaneSegNetTransformer', 128 | embed_dims=_dim_, 129 | points_num=method_para['n_points'], 130 | pts_dim=pts_dim, 131 | decoder=dict( 132 | type='LaneSegNetDecoder', 133 | num_layers=6, 134 | return_intermediate=True, 135 | pc_range=point_cloud_range, 136 | pts_dim=pts_dim, 137 | transformerlayers=dict( 138 | type='CustomDetrTransformerDecoderLayer', 139 | attn_cfgs=[ 140 | dict( 141 | type='MultiheadAttention', 142 | embed_dims=_dim_, 143 | num_heads=8, 144 | dropout=0.1), 145 | dict( 146 | type='LaneAttention', 147 | embed_dims=_dim_, 148 | num_heads=8, 149 | num_points=32, 150 | num_levels=1), 151 | ], 152 | ffn_cfgs=_ffn_cfg_, 153 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 154 | 'ffn', 'norm')))), 155 | bbox_coder=dict(type='LaneSegmentPseudoCoder'), 156 | loss_cls=dict( 157 | type='FocalLoss', 158 | use_sigmoid=True, 159 | gamma=2.0, 160 | alpha=0.25, 161 | loss_weight=1.5), 162 | loss_bbox=dict(type='L1Loss', loss_weight=0.025), 163 | loss_lane_type=dict( 164 | type='CrossEntropyLoss', 165 | use_sigmoid=True, 166 | loss_weight=0.1), 167 | loss_mask=dict( 168 | type='CrossEntropyLoss', 169 | use_sigmoid=True, 170 | reduction='mean', 171 | loss_weight=3.0), 172 | loss_dice=dict( 173 | type='DiceLoss', 174 | use_sigmoid=True, 175 | activate=True, 176 | reduction='mean', 177 | naive_dice=True, 178 | eps=1.0, 179 | loss_weight=3.0)), 180 | lclc_head=dict( 181 | type='RelationshipHead', 182 | in_channels_o1=_dim_, 183 | in_channels_o2=_dim_, 184 | shared_param=False, 185 | loss_rel=dict( 186 | type='FocalLoss', 187 | use_sigmoid=True, 188 | gamma=2.0, 189 | alpha=0.25, 190 | loss_weight=5)), 191 | # model training and testing settings 192 | train_cfg=dict( 193 | lane=dict( 194 | assigner=dict( 195 | type='LaneSegmentHungarianAssigner3D', 196 | cls_cost=dict(type='FocalLossCost', weight=1.5), 197 | reg_cost=dict(type='LaneL1Cost', weight=0.025), 198 | mask_cost=dict(type='CrossEntropyLossCost', weight=3.0, use_sigmoid=True), 199 | dice_cost=dict(type='DiceCost', weight=3.0, pred_act=False, eps=1.0), 
200 | pc_range=point_cloud_range)))) 201 | 202 | train_pipeline = [ 203 | dict(type='CustomLoadMultiViewImageFromFiles', to_float32=True), 204 | dict(type='LoadAnnotations3DLaneSegment', 205 | with_lane_3d=True, with_lane_label_3d=True, with_lane_adj=True, with_lane_type=True, 206 | with_bbox=False, with_label=False, with_lane_lste_adj=False), 207 | dict(type='PhotoMetricDistortionMultiViewImage'), 208 | dict(type='CropFrontViewImageForAv2'), 209 | dict(type='RandomScaleImageMultiViewImage', scales=[0.5]), 210 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 211 | dict(type='PadMultiViewImageSame2Max', size_divisor=32), 212 | dict(type='GridMaskMultiViewImage'), 213 | dict(type='LaneSegmentParameterize3D', method=para_method, method_para=method_para), 214 | dict(type='GenerateLaneSegmentMask', points_num=method_para['n_points'], bev_h=bev_h_, bev_w=bev_w_), 215 | dict(type='CustomFormatBundle3DLane', class_names=class_names), 216 | dict(type='CustomCollect3D', keys=[ 217 | 'img', 'gt_lanes_3d', 'gt_lane_labels_3d', 'gt_lane_adj', 218 | 'gt_instance_masks', 'gt_lane_left_type', 'gt_lane_right_type']) 219 | ] 220 | 221 | test_pipeline = [ 222 | dict(type='CustomLoadMultiViewImageFromFiles', to_float32=True), 223 | dict(type='CropFrontViewImageForAv2'), 224 | dict(type='RandomScaleImageMultiViewImage', scales=[0.5]), 225 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 226 | dict(type='PadMultiViewImageSame2Max', size_divisor=32), 227 | dict(type='CustomFormatBundle3DLane', class_names=class_names), 228 | dict(type='CustomCollect3D', keys=['img']) 229 | ] 230 | 231 | data = dict( 232 | samples_per_gpu=1, 233 | workers_per_gpu=8, 234 | train=dict( 235 | type=dataset_type, 236 | data_root=data_root, 237 | ann_file=data_root + 'data_dict_subset_A_train_lanesegnet.pkl', 238 | pipeline=train_pipeline, 239 | classes=class_names, 240 | modality=input_modality, 241 | split='train', 242 | filter_map_change=True, 243 | points_num=method_para['n_points'], 244 | test_mode=False), 245 | val=dict( 246 | type=dataset_type, 247 | data_root=data_root, 248 | ann_file=data_root + 'data_dict_subset_A_val_lanesegnet.pkl', 249 | pipeline=test_pipeline, 250 | classes=class_names, 251 | modality=input_modality, 252 | split='val', 253 | points_num=method_para['n_points'], 254 | test_mode=True), 255 | test=dict( 256 | type=dataset_type, 257 | data_root=data_root, 258 | ann_file=data_root + 'data_dict_subset_A_val_lanesegnet.pkl', 259 | pipeline=test_pipeline, 260 | classes=class_names, 261 | modality=input_modality, 262 | split='val', 263 | points_num=method_para['n_points'], 264 | test_mode=True) 265 | ) 266 | 267 | optimizer = dict( 268 | type='AdamW', 269 | lr=2e-4, 270 | paramwise_cfg=dict( 271 | custom_keys={ 272 | 'img_backbone': dict(lr_mult=0.1), 273 | }), 274 | weight_decay=0.01) 275 | 276 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 277 | # learning policy 278 | lr_config = dict( 279 | policy='CosineAnnealing', 280 | warmup='linear', 281 | warmup_iters=500, 282 | warmup_ratio=1.0 / 3, 283 | min_lr_ratio=1e-3) 284 | total_epochs = 24 285 | evaluation = dict(interval=24, pipeline=test_pipeline) 286 | 287 | runner = dict(type='EpochBasedRunner', max_epochs=total_epochs) 288 | log_config = dict( 289 | interval=50, 290 | hooks=[ 291 | dict(type='TextLoggerHook'), 292 | dict(type='TensorboardLoggerHook') 293 | ]) 294 | 295 | checkpoint_config = dict(interval=1, max_keep_ckpts=1) 296 | 297 | dist_params = dict(backend='nccl') 298 | log_level = 'INFO' 299 | work_dir = None 300 | 
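A minimal sketch of how the lane regression target defined in this config decomposes may help here: code_size = 3 * n_points * pts_dim (3 x 10 x 3 = 90), a single flattened vector holding the centerline, left-boundary and right-boundary polylines. The snippet below is an illustrative editor-added sketch, not part of the repository files; it assumes mmcv 1.x and numpy are installed and reuses the config path shown above.

    import numpy as np
    from mmcv import Config  # mmcv 1.x config loader

    # Illustrative sketch only -- not part of the original config file.
    cfg = Config.fromfile('projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py')
    n_points, pts_dim = cfg.method_para['n_points'], cfg.pts_dim   # 10, 3
    assert cfg.code_size == 3 * n_points * pts_dim                 # 90

    # One predicted lane vector of length code_size splits into three polylines.
    pred = np.zeros(cfg.code_size, dtype=np.float32)               # dummy prediction
    centerline, left, right = pred.reshape(3, n_points, pts_dim)   # (10, 3) each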
load_from = None 301 | resume_from = None 302 | workflow = [('train', 1)] 303 | 304 | # NOTE: `auto_scale_lr` is for automatically scaling LR, 305 | # base_batch_size = (8 GPUs) x (1 samples per GPU) 306 | auto_scale_lr = dict(base_batch_size=8) 307 | -------------------------------------------------------------------------------- /projects/lanesegnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .models import * 3 | from .core import * 4 | from .utils import * 5 | from .thirdparty import * 6 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .lane import * 2 | from .visualizer import * 3 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/lane/__init__.py: -------------------------------------------------------------------------------- 1 | from .lane_hungarian_assigner import LaneSegmentHungarianAssigner3D 2 | from .lane_coder import LaneSegmentPseudoCoder 3 | from .match_cost import LaneL1Cost 4 | from .area_coder import AreaPseudoCoder 5 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/lane/area_coder.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox import BaseBBoxCoder 2 | from mmdet.core.bbox.builder import BBOX_CODERS 3 | 4 | @BBOX_CODERS.register_module() 5 | class AreaPseudoCoder(BaseBBoxCoder): 6 | def encode(self): 7 | pass 8 | 9 | def decode_single(self, cls_scores, area_preds): 10 | 11 | cls_scores = cls_scores.sigmoid() 12 | scores, labels = cls_scores.max(-1) 13 | 14 | predictions_dict = { 15 | 'areas': area_preds.detach().cpu().numpy(), 16 | 'scores': scores.detach().cpu().numpy(), 17 | 'labels': labels.detach().cpu().numpy(), 18 | } 19 | 20 | return predictions_dict 21 | 22 | def decode(self, preds_dicts): 23 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 24 | all_pts_preds = preds_dicts['all_pts_preds'][-1] 25 | batch_size = all_cls_scores.size()[0] 26 | predictions_list = [] 27 | for i in range(batch_size): 28 | predictions_list.append(self.decode_single(all_cls_scores[i], all_pts_preds[i])) 29 | return predictions_list 30 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/lane/lane_coder.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import torch 9 | from mmdet.core.bbox import BaseBBoxCoder 10 | from mmdet.core.bbox.builder import BBOX_CODERS 11 | from .util import denormalize_3dlane 12 | 13 | 14 | @BBOX_CODERS.register_module() 15 | class LaneSegmentPseudoCoder(BaseBBoxCoder): 16 | 17 | def __init__(self, denormalize=False): 18 | self.denormalize = denormalize 19 | 20 | def encode(self): 21 | pass 22 | 23 | def decode_single(self, cls_scores, lane_preds, lane_left_type_scores, lane_right_type_scores): 24 | """Decode bboxes. 
25 | Args: 26 | cls_scores (Tensor): Outputs from the classification head, \ 27 | shape [num_query, cls_out_channels]. Note \ 28 | cls_out_channels should includes background. 29 | lane_preds (Tensor): Outputs from the regression \ 30 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 31 | Shape [num_query, 9]. 32 | Returns: 33 | list[dict]: Decoded boxes. 34 | """ 35 | 36 | cls_scores = cls_scores.sigmoid() 37 | scores, labels = cls_scores.max(-1) 38 | left_type_scores, left_type_labels = lane_left_type_scores.sigmoid().max(-1) 39 | right_type_scores, right_type_labels = lane_right_type_scores.sigmoid().max(-1) 40 | if self.denormalize: 41 | final_lane_preds = denormalize_3dlane(lane_preds, self.pc_range) 42 | else: 43 | final_lane_preds = lane_preds 44 | 45 | predictions_dict = { 46 | 'lane3d': final_lane_preds.detach().cpu().numpy(), 47 | 'scores': scores.detach().cpu().numpy(), 48 | 'labels': labels.detach().cpu().numpy(), 49 | 'left_type_scores': left_type_scores.detach().cpu().numpy(), 50 | 'left_type_labels': left_type_labels.detach().cpu().numpy(), 51 | 'right_type_scores': right_type_scores.detach().cpu().numpy(), 52 | 'right_type_labels': right_type_labels.detach().cpu().numpy() 53 | } 54 | 55 | return predictions_dict 56 | 57 | def decode(self, preds_dicts): 58 | 59 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 60 | all_lanes_preds = preds_dicts['all_lanes_preds'][-1] 61 | all_lanes_left_types = preds_dicts['all_lanes_left_type'][-1] 62 | all_lanes_right_types = preds_dicts['all_lanes_right_type'][-1] 63 | 64 | batch_size = all_cls_scores.size()[0] 65 | predictions_list = [] 66 | for i in range(batch_size): 67 | predictions_list.append(self.decode_single( 68 | all_cls_scores[i], all_lanes_preds[i], all_lanes_left_types[i], all_lanes_right_types[i])) 69 | return predictions_list 70 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/lane/lane_hungarian_assigner.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import torch 8 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 9 | from mmdet.core.bbox.assigners import AssignResult 10 | from mmdet.core.bbox.assigners import BaseAssigner 11 | from mmdet.core.bbox.match_costs import build_match_cost 12 | from mmdet.models.utils.transformer import inverse_sigmoid 13 | from .util import normalize_3dlane 14 | 15 | try: 16 | from scipy.optimize import linear_sum_assignment 17 | except ImportError: 18 | linear_sum_assignment = None 19 | 20 | 21 | @BBOX_ASSIGNERS.register_module() 22 | class LaneSegmentHungarianAssigner3D(BaseAssigner): 23 | 24 | def __init__(self, 25 | cls_cost=dict(type='ClassificationCost', weight=1.), 26 | reg_cost=dict(type='BBoxL1Cost', weight=1.0), 27 | mask_cost=dict(type='CrossEntropyLossCost', weight=1.0), 28 | dice_cost=dict(type='DiceCost', weight=1.0), 29 | normalize_gt=False, 30 | pc_range=None): 31 | self.cls_cost = build_match_cost(cls_cost) 32 | self.reg_cost = build_match_cost(reg_cost) 33 | self.mask_cost = build_match_cost(mask_cost) 34 | self.dice_cost = build_match_cost(dice_cost) 35 | self.normalize_gt = normalize_gt 36 | self.pc_range = pc_range 37 | 38 | def assign(self, 39 | lanes_pred, 40 | mask_pred, 41 | cls_pred, 42 | gt_lanes, 43 | gt_mask, 44 | gt_labels): 45 | 46 | num_gts, num_bboxes = gt_lanes.size(0), lanes_pred.size(0) 47 | # 1. assign -1 by default 48 | assigned_gt_inds = lanes_pred.new_full((num_bboxes, ), 49 | -1, 50 | dtype=torch.long) 51 | assigned_labels = lanes_pred.new_full((num_bboxes, ), 52 | -1, 53 | dtype=torch.long) 54 | if num_gts == 0 or num_bboxes == 0: 55 | # No ground truth or boxes, return empty assignment 56 | if num_gts == 0: 57 | # No ground truth, assign all to background 58 | assigned_gt_inds[:] = 0 59 | return AssignResult( 60 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 61 | 62 | # 2. compute the weighted costs 63 | # classification and bboxcost. 64 | cls_cost = self.cls_cost(cls_pred, gt_labels) 65 | 66 | if self.normalize_gt: 67 | normalized_gt_lanes = normalize_3dlane(gt_lanes, self.pc_range) 68 | else: 69 | normalized_gt_lanes = gt_lanes 70 | 71 | # regression L1 cost 72 | reg_cost = self.reg_cost(lanes_pred, normalized_gt_lanes) 73 | 74 | # mask cost 75 | mask_cost = self.mask_cost(mask_pred, gt_mask) 76 | 77 | # dice cost 78 | dice_cost = self.dice_cost(mask_pred, gt_mask) 79 | 80 | # weighted sum of above two costs 81 | cost = cls_cost + reg_cost + mask_cost + dice_cost 82 | 83 | # 3. do Hungarian matching on CPU using linear_sum_assignment 84 | cost = cost.detach().cpu() 85 | if linear_sum_assignment is None: 86 | raise ImportError('Please run "pip install scipy" ' 87 | 'to install scipy first.') 88 | matched_row_inds, matched_col_inds = linear_sum_assignment(cost) 89 | matched_row_inds = torch.from_numpy(matched_row_inds).to( 90 | lanes_pred.device) 91 | matched_col_inds = torch.from_numpy(matched_col_inds).to( 92 | lanes_pred.device) 93 | 94 | # 4. 
assign backgrounds and foregrounds 95 | # assign all indices to backgrounds first 96 | assigned_gt_inds[:] = 0 97 | 98 | # assign foregrounds based on matching results 99 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 100 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 101 | 102 | return AssignResult( 103 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 104 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/lane/match_cost.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import torch 8 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 9 | 10 | 11 | @MATCH_COST.register_module() 12 | class LaneL1Cost(object): 13 | 14 | def __init__(self, weight=1.): 15 | self.weight = weight 16 | 17 | def __call__(self, lane_pred, gt_lanes): 18 | lane_cost = torch.cdist(lane_pred, gt_lanes, p=1) 19 | return lane_cost * self.weight 20 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/lane/util.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import torch 9 | from shapely.geometry import LineString 10 | 11 | def normalize_3dlane(lanes, pc_range): 12 | normalized_lanes = lanes.clone() 13 | normalized_lanes[..., 0::3] = (lanes[..., 0::3] - pc_range[0]) / (pc_range[3] - pc_range[0]) 14 | normalized_lanes[..., 1::3] = (lanes[..., 1::3] - pc_range[1]) / (pc_range[4] - pc_range[1]) 15 | normalized_lanes[..., 2::3] = (lanes[..., 2::3] - pc_range[2]) / (pc_range[5] - pc_range[2]) 16 | normalized_lanes = torch.clamp(normalized_lanes, 0, 1) 17 | 18 | return normalized_lanes 19 | 20 | def denormalize_3dlane(normalized_lanes, pc_range): 21 | lanes = normalized_lanes.clone() 22 | lanes[..., 0::3] = (normalized_lanes[..., 0::3] * (pc_range[3] - pc_range[0]) + pc_range[0]) 23 | lanes[..., 1::3] = (normalized_lanes[..., 1::3] * (pc_range[4] - pc_range[1]) + pc_range[1]) 24 | lanes[..., 2::3] = (normalized_lanes[..., 2::3] * (pc_range[5] - pc_range[2]) + pc_range[2]) 25 | return lanes 26 | 27 | def fix_pts_interpolate(lane, n_points): 28 | ls = LineString(lane) 29 | distances = np.linspace(0, ls.length, n_points) 30 | lane = np.array([ls.interpolate(distance).coords[0] for distance in distances]) 31 | return lane 32 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .lane_segment import draw_annotation_bev 2 | -------------------------------------------------------------------------------- /projects/lanesegnet/core/visualizer/lane_segment.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | COLOR_DICT = { # RGB [0, 1] 11 | 'centerline': np.array([243, 90, 2]) / 255, 12 | 'laneline': np.array([0, 32, 127]) / 255, 13 | 'ped_crossing': np.array([255, 192, 0]) / 255, 14 | 'road_boundary': np.array([220, 30, 0]) / 255, 15 | } 16 | LINE_PARAM = { 17 | 0: {'color': COLOR_DICT['laneline'], 'alpha': 0.3, 'linestyle': ':'}, # none 18 | 1: {'color': COLOR_DICT['laneline'], 'alpha': 0.75, 'linestyle': 'solid'}, # solid 19 | 2: {'color': COLOR_DICT['laneline'], 'alpha': 0.75, 'linestyle': '--'}, # dashed 20 | 'ped_crossing': {'color': COLOR_DICT['ped_crossing'], 'alpha': 1, 'linestyle': 'solid'}, 21 | 'road_boundary': {'color': COLOR_DICT['road_boundary'], 'alpha': 1, 'linestyle': 'solid'} 22 | } 23 | BEV_RANGE = [-50, 50, -25, 25] 24 | 25 | def _draw_centerline(ax, lane_centerline): 26 | points = np.asarray(lane_centerline['points']) 27 | color = COLOR_DICT['centerline'] 28 | # draw line 29 | ax.plot(points[:, 1], points[:, 0], color=color, alpha=1.0, linewidth=0.6) 30 | # draw start and end vertex 31 | ax.scatter(points[[0, -1], 1], points[[0, -1], 0], color=color, s=1) 32 | # draw arrow 33 | ax.annotate('', xy=(points[-1, 1], points[-1, 0]), 34 | xytext=(points[-2, 1], points[-2, 0]), 35 | arrowprops=dict(arrowstyle='->', lw=0.6, color=color)) 36 | 37 | def _draw_line(ax, line): 38 | points = np.asarray(line['points']) 39 | config = LINE_PARAM[line['linetype']] 40 | ax.plot(points[:, 1], points[:, 0], linewidth=0.6, **config) 41 | 42 | def _draw_lane_segment(ax, lane_segment, with_centerline, with_laneline): 43 | if with_centerline: 44 | _draw_centerline(ax, {'points': lane_segment['centerline']}) 45 | if with_laneline: 46 | _draw_line(ax, {'points': lane_segment['left_laneline'], 'linetype': lane_segment['left_laneline_type']}) 47 | _draw_line(ax, {'points': lane_segment['right_laneline'], 'linetype': lane_segment['right_laneline_type']}) 48 | 49 | def _draw_area(ax, area): 50 | if area['category'] == 1: # ped crossing with lane segment style. 
51 | _draw_line(ax, {'points': area['points'], 'linetype': 'ped_crossing'}) 52 | elif area['category'] == 2: # road boundary 53 | _draw_line(ax, {'points': area['points'], 'linetype': 'road_boundary'}) 54 | 55 | def draw_annotation_bev(annotation, with_centerline=True, with_laneline=True, with_area=True): 56 | 57 | fig, ax = plt.figure(figsize=(2, 4), dpi=200), plt.gca() 58 | ax.set_aspect('equal') 59 | ax.set_ylim([BEV_RANGE[0], BEV_RANGE[1]]) 60 | ax.set_xlim([BEV_RANGE[2], BEV_RANGE[3]]) 61 | ax.invert_xaxis() 62 | ax.grid(False) 63 | ax.axis('off') 64 | ax.set_facecolor('white') 65 | fig.tight_layout(pad=0.2) 66 | 67 | for lane_segment in annotation['lane_segment']: 68 | _draw_lane_segment(ax, lane_segment, with_centerline, with_laneline) 69 | if with_area: 70 | for area in annotation['area']: 71 | _draw_area(ax, area) 72 | 73 | fig.canvas.draw() 74 | data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) 75 | data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) 76 | plt.close(fig) 77 | return data 78 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipelines import * 2 | from .openlanev2_subset_A_lanesegnet_dataset import OpenLaneV2_subset_A_LaneSegNet_Dataset 3 | from .openlanev2_subset_A_mapele_bucket_dataset import OpenLaneV2_subset_A_MapElementBucket_Dataset 4 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/openlanev2_evaluate_custom.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | from tqdm import tqdm 9 | from openlanev2.lanesegment.io import io 10 | from openlanev2.lanesegment.evaluation.distance import (pairwise, area_distance, 11 | lane_segment_distance, lane_segment_distance_c) 12 | from openlanev2.lanesegment.evaluation.evaluate import (THRESHOLDS_LANESEG, THRESHOLDS_AREA, 13 | _mAP_over_threshold, _mAP_topology_lsls) 14 | 15 | 16 | def lanesegnet_evaluate(ground_truth, predictions, verbose=True): 17 | 18 | if isinstance(ground_truth, str): 19 | ground_truth = io.pickle_load(ground_truth) 20 | 21 | if predictions is None: 22 | preds = {} 23 | print('\nDummy evaluation on ground truth.\n') 24 | else: 25 | if isinstance(predictions, str): 26 | predictions = io.pickle_load(predictions) 27 | predictions = predictions['results'] 28 | 29 | gts = {} 30 | preds = {} 31 | for token in ground_truth.keys(): 32 | gts[token] = ground_truth[token]['annotation'] 33 | if predictions is None: 34 | preds[token] = gts[token] 35 | for i, _ in enumerate(preds[token]['lane_segment']): 36 | preds[token]['lane_segment'][i]['confidence'] = np.float32(1) 37 | for i, _ in enumerate(preds[token]['area']): 38 | preds[token]['area'][i]['confidence'] = np.float32(1) 39 | for i, _ in enumerate(preds[token]['traffic_element']): 40 | preds[token]['traffic_element'][i]['confidence'] = np.float32(1) 41 | else: 42 | preds[token] = predictions[token]['predictions'] 43 | 44 | assert set(gts.keys()) == set(preds.keys()), '#frame differs' 45 | 46 | """ 47 | calculate distances between gts and preds 48 | """ 49 | 50 | distance_matrices = { 51 | 'laneseg': {}, 52 | 'area': {}, 53 | } 54 | 55 | for token in tqdm(gts.keys(), desc='calculating distances:', ncols=80, disable=not verbose): 56 | 57 | mask = pairwise( 58 | [gt for gt in gts[token]['lane_segment']], 59 | [pred for pred in preds[token]['lane_segment']], 60 | lane_segment_distance_c, 61 | relax=True, 62 | ) < THRESHOLDS_LANESEG[-1] 63 | 64 | distance_matrices['laneseg'][token] = pairwise( 65 | [gt for gt in gts[token]['lane_segment']], 66 | [pred for pred in preds[token]['lane_segment']], 67 | lane_segment_distance, 68 | mask=mask, 69 | relax=True, 70 | ) 71 | 72 | distance_matrices['area'][token] = pairwise( 73 | [gt for gt in gts[token]['area']], 74 | [pred for pred in preds[token]['area']], 75 | area_distance, 76 | ) 77 | 78 | """ 79 | evaluate 80 | """ 81 | 82 | metrics = { 83 | 'mAP': 0 84 | } 85 | 86 | metrics['AP_ls'] = _mAP_over_threshold( 87 | gts=gts, 88 | preds=preds, 89 | distance_matrices=distance_matrices['laneseg'], 90 | distance_thresholds=THRESHOLDS_LANESEG, 91 | object_type='lane_segment', 92 | filter=lambda _: True, 93 | inject=True, # save tp for eval on graph 94 | ).mean() 95 | 96 | metrics['AP_ped'] = _mAP_over_threshold( 97 | gts=gts, 98 | preds=preds, 99 | distance_matrices=distance_matrices['area'], 100 | distance_thresholds=THRESHOLDS_AREA, 101 | object_type='area', 102 | filter=lambda x: x['category'] == 1, 103 | inject=False, 104 | ).mean() 105 | 106 | metrics['TOP_lsls'] = _mAP_topology_lsls(gts, preds, THRESHOLDS_LANESEG) 107 | 108 | metrics['mAP'] = (metrics['AP_ls'] + metrics['AP_ped']) / 2 109 | 110 | return metrics 111 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/openlanev2_subset_A_mapele_bucket_dataset.py: -------------------------------------------------------------------------------- 1 | 
#---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import os 8 | import random 9 | import copy 10 | import tqdm 11 | 12 | import numpy as np 13 | import torch 14 | import mmcv 15 | import cv2 16 | 17 | import shapely 18 | from shapely.geometry import LineString 19 | from mmdet.datasets import DATASETS 20 | 21 | from openlanev2.lanesegment.evaluation import evaluate as openlanev2_evaluate 22 | from .openlanev2_subset_A_lanesegnet_dataset import OpenLaneV2_subset_A_LaneSegNet_Dataset 23 | from ..core.lane.util import fix_pts_interpolate 24 | from ..core.visualizer.lane_segment import draw_annotation_bev 25 | 26 | 27 | @DATASETS.register_module() 28 | class OpenLaneV2_subset_A_MapElementBucket_Dataset(OpenLaneV2_subset_A_LaneSegNet_Dataset): 29 | 30 | def get_ann_info(self, index): 31 | """Get annotation info according to the given index. 32 | 33 | Args: 34 | index (int): Index of the annotation data to get. 35 | 36 | Returns: 37 | dict: annotation information 38 | """ 39 | info = self.data_infos[index] 40 | ann_info = info['annotation'] 41 | 42 | gt_lanes = [] 43 | gt_lane_labels_3d = [] 44 | gt_lane_left_type = [] 45 | gt_lane_right_type = [] 46 | 47 | for idx, lane in enumerate(ann_info['lane_segment']): 48 | centerline = lane['centerline'] 49 | LineString_lane = LineString(centerline) 50 | left_boundary = lane['left_laneline'] 51 | LineString_left_boundary = LineString(left_boundary) 52 | right_boundary = lane['right_laneline'] 53 | LineString_right_boundary = LineString(right_boundary) 54 | gt_lanes.append([LineString_lane, LineString_left_boundary, LineString_right_boundary]) 55 | gt_lane_labels_3d.append(0) 56 | gt_lane_left_type.append(lane['left_laneline_type']) 57 | gt_lane_right_type.append(lane['right_laneline_type']) 58 | 59 | topology_lsls = np.array(ann_info['topology_lsls'], dtype=np.float32) 60 | 61 | te_bboxes = np.array([np.array(sign['points'], dtype=np.float32).flatten() for sign in ann_info['traffic_element']]) 62 | te_labels = np.array([sign['attribute'] for sign in ann_info['traffic_element']], dtype=np.int64) 63 | if len(te_bboxes) == 0: 64 | te_bboxes = np.zeros((0, 4), dtype=np.float32) 65 | te_labels = np.zeros((0, ), dtype=np.int64) 66 | 67 | topology_lste = np.array(ann_info['topology_lste'], dtype=np.float32) 68 | 69 | gt_areas_3d = [] 70 | gt_area_labels_3d = [] 71 | for area in ann_info['area']: 72 | gt_areas_3d.append(fix_pts_interpolate(area['points'], 20)) 73 | gt_area_labels_3d.append(area['category'] - 1) 74 | 75 | annos = dict( 76 | gt_lanes_3d = gt_lanes, 77 | gt_lane_labels_3d = gt_lane_labels_3d, 78 | gt_lane_adj = topology_lsls, 79 | bboxes = te_bboxes, 80 | labels = te_labels, 81 | gt_lane_lste_adj = topology_lste, 82 | gt_lane_left_type = gt_lane_left_type, 83 | gt_lane_right_type = gt_lane_right_type, 84 | gt_areas_3d = gt_areas_3d, 85 | gt_area_labels_3d = gt_area_labels_3d 86 | ) 87 | return annos 88 | 89 | def format_openlanev2_gt(self): 90 | gt_dict = {} 91 | for idx in range(len(self.data_infos)): 92 | info = copy.deepcopy(self.data_infos[idx]) 93 | key = (self.split, info['segment_id'], str(info['timestamp'])) 94 | areas = [] 95 | for area in info['annotation']['area']: 96 | points = area['points'] 97 
| if len(points) != 20: 98 | points = fix_pts_interpolate(points, 20) 99 | area['points'] = points 100 | areas.append(area) 101 | info['annotation']['area'] = areas 102 | gt_dict[key] = info 103 | return gt_dict 104 | 105 | def format_results(self, results, out_dir=None, logger=None, **kwargs): 106 | if out_dir is not None: 107 | logger.info(f'Starting format results...') 108 | data_type = np.float16 109 | else: 110 | data_type = np.float32 111 | 112 | pred_dict = {} 113 | pred_dict['method'] = 'LaneSegNet' 114 | pred_dict['team'] = 'dummy' 115 | pred_dict['authors'] = [] 116 | pred_dict['e-mail'] = 'dummy' 117 | pred_dict['institution / company'] = 'OpenDriveLab' 118 | pred_dict['country / region'] = 'CN' 119 | pred_dict['results'] = {} 120 | for idx, result in enumerate(tqdm.tqdm(results, ncols=80, desc='Formatting results')): 121 | info = self.data_infos[idx] 122 | key = (self.split, info['segment_id'], str(info['timestamp'])) 123 | 124 | pred_info = dict( 125 | lane_segment = [], 126 | area = [], 127 | traffic_element = [], 128 | topology_lsls = None, 129 | topology_lste = None 130 | ) 131 | 132 | if result['lane_results'] is not None: 133 | lane_results = result['lane_results'] 134 | scores = lane_results[1] 135 | valid_indices = np.argsort(-scores) 136 | lanes = lane_results[0][valid_indices] 137 | labels = lane_results[2][valid_indices] 138 | scores = scores[valid_indices] 139 | lanes = lanes.reshape(-1, lanes.shape[-1] // 3, 3) 140 | 141 | # left_type_scores = lane_results[3][valid_indices] 142 | # right_type_scores = lane_results[5][valid_indices] 143 | left_type_labels = lane_results[4][valid_indices] 144 | right_type_labels = lane_results[6][valid_indices] 145 | 146 | for pred_idx, (lane, score, label) in enumerate(zip(lanes, scores, labels)): 147 | pred_lane_segment = {} 148 | pred_lane_segment['id'] = 20000 + pred_idx 149 | pred_lane_segment['centerline'] = fix_pts_interpolate(lane[:self.points_num], 10).astype(data_type) 150 | pred_lane_segment['left_laneline'] = fix_pts_interpolate(lane[self.points_num:self.points_num * 2], 10).astype(data_type) 151 | pred_lane_segment['right_laneline'] = fix_pts_interpolate(lane[self.points_num * 2:], 10).astype(data_type) 152 | pred_lane_segment['left_laneline_type'] = left_type_labels[pred_idx] 153 | pred_lane_segment['right_laneline_type'] = right_type_labels[pred_idx] 154 | pred_lane_segment['confidence'] = score.item() 155 | pred_info['lane_segment'].append(pred_lane_segment) 156 | 157 | if result['area_results'] is not None: 158 | area_results = result['area_results'] 159 | scores = area_results[1] 160 | area_valid_indices = np.argsort(-scores) 161 | areas = area_results[0][area_valid_indices] 162 | labels = area_results[2][area_valid_indices] 163 | scores = scores[area_valid_indices] 164 | for pred_idx, (area, score, label) in enumerate(zip(areas, scores, labels)): 165 | pred_area = {} 166 | pred_area['id'] = 30000 + pred_idx 167 | pred_area['points'] = fix_pts_interpolate(area, 20).astype(data_type) 168 | pred_area['category'] = label + 1 169 | pred_area['confidence'] = score.item() 170 | pred_info['area'].append(pred_area) 171 | 172 | if result['bbox_results'] is not None: 173 | te_results = result['bbox_results'] 174 | scores = te_results[1] 175 | te_valid_indices = np.argsort(-scores) 176 | tes = te_results[0][te_valid_indices] 177 | scores = scores[te_valid_indices] 178 | class_idxs = te_results[2][te_valid_indices] 179 | for pred_idx, (te, score, class_idx) in enumerate(zip(tes, scores, class_idxs)): 180 | te_info = dict( 181 | 
id = 10000 + pred_idx, 182 | category = 1 if class_idx < 4 else 2, 183 | attribute = class_idx, 184 | points = te.reshape(2, 2).astype(data_type), 185 | confidence = score 186 | ) 187 | pred_info['traffic_element'].append(te_info) 188 | 189 | if result['lsls_results'] is not None: 190 | pred_info['topology_lsls'] = result['lsls_results'].astype(np.float32)[valid_indices][:, valid_indices] 191 | else: 192 | pred_info['topology_lsls'] = np.zeros((len(pred_info['lane_segment']), len(pred_info['lane_segment'])), dtype=np.float32) 193 | 194 | if result['lste_results'] is not None: 195 | topology_lste = result['lste_results'].astype(np.float32)[valid_indices] 196 | pred_info['topology_lste'] = topology_lste 197 | else: 198 | pred_info['topology_lste'] = np.zeros((len(pred_info['lane_segment']), len(pred_info['traffic_element'])), dtype=np.float32) 199 | 200 | pred_dict['results'][key] = dict(predictions=pred_info) 201 | 202 | if out_dir is not None: 203 | logger.info(f'Saving results to {out_dir}...') 204 | mmcv.dump(pred_dict, os.path.join(out_dir, 'submission.pkl')) 205 | 206 | return pred_dict 207 | 208 | def evaluate(self, results, logger=None, show=False, out_dir=None, **kwargs): 209 | """Evaluation in Openlane-V2 subset_A dataset. 210 | 211 | Args: 212 | results (list): Testing results of the dataset. 213 | logger (logging.Logger | str | None): Logger used for printing 214 | related information during evaluation. Default: None. 215 | show (bool): Whether to visualize the results. 216 | out_dir (str): Path of directory to save the results. 217 | 218 | Returns: 219 | dict: Evaluation results for evaluation metric. 220 | """ 221 | if show: 222 | assert out_dir, 'Expect out_dir when show is set.' 223 | logger.info(f'Visualizing results at {out_dir}...') 224 | self.show(results, out_dir) 225 | logger.info(f'Visualize done.') 226 | 227 | logger.info(f'Starting format results...') 228 | gt_dict = self.format_openlanev2_gt() 229 | pred_dict = self.format_results(results, logger=logger) 230 | 231 | logger.info(f'Starting openlanev2 evaluate...') 232 | metric_results = openlanev2_evaluate(gt_dict, pred_dict)['OpenLane-V2 UniScore'] 233 | return metric_results 234 | 235 | 236 | def show(self, results, out_dir, score_thr=0.3, show_num=20, **kwargs): 237 | """Show the results. 238 | 239 | Args: 240 | results (list[dict]): Testing results of the dataset. 241 | out_dir (str): Path of directory to save the results. 242 | score_thr (float): The threshold of score. 243 | show_num (int): The number of images to be shown. 
244 | """ 245 | for idx, result in enumerate(results): 246 | if idx % 5 != 0: 247 | continue 248 | if idx // 5 > show_num: 249 | break 250 | 251 | info = self.data_infos[idx] 252 | 253 | pred_result = self.format_results([result]) 254 | pred_result = list(pred_result['results'].values())[0]['predictions'] 255 | pred_result = self._filter_by_confidence(pred_result, score_thr) 256 | 257 | pv_imgs = [] 258 | for cam_name, cam_info in info['sensor'].items(): 259 | image_path = os.path.join(self.data_root, cam_info['image_path']) 260 | image_pv = mmcv.imread(image_path) 261 | pv_imgs.append(image_pv) 262 | 263 | surround_img = self._render_surround_img(pv_imgs) 264 | output_path = os.path.join(out_dir, f'{info["segment_id"]}/{info["timestamp"]}/surround.jpg') 265 | mmcv.imwrite(surround_img, output_path) 266 | 267 | conn_img_gt = draw_annotation_bev(info['annotation']) 268 | conn_img_pred = draw_annotation_bev(pred_result) 269 | divider = np.ones((conn_img_gt.shape[0], 7, 3), dtype=np.uint8) * 128 270 | conn_img = np.concatenate([conn_img_gt, divider, conn_img_pred], axis=1)[..., ::-1] 271 | 272 | output_path = os.path.join(out_dir, f'{info["segment_id"]}/{info["timestamp"]}/bev.jpg') 273 | mmcv.imwrite(conn_img, output_path) 274 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage, 4 | GridMaskMultiViewImage, CropFrontViewImageForAv2) 5 | from .transform_3d_lane import LaneSegmentParameterize3D, GenerateLaneSegmentMask 6 | from .formating import CustomFormatBundle3DLane 7 | from .loading import CustomLoadMultiViewImageFromFiles, LoadAnnotations3DLaneSegment 8 | 9 | __all__ = [ 10 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 11 | 'PhotoMetricDistortionMultiViewImage', 'CustomCollect3D', 'RandomScaleImageMultiViewImage', 12 | 'GridMaskMultiViewImage', 'CropFrontViewImageForAv2', 13 | 'LaneSegmentParameterize3D', 'GenerateLaneSegmentMask', 14 | 'CustomFormatBundle3DLane', 15 | 'CustomLoadMultiViewImageFromFiles', 'LoadAnnotations3DLaneSegment' 16 | ] 17 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | from mmcv.parallel import DataContainer as DC 9 | 10 | from mmdet.datasets.builder import PIPELINES 11 | from mmdet.datasets.pipelines import to_tensor 12 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 13 | 14 | 15 | @PIPELINES.register_module() 16 | class CustomFormatBundle3DLane(DefaultFormatBundle3D): 17 | """Custom formatting bundle for 3D Lane. 
18 | """ 19 | 20 | def __init__(self, class_names, **kwargs): 21 | super(CustomFormatBundle3DLane, self).__init__(class_names, **kwargs) 22 | 23 | def __call__(self, results): 24 | """Call function to transform and format common fields in results. 25 | 26 | Args: 27 | results (dict): Result dict contains the data to convert. 28 | 29 | Returns: 30 | dict: The result dict contains the data that is formatted with 31 | default bundle. 32 | """ 33 | if 'gt_lanes_3d' in results: 34 | results['gt_lanes_3d'] = DC( 35 | to_tensor(results['gt_lanes_3d'])) 36 | if 'gt_lane_labels_3d' in results: 37 | results['gt_lane_labels_3d'] = DC( 38 | to_tensor(results['gt_lane_labels_3d'])) 39 | if 'gt_lane_adj' in results: 40 | results['gt_lane_adj'] = DC( 41 | to_tensor(results['gt_lane_adj'])) 42 | if 'gt_lane_lste_adj' in results: 43 | results['gt_lane_lste_adj'] = DC( 44 | to_tensor(results['gt_lane_lste_adj'])) 45 | if 'gt_lane_left_type' in results: 46 | results['gt_lane_left_type'] = DC( 47 | to_tensor(results['gt_lane_left_type'])) 48 | if 'gt_lane_right_type' in results: 49 | results['gt_lane_right_type'] = DC( 50 | to_tensor(results['gt_lane_right_type'])) 51 | if 'gt_instance_masks' in results: 52 | results['gt_instance_masks'] = DC( 53 | to_tensor(results['gt_instance_masks'])) 54 | if 'gt_areas_3d' in results: 55 | results['gt_areas_3d'] = DC( 56 | to_tensor(results['gt_areas_3d'])) 57 | if 'gt_area_labels_3d' in results: 58 | results['gt_area_labels_3d'] = DC( 59 | to_tensor(results['gt_area_labels_3d'])) 60 | 61 | results = super(CustomFormatBundle3DLane, self).__call__(results) 62 | return results 63 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import mmcv 9 | from mmdet.datasets.builder import PIPELINES 10 | from mmdet3d.datasets.pipelines import LoadAnnotations3D 11 | 12 | 13 | @PIPELINES.register_module() 14 | class CustomLoadMultiViewImageFromFiles(object): 15 | 16 | def __init__(self, to_float32=False, color_type='unchanged'): 17 | self.to_float32 = to_float32 18 | self.color_type = color_type 19 | 20 | def __call__(self, results): 21 | filename = results['img_filename'] 22 | # img is of shape (h, w, c, num_views) 23 | img = [mmcv.imread(name, self.color_type) for name in filename] 24 | if self.to_float32: 25 | img = [_.astype(np.float32) for _ in img] 26 | results['filename'] = filename 27 | results['img'] = img 28 | results['img_shape'] = [img_.shape for img_ in img] 29 | results['ori_shape'] = [img_.shape for img_ in img] 30 | # Set initial values for default meta_keys 31 | results['pad_shape'] = [img_.shape for img_ in img] 32 | results['crop_shape'] = [np.zeros(2) for img_ in img] 33 | results['scale_factor'] = [1.0 for img_ in img] 34 | num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2] 35 | results['img_norm_cfg'] = dict( 36 | mean=np.zeros(num_channels, dtype=np.float32), 37 | std=np.ones(num_channels, dtype=np.float32), 38 | to_rgb=False) 39 | return results 40 | 41 | def __repr__(self): 42 | """str: Return a string that describes the module.""" 43 | repr_str = self.__class__.__name__ 44 | repr_str += f'(to_float32={self.to_float32}, ' 45 | repr_str += f"color_type='{self.color_type}')" 46 | return repr_str 47 | 48 | 49 | @PIPELINES.register_module() 50 | class LoadAnnotations3DLaneSegment(LoadAnnotations3D): 51 | 52 | def __init__(self, 53 | with_lane_3d=True, 54 | with_lane_label_3d=True, 55 | with_lane_adj=True, 56 | with_lane_lste_adj=False, 57 | with_lane_type=False, 58 | with_bbox_3d=False, 59 | with_label_3d=False, 60 | with_area=False, 61 | **kwargs): 62 | super().__init__(with_bbox_3d, with_label_3d, **kwargs) 63 | self.with_lane_3d = with_lane_3d 64 | self.with_lane_label_3d = with_lane_label_3d 65 | self.with_lane_type = with_lane_type 66 | self.with_lane_adj = with_lane_adj 67 | self.with_lane_lste_adj = with_lane_lste_adj 68 | self.with_area = with_area 69 | 70 | def _load_lanes_3d(self, results): 71 | results['gt_lanes_3d'] = results['ann_info']['gt_lanes_3d'] 72 | if self.with_lane_label_3d: 73 | results['gt_lane_labels_3d'] = results['ann_info']['gt_lane_labels_3d'] 74 | if self.with_lane_adj: 75 | results['gt_lane_adj'] = results['ann_info']['gt_lane_adj'] 76 | if self.with_lane_lste_adj: 77 | results['gt_lane_lste_adj'] = results['ann_info']['gt_lane_lste_adj'] 78 | if self.with_lane_type: 79 | results['gt_lane_left_type'] = results['ann_info']['gt_lane_left_type'] 80 | results['gt_lane_right_type'] = results['ann_info']['gt_lane_right_type'] 81 | if self.with_area: 82 | results['gt_areas_3d'] = results['ann_info']['gt_areas_3d'] 83 | results['gt_area_labels_3d'] = results['ann_info']['gt_area_labels_3d'] 84 | return results 85 | 86 | def __call__(self, results): 87 | results = super().__call__(results) 88 | if self.with_lane_3d: 89 | results = self._load_lanes_3d(results) 90 | return results 91 | 92 | def __repr__(self): 93 | """str: Return a string that describes the module.""" 94 | indent_str = ' ' 95 | repr_str = super().__repr__() 96 | repr_str += f'{indent_str}with_lane_3d={self.with_lane_3d}, ' 97 | repr_str += 
f'{indent_str}with_lane_lable_3d={self.with_lane_label_3d}, ' 98 | repr_str += f'{indent_str}with_lane_adj={self.with_lane_adj}, ' 99 | repr_str += f'{indent_str}with_lane_lste_adj={self.with_lane_lste_adj}, ' 100 | repr_str += f'{indent_str}with_lane_type={self.with_lane_type}, ' 101 | repr_str += f'{indent_str}with_area={self.with_area}, ' 102 | return repr_str 103 | -------------------------------------------------------------------------------- /projects/lanesegnet/datasets/pipelines/transform_3d_lane.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import cv2 9 | from mmdet.datasets.builder import PIPELINES 10 | from shapely.geometry import LineString 11 | 12 | 13 | @PIPELINES.register_module() 14 | class LaneSegmentParameterize3D(object): 15 | 16 | def __init__(self, method, method_para): 17 | method_list = ['fix_pts_interp'] 18 | self.method = method 19 | if not self.method in method_list: 20 | raise Exception("Not implemented!") 21 | self.method_para = method_para 22 | 23 | def __call__(self, results): 24 | lanes = results['gt_lanes_3d'] 25 | para_lanes = getattr(self, self.method)(lanes, **self.method_para) 26 | results['gt_lanes_3d'] = para_lanes 27 | return results 28 | 29 | def fix_pts_interp(self, input_data, n_points=11): 30 | 31 | lane_list = [] 32 | for lane in input_data: 33 | ls = lane[1] 34 | distances = np.linspace(0, ls.length, n_points) 35 | left_line = np.array([ls.interpolate(distance).coords[0] for distance in distances]) 36 | 37 | ls = lane[2] 38 | distances = np.linspace(0, ls.length, n_points) 39 | right_line = np.array([ls.interpolate(distance).coords[0] for distance in distances]) 40 | 41 | centerline = (left_line + right_line) / 2.0 42 | 43 | line = np.concatenate([centerline.flatten(), left_line.flatten(), right_line.flatten()]) 44 | lane_list.append(line) 45 | 46 | return np.array(lane_list, dtype=np.float32) 47 | 48 | @PIPELINES.register_module() 49 | class GenerateLaneSegmentMask(object): 50 | """Generate mask ground truth for segmentation head 51 | Args: 52 | results (dict): Result dict from loading pipeline. 53 | Returns: 54 | dict: Instance mask gt is added into result dict. 
55 | """ 56 | def __init__(self, points_num, map_size=[-50, -25, 50, 25], bev_h=100, bev_w=200) -> None: 57 | self.points_num = points_num 58 | self.map_size = map_size # [min_x, min_y, max_x, max_y] 59 | self.bev_h = bev_h 60 | self.bev_w = bev_w 61 | 62 | def __call__(self,results): 63 | results = self._generate_lanesegment_instance_mask(results) 64 | return results 65 | 66 | def _generate_lanesegment_instance_mask(self, results): 67 | gt_lanes = np.array(results['gt_lanes_3d']).reshape(-1, 3, self.points_num, 3) 68 | gt_left_lines = gt_lanes[:, 1] 69 | gt_right_lines = gt_lanes[:, 2] 70 | 71 | origin = np.array([self.bev_w // 2, self.bev_h // 2]) 72 | scale = np.array([self.bev_w / (self.map_size[2] - self.map_size[0]), self.bev_h / (self.map_size[3] - self.map_size[1])]) 73 | 74 | inst_masks = [] 75 | for idx, (left_line, right_line) in enumerate(zip(gt_left_lines, gt_right_lines)): 76 | 77 | segment_boundary = np.concatenate((left_line, right_line[::-1], left_line[0:1]), axis=0) 78 | mask = np.zeros((self.bev_h, self.bev_w), dtype=np.uint8) 79 | 80 | draw_coor = (segment_boundary[:, :2] * scale + origin).astype(np.int32) 81 | mask = cv2.fillPoly(mask, [draw_coor], 255) 82 | bitMask = (mask / 255) 83 | inst_masks.append(bitMask) 84 | 85 | results['gt_instance_masks'] = inst_masks 86 | 87 | return results 88 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors import * 2 | from .dense_heads import * 3 | from .modules import * 4 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .deformable_detr_head import CustomDeformableDETRHead 2 | from .relationship_head import RelationshipHead 3 | from .laneseg_head import LaneSegHead 4 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/dense_heads/relationship_head.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | from mmcv.utils import TORCH_VERSION, digit_version 13 | from mmdet.models.builder import HEADS, build_loss 14 | from mmcv.runner import BaseModule 15 | 16 | class MLP(nn.Module): 17 | 18 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers): 19 | super().__init__() 20 | self.num_layers = num_layers 21 | h = [hidden_dim] * (num_layers - 1) 22 | self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) 23 | 24 | def forward(self, x): 25 | for i, layer in enumerate(self.layers): 26 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 27 | return x 28 | 29 | @HEADS.register_module() 30 | class RelationshipHead(BaseModule): 31 | 32 | def __init__(self, 33 | in_channels_o1, 34 | in_channels_o2=None, 35 | shared_param=True, 36 | loss_rel=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=2.0, 40 | alpha=0.25), 41 | init_cfg=None): 42 | super().__init__() 43 | 44 | self.MLP_o1 = MLP(in_channels_o1, in_channels_o1, 128, 3) 45 | self.shared_param = shared_param 46 | if shared_param: 47 | self.MLP_o2 = self.MLP_o1 48 | else: 49 | self.MLP_o2 = MLP(in_channels_o2, in_channels_o2, 128, 3) 50 | self.classifier = MLP(256, 256, 1, 3) 51 | self.loss_rel = build_loss(loss_rel) 52 | 53 | def forward_train(self, o1_feats, o1_assign_results, o2_feats, o2_assign_results, gt_adj): 54 | rel_pred = self.forward(o1_feats, o2_feats) 55 | losses = self.loss(rel_pred, gt_adj, o1_assign_results, o2_assign_results) 56 | return losses 57 | 58 | def get_relationship(self, o1_feats, o2_feats): 59 | rel_pred = self.forward(o1_feats, o2_feats) 60 | rel_results = rel_pred.squeeze(-1).sigmoid() 61 | rel_results = [_ for _ in rel_results] 62 | return rel_results 63 | 64 | def forward(self, o1_feats, o2_feats): 65 | # feats: D, B, num_query, num_embedding 66 | o1_embeds = self.MLP_o1(o1_feats[-1]) 67 | o2_embeds = self.MLP_o2(o2_feats[-1]) 68 | 69 | num_query_o1 = o1_embeds.size(1) 70 | num_query_o2 = o2_embeds.size(1) 71 | o1_tensor = o1_embeds.unsqueeze(2).repeat(1, 1, num_query_o2, 1) 72 | o2_tensor = o2_embeds.unsqueeze(1).repeat(1, num_query_o1, 1, 1) 73 | 74 | relationship_tensor = torch.cat([o1_tensor, o2_tensor], dim=-1) 75 | relationship_pred = self.classifier(relationship_tensor) 76 | 77 | return relationship_pred 78 | 79 | def loss(self, rel_preds, gt_adjs, o1_assign_results, o2_assign_results): 80 | B, num_query_o1, num_query_o2, _ = rel_preds.size() 81 | o1_assign = o1_assign_results[-1] 82 | o1_pos_inds = o1_assign['pos_inds'] 83 | o1_pos_assigned_gt_inds = o1_assign['pos_assigned_gt_inds'] 84 | 85 | if self.shared_param: 86 | o2_assign = o1_assign 87 | o2_pos_inds = o1_pos_inds 88 | o2_pos_assigned_gt_inds = o1_pos_assigned_gt_inds 89 | else: 90 | o2_assign = o2_assign_results[-1] 91 | o2_pos_inds = o2_assign['pos_inds'] 92 | o2_pos_assigned_gt_inds = o2_assign['pos_assigned_gt_inds'] 93 | 94 | targets = [] 95 | masked_rel_preds = [] 96 | for i in range(B): 97 | gt_adj = gt_adjs[i] 98 | len_o1 = gt_adj.size(0) 99 | len_o2 = gt_adj.size(1) 100 | o1_pos_mask = o1_pos_assigned_gt_inds[i] < len_o1 101 | o2_pos_mask = o2_pos_assigned_gt_inds[i] < len_o2 102 | 103 | masked_rel_pred = rel_preds[i][o1_pos_inds[i]][:, o2_pos_inds[i]][o1_pos_mask][:, o2_pos_mask] 104 | masked_rel_preds.append(masked_rel_pred.view(-1, 1)) 105 | 106 | target 
= gt_adj[o1_pos_assigned_gt_inds[i][o1_pos_mask]][:, o2_pos_assigned_gt_inds[i][o2_pos_mask]] 107 | targets.append(1 - target.view(-1).long()) 108 | 109 | targets = torch.cat(targets, dim=0) 110 | rel_preds = torch.cat(masked_rel_preds, dim=0) 111 | if torch.numel(targets) == 0: 112 | return dict(loss_rel=rel_preds.sum() * 0) 113 | 114 | loss_rel = self.loss_rel(rel_preds, targets, avg_factor=targets.sum()) 115 | 116 | if digit_version(TORCH_VERSION) >= digit_version('1.8'): 117 | loss_rel = torch.nan_to_num(loss_rel) 118 | 119 | return dict(loss_rel=loss_rel) 120 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .lanesegnet import LaneSegNet 2 | from .lanesegnet_mapele_bucket import LaneSegNetMapElementBucket 3 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/detectors/lanesegnet.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import copy 8 | import numpy as np 9 | import torch 10 | 11 | from mmcv.runner import force_fp32, auto_fp16 12 | from mmdet.models import DETECTORS 13 | from mmdet.models.builder import build_head 14 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 15 | 16 | from ...utils.builder import build_bev_constructor 17 | 18 | 19 | @DETECTORS.register_module() 20 | class LaneSegNet(MVXTwoStageDetector): 21 | 22 | def __init__(self, 23 | bev_constructor=None, 24 | lane_head=None, 25 | lclc_head=None, 26 | bbox_head=None, 27 | lcte_head=None, 28 | video_test_mode=False, 29 | **kwargs): 30 | 31 | super(LaneSegNet, self).__init__(**kwargs) 32 | 33 | if bev_constructor is not None: 34 | self.bev_constructor = build_bev_constructor(bev_constructor) 35 | 36 | if lane_head is not None: 37 | lane_head.update(train_cfg=self.train_cfg.lane) 38 | self.pts_bbox_head = build_head(lane_head) 39 | else: 40 | self.pts_bbox_head = None 41 | 42 | if lclc_head is not None: 43 | self.lclc_head = build_head(lclc_head) 44 | else: 45 | self.lclc_head = None 46 | 47 | if bbox_head is not None: 48 | bbox_head.update(train_cfg=self.train_cfg.bbox) 49 | self.bbox_head = build_head(bbox_head) 50 | else: 51 | self.bbox_head = None 52 | 53 | if lcte_head is not None: 54 | self.lcte_head = build_head(lcte_head) 55 | else: 56 | self.lcte_head = None 57 | 58 | self.fp16_enabled = False 59 | 60 | # temporal 61 | self.video_test_mode = video_test_mode 62 | self.prev_frame_info = { 63 | 'prev_bev': None, 64 | 'scene_token': None, 65 | 'prev_pos': 0, 66 | 'prev_angle': 0, 67 | } 68 | 69 | def extract_img_feat(self, img, img_metas, len_queue=None): 70 | """Extract features of images.""" 71 | B = img.size(0) 72 | if img is not None: 73 | 74 | if img.dim() == 5 and img.size(0) == 1: 75 | img.squeeze_() 76 | elif img.dim() == 5 and img.size(0) > 1: 77 | B, N, C, H, W = img.size() 78 | img = img.reshape(B * N, C, H, W) 79 | img_feats = self.img_backbone(img) 80 | 81 | if isinstance(img_feats, dict): 82 | img_feats = 
list(img_feats.values()) 83 | else: 84 | return None 85 | if self.with_img_neck: 86 | img_feats = self.img_neck(img_feats) 87 | 88 | img_feats_reshaped = [] 89 | for img_feat in img_feats: 90 | BN, C, H, W = img_feat.size() 91 | if len_queue is not None: 92 | img_feats_reshaped.append(img_feat.view(int(B/len_queue), len_queue, int(BN / B), C, H, W)) 93 | else: 94 | img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W)) 95 | return img_feats_reshaped 96 | 97 | @auto_fp16(apply_to=('img')) 98 | def extract_feat(self, img, img_metas=None, len_queue=None): 99 | img_feats = self.extract_img_feat(img, img_metas, len_queue=len_queue) 100 | return img_feats 101 | 102 | def forward_dummy(self, img): 103 | dummy_metas = None 104 | return self.forward_test(img=img, img_metas=[[dummy_metas]]) 105 | 106 | def forward(self, return_loss=True, **kwargs): 107 | if return_loss: 108 | return self.forward_train(**kwargs) 109 | else: 110 | return self.forward_test(**kwargs) 111 | 112 | def obtain_history_bev(self, imgs_queue, img_metas_list): 113 | """Obtain history BEV features iteratively. To save GPU memory, gradients are not calculated. 114 | """ 115 | self.eval() 116 | 117 | with torch.no_grad(): 118 | prev_bev = None 119 | bs, len_queue, num_cams, C, H, W = imgs_queue.shape 120 | imgs_queue = imgs_queue.reshape(bs*len_queue, num_cams, C, H, W) 121 | img_feats_list = self.extract_feat(img=imgs_queue, len_queue=len_queue) 122 | for i in range(len_queue): 123 | img_metas = [each[i] for each in img_metas_list] 124 | img_feats = [each_scale[:, i] for each_scale in img_feats_list] 125 | prev_bev = self.bev_constructor(img_feats, img_metas, prev_bev) 126 | self.train() 127 | return prev_bev 128 | 129 | @auto_fp16(apply_to=('img', 'points')) 130 | def forward_train(self, 131 | img=None, 132 | img_metas=None, 133 | gt_lanes_3d=None, 134 | gt_lane_labels_3d=None, 135 | gt_lane_adj=None, 136 | gt_lane_left_type=None, 137 | gt_lane_right_type=None, 138 | gt_labels=None, 139 | gt_bboxes=None, 140 | gt_instance_masks=None, 141 | gt_bboxes_ignore=None, 142 | ): 143 | 144 | len_queue = img.size(1) 145 | prev_img = img[:, :-1, ...] 146 | img = img[:, -1, ...] 147 | 148 | if self.video_test_mode: 149 | prev_img_metas = copy.deepcopy(img_metas) 150 | prev_bev = self.obtain_history_bev(prev_img, prev_img_metas) 151 | else: 152 | prev_bev = None 153 | 154 | img_metas = [each[len_queue-1] for each in img_metas] 155 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 156 | bev_feats = self.bev_constructor(img_feats, img_metas, prev_bev) 157 | 158 | losses = dict() 159 | outs = self.pts_bbox_head(img_feats, bev_feats, img_metas) 160 | loss_inputs = [outs, gt_lanes_3d, gt_lane_labels_3d, gt_instance_masks, gt_lane_left_type, gt_lane_right_type] 161 | lane_losses, lane_assign_result = self.pts_bbox_head.loss(*loss_inputs, img_metas=img_metas) 162 | for loss in lane_losses: 163 | losses['lane_head.' + loss] = lane_losses[loss] 164 | lane_feats = outs['history_states'] 165 | 166 | if self.lclc_head is not None: 167 | lclc_losses = self.lclc_head.forward_train(lane_feats, lane_assign_result, lane_feats, lane_assign_result, gt_lane_adj) 168 | for loss in lclc_losses: 169 | losses['lclc_head.' 
+ loss] = lclc_losses[loss] 170 | 171 | return losses 172 | 173 | def forward_test(self, img_metas, img=None, **kwargs): 174 | for var, name in [(img_metas, 'img_metas')]: 175 | if not isinstance(var, list): 176 | raise TypeError('{} must be a list, but got {}'.format( 177 | name, type(var))) 178 | img = [img] if img is None else img 179 | 180 | if img_metas[0]['scene_token'] != self.prev_frame_info['scene_token']: 181 | # the first sample of each scene is truncated 182 | self.prev_frame_info['prev_bev'] = None 183 | # update idx 184 | self.prev_frame_info['scene_token'] = img_metas[0]['scene_token'] 185 | 186 | # do not use temporal information 187 | if not self.video_test_mode: 188 | self.prev_frame_info['prev_bev'] = None 189 | 190 | # Get the delta of ego position and angle between two timestamps. 191 | tmp_pos = copy.deepcopy(img_metas[0]['can_bus'][:3]) 192 | tmp_angle = copy.deepcopy(img_metas[0]['can_bus'][-1]) 193 | if self.prev_frame_info['prev_bev'] is not None: 194 | img_metas[0]['can_bus'][:3] -= self.prev_frame_info['prev_pos'] 195 | img_metas[0]['can_bus'][-1] -= self.prev_frame_info['prev_angle'] 196 | else: 197 | img_metas[0]['can_bus'][-1] = 0 198 | img_metas[0]['can_bus'][:3] = 0 199 | 200 | new_prev_bev, results_list = self.simple_test( 201 | img_metas, img, prev_bev=self.prev_frame_info['prev_bev'], **kwargs) 202 | # During inference, we save the BEV features and ego motion of each timestamp. 203 | self.prev_frame_info['prev_pos'] = tmp_pos 204 | self.prev_frame_info['prev_angle'] = tmp_angle 205 | self.prev_frame_info['prev_bev'] = new_prev_bev 206 | return results_list 207 | 208 | def simple_test_pts(self, x, img_metas, img=None, prev_bev=None, rescale=False): 209 | """Test function""" 210 | batchsize = len(img_metas) 211 | 212 | bev_feats = self.bev_constructor(x, img_metas, prev_bev) 213 | outs = self.pts_bbox_head(x, bev_feats, img_metas) 214 | 215 | lane_results = self.pts_bbox_head.get_lanes( 216 | outs, img_metas, rescale=rescale) 217 | 218 | if self.lclc_head is not None: 219 | lane_feats = outs['history_states'] 220 | lsls_results = self.lclc_head.get_relationship(lane_feats, lane_feats) 221 | lsls_results = [result.detach().cpu().numpy() for result in lsls_results] 222 | else: 223 | lsls_results = [None for _ in range(batchsize)] 224 | 225 | return bev_feats, lane_results, lsls_results 226 | 227 | def simple_test(self, img_metas, img=None, prev_bev=None, rescale=False): 228 | """Test function without augmentaiton.""" 229 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 230 | 231 | results_list = [dict() for i in range(len(img_metas))] 232 | new_prev_bev, lane_results, lsls_results = self.simple_test_pts( 233 | img_feats, img_metas, img, prev_bev, rescale=rescale) 234 | for result_dict, lane, lsls in zip(results_list, lane_results, lsls_results): 235 | result_dict['lane_results'] = lane 236 | result_dict['bbox_results'] = None 237 | result_dict['lsls_results'] = lsls 238 | result_dict['lste_results'] = None 239 | 240 | return new_prev_bev, results_list 241 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/detectors/lanesegnet_mapele_bucket.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) 
OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import copy 8 | import numpy as np 9 | import torch 10 | 11 | from mmcv.runner import force_fp32, auto_fp16 12 | from mmdet.models import DETECTORS 13 | from mmdet.models.builder import build_head 14 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 15 | 16 | from ...utils.builder import build_bev_constructor 17 | 18 | 19 | @DETECTORS.register_module() 20 | class LaneSegNetMapElementBucket(MVXTwoStageDetector): 21 | 22 | def __init__(self, 23 | bev_constructor=None, 24 | lane_head=None, 25 | lsls_head=None, 26 | bbox_head=None, 27 | lste_head=None, 28 | area_head=None, 29 | video_test_mode=False, 30 | **kwargs): 31 | 32 | super(LaneSegNetMapElementBucket, self).__init__(**kwargs) 33 | 34 | if bev_constructor is not None: 35 | self.bev_constructor = build_bev_constructor(bev_constructor) 36 | 37 | if lane_head is not None: 38 | lane_head.update(train_cfg=self.train_cfg.lane) 39 | self.pts_bbox_head = build_head(lane_head) 40 | else: 41 | self.pts_bbox_head = None 42 | 43 | if lsls_head is not None: 44 | self.lsls_head = build_head(lsls_head) 45 | else: 46 | self.lsls_head = None 47 | 48 | if bbox_head is not None: 49 | bbox_head.update(train_cfg=self.train_cfg.bbox) 50 | self.bbox_head = build_head(bbox_head) 51 | else: 52 | self.bbox_head = None 53 | 54 | if lste_head is not None: 55 | self.lste_head = build_head(lste_head) 56 | else: 57 | self.lste_head = None 58 | 59 | if area_head is not None: 60 | area_head.update(train_cfg=self.train_cfg.area) 61 | self.area_head = build_head(area_head) 62 | else: 63 | self.area_head = None 64 | 65 | self.fp16_enabled = False 66 | 67 | # temporal 68 | self.video_test_mode = video_test_mode 69 | self.prev_frame_info = { 70 | 'prev_bev': None, 71 | 'scene_token': None, 72 | 'prev_pos': 0, 73 | 'prev_angle': 0, 74 | } 75 | 76 | def extract_img_feat(self, img, img_metas, len_queue=None): 77 | """Extract features of images.""" 78 | B = img.size(0) 79 | if img is not None: 80 | 81 | if img.dim() == 5 and img.size(0) == 1: 82 | img.squeeze_() 83 | elif img.dim() == 5 and img.size(0) > 1: 84 | B, N, C, H, W = img.size() 85 | img = img.reshape(B * N, C, H, W) 86 | img_feats = self.img_backbone(img) 87 | 88 | if isinstance(img_feats, dict): 89 | img_feats = list(img_feats.values()) 90 | else: 91 | return None 92 | if self.with_img_neck: 93 | img_feats = self.img_neck(img_feats) 94 | 95 | img_feats_reshaped = [] 96 | for img_feat in img_feats: 97 | BN, C, H, W = img_feat.size() 98 | if len_queue is not None: 99 | img_feats_reshaped.append(img_feat.view(int(B/len_queue), len_queue, int(BN / B), C, H, W)) 100 | else: 101 | img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W)) 102 | return img_feats_reshaped 103 | 104 | @auto_fp16(apply_to=('img',)) 105 | def extract_feat(self, img, img_metas=None, len_queue=None): 106 | img_feats = self.extract_img_feat(img, img_metas, len_queue=len_queue) 107 | return img_feats 108 | 109 | def forward_dummy(self, img): 110 | dummy_metas = None 111 | return self.forward_test(img=img, img_metas=[[dummy_metas]]) 112 | 113 | def forward(self, return_loss=True, **kwargs): 114 | if return_loss: 115 | return self.forward_train(**kwargs) 116 | else: 117 | return self.forward_test(**kwargs) 118 | 119 | def obtain_history_bev(self, imgs_queue, img_metas_list): 120 | """Obtain history BEV features iteratively. To save GPU memory, gradients are not calculated. 
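Each past frame in the queue is encoded under torch.no_grad() with the model switched to eval(); the BEV of step i seeds step i + 1 as prev_bev, and only the final prev_bev is returned to condition the current frame.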
121 | """ 122 | self.eval() 123 | 124 | with torch.no_grad(): 125 | prev_bev = None 126 | bs, len_queue, num_cams, C, H, W = imgs_queue.shape 127 | imgs_queue = imgs_queue.reshape(bs*len_queue, num_cams, C, H, W) 128 | img_feats_list = self.extract_feat(img=imgs_queue, len_queue=len_queue) 129 | for i in range(len_queue): 130 | img_metas = [each[i] for each in img_metas_list] 131 | img_feats = [each_scale[:, i] for each_scale in img_feats_list] 132 | prev_bev = self.bev_constructor(img_feats, img_metas, prev_bev) 133 | self.train() 134 | return prev_bev 135 | 136 | @auto_fp16(apply_to=('img',)) 137 | def forward_train(self, 138 | img=None, 139 | img_metas=None, 140 | gt_lanes_3d=None, 141 | gt_lane_labels_3d=None, 142 | gt_lane_adj=None, 143 | gt_lane_left_type=None, 144 | gt_lane_right_type=None, 145 | gt_labels=None, 146 | gt_bboxes=None, 147 | gt_lane_lste_adj=None, 148 | gt_areas_3d=None, 149 | gt_area_labels_3d=None, 150 | gt_instance_masks=None, 151 | gt_bboxes_ignore=None, 152 | ): 153 | 154 | len_queue = img.size(1) 155 | prev_img = img[:, :-1, ...] 156 | img = img[:, -1, ...] 157 | 158 | if self.video_test_mode: 159 | prev_img_metas = copy.deepcopy(img_metas) 160 | prev_bev = self.obtain_history_bev(prev_img, prev_img_metas) 161 | else: 162 | prev_bev = None 163 | 164 | img_metas = [each[len_queue-1] for each in img_metas] 165 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 166 | bev_feats = self.bev_constructor(img_feats, img_metas, prev_bev) 167 | 168 | losses = dict() 169 | outs = self.pts_bbox_head(img_feats, bev_feats, img_metas) 170 | loss_inputs = [outs, gt_lanes_3d, gt_lane_labels_3d, gt_instance_masks, gt_lane_left_type, gt_lane_right_type] 171 | lane_losses, lane_assign_result = self.pts_bbox_head.loss(*loss_inputs, img_metas=img_metas) 172 | for loss in lane_losses: 173 | losses['lane_head.' + loss] = lane_losses[loss] 174 | lane_feats = outs['history_states'] 175 | 176 | if self.lsls_head is not None: 177 | lsls_losses = self.lsls_head.forward_train(lane_feats, lane_assign_result, lane_feats, lane_assign_result, gt_lane_adj) 178 | for loss in lsls_losses: 179 | losses['lsls_head.' + loss] = lsls_losses[loss] 180 | 181 | if self.bbox_head is not None: 182 | front_view_img_feats = [lvl[:, 0] for lvl in img_feats] 183 | batch_input_shape = tuple(img[0, 0].size()[-2:]) 184 | bbox_img_metas = [] 185 | for img_meta in img_metas: 186 | bbox_img_metas.append( 187 | dict( 188 | batch_input_shape=batch_input_shape, 189 | img_shape=img_meta['img_shape'][0], 190 | scale_factor=img_meta['scale_factor'][0], 191 | crop_shape=img_meta['crop_shape'][0])) 192 | img_meta['batch_input_shape'] = batch_input_shape 193 | 194 | te_losses = {} 195 | bbox_outs = self.bbox_head(front_view_img_feats, bbox_img_metas) 196 | bbox_losses, te_assign_result = self.bbox_head.loss(bbox_outs, gt_bboxes, gt_labels, bbox_img_metas, gt_bboxes_ignore) 197 | for loss in bbox_losses: 198 | te_losses['bbox_head.' + loss] = bbox_losses[loss] 199 | 200 | if self.lste_head is not None: 201 | te_feats = bbox_outs['history_states'] 202 | lste_losses = self.lste_head.forward_train(lane_feats, lane_assign_result, te_feats, te_assign_result, gt_lane_lste_adj) 203 | for loss in lste_losses: 204 | te_losses['lste_head.' 
+ loss] = lste_losses[loss] 205 | 206 | num_gt_bboxes = sum([len(gt) for gt in gt_labels]) 207 | if num_gt_bboxes == 0: 208 | for loss in te_losses: 209 | te_losses[loss] *= 0 210 | 211 | losses.update(te_losses) 212 | 213 | if self.area_head is not None: 214 | outs = self.area_head(img_feats, bev_feats, img_metas) 215 | loss_inputs = [outs, gt_areas_3d, gt_area_labels_3d] 216 | area_losses, _ = self.area_head.loss(*loss_inputs, img_metas=img_metas) 217 | for loss in area_losses: 218 | losses['area_head.' + loss] = area_losses[loss] 219 | 220 | return losses 221 | 222 | def forward_test(self, img_metas, img=None, **kwargs): 223 | for var, name in [(img_metas, 'img_metas')]: 224 | if not isinstance(var, list): 225 | raise TypeError('{} must be a list, but got {}'.format( 226 | name, type(var))) 227 | img = [img] if img is None else img 228 | 229 | if img_metas[0]['scene_token'] != self.prev_frame_info['scene_token']: 230 | # the first sample of each scene is truncated 231 | self.prev_frame_info['prev_bev'] = None 232 | # update idx 233 | self.prev_frame_info['scene_token'] = img_metas[0]['scene_token'] 234 | 235 | # do not use temporal information 236 | if not self.video_test_mode: 237 | self.prev_frame_info['prev_bev'] = None 238 | 239 | # Get the delta of ego position and angle between two timestamps. 240 | tmp_pos = copy.deepcopy(img_metas[0]['can_bus'][:3]) 241 | tmp_angle = copy.deepcopy(img_metas[0]['can_bus'][-1]) 242 | if self.prev_frame_info['prev_bev'] is not None: 243 | img_metas[0]['can_bus'][:3] -= self.prev_frame_info['prev_pos'] 244 | img_metas[0]['can_bus'][-1] -= self.prev_frame_info['prev_angle'] 245 | else: 246 | img_metas[0]['can_bus'][-1] = 0 247 | img_metas[0]['can_bus'][:3] = 0 248 | 249 | new_prev_bev, results_list = self.simple_test( 250 | img_metas, img, prev_bev=self.prev_frame_info['prev_bev'], **kwargs) 251 | # During inference, we save the BEV features and ego motion of each timestamp. 
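# prev_pos / prev_angle keep this frame's raw can_bus readings so that the next frame can
# compute its ego-motion delta against them, and prev_bev seeds the temporal attention of
# the next BEV construction.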
252 | self.prev_frame_info['prev_pos'] = tmp_pos 253 | self.prev_frame_info['prev_angle'] = tmp_angle 254 | self.prev_frame_info['prev_bev'] = new_prev_bev 255 | return results_list 256 | 257 | def simple_test_pts(self, x, img_metas, img=None, prev_bev=None, rescale=False): 258 | """Test function""" 259 | batchsize = len(img_metas) 260 | 261 | bev_feats = self.bev_constructor(x, img_metas, prev_bev) 262 | outs = self.pts_bbox_head(x, bev_feats, img_metas) 263 | 264 | lane_results = self.pts_bbox_head.get_lanes( 265 | outs, img_metas, rescale=rescale) 266 | 267 | if self.lsls_head is not None: 268 | lane_feats = outs['history_states'] 269 | lsls_results = self.lsls_head.get_relationship(lane_feats, lane_feats) 270 | lsls_results = [result.detach().cpu().numpy() for result in lsls_results] 271 | else: 272 | lsls_results = [None for _ in range(batchsize)] 273 | 274 | if self.bbox_head is not None: 275 | front_view_img_feats = [lvl[:, 0] for lvl in x] 276 | batch_input_shape = tuple(img[0, 0].size()[-2:]) 277 | bbox_img_metas = [] 278 | for img_meta in img_metas: 279 | bbox_img_metas.append( 280 | dict( 281 | batch_input_shape=batch_input_shape, 282 | img_shape=img_meta['img_shape'][0], 283 | scale_factor=img_meta['scale_factor'][0], 284 | crop_shape=img_meta['crop_shape'][0])) 285 | img_meta['batch_input_shape'] = batch_input_shape 286 | bbox_outs = self.bbox_head(front_view_img_feats, bbox_img_metas) 287 | bbox_results = self.bbox_head.get_bboxes(bbox_outs, bbox_img_metas, rescale=rescale) 288 | else: 289 | bbox_results = [None for _ in range(batchsize)] 290 | 291 | if self.bbox_head is not None and self.lste_head is not None: 292 | te_feats = bbox_outs['history_states'] 293 | lste_results = self.lste_head.get_relationship(lane_feats, te_feats) 294 | lste_results = [result.detach().cpu().numpy() for result in lste_results] 295 | else: 296 | lste_results = [None for _ in range(batchsize)] 297 | 298 | if self.area_head is not None: 299 | outs = self.area_head(x, bev_feats, img_metas) 300 | area_results = self.area_head.get_areas( 301 | outs, img_metas, rescale=rescale) 302 | 303 | return bev_feats, lane_results, lsls_results, bbox_results, lste_results, area_results 304 | 305 | def simple_test(self, img_metas, img=None, prev_bev=None, rescale=False): 306 | """Test function without augmentaiton.""" 307 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 308 | 309 | results_list = [dict() for i in range(len(img_metas))] 310 | new_prev_bev, lane_results, lsls_results, bbox_results, lste_results, area_results = self.simple_test_pts( 311 | img_feats, img_metas, img, prev_bev, rescale=rescale) 312 | for result_dict, lane, lsls, bbox, lste, area in zip( 313 | results_list, lane_results, lsls_results, bbox_results, lste_results, area_results): 314 | result_dict['lane_results'] = lane 315 | result_dict['lsls_results'] = lsls 316 | result_dict['bbox_results'] = bbox 317 | result_dict['lste_results'] = lste 318 | result_dict['area_results'] = area 319 | return new_prev_bev, results_list 320 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_constructer import BEVFormerConstructer 2 | from .laneseg_transformer import LaneSegNetTransformer 3 | from .laneseg_decoder import LaneSegNetDecoder 4 | from .lane_attention import LaneAttention 5 | -------------------------------------------------------------------------------- 
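For reference, a minimal sketch (not part of the repository) of how the per-frame result dictionaries returned by the detectors above could be inspected; the key names mirror simple_test(), while everything else is an illustrative assumption.

# hypothetical_inspect_results.py -- illustrative only
def summarize_frame(result_dict):
    """Report which LaneSegNet outputs are present for one frame."""
    for key in ('lane_results', 'lsls_results', 'bbox_results',
                'lste_results', 'area_results'):
        value = result_dict.get(key)
        print(f"{key}: {'missing' if value is None else type(value).__name__}")

# results_list is what forward_test() returns for a batch of frames:
# for frame_result in results_list:
#     summarize_frame(frame_result)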
/projects/lanesegnet/models/modules/bevformer_constructer.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | from torch.nn.init import normal_ 11 | from torchvision.transforms.functional import rotate 12 | 13 | from mmcv.cnn import xavier_init 14 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence, build_positional_encoding 15 | from mmcv.runner.base_module import BaseModule 16 | 17 | from ...utils.builder import BEV_CONSTRUCTOR 18 | from projects.bevformer.modules.temporal_self_attention import TemporalSelfAttention 19 | from projects.bevformer.modules.spatial_cross_attention import MSDeformableAttention3D 20 | from projects.bevformer.modules.decoder import CustomMSDeformableAttention 21 | 22 | 23 | @BEV_CONSTRUCTOR.register_module() 24 | class BEVFormerConstructer(BaseModule): 25 | """Implements the BEVFormer BEV Constructer. 26 | Args: 27 | as_two_stage (bool): Generate query from encoder features. 28 | Default: False. 29 | num_feature_levels (int): Number of feature maps from FPN: 30 | Default: 4. 31 | two_stage_num_proposals (int): Number of proposals when set 32 | `as_two_stage` as True. Default: 300. 33 | """ 34 | 35 | def __init__(self, 36 | num_feature_levels=4, 37 | num_cams=6, 38 | embed_dims=256, 39 | rotate_prev_bev=True, 40 | use_shift=True, 41 | use_can_bus=True, 42 | can_bus_norm=True, 43 | use_cams_embeds=True, 44 | pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0], 45 | bev_h=200, 46 | bev_w=200, 47 | rotate_center=[100, 100], 48 | encoder=None, 49 | positional_encoding=None, 50 | **kwargs): 51 | super(BEVFormerConstructer, self).__init__(**kwargs) 52 | self.embed_dims = embed_dims 53 | self.num_feature_levels = num_feature_levels 54 | self.num_cams = num_cams 55 | self.fp16_enabled = False 56 | 57 | self.rotate_prev_bev = rotate_prev_bev 58 | self.use_shift = use_shift 59 | self.use_can_bus = use_can_bus 60 | self.can_bus_norm = can_bus_norm 61 | self.use_cams_embeds = use_cams_embeds 62 | self.encoder = build_transformer_layer_sequence(encoder) 63 | self.positional_encoding = build_positional_encoding(positional_encoding) 64 | 65 | self.pc_range = pc_range 66 | self.real_w = self.pc_range[3] - self.pc_range[0] 67 | self.real_h = self.pc_range[4] - self.pc_range[1] 68 | self.bev_h = bev_h 69 | self.bev_w = bev_w 70 | self.rotate_center = rotate_center 71 | 72 | self.init_layers() 73 | 74 | def init_layers(self): 75 | self.bev_embedding = nn.Embedding( 76 | self.bev_h * self.bev_w, self.embed_dims) 77 | self.level_embeds = nn.Parameter(torch.Tensor( 78 | self.num_feature_levels, self.embed_dims)) 79 | self.cams_embeds = nn.Parameter( 80 | torch.Tensor(self.num_cams, self.embed_dims)) 81 | self.can_bus_mlp = nn.Sequential( 82 | nn.Linear(18, self.embed_dims // 2), 83 | nn.ReLU(inplace=True), 84 | nn.Linear(self.embed_dims // 2, self.embed_dims), 85 | nn.ReLU(inplace=True), 86 | ) 87 | if self.can_bus_norm: 88 | self.can_bus_mlp.add_module('norm', nn.LayerNorm(self.embed_dims)) 89 | 90 | def init_weights(self): 91 | """Initialize the transformer weights.""" 92 
| for p in self.parameters(): 93 | if p.dim() > 1: 94 | nn.init.xavier_uniform_(p) 95 | for m in self.modules(): 96 | if isinstance(m, MSDeformableAttention3D) or isinstance(m, TemporalSelfAttention) \ 97 | or isinstance(m, CustomMSDeformableAttention): 98 | try: 99 | m.init_weight() 100 | except AttributeError: 101 | m.init_weights() 102 | normal_(self.level_embeds) 103 | normal_(self.cams_embeds) 104 | xavier_init(self.can_bus_mlp, distribution='uniform', bias=0.) 105 | 106 | # @auto_fp16(apply_to=('mlvl_feats', 'prev_bev')) 107 | def forward(self, mlvl_feats, img_metas, prev_bev=None, **kwargs): 108 | """ 109 | obtain bev features. 110 | """ 111 | bs, num_cam, _, _, _ = mlvl_feats[0].shape 112 | dtype = mlvl_feats[0].dtype 113 | 114 | bev_queries = self.bev_embedding.weight.to(dtype) 115 | bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1) 116 | 117 | bev_mask = torch.zeros((bs, self.bev_h, self.bev_w), 118 | device=bev_queries.device).to(dtype) 119 | bev_pos = self.positional_encoding(bev_mask).to(dtype) 120 | bev_pos = bev_pos.flatten(2).permute(2, 0, 1) 121 | 122 | # BEVFormer assumes the coords are x-right and y-forward for the nuScenes lidar 123 | # but OpenLane-V2's coords are x-forward and y-left 124 | # here is a fix for any lidar coords, the shift is calculated by the rotation matrix 125 | delta_global = np.array([each['can_bus'][:3] for each in img_metas]) 126 | lidar2global_rotation = np.array([each['lidar2global_rotation'] for each in img_metas]) 127 | delta_lidar = [] 128 | for i in range(bs): 129 | delta_lidar.append(np.linalg.inv(lidar2global_rotation[i]) @ delta_global[i]) 130 | delta_lidar = np.array(delta_lidar) 131 | shift_y = delta_lidar[:, 1] / self.real_h 132 | shift_x = delta_lidar[:, 0] / self.real_w 133 | shift_y = shift_y * self.use_shift 134 | shift_x = shift_x * self.use_shift 135 | shift = bev_queries.new_tensor([shift_x, shift_y]).permute(1, 0) # xy, bs -> bs, xy 136 | 137 | if prev_bev is not None: 138 | if prev_bev.shape[1] == self.bev_h * self.bev_w: 139 | prev_bev = prev_bev.permute(1, 0, 2) 140 | if self.rotate_prev_bev: 141 | for i in range(bs): 142 | # num_prev_bev = prev_bev.size(1) 143 | rotation_angle = img_metas[i]['can_bus'][-1] 144 | tmp_prev_bev = prev_bev[:, i].reshape( 145 | self.bev_h, self.bev_w, -1).permute(2, 0, 1) 146 | tmp_prev_bev = rotate(tmp_prev_bev, rotation_angle, 147 | center=self.rotate_center) 148 | tmp_prev_bev = tmp_prev_bev.permute(1, 2, 0).reshape( 149 | self.bev_h * self.bev_w, 1, -1) 150 | prev_bev[:, i] = tmp_prev_bev[:, 0] 151 | 152 | # add can bus signals 153 | can_bus = bev_queries.new_tensor( 154 | [each['can_bus'] for each in img_metas]) # [:, :] 155 | can_bus = self.can_bus_mlp(can_bus)[None, :, :] 156 | bev_queries = bev_queries + can_bus * self.use_can_bus 157 | 158 | feat_flatten = [] 159 | spatial_shapes = [] 160 | for lvl, feat in enumerate(mlvl_feats): 161 | bs, num_cam, c, h, w = feat.shape 162 | spatial_shape = (h, w) 163 | feat = feat.flatten(3).permute(1, 0, 3, 2) 164 | if self.use_cams_embeds: 165 | feat = feat + self.cams_embeds[:, None, None, :].to(feat.dtype) 166 | feat = feat + self.level_embeds[None, 167 | None, lvl:lvl + 1, :].to(feat.dtype) 168 | spatial_shapes.append(spatial_shape) 169 | feat_flatten.append(feat) 170 | 171 | feat_flatten = torch.cat(feat_flatten, 2) 172 | spatial_shapes = torch.as_tensor( 173 | spatial_shapes, dtype=torch.long, device=bev_pos.device) 174 | level_start_index = torch.cat((spatial_shapes.new_zeros( 175 | (1,)), spatial_shapes.prod(1).cumsum(0)[:-1])) 176 | 177 
| feat_flatten = feat_flatten.permute( 178 | 0, 2, 1, 3) # (num_cam, H*W, bs, embed_dims) 179 | 180 | bev_embed = self.encoder( 181 | bev_queries, 182 | feat_flatten, 183 | feat_flatten, 184 | bev_h=self.bev_h, 185 | bev_w=self.bev_w, 186 | bev_pos=bev_pos, 187 | spatial_shapes=spatial_shapes, 188 | level_start_index=level_start_index, 189 | prev_bev=prev_bev, 190 | shift=shift, 191 | img_metas=img_metas, 192 | **kwargs 193 | ) 194 | 195 | return bev_embed 196 | 197 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/modules/lane_attention.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import warnings 8 | import math 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import mmcv 14 | from mmcv.cnn import xavier_init, constant_init 15 | from mmcv.cnn.bricks.registry import ATTENTION 16 | from mmcv.runner.base_module import BaseModule 17 | from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch 18 | from projects.bevformer.modules.multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32 19 | 20 | 21 | @ATTENTION.register_module() 22 | class LaneAttention(BaseModule): 23 | 24 | def __init__(self, 25 | embed_dims=256, 26 | num_heads=8, 27 | num_levels=1, 28 | num_points=32, 29 | im2col_step=64, 30 | dropout=0.1, 31 | batch_first=False, 32 | norm_cfg=None, 33 | init_cfg=None): 34 | super().__init__(init_cfg) 35 | if embed_dims % num_heads != 0: 36 | raise ValueError(f'embed_dims must be divisible by num_heads, ' 37 | f'but got {embed_dims} and {num_heads}') 38 | dim_per_head = embed_dims // num_heads 39 | self.norm_cfg = norm_cfg 40 | self.dropout = nn.Dropout(dropout) 41 | self.batch_first = batch_first 42 | self.fp16_enabled = False 43 | 44 | # you'd better set dim_per_head to a power of 2 45 | # which is more efficient in the CUDA implementation 46 | def _is_power_of_2(n): 47 | if (not isinstance(n, int)) or (n < 0): 48 | raise ValueError( 49 | 'invalid input for _is_power_of_2: {} (type: {})'.format( 50 | n, type(n))) 51 | return (n & (n - 1) == 0) and n != 0 52 | 53 | if not _is_power_of_2(dim_per_head): 54 | warnings.warn( 55 | "You'd better set embed_dims in " 56 | 'MultiScaleDeformAttention to make ' 57 | 'the dimension of each attention head a power of 2 ' 58 | 'which is more efficient in our CUDA implementation.') 59 | 60 | self.im2col_step = im2col_step 61 | self.embed_dims = embed_dims 62 | self.num_levels = num_levels 63 | self.num_heads = num_heads 64 | self.num_points = num_points 65 | self.sampling_offsets = nn.Linear( 66 | embed_dims, num_heads * num_levels * num_points * 2) 67 | self.attention_weights = nn.Linear(embed_dims, 68 | num_heads * num_levels * num_points) 69 | self.value_proj = nn.Linear(embed_dims, embed_dims) 70 | self.output_proj = nn.Linear(embed_dims, embed_dims) 71 | self.init_weights() 72 | 73 | def init_weights(self): 74 | """Default initialization for Parameters of Module.""" 75 | constant_init(self.sampling_offsets, 0.) 
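# The sampling-offset bias constructed below follows the Deformable-DETR initialization:
# 8 unit directions spaced 2*pi/8 apart are projected onto the unit square, tiled over
# heads and levels, and the num_points // 8 concentric rings are scaled by (i + 1) so
# that later sampling points start progressively farther from the reference location.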
76 | thetas = torch.arange( 77 | 8, dtype=torch.float32) * (2.0 * math.pi / 8) 78 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 79 | grid_init = (grid_init / 80 | grid_init.abs().max(-1, keepdim=True)[0]).view( 81 | 1, 1, 8, 1, 2).repeat(self.num_heads, self.num_levels, 1, self.num_points // 8, 1) 82 | for i in range(self.num_points // 8): 83 | grid_init[:, :, :, i, :] *= i + 1 84 | grid_init = grid_init.view(self.num_heads, self.num_levels, self.num_points, 2) 85 | 86 | self.sampling_offsets.bias.data = grid_init.view(-1) 87 | constant_init(self.attention_weights, val=0., bias=0.) 88 | xavier_init(self.value_proj, distribution='uniform', bias=0.) 89 | xavier_init(self.output_proj, distribution='uniform', bias=0.) 90 | self._is_init = True 91 | 92 | def forward(self, 93 | query, 94 | key=None, 95 | value=None, 96 | identity=None, 97 | query_pos=None, 98 | key_padding_mask=None, 99 | reference_points=None, 100 | spatial_shapes=None, 101 | level_start_index=None, 102 | flag='decoder', 103 | **kwargs): 104 | 105 | if value is None: 106 | value = query 107 | 108 | if identity is None: 109 | identity = query 110 | if query_pos is not None: 111 | query = query + query_pos 112 | if not self.batch_first: 113 | # change to (bs, num_query ,embed_dims) 114 | query = query.permute(1, 0, 2) 115 | value = value.permute(1, 0, 2) 116 | 117 | bs, num_query, _ = query.shape 118 | bs, num_value, _ = value.shape 119 | assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value 120 | 121 | value = self.value_proj(value) 122 | if key_padding_mask is not None: 123 | value = value.masked_fill(key_padding_mask[..., None], 0.0) 124 | value = value.view(bs, num_value, self.num_heads, -1) 125 | 126 | sampling_offsets = self.sampling_offsets(query).view( 127 | bs, num_query, self.num_heads, self.num_levels, self.num_points, 2) 128 | attention_weights = self.attention_weights(query).view( 129 | bs, num_query, self.num_heads, self.num_levels * self.num_points) 130 | attention_weights = attention_weights.softmax(-1) 131 | 132 | attention_weights = attention_weights.view(bs, num_query, 133 | self.num_heads, 134 | self.num_levels, 135 | self.num_points) 136 | if reference_points.shape[-1] == 2: 137 | reference_points = reference_points.permute(0, 1, 3, 2, 4) 138 | offset_normalizer = torch.stack( 139 | [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) 140 | sampling_locations = reference_points[:, :, :, :, None, :] \ 141 | + sampling_offsets \ 142 | / offset_normalizer[None, None, None, :, None, :] 143 | else: 144 | raise ValueError( 145 | f'Last dim of reference_points must be' 146 | f' 2, but get {reference_points.shape[-1]} instead.') 147 | if torch.cuda.is_available() and value.is_cuda: 148 | 149 | # using fp16 deformable attention is unstable because it performs many sum operations 150 | if value.dtype == torch.float16: 151 | MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32 152 | else: 153 | MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32 154 | output = MultiScaleDeformableAttnFunction.apply( 155 | value, spatial_shapes, level_start_index, sampling_locations, 156 | attention_weights, self.im2col_step) 157 | else: 158 | output = multi_scale_deformable_attn_pytorch( 159 | value, spatial_shapes, sampling_locations, attention_weights) 160 | 161 | output = self.output_proj(output) 162 | 163 | if not self.batch_first: 164 | # (num_query, bs ,embed_dims) 165 | output = output.permute(1, 0, 2) 166 | 167 | return self.dropout(output) + identity 168 
| -------------------------------------------------------------------------------- /projects/lanesegnet/models/modules/laneseg_decoder.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import torch 8 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE, TRANSFORMER_LAYER 9 | from mmcv.cnn.bricks.transformer import TransformerLayerSequence, BaseTransformerLayer 10 | from mmdet.models.utils.transformer import inverse_sigmoid 11 | 12 | 13 | @TRANSFORMER_LAYER_SEQUENCE.register_module() 14 | class LaneSegNetDecoder(TransformerLayerSequence): 15 | 16 | def __init__(self, 17 | *args, 18 | return_intermediate=False, 19 | pc_range=None, 20 | sample_idx=[0, 3, 6, 9], # num_ref_pts = len(sample_idx) * 2 21 | pts_dim=3, 22 | **kwargs): 23 | super(LaneSegNetDecoder, self).__init__(*args, **kwargs) 24 | self.return_intermediate = return_intermediate 25 | self.fp16_enabled = False 26 | self.pc_range = pc_range 27 | self.sample_idx = sample_idx 28 | self.pts_dim = pts_dim 29 | 30 | def forward(self, 31 | query, 32 | *args, 33 | reference_points=None, 34 | reg_branches=None, 35 | key_padding_mask=None, 36 | **kwargs): 37 | 38 | output = query 39 | intermediate = [] 40 | intermediate_reference_points = [] 41 | lane_ref_points = reference_points[:, :, self.sample_idx * 2, :] 42 | for lid, layer in enumerate(self.layers): 43 | # BS NUM_QUERY NUM_LEVEL NUM_REFPTS 3 44 | reference_points_input = lane_ref_points[..., :2].unsqueeze(2) 45 | output = layer( 46 | output, 47 | *args, 48 | reference_points=reference_points_input, 49 | key_padding_mask=key_padding_mask, 50 | **kwargs) 51 | output = output.permute(1, 0, 2) 52 | 53 | if reg_branches is not None: 54 | reg_center = reg_branches[0] 55 | reg_offset = reg_branches[1] 56 | 57 | tmp = reg_center[lid](output) 58 | bs, num_query, _ = tmp.shape 59 | tmp = tmp.view(bs, num_query, -1, self.pts_dim) 60 | 61 | assert reference_points.shape[-1] == self.pts_dim 62 | 63 | tmp = tmp + inverse_sigmoid(reference_points) 64 | tmp = tmp.sigmoid() 65 | reference_points = tmp.detach() 66 | 67 | coord = tmp.clone() 68 | coord[..., 0] = coord[..., 0] * (self.pc_range[3] - self.pc_range[0]) + self.pc_range[0] 69 | coord[..., 1] = coord[..., 1] * (self.pc_range[4] - self.pc_range[1]) + self.pc_range[1] 70 | if self.pts_dim == 3: 71 | coord[..., 2] = coord[..., 2] * (self.pc_range[5] - self.pc_range[2]) + self.pc_range[2] 72 | centerline = coord.view(bs, num_query, -1).contiguous() 73 | 74 | offset = reg_offset[lid](output) 75 | left_laneline = centerline + offset 76 | right_laneline = centerline - offset 77 | left_laneline = left_laneline.view(bs, num_query, -1, self.pts_dim)[:, :, self.sample_idx, :] 78 | right_laneline = right_laneline.view(bs, num_query, -1, self.pts_dim)[:, :, self.sample_idx, :] 79 | 80 | lane_ref_points = torch.cat([left_laneline, right_laneline], axis=-2).contiguous().detach() 81 | 82 | lane_ref_points[..., 0] = (lane_ref_points[..., 0] - self.pc_range[0]) / (self.pc_range[3] - self.pc_range[0]) 83 | lane_ref_points[..., 1] = (lane_ref_points[..., 1] - self.pc_range[1]) / (self.pc_range[4] - self.pc_range[1]) 84 | 
if self.pts_dim == 3: 85 | lane_ref_points[..., 2] = (lane_ref_points[..., 2] - self.pc_range[2]) / (self.pc_range[5] - self.pc_range[2]) 86 | 87 | output = output.permute(1, 0, 2) 88 | if self.return_intermediate: 89 | intermediate.append(output) 90 | intermediate_reference_points.append(reference_points) 91 | 92 | if self.return_intermediate: 93 | return torch.stack(intermediate), torch.stack( 94 | intermediate_reference_points) 95 | 96 | return output, reference_points 97 | 98 | 99 | @TRANSFORMER_LAYER.register_module() 100 | class CustomDetrTransformerDecoderLayer(BaseTransformerLayer): 101 | 102 | def __init__(self, 103 | attn_cfgs, 104 | ffn_cfgs, 105 | operation_order=None, 106 | norm_cfg=dict(type='LN'), 107 | **kwargs): 108 | super(CustomDetrTransformerDecoderLayer, self).__init__( 109 | attn_cfgs=attn_cfgs, 110 | ffn_cfgs=ffn_cfgs, 111 | operation_order=operation_order, 112 | norm_cfg=norm_cfg, 113 | **kwargs) 114 | assert len(operation_order) == 6 115 | assert set(operation_order) == set( 116 | ['self_attn', 'norm', 'cross_attn', 'ffn']) 117 | -------------------------------------------------------------------------------- /projects/lanesegnet/models/modules/laneseg_transformer.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. # 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | from mmcv.cnn import xavier_init 11 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence 12 | from mmcv.runner import auto_fp16, force_fp32 13 | from mmcv.runner.base_module import BaseModule 14 | from mmdet.models.utils.builder import TRANSFORMER 15 | from .lane_attention import LaneAttention 16 | 17 | 18 | @TRANSFORMER.register_module() 19 | class LaneSegNetTransformer(BaseModule): 20 | 21 | def __init__(self, 22 | decoder=None, 23 | embed_dims=256, 24 | points_num=1, 25 | pts_dim=3, 26 | **kwargs): 27 | super(LaneSegNetTransformer, self).__init__(**kwargs) 28 | self.decoder = build_transformer_layer_sequence(decoder) 29 | self.embed_dims = embed_dims 30 | self.points_num = points_num 31 | self.pts_dim = pts_dim 32 | self.fp16_enabled = False 33 | self.init_layers() 34 | 35 | def init_layers(self): 36 | self.reference_points = nn.Linear(self.embed_dims, self.pts_dim) 37 | 38 | def init_weights(self): 39 | for p in self.parameters(): 40 | if p.dim() > 1: 41 | nn.init.xavier_uniform_(p) 42 | for m in self.modules(): 43 | if isinstance(m, LaneAttention): 44 | m.init_weights() 45 | xavier_init(self.reference_points, distribution='uniform', bias=0.) 
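# forward() below is decoder-only over the BEV features: each object query regresses a
# single pts_dim-D reference point, repeats it points_num times as an identity init, and
# the LaneSegNetDecoder then refines these references layer by layer through reg_branches.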
46 | 47 | 48 | @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos')) 49 | def forward(self, 50 | mlvl_feats, 51 | bev_embed, 52 | object_query_embed, 53 | bev_h, 54 | bev_w, 55 | reg_branches=None, 56 | cls_branches=None, 57 | **kwargs): 58 | 59 | bs = mlvl_feats[0].size(0) 60 | query_pos, query = torch.split( 61 | object_query_embed, self.embed_dims, dim=1) 62 | query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1) 63 | query = query.unsqueeze(0).expand(bs, -1, -1) 64 | reference_points = self.reference_points(query_pos) 65 | 66 | # ident init: repeat reference points to num points 67 | reference_points = reference_points.repeat(1, 1, self.points_num) 68 | reference_points = reference_points.sigmoid() 69 | bs, num_qeury, _ = reference_points.shape 70 | reference_points = reference_points.view(bs, num_qeury, self.points_num, self.pts_dim) 71 | 72 | init_reference_out = reference_points 73 | 74 | query = query.permute(1, 0, 2) 75 | query_pos = query_pos.permute(1, 0, 2) 76 | bev_embed = bev_embed.permute(1, 0, 2) 77 | inter_states, inter_references = self.decoder( 78 | query=query, 79 | key=None, 80 | value=bev_embed, 81 | query_pos=query_pos, 82 | reference_points=reference_points, 83 | reg_branches=reg_branches, 84 | cls_branches=cls_branches, 85 | spatial_shapes=torch.tensor([[bev_h, bev_w]], device=query.device), 86 | level_start_index=torch.tensor([0], device=query.device), 87 | **kwargs) 88 | 89 | inter_references_out = inter_references 90 | 91 | return inter_states, init_reference_out, inter_references_out 92 | -------------------------------------------------------------------------------- /projects/lanesegnet/thirdparty/__init__.py: -------------------------------------------------------------------------------- 1 | from .maptr_head import MapTRHead 2 | from .maptr_transformer import MapTRTransformerDecoderOnly 3 | from .maptr_decoder import MapTRDecoder 4 | from .maptr_assigner import MapTRAssigner 5 | from .map_loss import PtsDirCosLoss 6 | -------------------------------------------------------------------------------- /projects/lanesegnet/thirdparty/map_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import functools 3 | import torch 4 | from torch import nn as nn 5 | import mmcv 6 | from mmdet.models.builder import LOSSES 7 | 8 | 9 | @mmcv.jit(derivate=True, coderize=True) 10 | def custom_weight_dir_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 11 | """Apply element-wise weight and reduce loss. 12 | 13 | Args: 14 | loss (Tensor): num_sample, num_dir 15 | weight (Tensor): Element-wise weights. 16 | reduction (str): Same as built-in losses of PyTorch. 17 | avg_factor (float): Average factor when computing the mean of losses. 18 | 19 | Returns: 20 | Tensor: Processed loss values. 
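Note that an avg_factor must be provided; reduction may be 'mean' (loss.sum() / avg_factor) or 'none', while 'sum' raises an error.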
21 | """ 22 | # if weight is specified, apply element-wise weight 23 | if weight is not None: 24 | loss = loss * weight 25 | 26 | # if avg_factor is not specified, just reduce the loss 27 | if avg_factor is None: 28 | raise ValueError('avg_factor should not be none for OrderedPtsL1Loss') 29 | # loss = reduce_loss(loss, reduction) 30 | else: 31 | # if reduction is mean, then average the loss by avg_factor 32 | if reduction == 'mean': 33 | loss = loss.sum() 34 | loss = loss / avg_factor 35 | # if reduction is 'none', then do nothing, otherwise raise an error 36 | elif reduction != 'none': 37 | raise ValueError('avg_factor can not be used with reduction="sum"') 38 | return loss 39 | 40 | def custom_weighted_dir_loss(loss_func): 41 | """Create a weighted version of a given loss function. 42 | 43 | To use this decorator, the loss function must have the signature like 44 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 45 | element-wise loss without any reduction. This decorator will add weight 46 | and reduction arguments to the function. The decorated function will have 47 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 48 | avg_factor=None, **kwargs)`. 49 | 50 | :Example: 51 | 52 | >>> import torch 53 | >>> @weighted_loss 54 | >>> def l1_loss(pred, target): 55 | >>> return (pred - target).abs() 56 | 57 | >>> pred = torch.Tensor([0, 2, 3]) 58 | >>> target = torch.Tensor([1, 1, 1]) 59 | >>> weight = torch.Tensor([1, 0, 1]) 60 | 61 | >>> l1_loss(pred, target) 62 | tensor(1.3333) 63 | >>> l1_loss(pred, target, weight) 64 | tensor(1.) 65 | >>> l1_loss(pred, target, reduction='none') 66 | tensor([1., 1., 2.]) 67 | >>> l1_loss(pred, target, weight, avg_factor=2) 68 | tensor(1.5000) 69 | """ 70 | 71 | @functools.wraps(loss_func) 72 | def wrapper(pred, 73 | target, 74 | weight=None, 75 | reduction='mean', 76 | avg_factor=None, 77 | **kwargs): 78 | # get element-wise loss 79 | loss = loss_func(pred, target, **kwargs) 80 | loss = custom_weight_dir_reduce_loss(loss, weight, reduction, avg_factor) 81 | return loss 82 | 83 | return wrapper 84 | 85 | @mmcv.jit(derivate=True, coderize=True) 86 | @custom_weighted_dir_loss 87 | def pts_dir_cos_loss(pred, target): 88 | """ Dir cosine similiarity loss 89 | pred (torch.Tensor): shape [num_samples, num_dir, num_coords] 90 | target (torch.Tensor): shape [num_samples, num_dir, num_coords] 91 | 92 | """ 93 | if target.numel() == 0: 94 | return pred.sum() * 0 95 | # import pdb;pdb.set_trace() 96 | num_samples, num_dir, num_coords = pred.shape 97 | loss_func = torch.nn.CosineEmbeddingLoss(reduction='none') 98 | tgt_param = target.new_ones((num_samples, num_dir)) 99 | tgt_param = tgt_param.flatten(0) 100 | loss = loss_func(pred.flatten(0,1), target.flatten(0,1), tgt_param) 101 | loss = loss.view(num_samples, num_dir) 102 | return loss 103 | 104 | 105 | @LOSSES.register_module() 106 | class PtsDirCosLoss(nn.Module): 107 | """L1 loss. 108 | 109 | Args: 110 | reduction (str, optional): The method to reduce the loss. 111 | Options are "none", "mean" and "sum". 112 | loss_weight (float, optional): The weight of loss. 113 | """ 114 | 115 | def __init__(self, reduction='mean', loss_weight=1.0): 116 | super(PtsDirCosLoss, self).__init__() 117 | self.reduction = reduction 118 | self.loss_weight = loss_weight 119 | 120 | def forward(self, 121 | pred, 122 | target, 123 | weight=None, 124 | avg_factor=None, 125 | reduction_override=None): 126 | """Forward function. 127 | 128 | Args: 129 | pred (torch.Tensor): The prediction. 
130 | target (torch.Tensor): The learning target of the prediction. 131 | weight (torch.Tensor, optional): The weight of loss for each 132 | prediction. Defaults to None. 133 | avg_factor (int, optional): Average factor that is used to average 134 | the loss. Defaults to None. 135 | reduction_override (str, optional): The reduction method used to 136 | override the original reduction method of the loss. 137 | Defaults to None. 138 | """ 139 | assert reduction_override in (None, 'none', 'mean', 'sum') 140 | reduction = ( 141 | reduction_override if reduction_override else self.reduction) 142 | 143 | loss_dir = self.loss_weight * pts_dir_cos_loss( 144 | pred, target, weight, reduction=reduction, avg_factor=avg_factor) 145 | return loss_dir 146 | 147 | -------------------------------------------------------------------------------- /projects/lanesegnet/thirdparty/maptr_assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 3 | from mmdet.core.bbox.assigners import AssignResult 4 | from mmdet.core.bbox.assigners import BaseAssigner 5 | from mmdet.core.bbox.match_costs import build_match_cost 6 | import torch.nn.functional as F 7 | 8 | from ..core.lane.util import normalize_3dlane 9 | 10 | try: 11 | from scipy.optimize import linear_sum_assignment 12 | except ImportError: 13 | linear_sum_assignment = None 14 | 15 | 16 | @BBOX_ASSIGNERS.register_module() 17 | class MapTRAssigner(BaseAssigner): 18 | 19 | def __init__(self, 20 | cls_cost=dict(type='ClassificationCost', weight=1.), 21 | reg_cost=dict(type='BBoxL1Cost', weight=1.0), 22 | iou_cost=dict(type='IoUCost', weight=0.0), 23 | normalize_gt=False, 24 | pc_range=None): 25 | self.cls_cost = build_match_cost(cls_cost) 26 | self.reg_cost = build_match_cost(reg_cost) 27 | self.iou_cost = build_match_cost(iou_cost) 28 | self.pc_range = pc_range 29 | self.normalize_gt = normalize_gt 30 | 31 | def assign(self, 32 | lanes_pred, 33 | cls_pred, 34 | gt_lanes, 35 | gt_labels, 36 | gt_bboxes_ignore=None, 37 | eps=1e-7): 38 | 39 | assert gt_bboxes_ignore is None, \ 40 | 'Only case when gt_bboxes_ignore is None is supported.' 41 | num_gts, num_bboxes = gt_lanes.size(0), lanes_pred.size(0) 42 | # 1. assign -1 by default 43 | assigned_gt_inds = lanes_pred.new_full((num_bboxes, ), 44 | -1, 45 | dtype=torch.long) 46 | assigned_labels = lanes_pred.new_full((num_bboxes, ), 47 | -1, 48 | dtype=torch.long) 49 | if num_gts == 0 or num_bboxes == 0: 50 | # No ground truth or boxes, return empty assignment 51 | if num_gts == 0: 52 | # No ground truth, assign all to background 53 | assigned_gt_inds[:] = 0 54 | return AssignResult( 55 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 56 | 57 | # 2. compute the weighted costs 58 | # classification and bboxcost. 59 | cls_cost = self.cls_cost(cls_pred, gt_labels) 60 | 61 | # regression L1 cost 62 | assert gt_lanes.size(-1) == lanes_pred.size(-1) 63 | 64 | 65 | num_orders = gt_lanes.size(1) 66 | gt_lanes = gt_lanes.view(num_gts * num_orders, -1) 67 | 68 | reg_cost_ordered = self.reg_cost(lanes_pred.view(num_bboxes, -1), gt_lanes) 69 | reg_cost_ordered = reg_cost_ordered.view(num_bboxes, num_gts, num_orders) 70 | reg_cost, order_index = torch.min(reg_cost_ordered, 2) 71 | 72 | cost = cls_cost + reg_cost 73 | 74 | # 3. 
do Hungarian matching on CPU using linear_sum_assignment 75 | cost = cost.detach().cpu() 76 | if linear_sum_assignment is None: 77 | raise ImportError('Please run "pip install scipy" ' 78 | 'to install scipy first.') 79 | matched_row_inds, matched_col_inds = linear_sum_assignment(cost) 80 | matched_row_inds = torch.from_numpy(matched_row_inds).to( 81 | lanes_pred.device) 82 | matched_col_inds = torch.from_numpy(matched_col_inds).to( 83 | lanes_pred.device) 84 | 85 | # 4. assign backgrounds and foregrounds 86 | # assign all indices to backgrounds first 87 | assigned_gt_inds[:] = 0 88 | # assign foregrounds based on matching results 89 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 90 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 91 | 92 | return AssignResult( 93 | num_gts, assigned_gt_inds, None, labels=assigned_labels), order_index 94 | -------------------------------------------------------------------------------- /projects/lanesegnet/thirdparty/maptr_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE 3 | from mmcv.cnn.bricks.transformer import TransformerLayerSequence 4 | from mmdet.models.utils.transformer import inverse_sigmoid 5 | 6 | 7 | @TRANSFORMER_LAYER_SEQUENCE.register_module() 8 | class MapTRDecoder(TransformerLayerSequence): 9 | """Implements the decoder in DETR3D transformer. 10 | Args: 11 | return_intermediate (bool): Whether to return intermediate outputs. 12 | coder_norm_cfg (dict): Config of last normalization layer. Default: 13 | `LN`. 14 | """ 15 | 16 | def __init__(self, 17 | *args, 18 | return_intermediate=False, 19 | pts_dim=3, 20 | **kwargs): 21 | super(MapTRDecoder, self).__init__(*args, **kwargs) 22 | self.return_intermediate = return_intermediate 23 | self.fp16_enabled = False 24 | self.pts_dim = pts_dim 25 | 26 | def forward(self, 27 | query, 28 | *args, 29 | reference_points=None, 30 | reg_branches=None, 31 | key_padding_mask=None, 32 | **kwargs): 33 | 34 | output = query 35 | intermediate = [] 36 | intermediate_reference_points = [] 37 | for lid, layer in enumerate(self.layers): 38 | # BS NUM_QUERY NUM_LEVEL 2/3 39 | reference_points_input = reference_points[..., :2].unsqueeze( 40 | 2) 41 | output = layer( 42 | output, 43 | *args, 44 | reference_points=reference_points_input, 45 | key_padding_mask=key_padding_mask, 46 | **kwargs) 47 | output = output.permute(1, 0, 2) 48 | 49 | if reg_branches is not None: 50 | tmp = reg_branches[lid](output) 51 | 52 | assert reference_points.shape[-1] == self.pts_dim 53 | 54 | new_reference_points = torch.zeros_like(reference_points) 55 | new_reference_points[..., :self.pts_dim] = tmp[ 56 | ..., :self.pts_dim] + inverse_sigmoid(reference_points[..., :self.pts_dim]) 57 | new_reference_points = new_reference_points.sigmoid() 58 | 59 | reference_points = new_reference_points.detach() 60 | 61 | output = output.permute(1, 0, 2) 62 | if self.return_intermediate: 63 | intermediate.append(output) 64 | intermediate_reference_points.append(reference_points) 65 | 66 | if self.return_intermediate: 67 | return torch.stack(intermediate), torch.stack( 68 | intermediate_reference_points) 69 | 70 | return output, reference_points 71 | -------------------------------------------------------------------------------- /projects/lanesegnet/thirdparty/maptr_transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 
3 | import torch.nn as nn 4 | from mmcv.cnn import xavier_init 5 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence 6 | from mmcv.runner import auto_fp16, force_fp32 7 | from mmcv.runner.base_module import BaseModule 8 | from mmdet.models.utils.builder import TRANSFORMER 9 | from projects.bevformer.modules.decoder import CustomMSDeformableAttention 10 | 11 | @TRANSFORMER.register_module() 12 | class MapTRTransformerDecoderOnly(BaseModule): 13 | 14 | def __init__(self, 15 | decoder=None, 16 | embed_dims=256, 17 | pts_dim=3, 18 | **kwargs): 19 | super(MapTRTransformerDecoderOnly, self).__init__(**kwargs) 20 | self.decoder = build_transformer_layer_sequence(decoder) 21 | self.embed_dims = embed_dims 22 | self.pts_dim = pts_dim 23 | self.fp16_enabled = False 24 | self.init_layers() 25 | 26 | def init_layers(self): 27 | self.reference_points = nn.Linear(self.embed_dims, self.pts_dim) 28 | 29 | def init_weights(self): 30 | for p in self.parameters(): 31 | if p.dim() > 1: 32 | nn.init.xavier_uniform_(p) 33 | for m in self.modules(): 34 | if isinstance(m, CustomMSDeformableAttention): 35 | m.init_weights() 36 | xavier_init(self.reference_points, distribution='uniform', bias=0.) 37 | 38 | 39 | @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos')) 40 | def forward(self, 41 | mlvl_feats, 42 | bev_embed, 43 | object_query_embed, 44 | bev_h, 45 | bev_w, 46 | reg_branches=None, 47 | cls_branches=None, 48 | **kwargs): 49 | 50 | bs = mlvl_feats[0].size(0) 51 | query_pos, query = torch.split( 52 | object_query_embed, self.embed_dims, dim=1) 53 | query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1) 54 | query = query.unsqueeze(0).expand(bs, -1, -1) 55 | reference_points = self.reference_points(query_pos) 56 | 57 | reference_points = reference_points.sigmoid() 58 | init_reference_out = reference_points 59 | 60 | query = query.permute(1, 0, 2) 61 | query_pos = query_pos.permute(1, 0, 2) 62 | bev_embed = bev_embed.permute(1, 0, 2) 63 | inter_states, inter_references = self.decoder( 64 | query=query, 65 | key=None, 66 | value=bev_embed, 67 | query_pos=query_pos, 68 | reference_points=reference_points, 69 | reg_branches=reg_branches, 70 | cls_branches=cls_branches, 71 | spatial_shapes=torch.tensor([[bev_h, bev_w]], device=query.device), 72 | level_start_index=torch.tensor([0], device=query.device), 73 | **kwargs) 74 | 75 | inter_references_out = inter_references 76 | 77 | return inter_states, init_reference_out, inter_references_out 78 | -------------------------------------------------------------------------------- /projects/lanesegnet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_bev_constructor 2 | -------------------------------------------------------------------------------- /projects/lanesegnet/utils/builder.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------------------------# 2 | # LaneSegNet: Map Learning with Lane Segment Perception for Autonomous Driving # 3 | # Source code: https://github.com/OpenDriveLab/LaneSegNet # 4 | # Copyright (c) OpenDriveLab. All rights reserved. 
# 5 | #---------------------------------------------------------------------------------------# 6 | 7 | import torch.nn as nn 8 | from mmcv.utils import Registry, build_from_cfg 9 | 10 | BEV_CONSTRUCTOR = Registry('BEV Constructor') 11 | 12 | def build_bev_constructor(cfg, default_args=None): 13 | """Builder for BEV Constructor.""" 14 | return build_from_cfg(cfg, BEV_CONSTRUCTOR, default_args) 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | similaritymeasures==0.6.0 2 | numpy==1.22.4 3 | scipy==1.8.0 4 | ortools==9.2.9972 5 | setuptools==59.5.0 6 | openlanev2==2.1.0 7 | -------------------------------------------------------------------------------- /tools/data_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | from shapely.geometry import LineString 4 | from openlanev2.lanesegment.io import io 5 | 6 | """ 7 | This script is used to collect the data from the original OpenLane-V2 dataset. 8 | The results will be saved in OpenLane-V2 folder. 9 | The main difference between this script and the original one is that we don't interpolate the points for ped crossing and road bouadary. 10 | """ 11 | 12 | def _fix_pts_interpolate(curve, n_points): 13 | ls = LineString(curve) 14 | distances = np.linspace(0, ls.length, n_points) 15 | curve = np.array([ls.interpolate(distance).coords[0] for distance in distances], dtype=np.float32) 16 | return curve 17 | 18 | def collect(root_path : str, data_dict : dict, collection : str, n_points : dict) -> None: 19 | 20 | data_list = [(split, segment_id, timestamp.split('.')[0]) \ 21 | for split, segment_ids in data_dict.items() \ 22 | for segment_id, timestamps in segment_ids.items() \ 23 | for timestamp in timestamps 24 | ] 25 | meta = {} 26 | for split, segment_id, timestamp in tqdm(data_list, desc=f'collecting {collection}', ncols=100): 27 | identifier = (split, segment_id, timestamp) 28 | frame = io.json_load(f'{root_path}/{split}/{segment_id}/info/{timestamp}-ls.json') 29 | 30 | for k, v in frame['pose'].items(): 31 | frame['pose'][k] = np.array(v, dtype=np.float64) 32 | for camera in frame['sensor'].keys(): 33 | for para in ['intrinsic', 'extrinsic']: 34 | for k, v in frame['sensor'][camera][para].items(): 35 | frame['sensor'][camera][para][k] = np.array(v, dtype=np.float64) 36 | 37 | if 'annotation' not in frame: 38 | meta[identifier] = frame 39 | continue 40 | 41 | # NOTE: We don't interpolate the points for ped crossing and road bouadary. 
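# Area annotations (ped crossing / road boundary) keep their raw polyline points, while
# lane-segment centerlines and left/right lanelines are resampled to a fixed number of
# points with _fix_pts_interpolate above.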
42 | for i, area in enumerate(frame['annotation']['area']): 43 | frame['annotation']['area'][i]['points'] = np.array(area['points'], dtype=np.float32) 44 | for i, lane_segment in enumerate(frame['annotation']['lane_segment']): 45 | frame['annotation']['lane_segment'][i]['centerline'] = _fix_pts_interpolate(np.array(lane_segment['centerline']), n_points['centerline']) 46 | frame['annotation']['lane_segment'][i]['left_laneline'] = _fix_pts_interpolate(np.array(lane_segment['left_laneline']), n_points['left_laneline']) 47 | frame['annotation']['lane_segment'][i]['right_laneline'] = _fix_pts_interpolate(np.array(lane_segment['right_laneline']), n_points['right_laneline']) 48 | for i, traffic_element in enumerate(frame['annotation']['traffic_element']): 49 | frame['annotation']['traffic_element'][i]['points'] = np.array(traffic_element['points'], dtype=np.float32) 50 | frame['annotation']['topology_lsls'] = np.array(frame['annotation']['topology_lsls'], dtype=np.int8) 51 | frame['annotation']['topology_lste'] = np.array(frame['annotation']['topology_lste'], dtype=np.int8) 52 | meta[identifier] = frame 53 | 54 | io.pickle_dump(f'{root_path}/{collection}.pkl', meta) 55 | 56 | if __name__ == '__main__': 57 | root_path = 'data/OpenLane-V2' 58 | file = f'{root_path}/data_dict_subset_A.json' 59 | subset = 'data_dict_subset_A' 60 | for split, segments in io.json_load(file).items(): 61 | collect( 62 | root_path, 63 | {split: segments}, 64 | f'{subset}_{split}_lanesegnet', 65 | n_points={ 66 | 'centerline': 10, 67 | 'left_laneline': 10, 68 | 'right_laneline': 10 69 | }, 70 | ) 71 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | timestamp=`date +"%y%m%d.%H%M%S"` 5 | 6 | WORK_DIR=work_dirs/lanesegnet 7 | CONFIG=projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py 8 | 9 | CHECKPOINT=${WORK_DIR}/latest.pth 10 | 11 | GPUS=$1 12 | PORT=${PORT:-28510} 13 | 14 | python -m torch.distributed.run --nproc_per_node=$GPUS --master_port=$PORT \ 15 | tools/test.py $CONFIG $CHECKPOINT --launcher pytorch \ 16 | --out-dir ${WORK_DIR}/test --eval openlane_v2 ${@:2} \ 17 | 2>&1 | tee ${WORK_DIR}/test.${timestamp}.log 18 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | timestamp=`date +"%y%m%d.%H%M%S"` 5 | 6 | WORK_DIR=work_dirs/lanesegnet 7 | CONFIG=projects/configs/lanesegnet_r50_8x1_24e_olv2_subset_A.py 8 | 9 | GPUS=$1 10 | PORT=${PORT:-28510} 11 | 12 | python -m torch.distributed.run --nproc_per_node=$GPUS --master_port=$PORT \ 13 | tools/train.py $CONFIG --launcher pytorch --work-dir ${WORK_DIR} ${@:2} \ 14 | 2>&1 | tee ${WORK_DIR}/train.${timestamp}.log 15 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 
3 | # --------------------------------------------- 4 | # Modified by Tianyu Li 5 | # --------------------------------------------- 6 | import argparse 7 | import os 8 | import os.path as osp 9 | import time 10 | import warnings 11 | 12 | import torch 13 | import mmcv 14 | from mmcv import Config, DictAction 15 | from mmcv.cnn import fuse_conv_bn 16 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 17 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 18 | wrap_fp16_model) 19 | from mmcv.utils import get_logger 20 | from mmdet.apis import multi_gpu_test, set_random_seed 21 | from mmdet.datasets import replace_ImageToTensor 22 | from mmdet3d.apis import single_gpu_test 23 | from mmdet3d.datasets import build_dataloader, build_dataset 24 | from mmdet3d.models import build_model 25 | 26 | try: 27 | # If mmdet version > 2.20.0, setup_multi_processes would be imported and 28 | # used from mmdet instead of mmdet3d. 29 | from mmdet.utils import setup_multi_processes 30 | except ImportError: 31 | from mmdet3d.utils import setup_multi_processes 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser( 35 | description='MMDet test (and eval) a model') 36 | parser.add_argument('config', help='test config file path') 37 | parser.add_argument('checkpoint', help='checkpoint file') 38 | parser.add_argument('--out', 39 | action='store_true', 40 | help='save model outputs as results.pkl under --out-dir') 41 | parser.add_argument('--input', 42 | action='store_true', 43 | help='load model outputs from results.pkl under --out-dir instead of running inference') 44 | parser.add_argument( 45 | '--fuse-conv-bn', 46 | action='store_true', 47 | help='Whether to fuse conv and bn, this will slightly increase ' 48 | 'the inference speed') 49 | parser.add_argument( 50 | '--format-only', 51 | action='store_true', 52 | help='Format the output results without performing evaluation. It is ' 53 | 'useful when you want to format the result to a specific format and ' 54 | 'submit it to the test server') 55 | parser.add_argument( 56 | '--eval', 57 | type=str, 58 | nargs='+', 59 | help='evaluation metrics, which depend on the dataset, e.g., "bbox",' 60 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 61 | parser.add_argument('--show', action='store_true', help='show results') 62 | parser.add_argument( 63 | '--out-dir', help='directory where results will be saved') 64 | parser.add_argument( 65 | '--gpu-collect', 66 | action='store_true', 67 | help='whether to use gpu to collect results.') 68 | parser.add_argument('--seed', type=int, default=0, help='random seed') 69 | parser.add_argument( 70 | '--deterministic', 71 | action='store_true', 72 | help='whether to set deterministic options for CUDNN backend.') 73 | parser.add_argument( 74 | '--cfg-options', 75 | nargs='+', 76 | action=DictAction, 77 | help='override some settings in the used config, the key-value pair ' 78 | 'in xxx=yyy format will be merged into config file. If the value to ' 79 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 80 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 81 | 'Note that the quotation marks are necessary and that no white space ' 82 | 'is allowed.') 83 | parser.add_argument( 84 | '--options', 85 | nargs='+', 86 | action=DictAction, 87 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 88 | 'format will be kwargs for dataset.evaluate() function (deprecate), ' 89 | 'change to --eval-options instead.') 90 | parser.add_argument( 91 | '--eval-options', 92 | nargs='+', 93 | action=DictAction, 94 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 95 | 'format will be kwargs for dataset.evaluate() function') 96 | parser.add_argument( 97 | '--launcher', 98 | choices=['none', 'pytorch', 'slurm', 'mpi'], 99 | default='none', 100 | help='job launcher') 101 | parser.add_argument('--local_rank', type=int, default=0) 102 | args = parser.parse_args() 103 | if 'LOCAL_RANK' not in os.environ: 104 | os.environ['LOCAL_RANK'] = str(args.local_rank) 105 | 106 | if args.options and args.eval_options: 107 | raise ValueError( 108 | '--options and --eval-options cannot be both specified, ' 109 | '--options is deprecated in favor of --eval-options') 110 | if args.options: 111 | warnings.warn('--options is deprecated in favor of --eval-options') 112 | args.eval_options = args.options 113 | return args 114 | 115 | 116 | def main(): 117 | args = parse_args() 118 | 119 | assert args.out or args.eval or args.format_only or args.show \ 120 | ('Please specify at least one operation (save/eval/format/show the ' 121 | 'results / save the results) with the argument "--out", "--eval"' 122 | ', "--format-only" or "--show"') 123 | 124 | if args.eval and args.format_only: 125 | print('[WARNING] --eval and --format_only should not be both specified!') 126 | print('[WARNING] ignored --eval option') 127 | assert args.out_dir is not None, 'Please specify out_dir when format results' 128 | 129 | cfg = Config.fromfile(args.config) 130 | if args.cfg_options is not None: 131 | cfg.merge_from_dict(args.cfg_options) 132 | 133 | # set multi-process settings 134 | setup_multi_processes(cfg) 135 | 136 | # set cudnn_benchmark 137 | if cfg.get('cudnn_benchmark', False): 138 | torch.backends.cudnn.benchmark = True 139 | 140 | cfg.model.pretrained = None 141 | # in case the test dataset is concatenated 142 | samples_per_gpu = 1 143 | if isinstance(cfg.data.test, dict): 144 | cfg.data.test.test_mode = True 145 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 146 | if samples_per_gpu > 1: 147 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 148 | cfg.data.test.pipeline = replace_ImageToTensor( 149 | cfg.data.test.pipeline) 150 | elif isinstance(cfg.data.test, list): 151 | for ds_cfg in cfg.data.test: 152 | ds_cfg.test_mode = True 153 | samples_per_gpu = max( 154 | [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test]) 155 | if samples_per_gpu > 1: 156 | for ds_cfg in cfg.data.test: 157 | ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline) 158 | 159 | # init distributed env first, since logger depends on the dist info. 
160 | if args.launcher == 'none': 161 | distributed = False 162 | else: 163 | distributed = True 164 | init_dist(args.launcher, **cfg.dist_params) 165 | 166 | logger = get_logger(name='mmdet', log_level=cfg.log_level) 167 | mmcv.mkdir_or_exist(osp.abspath(args.out_dir)) 168 | # set random seeds 169 | if args.seed is not None: 170 | set_random_seed(args.seed, deterministic=args.deterministic) 171 | 172 | # build the dataloader 173 | dataset = build_dataset(cfg.data.test) 174 | 175 | if args.input: 176 | logger.info('Loading model outputs from results.pkl') 177 | outputs = mmcv.load(os.path.join(args.out_dir, 'results.pkl')) 178 | else: 179 | data_loader = build_dataloader( 180 | dataset, 181 | samples_per_gpu=samples_per_gpu, 182 | workers_per_gpu=cfg.data.workers_per_gpu, 183 | dist=distributed, 184 | shuffle=False, 185 | ) 186 | 187 | # build the model and load checkpoint 188 | # cfg.model.train_cfg = None 189 | model = build_model(cfg.model, test_cfg=cfg.get('test_cfg')) 190 | fp16_cfg = cfg.get('fp16', None) 191 | if fp16_cfg is not None: 192 | wrap_fp16_model(model) 193 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 194 | if args.fuse_conv_bn: 195 | model = fuse_conv_bn(model) 196 | # old versions did not save class info in checkpoints, this workaround is 197 | # for backward compatibility 198 | if 'CLASSES' in checkpoint.get('meta', {}): 199 | model.CLASSES = checkpoint['meta']['CLASSES'] 200 | else: 201 | model.CLASSES = dataset.CLASSES 202 | # palette for visualization in segmentation tasks 203 | if 'PALETTE' in checkpoint.get('meta', {}): 204 | model.PALETTE = checkpoint['meta']['PALETTE'] 205 | elif hasattr(dataset, 'PALETTE'): 206 | # segmentation dataset has `PALETTE` attribute 207 | model.PALETTE = dataset.PALETTE 208 | 209 | if not distributed: 210 | model = MMDataParallel(model, device_ids=cfg.gpu_ids) 211 | outputs = single_gpu_test(model, data_loader, args.show, args.out_dir)  # no --show-dir flag is defined; reuse --out-dir for visualization 212 | else: 213 | model = MMDistributedDataParallel( 214 | model.cuda(), 215 | device_ids=[torch.cuda.current_device()], 216 | broadcast_buffers=False) 217 | outputs = multi_gpu_test(model, data_loader, 218 | tmpdir=os.path.join(args.out_dir, '.dist_test'), 219 | gpu_collect=args.gpu_collect) 220 | print('\n') 221 | 222 | rank, _ = get_dist_info() 223 | if rank == 0: 224 | if args.out and not args.input: 225 | logger.info('Writing model outputs to results.pkl') 226 | mmcv.dump(outputs, os.path.join(args.out_dir, 'results.pkl')) 227 | 228 | kwargs = {} if args.eval_options is None else args.eval_options 229 | kwargs['logger'] = logger 230 | kwargs['show'] = args.show 231 | kwargs['out_dir'] = args.out_dir 232 | 233 | if args.format_only: 234 | dataset.format_results(outputs, **kwargs) 235 | return 236 | 237 | if args.eval: 238 | eval_kwargs = cfg.get('evaluation', {}).copy() 239 | # hard-coded way to remove EvalHook args 240 | for key in [ 241 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 242 | 'rule' 243 | ]: 244 | eval_kwargs.pop(key, None) 245 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 246 | eval_kwargs['out_dir'] = os.path.join(args.out_dir, 'vis/') 247 | 248 | print(dataset.evaluate(outputs, **eval_kwargs)) 249 | 250 | 251 | if __name__ == '__main__': 252 | main() 253 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. 
All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Tianyu Li 5 | # --------------------------------------------- 6 | from __future__ import division 7 | import argparse 8 | import copy 9 | import os 10 | import time 11 | import warnings 12 | from os import path as osp 13 | 14 | import mmcv 15 | import torch 16 | import torch.distributed as dist 17 | from mmcv import Config, DictAction 18 | from mmcv.runner import get_dist_info, init_dist 19 | 20 | from mmdet import __version__ as mmdet_version 21 | from mmdet3d import __version__ as mmdet3d_version 22 | from mmdet3d.apis import init_random_seed, train_model 23 | from mmdet3d.datasets import build_dataset 24 | from mmdet3d.models import build_model 25 | from mmdet3d.utils import collect_env, get_root_logger 26 | from mmdet.apis import set_random_seed 27 | from mmseg import __version__ as mmseg_version 28 | 29 | try: 30 | # If mmdet version > 2.20.0, setup_multi_processes would be imported and 31 | # used from mmdet instead of mmdet3d. 32 | from mmdet.utils import setup_multi_processes 33 | except ImportError: 34 | from mmdet3d.utils import setup_multi_processes 35 | 36 | 37 | def parse_args(): 38 | parser = argparse.ArgumentParser(description='Train a detector') 39 | parser.add_argument('config', help='train config file path') 40 | parser.add_argument('--work-dir', help='the dir to save logs and models') 41 | parser.add_argument( 42 | '--resume-from', help='the checkpoint file to resume from') 43 | parser.add_argument( 44 | '--auto-resume', 45 | action='store_true', 46 | help='resume from the latest checkpoint automatically') 47 | parser.add_argument( 48 | '--no-validate', 49 | action='store_true', 50 | help='whether not to evaluate the checkpoint during training') 51 | group_gpus = parser.add_mutually_exclusive_group() 52 | group_gpus.add_argument( 53 | '--gpus', 54 | type=int, 55 | help='(Deprecated, please use --gpu-id) number of gpus to use ' 56 | '(only applicable to non-distributed training)') 57 | group_gpus.add_argument( 58 | '--gpu-ids', 59 | type=int, 60 | nargs='+', 61 | help='(Deprecated, please use --gpu-id) ids of gpus to use ' 62 | '(only applicable to non-distributed training)') 63 | group_gpus.add_argument( 64 | '--gpu-id', 65 | type=int, 66 | default=0, 67 | help='id of the gpu to use ' 68 | '(only applicable to non-distributed training)') 69 | parser.add_argument('--seed', type=int, default=42, help='random seed') 70 | parser.add_argument( 71 | '--diff-seed', 72 | action='store_true', 73 | help='Whether or not to set different seeds for different ranks') 74 | parser.add_argument( 75 | '--deterministic', 76 | action='store_true', 77 | help='whether to set deterministic options for CUDNN backend.') 78 | parser.add_argument( 79 | '--options', 80 | nargs='+', 81 | action=DictAction, 82 | help='override some settings in the used config, the key-value pair ' 83 | 'in xxx=yyy format will be merged into config file (deprecated), ' 84 | 'change to --cfg-options instead.') 85 | parser.add_argument( 86 | '--cfg-options', 87 | nargs='+', 88 | action=DictAction, 89 | help='override some settings in the used config, the key-value pair ' 90 | 'in xxx=yyy format will be merged into config file. If the value to ' 91 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 92 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 93 | 'Note that the quotation marks are necessary and that no white space ' 94 | 'is allowed.') 95 | parser.add_argument( 96 | '--launcher', 97 | choices=['none', 'pytorch', 'slurm', 'mpi'], 98 | default='none', 99 | help='job launcher') 100 | parser.add_argument('--local_rank', type=int, default=0) 101 | parser.add_argument( 102 | '--autoscale-lr', 103 | action='store_true', 104 | help='automatically scale lr with the number of gpus') 105 | args = parser.parse_args() 106 | if 'LOCAL_RANK' not in os.environ: 107 | os.environ['LOCAL_RANK'] = str(args.local_rank) 108 | 109 | if args.options and args.cfg_options: 110 | raise ValueError( 111 | '--options and --cfg-options cannot be both specified, ' 112 | '--options is deprecated in favor of --cfg-options') 113 | if args.options: 114 | warnings.warn('--options is deprecated in favor of --cfg-options') 115 | args.cfg_options = args.options 116 | 117 | return args 118 | 119 | 120 | def auto_scale_lr(cfg, distributed, logger): 121 | """Automatically scaling LR according to GPU number and sample per GPU. 122 | 123 | Args: 124 | cfg (config): Training config. 125 | distributed (bool): Using distributed or not. 126 | logger (logging.Logger): Logger. 127 | """ 128 | # Get flag from config 129 | if ('auto_scale_lr' not in cfg) or \ 130 | (not cfg.auto_scale_lr.get('enable', False)): 131 | logger.info('Automatic scaling of learning rate (LR)' 132 | ' has been disabled.') 133 | return 134 | 135 | # Get base batch size from config 136 | base_batch_size = cfg.auto_scale_lr.get('base_batch_size', None) 137 | if base_batch_size is None: 138 | return 139 | 140 | # Get gpu number 141 | if distributed: 142 | _, world_size = get_dist_info() 143 | num_gpus = len(range(world_size)) 144 | else: 145 | num_gpus = len(cfg.gpu_ids) 146 | 147 | # calculate the batch size 148 | samples_per_gpu = cfg.data.samples_per_gpu 149 | batch_size = num_gpus * samples_per_gpu 150 | logger.info(f'Training with {num_gpus} GPU(s) with {samples_per_gpu} ' 151 | f'samples per GPU. 
The total batch size is {batch_size}.') 152 | 153 | if batch_size != base_batch_size: 154 | # scale LR with 155 | # [linear scaling rule](https://arxiv.org/abs/1706.02677) 156 | scaled_lr = (batch_size / base_batch_size) * cfg.optimizer.lr 157 | logger.info('LR has been automatically scaled ' 158 | f'from {cfg.optimizer.lr} to {scaled_lr}') 159 | cfg.optimizer.lr = scaled_lr 160 | else: 161 | logger.info('The batch size matches the ' 162 | f'base batch size: {base_batch_size}, ' 163 | f'so the LR will not be scaled ({cfg.optimizer.lr}).') 164 | 165 | 166 | def main(): 167 | args = parse_args() 168 | 169 | cfg = Config.fromfile(args.config) 170 | if args.cfg_options is not None: 171 | cfg.merge_from_dict(args.cfg_options) 172 | 173 | # set multi-process settings 174 | setup_multi_processes(cfg) 175 | 176 | # set cudnn_benchmark 177 | if cfg.get('cudnn_benchmark', False): 178 | torch.backends.cudnn.benchmark = True 179 | 180 | # work_dir is determined in this priority: CLI > segment in file > filename 181 | if args.work_dir is not None: 182 | # update configs according to CLI args if args.work_dir is not None 183 | cfg.work_dir = args.work_dir 184 | elif cfg.get('work_dir', None) is None: 185 | # use config filename as default work_dir if cfg.work_dir is None 186 | cfg.work_dir = osp.join('./work_dirs', 187 | osp.splitext(osp.basename(args.config))[0]) 188 | if args.resume_from is not None: 189 | cfg.resume_from = args.resume_from 190 | 191 | if args.auto_resume: 192 | cfg.auto_resume = args.auto_resume 193 | warnings.warn('`--auto-resume` is only supported when mmdet ' 194 | 'version >= 2.20.0 for 3D detection model or ' 195 | 'mmsegmentation version >= 0.21.0 for 3D ' 196 | 'segmentation model') 197 | 198 | if args.gpus is not None: 199 | cfg.gpu_ids = range(1) 200 | warnings.warn('`--gpus` is deprecated because we only support ' 201 | 'single GPU mode in non-distributed training. ' 202 | 'Use `gpus=1` now.') 203 | if args.gpu_ids is not None: 204 | cfg.gpu_ids = args.gpu_ids[0:1] 205 | warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' 206 | 'Because we only support single GPU mode in ' 207 | 'non-distributed training. Use the first GPU ' 208 | 'in `gpu_ids` now.') 209 | if args.gpus is None and args.gpu_ids is None: 210 | cfg.gpu_ids = [args.gpu_id] 211 | 212 | if args.autoscale_lr: 213 | if 'auto_scale_lr' in cfg and \ 214 | 'base_batch_size' in cfg.auto_scale_lr: 215 | cfg.auto_scale_lr.enable = True 216 | else: 217 | warnings.warn('Cannot find "auto_scale_lr" or ' 218 | '"auto_scale_lr.base_batch_size" in your' 219 | ' configuration file.') 220 | 221 | # init distributed env first, since logger depends on the dist info. 
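# With `--launcher none` the run stays non-distributed and uses the GPUs already
# set in cfg.gpu_ids; any other launcher initializes the distributed environment
# and cfg.gpu_ids is re-set to range(world_size) below.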
222 | if args.launcher == 'none': 223 | distributed = False 224 | else: 225 | distributed = True 226 | init_dist(args.launcher, **cfg.dist_params) 227 | # re-set gpu_ids with distributed training mode 228 | _, world_size = get_dist_info() 229 | cfg.gpu_ids = range(world_size) 230 | 231 | # create work_dir 232 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 233 | # dump config 234 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 235 | # init the logger before other steps 236 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 237 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 238 | # specify logger name, if we still use 'mmdet', the output info will be 239 | # filtered and won't be saved in the log_file 240 | logger = get_root_logger( 241 | log_file=log_file, log_level=cfg.log_level, name='mmdet') 242 | 243 | # init the meta dict to record some important information such as 244 | # environment info and seed, which will be logged 245 | meta = dict() 246 | # log env info 247 | env_info_dict = collect_env() 248 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 249 | dash_line = '-' * 60 + '\n' 250 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 251 | dash_line) 252 | meta['env_info'] = env_info 253 | meta['config'] = cfg.pretty_text 254 | 255 | # log some basic info 256 | logger.info(f'Distributed training: {distributed}') 257 | logger.info(f'Config:\n{cfg.pretty_text}') 258 | 259 | # set random seeds 260 | seed = init_random_seed(args.seed) 261 | seed = seed + dist.get_rank() if args.diff_seed else seed 262 | logger.info(f'Set random seed to {seed}, ' 263 | f'deterministic: {args.deterministic}') 264 | set_random_seed(seed, deterministic=args.deterministic) 265 | cfg.seed = seed 266 | meta['seed'] = seed 267 | meta['exp_name'] = osp.basename(args.config) 268 | 269 | model = build_model( 270 | cfg.model, 271 | train_cfg=cfg.get('train_cfg'), 272 | test_cfg=cfg.get('test_cfg')) 273 | model.init_weights() 274 | 275 | logger.info(f'Model:\n{model}') 276 | datasets = [build_dataset(cfg.data.train)] 277 | if len(cfg.workflow) == 2: 278 | val_dataset = copy.deepcopy(cfg.data.val) 279 | # in case we use a dataset wrapper 280 | if 'dataset' in cfg.data.train: 281 | val_dataset.pipeline = cfg.data.train.dataset.pipeline 282 | else: 283 | val_dataset.pipeline = cfg.data.train.pipeline 284 | # set test_mode=False here in deep copied config 285 | # which do not affect AP/AR calculation later 286 | # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa 287 | val_dataset.test_mode = False 288 | datasets.append(build_dataset(val_dataset)) 289 | if cfg.checkpoint_config is not None: 290 | # save mmdet version, config file content and class names in 291 | # checkpoints as meta data 292 | cfg.checkpoint_config.meta = dict( 293 | mmdet_version=mmdet_version, 294 | mmseg_version=mmseg_version, 295 | mmdet3d_version=mmdet3d_version, 296 | config=cfg.pretty_text, 297 | CLASSES=datasets[0].CLASSES, 298 | PALETTE=datasets[0].PALETTE # for segmentors 299 | if hasattr(datasets[0], 'PALETTE') else None) 300 | # add an attribute for visualization convenience 301 | model.CLASSES = datasets[0].CLASSES 302 | auto_scale_lr(cfg, distributed=distributed, logger=logger) 303 | train_model( 304 | model, 305 | datasets, 306 | cfg, 307 | distributed=distributed, 308 | validate=(not args.no_validate), 309 | timestamp=timestamp, 310 | meta=meta) 311 | 312 | 313 | if __name__ == '__main__': 314 | 
main() 315 | --------------------------------------------------------------------------------
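The short sketch below is not part of the repository; it is a hypothetical example of how the .pkl written by tools/data_process.py could be inspected. The file name follows the `{subset}_{split}_lanesegnet` naming used in the script's main block, and the dump is assumed to be a plain pickle readable with the standard library.

import pickle

# assumed output path for the train split of subset A
pkl_path = 'data/OpenLane-V2/data_dict_subset_A_train_lanesegnet.pkl'
with open(pkl_path, 'rb') as f:
    meta = pickle.load(f)

# keys are (split, segment_id, timestamp) tuples, values are per-frame dicts
(split, segment_id, timestamp), frame = next(iter(meta.items()))
lane = frame['annotation']['lane_segment'][0]
print(lane['centerline'].shape)                    # expected (10, 3) after resampling
print(frame['annotation']['topology_lsls'].dtype)  # int8, lane-to-lane topology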