├── .gitignore ├── LICENSE ├── README.md ├── assets ├── figures │ ├── arch.png │ ├── demo.gif │ ├── pivot-title.png │ ├── pivotnet-arch.png │ ├── pivotnet-logo.png │ └── title.png ├── splits │ └── nuscenes │ │ ├── all.txt │ │ ├── cloudy.txt │ │ ├── day.txt │ │ ├── night.txt │ │ ├── rainy.txt │ │ ├── sunny.txt │ │ ├── test.txt │ │ ├── train.txt │ │ └── val.txt └── weights │ └── README.md ├── configs ├── bemapnet_nuscenes_effb0.py ├── bemapnet_nuscenes_res50.py ├── bemapnet_nuscenes_swint.py ├── pivotnet_nuscenes_effb0.py ├── pivotnet_nuscenes_res50.py └── pivotnet_nuscenes_swint.py ├── mapmaster ├── __init__.py ├── dataset │ ├── nuscenes_bemapnet.py │ ├── nuscenes_pivotnet.py │ ├── sampler.py │ └── transform.py ├── engine │ ├── callbacks.py │ ├── core.py │ ├── environ.py │ ├── executor.py │ └── experiment.py ├── models │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── bifpn │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── efficientnet │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── model.py │ │ ├── resnet │ │ │ ├── __init__.py │ │ │ ├── resnet.py │ │ │ └── utils.py │ │ └── swin_transformer │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── utils.py │ ├── bev_decoder │ │ ├── __init__.py │ │ ├── deform_transformer │ │ │ ├── __init__.py │ │ │ ├── deform_transformer.py │ │ │ ├── ops │ │ │ │ ├── __init__.py │ │ │ │ ├── functions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn_func.py │ │ │ │ ├── make.sh │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn.py │ │ │ │ ├── setup.py │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ └── ms_deform_attn_cpu.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ └── vision.cpp │ │ │ │ └── test.py │ │ │ └── position_encoding.py │ │ ├── model.py │ │ └── transformer.py │ ├── ins_decoder │ │ ├── __init__.py │ │ ├── mask2former.py │ │ ├── model.py │ │ └── pointmask2former.py │ ├── network.py │ ├── output_head │ │ ├── __init__.py │ │ ├── bezier_outputs.py │ │ ├── bezier_post_processor.py │ │ ├── line_matching.py │ │ ├── pivot_outputs.py │ │ └── pivot_post_processor.py │ └── utils │ │ ├── mask_loss.py │ │ ├── misc.py │ │ ├── position_encoding.py │ │ └── recovery_loss.py └── utils │ ├── env.py │ └── misc.py ├── requirement.txt ├── run.sh └── tools ├── __init__.py ├── anno_converter ├── __init__.py ├── bezier.py ├── generate_pivots.py └── nuscenes │ ├── __init__.py │ ├── convert.py │ ├── rasterize.py │ └── vectorize.py └── evaluation ├── ap.py ├── cd.py └── eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Wenjie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 | 
 4 | # 5 | ### NEWS !!! 6 | * **`Jan. 15th, 2024`:** :rocket: :rocket: :rocket: The official implementation of our PivotNet is now released. Enjoy it! 7 | * **`Jul. 14th, 2023`:** :clap: Our **PivotNet** is accepted by ICCV 2023! Refer to the **[Long-version Paper](https://arxiv.org/pdf/2308.16477)** for more details. 8 | * **`May. 26th, 2023`:** :trophy: Our team won the **Championship** and the **Innovation Award** of the CVPR23 *Online HD Map Construction Challenge*! [**[Leaderboard](https://opendrivelab.com/AD23Challenge.html#online_hd_map_construction)** / **[Tech-Report](https://arxiv.org/abs/2306.10301)**] 9 | * **`Feb. 28th, 2023`:** :clap: Our **BeMapNet** is accepted by CVPR 2023! Refer to the **[Paper](https://openaccess.thecvf.com/content/CVPR2023/html/Qiao_End-to-End_Vectorized_HD-Map_Construction_With_Piecewise_Bezier_Curve_CVPR_2023_paper.html)** for more details. [**[Long version on Arxiv](https://arxiv.org/pdf/2306.09700)** / **[Code](https://github.com/er-muyue/BeMapNet/tree/main)**] 10 | 11 | ##
Introduction
12 | **Vectorized high-definition map (HD-map) construction** has garnered considerable attention in autonomous driving research. Towards precise map element learning, we propose a simple yet effective architecture named **PivotNet**, which adopts a unified pivot-based map representation and is formulated as a direct set prediction paradigm. Concretely, we first propose a novel *Point-to-Line Mask module* to encode both the subordinate and geometrical point-line priors in the network. Then, a well-designed *Pivot Dynamic Matching module* models the topology of dynamic point sequences by introducing the concept of sequence matching. Furthermore, to supervise both the position and the topology of the vectorized point predictions, we propose a *Dynamic Vectorized Sequence loss*. PivotNet consists of four primary components: a **Camera Feature Extractor**, a **BEV Feature Decoder**, a **Line-aware Point Decoder**, and a **Pivotal Point Predictor**. It takes RGB images as input and generates a flexible and compact vectorized representation without any post-processing. 13 | 14 |
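The four components above correspond to the sub-packages of `mapmaster/models` (`backbone`, `bev_decoder`, `ins_decoder`, `output_head`) and are wired together through the `model_setup` dictionary in `configs/pivotnet_nuscenes_*.py`. The sketch below is for orientation only: the `im_backbone` and `bev_decoder` entries mirror the overrides visible in the ResNet-50 config in this repo, while the remaining keys and all placeholder values are illustrative, not the exact configuration.

```python
# Illustrative sketch only -- the authoritative dictionaries live in configs/pivotnet_nuscenes_*.py.
model_setup = dict(
    im_backbone=dict(arch_name="resnet", ret_layers=2, bkb_kwargs=dict(depth=50)),  # Camera Feature Extractor
    bev_decoder=dict(net_kwargs=dict(in_channels=[1024, 2048])),                    # BEV Feature Decoder
    ins_decoder=dict(),                                                             # Line-aware Point Decoder (placeholder)
    output_head=dict(),                                                             # Pivotal Point Predictor (placeholder)
)
```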
15 | 16 | ##
Documentation
17 | 18 | We build the released version of **PivotNet** upon the [BeMapNet](https://github.com/er-muyue/BeMapNet/tree/main) project, so this repository supports reproducing **both** PivotNet and BeMapNet. 19 | 20 |
21 | Step-by-step Installation 22 | <br>
23 | 24 | - **a. Check Environment** 25 | ```shell 26 | Python >= 3.8 27 | CUDA 11.1 28 | # other Python/CUDA versions have not been fully tested, but they are expected to work as well. 29 | ``` 30 | 31 | - **b. Create a conda virtual environment and activate it. (Optional)** 32 | ```shell 33 | conda create -n pivotnet python=3.8 -y 34 | conda activate pivotnet 35 | ``` 36 | 37 | - **c. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/get-started/previous-versions/).** 38 | ```shell 39 | pip3 install torch==1.10.1+cu111 torchvision==0.11.2+cu111 -f https://download.pytorch.org/whl/torch_stable.html 40 | ``` 41 | 42 | - **d. Install MMCV following the [official instructions](https://github.com/open-mmlab/mmcv). (requires a GPU)** 43 | ```shell 44 | pip3 install -U openmim 45 | mim install mmcv==1.7.1 46 | ``` 47 | 48 | - **e. Install Detectron2 following the [official instructions](https://detectron2.readthedocs.io/en/latest/tutorials/install.html).** 49 | ```shell 50 | python3 -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html 51 | ``` 52 | 53 | - **f. Compile the CUDA operators for the Deformable Transformer.** 54 | ```shell 55 | cd mapmaster/models/bev_decoder/deform_transformer/ops 56 | bash make.sh 57 | ``` 58 | 59 | - **g. Install PivotNet.** 60 | ```shell 61 | git clone git@github.com:wenjie710/PivotNet.git 62 | cd PivotNet 63 | pip3 install -r requirement.txt 64 | ``` 65 | 66 |
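After step g, a quick import check can confirm that the GPU toolchain and the installed libraries are visible before compiling the deformable-attention ops or starting training. This is a minimal sketch assuming the versions from steps c-e; it is not part of the repository.

```python
# Minimal environment sanity check (not part of the repo; version numbers follow steps c-e above).
import torch
import torchvision
import mmcv
import detectron2

print("torch      :", torch.__version__)        # expected 1.10.1+cu111
print("torchvision:", torchvision.__version__)  # expected 0.11.2+cu111
print("mmcv       :", mmcv.__version__)         # expected 1.7.1
print("detectron2 :", detectron2.__version__)
assert torch.cuda.is_available(), "a CUDA-capable GPU is required (e.g. for the deformable attention ops)"
```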
67 | 68 | 69 |
70 | Material Preparation 71 | <br>
72 | 73 | - **a. Data: NuScenes** 74 | - Download and unzip the [NuScenes](https://www.nuscenes.org/nuscenes#download) dataset on your server and link it to the desired path. 75 | ```shell 76 | cd /path/to/pivotnet 77 | mkdir data 78 | ln -s /any/path/to/your/nuscenes data/nuscenes 79 | ``` 80 | - Generate pivot/Bezier annotations from the raw NuScenes annotations. 81 | ```shell 82 | cd /path/to/pivotnet 83 | python3 tools/anno_converter/nuscenes/convert.py -d ./data # generate pivot annotations only 84 | python3 tools/anno_converter/nuscenes/convert.py -d ./data --bezier # OR: generate both pivot and bezier annotations (takes longer) 85 | ``` 86 | 87 | - **b. Weights: Public Pretrained Models** 88 | - Download the public pretrained weights used for backbone initialization. 89 | ```shell 90 | cd /path/to/pivotnet 91 | cd assets/weights 92 | wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/efficientnet-b0-355c32eb.pth 93 | wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/resnet50-0676ba61.pth 94 | wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/upernet_swin_tiny_patch4_window7_512x512.pth 95 | ``` 96 | 97 | - **c. Check: Project Structure** 98 | - Your project directory should look like this: 99 | ```shell 100 | assets 101 | | -- weights (resnet, swin-t, efficient-b0, ...) 102 | | -- ... 103 | mapmaster 104 | configs 105 | data 106 | | -- nuscenes 107 | | -- samples (CAM_FRONT, CAM_FRONT_LEFT, CAM_FRONT_RIGHT, ...) 108 | | -- annotations 109 | | -- v1.0-trainval 110 | | -- ... 111 | | -- customer 112 | | -- pivot-bezier 113 | | -- *.npz 114 | tools 115 | ``` 116 | 117 |
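Each token listed under `assets/splits/nuscenes/*.txt` corresponds to one `.npz` file written by the converter and later loaded by `mapmaster/dataset/nuscenes_pivotnet.py` / `nuscenes_bemapnet.py`. The snippet below is a hedged way to inspect one converted sample; the field names come from those dataset loaders, while the exact annotation directory (shown here as `data/nuscenes/customer/pivot-bezier`) depends on the converter options and your local setup.

```python
# Inspect one converted annotation file (illustrative; adjust paths to your setup).
import numpy as np

token = open("assets/splits/nuscenes/val.txt").readline().strip()
sample = np.load(f"data/nuscenes/customer/pivot-bezier/{token}.npz", allow_pickle=True)

print(sample.files)                       # stored arrays, e.g. image_paths, trans, rots, intrins, pivot_pts, ...
print(sample["image_paths"])              # per-camera image paths relative to the nuScenes root
print(sample["trans"].shape, sample["rots"].shape, sample["intrins"].shape)  # camera poses / intrinsics
print(type(sample["pivot_pts"].item()))   # pivot-point targets consumed by nuscenes_pivotnet.py
```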
118 | 119 |
120 | 
 121 | Training and Evaluation 122 | <br>
123 | 124 | - **a. Model Training** 125 | ```shell 126 | bash run.sh train pivotnet_nuscenes_swint 30 # pivotnet, default: 8GPUs, bs=1, epochs=30 127 | bash run.sh train bemapnet_nuscenes_swint 30 # bemapnet, default: 8GPUs, bs=1, epochs=30 128 | ``` 129 | 130 | - **b. Model Evaluation** 131 | ```shell 132 | bash run.sh test pivotnet_nuscenes_swint ${checkpoint-path} # for pivotnet 133 | bash run.sh test bemapnet_nuscenes_swint ${checkpoint-path} # for bemapnet 134 | ``` 135 | 136 | - **c. Reproduce with one command** 137 | ```shell 138 | bash run.sh reproduce pivotnet_nuscenes_swint # for pivotnet 139 | bash run.sh reproduce bemapnet_nuscenes_swint # for bemapnet 140 | ``` 141 |
142 | 143 | ##
Models & Results
144 | 145 |
146 | 
 147 | Results on NuScenes Val Set 148 | <br>
149 | 150 | - **a. Easy-Setting --> AP-threshold is `0.5m, 1.0m, 1.5m` (same as [VectorMapNet](https://arxiv.org/abs/2206.08920.pdf) / [MapTR](https://arxiv.org/abs/2208.14437.pdf))** 151 | 152 | | Model | Config | Schd | mAPdivider | mAPpedcross |mAPboundary | mAPavg | Download | 153 | | :---: | :---: | :---: | :---: | :---:|:---:| :---: | :---: | 154 | |PivotNet-Effb0| [config](configs/pivotnet_nuscenes_effb0.py) | 30ep | 59.3 | 54.1 | 60.0 | 57.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30_train.log)| 155 | |PivotNet-Res50| [config](configs/pivotnet_nuscenes_res50.py) | 30ep | 58.0 | 53.5 | 59.7 | 57.1 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30_train.log)| 156 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 30ep | 62.9 | 57.9 | 64.0 | 61.6 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30_train.log)| 157 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 110ep | 67.8 | 62.1 | 69.2 | 66.4 | [model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110_train.log) | 158 | 159 | 160 | - **b. Hard-Setting --> AP-threshold is `0.2m, 0.5m, 1.0m` (Recommended as a more practical HD map evaluation protocol)** 161 | 162 | | Model | Config | Schd | mAPdivider | mAPpedcross |mAPboundary | mAPavg | Download | 163 | | :---: | :---: | :---: | :---: | :---:|:---:| :---: | :---: | 164 | |PivotNet-Effb0| [config](configs/pivotnet_nuscenes_effb0.py) | 30ep | 44.0 | 35.9 | 39.7 | 39.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30_train.log) | 165 | |PivotNet-Res50| [config](configs/pivotnet_nuscenes_res50.py) | 30ep | 43.5 | 35.6 | 40.4 | 39.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30_train.log) | 166 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 30ep | 47.7 | 39.4 | 43.7 | 43.6 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30_train.log) | 167 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 110ep | 54.1 | 43.3 | 50.3 | 49.3 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110_train.log) | 168 | 169 | 170 |
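In both tables, each per-class score is an AP averaged over the three distance thresholds of the chosen setting (the VectorMapNet/MapTR-style protocol referenced above), and the mAPavg column is simply the arithmetic mean of the three per-class scores. A small worked check for PivotNet-Effb0 under the easy setting:

```python
# mAP_avg is the mean of the per-class scores (PivotNet-Effb0, easy setting, values from the table above).
per_class = {"divider": 59.3, "pedcross": 54.1, "boundary": 60.0}
map_avg = sum(per_class.values()) / len(per_class)
print(round(map_avg, 1))  # -> 57.8
```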
171 | 172 | # 173 | ## Citation 174 | If you find PivotNet/BeMapNet/MachMap is useful in your research or applications, please consider giving us a star :star: and citing them by the following BibTeX entries: 175 | ``` 176 | @inproceedings{ding2023pivotnet, 177 | title={Pivotnet: Vectorized pivot learning for end-to-end hd map construction}, 178 | author={Ding, Wenjie and Qiao, Limeng and Qiu, Xi and Zhang, Chi}, 179 | booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, 180 | pages={3672--3682}, 181 | year={2023} 182 | } 183 | 184 | @InProceedings{Qiao_2023_CVPR, 185 | author = {Qiao, Limeng and Ding, Wenjie and Qiu, Xi and Zhang, Chi}, 186 | title = {End-to-End Vectorized HD-Map Construction With Piecewise Bezier Curve}, 187 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 188 | month = {June}, 189 | year = {2023}, 190 | pages = {13218-13228} 191 | } 192 | 193 | @article{qiao2023machmap, 194 | author={Limeng Qiao and Yongchao Zheng and Peng Zhang and Wenjie Ding and Xi Qiu and Xing Wei and Chi Zhang}, 195 | title={MachMap: End-to-End Vectorized Solution for Compact HD-Map Construction}, 196 | journal={arXiv preprint arXiv:2306.10301}, 197 | year={2023}, 198 | } 199 | 200 | ``` 201 | -------------------------------------------------------------------------------- /assets/figures/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/arch.png -------------------------------------------------------------------------------- /assets/figures/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/demo.gif -------------------------------------------------------------------------------- /assets/figures/pivot-title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivot-title.png -------------------------------------------------------------------------------- /assets/figures/pivotnet-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivotnet-arch.png -------------------------------------------------------------------------------- /assets/figures/pivotnet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivotnet-logo.png -------------------------------------------------------------------------------- /assets/figures/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/title.png -------------------------------------------------------------------------------- /assets/weights/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/weights/README.md 
-------------------------------------------------------------------------------- /configs/pivotnet_nuscenes_effb0.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mapmaster.engine.core import MapMasterCli 3 | from pivotnet_nuscenes_swint import EXPConfig, Exp 4 | 5 | EXPConfig.model_setup["im_backbone"] = dict( 6 | arch_name="efficient_net", 7 | ret_layers=2, 8 | fpn_kwargs=None, 9 | bkb_kwargs=dict( 10 | model_name='efficientnet-b0', 11 | in_channels=3, 12 | out_stride=32, 13 | with_head=False, 14 | with_cp=True, 15 | norm_layer=nn.SyncBatchNorm, 16 | weights_path="assets/weights/efficientnet-b0-355c32eb.pth", 17 | ), 18 | ) 19 | 20 | EXPConfig.model_setup['bev_decoder']["net_kwargs"].update( 21 | dict( 22 | in_channels=[112, 320], 23 | ) 24 | ) 25 | 26 | class ExpDev(Exp): 27 | def __init__(self, batch_size_per_device=1, total_devices=8, max_epoch=60, **kwargs): 28 | super(ExpDev, self).__init__(batch_size_per_device, total_devices, max_epoch, **kwargs) 29 | self.exp_config = EXPConfig() 30 | 31 | if __name__ == "__main__": 32 | MapMasterCli(ExpDev).run() 33 | 34 | -------------------------------------------------------------------------------- /configs/pivotnet_nuscenes_res50.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mapmaster.engine.core import MapMasterCli 3 | from pivotnet_nuscenes_swint import EXPConfig, Exp 4 | 5 | 6 | EXPConfig.model_setup["im_backbone"] = dict( 7 | arch_name="resnet", 8 | ret_layers=2, 9 | fpn_kwargs=None, 10 | bkb_kwargs=dict( 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(2, 3), 14 | frozen_stages=-1, # do not freeze any layers 15 | norm_cfg=dict(type='SyncBN', requires_grad=True), 16 | norm_eval=True, 17 | style='pytorch', 18 | init_cfg=dict( 19 | type='Pretrained', 20 | checkpoint='assets/weights/resnet50-0676ba61.pth'), # from pytorch 21 | with_cp=True, 22 | ), 23 | ) 24 | 25 | EXPConfig.model_setup['bev_decoder']["net_kwargs"].update( 26 | dict( 27 | in_channels=[1024, 2048], 28 | ) 29 | ) 30 | 31 | class ExpDev(Exp): 32 | def __init__(self, batch_size_per_device=1, total_devices=8, max_epoch=60, **kwargs): 33 | super(ExpDev, self).__init__(batch_size_per_device, total_devices, max_epoch, **kwargs) 34 | self.exp_config = EXPConfig() 35 | 36 | if __name__ == "__main__": 37 | MapMasterCli(ExpDev).run() 38 | 39 | -------------------------------------------------------------------------------- /mapmaster/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/mapmaster/__init__.py -------------------------------------------------------------------------------- /mapmaster/dataset/nuscenes_bemapnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | from copy import deepcopy 6 | from skimage import io as skimage_io 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class NuScenesMapDataset(Dataset): 11 | def __init__(self, img_key_list, map_conf, ida_conf, bezier_conf, transforms, data_split="training"): 12 | super().__init__() 13 | self.img_key_list = img_key_list 14 | self.map_conf = map_conf 15 | self.ida_conf = ida_conf 16 | self.bez_conf = bezier_conf 17 | self.ego_size = map_conf["ego_size"] 18 | self.mask_key = map_conf["mask_key"] 19 | self.nusc_root = 
map_conf["nusc_root"] 20 | self.anno_root = map_conf["anno_root"] 21 | self.split_dir = map_conf["split_dir"] 22 | self.num_degree = bezier_conf["num_degree"] 23 | self.max_pieces = bezier_conf["max_pieces"] 24 | self.max_instances = bezier_conf["max_instances"] 25 | self.split_mode = 'train' if data_split == "training" else 'val' 26 | split_path = os.path.join(self.split_dir, f'{self.split_mode}.txt') 27 | self.tokens = [token.strip() for token in open(split_path).readlines()] 28 | self.transforms = transforms 29 | 30 | def __getitem__(self, idx: int): 31 | token = self.tokens[idx] 32 | sample = np.load(os.path.join(self.anno_root, f'{token}.npz'), allow_pickle=True) 33 | resize_dims, crop, flip, rotate = self.sample_ida_augmentation() 34 | images, ida_mats = [], [] 35 | for im_view in self.img_key_list: 36 | for im_path in sample['image_paths']: 37 | if im_path.startswith(f'samples/{im_view}/'): 38 | im_path = os.path.join(self.nusc_root, im_path) 39 | img = skimage_io.imread(im_path) 40 | img, ida_mat = self.img_transform(img, resize_dims, crop, flip, rotate) 41 | images.append(img) 42 | ida_mats.append(ida_mat) 43 | extrinsic = np.stack([np.eye(4) for _ in range(sample["trans"].shape[0])], axis=0) 44 | extrinsic[:, :3, :3] = sample["rots"] 45 | extrinsic[:, :3, 3] = sample["trans"] 46 | intrinsic = sample['intrins'] 47 | ctr_points = np.zeros((self.max_instances, max(self.max_pieces) * max(self.num_degree) + 1, 2), dtype=np.float) 48 | ins_labels = np.zeros((self.max_instances, 3), dtype=np.int16) - 1 49 | for ins_id, ctr_info in enumerate(sample['ctr_points']): 50 | cls_id = int(ctr_info['type']) 51 | ctr_pts_raw = np.array(ctr_info['pts']) 52 | max_points = self.max_pieces[cls_id] * self.num_degree[cls_id] + 1 53 | num_points = max_points if max_points <= ctr_pts_raw.shape[0] else ctr_pts_raw.shape[0] 54 | assert num_points >= self.num_degree[cls_id] + 1 55 | ctr_points[ins_id][:num_points] = np.array(ctr_pts_raw[:num_points]) 56 | ins_labels[ins_id] = [cls_id, (num_points - 1) // self.num_degree[cls_id] - 1, num_points] 57 | masks = sample[self.mask_key] 58 | if flip: 59 | new_order = [2, 1, 0, 5, 4, 3] 60 | img_key_list = [self.img_key_list[i] for i in new_order] 61 | images = [images[i] for i in new_order] 62 | ida_mats = [ida_mats[i] for i in new_order] 63 | extrinsic = [extrinsic[i] for i in new_order] 64 | intrinsic = [intrinsic[i] for i in new_order] 65 | masks = [np.flip(mask, axis=1) for mask in masks] 66 | ctr_points = self.point_flip(ctr_points, ins_labels, self.ego_size) 67 | item = dict( 68 | images=images, targets=dict(masks=masks, points=ctr_points, labels=ins_labels), 69 | extrinsic=np.stack(extrinsic), intrinsic=np.stack(intrinsic), ida_mats=np.stack(ida_mats), 70 | extra_infos=dict(token=token, img_key_list=self.img_key_list, map_size=self.ego_size, do_flip=flip) 71 | ) 72 | if self.transforms is not None: 73 | item = self.transforms(item) 74 | return item 75 | 76 | def __len__(self): 77 | return len(self.tokens) 78 | 79 | def sample_ida_augmentation(self): 80 | """Generate ida augmentation values based on ida_config.""" 81 | resize_dims = w, h = self.ida_conf["resize_dims"] 82 | crop = (0, 0, w, h) 83 | if self.ida_conf["up_crop_ratio"] > 0: 84 | crop = (0, int(self.ida_conf["up_crop_ratio"] * h), w, h) 85 | flip, color, rotate_ida = False, False, 0 86 | if self.split_mode == "train": 87 | if self.ida_conf["rand_flip"] and np.random.choice([0, 1]): 88 | flip = True 89 | if self.ida_conf["rot_lim"]: 90 | assert isinstance(self.ida_conf["rot_lim"], (tuple, list)) 91 
| rotate_ida = np.random.uniform(*self.ida_conf["rot_lim"]) 92 | return resize_dims, crop, flip, rotate_ida 93 | 94 | def img_transform(self, img, resize_dims, crop, flip, rotate): 95 | img = Image.fromarray(img) 96 | ida_rot = torch.eye(2) 97 | ida_tran = torch.zeros(2) 98 | W, H = img.size 99 | img = img.resize(resize_dims) 100 | img = img.crop(crop) 101 | if flip: 102 | img = img.transpose(method=Image.FLIP_LEFT_RIGHT) 103 | img = img.rotate(rotate) 104 | 105 | # post-homography transformation 106 | scales = torch.tensor([resize_dims[0] / W, resize_dims[1] / H]) 107 | ida_rot *= torch.Tensor(scales) 108 | ida_tran -= torch.Tensor(crop[:2]) 109 | if flip: 110 | A = torch.Tensor([[-1, 0], [0, 1]]) 111 | b = torch.Tensor([crop[2] - crop[0], 0]) 112 | ida_rot = A.matmul(ida_rot) 113 | ida_tran = A.matmul(ida_tran) + b 114 | A = self.get_rot(rotate / 180 * np.pi) 115 | b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2 116 | b = A.matmul(-b) + b 117 | ida_rot = A.matmul(ida_rot) 118 | ida_tran = A.matmul(ida_tran) + b 119 | ida_mat = ida_rot.new_zeros(3, 3) 120 | ida_mat[2, 2] = 1 121 | ida_mat[:2, :2] = ida_rot 122 | ida_mat[:2, 2] = ida_tran 123 | return np.asarray(img), ida_mat 124 | 125 | @staticmethod 126 | def point_flip(points, labels, map_shape): 127 | 128 | def _flip(pts): 129 | pts[:, 0] = map_shape[1] - pts[:, 0] 130 | return pts.copy() 131 | 132 | points_ret = deepcopy(points) 133 | for ins_id in range(points.shape[0]): 134 | end = labels[ins_id, 2] 135 | points_ret[ins_id][:end] = _flip(points[ins_id][:end]) 136 | 137 | return points_ret 138 | 139 | @staticmethod 140 | def get_rot(h): 141 | return torch.Tensor([[np.cos(h), np.sin(h)], [-np.sin(h), np.cos(h)]]) 142 | -------------------------------------------------------------------------------- /mapmaster/dataset/nuscenes_pivotnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle as pkl 4 | from PIL import Image 5 | from torch.utils.data import Dataset 6 | 7 | class NuScenesMapDataset(Dataset): 8 | def __init__(self, img_key_list, map_conf, transforms, data_split="training"): 9 | super().__init__() 10 | self.img_key_list = img_key_list 11 | self.map_conf = map_conf 12 | 13 | self.ego_size = map_conf["ego_size"] 14 | self.mask_key = map_conf["mask_key"] 15 | self.nusc_root = map_conf["nusc_root"] 16 | self.anno_root = map_conf["anno_root"] 17 | self.split_dir = map_conf["split_dir"] # instance_mask/instance_mask8 18 | 19 | self.split_mode = 'train' if data_split == "training" else 'val' 20 | split_path = os.path.join(self.split_dir, f'{self.split_mode}.txt') 21 | self.tokens = [token.strip() for token in open(split_path).readlines()] 22 | self.transforms = transforms 23 | 24 | def __getitem__(self, idx: int): 25 | token = self.tokens[idx] 26 | sample = np.load(os.path.join(self.anno_root, f'{token}.npz'), allow_pickle=True) 27 | # images 28 | images = [] 29 | for im_view in self.img_key_list: 30 | for im_path in sample['image_paths']: 31 | if im_path.startswith(f'samples/{im_view}/'): 32 | im_path = os.path.join(self.nusc_root, im_path) 33 | img = np.asarray(Image.open(im_path)) 34 | images.append(img) 35 | # pivot pts 36 | pivot_pts = sample["pivot_pts"].item() 37 | valid_length = sample["pivot_length"].item() 38 | # targets 39 | masks=sample[self.mask_key] 40 | targets = dict(masks=masks, points=pivot_pts, valid_len=valid_length) 41 | # pose 42 | extrinsic = np.stack([np.eye(4) for _ in range(sample["trans"].shape[0])], axis=0) 
43 | extrinsic[:, :3, :3] = sample["rots"] 44 | extrinsic[:, :3, 3] = sample["trans"] 45 | intrinsic = sample['intrins'] 46 | # transform 47 | item = dict(images=images, targets=targets, 48 | extra_infos=dict(token=token, map_size=self.ego_size), 49 | extrinsic=np.stack(extrinsic, axis=0), intrinsic=np.stack(intrinsic, axis=0)) 50 | if self.transforms is not None: 51 | item = self.transforms(item) 52 | 53 | return item 54 | 55 | def __len__(self): 56 | return len(self.tokens) 57 | -------------------------------------------------------------------------------- /mapmaster/dataset/sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import itertools 3 | import torch.distributed as dist 4 | from typing import Optional 5 | from torch.utils.data.sampler import Sampler 6 | 7 | 8 | class InfiniteSampler(Sampler): 9 | """ 10 | In training, we only care about the "infinite stream" of training data. 11 | So this sampler produces an infinite stream of indices and 12 | all workers cooperate to correctly shuffle the indices and sample different indices. 13 | The samplers in each worker effectively produces `indices[worker_id::num_workers]` 14 | where `indices` is an infinite stream of indices consisting of 15 | `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) 16 | or `range(size) + range(size) + ...` (if shuffle is False) 17 | """ 18 | 19 | def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = 0, rank=0, world_size=1, drop_last=False): 20 | """ 21 | Args: 22 | size (int): the total number of data of the underlying dataset to sample from 23 | shuffle (bool): whether to shuffle the indices or not 24 | seed (int): the initial seed of the shuffle. Must be the same 25 | across all workers. If None, will use a random seed shared 26 | among workers (require synchronization among all workers). 
27 | """ 28 | self._size = size 29 | assert size > 0 30 | self._shuffle = shuffle 31 | self._seed = int(seed) 32 | self.drop_last = drop_last 33 | 34 | if dist.is_available() and dist.is_initialized(): 35 | self._rank = dist.get_rank() 36 | self._world_size = dist.get_world_size() 37 | else: 38 | self._rank = rank 39 | self._world_size = world_size 40 | 41 | def set_epoch(self, epoch): 42 | pass 43 | 44 | def __iter__(self): 45 | start = self._rank 46 | yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) 47 | 48 | def _infinite_indices(self): 49 | g = torch.Generator() 50 | g.manual_seed(self._seed) 51 | while True: 52 | if self._shuffle: 53 | yield from torch.randperm(self._size, generator=g).tolist() 54 | else: 55 | yield from list(range(self._size)) 56 | 57 | def __len__(self): 58 | if self.drop_last: 59 | return self._size // self._world_size 60 | else: 61 | return (self._size + self._world_size - 1) // self._world_size 62 | -------------------------------------------------------------------------------- /mapmaster/dataset/transform.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import mmcv 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | from collections.abc import Sequence 7 | 8 | class Resize(object): 9 | def __init__(self, img_scale=None, backend="cv2", interpolation="bilinear"): 10 | self.size = img_scale 11 | self.backend = backend 12 | self.interpolation = interpolation 13 | self.cv2_interp_codes = { 14 | "nearest": cv2.INTER_NEAREST, 15 | "bilinear": cv2.INTER_LINEAR, 16 | "bicubic": cv2.INTER_CUBIC, 17 | "area": cv2.INTER_AREA, 18 | "lanczos": cv2.INTER_LANCZOS4, 19 | } 20 | self.pillow_interp_codes = { 21 | "nearest": Image.NEAREST, 22 | "bilinear": Image.BILINEAR, 23 | "bicubic": Image.BICUBIC, 24 | "box": Image.BOX, 25 | "lanczos": Image.LANCZOS, 26 | "hamming": Image.HAMMING, 27 | } 28 | 29 | def __call__(self, data_dict): 30 | """Call function to resize images. 31 | 32 | Args: 33 | data_dict (dict): Result dict from loading pipeline. 34 | 35 | Returns: 36 | dict: Resized data_dict, 'scale_factor' keys are added into result dict. 37 | """ 38 | 39 | imgs = [] 40 | for img in data_dict["images"]: 41 | img = self.im_resize(img, self.size, backend=self.backend) 42 | imgs.append(img) 43 | data_dict["images"] = imgs 44 | 45 | new_h, new_w = imgs[0].shape[:2] 46 | h, w = data_dict["images"][0].shape[:2] 47 | w_scale = new_w / w 48 | h_scale = new_h / h 49 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) 50 | data_dict["extra_infos"].update({"scale_factor": scale_factor}) 51 | 52 | return data_dict 53 | 54 | def im_resize(self, img, size, return_scale=False, interpolation="bilinear", out=None, backend="cv2"): 55 | """Resize image to a given size. 56 | Args: 57 | img (ndarray): The input image. 58 | size (tuple[int]): Target size (w, h). 59 | return_scale (bool): Whether to return `w_scale` and `h_scale`. 60 | interpolation (str): Interpolation method, accepted values are 61 | "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' 62 | backend, "nearest", "bilinear" for 'pillow' backend. 63 | out (ndarray): The output destination. 64 | backend (str | None): The image resize backend type. Options are `cv2`, 65 | `pillow`, `None`. 66 | Returns: 67 | tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or 68 | `resized_img`. 
69 | """ 70 | h, w = img.shape[:2] 71 | if backend not in ["cv2", "pillow"]: 72 | raise ValueError( 73 | f"backend: {backend} is not supported for resize." f"Supported backends are 'cv2', 'pillow'" 74 | ) 75 | 76 | if backend == "pillow": 77 | assert img.dtype == np.uint8, "Pillow backend only support uint8 type" 78 | pil_image = Image.fromarray(img) 79 | pil_image = pil_image.resize(size, self.pillow_interp_codes[interpolation]) 80 | resized_img = np.array(pil_image) 81 | else: 82 | resized_img = cv2.resize(img, size, dst=out, interpolation=self.cv2_interp_codes[interpolation]) 83 | if not return_scale: 84 | return resized_img 85 | else: 86 | w_scale = size[0] / w 87 | h_scale = size[1] / h 88 | return resized_img, w_scale, h_scale 89 | 90 | class Normalize(object): 91 | """Normalize the image. 92 | 93 | Added key is "img_norm_cfg". 94 | 95 | Args: 96 | mean (sequence): Mean values of 3 channels. 97 | std (sequence): Std values of 3 channels. 98 | to_rgb (bool): Whether to convert the image from BGR to RGB, 99 | default is true. 100 | """ 101 | 102 | def __init__(self, mean, std, to_rgb=True): 103 | self.mean = np.array(mean, dtype=np.float32) 104 | self.std = np.array(std, dtype=np.float32) 105 | self.to_rgb = to_rgb 106 | 107 | def __call__(self, data_dict): 108 | imgs = [] 109 | for img in data_dict["images"]: 110 | if self.to_rgb: 111 | img = img.astype(np.float32) / 255.0 112 | img = self.im_normalize(img, self.mean, self.std, self.to_rgb) 113 | imgs.append(img) 114 | data_dict["images"] = imgs 115 | data_dict["extra_infos"]["img_norm_cfg"] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb) 116 | return data_dict 117 | 118 | @staticmethod 119 | def im_normalize(img, mean, std, to_rgb=True): 120 | img = img.copy().astype(np.float32) 121 | assert img.dtype != np.uint8 # cv2 inplace normalization does not accept uint8 122 | mean = np.float64(mean.reshape(1, -1)) 123 | stdinv = 1 / np.float64(std.reshape(1, -1)) 124 | if to_rgb: 125 | cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace 126 | cv2.subtract(img, mean, img) # inplace 127 | cv2.multiply(img, stdinv, img) # inplace 128 | return img 129 | 130 | 131 | class ToTensor(object): 132 | """Default formatting bundle.""" 133 | 134 | def __call__(self, data_dict): 135 | """Call function to transform and format common fields in data_dict. 136 | 137 | Args: 138 | data_dict (dict): Data dict contains the data to convert. 139 | 140 | Returns: 141 | dict: The result dict contains the data that is formatted with default bundle. 
142 | """ 143 | 144 | for k in ["images", "extrinsic", "intrinsic", "ida_mats"]: 145 | if k == "images": 146 | data_dict[k] = np.stack([img.transpose(2, 0, 1) for img in data_dict[k]], axis=0) 147 | data_dict[k] = self.to_tensor(np.ascontiguousarray(data_dict[k])) 148 | 149 | for k in ["masks", "points", "labels"]: 150 | data_dict["targets"][k] = self.to_tensor(np.ascontiguousarray(data_dict["targets"][k])) 151 | 152 | return data_dict 153 | 154 | @staticmethod 155 | def to_tensor(data): 156 | if isinstance(data, torch.Tensor): 157 | return data 158 | elif isinstance(data, np.ndarray): 159 | return torch.from_numpy(data) 160 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 161 | return torch.tensor(data) 162 | elif isinstance(data, int): 163 | return torch.LongTensor([data]) 164 | elif isinstance(data, float): 165 | return torch.FloatTensor([data]) 166 | else: 167 | raise TypeError(f"type {type(data)} cannot be converted to tensor.") 168 | 169 | class ToTensor_Pivot(object): 170 | """Default formatting bundle.""" 171 | 172 | def __call__(self, data_dict): 173 | """Call function to transform and format common fields in data_dict. 174 | 175 | Args: 176 | data_dict (dict): Data dict contains the data to convert. 177 | 178 | Returns: 179 | dict: The result dict contains the data that is formatted with default bundle. 180 | """ 181 | if "images" in data_dict: 182 | if isinstance(data_dict["images"], list): 183 | # process multiple imgs in single frame 184 | imgs = [img.transpose(2, 0, 1) for img in data_dict["images"]] 185 | imgs = np.ascontiguousarray(np.stack(imgs, axis=0)) 186 | data_dict["images"] = self.to_tensor(imgs) 187 | else: 188 | img = np.ascontiguousarray(data_dict["img"].transpose(2, 0, 1)) 189 | data_dict["images"] = self.to_tensor(img) 190 | 191 | for k in ["masks"]: 192 | data_dict["targets"][k] = self.to_tensor(np.ascontiguousarray(data_dict["targets"][k])) 193 | 194 | return data_dict 195 | 196 | @staticmethod 197 | def to_tensor(data): 198 | """Convert objects of various python types to :obj:`torch.Tensor`. 199 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 200 | :class:`Sequence`, :class:`int` and :class:`float`. 201 | Args: 202 | data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to 203 | be converted. 204 | """ 205 | 206 | if isinstance(data, torch.Tensor): 207 | return data 208 | elif isinstance(data, np.ndarray): 209 | return torch.from_numpy(data) 210 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 211 | return torch.tensor(data) 212 | elif isinstance(data, int): 213 | return torch.LongTensor([data]) 214 | elif isinstance(data, float): 215 | return torch.FloatTensor([data]) 216 | else: 217 | raise TypeError(f"type {type(data)} cannot be converted to tensor.") 218 | 219 | 220 | 221 | class Pad(object): 222 | """Pad the image & mask. 223 | 224 | There are two padding modes: (1) pad to a fixed size and (2) pad to the 225 | minimum size that is divisible by some number. 226 | Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", 227 | 228 | Args: 229 | size (tuple, optional): Fixed padding size. 230 | size_divisor (int, optional): The divisor of padded size. 231 | pad_val (float, optional): Padding value, 0 by default. 
232 | """ 233 | 234 | def __init__(self, size_divisor=None, pad_val=0): 235 | self.size_divisor = size_divisor 236 | self.pad_val = pad_val 237 | # only one of size and size_divisor should be valid 238 | assert size_divisor is not None 239 | 240 | def __call__(self, data_dict): 241 | """Call function to pad images, masks, semantic segmentation maps. 242 | 243 | Args: 244 | data_dict (dict): Result dict from loading pipeline. 245 | 246 | Returns: 247 | dict: Updated result dict. 248 | """ 249 | padded_img = None 250 | padded_imgs = [] 251 | for img in data_dict["images"]: 252 | padded_img = self.im_pad_to_multiple(img, self.size_divisor, pad_val=self.pad_val) 253 | padded_imgs.append(padded_img) 254 | data_dict["images"] = padded_imgs 255 | data_dict["extra_infos"].update( 256 | { 257 | "pad_shape": padded_img.shape, 258 | "pad_size_divisor": self.size_divisor if self.size_divisor is not None else "None", 259 | } 260 | ) 261 | return data_dict 262 | 263 | def im_pad_to_multiple(self, img, divisor, pad_val=0): 264 | """Pad an image to ensure each edge to be multiple to some number. 265 | Args: 266 | img (ndarray): Image to be padded. 267 | divisor (int): Padded image edges will be multiple to divisor. 268 | pad_val (Number | Sequence[Number]): Same as :func:`impad`. 269 | Returns: 270 | ndarray: The padded image. 271 | """ 272 | pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor 273 | pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor 274 | return self.im_pad(img, shape=(pad_h, pad_w), pad_val=pad_val) 275 | -------------------------------------------------------------------------------- /mapmaster/engine/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import datetime 5 | import warnings 6 | import subprocess 7 | from mapmaster.engine.executor import Trainer, BeMapNetEvaluator 8 | from mapmaster.engine.environ import ShareFSUUIDNameServer, RlaunchReplicaEnv 9 | from mapmaster.engine.callbacks import CheckPointLoader, CheckPointSaver, ClearMLCallback, ProgressBar 10 | from mapmaster.engine.callbacks import TensorBoardMonitor, TextMonitor, ClipGrad 11 | from mapmaster.utils.env import collect_env_info, get_root_dir 12 | from mapmaster.utils.misc import setup_logger, sanitize_filename, PyDecorator, all_gather_object 13 | 14 | 15 | __all__ = ["BaseCli", "BeMapNetCli"] 16 | 17 | 18 | class BaseCli: 19 | """Command line tools for any exp.""" 20 | 21 | def __init__(self, Exp): 22 | """Make sure the order of initialization is: build_args --> build_env --> build_exp, 23 | since experiments depend on the environment and the environment depends on args. 
24 | 25 | Args: 26 | Exp : experiment description class 27 | """ 28 | self.ExpCls = Exp 29 | self.args = self._get_parser(Exp).parse_args() 30 | self.env = RlaunchReplicaEnv(self.args.sync_bn, self.args.devices, self.args.find_unused_parameters) 31 | 32 | @property 33 | def exp(self): 34 | if not hasattr(self, "_exp"): 35 | exp = self.ExpCls( 36 | **{x if y is not None else "none": y for (x, y) in vars(self.args).items()}, 37 | total_devices=self.env.world_size(), 38 | ) 39 | self.exp_updated_cfg_msg = exp.update_attr(self.args.exp_options) 40 | self._exp = exp 41 | return self._exp 42 | 43 | def _get_parser(self, Exp): 44 | parser = argparse.ArgumentParser() 45 | parser = Exp.add_argparse_args(parser) 46 | parser = self.add_argparse_args(parser) 47 | return parser 48 | 49 | @staticmethod 50 | def add_argparse_args(parser: argparse.ArgumentParser): 51 | parser.add_argument("--eval", dest="eval", action="store_true", help="conduct evaluation only") 52 | parser.add_argument("-te", "--train_and_eval", dest="train_and_eval", action="store_true", help="train+eval") 53 | parser.add_argument("--find_unused_parameters", dest="find_unused_parameters", action="store_true") 54 | parser.add_argument("-d", "--devices", default="0-7", type=str, help="device for training") 55 | parser.add_argument("--ckpt", type=str, default=None, help="checkpoint to start from or be evaluated") 56 | parser.add_argument("--pretrained_model", type=str, default=None, help="pretrained_model used by training") 57 | parser.add_argument("--sync_bn", type=int, default=0, help="0-> disable sync_bn, 1-> whole world") 58 | clearml_parser = parser.add_mutually_exclusive_group(required=False) 59 | clearml_parser.add_argument("--clearml", dest="clearml", action="store_true", help="enabel clearml for train") 60 | clearml_parser.add_argument("--no-clearml", dest="clearml", action="store_false", help="disable clearml") 61 | parser.set_defaults(clearml=True) 62 | return parser 63 | 64 | def _get_exp_output_dir(self): 65 | exp_dir = os.path.join(os.path.join(get_root_dir(), "outputs"), sanitize_filename(self.exp.exp_name)) 66 | os.makedirs(exp_dir, exist_ok=True) 67 | output_dir = None 68 | if self.args.ckpt: 69 | output_dir = os.path.dirname(os.path.dirname(os.path.abspath(self.args.ckpt))) 70 | elif self.env.global_rank() == 0: 71 | output_dir = os.path.join(exp_dir, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")) 72 | os.makedirs(output_dir, exist_ok=True) 73 | # make a symlink "latest" 74 | symlink, symlink_tmp = os.path.join(exp_dir, "latest"), os.path.join(exp_dir, "latest_tmp") 75 | if os.path.exists(symlink_tmp): 76 | os.remove(symlink_tmp) 77 | os.symlink(os.path.relpath(output_dir, exp_dir), symlink_tmp) 78 | os.rename(symlink_tmp, symlink) 79 | output_dir = all_gather_object(output_dir)[0] 80 | return output_dir 81 | 82 | def get_evaluator(self, callbacks=None): 83 | exp = self.exp 84 | if self.args.ckpt is None: 85 | warnings.warn("No checkpoint is specified for evaluation") 86 | if exp.eval_executor_class is None: 87 | sys.exit("No evaluator is specified for evaluation") 88 | 89 | output_dir = self._get_exp_output_dir() 90 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="eval.log") 91 | self._set_basic_log_message(logger) 92 | if callbacks is None: 93 | callbacks = [self.env, CheckPointLoader(self.args.ckpt)] 94 | evaluator = exp.eval_executor_class(exp=exp, callbacks=callbacks, logger=logger) 95 | return evaluator 96 | 97 | def _set_basic_log_message(self, logger): 98 | 
logger.opt(ansi=True).info("Cli arguments:\n{}".format(self.args)) 99 | logger.info(f"exp_name: {self.exp.exp_name}") 100 | logger.opt(ansi=True).info( 101 | "Used experiment configs:\n{}".format(self.exp.get_cfg_as_str()) 102 | ) 103 | if self.exp_updated_cfg_msg: 104 | logger.opt(ansi=True).info( 105 | "List of override configs:\n{}".format(self.exp_updated_cfg_msg) 106 | ) 107 | logger.opt(ansi=True).info("Environment info:\n{}".format(collect_env_info())) 108 | 109 | def get_trainer(self, callbacks=None, evaluator=None): 110 | args = self.args 111 | exp = self.exp 112 | if evaluator is not None: 113 | output_dir = self.exp.output_dir 114 | else: 115 | output_dir = self._get_exp_output_dir() 116 | 117 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="train.log") 118 | self._set_basic_log_message(logger) 119 | 120 | if callbacks is None: 121 | callbacks = [ 122 | self.env, 123 | ProgressBar(logger=logger), 124 | TextMonitor(interval=exp.print_interval), 125 | TensorBoardMonitor(os.path.join(output_dir, "tensorboard"), interval=exp.print_interval), 126 | CheckPointSaver( 127 | local_path=os.path.join(output_dir, "dump_model"), 128 | remote_path=exp.ckpt_oss_save_dir, 129 | save_interval=exp.dump_interval, 130 | num_keep_latest=exp.num_keep_latest_ckpt, 131 | ), 132 | ] 133 | if "grad_clip_value" in exp.__dict__: 134 | callbacks.append(ClipGrad(exp.grad_clip_value)) 135 | if args.clearml: 136 | callbacks.append(ClearMLCallback()) 137 | if args.ckpt: 138 | callbacks.append(CheckPointLoader(args.ckpt)) 139 | if args.pretrained_model: 140 | callbacks.append(CheckPointLoader(args.pretrained_model, weight_only=True)) 141 | callbacks.extend(exp.callbacks) 142 | 143 | trainer = Trainer(exp=exp, callbacks=callbacks, logger=logger, evaluator=evaluator) 144 | return trainer 145 | 146 | def executor(self): 147 | if self.args.eval: 148 | self.get_evaluator().eval() 149 | elif self.args.train_and_eval: 150 | evaluator = self.get_evaluator(callbacks=[]) 151 | self.get_trainer(evaluator=evaluator).train() 152 | else: 153 | self.get_trainer().train() 154 | 155 | def dispatch(self, executor_func): 156 | is_master = self.env.global_rank() == 0 157 | with ShareFSUUIDNameServer(is_master) as ns: 158 | self.env.set_master_uri(ns) 159 | self.env.setup_nccl() 160 | if self.env.local_rank() == 0: 161 | command = sys.argv.copy() 162 | command[0] = os.path.abspath(command[0]) 163 | command = [sys.executable] + command 164 | for local_rank in range(1, self.env.nr_gpus): 165 | env_copy = os.environ.copy() 166 | env_copy["LOCAL_RANK"] = f"{local_rank}" 167 | subprocess.Popen(command, env=env_copy) 168 | self.env.init_dist() 169 | executor_func() 170 | 171 | def run(self): 172 | self.dispatch(self.executor) 173 | 174 | 175 | class MapMasterCli(BaseCli): 176 | @PyDecorator.overrides(BaseCli) 177 | def get_evaluator(self, callbacks=None): 178 | exp = self.exp 179 | 180 | output_dir = self._get_exp_output_dir() 181 | self.exp.output_dir = output_dir 182 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="eval.log") 183 | self._set_basic_log_message(logger) 184 | if callbacks is None: 185 | callbacks = [ 186 | self.env, 187 | CheckPointLoader(self.args.ckpt), 188 | ] 189 | 190 | evaluator = BeMapNetEvaluator(exp=exp, callbacks=callbacks, logger=logger) 191 | return evaluator 192 | -------------------------------------------------------------------------------- /mapmaster/engine/environ.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import uuid 4 | import torch 5 | import subprocess 6 | import numpy as np 7 | from torch import nn 8 | from loguru import logger 9 | import torch.distributed as dist 10 | from mapmaster.utils.env import get_root_dir 11 | from mapmaster.utils.misc import parse_devices 12 | from mapmaster.engine.callbacks import Callback 13 | 14 | 15 | __all__ = ["ShareFSUUIDNameServer", "RlaunchReplicaEnv"] 16 | output_root_dir = os.path.join(get_root_dir(), "outputs") 17 | 18 | 19 | class ShareFSUUIDNameServer: 20 | def __init__(self, is_master): 21 | self.exp_id = self._get_exp_id() 22 | self.is_master = is_master 23 | os.makedirs(os.path.dirname(self.filepath), exist_ok=True) 24 | 25 | def _get_exp_id(self): 26 | if "DET3D_EXPID" not in os.environ: 27 | if int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) == 1: 28 | return str(uuid.uuid4()) 29 | msg = """cannot find DET3D_EXPID in environ please use following 30 | command DET3D_EXPID=$(cat /proc/sys/kernel/random/uuid) rlaunch ... 31 | """ 32 | logger.error(msg) 33 | raise RuntimeError 34 | return str(os.environ["DET3D_EXPID"]) 35 | 36 | @property 37 | def filepath(self): 38 | return os.path.join(output_root_dir, f"master_ip_{self.exp_id}.txt") 39 | 40 | def __enter__(self): 41 | if self.is_master: 42 | self.set_master() 43 | return self 44 | 45 | def __exit__(self, exc_type, exc_value, exc_tb): 46 | if self.is_master: 47 | os.remove(self.filepath) 48 | 49 | def set_master(self): 50 | assert not os.path.exists(self.filepath) 51 | hostname = "Host" 52 | with open(self.filepath, "w") as f: 53 | f.write(hostname) 54 | 55 | def get_master(self): 56 | while True: 57 | if os.path.exists(self.filepath): 58 | with open(self.filepath, "r") as f: 59 | return f.read() 60 | else: 61 | time.sleep(5) 62 | 63 | 64 | class _DDPEnv(Callback): 65 | def __init__(self, sync_bn=0, devices=None, find_unused_parameters=False): 66 | if devices: 67 | devices = parse_devices(devices) 68 | os.environ["CUDA_VISIBLE_DEVICES"] = devices 69 | self.nr_gpus = torch.cuda.device_count() 70 | self.sync_bn = sync_bn 71 | self.find_unused_parameters = find_unused_parameters 72 | 73 | @staticmethod 74 | def setup_nccl(): 75 | ifname = filter(lambda x: x not in ("lo",), os.listdir("/sys/class/net/")) 76 | os.environ["NCCL_SOCKET_IFNAME"] = ",".join(ifname) 77 | os.environ["NCCL_IB_DISABLE"] = "1" 78 | 79 | # os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL" 80 | os.environ["NCCL_IB_HCA"] = subprocess.getoutput( 81 | "cd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; " 82 | "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null " 83 | "| grep v >/dev/null && echo $i ; done; > /dev/null" 84 | ) 85 | os.environ["NCCL_IB_GID_INDEX"] = "3" 86 | os.environ["NCCL_IB_TC"] = "106" 87 | 88 | def after_init(self, trainer): 89 | trainer.model.cuda() 90 | if int(self.sync_bn) > 1: 91 | ranks = np.arange(self.world_size()).reshape(-1, self.sync_bn) 92 | process_groups = [torch.distributed.new_group(list(pids)) for pids in ranks] 93 | trainer.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm( 94 | trainer.model, process_groups[self.global_rank() // self.sync_bn] 95 | ) 96 | elif int(self.sync_bn) == 1: 97 | trainer.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(trainer.model) 98 | trainer.model = nn.parallel.DistributedDataParallel( 99 | trainer.model, device_ids=[self.local_rank()], find_unused_parameters=self.find_unused_parameters 100 | ) 101 | 102 | def cleanup(self): 103 | 
dist.destroy_process_group() 104 | 105 | def init_dist(self): 106 | torch.cuda.set_device(self.local_rank()) 107 | dist.init_process_group( 108 | backend="nccl", 109 | init_method=self._master_uri, 110 | rank=self.global_rank(), 111 | world_size=self.world_size(), 112 | ) 113 | dist.barrier() 114 | 115 | 116 | class RlaunchReplicaEnv(_DDPEnv): 117 | def __init__(self, sync_bn=0, devices=None, find_unused_parameters=False): 118 | super().__init__(sync_bn, devices, find_unused_parameters) 119 | 120 | def set_master_uri(self, ns): 121 | self._master_uri = f"tcp://{self.master_address(ns)}:{self.master_port()}" 122 | logger.info(self._master_uri) 123 | 124 | @staticmethod 125 | def is_brainpp_mm_env(): 126 | return int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) > 1 127 | 128 | def master_address(self, ns) -> str: 129 | if self.node_rank() == 0: 130 | root_node = "localhost" 131 | else: 132 | root_node = ns.get_master() 133 | os.environ["MASTER_ADDR"] = root_node 134 | return root_node 135 | 136 | def master_port(self) -> int: 137 | port = os.environ.get("MASTER_PORT", 12345) 138 | os.environ["MASTER_PORT"] = str(port) 139 | return int(port) 140 | 141 | def world_size(self) -> int: 142 | return int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) * int(self.nr_gpus) 143 | 144 | def global_rank(self) -> int: 145 | return int(self.nr_gpus) * self.node_rank() + self.local_rank() 146 | 147 | def local_rank(self) -> int: 148 | return int(os.environ.get("LOCAL_RANK", 0)) 149 | 150 | def node_rank(self) -> int: 151 | return int(os.environ.get("RLAUNCH_REPLICA", 0)) 152 | -------------------------------------------------------------------------------- /mapmaster/engine/executor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from tqdm import tqdm 4 | from typing import Sequence 5 | from mapmaster.engine.experiment import BaseExp 6 | from mapmaster.utils.misc import get_rank, synchronize 7 | 8 | 9 | __all__ = ["Callback", "BaseExecutor", "Trainer", "BeMapNetEvaluator"] 10 | 11 | 12 | class Callback: 13 | 14 | # callback enabled rank list 15 | # None means callback is always enabled 16 | enabled_rank = None 17 | 18 | def setup(self, executor): 19 | pass 20 | 21 | def load_checkpoint(self, executor): 22 | pass 23 | 24 | def after_init(self, executor): 25 | pass 26 | 27 | def before_train(self, executor): 28 | pass 29 | 30 | def before_epoch(self, executor, epoch: int): 31 | pass 32 | 33 | def before_step(self, executor, step, data_dict): 34 | pass 35 | 36 | def before_backward(self, executor): 37 | pass 38 | 39 | def before_optimize(self, executor): 40 | pass 41 | 42 | def after_step(self, executor, step, data_dict, *args, **kwargs): 43 | pass 44 | 45 | def after_epoch(self, executor, epoch: int, update_best_ckpt: bool = False): 46 | pass 47 | 48 | def after_train(self, executor): 49 | pass 50 | 51 | 52 | class BaseExecutor: 53 | def __init__(self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None) -> None: 54 | self.exp = exp 55 | self.logger = logger 56 | self.callbacks = callbacks 57 | self._invoke_callback("setup") 58 | 59 | self.epoch = 0 60 | self.global_step = 0 61 | self._invoke_callback("load_checkpoint") 62 | self._invoke_callback("after_init") 63 | 64 | @property 65 | def train_dataloader(self): 66 | return self.exp.train_dataloader 67 | 68 | @property 69 | def val_dataloader(self): 70 | return self.exp.val_dataloader 71 | 72 | @property 73 | def model(self): 74 | return self.exp.model 75 | 76 | @model.setter 77 | def 
model(self, value): 78 | self.exp.model = value 79 | 80 | @property 81 | def optimizer(self): 82 | return self.exp.optimizer 83 | 84 | @property 85 | def lr_scheduler(self): 86 | return self.exp.lr_scheduler 87 | 88 | def _invoke_callback(self, callback_name, *args, **kwargs): 89 | for cb in self.callbacks: 90 | if cb.enabled_rank is None or self.global_rank in cb.enabled_rank: 91 | func = getattr(cb, callback_name, None) 92 | if func: 93 | func(self, *args, **kwargs) 94 | 95 | @property 96 | def global_rank(self): 97 | return get_rank() 98 | 99 | 100 | class Trainer(BaseExecutor): 101 | def __init__( 102 | self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None, use_amp=False, evaluator=None 103 | ) -> None: 104 | super(Trainer, self).__init__(exp, callbacks, logger) 105 | self.use_amp = use_amp 106 | self.evaluator = evaluator 107 | if self.use_amp: 108 | self.grad_scaler = torch.cuda.amp.GradScaler() 109 | 110 | def train(self): 111 | self.train_iter = iter(self.train_dataloader) 112 | self._invoke_callback("before_train") 113 | self.model.cuda() 114 | self.model.train() 115 | self.optimizer_to(self.optimizer, next(self.model.parameters()).device) 116 | start_epoch = self.epoch 117 | for epoch in range(start_epoch, self.exp.max_epoch): 118 | self.epoch = epoch 119 | self.model.train() 120 | self.train_epoch(epoch) 121 | self._invoke_callback("after_train") 122 | 123 | def train_epoch(self, epoch): 124 | self._invoke_callback("before_epoch", epoch) 125 | sampler = self.train_dataloader.sampler 126 | if hasattr(sampler, "set_epoch"): 127 | sampler.set_epoch(epoch) 128 | for step in range(len(self.train_dataloader)): 129 | try: 130 | data = next(self.train_iter) 131 | except StopIteration: 132 | self.train_iter = iter(self.train_dataloader) 133 | data = next(self.train_iter) 134 | self.train_step(data, step) 135 | if self.evaluator is not None: 136 | self.evaluator.eval() 137 | self._invoke_callback("after_epoch", epoch, update_best_ckpt=False) 138 | 139 | def train_step(self, data, step): 140 | self._invoke_callback("before_step", step, data) 141 | self.lr_scheduler.step(self.global_step) 142 | self.model.train() 143 | self.optimizer.zero_grad() 144 | if not self.use_amp: 145 | ret = self.exp.training_step(data) 146 | else: 147 | with torch.cuda.amp.autocast(): 148 | ret = self.exp.training_step(data) 149 | if isinstance(ret, torch.Tensor): 150 | loss = ret 151 | ext_dict = None 152 | elif isinstance(ret, tuple): 153 | loss, ext_dict = ret 154 | ext_dict = {k: v.detach() if isinstance(v, torch.Tensor) else v for k, v in ext_dict.items()} 155 | else: 156 | raise TypeError 157 | self._invoke_callback("before_backward") 158 | if not self.use_amp: 159 | loss.backward() 160 | self._invoke_callback("before_optimize") 161 | self.optimizer.step() 162 | else: 163 | self.grad_scaler.scale(loss).backward() 164 | self.grad_scaler.unscale_(self.optimizer) # NOTE: grads are unscaled before "before_optimize" callbacks 165 | self._invoke_callback("before_optimize") 166 | self.grad_scaler.step(self.optimizer) 167 | self.grad_scaler.update() 168 | self._invoke_callback("after_step", step, data, loss=loss.detach(), extra=ext_dict) 169 | self.global_step += 1 170 | 171 | # refer to: https://github.com/pytorch/pytorch/issues/8741 172 | @staticmethod 173 | def optimizer_to(optim, device): 174 | for param in optim.state.values(): 175 | # Not sure there are any global tensors in the state dict 176 | if isinstance(param, torch.Tensor): 177 | param.data = param.data.to(device) 178 | if param._grad is not 
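# A small illustrative Callback (not part of this repo) showing the hook order
# Trainer.train_step drives: before_step -> before_backward -> before_optimize
# -> after_step. `enabled_rank` makes _invoke_callback skip it on non-zero ranks.
from mapmaster.engine.executor import Callback, Trainer

class LossPrinterCallback(Callback):

    enabled_rank = [0]  # only invoked where global_rank is in this list

    def after_step(self, executor, step, data_dict, *args, **kwargs):
        loss = kwargs.get("loss")          # Trainer passes loss=loss.detach()
        if loss is not None and step % 100 == 0:
            print(f"epoch={executor.epoch} step={step} loss={loss.item():.4f}")

# usage sketch, given a configured experiment `exp`:
# trainer = Trainer(exp, callbacks=[LossPrinterCallback()], use_amp=True)
# trainer.train()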
None: 179 | param._grad.data = param._grad.data.to(device) 180 | elif isinstance(param, dict): 181 | for subparam in param.values(): 182 | if isinstance(subparam, torch.Tensor): 183 | subparam.data = subparam.data.to(device) 184 | if subparam._grad is not None: 185 | subparam._grad.data = subparam._grad.data.to(device) 186 | 187 | 188 | class BeMapNetEvaluator(BaseExecutor): 189 | def __init__(self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None) -> None: 190 | super(BeMapNetEvaluator, self).__init__(exp, callbacks, logger) 191 | 192 | def eval(self, ckpt_name=None): 193 | 194 | exp = self.exp 195 | val_iter = iter(self.val_dataloader) 196 | 197 | self._invoke_callback("before_eval") 198 | 199 | if ckpt_name is not None: 200 | if get_rank() == 0: 201 | self.logger.info("Eval with best checkpoint!") 202 | path = os.path.join(exp.output_dir, 'dump_model', ckpt_name) 203 | checkpoint = torch.load(open(path, "rb"), map_location=torch.device("cpu")) 204 | self.model.load_state_dict(checkpoint["model_state"], strict=False) 205 | 206 | self.model.cuda() 207 | self.model.eval() 208 | 209 | for step in tqdm(range(len(self.val_dataloader))): 210 | batch_data = next(val_iter) 211 | with torch.no_grad(): 212 | exp.test_step(batch_data) 213 | self._invoke_callback("after_step", step, {}) 214 | 215 | synchronize() 216 | 217 | if get_rank() == 0: 218 | self.logger.info("Done with inference, start evaluation later!") 219 | gt_dir = exp.exp_config.map_conf['anno_root'] 220 | dt_dir = exp.evaluation_save_dir 221 | val_txts = exp.exp_config.VAL_TXT 222 | 223 | for val_txt in val_txts: 224 | ap_table = "".join(os.popen(f"python3 tools/evaluation/eval.py {gt_dir} {dt_dir} {val_txt}").readlines()) 225 | self.logger.info(" AP-Performance with HDMapNetAPI: \n" + val_txt + "\n" + ap_table) 226 | 227 | self._invoke_callback("after_eval") 228 | -------------------------------------------------------------------------------- /mapmaster/engine/experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import functools 5 | import numpy as np 6 | from torch.nn import Module 7 | from tabulate import tabulate 8 | from abc import ABCMeta, abstractmethod 9 | from mapmaster.utils.misc import DictAction 10 | 11 | 12 | class BaseExp(metaclass=ABCMeta): 13 | """Basic class for any experiment in Perceptron. 
14 | 15 | Args: 16 | batch_size_per_device (int): 17 | batch_size of each device 18 | 19 | total_devices (int): 20 | number of devices to use 21 | 22 | max_epoch (int): 23 | total training epochs, the reason why we need to give max_epoch 24 | is that lr_scheduler may need to be adapted according to max_epoch 25 | """ 26 | 27 | def __init__(self, batch_size_per_device, total_devices, max_epoch): 28 | self._batch_size_per_device = batch_size_per_device 29 | self._max_epoch = max_epoch 30 | self._total_devices = total_devices 31 | # ----------------------------------------------- extra configure ------------------------- # 32 | self.seed = None 33 | self.exp_name = os.path.splitext(os.path.basename(sys.argv.copy()[0]))[0] # entrypoint filename as exp_name 34 | self.print_interval = 100 35 | self.dump_interval = 10 36 | self.eval_interval = 10 37 | self.num_keep_latest_ckpt = 10 38 | self.ckpt_oss_save_dir = None 39 | self.enable_tensorboard = False 40 | self.eval_executor_class = None 41 | 42 | @property 43 | def train_dataloader(self): 44 | if "_train_dataloader" not in self.__dict__: 45 | self._train_dataloader = self._configure_train_dataloader() 46 | return self._train_dataloader 47 | 48 | @property 49 | def val_dataloader(self): 50 | if "_val_dataloader" not in self.__dict__: 51 | self._val_dataloader = self._configure_val_dataloader() 52 | return self._val_dataloader 53 | 54 | @property 55 | def test_dataloader(self): 56 | if "_test_dataloader" not in self.__dict__: 57 | self._test_dataloader = self._configure_test_dataloader() 58 | return self._test_dataloader 59 | 60 | @property 61 | def model(self): 62 | if "_model" not in self.__dict__: 63 | self._model = self._configure_model() 64 | return self._model 65 | 66 | @model.setter 67 | def model(self, value): 68 | self._model = value 69 | 70 | @property 71 | def callbacks(self): 72 | if not hasattr(self, "_callbacks"): 73 | self._callbacks = self._configure_callbacks() 74 | return self._callbacks 75 | 76 | @property 77 | def optimizer(self): 78 | if "_optimizer" not in self.__dict__: 79 | self._optimizer = self._configure_optimizer() 80 | return self._optimizer 81 | 82 | @property 83 | def lr_scheduler(self): 84 | if "_lr_scheduler" not in self.__dict__: 85 | self._lr_scheduler = self._configure_lr_scheduler() 86 | return self._lr_scheduler 87 | 88 | @property 89 | def batch_size_per_device(self): 90 | return self._batch_size_per_device 91 | 92 | @property 93 | def max_epoch(self): 94 | return self._max_epoch 95 | 96 | @property 97 | def total_devices(self): 98 | return self._total_devices 99 | 100 | @abstractmethod 101 | def _configure_model(self) -> Module: 102 | pass 103 | 104 | @abstractmethod 105 | def _configure_train_dataloader(self): 106 | """""" 107 | 108 | def _configure_callbacks(self): 109 | return [] 110 | 111 | @abstractmethod 112 | def _configure_val_dataloader(self): 113 | """""" 114 | 115 | @abstractmethod 116 | def _configure_test_dataloader(self): 117 | """""" 118 | 119 | def training_step(self, *args, **kwargs): 120 | pass 121 | 122 | @abstractmethod 123 | def _configure_optimizer(self) -> torch.optim.Optimizer: 124 | pass 125 | 126 | @abstractmethod 127 | def _configure_lr_scheduler(self, **kwargs): 128 | pass 129 | 130 | def update_attr(self, options: dict) -> str: 131 | if options is None: 132 | return "" 133 | assert isinstance(options, dict) 134 | msg = "" 135 | for k, v in options.items(): 136 | if k in self.__dict__: 137 | old_v = self.__getattribute__(k) 138 | if not v == old_v: 139 | self.__setattr__(k, v) 
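# A skeletal BaseExp subclass (illustrative only; the toy model and dataset are
# stand-ins, not the real BeMapNet/PivotNet experiments) showing which hooks a
# concrete experiment must implement. Properties such as exp.model and
# exp.optimizer are built lazily on first access and then cached.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from mapmaster.engine.experiment import BaseExp

class ToyExp(BaseExp):
    def _configure_model(self):
        return nn.Linear(8, 2)

    def _configure_train_dataloader(self):
        dataset = TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
        return DataLoader(dataset, batch_size=self.batch_size_per_device)

    def _configure_val_dataloader(self):
        return self._configure_train_dataloader()

    def _configure_test_dataloader(self):
        return self._configure_train_dataloader()

    def _configure_optimizer(self):
        return torch.optim.AdamW(self.model.parameters(), lr=1e-3)

    def _configure_lr_scheduler(self):
        return torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[int(0.7 * self.max_epoch)])

    def training_step(self, batch):
        # Trainer accepts either a scalar loss tensor or (loss, dict-of-extras)
        x, y = batch
        return nn.functional.cross_entropy(self.model(x), y)

exp = ToyExp(batch_size_per_device=4, total_devices=1, max_epoch=10)
_ = exp.model            # lazily built here, cached in exp._model
print(exp.exp_name)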
140 | msg = "{}\n'{}' is overriden from '{}' to '{}'".format(msg, k, old_v, v) 141 | else: 142 | self.__setattr__(k, v) 143 | msg = "{}\n'{}' is set to '{}'".format(msg, k, v) 144 | 145 | # update exp_name 146 | exp_name_suffix = "-".join(sorted([f"{k}-{v}" for k, v in options.items()])) 147 | self.exp_name = f"{self.exp_name}--{exp_name_suffix}" 148 | return msg 149 | 150 | def get_cfg_as_str(self) -> str: 151 | config_table = [] 152 | for c, v in self.__dict__.items(): 153 | if not isinstance(v, (int, float, str, list, tuple, dict, np.ndarray)): 154 | if hasattr(v, "__name__"): 155 | v = v.__name__ 156 | elif hasattr(v, "__class__"): 157 | v = v.__class__ 158 | elif type(v) == functools.partial: 159 | v = v.func.__name__ 160 | if c[0] == "_": 161 | c = c[1:] 162 | config_table.append((str(c), str(v))) 163 | 164 | headers = ["config key", "value"] 165 | config_table = tabulate(config_table, headers, tablefmt="plain") 166 | return config_table 167 | 168 | def __str__(self): 169 | return self.get_cfg_as_str() 170 | 171 | def to_onnx(self): 172 | pass 173 | 174 | @classmethod 175 | def add_argparse_args(cls, parser): # pragma: no-cover 176 | parser.add_argument( 177 | "--exp_options", 178 | nargs="+", 179 | action=DictAction, 180 | help="override some settings in the exp, the key-value pair in xxx=yyy format will be merged into exp. " 181 | 'If the value to be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 182 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 183 | "Note that the quotation marks are necessary and that no white space is allowed.", 184 | ) 185 | parser.add_argument("-b", "--batch-size-per-device", type=int, default=None) 186 | parser.add_argument("-e", "--max-epoch", type=int, default=None) 187 | return parser 188 | -------------------------------------------------------------------------------- /mapmaster/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .network import MapMaster 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import ResNetBackbone, EfficientNetBackbone, SwinTRBackbone 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/bifpn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import BiFPN 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/bifpn/utils.py: -------------------------------------------------------------------------------- 1 | # Author: Zylo117 2 | 3 | import math 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Swish(nn.Module): 10 | def forward(self, x): 11 | return x * torch.sigmoid(x) 12 | 13 | 14 | class Conv2dStaticSamePadding(nn.Module): 15 | """ 16 | created by Zylo117 17 | The real keras/tensorflow conv2d with same padding 18 | """ 19 | 20 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, groups=1, dilation=1, **kwargs): 21 | super().__init__() 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, bias=bias, groups=groups) 23 | self.stride = self.conv.stride 24 | self.kernel_size = self.conv.kernel_size 25 | self.dilation = self.conv.dilation 26 | 27 | if isinstance(self.stride, int): 28 | 
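# Illustrative wiring of the --exp_options flag defined above. DictAction
# (from mapmaster.utils.misc) is assumed to turn the space-separated key=value
# pairs into a dict; update_attr() then merges that dict into the experiment.
import argparse
from mapmaster.engine.experiment import BaseExp

parser = argparse.ArgumentParser()
parser = BaseExp.add_argparse_args(parser)
args = parser.parse_args(["--exp_options", "print_interval=50", "eval_interval=5", "-b", "2", "-e", "30"])
print(args.batch_size_per_device, args.max_epoch)   # 2 30

# exp.update_attr(args.exp_options) would then override print_interval and
# eval_interval on a concrete experiment and append a matching suffix to exp.exp_name.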
self.stride = [self.stride] * 2 29 | elif len(self.stride) == 1: 30 | self.stride = [self.stride[0]] * 2 31 | 32 | if isinstance(self.kernel_size, int): 33 | self.kernel_size = [self.kernel_size] * 2 34 | elif len(self.kernel_size) == 1: 35 | self.kernel_size = [self.kernel_size[0]] * 2 36 | 37 | def forward(self, x): 38 | h, w = x.shape[-2:] 39 | 40 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1] 41 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0] 42 | 43 | left = extra_h // 2 44 | right = extra_h - left 45 | top = extra_v // 2 46 | bottom = extra_v - top 47 | 48 | x = F.pad(x, [left, right, top, bottom]) 49 | 50 | x = self.conv(x) 51 | return x 52 | 53 | 54 | class MaxPool2dStaticSamePadding(nn.Module): 55 | """ 56 | created by Zylo117 57 | The real keras/tensorflow MaxPool2d with same padding 58 | """ 59 | 60 | def __init__(self, *args, **kwargs): 61 | super().__init__() 62 | self.pool = nn.MaxPool2d(*args, **kwargs) 63 | self.stride = self.pool.stride 64 | self.kernel_size = self.pool.kernel_size 65 | 66 | if isinstance(self.stride, int): 67 | self.stride = [self.stride] * 2 68 | elif len(self.stride) == 1: 69 | self.stride = [self.stride[0]] * 2 70 | 71 | if isinstance(self.kernel_size, int): 72 | self.kernel_size = [self.kernel_size] * 2 73 | elif len(self.kernel_size) == 1: 74 | self.kernel_size = [self.kernel_size[0]] * 2 75 | 76 | def forward(self, x): 77 | h, w = x.shape[-2:] 78 | 79 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1] 80 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0] 81 | 82 | left = extra_h // 2 83 | right = extra_h - left 84 | top = extra_v // 2 85 | bottom = extra_v - top 86 | 87 | x = F.pad(x, [left, right, top, bottom]) 88 | 89 | x = self.pool(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EfficientNet 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mapmaster.models.backbone.resnet import ResNet 5 | from mapmaster.models.backbone.efficientnet import EfficientNet 6 | from mapmaster.models.backbone.swin_transformer import SwinTransformer 7 | from mapmaster.models.backbone.bifpn import BiFPN 8 | 9 | 10 | class ResNetBackbone(nn.Module): 11 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1): 12 | super(ResNetBackbone, self).__init__() 13 | assert 0 < ret_layers < 4 14 | self.ret_layers = ret_layers 15 | self.bkb = ResNet(**bkb_kwargs) 16 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg) 17 | self.up_shape = None if up_shape is None else up_shape 18 | self.bkb.init_weights() 19 | 20 | def forward(self, inputs): 21 | images = inputs["images"] 22 | images = images.view(-1, *images.shape[-3:]) 23 | bkb_features = list(self.bkb(images)[-self.ret_layers:]) 24 | nek_features = self.fpn(bkb_features) if self.fpn is not None else None 25 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features} 26 | 27 | 28 | class EfficientNetBackbone(nn.Module): 29 | def __init__(self, bkb_kwargs, fpn_kwarg=None, 
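# A quick numeric check (illustrative values) of the "same" padding computed in
# Conv2dStaticSamePadding.forward above: with kernel 3, stride 2 and a 15x15
# input, 2 extra pixels are padded along each spatial dim, so the output is
# ceil(15 / 2) = 8, matching the TensorFlow/Keras "same" behaviour the docstring refers to.
import math
import torch
from mapmaster.models.backbone.bifpn.utils import Conv2dStaticSamePadding

conv = Conv2dStaticSamePadding(in_channels=3, out_channels=8, kernel_size=3, stride=2)
x = torch.randn(1, 3, 15, 15)
extra = (math.ceil(15 / 2) - 1) * 2 - 15 + 3    # = 2 -> pad 1 on the left/top, 1 on the right/bottom
print(extra, conv(x).shape)                      # 2 torch.Size([1, 8, 8, 8])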
up_shape=None, ret_layers=1): 30 | super(EfficientNetBackbone, self).__init__() 31 | assert 0 < ret_layers < 4 32 | self.ret_layers = ret_layers 33 | self.bkb = EfficientNet.from_pretrained(**bkb_kwargs) 34 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg) 35 | self.up_shape = None if up_shape is None else up_shape 36 | del self.bkb._conv_head 37 | del self.bkb._bn1 38 | del self.bkb._avg_pooling 39 | del self.bkb._dropout 40 | del self.bkb._fc 41 | 42 | def forward(self, inputs): 43 | images = inputs["images"] 44 | images = images.view(-1, *images.shape[-3:]) 45 | endpoints = self.bkb.extract_endpoints(images) 46 | bkb_features = [] 47 | for i, (key, value) in enumerate(endpoints.items()): 48 | if i > 0: 49 | bkb_features.append(value) 50 | bkb_features = list(bkb_features[-self.ret_layers:]) 51 | nek_features = self.fpn(bkb_features) if self.fpn is not None else None 52 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features} 53 | 54 | 55 | class SwinTRBackbone(nn.Module): 56 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1): 57 | super(SwinTRBackbone, self).__init__() 58 | assert 0 < ret_layers < 4 59 | self.ret_layers = ret_layers 60 | self.bkb = SwinTransformer(**bkb_kwargs) 61 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg) 62 | self.up_shape = None if up_shape is None else up_shape 63 | 64 | def forward(self, inputs): 65 | images = inputs["images"] 66 | images = images.view(-1, *images.shape[-3:]) 67 | bkb_features = list(self.bkb(images)[-self.ret_layers:]) 68 | nek_features = None 69 | if self.fpn is not None: 70 | nek_features = self.fpn(bkb_features) 71 | else: 72 | if self.up_shape is not None: 73 | nek_features = [torch.cat([self.up_sample(x, self.up_shape) for x in bkb_features], dim=1)] 74 | 75 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features} 76 | 77 | def up_sample(self, x, tgt_shape=None): 78 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 79 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 80 | return x 81 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 82 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/resnet/utils.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_conv_layer, build_norm_layer 2 | from mmcv.runner import Sequential 3 | from torch import nn as nn 4 | 5 | 6 | class ResLayer(Sequential): 7 | """ResLayer to build ResNet style backbone. 8 | Args: 9 | block (nn.Module): block used to build ResLayer. 10 | inplanes (int): inplanes of block. 11 | planes (int): planes of block. 12 | num_blocks (int): number of blocks. 13 | stride (int): stride of the first block. Default: 1 14 | avg_down (bool): Use AvgPool instead of stride conv when 15 | downsampling in the bottleneck. Default: False 16 | conv_cfg (dict): dictionary to construct and config conv layer. 17 | Default: None 18 | norm_cfg (dict): dictionary to construct and config norm layer. 19 | Default: dict(type='BN') 20 | downsample_first (bool): Downsample at the first block or last block. 21 | False for Hourglass, True for ResNet. 
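# Shape bookkeeping shared by the three backbone wrappers above (sizes are
# illustrative): "images" holds all surround-view cameras, the camera axis is
# folded into the batch before the 2D backbone runs, and per-level features
# come back in a dict.
import torch

batch, n_cam = 2, 6                                # 6 cameras as in nuScenes
images = torch.randn(batch, n_cam, 3, 512, 896)    # (B, N_cam, 3, H, W); H, W illustrative
flat = images.view(-1, *images.shape[-3:])         # (B * N_cam, 3, H, W) -> torch.Size([12, 3, 512, 896])
print(flat.shape)

# outputs = backbone({"images": images})
# outputs["im_bkb_features"]: last `ret_layers` backbone stages, each (B * N_cam, C_i, H_i, W_i)
# outputs["im_nek_features"]: BiFPN-fused features, or None when no neck / up-sampling is configured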
Default: True 22 | """ 23 | 24 | def __init__( 25 | self, 26 | block, 27 | inplanes, 28 | planes, 29 | num_blocks, 30 | stride=1, 31 | avg_down=False, 32 | conv_cfg=None, 33 | norm_cfg=dict(type="BN"), 34 | downsample_first=True, 35 | **kwargs 36 | ): 37 | self.block = block 38 | 39 | downsample = None 40 | if stride != 1 or inplanes != planes * block.expansion: 41 | downsample = [] 42 | conv_stride = stride 43 | if avg_down: 44 | conv_stride = 1 45 | downsample.append( 46 | nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False) 47 | ) 48 | downsample.extend( 49 | [ 50 | build_conv_layer( 51 | conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False 52 | ), 53 | build_norm_layer(norm_cfg, planes * block.expansion)[1], 54 | ] 55 | ) 56 | downsample = nn.Sequential(*downsample) 57 | 58 | layers = [] 59 | if downsample_first: 60 | layers.append( 61 | block( 62 | inplanes=inplanes, 63 | planes=planes, 64 | stride=stride, 65 | downsample=downsample, 66 | conv_cfg=conv_cfg, 67 | norm_cfg=norm_cfg, 68 | **kwargs 69 | ) 70 | ) 71 | inplanes = planes * block.expansion 72 | for _ in range(1, num_blocks): 73 | layers.append( 74 | block(inplanes=inplanes, planes=planes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs) 75 | ) 76 | 77 | else: # downsample_first=False is for HourglassModule 78 | for _ in range(num_blocks - 1): 79 | layers.append( 80 | block(inplanes=inplanes, planes=inplanes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs) 81 | ) 82 | layers.append( 83 | block( 84 | inplanes=inplanes, 85 | planes=planes, 86 | stride=stride, 87 | downsample=downsample, 88 | conv_cfg=conv_cfg, 89 | norm_cfg=norm_cfg, 90 | **kwargs 91 | ) 92 | ) 93 | super(ResLayer, self).__init__(*layers) 94 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/swin_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from .model import SwinTransformer as _SwinTransformer 4 | from torch.utils import model_zoo 5 | 6 | model_urls = { 7 | "tiny": "https://github.com/SwinTransformer/storage/releases/download/v1.0.1/upernet_swin_tiny_patch4_window7_512x512.pth", 8 | "base": "https://github.com/SwinTransformer/storage/releases/download/v1.0.1/upernet_swin_base_patch4_window7_512x512.pth", 9 | } 10 | 11 | 12 | class SwinTransformer(_SwinTransformer): 13 | def __init__( 14 | self, 15 | arch="tiny", 16 | pretrained=False, 17 | window_size=7, 18 | shift_mode=1, 19 | mlp_ratio=4.0, 20 | qkv_bias=True, 21 | qk_scale=None, 22 | drop_rate=0.0, 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0.3, 25 | ape=False, 26 | patch_norm=True, 27 | out_indices=(0, 1, 2, 3), 28 | use_checkpoint=False, 29 | **kwargs 30 | ): 31 | if arch == "tiny": 32 | embed_dim = 96 33 | depths = (2, 2, 6, 2) 34 | num_heads = (3, 6, 12, 24) 35 | elif arch == "small": 36 | embed_dim = 96 37 | depths = (2, 2, 18, 2) 38 | num_heads = (3, 6, 12, 24) 39 | elif arch == "base": 40 | embed_dim = 128 41 | depths = (2, 2, 18, 2) 42 | num_heads = (4, 8, 16, 32) 43 | else: 44 | raise NotImplementedError 45 | 46 | super(SwinTransformer, self).__init__( 47 | embed_dim=embed_dim, 48 | depths=depths, 49 | num_heads=num_heads, 50 | window_size=window_size, 51 | shift_mode=shift_mode, 52 | mlp_ratio=mlp_ratio, 53 | qkv_bias=qkv_bias, 54 | qk_scale=qk_scale, 55 | drop_rate=drop_rate, 56 | attn_drop_rate=attn_drop_rate, 57 | drop_path_rate=drop_path_rate, 58 | 
ape=ape, 59 | patch_norm=patch_norm, 60 | out_indices=out_indices, 61 | use_checkpoint=use_checkpoint, 62 | **kwargs 63 | ) 64 | if isinstance(pretrained, bool): 65 | assert pretrained is True 66 | print(model_urls[arch]) 67 | state_dict = model_zoo.load_url(model_urls[arch])["state_dict"] 68 | elif isinstance(pretrained, str): 69 | assert os.path.exists(pretrained) 70 | print(pretrained) 71 | state_dict = torch.load(pretrained)["state_dict"] 72 | else: 73 | raise NotImplementedError 74 | 75 | self.arch = arch 76 | self.init_weights(state_dict=state_dict) 77 | 78 | def init_weights(self, state_dict): 79 | new_state_dict = {} 80 | for key, value in state_dict.items(): 81 | if "backbone" in key: 82 | new_state_dict[key.replace("backbone.", "")] = value 83 | ret = self.load_state_dict(new_state_dict, strict=False) 84 | print("Backbone missing_keys: {}".format(ret.missing_keys)) 85 | print("Backbone unexpected_keys: {}".format(ret.unexpected_keys)) 86 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import TransformerBEVDecoder, DeformTransformerBEVEncoder 2 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_transformer import DeformTransformer 2 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import MSDeformAttn 2 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn_func import MSDeformAttnFunction 10 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/functions/ms_deform_attn_func.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
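# init_weights() above only keeps checkpoint entries whose key contains
# "backbone." and strips that prefix before loading; a toy illustration of the
# remapping (keys and values here are made up):
state_dict = {
    "backbone.patch_embed.proj.weight": "w0",
    "decode_head.conv_seg.weight": "w1",         # non-backbone keys are simply dropped
}
remapped = {k.replace("backbone.", ""): v for k, v in state_dict.items() if "backbone" in k}
print(remapped)                                   # {'patch_embed.proj.weight': 'w0'}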
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import torch 14 | import torch.nn.functional as F 15 | from torch.autograd import Function 16 | from torch.autograd.function import once_differentiable 17 | 18 | import MultiScaleDeformableAttention as MSDA 19 | 20 | 21 | class MSDeformAttnFunction(Function): 22 | @staticmethod 23 | def forward( 24 | ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step 25 | ): 26 | ctx.im2col_step = im2col_step 27 | output = MSDA.ms_deform_attn_forward( 28 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step 29 | ) 30 | ctx.save_for_backward( 31 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights 32 | ) 33 | return output 34 | 35 | @staticmethod 36 | @once_differentiable 37 | def backward(ctx, grad_output): 38 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors 39 | grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward( 40 | value, 41 | value_spatial_shapes, 42 | value_level_start_index, 43 | sampling_locations, 44 | attention_weights, 45 | grad_output, 46 | ctx.im2col_step, 47 | ) 48 | 49 | return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None 50 | 51 | 52 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): 53 | # for debug and test only, 54 | # need to use cuda version instead 55 | N_, S_, M_, D_ = value.shape 56 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 57 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 58 | sampling_grids = 2 * sampling_locations - 1 59 | sampling_value_list = [] 60 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 61 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 62 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_ * M_, D_, H_, W_) 63 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 64 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 65 | # N_*M_, D_, Lq_, P_ 66 | sampling_value_l_ = F.grid_sample( 67 | value_l_, sampling_grid_l_, mode="bilinear", padding_mode="zeros", align_corners=False 68 | ) 69 | sampling_value_list.append(sampling_value_l_) 70 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 71 | attention_weights = attention_weights.transpose(1, 2).reshape(N_ * M_, 1, Lq_, L_ * P_) 72 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_ * D_, Lq_) 73 | return output.transpose(1, 2).contiguous() 74 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 
------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | python setup.py build install 11 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn import MSDeformAttn 10 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/modules/ms_deform_attn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import warnings 14 | import math 15 | 16 | import torch 17 | from torch import nn 18 | import torch.nn.functional as F 19 | from torch.nn.init import xavier_uniform_, constant_ 20 | 21 | from ..functions import MSDeformAttnFunction 22 | 23 | 24 | def _is_power_of_2(n): 25 | if (not isinstance(n, int)) or (n < 0): 26 | raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) 27 | return (n & (n - 1) == 0) and n != 0 28 | 29 | 30 | class MSDeformAttn(nn.Module): 31 | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): 32 | """ 33 | Multi-Scale Deformable Attention Module 34 | :param d_model hidden dimension 35 | :param n_levels number of feature levels 36 | :param n_heads number of attention heads 37 | :param n_points number of sampling points per attention head per feature level 38 | """ 39 | super().__init__() 40 | if d_model % n_heads != 0: 41 | raise ValueError("d_model must be divisible by n_heads, but got {} and {}".format(d_model, n_heads)) 42 | _d_per_head = d_model // n_heads 43 | # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation 44 | if not _is_power_of_2(_d_per_head): 45 | warnings.warn( 46 | "You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " 47 | "which is more efficient in our CUDA implementation." 
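# Illustrative check of the head-dimension constraint enforced above. Importing
# MSDeformAttn requires the compiled MultiScaleDeformableAttention extension,
# since the ops package imports it at load time.
from mapmaster.models.bev_decoder.deform_transformer.ops.modules import MSDeformAttn

MSDeformAttn(d_model=256, n_heads=8)     # 256 / 8 = 32, a power of two -> no warning
MSDeformAttn(d_model=240, n_heads=8)     # 240 / 8 = 30 -> UserWarning about CUDA efficiency
# MSDeformAttn(d_model=250, n_heads=8)   # 250 % 8 != 0 -> ValueError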
48 | ) 49 | 50 | self.im2col_step = 64 51 | 52 | self.d_model = d_model 53 | self.n_levels = n_levels 54 | self.n_heads = n_heads 55 | self.n_points = n_points 56 | 57 | self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) 58 | self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) 59 | self.value_proj = nn.Linear(d_model, d_model) 60 | self.output_proj = nn.Linear(d_model, d_model) 61 | 62 | self._reset_parameters() 63 | 64 | def _reset_parameters(self): 65 | constant_(self.sampling_offsets.weight.data, 0.0) 66 | thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) 67 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 68 | grid_init = ( 69 | (grid_init / grid_init.abs().max(-1, keepdim=True)[0]) 70 | .view(self.n_heads, 1, 1, 2) 71 | .repeat(1, self.n_levels, self.n_points, 1) 72 | ) 73 | for i in range(self.n_points): 74 | grid_init[:, :, i, :] *= i + 1 75 | with torch.no_grad(): 76 | self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) 77 | constant_(self.attention_weights.weight.data, 0.0) 78 | constant_(self.attention_weights.bias.data, 0.0) 79 | xavier_uniform_(self.value_proj.weight.data) 80 | constant_(self.value_proj.bias.data, 0.0) 81 | xavier_uniform_(self.output_proj.weight.data) 82 | constant_(self.output_proj.bias.data, 0.0) 83 | 84 | def forward( 85 | self, 86 | query, 87 | reference_points, 88 | input_flatten, 89 | input_spatial_shapes, 90 | input_level_start_index, 91 | input_padding_mask=None, 92 | ): 93 | """ 94 | :param query (N, Length_{query}, C) 95 | :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area 96 | or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes 97 | :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) 98 | :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] 99 | :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] 100 | :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements 101 | 102 | :return output (N, Length_{query}, C) 103 | """ 104 | N, Len_q, _ = query.shape 105 | N, Len_in, _ = input_flatten.shape 106 | assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in 107 | 108 | value = self.value_proj(input_flatten) 109 | if input_padding_mask is not None: 110 | value = value.masked_fill(input_padding_mask[..., None], float(0)) 111 | value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) 112 | sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) 113 | attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) 114 | attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) 115 | # N, Len_q, n_heads, n_levels, n_points, 2 116 | if reference_points.shape[-1] == 2: 117 | offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) 118 | sampling_locations = ( 119 | reference_points[:, :, None, :, None, :] 120 | + sampling_offsets / offset_normalizer[None, None, None, :, None, :] 121 | ) 122 | elif reference_points.shape[-1] == 4: 123 | sampling_locations = ( 124 | reference_points[:, :, None, :, None, :2] 125 | 
+ sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 126 | ) 127 | else: 128 | raise ValueError( 129 | "Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1]) 130 | ) 131 | output = MSDeformAttnFunction.apply( 132 | value, 133 | input_spatial_shapes, 134 | input_level_start_index, 135 | sampling_locations, 136 | attention_weights, 137 | self.im2col_step, 138 | ) 139 | output = self.output_proj(output) 140 | return output 141 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | import os 10 | import glob 11 | 12 | import torch 13 | 14 | from torch.utils.cpp_extension import CUDA_HOME 15 | from torch.utils.cpp_extension import CppExtension 16 | from torch.utils.cpp_extension import CUDAExtension 17 | 18 | from setuptools import find_packages 19 | from setuptools import setup 20 | 21 | requirements = ["torch", "torchvision"] 22 | 23 | 24 | def get_extensions(): 25 | this_dir = os.path.dirname(os.path.abspath(__file__)) 26 | extensions_dir = os.path.join(this_dir, "src") 27 | 28 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 29 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 30 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 31 | 32 | sources = main_file + source_cpu 33 | extension = CppExtension 34 | extra_compile_args = {"cxx": []} 35 | define_macros = [] 36 | 37 | if torch.cuda.is_available() and CUDA_HOME is not None: 38 | extension = CUDAExtension 39 | sources += source_cuda 40 | define_macros += [("WITH_CUDA", None)] 41 | extra_compile_args["nvcc"] = [ 42 | "-DCUDA_HAS_FP16=1", 43 | "-D__CUDA_NO_HALF_OPERATORS__", 44 | "-D__CUDA_NO_HALF_CONVERSIONS__", 45 | "-D__CUDA_NO_HALF2_OPERATORS__", 46 | "-arch=sm_60", 47 | "-gencode=arch=compute_60,code=sm_60", 48 | "-gencode=arch=compute_61,code=sm_61", 49 | "-gencode=arch=compute_70,code=sm_70", 50 | "-gencode=arch=compute_75,code=sm_75", 51 | # "-gencode=arch=compute_80,code=sm_80", 52 | ] 53 | else: 54 | raise NotImplementedError("Cuda is not availabel") 55 | 56 | sources = [os.path.join(extensions_dir, s) for s in sources] 57 | include_dirs = [extensions_dir] 58 | ext_modules = [ 59 | extension( 60 | "MultiScaleDeformableAttention", 61 | sources, 62 | include_dirs=include_dirs, 63 | define_macros=define_macros, 64 | extra_compile_args=extra_compile_args, 65 | ) 66 | ] 67 | return ext_modules 68 | 69 | 70 | setup( 71 | name="MultiScaleDeformableAttention", 72 | version="1.0", 73 | author="Weijie Su", 74 | url="https://github.com/fundamentalvision/Deformable-DETR", 75 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", 76 | packages=find_packages( 77 | exclude=( 78 | "configs", 79 | "tests", 80 | ) 81 | ), 82 | 
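# A minimal usage sketch for MSDeformAttn.forward above; all shapes are
# illustrative. Running it needs a GPU and the compiled
# MultiScaleDeformableAttention extension (built via make.sh / setup.py in this
# ops/ directory), because the wrapper dispatches to the CUDA kernel.
import torch
from mapmaster.models.bev_decoder.deform_transformer.ops.modules import MSDeformAttn

n, len_q, d_model = 2, 300, 256
spatial_shapes = torch.as_tensor([[32, 32], [16, 16]], dtype=torch.long).cuda()   # two feature levels
level_start_index = torch.cat((spatial_shapes.new_zeros((1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
len_in = int(spatial_shapes.prod(1).sum())                                         # 32*32 + 16*16 = 1280

attn = MSDeformAttn(d_model=d_model, n_levels=2, n_heads=8, n_points=4).cuda()
query = torch.randn(n, len_q, d_model).cuda()
input_flatten = torch.randn(n, len_in, d_model).cuda()                             # levels flattened and concatenated
reference_points = torch.rand(n, len_q, 2, 2).cuda()                               # (N, Len_q, n_levels, 2) in [0, 1]

out = attn(query, reference_points, input_flatten, spatial_shapes, level_start_index)
print(out.shape)                                                                    # torch.Size([2, 300, 256])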
ext_modules=get_extensions(), 83 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 84 | ) 85 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cpu/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include <vector> 12 | 13 | #include <ATen/ATen.h> 14 | #include <ATen/cuda/CUDAContext.h> 15 | 16 | 17 | at::Tensor 18 | ms_deform_attn_cpu_forward( 19 | const at::Tensor &value, 20 | const at::Tensor &spatial_shapes, 21 | const at::Tensor &level_start_index, 22 | const at::Tensor &sampling_loc, 23 | const at::Tensor &attn_weight, 24 | const int im2col_step) 25 | { 26 | AT_ERROR("Not implemented on the CPU"); 27 | } 28 | 29 | std::vector<at::Tensor> 30 | ms_deform_attn_cpu_backward( 31 | const at::Tensor &value, 32 | const at::Tensor &spatial_shapes, 33 | const at::Tensor &level_start_index, 34 | const at::Tensor &sampling_loc, 35 | const at::Tensor &attn_weight, 36 | const at::Tensor &grad_output, 37 | const int im2col_step) 38 | { 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor 15 | ms_deform_attn_cpu_forward( 16 | const at::Tensor &value, 17 | const at::Tensor &spatial_shapes, 18 | const at::Tensor &level_start_index, 19 | const at::Tensor &sampling_loc, 20 | const at::Tensor &attn_weight, 21 | const int im2col_step); 22 | 23 | std::vector<at::Tensor> 24 | ms_deform_attn_cpu_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cuda/ms_deform_attn_cuda.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime.
All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | #include "cuda/ms_deform_im2col_cuda.cuh" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | at::Tensor ms_deform_attn_cuda_forward( 21 | const at::Tensor &value, 22 | const at::Tensor &spatial_shapes, 23 | const at::Tensor &level_start_index, 24 | const at::Tensor &sampling_loc, 25 | const at::Tensor &attn_weight, 26 | const int im2col_step) 27 | { 28 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); 29 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); 30 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); 31 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); 32 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); 33 | 34 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); 35 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); 36 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); 37 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); 38 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); 39 | 40 | const int batch = value.size(0); 41 | const int spatial_size = value.size(1); 42 | const int num_heads = value.size(2); 43 | const int channels = value.size(3); 44 | 45 | const int num_levels = spatial_shapes.size(0); 46 | 47 | const int num_query = sampling_loc.size(1); 48 | const int num_point = sampling_loc.size(4); 49 | 50 | const int im2col_step_ = std::min(batch, im2col_step); 51 | 52 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); 53 | 54 | auto output = at::zeros({batch, num_query, num_heads, channels}, value.options()); 55 | 56 | const int batch_n = im2col_step_; 57 | auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); 58 | auto per_value_size = spatial_size * num_heads * channels; 59 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; 60 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; 61 | for (int n = 0; n < batch/im2col_step_; ++n) 62 | { 63 | auto columns = output_n.select(0, n); 64 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] { 65 | ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(), 66 | value.data() + n * im2col_step_ * per_value_size, 67 | spatial_shapes.data(), 68 | level_start_index.data(), 69 | sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 70 | attn_weight.data() + n * im2col_step_ * per_attn_weight_size, 71 | batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, 72 | columns.data()); 73 | 74 | })); 75 | } 76 | 77 | output = output.view({batch, num_query, num_heads*channels}); 78 | 79 | return output; 80 | } 81 | 82 | 83 | std::vector ms_deform_attn_cuda_backward( 84 | const at::Tensor &value, 85 | const at::Tensor &spatial_shapes, 86 | const at::Tensor 
&level_start_index, 87 | const at::Tensor &sampling_loc, 88 | const at::Tensor &attn_weight, 89 | const at::Tensor &grad_output, 90 | const int im2col_step) 91 | { 92 | 93 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); 94 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); 95 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); 96 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); 97 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); 98 | AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous"); 99 | 100 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); 101 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); 102 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); 103 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); 104 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); 105 | AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor"); 106 | 107 | const int batch = value.size(0); 108 | const int spatial_size = value.size(1); 109 | const int num_heads = value.size(2); 110 | const int channels = value.size(3); 111 | 112 | const int num_levels = spatial_shapes.size(0); 113 | 114 | const int num_query = sampling_loc.size(1); 115 | const int num_point = sampling_loc.size(4); 116 | 117 | const int im2col_step_ = std::min(batch, im2col_step); 118 | 119 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); 120 | 121 | auto grad_value = at::zeros_like(value); 122 | auto grad_sampling_loc = at::zeros_like(sampling_loc); 123 | auto grad_attn_weight = at::zeros_like(attn_weight); 124 | 125 | const int batch_n = im2col_step_; 126 | auto per_value_size = spatial_size * num_heads * channels; 127 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; 128 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; 129 | auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); 130 | 131 | for (int n = 0; n < batch/im2col_step_; ++n) 132 | { 133 | auto grad_output_g = grad_output_n.select(0, n); 134 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] { 135 | ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(), 136 | grad_output_g.data(), 137 | value.data() + n * im2col_step_ * per_value_size, 138 | spatial_shapes.data(), 139 | level_start_index.data(), 140 | sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 141 | attn_weight.data() + n * im2col_step_ * per_attn_weight_size, 142 | batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, 143 | grad_value.data() + n * im2col_step_ * per_value_size, 144 | grad_sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 145 | grad_attn_weight.data() + n * im2col_step_ * per_attn_weight_size); 146 | 147 | })); 148 | } 149 | 150 | return { 151 | grad_value, grad_sampling_loc, grad_attn_weight 152 | }; 153 | } 154 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | at::Tensor ms_deform_attn_cuda_forward( 15 | const at::Tensor &value, 16 | const at::Tensor &spatial_shapes, 17 | const at::Tensor &level_start_index, 18 | const at::Tensor &sampling_loc, 19 | const at::Tensor &attn_weight, 20 | const int im2col_step); 21 | 22 | std::vector ms_deform_attn_cuda_backward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const at::Tensor &grad_output, 29 | const int im2col_step); 30 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "cpu/ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "cuda/ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | 20 | at::Tensor 21 | ms_deform_attn_forward( 22 | const at::Tensor &value, 23 | const at::Tensor &spatial_shapes, 24 | const at::Tensor &level_start_index, 25 | const at::Tensor &sampling_loc, 26 | const at::Tensor &attn_weight, 27 | const int im2col_step) 28 | { 29 | if (value.type().is_cuda()) 30 | { 31 | #ifdef WITH_CUDA 32 | return ms_deform_attn_cuda_forward( 33 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector 42 | ms_deform_attn_backward( 43 | const at::Tensor &value, 44 | const at::Tensor &spatial_shapes, 45 | const at::Tensor &level_start_index, 46 | const at::Tensor &sampling_loc, 47 | const at::Tensor &attn_weight, 48 | const at::Tensor &grad_output, 49 | const int im2col_step) 50 | { 51 | if (value.type().is_cuda()) 52 | { 53 | #ifdef WITH_CUDA 54 | return ms_deform_attn_cuda_backward( 55 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 56 | #else 57 | AT_ERROR("Not compiled with GPU support"); 58 | #endif 59 | } 60 | AT_ERROR("Not implemented on the CPU"); 61 | } 62 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/vision.cpp: 
-------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include "ms_deform_attn.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 16 | } 17 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import torch 14 | from torch.autograd import gradcheck 15 | 16 | from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch 17 | 18 | 19 | N, M, D = 1, 2, 2 20 | Lq, L, P = 2, 2, 2 21 | shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() 22 | level_start_index = torch.cat((shapes.new_zeros((1,)), shapes.prod(1).cumsum(0)[:-1])) 23 | S = sum([(H * W).item() for H, W in shapes]) 24 | 25 | 26 | torch.manual_seed(3) 27 | 28 | 29 | @torch.no_grad() 30 | def check_forward_equal_with_pytorch_double(): 31 | value = torch.rand(N, S, M, D).cuda() * 0.01 32 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 33 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 34 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 35 | im2col_step = 2 36 | output_pytorch = ( 37 | ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()) 38 | .detach() 39 | .cpu() 40 | ) 41 | output_cuda = ( 42 | MSDeformAttnFunction.apply( 43 | value.double(), 44 | shapes, 45 | level_start_index, 46 | sampling_locations.double(), 47 | attention_weights.double(), 48 | im2col_step, 49 | ) 50 | .detach() 51 | .cpu() 52 | ) 53 | fwdok = torch.allclose(output_cuda, output_pytorch) 54 | max_abs_err = (output_cuda - output_pytorch).abs().max() 55 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 56 | 57 | print( 58 | f"* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}" 59 | ) 60 | 61 | 62 | @torch.no_grad() 63 | def 
check_forward_equal_with_pytorch_float(): 64 | value = torch.rand(N, S, M, D).cuda() * 0.01 65 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 66 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 67 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 68 | im2col_step = 2 69 | output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() 70 | output_cuda = ( 71 | MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step) 72 | .detach() 73 | .cpu() 74 | ) 75 | fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) 76 | max_abs_err = (output_cuda - output_pytorch).abs().max() 77 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 78 | 79 | print( 80 | f"* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}" 81 | ) 82 | 83 | 84 | def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): 85 | 86 | value = torch.rand(N, S, M, channels).cuda() * 0.01 87 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 88 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 89 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 90 | im2col_step = 2 91 | func = MSDeformAttnFunction.apply 92 | 93 | value.requires_grad = grad_value 94 | sampling_locations.requires_grad = grad_sampling_loc 95 | attention_weights.requires_grad = grad_attn_weight 96 | 97 | gradok = gradcheck( 98 | func, 99 | ( 100 | value.double(), 101 | shapes, 102 | level_start_index, 103 | sampling_locations.double(), 104 | attention_weights.double(), 105 | im2col_step, 106 | ), 107 | ) 108 | 109 | print(f"* {gradok} check_gradient_numerical(D={channels})") 110 | 111 | 112 | if __name__ == "__main__": 113 | check_forward_equal_with_pytorch_double() 114 | check_forward_equal_with_pytorch_float() 115 | 116 | for channels in [30, 32, 64, 71, 1025, 2048, 3096]: 117 | check_gradient_numerical(channels, True, True, True) 118 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/position_encoding.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various positional encodings for the transformer. 3 | """ 4 | import math 5 | import torch 6 | from torch import nn 7 | 8 | class PositionEmbeddingSine(nn.Module): 9 | """ 10 | This is a more standard version of the position embedding, very similar to the one 11 | used by the Attention is all you need paper, generalized to work on images. 
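    Concretely (a summary of the code below, assuming normalize=True): the masked cumulative
    sums are shifted by 0.5 and normalized to roughly (0, scale]; with d = num_pos_feats and
    T = temperature, each axis coordinate c is then expanded into d/2 channels
        sin(c / T**(2*i / (d/2))), cos(c / T**(2*i / (d/2))),  i = 0, ..., d/4 - 1,
    and the y-half and x-half are concatenated, so the returned embedding has d channels in
    total. (The otherwise similar PositionEmbeddingSine in mapmaster/models/utils/position_encoding.py
    gives each axis num_pos_feats channels, i.e. 2 * num_pos_feats channels in total.)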
12 | """ 13 | 14 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None): 15 | super().__init__() 16 | self.num_pos_feats = num_pos_feats 17 | self.temperature = temperature 18 | self.normalize = normalize 19 | if scale is not None and normalize is False: 20 | raise ValueError("normalize should be True if scale is passed") 21 | if scale is None: 22 | scale = 2 * math.pi 23 | self.scale = scale 24 | 25 | def forward(self, mask): 26 | assert mask is not None 27 | not_mask = ~mask 28 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 29 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 30 | if self.normalize: 31 | eps = 1e-6 32 | y_embed = (y_embed - 0.5) / (y_embed[:, -1:, :] + eps) * self.scale 33 | x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale 34 | 35 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=mask.device) 36 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2)) 37 | 38 | pos_x = x_embed[:, :, :, None] / dim_t 39 | pos_y = y_embed[:, :, :, None] / dim_t 40 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) 41 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) 42 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 43 | return pos 44 | 45 | class PositionEmbeddingLearned(nn.Module): 46 | """ 47 | Absolute pos embedding, learned. 48 | """ 49 | 50 | def __init__(self, num_pos=(50, 50), num_pos_feats=256): 51 | super().__init__() 52 | self.num_pos = num_pos 53 | self.pos_embed = nn.Embedding(num_pos[0] * num_pos[1], num_pos_feats) 54 | self.reset_parameters() 55 | 56 | def reset_parameters(self): 57 | nn.init.normal_(self.pos_embed.weight) 58 | 59 | def forward(self, mask): 60 | h, w = mask.shape[-2:] 61 | pos = self.pos_embed.weight.view(*self.num_pos, -1)[:h, :w] 62 | pos = pos.permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 63 | return pos 64 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | from mapmaster.models.bev_decoder.transformer import Transformer 5 | from mapmaster.models.bev_decoder.deform_transformer import DeformTransformer 6 | 7 | class TransformerBEVDecoder(nn.Module): 8 | def __init__(self, key='im_bkb_features', **kwargs): 9 | super(TransformerBEVDecoder, self).__init__() 10 | self.bev_encoder = Transformer(**kwargs) 11 | self.key = key 12 | 13 | def forward(self, inputs): 14 | assert self.key in inputs 15 | feats = inputs[self.key] 16 | fuse_feats = feats[-1] 17 | fuse_feats = fuse_feats.reshape(*inputs['images'].shape[:2], *fuse_feats.shape[-3:]) 18 | fuse_feats = torch.cat(torch.unbind(fuse_feats, dim=1), dim=-1) 19 | 20 | cameras_info = { 21 | 'extrinsic': inputs.get('extrinsic', None), 22 | 'intrinsic': inputs.get('intrinsic', None), 23 | 'ida_mats': inputs.get('ida_mats', None), 24 | 'do_flip': inputs['extra_infos'].get('do_flip', None) 25 | } 26 | 27 | _, _, bev_feats = self.bev_encoder(fuse_feats, cameras_info=cameras_info) 28 | 29 | return {"bev_enc_features": list(bev_feats)} 30 | 31 | class DeformTransformerBEVEncoder(nn.Module): 32 | def __init__(self, **kwargs): 33 | super(DeformTransformerBEVEncoder, self).__init__() 34 | self.bev_encoder = DeformTransformer(**kwargs) 35 | 36 | def forward(self, inputs): 37 | assert 
"im_bkb_features" in inputs 38 | feats = inputs["im_bkb_features"] 39 | for i in range(len(feats)): 40 | feats[i] = feats[i].reshape(*inputs["images"].shape[:2], *feats[i].shape[-3:]) 41 | feats[i] = feats[i].permute(0, 2, 3, 1, 4) 42 | feats[i] = feats[i].reshape(*feats[i].shape[:3], -1) 43 | cameras_info = { 44 | 'extrinsic': inputs.get('extrinsic', None), 45 | 'intrinsic': inputs.get('intrinsic', None), 46 | 'do_flip': inputs['extra_infos'].get('do_flip', None) 47 | } 48 | # src_feats: (N, H1 * W1, C) tgt_feats: # (M, N, H2 * W2, C) 49 | _, _, bev_feats = self.bev_encoder(feats, cameras_info=cameras_info) 50 | 51 | return { 52 | "bev_enc_features": list(bev_feats), 53 | } 54 | -------------------------------------------------------------------------------- /mapmaster/models/ins_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Mask2formerINSDecoder, PointMask2formerINSDecoder 2 | -------------------------------------------------------------------------------- /mapmaster/models/ins_decoder/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mapmaster.models.ins_decoder.mask2former import MultiScaleMaskedTransformerDecoder 4 | from mapmaster.models.ins_decoder.pointmask2former import PointMask2TransformerDecoder 5 | 6 | 7 | class INSDecoderBase(nn.Module): 8 | def __init__(self, decoder_ids=(5, ), tgt_shape=None): 9 | super(INSDecoderBase, self).__init__() 10 | self.decoder_ids = tuple(decoder_ids) # [0, 1, 2, 3, 4, 5] 11 | self.tgt_shape = tgt_shape 12 | self.bev_decoder = None 13 | 14 | def forward(self, inputs): 15 | assert "bev_enc_features" in inputs 16 | bev_enc_features = inputs["bev_enc_features"] 17 | if self.tgt_shape is not None: 18 | bev_enc_features = [self.up_sample(x) for x in inputs["bev_enc_features"]] 19 | out = self.bev_decoder(bev_enc_features[-1:], bev_enc_features[-1]) 20 | return {"mask_features": [out["pred_masks"][1:][i] for i in self.decoder_ids], 21 | "obj_scores": [out["pred_logits"][1:][i] for i in self.decoder_ids], 22 | "decoder_outputs": [out["decoder_outputs"][1:][i] for i in self.decoder_ids], 23 | "bev_enc_features": bev_enc_features} 24 | 25 | def up_sample(self, x, tgt_shape=None): 26 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 27 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 28 | return x 29 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 30 | 31 | class Mask2formerINSDecoder(INSDecoderBase): 32 | def __init__(self, decoder_ids=(5, ), tgt_shape=None, **kwargs): 33 | super(Mask2formerINSDecoder, self).__init__(decoder_ids, tgt_shape) 34 | self.bev_decoder = MultiScaleMaskedTransformerDecoder(**kwargs) 35 | 36 | class PointMask2formerINSDecoder(INSDecoderBase): 37 | def __init__(self, decoder_ids=(5, ), tgt_shape=None, **kwargs): 38 | super(PointMask2formerINSDecoder, self).__init__(decoder_ids, tgt_shape) 39 | self.bev_decoder = PointMask2TransformerDecoder(**kwargs) 40 | -------------------------------------------------------------------------------- /mapmaster/models/network.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mapmaster.models import backbone, bev_decoder, ins_decoder, output_head 3 | # os.environ['TORCH_DISTRIBUTED_DEBUG'] = "INFO" 4 | # warnings.filterwarnings('ignore') 5 | 6 | 7 | class MapMaster(nn.Module): 8 | def __init__(self, 
model_config, *args, **kwargs): 9 | super(MapMaster, self).__init__() 10 | self.im_backbone = self.create_backbone(**model_config["im_backbone"]) 11 | self.bev_decoder = self.create_bev_decoder(**model_config["bev_decoder"]) 12 | self.ins_decoder = self.create_ins_decoder(**model_config["ins_decoder"]) 13 | self.output_head = self.create_output_head(**model_config["output_head"]) 14 | self.post_processor = self.create_post_processor(**model_config["post_processor"]) 15 | 16 | def forward(self, inputs): 17 | outputs = {} 18 | outputs.update({k: inputs[k] for k in ["images", "extra_infos"]}) 19 | outputs.update({k: inputs[k].float() for k in ["extrinsic", "intrinsic"]}) 20 | if "ida_mats" in inputs: 21 | outputs.update({"ida_mats": inputs["ida_mats"].float()}) 22 | outputs.update(self.im_backbone(outputs)) 23 | outputs.update(self.bev_decoder(outputs)) 24 | outputs.update(self.ins_decoder(outputs)) 25 | outputs.update(self.output_head(outputs)) 26 | return outputs 27 | 28 | @staticmethod 29 | def create_backbone(arch_name, ret_layers, bkb_kwargs, fpn_kwargs, up_shape=None): 30 | __factory_dict__ = { 31 | "resnet": backbone.ResNetBackbone, 32 | "efficient_net": backbone.EfficientNetBackbone, 33 | "swin_transformer": backbone.SwinTRBackbone, 34 | } 35 | return __factory_dict__[arch_name](bkb_kwargs, fpn_kwargs, up_shape, ret_layers) 36 | 37 | @staticmethod 38 | def create_bev_decoder(arch_name, net_kwargs): 39 | __factory_dict__ = { 40 | "transformer": bev_decoder.TransformerBEVDecoder, 41 | "ipm_deformable_transformer": bev_decoder.DeformTransformerBEVEncoder, 42 | } 43 | return __factory_dict__[arch_name](**net_kwargs) 44 | 45 | @staticmethod 46 | def create_ins_decoder(arch_name, net_kwargs): 47 | __factory_dict__ = { 48 | "mask2former": ins_decoder.Mask2formerINSDecoder, 49 | "line_aware_decoder": ins_decoder.PointMask2formerINSDecoder, 50 | } 51 | 52 | return __factory_dict__[arch_name](**net_kwargs) 53 | 54 | @staticmethod 55 | def create_output_head(arch_name, net_kwargs): 56 | __factory_dict__ = { 57 | "bezier_output_head": output_head.PiecewiseBezierMapOutputHead, 58 | "pivot_point_predictor": output_head.PivotMapOutputHead, 59 | } 60 | return __factory_dict__[arch_name](**net_kwargs) 61 | 62 | @staticmethod 63 | def create_post_processor(arch_name, net_kwargs): 64 | __factory_dict__ = { 65 | "bezier_post_processor": output_head.PiecewiseBezierMapPostProcessor, 66 | "pivot_post_processor": output_head.PivotMapPostProcessor, 67 | } 68 | return __factory_dict__[arch_name](**net_kwargs) 69 | -------------------------------------------------------------------------------- /mapmaster/models/output_head/__init__.py: -------------------------------------------------------------------------------- 1 | from .bezier_outputs import PiecewiseBezierMapOutputHead 2 | from .bezier_post_processor import PiecewiseBezierMapPostProcessor 3 | from .pivot_outputs import PivotMapOutputHead 4 | from .pivot_post_processor import PivotMapPostProcessor -------------------------------------------------------------------------------- /mapmaster/models/output_head/bezier_outputs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FFN(nn.Module): 7 | """ Very simple multi-layer perceptron (also called FFN)""" 8 | 9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, basic_type='linear'): 10 | super().__init__() 11 | self.basic_type = basic_type 12 | if output_dim == 0: 
13 | self.basic_type = "identity" 14 | self.num_layers = num_layers 15 | h = [hidden_dim] * (num_layers - 1) 16 | self.layers = nn.ModuleList(self.basic_layer(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) 17 | 18 | def forward(self, x): 19 | for i, layer in enumerate(self.layers): 20 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 21 | return x 22 | 23 | def basic_layer(self, n, k): 24 | if self.basic_type == 'linear': 25 | return nn.Linear(n, k) 26 | elif self.basic_type == 'conv': 27 | return nn.Conv2d(n, k, kernel_size=1, stride=1) 28 | elif self.basic_type == 'identity': 29 | return nn.Identity() 30 | else: 31 | raise NotImplementedError 32 | 33 | 34 | class PiecewiseBezierMapOutputHead(nn.Module): 35 | def __init__(self, in_channel, num_queries, tgt_shape, num_degree, max_pieces, bev_channels=-1, ins_channel=64): 36 | super(PiecewiseBezierMapOutputHead, self).__init__() 37 | self.num_queries = num_queries 38 | self.num_classes = len(num_queries) 39 | self.tgt_shape = tgt_shape 40 | self.bev_channels = bev_channels 41 | self.semantic_heads = None 42 | if self.bev_channels > 0: 43 | self.semantic_heads = nn.ModuleList( 44 | nn.Sequential(nn.Conv2d(bev_channels, 2, kernel_size=1, stride=1)) for _ in range(self.num_classes) 45 | ) 46 | self.num_degree = num_degree 47 | self.max_pieces = max_pieces 48 | self.num_ctr_im = [(n + 1) for n in self.max_pieces] 49 | self.num_ctr_ex = [n * (d - 1) for n, d in zip(self.max_pieces, self.num_degree)] 50 | _N = self.num_classes 51 | 52 | _C = ins_channel 53 | self.im_ctr_heads = nn.ModuleList(FFN(in_channel, 256, (self.num_ctr_im[i] * 2) * _C, 3) for i in range(_N)) 54 | self.ex_ctr_heads = nn.ModuleList(FFN(in_channel, 256, (self.num_ctr_ex[i] * 2) * _C, 3) for i in range(_N)) 55 | self.npiece_heads = nn.ModuleList(FFN(in_channel, 256, self.max_pieces[i], 3) for i in range(_N)) 56 | self.gap_layer = nn.AdaptiveAvgPool2d((1, 1)) 57 | self.coords = self.compute_locations(device='cuda') 58 | self.coords_head = FFN(2, 256, _C, 3, 'conv') 59 | 60 | def forward(self, inputs): 61 | num_decoders = len(inputs["mask_features"]) 62 | dt_obj_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 63 | dt_ins_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 64 | im_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 65 | ex_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 66 | dt_end_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 67 | coords_feats = self.coords_head.forward(self.coords.repeat((inputs["mask_features"][0].shape[0], 1, 1, 1))) 68 | for i in range(num_decoders): 69 | x_ins_cw = inputs["mask_features"][i].split(self.num_queries, dim=1) 70 | x_obj_cw = inputs["obj_scores"][i].split(self.num_queries, dim=1) 71 | x_qry_cw = inputs["decoder_outputs"][i].split(self.num_queries, dim=1) 72 | batch_size = x_qry_cw[0].shape[0] 73 | for j in range(self.num_classes): 74 | num_qry = self.num_queries[j] 75 | # if self.training: 76 | dt_ins_masks[i][j] = self.up_sample(x_ins_cw[j]) 77 | dt_obj_logit[i][j] = x_obj_cw[j] 78 | dt_end_logit[i][j] = self.npiece_heads[j](x_qry_cw[j]) 79 | # im 80 | im_feats = self.im_ctr_heads[j](x_qry_cw[j]) 81 | im_feats = im_feats.reshape(batch_size, num_qry, self.num_ctr_im[j] * 2, -1).flatten(1, 2) 82 | im_coords_map = torch.einsum("bqc,bchw->bqhw", im_feats, coords_feats) 83 | im_coords = self.gap_layer(im_coords_map) 84 | im_ctr_coord[i][j] = 
im_coords.reshape(batch_size, num_qry, self.max_pieces[j] + 1, 2) 85 | # ex 86 | if self.num_ctr_ex[j] == 0: 87 | ex_ctr_coord[i][j] = torch.zeros(batch_size, num_qry, self.max_pieces[j], 0, 2).cuda() 88 | else: 89 | ex_feats = self.ex_ctr_heads[j](x_qry_cw[j]) 90 | ex_feats = ex_feats.reshape(batch_size, num_qry, self.num_ctr_ex[j] * 2, -1).flatten(1, 2) 91 | ex_coords_map = torch.einsum("bqc,bchw->bqhw", ex_feats, coords_feats) 92 | ex_coords = self.gap_layer(ex_coords_map) 93 | ex_ctr_coord[i][j] = ex_coords.reshape(batch_size, num_qry, self.max_pieces[j], self.num_degree[j] - 1, 2) 94 | ret = {"outputs": {"obj_logits": dt_obj_logit, "ins_masks": dt_ins_masks, 95 | "ctr_im": im_ctr_coord, "ctr_ex": ex_ctr_coord, "end_logits": dt_end_logit}} 96 | if self.semantic_heads is not None: 97 | num_decoders = len(inputs["bev_enc_features"]) 98 | dt_sem_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 99 | for i in range(num_decoders): 100 | x_sem = inputs["bev_enc_features"][i] 101 | for j in range(self.num_classes): 102 | dt_sem_masks[i][j] = self.up_sample(self.semantic_heads[j](x_sem)) 103 | ret["outputs"].update({"sem_masks": dt_sem_masks}) 104 | return ret 105 | 106 | def up_sample(self, x, tgt_shape=None): 107 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 108 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 109 | return x 110 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 111 | 112 | def compute_locations(self, stride=1, device='cpu'): 113 | 114 | fh, fw = self.tgt_shape 115 | 116 | shifts_x = torch.arange(0, fw * stride, step=stride, dtype=torch.float32, device=device) 117 | shifts_y = torch.arange(0, fh * stride, step=stride, dtype=torch.float32, device=device) 118 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 119 | shift_x = shift_x.reshape(-1) 120 | shift_y = shift_y.reshape(-1) 121 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 122 | 123 | locations = locations.unsqueeze(0).permute(0, 2, 1).contiguous().float().view(1, 2, fh, fw) 124 | locations[:, 0, :, :] /= fw 125 | locations[:, 1, :, :] /= fh 126 | 127 | return locations 128 | -------------------------------------------------------------------------------- /mapmaster/models/output_head/line_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def seq_matching_dist_parallel(cost, gt_lens, coe_endpts=0): 4 | # Time complexity: O(m*n) 5 | bs, m, n = cost.shape 6 | assert m <= n 7 | min_cost = np.ones((bs, m, n)) * np.inf 8 | mem_sort_value = np.ones((bs, m, n)) * np.inf # v[i][j] = np.min(min_cost[i][:j+1]) 9 | 10 | # initialization 11 | for j in range(0, n): 12 | if j == 0: 13 | min_cost[:, 0, j] = cost[:, 0, j] 14 | mem_sort_value[:, 0, j] = min_cost[:, 0, 0] 15 | 16 | for i in range(1, m): 17 | for j in range(i, n): 18 | min_cost[:, i, j] = mem_sort_value[:, i-1, j-1] + cost[:, i, j] 19 | indexes = (min_cost[:, i, j] < mem_sort_value[:, i, j-1]) 20 | indexes_inv = np.array(1-indexes, dtype=np.bool) 21 | mem_sort_value[indexes, i, j] = min_cost[indexes, i, j] 22 | mem_sort_value[indexes_inv, i, j] = mem_sort_value[indexes_inv, i, j-1] 23 | 24 | indexes = [] 25 | for i, ll in enumerate(gt_lens): 26 | indexes.append([i, ll-1, n-1]) 27 | indexes = np.array(indexes) 28 | xs, ys, zs = indexes[:, 0], indexes[:, 1], indexes[:, 2] 29 | res_cost = min_cost[xs, ys, zs] + (cost[xs, 0, 0] + cost[xs, ys, zs]) * coe_endpts 30 | return res_cost / (indexes[:, 
1]+1+coe_endpts*2) 31 | 32 | def pivot_dynamic_matching(cost: np.array): 33 | # Time complexity: O(m*n) 34 | m, n = cost.shape 35 | assert m <= n 36 | 37 | min_cost = np.ones((m, n)) * np.inf 38 | mem_sort_value = np.ones((m, n)) * np.inf 39 | match_res1 = [[] for _ in range(n)] 40 | match_res2 = [[] for _ in range(n)] 41 | 42 | # initialization 43 | for j in range(0, n-m+1): 44 | match_res1[j] = [0] 45 | mem_sort_value[0][j] = cost[0][0] 46 | if j == 0: 47 | min_cost[0][j] = cost[0][0] 48 | 49 | for i in range(1, m): 50 | for j in range(i, n-m + i+1): 51 | min_cost[i][j] = mem_sort_value[i-1][j-1] + cost[i][j] 52 | if min_cost[i][j] < mem_sort_value[i][j-1]: 53 | mem_sort_value[i][j] = min_cost[i][j] 54 | if i < m-1: 55 | match_res2[j] = match_res1[j-1] + [j] 56 | else: 57 | mem_sort_value[i][j] = mem_sort_value[i][j-1] 58 | if i < m -1: 59 | match_res2[j] = match_res2[j-1] 60 | if i < m-1: 61 | match_res1, match_res2 = match_res2.copy(), [[] for _ in range(n)] 62 | 63 | total_cost = min_cost[-1][-1] 64 | final_match_res = match_res1[-2] + [n-1] 65 | return total_cost, final_match_res -------------------------------------------------------------------------------- /mapmaster/models/output_head/pivot_outputs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FFN(nn.Module): 7 | """ Very simple multi-layer perceptron (also called FFN)""" 8 | 9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, basic_type='linear'): 10 | super().__init__() 11 | self.basic_type = basic_type 12 | self.num_layers = num_layers 13 | h = [hidden_dim] * (num_layers - 1) 14 | self.layers = nn.ModuleList(self.basic_layer(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) 15 | 16 | def forward(self, x): 17 | for i, layer in enumerate(self.layers): 18 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 19 | return x 20 | 21 | def basic_layer(self, n, k): 22 | if self.basic_type == 'linear': 23 | return nn.Linear(n, k) 24 | elif self.basic_type == 'conv': 25 | return nn.Conv2d(n, k, kernel_size=1, stride=1) 26 | else: 27 | raise NotImplementedError 28 | 29 | class PivotMapOutputHead(nn.Module): 30 | def __init__(self, in_channel, num_queries, tgt_shape, max_pieces, bev_channels=-1, ins_channel=64): 31 | super(PivotMapOutputHead, self).__init__() 32 | self.num_queries = num_queries 33 | self.num_classes = len(num_queries) 34 | self.tgt_shape = tgt_shape 35 | self.bev_channels = bev_channels 36 | self.semantic_heads = None 37 | if self.bev_channels > 0: 38 | self.semantic_heads = nn.ModuleList( 39 | nn.Sequential(nn.Conv2d(bev_channels, 2, kernel_size=1, stride=1)) for _ in range(self.num_classes) 40 | ) 41 | 42 | self.max_pieces = max_pieces # [10, 2, 30] 43 | self.pts_split = [num_queries[i]*max_pieces[i] for i in range(len(num_queries))] 44 | _N = self.num_classes 45 | _C = ins_channel 46 | self.im_ctr_heads = nn.ModuleList(FFN(in_channel, 256, 2 * _C, 3) for _ in range(_N)) 47 | self.pts_cls_heads = nn.ModuleList(FFN((_C)*2, _C*2, 2, 3) for i in range(_N)) 48 | self.gap_layer = nn.AdaptiveAvgPool2d((1, 1)) 49 | self.coords = self.compute_locations(device='cuda') # (1, 2, h, w) 50 | self.coords_head = FFN(2, 256, _C, 3, 'conv') 51 | 52 | def forward(self, inputs): 53 | num_decoders = len(inputs["mask_features"]) 54 | dt_obj_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 55 | dt_ins_masks = [[[] for _ in 
range(self.num_classes)] for _ in range(num_decoders)] 56 | im_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 57 | dt_pivots_logits = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 58 | coords_feats = self.coords_head.forward(self.coords.repeat((inputs["mask_features"][0].shape[0], 1, 1, 1))) 59 | 60 | for i in range(num_decoders): 61 | x_ins_cw = inputs["mask_features"][i].split(self.num_queries, dim=1) 62 | x_obj_cw = inputs["obj_scores"][i].split(self.num_queries, dim=1) 63 | x_qry_cw = inputs["decoder_outputs"][i].split(self.pts_split, dim=1) # [(b, 200, c), (b, 50, c), (b, 450, c)] 64 | batch_size = x_qry_cw[0].shape[0] 65 | for j in range(self.num_classes): 66 | dt_ins_masks[i][j] = self.up_sample(x_ins_cw[j]) # (B, P, H, W) 67 | dt_obj_logit[i][j] = x_obj_cw[j] # (B, P, 2) 68 | # im 69 | num_qry, n_pts = self.num_queries[j], self.max_pieces[j] 70 | im_feats = self.im_ctr_heads[j](x_qry_cw[j]) # (bs, n_q * n_pts, 2*c) 71 | im_feats_tmp = im_feats.reshape(batch_size, num_qry*n_pts*2, -1) # (bs, n_q*n_pts*2, c) 72 | im_coords_map = torch.einsum("bqc,bchw->bqhw", im_feats_tmp, coords_feats) # [bs, n_q*n_pts*2, h, w] 73 | im_coords = self.gap_layer(im_coords_map) # [bs, n_q * n_pts] 74 | im_coords = im_coords.reshape(batch_size, num_qry, self.max_pieces[j], 2).sigmoid() 75 | im_ctr_coord[i][j] = im_coords 76 | 77 | pt_feats = im_feats.reshape(batch_size, num_qry, self.max_pieces[j], -1).flatten(1, 2) # [bs, n_q * n_pts, 2*C] 78 | pt_logits = self.pts_cls_heads[j](pt_feats) 79 | dt_pivots_logits[i][j] = pt_logits.reshape(batch_size, num_qry, self.max_pieces[j], 2) 80 | 81 | ret = {"outputs": {"obj_logits": dt_obj_logit, "ins_masks": dt_ins_masks, 82 | "ctr_im": im_ctr_coord, "pts_logits": dt_pivots_logits}} 83 | 84 | if self.semantic_heads is not None: 85 | num_decoders = len(inputs["bev_enc_features"]) 86 | dt_sem_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 87 | for i in range(num_decoders): 88 | x_sem = inputs["bev_enc_features"][i] 89 | for j in range(self.num_classes): 90 | dt_sem_masks[i][j] = self.up_sample(self.semantic_heads[j](x_sem)) # (B, P, 2, H, W) 91 | ret["outputs"].update({"sem_masks": dt_sem_masks}) 92 | return ret 93 | 94 | def up_sample(self, x, tgt_shape=None): 95 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 96 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 97 | return x 98 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 99 | 100 | def compute_locations(self, stride=1, device='cpu'): 101 | 102 | fh, fw = self.tgt_shape 103 | 104 | shifts_x = torch.arange(0, fw * stride, step=stride, dtype=torch.float32, device=device) 105 | shifts_y = torch.arange(0, fh * stride, step=stride, dtype=torch.float32, device=device) 106 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 107 | shift_x = shift_x.reshape(-1) 108 | shift_y = shift_y.reshape(-1) 109 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 110 | 111 | locations = locations.unsqueeze(0).permute(0, 2, 1).contiguous().float().view(1, 2, fh, fw) 112 | locations[:, 0, :, :] /= fw 113 | locations[:, 1, :, :] /= fh 114 | 115 | return locations 116 | -------------------------------------------------------------------------------- /mapmaster/models/utils/mask_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from 
detectron2.projects.point_rend.point_features import point_sample 6 | from detectron2.projects.point_rend.point_features import get_uncertain_point_coords_with_randomness 7 | 8 | 9 | class SegmentationLoss(nn.Module): 10 | 11 | def __init__(self, ce_weight, dice_weight, use_point_render=False, num_points=8000, oversample=3.0, importance=0.75): 12 | super(SegmentationLoss, self).__init__() 13 | self.ce_weight = ce_weight 14 | self.dice_weight = dice_weight 15 | self.use_point_render = use_point_render 16 | self.num_points = num_points 17 | self.oversample = oversample 18 | self.importance = importance 19 | 20 | def forward(self, dt_masks, gt_masks, stage="loss"): 21 | loss = 0 22 | if self.use_point_render: 23 | dt_masks, gt_masks = self.points_render(dt_masks, gt_masks, stage) 24 | if self.ce_weight > 0: 25 | loss += self.ce_weight * self.forward_sigmoid_ce_loss(dt_masks, gt_masks) 26 | if self.dice_weight > 0: 27 | loss += self.dice_weight * self.forward_dice_loss(dt_masks, gt_masks) 28 | return loss 29 | 30 | @staticmethod 31 | def forward_dice_loss(inputs, targets): 32 | inputs = inputs.sigmoid() 33 | inputs = inputs.flatten(1) 34 | targets = targets.flatten(1) 35 | numerator = 2 * (inputs * targets).sum(-1) 36 | denominator = inputs.sum(-1) + targets.sum(-1) 37 | loss = 1 - (numerator + 1) / (denominator + 1) 38 | return loss 39 | 40 | @staticmethod 41 | def forward_sigmoid_ce_loss(inputs, targets): 42 | inputs = inputs.flatten(1) 43 | targets = targets.flatten(1) 44 | loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 45 | return loss.mean(1) 46 | 47 | def points_render(self, src_masks, tgt_masks, stage): 48 | assert stage in ["loss", "matcher"] 49 | assert src_masks.shape == tgt_masks.shape 50 | 51 | src_masks = src_masks[:, None] 52 | tgt_masks = tgt_masks[:, None] 53 | 54 | if stage == "matcher": 55 | point_coords = torch.rand(1, self.num_points, 2, device=src_masks.device) 56 | point_coords_src = point_coords.repeat(src_masks.shape[0], 1, 1) 57 | point_coords_tgt = point_coords.repeat(tgt_masks.shape[0], 1, 1) 58 | else: 59 | point_coords = get_uncertain_point_coords_with_randomness( 60 | src_masks, 61 | lambda logits: self.calculate_uncertainty(logits), 62 | self.num_points, 63 | self.oversample, 64 | self.importance, 65 | ) 66 | point_coords_src = point_coords.clone() 67 | point_coords_tgt = point_coords.clone() 68 | 69 | src_masks = point_sample(src_masks, point_coords_src, align_corners=False).squeeze(1) 70 | tgt_masks = point_sample(tgt_masks, point_coords_tgt, align_corners=False).squeeze(1) 71 | 72 | return src_masks, tgt_masks 73 | 74 | @staticmethod 75 | def calculate_uncertainty(logits): 76 | """ 77 | We estimate uncerainty as L1 distance between 0.0 and the logit prediction in 'logits' for the 78 | foreground class in `classes`. 79 | Args: 80 | logits (Tensor): A tensor of shape (R, 1, ...) for class-specific or 81 | class-agnostic, where R is the total number of predicted masks in all images and C is 82 | the number of foreground classes. The values are logits. 83 | Returns: 84 | scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with 85 | the most uncertain locations having the highest uncertainty score. 
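        Example (illustrative only, using the formula implemented below):
            >>> logits = torch.randn(8, 1, 64, 64)   # per-point mask logits
            >>> scores = -(torch.abs(logits))        # largest where |logit| ~ 0, i.e. closest
            ...                                      # to the predicted decision boundary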
86 | """ 87 | assert logits.shape[1] == 1 88 | gt_class_logits = logits.clone() 89 | return -(torch.abs(gt_class_logits)) 90 | -------------------------------------------------------------------------------- /mapmaster/models/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import warnings 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | 8 | def c2_xavier_fill(module: nn.Module) -> None: 9 | """ 10 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2. 11 | Also initializes `module.bias` to 0. 12 | Args: 13 | module (torch.nn.Module): module to initialize. 14 | """ 15 | # Caffe2 implementation of XavierFill in fact 16 | # corresponds to kaiming_uniform_ in PyTorch 17 | # pyre-fixme[6]: For 1st param expected `Tensor` but got `Union[Module, Tensor]`. 18 | nn.init.kaiming_uniform_(module.weight, a=1) 19 | if module.bias is not None: 20 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module, 21 | # torch.Tensor]`. 22 | nn.init.constant_(module.bias, 0) 23 | 24 | 25 | class Conv2d(torch.nn.Conv2d): 26 | """ 27 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 28 | """ 29 | 30 | def __init__(self, *args, **kwargs): 31 | """ 32 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 33 | Args: 34 | norm (nn.Module, optional): a normalization layer 35 | activation (callable(Tensor) -> Tensor): a callable activation function 36 | It assumes that norm layer is used before activation. 37 | """ 38 | norm = kwargs.pop("norm", None) 39 | activation = kwargs.pop("activation", None) 40 | super().__init__(*args, **kwargs) 41 | 42 | self.norm = norm 43 | self.activation = activation 44 | 45 | def forward(self, x): 46 | # torchscript does not support SyncBatchNorm yet 47 | # https://github.com/pytorch/pytorch/issues/40507 48 | # and we skip these codes in torchscript since: 49 | # 1. currently we only support torchscript in evaluation mode 50 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or 51 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 52 | if not torch.jit.is_scripting(): 53 | with warnings.catch_warnings(record=True): 54 | if x.numel() == 0 and self.training: 55 | # https://github.com/pytorch/pytorch/issues/12013 56 | assert not isinstance( 57 | self.norm, torch.nn.SyncBatchNorm 58 | ), "SyncBatchNorm does not support empty inputs!" 59 | 60 | x = F.conv2d( 61 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 62 | ) 63 | if self.norm is not None: 64 | x = self.norm(x) 65 | if self.activation is not None: 66 | x = self.activation(x) 67 | return x 68 | 69 | 70 | def get_activation_fn(activation): 71 | """Return an activation function given a string""" 72 | if activation == "relu": 73 | return F.relu 74 | if activation == "gelu": 75 | return F.gelu 76 | if activation == "glu": 77 | return F.glu 78 | raise RuntimeError(F"activation should be relu/gelu, not {activation}.") 79 | -------------------------------------------------------------------------------- /mapmaster/models/utils/position_encoding.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various positional encodings for the transformer. 
3 | """ 4 | import math 5 | import torch 6 | from torch import nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class PositionEmbeddingSine(nn.Module): 11 | """ 12 | This is a more standard version of the position embedding, very similar to the one 13 | used by the Attention is all you need paper, generalized to work on images. 14 | """ 15 | 16 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None): 17 | super().__init__() 18 | self.num_pos_feats = num_pos_feats 19 | self.temperature = temperature 20 | self.normalize = normalize 21 | if scale is not None and normalize is False: 22 | raise ValueError("normalize should be True if scale is passed") 23 | if scale is None: 24 | scale = 2 * math.pi 25 | self.scale = scale 26 | 27 | def forward(self, mask): 28 | assert mask is not None 29 | not_mask = ~mask 30 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 31 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 32 | if self.normalize: 33 | eps = 1e-6 34 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 35 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 36 | 37 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=mask.device) 38 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 39 | 40 | pos_x = x_embed[:, :, :, None] / dim_t 41 | pos_y = y_embed[:, :, :, None] / dim_t 42 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) 43 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) 44 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 45 | return pos 46 | 47 | 48 | class PositionEmbeddingLearned(nn.Module): 49 | """ 50 | Absolute pos embedding, learned. 51 | """ 52 | 53 | def __init__(self, num_pos=(50, 50), num_pos_feats=256): 54 | super().__init__() 55 | self.num_pos = num_pos 56 | self.pos_embed = nn.Embedding(num_pos[0] * num_pos[1], num_pos_feats) 57 | self.reset_parameters() 58 | 59 | def reset_parameters(self): 60 | nn.init.normal_(self.pos_embed.weight) 61 | 62 | def forward(self, mask): 63 | h, w = mask.shape[-2:] 64 | pos = self.pos_embed.weight.view(*self.num_pos, -1)[:h, :w] 65 | pos = pos.permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 66 | return pos 67 | 68 | 69 | class PositionEmbeddingIPM(nn.Module): 70 | 71 | def __init__(self, 72 | encoder=None, 73 | num_pos=(16, 168), 74 | input_shape=(512, 896), 75 | num_pos_feats=64, 76 | sine_encoding=False, 77 | temperature=10000): 78 | super().__init__() 79 | 80 | h, w_expand = num_pos 81 | self.current_shape = (h, w_expand // 6) 82 | self.input_shape = input_shape 83 | 84 | self.num_pos_feats = num_pos_feats 85 | self.temperature = temperature 86 | self.encoder = encoder 87 | self.sine_encoding = sine_encoding 88 | 89 | def get_embedding(self, extrinsic, intrinsic, ida_mats): 90 | """ 91 | Get the BeV Coordinate for Image 92 | 93 | Return 94 | xy_world_coord (N, H, W, 2) Ego x, y coordinate 95 | Valid (N, H, W, 1) -- Valid Points or Not 1 -- valid; 0 -- invalid 96 | """ 97 | # extrinsic -> (B, M, 4, 4) 98 | device, b, n = extrinsic.device, extrinsic.shape[0], extrinsic.shape[1] 99 | 100 | x = torch.linspace(0, self.input_shape[1] - 1, self.current_shape[1], dtype=torch.float) 101 | y = torch.linspace(0, self.input_shape[0] - 1, self.current_shape[0], dtype=torch.float) 102 | y_grid, x_grid = torch.meshgrid(y, x) 103 | z = torch.ones(self.current_shape) 104 | feat_coords = torch.stack([x_grid, y_grid, z], dim=-1).to(device) # (H, 
W, 3) 105 | feat_coords = feat_coords.unsqueeze(0).repeat(n, 1, 1, 1).unsqueeze(0).repeat(b, 1, 1, 1, 1) # (B, N, H, W, 3) 106 | 107 | ida_mats = ida_mats.view(b, n, 1, 1, 3, 3) 108 | image_coords = ida_mats.inverse().matmul(feat_coords.unsqueeze(-1)) # (B, N, H, W, 3, 1) 109 | 110 | intrinsic = intrinsic.view(b, n, 1, 1, 3, 3) # (B, N, 1, 1, 3, 3) 111 | normed_coords = torch.linalg.inv(intrinsic) @ image_coords # (B, N, H, W, 3, 1) 112 | 113 | ext_rots = extrinsic[:, :, :3, :3] # (B, N, 3, 3) 114 | ext_trans = extrinsic[:, :, :3, 3] # (B, N, 3) 115 | 116 | ext_rots = ext_rots.view(b, n, 1, 1, 3, 3) # (B, N, 1, 1, 3, 3) 117 | world_coords = (ext_rots @ normed_coords).squeeze(-1) # (B, N, H, W, 3) 118 | world_coords = F.normalize(world_coords, p=2, dim=-1) 119 | z_coord = world_coords[:, :, :, :, 2] # (B, N, H, W) 120 | 121 | trans_z = ext_trans[:, :, 2].unsqueeze(-1).unsqueeze(-1) # (B, N, 1, 1) 122 | depth = - trans_z / z_coord # (B, N, H, W) 123 | valid = depth > 0 # (B, N, H, W) 124 | 125 | xy_world_coords = world_coords[:, :, :, :, :2] # (B, N, H, W, 2) 126 | xy_world_coords = xy_world_coords * depth.unsqueeze(-1) 127 | valid = valid.unsqueeze(-1) # (B, N, H, W, 1) 128 | 129 | return xy_world_coords, valid 130 | 131 | def forward(self, extrinsic, intrinsic, ida_mats, do_flip): 132 | """ 133 | extrinsic (N, 6, 4, 4) torch.Tensor 134 | intrinsic (N, 6, 3, 3) 135 | """ 136 | device = extrinsic.device 137 | xy_pos_embed, valid = self.get_embedding(extrinsic, intrinsic, ida_mats) 138 | if do_flip: 139 | xy_pos_embed[:, :, :, :, 1] = -1 * xy_pos_embed[:, :, :, :, 1] 140 | # along with w 141 | xy_pos_embed = torch.cat(torch.unbind(xy_pos_embed, dim=1), dim=-2) # (B, H, N*W, 2) 142 | valid = torch.cat(torch.unbind(valid, dim=1), dim=-2) # (B, H, N*W, 2) 143 | if self.sine_encoding: 144 | # Use Sine encoding to get 256 dim embeddings 145 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=device) 146 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2)) 147 | pos_embed = xy_pos_embed[:, :, :, :, None] / dim_t 148 | pos_x = torch.stack((pos_embed[:, :, :, 0, 0::2].sin(), pos_embed[:, :, :, 0, 1::2].cos()), dim=4) 149 | pos_y = torch.stack((pos_embed[:, :, :, 1, 0::2].sin(), pos_embed[:, :, :, 1, 1::2].cos()), dim=4) 150 | pos_full_embed = torch.cat((pos_y.flatten(3), pos_x.flatten(3)), dim=3) 151 | pos_combined = torch.where(valid, pos_full_embed, torch.tensor(0., dtype=torch.float32, device=device)) 152 | pos_combined = pos_combined.permute(0, 3, 1, 2) # (B, 2, H, W') 153 | else: 154 | assert None 155 | # pos_combined = torch.where(valid, xy_pos_embed, torch.tensor(0., dtype=torch.float32, device=device)) 156 | # pos_combined = pos_combined.permute(0, 3, 1, 2) 157 | 158 | if self.encoder is None: 159 | return pos_combined, valid.squeeze(-1) 160 | else: 161 | pos_embed_contiguous = pos_combined.contiguous() 162 | return self.encoder(pos_embed_contiguous), valid.squeeze(-1) 163 | 164 | 165 | class PositionEmbeddingTgt(nn.Module): 166 | def __init__(self, 167 | encoder=None, 168 | tgt_shape=(40, 20), 169 | map_size=(400, 200), 170 | map_resolution=0.15, 171 | num_pos_feats=64, 172 | sine_encoding=False, 173 | temperature=10000): 174 | super().__init__() 175 | self.tgt_shape = tgt_shape 176 | self.encoder = encoder 177 | self.map_size = map_size 178 | self.map_resolution = map_resolution 179 | self.num_pos_feats = num_pos_feats 180 | self.temperature = temperature 181 | self.sine_encoding = sine_encoding 182 | 183 | def forward(self, mask): 184 | B = 
mask.shape[0] 185 | 186 | map_forward_ratio = self.tgt_shape[0] / self.map_size[0] 187 | map_lateral_ratio = self.tgt_shape[1] / self.map_size[1] 188 | 189 | map_forward_res = self.map_resolution / map_forward_ratio 190 | map_lateral_res = self.map_resolution / map_lateral_ratio 191 | 192 | X = (torch.arange(self.tgt_shape[0] - 1, -1, -1, device=mask.device) + 0.5 - self.tgt_shape[ 193 | 0] / 2) * map_forward_res 194 | Y = (torch.arange(self.tgt_shape[1] - 1, -1, -1, device=mask.device) + 0.5 - self.tgt_shape[ 195 | 1] / 2) * map_lateral_res 196 | 197 | grid_X, grid_Y = torch.meshgrid(X, Y) 198 | pos_embed = torch.stack([grid_X, grid_Y], dim=-1) # (H, W, 2) 199 | 200 | if self.sine_encoding: 201 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=mask.device) 202 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2)) 203 | 204 | pos_embed = pos_embed[:, :, :, None] / dim_t 205 | pos_x = torch.stack((pos_embed[:, :, 0, 0::2].sin(), pos_embed[:, :, 0, 1::2].cos()), dim=3).flatten(2) 206 | pos_y = torch.stack((pos_embed[:, :, 1, 0::2].sin(), pos_embed[:, :, 1, 1::2].cos()), dim=3).flatten(2) 207 | pos_full_embed = torch.cat((pos_y, pos_x), dim=2) 208 | 209 | pos_embed = pos_full_embed.unsqueeze(0).repeat(B, 1, 1, 1).permute(0, 3, 1, 2) 210 | else: 211 | pos_embed = pos_embed.unsqueeze(0).repeat(B, 1, 1, 1).permute(0, 3, 1, 2) 212 | 213 | if self.encoder is None: 214 | return pos_embed 215 | else: 216 | pos_embed_contiguous = pos_embed.contiguous() 217 | return self.encoder(pos_embed_contiguous) -------------------------------------------------------------------------------- /mapmaster/models/utils/recovery_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from detectron2.projects.point_rend.point_features import point_sample 6 | 7 | 8 | class PointRecoveryLoss(nn.Module): 9 | 10 | def __init__(self, ce_weight, dice_weight, curve_width, tgt_shape): 11 | super(PointRecoveryLoss, self).__init__() 12 | self.ce_weight = ce_weight 13 | self.dice_weight = dice_weight 14 | self.kernel = self.generate_kernel(curve_width, tgt_shape) 15 | 16 | def forward(self, points, gt_masks): 17 | points_expanded = points.unsqueeze(2) - self.kernel.repeat(points.shape[0], 1, 1, 1) 18 | points_expanded = torch.clamp(points_expanded.flatten(1, 2), min=0, max=1) # (N, P*w*w, 2) [0, 1] 19 | dt_points = point_sample(gt_masks[:, None], points_expanded, align_corners=False).squeeze(1).flatten(1) 20 | gt_points = torch.ones_like(dt_points) 21 | loss = 0 22 | if self.ce_weight > 0: 23 | loss += self.ce_weight * self.forward_ce_loss(dt_points, gt_points) 24 | if self.dice_weight > 0: 25 | loss += self.dice_weight * self.forward_dice_loss(dt_points, gt_points) 26 | return loss 27 | 28 | @staticmethod 29 | def generate_kernel(curve_width, tgt_shape, device='cuda'): 30 | width = torch.tensor(list(range(curve_width))) 31 | kernel = torch.stack(torch.meshgrid(width, width), dim=-1).float() 32 | kernel = kernel - curve_width // 2 33 | kernel[..., 0] = kernel[..., 0] / tgt_shape[1] 34 | kernel[..., 1] = kernel[..., 1] / tgt_shape[0] 35 | kernel = kernel.flatten(0, 1).unsqueeze(0).unsqueeze(0) # (1, 1, w*w, 2) 36 | kernel = kernel.cuda() if device == 'cuda' else kernel 37 | return kernel 38 | 39 | @staticmethod 40 | def forward_dice_loss(inputs, targets): 41 | numerator = 2 * (inputs * targets).sum(-1) 42 | denominator = inputs.sum(-1) + 
targets.sum(-1) 43 | loss = 1 - (numerator + 1) / (denominator + 1) 44 | return loss 45 | 46 | @staticmethod 47 | def forward_ce_loss(inputs, targets): 48 | loss = F.binary_cross_entropy(inputs, targets, reduction="none") 49 | return loss.mean(1) 50 | -------------------------------------------------------------------------------- /mapmaster/utils/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import PIL 5 | import importlib 6 | import warnings 7 | import subprocess 8 | import torch 9 | import torchvision 10 | import numpy as np 11 | from tabulate import tabulate 12 | from collections import defaultdict 13 | 14 | __all__ = ["collect_env_info"] 15 | 16 | 17 | def collect_torch_env(): 18 | import torch.__config__ 19 | return torch.__config__.show() 20 | 21 | 22 | def collect_git_info(): 23 | try: 24 | import git 25 | from git import InvalidGitRepositoryError 26 | except ImportError: 27 | warnings.warn("Please consider to install gitpython for git info collection by 'pip install gitpython'.") 28 | return "Git status: unknown\n" 29 | 30 | try: 31 | repo = git.Repo(get_root_dir()) 32 | except InvalidGitRepositoryError: 33 | warnings.warn("Current path is possibly not a valid git repository.") 34 | return "Git status: unknown\n" 35 | 36 | msg = "***Git status:***\n{}\nHEAD Commit-id: {}\n".format(repo.git.status().replace("<", "\<"), repo.head.commit) 37 | msg = "{}\n{}".format(msg, "***Git Diff:***\n{}\n".format(repo.git.diff().replace("<", "\<"))) 38 | return msg 39 | 40 | 41 | def detect_compute_compatibility(CUDA_HOME, so_file): 42 | try: 43 | cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") 44 | if os.path.isfile(cuobjdump): 45 | output = subprocess.check_output("'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True) 46 | output = output.decode("utf-8").strip().split("\n") 47 | sm = [] 48 | for line in output: 49 | line = re.findall(r"\.sm_[0-9]*\.", line)[0] 50 | sm.append(line.strip(".")) 51 | sm = sorted(set(sm)) 52 | return ", ".join(sm) 53 | else: 54 | return so_file + "; cannot find cuobjdump" 55 | except Exception: 56 | # unhandled failure 57 | return so_file 58 | 59 | 60 | def collect_env_info(): 61 | data = [] 62 | data.append(("sys.platform", sys.platform)) 63 | data.append(("Python", sys.version.replace("\n", ""))) 64 | data.append(("numpy", np.__version__)) 65 | data.append(("Pillow", PIL.__version__)) 66 | 67 | data.append(("PyTorch", torch.__version__ + " @" + os.path.dirname(torch.__file__))) 68 | data.append(("PyTorch debug build", torch.version.debug)) 69 | 70 | has_cuda = torch.cuda.is_available() 71 | 72 | data.append(("CUDA available", has_cuda)) 73 | if has_cuda: 74 | devices = defaultdict(list) 75 | for k in range(torch.cuda.device_count()): 76 | devices[torch.cuda.get_device_name(k)].append(str(k)) 77 | for name, devids in devices.items(): 78 | data.append(("GPU " + ",".join(devids), name)) 79 | 80 | from torch.utils.cpp_extension import CUDA_HOME 81 | 82 | data.append(("CUDA_HOME", str(CUDA_HOME))) 83 | 84 | if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): 85 | try: 86 | nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") 87 | nvcc = subprocess.check_output("'{}' -V | tail -n1".format(nvcc), shell=True) 88 | nvcc = nvcc.decode("utf-8").strip() 89 | except subprocess.SubprocessError: 90 | nvcc = "Not Available" 91 | data.append(("NVCC", nvcc)) 92 | 93 | cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) 94 | if cuda_arch_list: 95 | 
data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) 96 | 97 | try: 98 | data.append( 99 | ( 100 | "torchvision", 101 | str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), 102 | ) 103 | ) 104 | if has_cuda: 105 | try: 106 | torchvision_C = importlib.util.find_spec("torchvision._C").origin 107 | msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) 108 | data.append(("torchvision arch flags", msg)) 109 | except ImportError: 110 | data.append(("torchvision._C", "failed to find")) 111 | except AttributeError: 112 | data.append(("torchvision", "unknown")) 113 | 114 | try: 115 | import cv2 116 | 117 | data.append(("cv2", cv2.__version__)) 118 | except ImportError: 119 | pass 120 | 121 | env_str = tabulate(data) + "\n" 122 | env_str += collect_git_info() 123 | env_str += "-" * 100 + "\n" 124 | env_str += collect_torch_env() 125 | return env_str 126 | 127 | 128 | def get_root_dir(): 129 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 130 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | clearml 2 | loguru 3 | Ninja 4 | numba 5 | opencv-contrib-python 6 | pandas 7 | scikit-image 8 | tabulate 9 | tensorboardX 10 | Pillow==9.4.0 11 | numpy==1.23.5 12 | visvalingamwyatt=0.2.0 -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export PYTHONPATH=$(pwd) 4 | 5 | case "$1" in 6 | "train") 7 | CONFIG_NAME=$2 8 | NUM_EPOCHS=$3 9 | python3 configs/"${CONFIG_NAME}".py -d 0-7 -b 1 -e ${NUM_EPOCHS} --sync_bn 8 --no-clearml 10 | ;; 11 | "test") 12 | CONFIG_NAME=$2 13 | CKPT=$3 14 | python3 configs/"${CONFIG_NAME}".py -d 0-7 --eval --ckpt "${CKPT}" 15 | ;; 16 | "train-continue") 17 | CONFIG_NAME=$2 18 | CKPT=$3 19 | python3 configs/"${CONFIG_NAME}".py -d 0-7 -b 1 -e 30 --sync_bn 8 --no-clearml --ckpt "${CKPT}" 20 | ;; 21 | "pipeline") 22 | CONFIG_NAME=$2 23 | NUM_EPOCHS=$3 24 | CKPT_ID=$((NUM_EPOCHS-1)) 25 | bash run.sh train ${CONFIG_NAME} ${NUM_EPOCHS} 26 | bash run.sh test ${CONFIG_NAME} outputs/${CONFIG_NAME}/latest/dump_model/checkpoint_epoch_${CKPT_ID}.pth 27 | ;; 28 | "reproduce") 29 | CONFIG_NAME=$2 30 | bash run.sh pipeline ${CONFIG_NAME} 30 31 | bash run.sh pipeline ${CONFIG_NAME} 110 32 | ;; 33 | *) 34 | echo "error" 35 | esac 36 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/__init__.py -------------------------------------------------------------------------------- /tools/anno_converter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/anno_converter/__init__.py -------------------------------------------------------------------------------- /tools/anno_converter/bezier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from shapely.geometry import LineString 4 | from scipy.special import comb as n_over_k 5 | 6 | 7 | class PiecewiseBezierCurve(object): 8 | def __init__(self, num_points=100, 
num_degree=2, margin=0.05, threshold=0.1): 9 | super().__init__() 10 | self.num_points = num_points 11 | self.num_degree = num_degree 12 | self.margin = margin 13 | self.bezier_coefficient = self._get_bezier_coefficients(np.linspace(0, 1, self.num_points)) 14 | self.threshold = threshold 15 | 16 | def _get_bezier_coefficients(self, t_list): 17 | bernstein_fn = lambda n, t, k: (t ** k) * ((1 - t) ** (n - k)) * n_over_k(n, k) 18 | bezier_coefficient_fn = \ 19 | lambda ts: [[bernstein_fn(self.num_degree, t, k) for k in range(self.num_degree + 1)] for t in t_list] 20 | return np.array(bezier_coefficient_fn(t_list)) 21 | 22 | def _get_interpolated_points(self, points): 23 | line = LineString(points) 24 | distances = np.linspace(0, line.length, self.num_points) 25 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 26 | return sampled_points 27 | 28 | def _get_chamfer_distance(self, points_before, points_after): 29 | points_before = torch.from_numpy(points_before).float() 30 | points_after = torch.from_numpy(points_after).float() 31 | dist = torch.cdist(points_before, points_after) 32 | dist1, _ = torch.min(dist, 2) 33 | dist1 = (dist1 * (dist1 > self.margin).float()) 34 | dist2, _ = torch.min(dist, 1) 35 | dist2 = (dist2 * (dist2 > self.margin).float()) 36 | return (dist1.mean(-1) + dist2.mean(-1)) / 2 37 | 38 | def bezier_fitting(self, curve_pts): 39 | curve_pts_intered = self._get_interpolated_points(curve_pts) 40 | bezier_ctrl_pts = np.linalg.pinv(self.bezier_coefficient).dot(curve_pts_intered) 41 | bezier_ctrl_pts = np.concatenate([curve_pts[0:1], bezier_ctrl_pts[1:-1], curve_pts[-1:]], axis=0) 42 | curve_pts_recovery = self.bezier_coefficient.dot(bezier_ctrl_pts) 43 | criterion = self._get_chamfer_distance(curve_pts_intered[None, :, :], curve_pts_recovery[None, :, :]).item() 44 | return bezier_ctrl_pts, criterion 45 | 46 | @staticmethod 47 | def sequence_reverse(ctr_points): 48 | ctr_points = np.array(ctr_points) 49 | (xs, ys), (xe, ye) = ctr_points[0], ctr_points[-1] 50 | if ys > ye: 51 | ctr_points = ctr_points[::-1] 52 | return ctr_points 53 | 54 | def __call__(self, curve_pts): 55 | ctr_points_piecewise = [] 56 | num_points = curve_pts.shape[0] 57 | start, end = 0, num_points - 1 58 | while start < end: 59 | ctr_points, loss = self.bezier_fitting(curve_pts[start: end + 1]) 60 | if loss < self.threshold: 61 | start, end = end, num_points - 1 62 | if start >= end: 63 | ctr_points_piecewise += ctr_points.tolist() 64 | else: 65 | ctr_points_piecewise += ctr_points.tolist()[:-1] 66 | else: 67 | end = end - 1 68 | ctr_points_piecewise = self.sequence_reverse(ctr_points_piecewise) 69 | return ctr_points_piecewise 70 | -------------------------------------------------------------------------------- /tools/anno_converter/generate_pivots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import visvalingamwyatt as vw 3 | 4 | class GenPivots: 5 | def __init__(self, max_pts=[10, 2, 30], map_region=(30, -30, 15, -15), vm_thre=2.0, resolution=0.15): 6 | self.max_pts = max_pts 7 | self.map_region = map_region 8 | self.vm_thre = vm_thre 9 | self.resolution = resolution 10 | 11 | def pivots_generate(self, map_vectors): 12 | pivots_single_frame = {0:[], 1:[], 2:[]} 13 | lengths_single_frame = {0:[], 1:[], 2:[]} 14 | for ii, vec in enumerate(map_vectors): 15 | pts = np.array(vec["pts"]) * self.resolution # 转成 m 16 | pts = pts[:, ::-1] 17 | cls = vec["type"] 18 | 19 | # If the 
difference in x is obvious (greater than 1m), then rank according to x. 20 | # If the difference in x is not obvious, rank according to y. 21 | if (np.abs(pts[0][0]-pts[-1][0])>1 and pts[0][0] 0: 35 | new_pts[:, :, 0] = new_pts[:, :, 0] / (2 * self.map_region[0]) # normalize 36 | new_pts[:, :, 1] = new_pts[:, :, 1] / (2 * self.map_region[2]) 37 | pivots_single_frame[cls] = new_pts 38 | lengths_single_frame[cls] = np.array(lengths_single_frame[cls]) 39 | 40 | return pivots_single_frame, lengths_single_frame 41 | 42 | def pad_pts(self, pts, tgt_length): 43 | if len(pts) >= tgt_length: 44 | return pts[:tgt_length] 45 | pts = np.concatenate([pts, np.zeros((tgt_length-len(pts), 2))], axis=0) 46 | return pts 47 | -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/anno_converter/nuscenes/__init__.py -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/convert.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | from tqdm import tqdm 5 | from nuscenes import NuScenes 6 | from pyquaternion import Quaternion 7 | from torch.utils.data import Dataset 8 | from rasterize import RasterizedLocalMap 9 | from vectorize import VectorizedLocalMap 10 | from tools.anno_converter.generate_pivots import GenPivots 11 | 12 | 13 | class NuScenesDataset(Dataset): 14 | def __init__(self, version, dataroot, xbound=(-30., 30., 0.15), ybound=(-15., 15., 0.15)): 15 | super(NuScenesDataset, self).__init__() 16 | patch_h = ybound[1] - ybound[0] 17 | patch_w = xbound[1] - xbound[0] 18 | canvas_h = int(patch_h / ybound[2]) 19 | canvas_w = int(patch_w / xbound[2]) 20 | self.patch_size = (patch_h, patch_w) 21 | self.canvas_size = (canvas_h, canvas_w) 22 | self.nusc = NuScenes(version=version, dataroot=dataroot, verbose=False) 23 | self.vector_map = VectorizedLocalMap(dataroot, patch_size=self.patch_size, canvas_size=self.canvas_size) 24 | 25 | def __len__(self): 26 | return len(self.nusc.sample) 27 | 28 | def __getitem__(self, idx): 29 | record = self.nusc.sample[idx] 30 | location = self.nusc.get('log', self.nusc.get('scene', record['scene_token'])['log_token'])['location'] 31 | ego_pose = self.nusc.get('ego_pose', 32 | self.nusc.get('sample_data', record['data']['LIDAR_TOP'])['ego_pose_token']) 33 | vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation']) 34 | imgs, trans, rots, intrins = self.get_data_info(record) 35 | return imgs, np.stack(trans), np.stack(rots), np.stack(intrins), vectors 36 | 37 | def get_data_info(self, record): 38 | imgs, trans, rots, intrins = [], [], [], [] 39 | for cam in ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']: 40 | samp = self.nusc.get('sample_data', record['data'][cam]) 41 | imgs.append(samp['filename']) 42 | sens = self.nusc.get('calibrated_sensor', samp['calibrated_sensor_token']) 43 | trans.append(sens['translation']) 44 | rots.append(Quaternion(sens['rotation']).rotation_matrix) 45 | intrins.append(sens['camera_intrinsic']) 46 | return imgs, trans, rots, intrins 47 | 48 | 49 | class NuScenesSemanticDataset(NuScenesDataset): 50 | def __init__(self, version, dataroot, xbound, ybound, 
thickness, num_degrees, max_channel=3, bezier=False): 51 | super(NuScenesSemanticDataset, self).__init__(version, dataroot, xbound, ybound) 52 | self.raster_map = RasterizedLocalMap(self.patch_size, self.canvas_size, num_degrees, max_channel, thickness, bezier=bezier) 53 | self.pivot_gen = GenPivots(map_region=(xbound[1], xbound[0], ybound[1], ybound[0]), resolution=xbound[2]) 54 | 55 | def __getitem__(self, idx): 56 | record = self.nusc.sample[idx] 57 | location = self.nusc.get('log', self.nusc.get('scene', record['scene_token'])['log_token'])['location'] 58 | ego_pose = self.nusc.get('ego_pose', self.nusc.get('sample_data', record['data']['LIDAR_TOP'])['ego_pose_token']) 59 | vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation']) 60 | imgs, trans, rots, intrins = self.get_data_info(record) 61 | semantic_masks, instance_masks, instance_vec_points, instance_ctr_points = \ 62 | self.raster_map.convert_vec_to_mask(vectors) 63 | pivots, pivot_lengths = self.pivot_gen.pivots_generate(instance_vec_points) 64 | 65 | return imgs, np.stack(trans), np.stack(rots), np.stack(intrins), semantic_masks, instance_masks, \ 66 | vectors, instance_vec_points, instance_ctr_points, pivots, pivot_lengths 67 | 68 | 69 | def main(): 70 | parser = argparse.ArgumentParser(description='Pivot-Bezier GT Generator.') 71 | parser.add_argument('-d', '--data_root', type=str, default='/data/dataset/public/nuScenes-tt') 72 | parser.add_argument('-v', '--version', nargs='+', type=str, default=['v1.0-trainval']) 73 | parser.add_argument("--num_degrees", nargs='+', type=int, default=[2, 1, 3]) 74 | parser.add_argument("--thickness", nargs='+', type=int, default=[1, 8]) 75 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 76 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 77 | parser.add_argument("--bezier", default=False, action='store_true') # whether to generate bezier GT 78 | args = parser.parse_args() 79 | 80 | n_classes = len(args.num_degrees) # 0 --> divider(d=2), 1 --> crossing(d=1), 2--> contour(d=3) 81 | save_dir = os.path.join(args.data_root, 'customer', "pivot-bezier") 82 | os.makedirs(save_dir, exist_ok=True) 83 | for version in args.version: 84 | dataset = NuScenesSemanticDataset( 85 | version, args.data_root, args.xbound, args.ybound, args.thickness, args.num_degrees, max_channel=n_classes, bezier=args.bezier) 86 | for idx in tqdm(range(dataset.__len__())): 87 | file_path = os.path.join(save_dir, dataset.nusc.sample[idx]['token'] + '.npz') 88 | # if os.path.exists(file_path): 89 | # continue 90 | item = dataset.__getitem__(idx) 91 | np.savez_compressed( 92 | file_path, image_paths=np.array(item[0]), trans=item[1], rots=item[2], intrins=item[3], 93 | semantic_mask=item[4][0], instance_mask=item[5][0], instance_mask8=item[5][1], 94 | ego_vectors=item[6], map_vectors=item[7], ctr_points=item[8], pivot_pts=item[9], pivot_length=item[10], 95 | ) 96 | 97 | 98 | if __name__ == '__main__': 99 | main() 100 | -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/rasterize.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from shapely import affinity 4 | from shapely.geometry import LineString, box 5 | from tools.anno_converter.bezier import PiecewiseBezierCurve 6 | 7 | 8 | class RasterizedLocalMap(object): 9 | def __init__(self, patch_size, canvas_size, num_degrees, 
max_channel, thickness, patch_angle=0.0, bezier=False): 10 | super().__init__() 11 | self.patch_size = patch_size 12 | self.canvas_size = canvas_size 13 | self.max_channel = max_channel 14 | self.num_degrees = num_degrees 15 | self.thickness = thickness 16 | assert self.thickness[0] == 1 17 | self.patch_box = (0.0, 0.0, self.patch_size[0], self.patch_size[1]) 18 | self.patch_angle = patch_angle 19 | self.patch = self.get_patch_coord() 20 | self.bezier = bezier 21 | if bezier: 22 | self.pbc_funcs = { 23 | d: PiecewiseBezierCurve(num_points=100, num_degree=d, margin=0.05, threshold=0.1) for d in num_degrees 24 | } 25 | 26 | def convert_vec_to_mask(self, vectors): 27 | vector_num_list = {cls_idx: [] for cls_idx in range(self.max_channel)} # map-type -> list 28 | for vector in vectors: 29 | if vector['pts_num'] >= 2: 30 | vector_num_list[vector['type']].append(LineString(vector['pts'][:vector['pts_num']])) 31 | ins_idx = 1 # instance-index 32 | instance_masks = np.zeros( 33 | (len(self.thickness), self.max_channel, self.canvas_size[1], self.canvas_size[0]), np.uint8) 34 | instance_vec_points, instance_ctr_points = [], [] 35 | for cls_idx in range(self.max_channel): 36 | if self.bezier: 37 | pbc_func = self.pbc_funcs[self.num_degrees[cls_idx]] 38 | else: 39 | pbc_func = None 40 | masks, map_points, ctr_points, ins_idx = self.line_geom_to_mask(vector_num_list[cls_idx], ins_idx, pbc_func) 41 | instance_masks[:, cls_idx, :, :] = masks 42 | for pts in map_points: 43 | instance_vec_points.append({'pts': pts, 'pts_num': len(pts), 'type': cls_idx}) 44 | for pts in ctr_points: 45 | instance_ctr_points.append({'pts': pts, 'pts_num': len(pts), 'type': cls_idx}) 46 | instance_masks = np.stack(instance_masks).astype(np.uint8) 47 | semantic_masks = (instance_masks != 0).astype(np.uint8) 48 | return semantic_masks, instance_masks, instance_vec_points, instance_ctr_points 49 | 50 | def line_geom_to_mask(self, layer_geom, idx, pbc_func, trans_type='index'): 51 | patch_x, patch_y, patch_h, patch_w = self.patch_box 52 | canvas_h = self.canvas_size[0] 53 | canvas_w = self.canvas_size[1] 54 | scale_height = canvas_h / patch_h 55 | scale_width = canvas_w / patch_w 56 | trans_x = -patch_x + patch_w / 2.0 57 | trans_y = -patch_y + patch_h / 2.0 58 | map_masks = np.zeros((len(self.thickness), *self.canvas_size), np.uint8) 59 | map_points, ctr_points = [], [] 60 | for line in layer_geom: 61 | new_line = line.intersection(self.patch) 62 | if not new_line.is_empty: 63 | new_line = affinity.affine_transform(new_line, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y]) 64 | if new_line.geom_type == 'MultiLineString': 65 | for single_line in new_line: 66 | pts2 = self.patch_size - np.array(single_line.coords[:])[:, ::-1] 67 | if pbc_func is not None: 68 | ctr_points.append(pbc_func(pts2)) 69 | single_line = affinity.scale(single_line, xfact=scale_width, yfact=scale_height, origin=(0, 0)) 70 | map_masks, idx = self.mask_for_lines(single_line, map_masks, self.thickness, idx, trans_type) 71 | pts = self.canvas_size - np.array(single_line.coords[:])[:, ::-1] 72 | map_points.append(pts.tolist()) 73 | else: 74 | pts2 = self.patch_size - np.array(new_line.coords[:])[:, ::-1] 75 | if pbc_func is not None: 76 | ctr_points.append(pbc_func(pts2)) 77 | new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0)) 78 | map_masks, idx = self.mask_for_lines(new_line, map_masks, self.thickness, idx, trans_type) 79 | pts = self.canvas_size - np.array(new_line.coords[:])[:, ::-1] 80 | map_points.append(pts.tolist()) 81 | 
map_masks_ret = [] 82 | for i in range(len(self.thickness)): 83 | map_masks_ret.append(np.flip(np.rot90(map_masks[i][None], k=1, axes=(1, 2)), axis=2)[0]) 84 | map_masks_ret = np.array(map_masks_ret) 85 | return map_masks_ret, map_points, ctr_points, idx 86 | 87 | @staticmethod 88 | def mask_for_lines(lines, mask, thickness, idx, trans_type='index'): 89 | coords = np.asarray(list(lines.coords), np.int32) 90 | coords = coords.reshape((-1, 2)) 91 | if len(coords) < 2: 92 | return mask, idx 93 | for i, t in enumerate(thickness): 94 | if trans_type == 'index': 95 | cv2.polylines(mask[i], [coords], False, color=idx, thickness=t) 96 | idx += 1 97 | return mask, idx 98 | 99 | def get_patch_coord(self): 100 | patch_x, patch_y, patch_h, patch_w = self.patch_box 101 | x_min = patch_x - patch_w / 2.0 102 | y_min = patch_y - patch_h / 2.0 103 | x_max = patch_x + patch_w / 2.0 104 | y_max = patch_y + patch_h / 2.0 105 | patch = box(x_min, y_min, x_max, y_max) 106 | patch = affinity.rotate(patch, self.patch_angle, origin=(patch_x, patch_y), use_radians=False) 107 | return patch 108 | -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/vectorize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from shapely import affinity, ops 3 | from nuscenes.eval.common.utils import quaternion_yaw, Quaternion 4 | from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer 5 | from shapely.geometry import LineString, box, MultiPolygon, MultiLineString 6 | 7 | 8 | class VectorizedLocalMap(object): 9 | def __init__(self, 10 | data_root, 11 | patch_size, 12 | canvas_size, 13 | line_classes=('road_divider', 'lane_divider'), 14 | ped_crossing_classes=('ped_crossing', ), 15 | contour_classes=('road_segment', 'lane'), 16 | sample_dist=1, 17 | num_samples=250, 18 | padding=False, 19 | normalize=False, 20 | fixed_num=-1, 21 | class2label={ 22 | 'road_divider': 0, 23 | 'lane_divider': 0, 24 | 'ped_crossing': 1, 25 | 'contours': 2, 26 | 'others': -1, 27 | }): 28 | super().__init__() 29 | self.data_root = data_root 30 | self.MAPS = ['boston-seaport', 'singapore-hollandvillage', 'singapore-onenorth', 'singapore-queenstown'] 31 | self.line_classes = line_classes 32 | self.ped_crossing_classes = ped_crossing_classes 33 | self.polygon_classes = contour_classes 34 | self.class2label = class2label 35 | self.nusc_maps = {} 36 | self.map_explorer = {} 37 | for loc in self.MAPS: 38 | self.nusc_maps[loc] = NuScenesMap(dataroot=self.data_root, map_name=loc) 39 | self.map_explorer[loc] = NuScenesMapExplorer(self.nusc_maps[loc]) 40 | self.patch_size = patch_size 41 | self.canvas_size = canvas_size 42 | self.sample_dist = sample_dist 43 | self.num_samples = num_samples 44 | self.padding = padding 45 | self.normalize = normalize 46 | self.fixed_num = fixed_num 47 | 48 | def gen_vectorized_samples(self, location, ego2global_translation, ego2global_rotation): 49 | map_pose = ego2global_translation[:2] # T 50 | rotation = Quaternion(ego2global_rotation) # R 51 | 52 | patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1]) 53 | patch_angle = quaternion_yaw(rotation) / np.pi * 180 54 | 55 | line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes, location) 56 | line_vector_dict = self.line_geoms_to_vectors(line_geom) 57 | 58 | ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes, location) 59 | ped_vector_list = 
self.line_geoms_to_vectors(ped_geom)['ped_crossing'] 60 | 61 | polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes, location) 62 | poly_bound_list = self.poly_geoms_to_vectors(polygon_geom) 63 | 64 | vectors = [] 65 | for line_type, vects in line_vector_dict.items(): 66 | for line, length in vects: 67 | vectors.append((line.astype(float), length, self.class2label.get(line_type, -1))) 68 | 69 | for ped_line, length in ped_vector_list: 70 | vectors.append((ped_line.astype(float), length, self.class2label.get('ped_crossing', -1))) 71 | 72 | for contour, length in poly_bound_list: 73 | vectors.append((contour.astype(float), length, self.class2label.get('contours', -1))) 74 | 75 | # filter out -1 76 | filtered_vectors = [] 77 | for pts, pts_num, _type in vectors: 78 | if _type != -1: 79 | filtered_vectors.append({'pts': pts, 'pts_num': pts_num, 'type': _type}) 80 | 81 | return filtered_vectors 82 | 83 | def get_map_geom(self, patch_box, patch_angle, layer_names, location): 84 | map_geom = [] 85 | for layer_name in layer_names: 86 | if layer_name in self.line_classes: 87 | map_explorer_x = self.map_explorer[location] 88 | geoms = map_explorer_x._get_layer_line(patch_box, patch_angle, layer_name) 89 | elif layer_name in self.polygon_classes: 90 | map_explorer_x = self.map_explorer[location] 91 | geoms = map_explorer_x._get_layer_polygon(patch_box, patch_angle, layer_name) 92 | elif layer_name in self.ped_crossing_classes: 93 | geoms = self.get_ped_crossing_line(patch_box, patch_angle, location) 94 | else: 95 | raise NotImplementedError 96 | map_geom.append((layer_name, geoms)) 97 | return map_geom 98 | 99 | def _one_type_line_geom_to_vectors(self, line_geom): 100 | line_vectors = [] 101 | for line in line_geom: 102 | if not line.is_empty: 103 | if line.geom_type == 'MultiLineString': 104 | for l in line.geoms: 105 | line_vectors.append(self.sample_pts_from_line(l)) 106 | elif line.geom_type == 'LineString': 107 | line_vectors.append(self.sample_pts_from_line(line)) 108 | else: 109 | raise NotImplementedError 110 | return line_vectors 111 | 112 | def poly_geoms_to_vectors(self, polygon_geom): 113 | roads = polygon_geom[0][1] 114 | lanes = polygon_geom[1][1] 115 | union_roads = ops.unary_union(roads) 116 | union_lanes = ops.unary_union(lanes) 117 | union_segments = ops.unary_union([union_roads, union_lanes]) 118 | max_x = self.patch_size[1] / 2 119 | max_y = self.patch_size[0] / 2 120 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2) 121 | exteriors = [] 122 | interiors = [] 123 | if union_segments.geom_type != 'MultiPolygon': 124 | union_segments = MultiPolygon([union_segments]) 125 | for poly in union_segments.geoms: 126 | exteriors.append(poly.exterior) 127 | for inter in poly.interiors: 128 | interiors.append(inter) 129 | 130 | results = [] 131 | for ext in exteriors: 132 | if ext.is_ccw: 133 | ext.coords = list(ext.coords)[::-1] 134 | lines = ext.intersection(local_patch) 135 | if isinstance(lines, MultiLineString): 136 | lines = ops.linemerge(lines) 137 | results.append(lines) 138 | 139 | for inter in interiors: 140 | if not inter.is_ccw: 141 | inter.coords = list(inter.coords)[::-1] 142 | lines = inter.intersection(local_patch) 143 | if isinstance(lines, MultiLineString): 144 | lines = ops.linemerge(lines) 145 | results.append(lines) 146 | 147 | return self._one_type_line_geom_to_vectors(results) 148 | 149 | def line_geoms_to_vectors(self, line_geom): 150 | line_vectors_dict = dict() 151 | for line_type, a_type_of_lines in line_geom: 152 | 
one_type_vectors = self._one_type_line_geom_to_vectors(a_type_of_lines) 153 | line_vectors_dict[line_type] = one_type_vectors 154 | 155 | return line_vectors_dict 156 | 157 | def ped_geoms_to_vectors(self, ped_geom): 158 | ped_geom = ped_geom[0][1] 159 | union_ped = ops.unary_union(ped_geom) 160 | if union_ped.geom_type != 'MultiPolygon': 161 | union_ped = MultiPolygon([union_ped]) 162 | 163 | max_x = self.patch_size[1] / 2 164 | max_y = self.patch_size[0] / 2 165 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2) 166 | results = [] 167 | for ped_poly in union_ped: 168 | # rect = ped_poly.minimum_rotated_rectangle 169 | ext = ped_poly.exterior 170 | if not ext.is_ccw: 171 | ext.coords = list(ext.coords)[::-1] 172 | lines = ext.intersection(local_patch) 173 | results.append(lines) 174 | 175 | return self._one_type_line_geom_to_vectors(results) 176 | 177 | def get_ped_crossing_line(self, patch_box, patch_angle, location): 178 | def add_line(poly_xy, idx, patch, patch_angle, patch_x, patch_y, line_list): 179 | points = [(p0, p1) for p0, p1 in zip(poly_xy[0, idx:idx + 2], poly_xy[1, idx:idx + 2])] 180 | line = LineString(points) 181 | line = line.intersection(patch) 182 | if not line.is_empty: 183 | line = affinity.rotate(line, -patch_angle, origin=(patch_x, patch_y), use_radians=False) 184 | line = affinity.affine_transform(line, [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y]) 185 | line_list.append(line) 186 | 187 | patch_x = patch_box[0] 188 | patch_y = patch_box[1] 189 | 190 | patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle) 191 | line_list = [] 192 | records = getattr(self.nusc_maps[location], 'ped_crossing') 193 | for record in records: 194 | polygon = self.map_explorer[location].extract_polygon(record['polygon_token']) 195 | poly_xy = np.array(polygon.exterior.xy) 196 | dist = np.square(poly_xy[:, 1:] - poly_xy[:, :-1]).sum(0) 197 | x1, x2 = np.argsort(dist)[-2:] 198 | 199 | add_line(poly_xy, x1, patch, patch_angle, patch_x, patch_y, line_list) 200 | add_line(poly_xy, x2, patch, patch_angle, patch_x, patch_y, line_list) 201 | 202 | return line_list 203 | 204 | def sample_pts_from_line(self, line): 205 | if self.fixed_num < 0: 206 | distances = np.arange(0, line.length, self.sample_dist) 207 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 208 | else: 209 | # fixed number of points, so distance is line.length / self.fixed_num 210 | distances = np.linspace(0, line.length, self.fixed_num) 211 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 212 | 213 | if self.normalize: 214 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]]) 215 | 216 | num_valid = len(sampled_points) 217 | 218 | if not self.padding or self.fixed_num > 0: 219 | # fixed num sample can return now! 220 | return sampled_points, num_valid 221 | 222 | # fixed distance sampling need padding! 
223 | num_valid = len(sampled_points) 224 | 225 | if self.fixed_num < 0: 226 | if num_valid < self.num_samples: 227 | padding = np.zeros((self.num_samples - len(sampled_points), 2)) 228 | sampled_points = np.concatenate([sampled_points, padding], axis=0) 229 | else: 230 | sampled_points = sampled_points[:self.num_samples, :] 231 | num_valid = self.num_samples 232 | 233 | if self.normalize: 234 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]]) 235 | num_valid = len(sampled_points) 236 | 237 | return sampled_points, num_valid 238 | -------------------------------------------------------------------------------- /tools/evaluation/ap.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from cd import chamfer_distance 3 | 4 | 5 | def get_line_instances_from_mask(mask, scale_x, scale_y): 6 | # mask: H, W 7 | # instance: {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)} 8 | indices = torch.unique(mask) 9 | instances = {} 10 | for idx in indices: 11 | idx = idx.item() 12 | if idx == 0: 13 | continue 14 | pc_x, pc_y = torch.where(mask == idx) 15 | pc_x = pc_x.float() * scale_x 16 | pc_y = pc_y.float() * scale_y 17 | coords = torch.stack([pc_x, pc_y], -1) 18 | instances[idx] = coords 19 | return instances 20 | 21 | 22 | def line_matching_by_CD(inst_pred_lines, inst_pred_confidence, inst_label_lines, threshold): 23 | # inst_pred_line: a list of points {1: (M1, 2), 2: (M2, 2), ..., k2: (M_k2, 2)} 24 | # inst_pred_confidence: a list of confidence [c1, c2, ..., ck2] 25 | # inst_label_line: a list of points {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)} 26 | # return: a list of {'pred': (M, 2), 'label': (N, 2), 'confidence': scalar} 27 | pred_num = len(inst_pred_lines) 28 | label_num = len(inst_label_lines) 29 | CD = torch.zeros((pred_num, label_num)).cuda() 30 | 31 | inst_pred_lines_keys = [*inst_pred_lines] 32 | inst_label_lines_keys = [*inst_label_lines] 33 | for i, key_pred in enumerate(inst_pred_lines_keys): 34 | for j, key_label in enumerate(inst_label_lines_keys): 35 | CD[i, j] = chamfer_distance( 36 | inst_pred_lines[key_pred][None], 37 | inst_label_lines[key_label][None], 38 | bidirectional=True, 39 | threshold=threshold, 40 | ) 41 | 42 | pred_taken = torch.zeros(pred_num, dtype=torch.bool).cuda() 43 | label_taken = torch.zeros(label_num, dtype=torch.bool).cuda() 44 | matched_list = [] 45 | if pred_num > 0 and label_num > 0: 46 | while True: 47 | idx = torch.argmin(CD) 48 | i = torch.div(idx, CD.shape[1], rounding_mode="floor") 49 | j = (idx % CD.shape[1]).item() 50 | # i, j = (idx // CD.shape[1]).item(), (idx % CD.shape[1]).item() 51 | if CD[i, j] >= threshold: 52 | break 53 | matched_list.append( 54 | { 55 | "pred": inst_pred_lines[inst_pred_lines_keys[i]], 56 | "confidence": inst_pred_confidence[inst_pred_lines_keys[i]], 57 | "label": inst_label_lines[inst_label_lines_keys[j]], 58 | "CD": CD[i, j].item(), 59 | } 60 | ) 61 | pred_taken[i] = True 62 | label_taken[j] = True 63 | CD[i, :] = threshold 64 | CD[:, j] = threshold 65 | 66 | for i in range(pred_num): 67 | if not pred_taken[i]: 68 | matched_list.append( 69 | { 70 | "pred": inst_pred_lines[inst_pred_lines_keys[i]], 71 | "confidence": inst_pred_confidence[inst_pred_lines_keys[i]], 72 | "label": None, 73 | "CD": threshold, 74 | } 75 | ) 76 | 77 | for j in range(label_num): 78 | if not label_taken[j]: 79 | matched_list.append( 80 | { 81 | "pred": None, 82 | "confidence": 0, 83 | "label": inst_label_lines[inst_label_lines_keys[j]], 84 | "CD": threshold, 85 | } 
86 | ) 87 | 88 | return matched_list 89 | 90 | 91 | def instance_mask_ap( 92 | AP_matrix, 93 | AP_count_matrix, 94 | inst_pred_mask, 95 | inst_label_mask, 96 | scale_x, 97 | scale_y, 98 | confidence, 99 | thresholds, 100 | sampled_recalls, 101 | ): 102 | # inst_pred: N, C, H, W 103 | # inst_label: N, C, H, W 104 | # confidence: N, max_instance_num 105 | N, C, H, W = inst_label_mask.shape 106 | for n in range(N): 107 | for c in range(C): 108 | inst_pred_lines = get_line_instances_from_mask(inst_pred_mask[n, c], scale_x, scale_y) 109 | inst_label_lines = get_line_instances_from_mask(inst_label_mask[n, c], scale_x, scale_y) 110 | if len(inst_pred_lines) == 0 and len(inst_label_lines) == 0: 111 | continue 112 | AP_matrix[c] += single_instance_line_AP( 113 | inst_pred_lines, confidence[n], inst_label_lines, thresholds, sampled_recalls=sampled_recalls 114 | ) 115 | AP_count_matrix[c] += 1 116 | return AP_matrix, AP_count_matrix 117 | 118 | 119 | def single_instance_line_AP(inst_pred_lines, inst_pred_confidence, inst_label_lines, thresholds, sampled_recalls): 120 | # inst_pred_line: a list of points {1: (M1, 2), 2: (M2, 2), ..., k2: (M_k2, 2)} 121 | # inst_pred_confidence: a list of confidence [c1, c2, ..., ck2] 122 | # inst_label_line: a list of points {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)} 123 | # thresholds: threshold of chamfer distance to identify TP 124 | num_thres = len(thresholds) 125 | AP_thres = torch.zeros(num_thres).cuda() 126 | for t in range(num_thres): 127 | matching_list = line_matching_by_CD(inst_pred_lines, inst_pred_confidence, inst_label_lines, thresholds[t]) 128 | precision, recall = get_precision_recall_curve_by_confidence( 129 | matching_list, len(inst_label_lines), thresholds[t] 130 | ) 131 | precision, recall = smooth_PR_curve(precision, recall) 132 | AP = calc_AP_from_precision_recall(precision, recall, sampled_recalls) 133 | AP_thres[t] = AP 134 | return AP_thres 135 | 136 | 137 | def get_precision_recall_curve_by_confidence(matching_list, num_gt, threshold): 138 | matching_list = sorted(matching_list, key=lambda x: x["confidence"]) 139 | 140 | TP = [0] 141 | FP = [0] 142 | for match_item in matching_list: 143 | pred = match_item["pred"] 144 | label = match_item["label"] 145 | dist = match_item["CD"] 146 | 147 | if pred is None: 148 | continue 149 | 150 | if label is None: 151 | TP.append(TP[-1]) 152 | FP.append(FP[-1] + 1) 153 | continue 154 | 155 | if dist < threshold: 156 | TP.append(TP[-1] + 1) 157 | FP.append(FP[-1]) 158 | else: 159 | TP.append(TP[-1]) 160 | FP.append(FP[-1] + 1) 161 | 162 | TP = torch.tensor(TP[1:]).cuda() 163 | FP = torch.tensor(FP[1:]).cuda() 164 | 165 | precision = TP / (TP + FP) 166 | recall = TP / num_gt 167 | return precision, recall 168 | 169 | 170 | def smooth_PR_curve(precision, recall): 171 | idx = torch.argsort(recall) 172 | recall = recall[idx] 173 | precision = precision[idx] 174 | length = len(precision) 175 | for i in range(length - 1, 0, -1): 176 | precision[:i][precision[:i] < precision[i]] = precision[i] 177 | return precision, recall 178 | 179 | 180 | def calc_AP_from_precision_recall(precision, recall, sampled_recalls): 181 | acc_precision = 0.0 182 | total = len(sampled_recalls) 183 | for r in sampled_recalls: 184 | idx = torch.where(recall >= r)[0] 185 | if len(idx) == 0: 186 | continue 187 | idx = idx[0] 188 | acc_precision += precision[idx] 189 | return acc_precision / total 190 | -------------------------------------------------------------------------------- /tools/evaluation/cd.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def chamfer_distance(source_pc, target_pc, threshold, cum=False, bidirectional=True): 4 | torch.backends.cuda.matmul.allow_tf32 = False 5 | torch.backends.cudnn.allow_tf32 = False 6 | # dist = torch.cdist(source_pc.float(), target_pc.float()) 7 | # dist = torch.cdist(source_pc.float(), target_pc.float(), compute_mode='donot_use_mm_for_euclid_dist') 8 | dist = torch.cdist(source_pc.type(torch.float64), target_pc.type(torch.float64)) 9 | dist1, _ = torch.min(dist, 2) 10 | dist2, _ = torch.min(dist, 1) 11 | if cum: 12 | len1 = dist1.shape[-1] 13 | len2 = dist2.shape[-1] 14 | dist1 = dist1.sum(-1) 15 | dist2 = dist2.sum(-1) 16 | return dist1, dist2, len1, len2 17 | dist1 = dist1.mean(-1) 18 | dist2 = dist2.mean(-1) 19 | if bidirectional: 20 | return min((dist1 + dist2) / 2, threshold) 21 | else: 22 | return min(dist1, threshold), min(dist2, threshold) 23 | 24 | 25 | def semantic_mask_chamfer_dist_cum(seg_pred, seg_label, scale_x, scale_y, threshold): 26 | # seg_label: N, C, H, W 27 | # seg_pred: N, C, H, W 28 | N, C, H, W = seg_label.shape 29 | 30 | cum_CD1 = torch.zeros(C, device=seg_label.device) 31 | cum_CD2 = torch.zeros(C, device=seg_label.device) 32 | cum_num1 = torch.zeros(C, device=seg_label.device) 33 | cum_num2 = torch.zeros(C, device=seg_label.device) 34 | for n in range(N): 35 | for c in range(C): 36 | pred_pc_x, pred_pc_y = torch.where(seg_pred[n, c] != 0) 37 | label_pc_x, label_pc_y = torch.where(seg_label[n, c] != 0) 38 | pred_pc_x = pred_pc_x.float() * scale_x 39 | pred_pc_y = pred_pc_y.float() * scale_y 40 | label_pc_x = label_pc_x.float() * scale_x 41 | label_pc_y = label_pc_y.float() * scale_y 42 | if len(pred_pc_x) == 0 and len(label_pc_x) == 0: 43 | continue 44 | 45 | if len(label_pc_x) == 0: 46 | cum_CD1[c] += len(pred_pc_x) * threshold 47 | cum_num1[c] += len(pred_pc_x) 48 | continue 49 | 50 | if len(pred_pc_x) == 0: 51 | cum_CD2[c] += len(label_pc_x) * threshold 52 | cum_num2[c] += len(label_pc_x) 53 | continue 54 | 55 | pred_pc_coords = torch.stack([pred_pc_x, pred_pc_y], -1).float() 56 | label_pc_coords = torch.stack([label_pc_x, label_pc_y], -1).float() 57 | CD1, CD2, len1, len2 = chamfer_distance( 58 | pred_pc_coords[None], label_pc_coords[None], threshold=threshold, cum=True 59 | ) 60 | cum_CD1[c] += CD1.item() 61 | cum_CD2[c] += CD2.item() 62 | cum_num1[c] += len1 63 | cum_num2[c] += len2 64 | return cum_CD1, cum_CD2, cum_num1, cum_num2 65 | -------------------------------------------------------------------------------- /tools/evaluation/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import numpy as np 5 | import pickle as pkl 6 | from tqdm import tqdm 7 | from tabulate import tabulate 8 | from torch.utils.data import Dataset, DataLoader 9 | from ap import instance_mask_ap as get_batch_ap 10 | 11 | 12 | class BeMapNetResultForNuScenes(Dataset): 13 | def __init__(self, gt_dir, dt_dir, val_txt): 14 | self.gt_dir, self.dt_dir = gt_dir, dt_dir 15 | self.tokens = [fname.strip().split('.')[0] for fname in open(val_txt).readlines()] 16 | self.max_line_count = 100 17 | 18 | def __getitem__(self, idx): 19 | token = self.tokens[idx] 20 | gt_path = os.path.join(self.gt_dir, f"{token}.npz") 21 | gt_masks = np.load(open(gt_path, "rb"), allow_pickle=True)["instance_mask"] 22 | dt_item = np.load(os.path.join(self.dt_dir, f"{token}.npz"), allow_pickle=True) 23 | dt_masks = 
dt_item["dt_mask"] 24 | dt_scores = dt_item['dt_res'].item()["confidence_level"] 25 | dt_scores = np.array(list(dt_scores) + [-1] * (self.max_line_count - len(dt_scores))) 26 | return torch.from_numpy(dt_masks), torch.from_numpy(dt_scores).float(), torch.from_numpy(gt_masks) 27 | 28 | def __len__(self): 29 | return len(self.tokens) 30 | 31 | 32 | class BeMapNetEvaluatorForNuScenes(object): 33 | def __init__(self, gt_dir, dt_dir, val_txt, batch_size=4, num_classes=3, map_resolution=(0.15, 0.15)): 34 | 35 | self.THRESHOLDS = [0.2, 0.5, 1.0, 1.5] 36 | self.CLASS_NAMES = ["Divider", "PedCross", "Contour"] 37 | self.SAMPLED_RECALLS = torch.linspace(0.1, 1, 10).cuda() 38 | self.res_dataloader = DataLoader( 39 | BeMapNetResultForNuScenes(gt_dir, dt_dir, val_txt), 40 | batch_size=batch_size, shuffle=False, drop_last=False, num_workers=8 41 | ) 42 | self.map_resolution = map_resolution 43 | self.ap_matrix = torch.zeros((num_classes, len(self.THRESHOLDS))).cuda() 44 | self.ap_count_matrix = torch.zeros((num_classes, len(self.THRESHOLDS))).cuda() 45 | 46 | def execute(self): 47 | 48 | for dt_masks, dt_scores, gt_masks in tqdm(self.res_dataloader): 49 | self.ap_matrix, self.ap_count_matrix = get_batch_ap( 50 | self.ap_matrix, 51 | self.ap_count_matrix, 52 | dt_masks.cuda(), 53 | gt_masks.cuda(), 54 | *self.map_resolution, 55 | dt_scores.cuda(), 56 | self.THRESHOLDS, 57 | self.SAMPLED_RECALLS, 58 | ) 59 | ap = (self.ap_matrix / self.ap_count_matrix).cpu().data.numpy() 60 | self._format_print(ap) 61 | 62 | def _format_print(self, ap): 63 | res_matrix = [] 64 | table_header = ["Class", "AP@.2", "AP@.5", "AP@1.", "AP@1.5", "mAP@HARD", "mAP@EASY"] 65 | table_values = [] 66 | for i, cls_name in enumerate(self.CLASS_NAMES): 67 | res_matrix_line = [ap[i][0], ap[i][1], ap[i][2], ap[i][3], np.mean(ap[i][:-1]), np.mean(ap[i][1:])] 68 | res_matrix.append(res_matrix_line) 69 | table_values.append([cls_name] + self.line_data_to_str(*res_matrix_line)) 70 | avg = np.mean(np.array(res_matrix), axis=0) 71 | table_values.append(["Average", *self.line_data_to_str(*avg)]) 72 | table_str = tabulate(table_values, headers=table_header, tablefmt="grid") 73 | print(table_str) 74 | return table_str 75 | 76 | @staticmethod 77 | def line_data_to_str(ap0, ap1, ap2, ap3, map1, map2): 78 | return [ 79 | "{:.1f}".format(ap0 * 100), 80 | "{:.1f}".format(ap1 * 100), 81 | "{:.1f}".format(ap2 * 100), 82 | "{:.1f}".format(ap3 * 100), 83 | "{:.1f}".format(map1 * 100), 84 | "{:.1f}".format(map2 * 100), 85 | ] 86 | 87 | 88 | evaluator = BeMapNetEvaluatorForNuScenes( 89 | gt_dir=sys.argv[1], 90 | dt_dir=sys.argv[2], 91 | val_txt=sys.argv[3], 92 | batch_size=4, 93 | num_classes=3, 94 | map_resolution=(0.15, 0.15), 95 | ) 96 | 97 | evaluator.execute() 98 | --------------------------------------------------------------------------------