├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── figures
│   │   ├── arch.png
│   │   ├── demo.gif
│   │   ├── pivot-title.png
│   │   ├── pivotnet-arch.png
│   │   ├── pivotnet-logo.png
│   │   └── title.png
│   ├── splits
│   │   └── nuscenes
│   │       ├── all.txt
│   │       ├── cloudy.txt
│   │       ├── day.txt
│   │       ├── night.txt
│   │       ├── rainy.txt
│   │       ├── sunny.txt
│   │       ├── test.txt
│   │       ├── train.txt
│   │       └── val.txt
│   └── weights
│       └── README.md
├── configs
│   ├── bemapnet_nuscenes_effb0.py
│   ├── bemapnet_nuscenes_res50.py
│   ├── bemapnet_nuscenes_swint.py
│   ├── pivotnet_nuscenes_effb0.py
│   ├── pivotnet_nuscenes_res50.py
│   └── pivotnet_nuscenes_swint.py
├── mapmaster
│   ├── __init__.py
│   ├── dataset
│   │   ├── nuscenes_bemapnet.py
│   │   ├── nuscenes_pivotnet.py
│   │   ├── sampler.py
│   │   └── transform.py
│   ├── engine
│   │   ├── callbacks.py
│   │   ├── core.py
│   │   ├── environ.py
│   │   ├── executor.py
│   │   └── experiment.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── bifpn
│   │   │   │   ├── __init__.py
│   │   │   │   ├── model.py
│   │   │   │   └── utils.py
│   │   │   ├── efficientnet
│   │   │   │   ├── __init__.py
│   │   │   │   ├── model.py
│   │   │   │   └── utils.py
│   │   │   ├── model.py
│   │   │   ├── resnet
│   │   │   │   ├── __init__.py
│   │   │   │   ├── resnet.py
│   │   │   │   └── utils.py
│   │   │   └── swin_transformer
│   │   │       ├── __init__.py
│   │   │       ├── model.py
│   │   │       └── utils.py
│   │   ├── bev_decoder
│   │   │   ├── __init__.py
│   │   │   ├── deform_transformer
│   │   │   │   ├── __init__.py
│   │   │   │   ├── deform_transformer.py
│   │   │   │   ├── ops
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── functions
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── ms_deform_attn_func.py
│   │   │   │   │   ├── make.sh
│   │   │   │   │   ├── modules
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── ms_deform_attn.py
│   │   │   │   │   ├── setup.py
│   │   │   │   │   ├── src
│   │   │   │   │   │   ├── cpu
│   │   │   │   │   │   │   ├── ms_deform_attn_cpu.cpp
│   │   │   │   │   │   │   └── ms_deform_attn_cpu.h
│   │   │   │   │   │   ├── cuda
│   │   │   │   │   │   │   ├── ms_deform_attn_cuda.cu
│   │   │   │   │   │   │   ├── ms_deform_attn_cuda.h
│   │   │   │   │   │   │   └── ms_deform_im2col_cuda.cuh
│   │   │   │   │   │   ├── ms_deform_attn.h
│   │   │   │   │   │   └── vision.cpp
│   │   │   │   │   └── test.py
│   │   │   │   └── position_encoding.py
│   │   │   ├── model.py
│   │   │   └── transformer.py
│   │   ├── ins_decoder
│   │   │   ├── __init__.py
│   │   │   ├── mask2former.py
│   │   │   ├── model.py
│   │   │   └── pointmask2former.py
│   │   ├── network.py
│   │   ├── output_head
│   │   │   ├── __init__.py
│   │   │   ├── bezier_outputs.py
│   │   │   ├── bezier_post_processor.py
│   │   │   ├── line_matching.py
│   │   │   ├── pivot_outputs.py
│   │   │   └── pivot_post_processor.py
│   │   └── utils
│   │       ├── mask_loss.py
│   │       ├── misc.py
│   │       ├── position_encoding.py
│   │       └── recovery_loss.py
│   └── utils
│       ├── env.py
│       └── misc.py
├── requirement.txt
├── run.sh
└── tools
    ├── __init__.py
    ├── anno_converter
    │   ├── __init__.py
    │   ├── bezier.py
    │   ├── generate_pivots.py
    │   └── nuscenes
    │       ├── __init__.py
    │       ├── convert.py
    │       ├── rasterize.py
    │       └── vectorize.py
    └── evaluation
        ├── ap.py
        ├── cd.py
        └── eval.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Wenjie
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |

2 | 
3 |
4 | #
5 | ### NEWS !!!
6 | * **`Jan. 15th, 2024`:** :rocket: :rocket: :rocket: The official implementation of our PivotNet is now released. Enjoy it!
7 | * **`Jul. 14th, 2023`:** :clap: Our **PivotNet** is accepted by ICCV 2023! Refer to the **[Long-version Paper](https://arxiv.org/pdf/2308.16477)** for more details.
8 | * **`May. 26th, 2023`:** :trophy: Our team won the **Championship** and the **Innovation Award** of the CVPR23 *Online HD Map Construction Challenge*! [ **[Leaderboard](https://opendrivelab.com/AD23Challenge.html#online_hd_map_construction)** / **[Tech-Report](https://arxiv.org/abs/2306.10301)**]
9 | * **`Feb. 28th, 2023`:** :clap: Our **BeMapNet** is accepted by CVPR 2023! Refer to the **[Paper](https://openaccess.thecvf.com/content/CVPR2023/html/Qiao_End-to-End_Vectorized_HD-Map_Construction_With_Piecewise_Bezier_Curve_CVPR_2023_paper.html)** for more details. [**[Long version on arXiv](https://arxiv.org/pdf/2306.09700)** / **[Code](https://github.com/er-muyue/BeMapNet/tree/main)**]
10 |
11 | ## Introduction
12 | **Vectorized high-definition map (HD-map) construction** has garnered considerable attention in the field of autonomous driving research. Towards precise map element learning, we propose a simple yet effective architecture named **PivotNet**, which adopts unified pivot-based map representations and is formulated as a direct set prediction paradigm. Concretely, we first propose a novel *Point-to-Line Mask module* to encode both the subordinate and geometrical point-line priors in the network. Then, a well-designed *Pivot Dynamic Matching module* is proposed to model the topology in dynamic point sequences by introducing the concept of sequence matching. Furthermore, to supervise the position and topology of the vectorized point predictions, we propose a *Dynamic Vectorized Sequence loss*. PivotNet contains four primary components: **Camera Feature Extractor**, **BEV Feature Decoder**, **Line-aware Point Decoder**, and **Pivotal Point Predictor**. It takes RGB images as input and generates a flexible and compact vectorized representation without any post-processing.
13 |
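To make this composition concrete, here is a toy, illustrative sketch of the four-stage flow. All module names, shapes, and arguments below are hypothetical stand-ins, not the actual implementation; the real components live under `mapmaster/models/` (`backbone/`, `bev_decoder/`, `ins_decoder/`, `output_head/`) and are wired together in `mapmaster/models/network.py`.

```python
# Toy sketch only: hypothetical module names and shapes, NOT the actual PivotNet code.
import torch
import torch.nn as nn


class ToyPivotNet(nn.Module):
    def __init__(self, num_classes=3, num_queries=60, max_pts=10, dim=64):
        super().__init__()
        self.max_pts = max_pts
        # 1) Camera Feature Extractor: a shared 2D backbone over all surround views.
        self.backbone = nn.Sequential(nn.Conv2d(3, dim, 7, stride=4, padding=3), nn.ReLU())
        # 2) BEV Feature Decoder: lifts/fuses image features into BEV space
        #    (a deformable transformer in the real model; a linear stand-in here).
        self.bev_decoder = nn.Linear(dim, dim)
        # 3) Line-aware Point Decoder: instance queries attend to BEV features.
        self.ins_queries = nn.Embedding(num_queries, dim)
        self.ins_decoder = nn.MultiheadAttention(dim, 4, batch_first=True)
        # 4) Pivotal Point Predictor: per-query class logits and a pivot point sequence.
        self.cls_head = nn.Linear(dim, num_classes + 1)   # divider / ped-crossing / boundary / none
        self.pts_head = nn.Linear(dim, max_pts * 2)       # (x, y) per pivot, normalized BEV coords

    def forward(self, images):                            # images: (B, N_cam, 3, H, W)
        b, n = images.shape[:2]
        feats = self.backbone(images.flatten(0, 1))                   # (B*N, C, h, w)
        bev = feats.mean(dim=(2, 3)).view(b, n, -1).mean(dim=1)       # crude stand-in for BEV lifting
        bev = self.bev_decoder(bev).unsqueeze(1)                      # (B, 1, C)
        q = self.ins_queries.weight.unsqueeze(0).expand(b, -1, -1)    # (B, Q, C)
        q, _ = self.ins_decoder(q, bev, bev)
        cls_logits = self.cls_head(q)                                 # (B, Q, num_classes + 1)
        points = self.pts_head(q).sigmoid().view(b, -1, self.max_pts, 2)
        return cls_logits, points                                     # direct set prediction, no post-processing


if __name__ == "__main__":
    logits, pts = ToyPivotNet()(torch.randn(1, 6, 3, 64, 128))
    print(logits.shape, pts.shape)  # torch.Size([1, 60, 4]) torch.Size([1, 60, 10, 2])
```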
14 |
15 |
16 | ## Documentation
17 |
18 | We build the released version of **PivotNet** upon the [BeMapNet](https://github.com/er-muyue/BeMapNet/tree/main) project. Therefore, this project supports the reproduction of **both** PivotNet and BeMapNet.
19 |
20 |
21 | Step-by-step Installation
22 | <br>
23 |
24 | - **a. Check Environment**
25 | ```shell
26 | Python >= 3.8
27 | CUDA 11.1
28 |   # Other versions of Python/CUDA have not been fully tested but are expected to work as well.
29 | ```
30 |
31 | - **b. Create a conda virtual environment and activate it. (Optional)**
32 | ```shell
33 | conda create -n pivotnet python=3.8 -y
34 | conda activate pivotnet
35 | ```
36 |
37 | - **c. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/get-started/previous-versions/).**
38 | ```shell
39 | pip3 install torch==1.10.1+cu111 torchvision==0.11.2+cu111 -f https://download.pytorch.org/whl/torch_stable.html
40 | ```
41 |
42 | - **d. Install MMCV following the [official instructions](https://github.com/open-mmlab/mmcv). (need GPU)**
43 | ```shell
44 | pip3 install -U openmim
45 | mim install mmcv==1.7.1
46 | ```
47 |
48 | - **e. Install Detectron2 following the [official instructions](https://detectron2.readthedocs.io/en/latest/tutorials/install.html).**
49 | ```shell
50 | python3 -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html
51 | ```
52 |
53 | - **f. Compiling CUDA operators for Deformable Transformer.**
54 | ```shell
55 | cd mapmaster/models/bev_decoder/deform_transformer/ops
56 | bash make.sh
57 | ```
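
  If the build succeeds, the compiled extension should be importable from Python. Below is a minimal, hypothetical sanity check; the module name `MultiScaleDeformableAttention` is the one conventionally produced by this Deformable-DETR-style `setup.py`, so adjust it if your build names it differently. The bundled `ops/test.py` should provide a fuller numerical check (as in upstream Deformable-DETR).
  ```python
  # Hypothetical post-build check (run after `bash make.sh` completes successfully).
  import torch  # import torch first so the extension can resolve its symbols
  import MultiScaleDeformableAttention as MSDA  # name assumed from the Deformable-DETR-style setup.py

  print("Compiled deformable-attention op loaded from:", MSDA.__file__)
  ```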
58 |
59 | - **g. Install PivotNet.**
60 | ```shell
61 | git clone git@github.com:wenjie710/PivotNet.git
62 | cd pivotnet
63 | pip3 install -r requirement.txt
64 | ```
65 |
66 |
67 |
68 |
69 |
70 | Material Preparation
71 | <br>
72 |
73 | - **a. Data: NuScenes**
74 |   - Download and unzip the [NuScenes](https://www.nuscenes.org/nuscenes#download) dataset on your server and link it to the desired path.
75 | ```shell
76 | cd /path/to/pivotnet
77 | mkdir data
78 | ln -s /any/path/to/your/nuscenes data/nuscenes
79 | ```
80 |   - Generate pivot/Bezier annotations from the raw NuScenes annotations.
81 | ```shell
82 | cd /path/to/pivotnet
83 |     python3 tools/anno_converter/nuscenes/convert.py -d ./data            # generate pivot annotations only
84 |     python3 tools/anno_converter/nuscenes/convert.py -d ./data --bezier   # or: generate both pivot and Bezier annotations (takes more time)
85 | ```
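
  Each sample token listed in `assets/splits/nuscenes/*.txt` gets one `.npz` record. The sketch below shows one way to inspect a record, assuming the default output layout shown in the structure check further down (`data/nuscenes/customer/pivot-bezier/<token>.npz`); the fields it prints are the ones consumed by `mapmaster/dataset/nuscenes_pivotnet.py` (`image_paths`, `rots`, `trans`, `intrins`, `pivot_pts`, `pivot_length`, and the rasterized masks).
  ```python
  # Minimal inspection sketch, assuming the default annotation layout described above.
  import os
  import numpy as np

  anno_root = "data/nuscenes/customer/pivot-bezier"
  token = open("assets/splits/nuscenes/val.txt").readline().strip()   # any listed sample token works
  sample = np.load(os.path.join(anno_root, f"{token}.npz"), allow_pickle=True)

  print(sorted(sample.files))           # keys consumed by mapmaster/dataset/nuscenes_pivotnet.py
  print(sample["image_paths"])          # the six surround-view image paths, relative to data/nuscenes
  print(type(sample["pivot_pts"].item()), sample["pivot_length"].item())
  ```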
86 |
87 | - **b. Weights: Public-Pretrain-Models**
88 |   - Download the public pretrained weights used for backbone initialization.
89 | ```shell
90 | cd /path/to/pivotnet
91 | cd assets/weights
92 |     wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/efficientnet-b0-355c32eb.pth
93 |     wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/resnet50-0676ba61.pth
94 |     wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/upernet_swin_tiny_patch4_window7_512x512.pth
95 | ```
96 |
97 | - **c. Check: Project-Structure**
98 |   - Your project directory should look like this:
99 | ```shell
100 | assets
101 | | -- weights (resnet, swin-t, efficient-b0, ...)
102 | | --
103 | mapmaster
104 | configs
105 | data
106 | | -- nuscenes
107 | | -- samples (CAM_FRONT, CAM_FRONT_LEFT, CAM_FRONT_RIGHT, ...)
108 | | -- annotations
109 | | -- v1.0-trainval
110 | | -- ...
111 | | -- customer
112 | | -- pivot-bezier
113 | | -- *.npz
114 | tools
115 | ```
116 |
117 |
118 |
119 |
120 |
121 | Training and Evaluation
122 | <br>
123 |
124 | - **a. Model Training**
125 | ```shell
126 | bash run.sh train pivotnet_nuscenes_swint 30 # pivotnet, default: 8GPUs, bs=1, epochs=30
127 | bash run.sh train bemapnet_nuscenes_swint 30 # bemapnet, default: 8GPUs, bs=1, epochs=30
128 | ```
129 |
130 | - **b. Model Evaluation**
131 | ```shell
132 | bash run.sh test pivotnet_nuscenes_swint ${checkpoint-path} # for pivotnet
133 | bash run.sh test bemapnet_nuscenes_swint ${checkpoint-path} # for bemapnet
134 | ```
135 |
136 | - **c. Reproduce with one command**
137 | ```shell
138 | bash run.sh reproduce pivotnet_nuscenes_swint # for pivotnet
139 | bash run.sh reproduce bemapnet_nuscenes_swint # for bemapnet
140 | ```
141 |
142 |
143 | ## Models & Results
144 |
145 |
146 |
147 | Results on NuScenes Val Set
148 | <br>
149 |
150 | - **a. Easy Setting --> AP thresholds are `0.5m, 1.0m, 1.5m` (same as [VectorMapNet](https://arxiv.org/abs/2206.08920.pdf) / [MapTR](https://arxiv.org/abs/2208.14437.pdf))**
151 |
152 | | Model | Config | Schd | mAPdivider | mAPpedcross |mAPboundary | mAPavg | Download |
153 | | :---: | :---: | :---: | :---: | :---:|:---:| :---: | :---: |
154 | |PivotNet-Effb0| [config](configs/pivotnet_nuscenes_effb0.py) | 30ep | 59.3 | 54.1 | 60.0 | 57.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30_train.log)|
155 | |PivotNet-Res50| [config](configs/pivotnet_nuscenes_res50.py) | 30ep | 58.0 | 53.5 | 59.7 | 57.1 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30_train.log)|
156 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 30ep | 62.9 | 57.9 | 64.0 | 61.6 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30_train.log)|
157 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 110ep | 67.8 | 62.1 | 69.2 | 66.4 | [model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110_train.log) |
158 |
159 |
160 | - **b. Hard Setting --> AP thresholds are `0.2m, 0.5m, 1.0m` (recommended as a more practical HD-map evaluation protocol; an illustrative matching sketch follows the tables below)**
161 |
162 | | Model | Config | Schd | mAPdivider | mAPpedcross |mAPboundary | mAPavg | Download |
163 | | :---: | :---: | :---: | :---: | :---:|:---:| :---: | :---: |
164 | |PivotNet-Effb0| [config](configs/pivotnet_nuscenes_effb0.py) | 30ep | 44.0 | 35.9 | 39.7 | 39.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30_train.log) |
165 | |PivotNet-Res50| [config](configs/pivotnet_nuscenes_res50.py) | 30ep | 43.5 | 35.6 | 40.4 | 39.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30_train.log) |
166 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 30ep | 47.7 | 39.4 | 43.7 | 43.6 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30_train.log) |
167 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 110ep | 54.1 | 43.3 | 50.3 | 49.3 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110_train.log) |
168 |
169 |
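For intuition, the sketch below illustrates a generic Chamfer-distance AP protocol of this kind: a prediction counts as a true positive when its Chamfer distance to a not-yet-matched ground-truth instance falls below the threshold, and AP is accumulated over score-sorted predictions. It is a simplified stand-in, not the exact code in `tools/evaluation/` (`cd.py`, `ap.py`, `eval.py`); the interpolation scheme and matching details there may differ.

```python
# Simplified illustration of Chamfer-distance AP; not the exact tools/evaluation code.
import numpy as np


def chamfer_distance(a, b):
    """Symmetric Chamfer distance between two polylines of shape (N, 2) and (M, 2)."""
    d = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=-1)
    return 0.5 * (d.min(axis=1).mean() + d.min(axis=0).mean())


def average_precision(preds, gts, threshold):
    """preds: list of (score, polyline); gts: list of polylines; threshold in meters."""
    preds = sorted(preds, key=lambda p: -p[0])           # greedy matching in score order
    matched = [False] * len(gts)
    tp = np.zeros(len(preds))
    for i, (_, pl) in enumerate(preds):
        dists = [chamfer_distance(pl, g) if not matched[j] else np.inf for j, g in enumerate(gts)]
        j = int(np.argmin(dists)) if dists else -1
        if j >= 0 and dists[j] < threshold:
            matched[j], tp[i] = True, 1.0
    cum_tp = np.cumsum(tp)
    recall = cum_tp / max(len(gts), 1)
    precision = cum_tp / np.arange(1, len(preds) + 1)
    # 101-point interpolated AP (one common convention)
    return float(np.mean([precision[recall >= r].max() if (recall >= r).any() else 0.0
                          for r in np.linspace(0, 1, 101)]))

# The hard-setting mAP would then average AP over the 0.2 / 0.5 / 1.0 m thresholds and the three classes.
```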
170 |
171 |
172 | #
173 | ## Citation
174 | If you find PivotNet/BeMapNet/MachMap useful in your research or applications, please consider giving us a star :star: and citing them with the following BibTeX entries:
175 | ```
176 | @inproceedings{ding2023pivotnet,
177 | title={Pivotnet: Vectorized pivot learning for end-to-end hd map construction},
178 | author={Ding, Wenjie and Qiao, Limeng and Qiu, Xi and Zhang, Chi},
179 | booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
180 | pages={3672--3682},
181 | year={2023}
182 | }
183 |
184 | @InProceedings{Qiao_2023_CVPR,
185 | author = {Qiao, Limeng and Ding, Wenjie and Qiu, Xi and Zhang, Chi},
186 | title = {End-to-End Vectorized HD-Map Construction With Piecewise Bezier Curve},
187 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
188 | month = {June},
189 | year = {2023},
190 | pages = {13218-13228}
191 | }
192 |
193 | @article{qiao2023machmap,
194 | author={Limeng Qiao and Yongchao Zheng and Peng Zhang and Wenjie Ding and Xi Qiu and Xing Wei and Chi Zhang},
195 | title={MachMap: End-to-End Vectorized Solution for Compact HD-Map Construction},
196 | journal={arXiv preprint arXiv:2306.10301},
197 | year={2023},
198 | }
199 |
200 | ```
201 |
--------------------------------------------------------------------------------
/assets/figures/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/arch.png
--------------------------------------------------------------------------------
/assets/figures/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/demo.gif
--------------------------------------------------------------------------------
/assets/figures/pivot-title.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivot-title.png
--------------------------------------------------------------------------------
/assets/figures/pivotnet-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivotnet-arch.png
--------------------------------------------------------------------------------
/assets/figures/pivotnet-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivotnet-logo.png
--------------------------------------------------------------------------------
/assets/figures/title.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/title.png
--------------------------------------------------------------------------------
/assets/weights/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/weights/README.md
--------------------------------------------------------------------------------
/configs/pivotnet_nuscenes_effb0.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mapmaster.engine.core import MapMasterCli
3 | from pivotnet_nuscenes_swint import EXPConfig, Exp
4 |
5 | EXPConfig.model_setup["im_backbone"] = dict(
6 | arch_name="efficient_net",
7 | ret_layers=2,
8 | fpn_kwargs=None,
9 | bkb_kwargs=dict(
10 | model_name='efficientnet-b0',
11 | in_channels=3,
12 | out_stride=32,
13 | with_head=False,
14 | with_cp=True,
15 | norm_layer=nn.SyncBatchNorm,
16 | weights_path="assets/weights/efficientnet-b0-355c32eb.pth",
17 | ),
18 | )
19 |
20 | EXPConfig.model_setup['bev_decoder']["net_kwargs"].update(
21 | dict(
22 | in_channels=[112, 320],
23 | )
24 | )
25 |
26 | class ExpDev(Exp):
27 | def __init__(self, batch_size_per_device=1, total_devices=8, max_epoch=60, **kwargs):
28 | super(ExpDev, self).__init__(batch_size_per_device, total_devices, max_epoch, **kwargs)
29 | self.exp_config = EXPConfig()
30 |
31 | if __name__ == "__main__":
32 | MapMasterCli(ExpDev).run()
33 |
34 |
--------------------------------------------------------------------------------
/configs/pivotnet_nuscenes_res50.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mapmaster.engine.core import MapMasterCli
3 | from pivotnet_nuscenes_swint import EXPConfig, Exp
4 |
5 |
6 | EXPConfig.model_setup["im_backbone"] = dict(
7 | arch_name="resnet",
8 | ret_layers=2,
9 | fpn_kwargs=None,
10 | bkb_kwargs=dict(
11 | depth=50,
12 | num_stages=4,
13 | out_indices=(2, 3),
14 | frozen_stages=-1, # do not freeze any layers
15 | norm_cfg=dict(type='SyncBN', requires_grad=True),
16 | norm_eval=True,
17 | style='pytorch',
18 | init_cfg=dict(
19 | type='Pretrained',
20 | checkpoint='assets/weights/resnet50-0676ba61.pth'), # from pytorch
21 | with_cp=True,
22 | ),
23 | )
24 |
25 | EXPConfig.model_setup['bev_decoder']["net_kwargs"].update(
26 | dict(
27 | in_channels=[1024, 2048],
28 | )
29 | )
30 |
31 | class ExpDev(Exp):
32 | def __init__(self, batch_size_per_device=1, total_devices=8, max_epoch=60, **kwargs):
33 | super(ExpDev, self).__init__(batch_size_per_device, total_devices, max_epoch, **kwargs)
34 | self.exp_config = EXPConfig()
35 |
36 | if __name__ == "__main__":
37 | MapMasterCli(ExpDev).run()
38 |
39 |
--------------------------------------------------------------------------------
/mapmaster/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/mapmaster/__init__.py
--------------------------------------------------------------------------------
/mapmaster/dataset/nuscenes_bemapnet.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import numpy as np
4 | from PIL import Image
5 | from copy import deepcopy
6 | from skimage import io as skimage_io
7 | from torch.utils.data import Dataset
8 |
9 |
10 | class NuScenesMapDataset(Dataset):
11 | def __init__(self, img_key_list, map_conf, ida_conf, bezier_conf, transforms, data_split="training"):
12 | super().__init__()
13 | self.img_key_list = img_key_list
14 | self.map_conf = map_conf
15 | self.ida_conf = ida_conf
16 | self.bez_conf = bezier_conf
17 | self.ego_size = map_conf["ego_size"]
18 | self.mask_key = map_conf["mask_key"]
19 | self.nusc_root = map_conf["nusc_root"]
20 | self.anno_root = map_conf["anno_root"]
21 | self.split_dir = map_conf["split_dir"]
22 | self.num_degree = bezier_conf["num_degree"]
23 | self.max_pieces = bezier_conf["max_pieces"]
24 | self.max_instances = bezier_conf["max_instances"]
25 | self.split_mode = 'train' if data_split == "training" else 'val'
26 | split_path = os.path.join(self.split_dir, f'{self.split_mode}.txt')
27 | self.tokens = [token.strip() for token in open(split_path).readlines()]
28 | self.transforms = transforms
29 |
30 | def __getitem__(self, idx: int):
31 | token = self.tokens[idx]
32 | sample = np.load(os.path.join(self.anno_root, f'{token}.npz'), allow_pickle=True)
33 | resize_dims, crop, flip, rotate = self.sample_ida_augmentation()
34 | images, ida_mats = [], []
35 | for im_view in self.img_key_list:
36 | for im_path in sample['image_paths']:
37 | if im_path.startswith(f'samples/{im_view}/'):
38 | im_path = os.path.join(self.nusc_root, im_path)
39 | img = skimage_io.imread(im_path)
40 | img, ida_mat = self.img_transform(img, resize_dims, crop, flip, rotate)
41 | images.append(img)
42 | ida_mats.append(ida_mat)
43 | extrinsic = np.stack([np.eye(4) for _ in range(sample["trans"].shape[0])], axis=0)
44 | extrinsic[:, :3, :3] = sample["rots"]
45 | extrinsic[:, :3, 3] = sample["trans"]
46 | intrinsic = sample['intrins']
47 |         ctr_points = np.zeros((self.max_instances, max(self.max_pieces) * max(self.num_degree) + 1, 2), dtype=np.float64)
48 | ins_labels = np.zeros((self.max_instances, 3), dtype=np.int16) - 1
49 | for ins_id, ctr_info in enumerate(sample['ctr_points']):
50 | cls_id = int(ctr_info['type'])
51 | ctr_pts_raw = np.array(ctr_info['pts'])
52 | max_points = self.max_pieces[cls_id] * self.num_degree[cls_id] + 1
53 | num_points = max_points if max_points <= ctr_pts_raw.shape[0] else ctr_pts_raw.shape[0]
54 | assert num_points >= self.num_degree[cls_id] + 1
55 | ctr_points[ins_id][:num_points] = np.array(ctr_pts_raw[:num_points])
56 | ins_labels[ins_id] = [cls_id, (num_points - 1) // self.num_degree[cls_id] - 1, num_points]
57 | masks = sample[self.mask_key]
58 | if flip:
59 | new_order = [2, 1, 0, 5, 4, 3]
60 | img_key_list = [self.img_key_list[i] for i in new_order]
61 | images = [images[i] for i in new_order]
62 | ida_mats = [ida_mats[i] for i in new_order]
63 | extrinsic = [extrinsic[i] for i in new_order]
64 | intrinsic = [intrinsic[i] for i in new_order]
65 | masks = [np.flip(mask, axis=1) for mask in masks]
66 | ctr_points = self.point_flip(ctr_points, ins_labels, self.ego_size)
67 | item = dict(
68 | images=images, targets=dict(masks=masks, points=ctr_points, labels=ins_labels),
69 | extrinsic=np.stack(extrinsic), intrinsic=np.stack(intrinsic), ida_mats=np.stack(ida_mats),
70 | extra_infos=dict(token=token, img_key_list=self.img_key_list, map_size=self.ego_size, do_flip=flip)
71 | )
72 | if self.transforms is not None:
73 | item = self.transforms(item)
74 | return item
75 |
76 | def __len__(self):
77 | return len(self.tokens)
78 |
79 | def sample_ida_augmentation(self):
80 | """Generate ida augmentation values based on ida_config."""
81 | resize_dims = w, h = self.ida_conf["resize_dims"]
82 | crop = (0, 0, w, h)
83 | if self.ida_conf["up_crop_ratio"] > 0:
84 | crop = (0, int(self.ida_conf["up_crop_ratio"] * h), w, h)
85 | flip, color, rotate_ida = False, False, 0
86 | if self.split_mode == "train":
87 | if self.ida_conf["rand_flip"] and np.random.choice([0, 1]):
88 | flip = True
89 | if self.ida_conf["rot_lim"]:
90 | assert isinstance(self.ida_conf["rot_lim"], (tuple, list))
91 | rotate_ida = np.random.uniform(*self.ida_conf["rot_lim"])
92 | return resize_dims, crop, flip, rotate_ida
93 |
94 | def img_transform(self, img, resize_dims, crop, flip, rotate):
95 | img = Image.fromarray(img)
96 | ida_rot = torch.eye(2)
97 | ida_tran = torch.zeros(2)
98 | W, H = img.size
99 | img = img.resize(resize_dims)
100 | img = img.crop(crop)
101 | if flip:
102 | img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
103 | img = img.rotate(rotate)
104 |
105 | # post-homography transformation
106 | scales = torch.tensor([resize_dims[0] / W, resize_dims[1] / H])
107 | ida_rot *= torch.Tensor(scales)
108 | ida_tran -= torch.Tensor(crop[:2])
109 | if flip:
110 | A = torch.Tensor([[-1, 0], [0, 1]])
111 | b = torch.Tensor([crop[2] - crop[0], 0])
112 | ida_rot = A.matmul(ida_rot)
113 | ida_tran = A.matmul(ida_tran) + b
114 | A = self.get_rot(rotate / 180 * np.pi)
115 | b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
116 | b = A.matmul(-b) + b
117 | ida_rot = A.matmul(ida_rot)
118 | ida_tran = A.matmul(ida_tran) + b
119 | ida_mat = ida_rot.new_zeros(3, 3)
120 | ida_mat[2, 2] = 1
121 | ida_mat[:2, :2] = ida_rot
122 | ida_mat[:2, 2] = ida_tran
123 | return np.asarray(img), ida_mat
124 |
125 | @staticmethod
126 | def point_flip(points, labels, map_shape):
127 |
128 | def _flip(pts):
129 | pts[:, 0] = map_shape[1] - pts[:, 0]
130 | return pts.copy()
131 |
132 | points_ret = deepcopy(points)
133 | for ins_id in range(points.shape[0]):
134 | end = labels[ins_id, 2]
135 | points_ret[ins_id][:end] = _flip(points[ins_id][:end])
136 |
137 | return points_ret
138 |
139 | @staticmethod
140 | def get_rot(h):
141 | return torch.Tensor([[np.cos(h), np.sin(h)], [-np.sin(h), np.cos(h)]])
142 |
--------------------------------------------------------------------------------
/mapmaster/dataset/nuscenes_pivotnet.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pickle as pkl
4 | from PIL import Image
5 | from torch.utils.data import Dataset
6 |
7 | class NuScenesMapDataset(Dataset):
8 | def __init__(self, img_key_list, map_conf, transforms, data_split="training"):
9 | super().__init__()
10 | self.img_key_list = img_key_list
11 | self.map_conf = map_conf
12 |
13 | self.ego_size = map_conf["ego_size"]
14 | self.mask_key = map_conf["mask_key"]
15 | self.nusc_root = map_conf["nusc_root"]
16 | self.anno_root = map_conf["anno_root"]
17 | self.split_dir = map_conf["split_dir"] # instance_mask/instance_mask8
18 |
19 | self.split_mode = 'train' if data_split == "training" else 'val'
20 | split_path = os.path.join(self.split_dir, f'{self.split_mode}.txt')
21 | self.tokens = [token.strip() for token in open(split_path).readlines()]
22 | self.transforms = transforms
23 |
24 | def __getitem__(self, idx: int):
25 | token = self.tokens[idx]
26 | sample = np.load(os.path.join(self.anno_root, f'{token}.npz'), allow_pickle=True)
27 | # images
28 | images = []
29 | for im_view in self.img_key_list:
30 | for im_path in sample['image_paths']:
31 | if im_path.startswith(f'samples/{im_view}/'):
32 | im_path = os.path.join(self.nusc_root, im_path)
33 | img = np.asarray(Image.open(im_path))
34 | images.append(img)
35 | # pivot pts
36 | pivot_pts = sample["pivot_pts"].item()
37 | valid_length = sample["pivot_length"].item()
38 | # targets
39 | masks=sample[self.mask_key]
40 | targets = dict(masks=masks, points=pivot_pts, valid_len=valid_length)
41 | # pose
42 | extrinsic = np.stack([np.eye(4) for _ in range(sample["trans"].shape[0])], axis=0)
43 | extrinsic[:, :3, :3] = sample["rots"]
44 | extrinsic[:, :3, 3] = sample["trans"]
45 | intrinsic = sample['intrins']
46 | # transform
47 | item = dict(images=images, targets=targets,
48 | extra_infos=dict(token=token, map_size=self.ego_size),
49 | extrinsic=np.stack(extrinsic, axis=0), intrinsic=np.stack(intrinsic, axis=0))
50 | if self.transforms is not None:
51 | item = self.transforms(item)
52 |
53 | return item
54 |
55 | def __len__(self):
56 | return len(self.tokens)
57 |
--------------------------------------------------------------------------------
/mapmaster/dataset/sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import itertools
3 | import torch.distributed as dist
4 | from typing import Optional
5 | from torch.utils.data.sampler import Sampler
6 |
7 |
8 | class InfiniteSampler(Sampler):
9 | """
10 | In training, we only care about the "infinite stream" of training data.
11 | So this sampler produces an infinite stream of indices and
12 | all workers cooperate to correctly shuffle the indices and sample different indices.
13 |     The samplers in each worker effectively produce `indices[worker_id::num_workers]`
14 | where `indices` is an infinite stream of indices consisting of
15 | `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True)
16 | or `range(size) + range(size) + ...` (if shuffle is False)
17 | """
18 |
19 | def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = 0, rank=0, world_size=1, drop_last=False):
20 | """
21 | Args:
22 | size (int): the total number of data of the underlying dataset to sample from
23 | shuffle (bool): whether to shuffle the indices or not
24 | seed (int): the initial seed of the shuffle. Must be the same
25 | across all workers. If None, will use a random seed shared
26 | among workers (require synchronization among all workers).
27 | """
28 | self._size = size
29 | assert size > 0
30 | self._shuffle = shuffle
31 | self._seed = int(seed)
32 | self.drop_last = drop_last
33 |
34 | if dist.is_available() and dist.is_initialized():
35 | self._rank = dist.get_rank()
36 | self._world_size = dist.get_world_size()
37 | else:
38 | self._rank = rank
39 | self._world_size = world_size
40 |
41 | def set_epoch(self, epoch):
42 | pass
43 |
44 | def __iter__(self):
45 | start = self._rank
46 | yield from itertools.islice(self._infinite_indices(), start, None, self._world_size)
47 |
48 | def _infinite_indices(self):
49 | g = torch.Generator()
50 | g.manual_seed(self._seed)
51 | while True:
52 | if self._shuffle:
53 | yield from torch.randperm(self._size, generator=g).tolist()
54 | else:
55 | yield from list(range(self._size))
56 |
57 | def __len__(self):
58 | if self.drop_last:
59 | return self._size // self._world_size
60 | else:
61 | return (self._size + self._world_size - 1) // self._world_size
62 |
--------------------------------------------------------------------------------
/mapmaster/dataset/transform.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import mmcv
3 | import torch
4 | import numpy as np
5 | from PIL import Image
6 | from collections.abc import Sequence
7 |
8 | class Resize(object):
9 | def __init__(self, img_scale=None, backend="cv2", interpolation="bilinear"):
10 | self.size = img_scale
11 | self.backend = backend
12 | self.interpolation = interpolation
13 | self.cv2_interp_codes = {
14 | "nearest": cv2.INTER_NEAREST,
15 | "bilinear": cv2.INTER_LINEAR,
16 | "bicubic": cv2.INTER_CUBIC,
17 | "area": cv2.INTER_AREA,
18 | "lanczos": cv2.INTER_LANCZOS4,
19 | }
20 | self.pillow_interp_codes = {
21 | "nearest": Image.NEAREST,
22 | "bilinear": Image.BILINEAR,
23 | "bicubic": Image.BICUBIC,
24 | "box": Image.BOX,
25 | "lanczos": Image.LANCZOS,
26 | "hamming": Image.HAMMING,
27 | }
28 |
29 | def __call__(self, data_dict):
30 | """Call function to resize images.
31 |
32 | Args:
33 | data_dict (dict): Result dict from loading pipeline.
34 |
35 | Returns:
36 | dict: Resized data_dict, 'scale_factor' keys are added into result dict.
37 | """
38 |
39 |         imgs = []
40 |         orig_h, orig_w = data_dict["images"][0].shape[:2]  # record the size before resizing
41 |         for img in data_dict["images"]:
42 |             img = self.im_resize(img, self.size, interpolation=self.interpolation, backend=self.backend)
43 |             imgs.append(img)
44 |         data_dict["images"] = imgs
45 | 
46 |         new_h, new_w = imgs[0].shape[:2]
47 |         w_scale = new_w / orig_w
48 |         h_scale = new_h / orig_h
49 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
50 | data_dict["extra_infos"].update({"scale_factor": scale_factor})
51 |
52 | return data_dict
53 |
54 | def im_resize(self, img, size, return_scale=False, interpolation="bilinear", out=None, backend="cv2"):
55 | """Resize image to a given size.
56 | Args:
57 | img (ndarray): The input image.
58 | size (tuple[int]): Target size (w, h).
59 | return_scale (bool): Whether to return `w_scale` and `h_scale`.
60 | interpolation (str): Interpolation method, accepted values are
61 | "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
62 | backend, "nearest", "bilinear" for 'pillow' backend.
63 | out (ndarray): The output destination.
64 | backend (str | None): The image resize backend type. Options are `cv2`,
65 | `pillow`, `None`.
66 | Returns:
67 | tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
68 | `resized_img`.
69 | """
70 | h, w = img.shape[:2]
71 | if backend not in ["cv2", "pillow"]:
72 | raise ValueError(
73 | f"backend: {backend} is not supported for resize." f"Supported backends are 'cv2', 'pillow'"
74 | )
75 |
76 | if backend == "pillow":
77 | assert img.dtype == np.uint8, "Pillow backend only support uint8 type"
78 | pil_image = Image.fromarray(img)
79 | pil_image = pil_image.resize(size, self.pillow_interp_codes[interpolation])
80 | resized_img = np.array(pil_image)
81 | else:
82 | resized_img = cv2.resize(img, size, dst=out, interpolation=self.cv2_interp_codes[interpolation])
83 | if not return_scale:
84 | return resized_img
85 | else:
86 | w_scale = size[0] / w
87 | h_scale = size[1] / h
88 | return resized_img, w_scale, h_scale
89 |
90 | class Normalize(object):
91 | """Normalize the image.
92 |
93 | Added key is "img_norm_cfg".
94 |
95 | Args:
96 | mean (sequence): Mean values of 3 channels.
97 | std (sequence): Std values of 3 channels.
98 | to_rgb (bool): Whether to convert the image from BGR to RGB,
99 | default is true.
100 | """
101 |
102 | def __init__(self, mean, std, to_rgb=True):
103 | self.mean = np.array(mean, dtype=np.float32)
104 | self.std = np.array(std, dtype=np.float32)
105 | self.to_rgb = to_rgb
106 |
107 | def __call__(self, data_dict):
108 | imgs = []
109 | for img in data_dict["images"]:
110 | if self.to_rgb:
111 | img = img.astype(np.float32) / 255.0
112 | img = self.im_normalize(img, self.mean, self.std, self.to_rgb)
113 | imgs.append(img)
114 | data_dict["images"] = imgs
115 | data_dict["extra_infos"]["img_norm_cfg"] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb)
116 | return data_dict
117 |
118 | @staticmethod
119 | def im_normalize(img, mean, std, to_rgb=True):
120 | img = img.copy().astype(np.float32)
121 | assert img.dtype != np.uint8 # cv2 inplace normalization does not accept uint8
122 | mean = np.float64(mean.reshape(1, -1))
123 | stdinv = 1 / np.float64(std.reshape(1, -1))
124 | if to_rgb:
125 | cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace
126 | cv2.subtract(img, mean, img) # inplace
127 | cv2.multiply(img, stdinv, img) # inplace
128 | return img
129 |
130 |
131 | class ToTensor(object):
132 | """Default formatting bundle."""
133 |
134 | def __call__(self, data_dict):
135 | """Call function to transform and format common fields in data_dict.
136 |
137 | Args:
138 | data_dict (dict): Data dict contains the data to convert.
139 |
140 | Returns:
141 | dict: The result dict contains the data that is formatted with default bundle.
142 | """
143 |
144 | for k in ["images", "extrinsic", "intrinsic", "ida_mats"]:
145 | if k == "images":
146 | data_dict[k] = np.stack([img.transpose(2, 0, 1) for img in data_dict[k]], axis=0)
147 | data_dict[k] = self.to_tensor(np.ascontiguousarray(data_dict[k]))
148 |
149 | for k in ["masks", "points", "labels"]:
150 | data_dict["targets"][k] = self.to_tensor(np.ascontiguousarray(data_dict["targets"][k]))
151 |
152 | return data_dict
153 |
154 | @staticmethod
155 | def to_tensor(data):
156 | if isinstance(data, torch.Tensor):
157 | return data
158 | elif isinstance(data, np.ndarray):
159 | return torch.from_numpy(data)
160 | elif isinstance(data, Sequence) and not mmcv.is_str(data):
161 | return torch.tensor(data)
162 | elif isinstance(data, int):
163 | return torch.LongTensor([data])
164 | elif isinstance(data, float):
165 | return torch.FloatTensor([data])
166 | else:
167 | raise TypeError(f"type {type(data)} cannot be converted to tensor.")
168 |
169 | class ToTensor_Pivot(object):
170 | """Default formatting bundle."""
171 |
172 | def __call__(self, data_dict):
173 | """Call function to transform and format common fields in data_dict.
174 |
175 | Args:
176 | data_dict (dict): Data dict contains the data to convert.
177 |
178 | Returns:
179 | dict: The result dict contains the data that is formatted with default bundle.
180 | """
181 | if "images" in data_dict:
182 | if isinstance(data_dict["images"], list):
183 | # process multiple imgs in single frame
184 | imgs = [img.transpose(2, 0, 1) for img in data_dict["images"]]
185 | imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
186 | data_dict["images"] = self.to_tensor(imgs)
187 | else:
188 |                 img = np.ascontiguousarray(data_dict["images"].transpose(2, 0, 1))
189 | data_dict["images"] = self.to_tensor(img)
190 |
191 | for k in ["masks"]:
192 | data_dict["targets"][k] = self.to_tensor(np.ascontiguousarray(data_dict["targets"][k]))
193 |
194 | return data_dict
195 |
196 | @staticmethod
197 | def to_tensor(data):
198 | """Convert objects of various python types to :obj:`torch.Tensor`.
199 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
200 | :class:`Sequence`, :class:`int` and :class:`float`.
201 | Args:
202 | data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to
203 | be converted.
204 | """
205 |
206 | if isinstance(data, torch.Tensor):
207 | return data
208 | elif isinstance(data, np.ndarray):
209 | return torch.from_numpy(data)
210 | elif isinstance(data, Sequence) and not mmcv.is_str(data):
211 | return torch.tensor(data)
212 | elif isinstance(data, int):
213 | return torch.LongTensor([data])
214 | elif isinstance(data, float):
215 | return torch.FloatTensor([data])
216 | else:
217 | raise TypeError(f"type {type(data)} cannot be converted to tensor.")
218 |
219 |
220 |
221 | class Pad(object):
222 | """Pad the image & mask.
223 |
224 | There are two padding modes: (1) pad to a fixed size and (2) pad to the
225 | minimum size that is divisible by some number.
226 | Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor",
227 |
228 | Args:
229 | size (tuple, optional): Fixed padding size.
230 | size_divisor (int, optional): The divisor of padded size.
231 | pad_val (float, optional): Padding value, 0 by default.
232 | """
233 |
234 | def __init__(self, size_divisor=None, pad_val=0):
235 | self.size_divisor = size_divisor
236 | self.pad_val = pad_val
237 | # only one of size and size_divisor should be valid
238 | assert size_divisor is not None
239 |
240 | def __call__(self, data_dict):
241 | """Call function to pad images, masks, semantic segmentation maps.
242 |
243 | Args:
244 | data_dict (dict): Result dict from loading pipeline.
245 |
246 | Returns:
247 | dict: Updated result dict.
248 | """
249 | padded_img = None
250 | padded_imgs = []
251 | for img in data_dict["images"]:
252 | padded_img = self.im_pad_to_multiple(img, self.size_divisor, pad_val=self.pad_val)
253 | padded_imgs.append(padded_img)
254 | data_dict["images"] = padded_imgs
255 | data_dict["extra_infos"].update(
256 | {
257 | "pad_shape": padded_img.shape,
258 | "pad_size_divisor": self.size_divisor if self.size_divisor is not None else "None",
259 | }
260 | )
261 | return data_dict
262 |
263 | def im_pad_to_multiple(self, img, divisor, pad_val=0):
264 | """Pad an image to ensure each edge to be multiple to some number.
265 | Args:
266 | img (ndarray): Image to be padded.
267 | divisor (int): Padded image edges will be multiple to divisor.
268 | pad_val (Number | Sequence[Number]): Same as :func:`impad`.
269 | Returns:
270 | ndarray: The padded image.
271 | """
272 | pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
273 | pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
274 | return self.im_pad(img, shape=(pad_h, pad_w), pad_val=pad_val)
275 |
--------------------------------------------------------------------------------
/mapmaster/engine/core.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import datetime
5 | import warnings
6 | import subprocess
7 | from mapmaster.engine.executor import Trainer, BeMapNetEvaluator
8 | from mapmaster.engine.environ import ShareFSUUIDNameServer, RlaunchReplicaEnv
9 | from mapmaster.engine.callbacks import CheckPointLoader, CheckPointSaver, ClearMLCallback, ProgressBar
10 | from mapmaster.engine.callbacks import TensorBoardMonitor, TextMonitor, ClipGrad
11 | from mapmaster.utils.env import collect_env_info, get_root_dir
12 | from mapmaster.utils.misc import setup_logger, sanitize_filename, PyDecorator, all_gather_object
13 |
14 |
15 | __all__ = ["BaseCli", "BeMapNetCli"]
16 |
17 |
18 | class BaseCli:
19 | """Command line tools for any exp."""
20 |
21 | def __init__(self, Exp):
22 | """Make sure the order of initialization is: build_args --> build_env --> build_exp,
23 | since experiments depend on the environment and the environment depends on args.
24 |
25 | Args:
26 | Exp : experiment description class
27 | """
28 | self.ExpCls = Exp
29 | self.args = self._get_parser(Exp).parse_args()
30 | self.env = RlaunchReplicaEnv(self.args.sync_bn, self.args.devices, self.args.find_unused_parameters)
31 |
32 | @property
33 | def exp(self):
34 | if not hasattr(self, "_exp"):
35 | exp = self.ExpCls(
36 | **{x if y is not None else "none": y for (x, y) in vars(self.args).items()},
37 | total_devices=self.env.world_size(),
38 | )
39 | self.exp_updated_cfg_msg = exp.update_attr(self.args.exp_options)
40 | self._exp = exp
41 | return self._exp
42 |
43 | def _get_parser(self, Exp):
44 | parser = argparse.ArgumentParser()
45 | parser = Exp.add_argparse_args(parser)
46 | parser = self.add_argparse_args(parser)
47 | return parser
48 |
49 | @staticmethod
50 | def add_argparse_args(parser: argparse.ArgumentParser):
51 | parser.add_argument("--eval", dest="eval", action="store_true", help="conduct evaluation only")
52 | parser.add_argument("-te", "--train_and_eval", dest="train_and_eval", action="store_true", help="train+eval")
53 | parser.add_argument("--find_unused_parameters", dest="find_unused_parameters", action="store_true")
54 | parser.add_argument("-d", "--devices", default="0-7", type=str, help="device for training")
55 | parser.add_argument("--ckpt", type=str, default=None, help="checkpoint to start from or be evaluated")
56 | parser.add_argument("--pretrained_model", type=str, default=None, help="pretrained_model used by training")
57 | parser.add_argument("--sync_bn", type=int, default=0, help="0-> disable sync_bn, 1-> whole world")
58 | clearml_parser = parser.add_mutually_exclusive_group(required=False)
59 |         clearml_parser.add_argument("--clearml", dest="clearml", action="store_true", help="enable clearml for train")
60 | clearml_parser.add_argument("--no-clearml", dest="clearml", action="store_false", help="disable clearml")
61 | parser.set_defaults(clearml=True)
62 | return parser
63 |
64 | def _get_exp_output_dir(self):
65 | exp_dir = os.path.join(os.path.join(get_root_dir(), "outputs"), sanitize_filename(self.exp.exp_name))
66 | os.makedirs(exp_dir, exist_ok=True)
67 | output_dir = None
68 | if self.args.ckpt:
69 | output_dir = os.path.dirname(os.path.dirname(os.path.abspath(self.args.ckpt)))
70 | elif self.env.global_rank() == 0:
71 | output_dir = os.path.join(exp_dir, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"))
72 | os.makedirs(output_dir, exist_ok=True)
73 | # make a symlink "latest"
74 | symlink, symlink_tmp = os.path.join(exp_dir, "latest"), os.path.join(exp_dir, "latest_tmp")
75 | if os.path.exists(symlink_tmp):
76 | os.remove(symlink_tmp)
77 | os.symlink(os.path.relpath(output_dir, exp_dir), symlink_tmp)
78 | os.rename(symlink_tmp, symlink)
79 | output_dir = all_gather_object(output_dir)[0]
80 | return output_dir
81 |
82 | def get_evaluator(self, callbacks=None):
83 | exp = self.exp
84 | if self.args.ckpt is None:
85 | warnings.warn("No checkpoint is specified for evaluation")
86 | if exp.eval_executor_class is None:
87 | sys.exit("No evaluator is specified for evaluation")
88 |
89 | output_dir = self._get_exp_output_dir()
90 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="eval.log")
91 | self._set_basic_log_message(logger)
92 | if callbacks is None:
93 | callbacks = [self.env, CheckPointLoader(self.args.ckpt)]
94 | evaluator = exp.eval_executor_class(exp=exp, callbacks=callbacks, logger=logger)
95 | return evaluator
96 |
97 | def _set_basic_log_message(self, logger):
98 | logger.opt(ansi=True).info("Cli arguments:\n{}".format(self.args))
99 | logger.info(f"exp_name: {self.exp.exp_name}")
100 | logger.opt(ansi=True).info(
101 | "Used experiment configs:\n{}".format(self.exp.get_cfg_as_str())
102 | )
103 | if self.exp_updated_cfg_msg:
104 | logger.opt(ansi=True).info(
105 | "List of override configs:\n{}".format(self.exp_updated_cfg_msg)
106 | )
107 | logger.opt(ansi=True).info("Environment info:\n{}".format(collect_env_info()))
108 |
109 | def get_trainer(self, callbacks=None, evaluator=None):
110 | args = self.args
111 | exp = self.exp
112 | if evaluator is not None:
113 | output_dir = self.exp.output_dir
114 | else:
115 | output_dir = self._get_exp_output_dir()
116 |
117 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="train.log")
118 | self._set_basic_log_message(logger)
119 |
120 | if callbacks is None:
121 | callbacks = [
122 | self.env,
123 | ProgressBar(logger=logger),
124 | TextMonitor(interval=exp.print_interval),
125 | TensorBoardMonitor(os.path.join(output_dir, "tensorboard"), interval=exp.print_interval),
126 | CheckPointSaver(
127 | local_path=os.path.join(output_dir, "dump_model"),
128 | remote_path=exp.ckpt_oss_save_dir,
129 | save_interval=exp.dump_interval,
130 | num_keep_latest=exp.num_keep_latest_ckpt,
131 | ),
132 | ]
133 | if "grad_clip_value" in exp.__dict__:
134 | callbacks.append(ClipGrad(exp.grad_clip_value))
135 | if args.clearml:
136 | callbacks.append(ClearMLCallback())
137 | if args.ckpt:
138 | callbacks.append(CheckPointLoader(args.ckpt))
139 | if args.pretrained_model:
140 | callbacks.append(CheckPointLoader(args.pretrained_model, weight_only=True))
141 | callbacks.extend(exp.callbacks)
142 |
143 | trainer = Trainer(exp=exp, callbacks=callbacks, logger=logger, evaluator=evaluator)
144 | return trainer
145 |
146 | def executor(self):
147 | if self.args.eval:
148 | self.get_evaluator().eval()
149 | elif self.args.train_and_eval:
150 | evaluator = self.get_evaluator(callbacks=[])
151 | self.get_trainer(evaluator=evaluator).train()
152 | else:
153 | self.get_trainer().train()
154 |
155 | def dispatch(self, executor_func):
156 | is_master = self.env.global_rank() == 0
157 | with ShareFSUUIDNameServer(is_master) as ns:
158 | self.env.set_master_uri(ns)
159 | self.env.setup_nccl()
160 | if self.env.local_rank() == 0:
161 | command = sys.argv.copy()
162 | command[0] = os.path.abspath(command[0])
163 | command = [sys.executable] + command
164 | for local_rank in range(1, self.env.nr_gpus):
165 | env_copy = os.environ.copy()
166 | env_copy["LOCAL_RANK"] = f"{local_rank}"
167 | subprocess.Popen(command, env=env_copy)
168 | self.env.init_dist()
169 | executor_func()
170 |
171 | def run(self):
172 | self.dispatch(self.executor)
173 |
174 |
175 | class MapMasterCli(BaseCli):
176 | @PyDecorator.overrides(BaseCli)
177 | def get_evaluator(self, callbacks=None):
178 | exp = self.exp
179 |
180 | output_dir = self._get_exp_output_dir()
181 | self.exp.output_dir = output_dir
182 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="eval.log")
183 | self._set_basic_log_message(logger)
184 | if callbacks is None:
185 | callbacks = [
186 | self.env,
187 | CheckPointLoader(self.args.ckpt),
188 | ]
189 |
190 | evaluator = BeMapNetEvaluator(exp=exp, callbacks=callbacks, logger=logger)
191 | return evaluator
192 |
--------------------------------------------------------------------------------
/mapmaster/engine/environ.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import uuid
4 | import torch
5 | import subprocess
6 | import numpy as np
7 | from torch import nn
8 | from loguru import logger
9 | import torch.distributed as dist
10 | from mapmaster.utils.env import get_root_dir
11 | from mapmaster.utils.misc import parse_devices
12 | from mapmaster.engine.callbacks import Callback
13 |
14 |
15 | __all__ = ["ShareFSUUIDNameServer", "RlaunchReplicaEnv"]
16 | output_root_dir = os.path.join(get_root_dir(), "outputs")
17 |
18 |
19 | class ShareFSUUIDNameServer:
20 | def __init__(self, is_master):
21 | self.exp_id = self._get_exp_id()
22 | self.is_master = is_master
23 | os.makedirs(os.path.dirname(self.filepath), exist_ok=True)
24 |
25 | def _get_exp_id(self):
26 | if "DET3D_EXPID" not in os.environ:
27 | if int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) == 1:
28 | return str(uuid.uuid4())
29 | msg = """cannot find DET3D_EXPID in environ please use following
30 | command DET3D_EXPID=$(cat /proc/sys/kernel/random/uuid) rlaunch ...
31 | """
32 | logger.error(msg)
33 | raise RuntimeError
34 | return str(os.environ["DET3D_EXPID"])
35 |
36 | @property
37 | def filepath(self):
38 | return os.path.join(output_root_dir, f"master_ip_{self.exp_id}.txt")
39 |
40 | def __enter__(self):
41 | if self.is_master:
42 | self.set_master()
43 | return self
44 |
45 | def __exit__(self, exc_type, exc_value, exc_tb):
46 | if self.is_master:
47 | os.remove(self.filepath)
48 |
49 | def set_master(self):
50 | assert not os.path.exists(self.filepath)
51 | hostname = "Host"
52 | with open(self.filepath, "w") as f:
53 | f.write(hostname)
54 |
55 | def get_master(self):
56 | while True:
57 | if os.path.exists(self.filepath):
58 | with open(self.filepath, "r") as f:
59 | return f.read()
60 | else:
61 | time.sleep(5)
62 |
63 |
64 | class _DDPEnv(Callback):
65 | def __init__(self, sync_bn=0, devices=None, find_unused_parameters=False):
66 | if devices:
67 | devices = parse_devices(devices)
68 | os.environ["CUDA_VISIBLE_DEVICES"] = devices
69 | self.nr_gpus = torch.cuda.device_count()
70 | self.sync_bn = sync_bn
71 | self.find_unused_parameters = find_unused_parameters
72 |
73 | @staticmethod
74 | def setup_nccl():
75 | ifname = filter(lambda x: x not in ("lo",), os.listdir("/sys/class/net/"))
76 | os.environ["NCCL_SOCKET_IFNAME"] = ",".join(ifname)
77 | os.environ["NCCL_IB_DISABLE"] = "1"
78 |
79 | # os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL"
80 | os.environ["NCCL_IB_HCA"] = subprocess.getoutput(
81 | "cd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; "
82 | "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null "
83 | "| grep v >/dev/null && echo $i ; done; > /dev/null"
84 | )
85 | os.environ["NCCL_IB_GID_INDEX"] = "3"
86 | os.environ["NCCL_IB_TC"] = "106"
87 |
88 | def after_init(self, trainer):
89 | trainer.model.cuda()
90 | if int(self.sync_bn) > 1:
91 | ranks = np.arange(self.world_size()).reshape(-1, self.sync_bn)
92 | process_groups = [torch.distributed.new_group(list(pids)) for pids in ranks]
93 | trainer.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
94 | trainer.model, process_groups[self.global_rank() // self.sync_bn]
95 | )
96 | elif int(self.sync_bn) == 1:
97 | trainer.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(trainer.model)
98 | trainer.model = nn.parallel.DistributedDataParallel(
99 | trainer.model, device_ids=[self.local_rank()], find_unused_parameters=self.find_unused_parameters
100 | )
101 |
102 | def cleanup(self):
103 | dist.destroy_process_group()
104 |
105 | def init_dist(self):
106 | torch.cuda.set_device(self.local_rank())
107 | dist.init_process_group(
108 | backend="nccl",
109 | init_method=self._master_uri,
110 | rank=self.global_rank(),
111 | world_size=self.world_size(),
112 | )
113 | dist.barrier()
114 |
115 |
116 | class RlaunchReplicaEnv(_DDPEnv):
117 | def __init__(self, sync_bn=0, devices=None, find_unused_parameters=False):
118 | super().__init__(sync_bn, devices, find_unused_parameters)
119 |
120 | def set_master_uri(self, ns):
121 | self._master_uri = f"tcp://{self.master_address(ns)}:{self.master_port()}"
122 | logger.info(self._master_uri)
123 |
124 | @staticmethod
125 | def is_brainpp_mm_env():
126 | return int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) > 1
127 |
128 | def master_address(self, ns) -> str:
129 | if self.node_rank() == 0:
130 | root_node = "localhost"
131 | else:
132 | root_node = ns.get_master()
133 | os.environ["MASTER_ADDR"] = root_node
134 | return root_node
135 |
136 | def master_port(self) -> int:
137 | port = os.environ.get("MASTER_PORT", 12345)
138 | os.environ["MASTER_PORT"] = str(port)
139 | return int(port)
140 |
141 | def world_size(self) -> int:
142 | return int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) * int(self.nr_gpus)
143 |
144 | def global_rank(self) -> int:
145 | return int(self.nr_gpus) * self.node_rank() + self.local_rank()
146 |
147 | def local_rank(self) -> int:
148 | return int(os.environ.get("LOCAL_RANK", 0))
149 |
150 | def node_rank(self) -> int:
151 | return int(os.environ.get("RLAUNCH_REPLICA", 0))
152 |
--------------------------------------------------------------------------------
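
A quick numeric sketch of the rank bookkeeping implemented by RlaunchReplicaEnv above, using the same environment variables (RLAUNCH_REPLICA, RLAUNCH_REPLICA_TOTAL, LOCAL_RANK); the helper name `rank_layout` is illustrative only, not part of the repo:

import os

def rank_layout(nr_gpus):
    # every node ("replica") owns nr_gpus consecutive global ranks
    node_rank = int(os.environ.get("RLAUNCH_REPLICA", 0))         # index of this node
    local_rank = int(os.environ.get("LOCAL_RANK", 0))             # GPU index on this node
    world_size = int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) * nr_gpus
    global_rank = nr_gpus * node_rank + local_rank
    return node_rank, local_rank, global_rank, world_size

# e.g. 2 nodes x 8 GPUs: node 1 / local rank 3 -> global rank 11 in a world of size 16.
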
/mapmaster/engine/executor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from tqdm import tqdm
4 | from typing import Sequence
5 | from mapmaster.engine.experiment import BaseExp
6 | from mapmaster.utils.misc import get_rank, synchronize
7 |
8 |
9 | __all__ = ["Callback", "BaseExecutor", "Trainer", "BeMapNetEvaluator"]
10 |
11 |
12 | class Callback:
13 |
14 | # callback enabled rank list
15 | # None means callback is always enabled
16 | enabled_rank = None
17 |
18 | def setup(self, executor):
19 | pass
20 |
21 | def load_checkpoint(self, executor):
22 | pass
23 |
24 | def after_init(self, executor):
25 | pass
26 |
27 | def before_train(self, executor):
28 | pass
29 |
30 | def before_epoch(self, executor, epoch: int):
31 | pass
32 |
33 | def before_step(self, executor, step, data_dict):
34 | pass
35 |
36 | def before_backward(self, executor):
37 | pass
38 |
39 | def before_optimize(self, executor):
40 | pass
41 |
42 | def after_step(self, executor, step, data_dict, *args, **kwargs):
43 | pass
44 |
45 | def after_epoch(self, executor, epoch: int, update_best_ckpt: bool = False):
46 | pass
47 |
48 | def after_train(self, executor):
49 | pass
50 |
51 |
52 | class BaseExecutor:
53 | def __init__(self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None) -> None:
54 | self.exp = exp
55 | self.logger = logger
56 | self.callbacks = callbacks
57 | self._invoke_callback("setup")
58 |
59 | self.epoch = 0
60 | self.global_step = 0
61 | self._invoke_callback("load_checkpoint")
62 | self._invoke_callback("after_init")
63 |
64 | @property
65 | def train_dataloader(self):
66 | return self.exp.train_dataloader
67 |
68 | @property
69 | def val_dataloader(self):
70 | return self.exp.val_dataloader
71 |
72 | @property
73 | def model(self):
74 | return self.exp.model
75 |
76 | @model.setter
77 | def model(self, value):
78 | self.exp.model = value
79 |
80 | @property
81 | def optimizer(self):
82 | return self.exp.optimizer
83 |
84 | @property
85 | def lr_scheduler(self):
86 | return self.exp.lr_scheduler
87 |
88 | def _invoke_callback(self, callback_name, *args, **kwargs):
89 | for cb in self.callbacks:
90 | if cb.enabled_rank is None or self.global_rank in cb.enabled_rank:
91 | func = getattr(cb, callback_name, None)
92 | if func:
93 | func(self, *args, **kwargs)
94 |
95 | @property
96 | def global_rank(self):
97 | return get_rank()
98 |
99 |
100 | class Trainer(BaseExecutor):
101 | def __init__(
102 | self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None, use_amp=False, evaluator=None
103 | ) -> None:
104 | super(Trainer, self).__init__(exp, callbacks, logger)
105 | self.use_amp = use_amp
106 | self.evaluator = evaluator
107 | if self.use_amp:
108 | self.grad_scaler = torch.cuda.amp.GradScaler()
109 |
110 | def train(self):
111 | self.train_iter = iter(self.train_dataloader)
112 | self._invoke_callback("before_train")
113 | self.model.cuda()
114 | self.model.train()
115 | self.optimizer_to(self.optimizer, next(self.model.parameters()).device)
116 | start_epoch = self.epoch
117 | for epoch in range(start_epoch, self.exp.max_epoch):
118 | self.epoch = epoch
119 | self.model.train()
120 | self.train_epoch(epoch)
121 | self._invoke_callback("after_train")
122 |
123 | def train_epoch(self, epoch):
124 | self._invoke_callback("before_epoch", epoch)
125 | sampler = self.train_dataloader.sampler
126 | if hasattr(sampler, "set_epoch"):
127 | sampler.set_epoch(epoch)
128 | for step in range(len(self.train_dataloader)):
129 | try:
130 | data = next(self.train_iter)
131 | except StopIteration:
132 | self.train_iter = iter(self.train_dataloader)
133 | data = next(self.train_iter)
134 | self.train_step(data, step)
135 | if self.evaluator is not None:
136 | self.evaluator.eval()
137 | self._invoke_callback("after_epoch", epoch, update_best_ckpt=False)
138 |
139 | def train_step(self, data, step):
140 | self._invoke_callback("before_step", step, data)
141 | self.lr_scheduler.step(self.global_step)
142 | self.model.train()
143 | self.optimizer.zero_grad()
144 | if not self.use_amp:
145 | ret = self.exp.training_step(data)
146 | else:
147 | with torch.cuda.amp.autocast():
148 | ret = self.exp.training_step(data)
149 | if isinstance(ret, torch.Tensor):
150 | loss = ret
151 | ext_dict = None
152 | elif isinstance(ret, tuple):
153 | loss, ext_dict = ret
154 | ext_dict = {k: v.detach() if isinstance(v, torch.Tensor) else v for k, v in ext_dict.items()}
155 | else:
156 | raise TypeError
157 | self._invoke_callback("before_backward")
158 | if not self.use_amp:
159 | loss.backward()
160 | self._invoke_callback("before_optimize")
161 | self.optimizer.step()
162 | else:
163 | self.grad_scaler.scale(loss).backward()
164 | self.grad_scaler.unscale_(self.optimizer) # NOTE: grads are unscaled before "before_optimize" callbacks
165 | self._invoke_callback("before_optimize")
166 | self.grad_scaler.step(self.optimizer)
167 | self.grad_scaler.update()
168 | self._invoke_callback("after_step", step, data, loss=loss.detach(), extra=ext_dict)
169 | self.global_step += 1
170 |
171 | # refer to: https://github.com/pytorch/pytorch/issues/8741
172 | @staticmethod
173 | def optimizer_to(optim, device):
174 | for param in optim.state.values():
175 | # Not sure there are any global tensors in the state dict
176 | if isinstance(param, torch.Tensor):
177 | param.data = param.data.to(device)
178 | if param._grad is not None:
179 | param._grad.data = param._grad.data.to(device)
180 | elif isinstance(param, dict):
181 | for subparam in param.values():
182 | if isinstance(subparam, torch.Tensor):
183 | subparam.data = subparam.data.to(device)
184 | if subparam._grad is not None:
185 | subparam._grad.data = subparam._grad.data.to(device)
186 |
187 |
188 | class BeMapNetEvaluator(BaseExecutor):
189 | def __init__(self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None) -> None:
190 | super(BeMapNetEvaluator, self).__init__(exp, callbacks, logger)
191 |
192 | def eval(self, ckpt_name=None):
193 |
194 | exp = self.exp
195 | val_iter = iter(self.val_dataloader)
196 |
197 | self._invoke_callback("before_eval")
198 |
199 | if ckpt_name is not None:
200 | if get_rank() == 0:
201 | self.logger.info("Eval with best checkpoint!")
202 | path = os.path.join(exp.output_dir, 'dump_model', ckpt_name)
203 | checkpoint = torch.load(open(path, "rb"), map_location=torch.device("cpu"))
204 | self.model.load_state_dict(checkpoint["model_state"], strict=False)
205 |
206 | self.model.cuda()
207 | self.model.eval()
208 |
209 | for step in tqdm(range(len(self.val_dataloader))):
210 | batch_data = next(val_iter)
211 | with torch.no_grad():
212 | exp.test_step(batch_data)
213 | self._invoke_callback("after_step", step, {})
214 |
215 | synchronize()
216 |
217 | if get_rank() == 0:
218 | self.logger.info("Done with inference, start evaluation later!")
219 | gt_dir = exp.exp_config.map_conf['anno_root']
220 | dt_dir = exp.evaluation_save_dir
221 | val_txts = exp.exp_config.VAL_TXT
222 |
223 | for val_txt in val_txts:
224 | ap_table = "".join(os.popen(f"python3 tools/evaluation/eval.py {gt_dir} {dt_dir} {val_txt}").readlines())
225 | self.logger.info(" AP-Performance with HDMapNetAPI: \n" + val_txt + "\n" + ap_table)
226 |
227 | self._invoke_callback("after_eval")
228 |
--------------------------------------------------------------------------------
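
The Callback class above is a set of no-op hooks; BaseExecutor and Trainer invoke them through _invoke_callback at fixed points of the training loop (setup, before/after epoch, before/after step, around backward and optimize). A minimal sketch of a custom callback; the LossLogger name and print-based logging are illustrative, not part of the repo:

from mapmaster.engine.executor import Callback

class LossLogger(Callback):
    # None enables the callback on every rank; [0] restricts it to the master process.
    enabled_rank = [0]

    def before_epoch(self, executor, epoch):
        print(f"starting epoch {epoch}")

    def after_step(self, executor, step, data_dict, *args, **kwargs):
        # Trainer.train_step passes the detached loss as a keyword argument.
        loss = kwargs.get("loss")
        if loss is not None and step % 50 == 0:
            print(f"epoch {executor.epoch} step {step}: loss = {loss.item():.4f}")
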
/mapmaster/engine/experiment.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import functools
5 | import numpy as np
6 | from torch.nn import Module
7 | from tabulate import tabulate
8 | from abc import ABCMeta, abstractmethod
9 | from mapmaster.utils.misc import DictAction
10 |
11 |
12 | class BaseExp(metaclass=ABCMeta):
13 | """Basic class for any experiment in Perceptron.
14 |
15 | Args:
16 | batch_size_per_device (int):
17 | batch_size of each device
18 |
19 | total_devices (int):
20 | number of devices to use
21 |
22 | max_epoch (int):
23 |             total training epochs; max_epoch is required because the
24 |             lr_scheduler may need to be adapted according to it
25 | """
26 |
27 | def __init__(self, batch_size_per_device, total_devices, max_epoch):
28 | self._batch_size_per_device = batch_size_per_device
29 | self._max_epoch = max_epoch
30 | self._total_devices = total_devices
31 | # ----------------------------------------------- extra configure ------------------------- #
32 | self.seed = None
33 | self.exp_name = os.path.splitext(os.path.basename(sys.argv.copy()[0]))[0] # entrypoint filename as exp_name
34 | self.print_interval = 100
35 | self.dump_interval = 10
36 | self.eval_interval = 10
37 | self.num_keep_latest_ckpt = 10
38 | self.ckpt_oss_save_dir = None
39 | self.enable_tensorboard = False
40 | self.eval_executor_class = None
41 |
42 | @property
43 | def train_dataloader(self):
44 | if "_train_dataloader" not in self.__dict__:
45 | self._train_dataloader = self._configure_train_dataloader()
46 | return self._train_dataloader
47 |
48 | @property
49 | def val_dataloader(self):
50 | if "_val_dataloader" not in self.__dict__:
51 | self._val_dataloader = self._configure_val_dataloader()
52 | return self._val_dataloader
53 |
54 | @property
55 | def test_dataloader(self):
56 | if "_test_dataloader" not in self.__dict__:
57 | self._test_dataloader = self._configure_test_dataloader()
58 | return self._test_dataloader
59 |
60 | @property
61 | def model(self):
62 | if "_model" not in self.__dict__:
63 | self._model = self._configure_model()
64 | return self._model
65 |
66 | @model.setter
67 | def model(self, value):
68 | self._model = value
69 |
70 | @property
71 | def callbacks(self):
72 | if not hasattr(self, "_callbacks"):
73 | self._callbacks = self._configure_callbacks()
74 | return self._callbacks
75 |
76 | @property
77 | def optimizer(self):
78 | if "_optimizer" not in self.__dict__:
79 | self._optimizer = self._configure_optimizer()
80 | return self._optimizer
81 |
82 | @property
83 | def lr_scheduler(self):
84 | if "_lr_scheduler" not in self.__dict__:
85 | self._lr_scheduler = self._configure_lr_scheduler()
86 | return self._lr_scheduler
87 |
88 | @property
89 | def batch_size_per_device(self):
90 | return self._batch_size_per_device
91 |
92 | @property
93 | def max_epoch(self):
94 | return self._max_epoch
95 |
96 | @property
97 | def total_devices(self):
98 | return self._total_devices
99 |
100 | @abstractmethod
101 | def _configure_model(self) -> Module:
102 | pass
103 |
104 | @abstractmethod
105 | def _configure_train_dataloader(self):
106 | """"""
107 |
108 | def _configure_callbacks(self):
109 | return []
110 |
111 | @abstractmethod
112 | def _configure_val_dataloader(self):
113 | """"""
114 |
115 | @abstractmethod
116 | def _configure_test_dataloader(self):
117 | """"""
118 |
119 | def training_step(self, *args, **kwargs):
120 | pass
121 |
122 | @abstractmethod
123 | def _configure_optimizer(self) -> torch.optim.Optimizer:
124 | pass
125 |
126 | @abstractmethod
127 | def _configure_lr_scheduler(self, **kwargs):
128 | pass
129 |
130 | def update_attr(self, options: dict) -> str:
131 | if options is None:
132 | return ""
133 | assert isinstance(options, dict)
134 | msg = ""
135 | for k, v in options.items():
136 | if k in self.__dict__:
137 | old_v = self.__getattribute__(k)
138 | if not v == old_v:
139 | self.__setattr__(k, v)
140 |                     msg = "{}\n'{}' is overridden from '{}' to '{}'".format(msg, k, old_v, v)
141 | else:
142 | self.__setattr__(k, v)
143 | msg = "{}\n'{}' is set to '{}'".format(msg, k, v)
144 |
145 | # update exp_name
146 | exp_name_suffix = "-".join(sorted([f"{k}-{v}" for k, v in options.items()]))
147 | self.exp_name = f"{self.exp_name}--{exp_name_suffix}"
148 | return msg
149 |
150 | def get_cfg_as_str(self) -> str:
151 | config_table = []
152 | for c, v in self.__dict__.items():
153 | if not isinstance(v, (int, float, str, list, tuple, dict, np.ndarray)):
154 | if hasattr(v, "__name__"):
155 | v = v.__name__
156 |                 elif isinstance(v, functools.partial):  # check partial before the catch-all __class__ branch
157 |                     v = v.func.__name__
158 |                 elif hasattr(v, "__class__"):
159 |                     v = v.__class__
160 | if c[0] == "_":
161 | c = c[1:]
162 | config_table.append((str(c), str(v)))
163 |
164 | headers = ["config key", "value"]
165 | config_table = tabulate(config_table, headers, tablefmt="plain")
166 | return config_table
167 |
168 | def __str__(self):
169 | return self.get_cfg_as_str()
170 |
171 | def to_onnx(self):
172 | pass
173 |
174 | @classmethod
175 |     def add_argparse_args(cls, parser): # pragma: no cover
176 | parser.add_argument(
177 | "--exp_options",
178 | nargs="+",
179 | action=DictAction,
180 | help="override some settings in the exp, the key-value pair in xxx=yyy format will be merged into exp. "
181 | 'If the value to be overwritten is a list, it should be like key="[a,b]" or key=a,b '
182 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
183 | "Note that the quotation marks are necessary and that no white space is allowed.",
184 | )
185 | parser.add_argument("-b", "--batch-size-per-device", type=int, default=None)
186 | parser.add_argument("-e", "--max-epoch", type=int, default=None)
187 | return parser
188 |
--------------------------------------------------------------------------------
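
update_attr and add_argparse_args together let any attribute of a concrete BaseExp subclass be overridden from the command line via --exp_options key=value pairs (parsed by DictAction). A minimal sketch under the assumption of a toy subclass; DummyExp and its stub implementations are illustrative only:

import torch
from torch import nn
from mapmaster.engine.experiment import BaseExp

class DummyExp(BaseExp):
    # just enough of the abstract interface to exercise update_attr()
    def _configure_model(self):
        return nn.Linear(4, 2)
    def _configure_train_dataloader(self):
        return []
    def _configure_val_dataloader(self):
        return []
    def _configure_test_dataloader(self):
        return []
    def _configure_optimizer(self):
        return torch.optim.SGD(self.model.parameters(), lr=0.1)
    def _configure_lr_scheduler(self, **kwargs):
        return torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=1)

exp = DummyExp(batch_size_per_device=2, total_devices=1, max_epoch=10)
# same dict that DictAction would produce from: --exp_options print_interval=20 seed=42
print(exp.update_attr({"print_interval": 20, "seed": 42}))
print(exp.get_cfg_as_str())
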
/mapmaster/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .network import MapMaster
2 |
--------------------------------------------------------------------------------
/mapmaster/models/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import ResNetBackbone, EfficientNetBackbone, SwinTRBackbone
2 |
--------------------------------------------------------------------------------
/mapmaster/models/backbone/bifpn/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import BiFPN
2 |
--------------------------------------------------------------------------------
/mapmaster/models/backbone/bifpn/utils.py:
--------------------------------------------------------------------------------
1 | # Author: Zylo117
2 |
3 | import math
4 | import torch
5 | from torch import nn
6 | import torch.nn.functional as F
7 |
8 |
9 | class Swish(nn.Module):
10 | def forward(self, x):
11 | return x * torch.sigmoid(x)
12 |
13 |
14 | class Conv2dStaticSamePadding(nn.Module):
15 | """
16 | created by Zylo117
17 | The real keras/tensorflow conv2d with same padding
18 | """
19 |
20 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, groups=1, dilation=1, **kwargs):
21 | super().__init__()
22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, bias=bias, groups=groups)
23 | self.stride = self.conv.stride
24 | self.kernel_size = self.conv.kernel_size
25 | self.dilation = self.conv.dilation
26 |
27 | if isinstance(self.stride, int):
28 | self.stride = [self.stride] * 2
29 | elif len(self.stride) == 1:
30 | self.stride = [self.stride[0]] * 2
31 |
32 | if isinstance(self.kernel_size, int):
33 | self.kernel_size = [self.kernel_size] * 2
34 | elif len(self.kernel_size) == 1:
35 | self.kernel_size = [self.kernel_size[0]] * 2
36 |
37 | def forward(self, x):
38 | h, w = x.shape[-2:]
39 |
40 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1]
41 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0]
42 |
43 | left = extra_h // 2
44 | right = extra_h - left
45 | top = extra_v // 2
46 | bottom = extra_v - top
47 |
48 | x = F.pad(x, [left, right, top, bottom])
49 |
50 | x = self.conv(x)
51 | return x
52 |
53 |
54 | class MaxPool2dStaticSamePadding(nn.Module):
55 | """
56 | created by Zylo117
57 | The real keras/tensorflow MaxPool2d with same padding
58 | """
59 |
60 | def __init__(self, *args, **kwargs):
61 | super().__init__()
62 | self.pool = nn.MaxPool2d(*args, **kwargs)
63 | self.stride = self.pool.stride
64 | self.kernel_size = self.pool.kernel_size
65 |
66 | if isinstance(self.stride, int):
67 | self.stride = [self.stride] * 2
68 | elif len(self.stride) == 1:
69 | self.stride = [self.stride[0]] * 2
70 |
71 | if isinstance(self.kernel_size, int):
72 | self.kernel_size = [self.kernel_size] * 2
73 | elif len(self.kernel_size) == 1:
74 | self.kernel_size = [self.kernel_size[0]] * 2
75 |
76 | def forward(self, x):
77 | h, w = x.shape[-2:]
78 |
79 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1]
80 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0]
81 |
82 | left = extra_h // 2
83 | right = extra_h - left
84 | top = extra_v // 2
85 | bottom = extra_v - top
86 |
87 | x = F.pad(x, [left, right, top, bottom])
88 |
89 | x = self.pool(x)
90 | return x
91 |
--------------------------------------------------------------------------------
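
Both wrappers above reproduce TensorFlow's "SAME" padding: the extra padding needed so that the output size is ceil(input / stride), split as evenly as possible between the two sides. A small numeric check of that formula (sizes chosen for illustration):

import math
import torch
from mapmaster.models.backbone.bifpn.utils import Conv2dStaticSamePadding

# width 5, stride 2, kernel 3: extra = (ceil(5/2) - 1) * 2 - 5 + 3 = 2 -> pad 1 left / 1 right
w, stride, k = 5, 2, 3
extra = (math.ceil(w / stride) - 1) * stride - w + k
assert extra == 2

conv = Conv2dStaticSamePadding(3, 8, kernel_size=3, stride=2, bias=False)
out = conv(torch.randn(1, 3, 5, 5))
assert out.shape[-2:] == (3, 3)   # ceil(5 / 2) = 3, as TF "SAME" padding would give
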
/mapmaster/models/backbone/efficientnet/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import EfficientNet
2 |
--------------------------------------------------------------------------------
/mapmaster/models/backbone/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from mapmaster.models.backbone.resnet import ResNet
5 | from mapmaster.models.backbone.efficientnet import EfficientNet
6 | from mapmaster.models.backbone.swin_transformer import SwinTransformer
7 | from mapmaster.models.backbone.bifpn import BiFPN
8 |
9 |
10 | class ResNetBackbone(nn.Module):
11 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1):
12 | super(ResNetBackbone, self).__init__()
13 | assert 0 < ret_layers < 4
14 | self.ret_layers = ret_layers
15 | self.bkb = ResNet(**bkb_kwargs)
16 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg)
17 | self.up_shape = None if up_shape is None else up_shape
18 | self.bkb.init_weights()
19 |
20 | def forward(self, inputs):
21 | images = inputs["images"]
22 | images = images.view(-1, *images.shape[-3:])
23 | bkb_features = list(self.bkb(images)[-self.ret_layers:])
24 | nek_features = self.fpn(bkb_features) if self.fpn is not None else None
25 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features}
26 |
27 |
28 | class EfficientNetBackbone(nn.Module):
29 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1):
30 | super(EfficientNetBackbone, self).__init__()
31 | assert 0 < ret_layers < 4
32 | self.ret_layers = ret_layers
33 | self.bkb = EfficientNet.from_pretrained(**bkb_kwargs)
34 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg)
35 | self.up_shape = None if up_shape is None else up_shape
36 | del self.bkb._conv_head
37 | del self.bkb._bn1
38 | del self.bkb._avg_pooling
39 | del self.bkb._dropout
40 | del self.bkb._fc
41 |
42 | def forward(self, inputs):
43 | images = inputs["images"]
44 | images = images.view(-1, *images.shape[-3:])
45 | endpoints = self.bkb.extract_endpoints(images)
46 | bkb_features = []
47 | for i, (key, value) in enumerate(endpoints.items()):
48 | if i > 0:
49 | bkb_features.append(value)
50 | bkb_features = list(bkb_features[-self.ret_layers:])
51 | nek_features = self.fpn(bkb_features) if self.fpn is not None else None
52 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features}
53 |
54 |
55 | class SwinTRBackbone(nn.Module):
56 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1):
57 | super(SwinTRBackbone, self).__init__()
58 | assert 0 < ret_layers < 4
59 | self.ret_layers = ret_layers
60 | self.bkb = SwinTransformer(**bkb_kwargs)
61 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg)
62 | self.up_shape = None if up_shape is None else up_shape
63 |
64 | def forward(self, inputs):
65 | images = inputs["images"]
66 | images = images.view(-1, *images.shape[-3:])
67 | bkb_features = list(self.bkb(images)[-self.ret_layers:])
68 | nek_features = None
69 | if self.fpn is not None:
70 | nek_features = self.fpn(bkb_features)
71 | else:
72 | if self.up_shape is not None:
73 | nek_features = [torch.cat([self.up_sample(x, self.up_shape) for x in bkb_features], dim=1)]
74 |
75 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features}
76 |
77 | def up_sample(self, x, tgt_shape=None):
78 |         tgt_shape = self.up_shape if tgt_shape is None else tgt_shape  # the wrapper stores the target size as up_shape
79 | if tuple(x.shape[-2:]) == tuple(tgt_shape):
80 | return x
81 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True)
82 |
--------------------------------------------------------------------------------
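
All three wrappers above share the same input contract: inputs["images"] may carry extra leading dimensions (e.g. batch x cameras), is flattened to NCHW before the image backbone runs, and only the last ret_layers feature maps are kept. A minimal sketch of that flattening step (shapes are illustrative):

import torch

images = torch.randn(2, 6, 3, 512, 896)            # 2 samples, 6 surround-view cameras each
flat = images.view(-1, *images.shape[-3:])          # what each backbone wrapper does internally
assert flat.shape == (12, 3, 512, 896)
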
/mapmaster/models/backbone/resnet/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResNet
2 |
--------------------------------------------------------------------------------
/mapmaster/models/backbone/resnet/utils.py:
--------------------------------------------------------------------------------
1 | from mmcv.cnn import build_conv_layer, build_norm_layer
2 | from mmcv.runner import Sequential
3 | from torch import nn as nn
4 |
5 |
6 | class ResLayer(Sequential):
7 | """ResLayer to build ResNet style backbone.
8 | Args:
9 | block (nn.Module): block used to build ResLayer.
10 | inplanes (int): inplanes of block.
11 | planes (int): planes of block.
12 | num_blocks (int): number of blocks.
13 | stride (int): stride of the first block. Default: 1
14 | avg_down (bool): Use AvgPool instead of stride conv when
15 | downsampling in the bottleneck. Default: False
16 | conv_cfg (dict): dictionary to construct and config conv layer.
17 | Default: None
18 | norm_cfg (dict): dictionary to construct and config norm layer.
19 | Default: dict(type='BN')
20 | downsample_first (bool): Downsample at the first block or last block.
21 | False for Hourglass, True for ResNet. Default: True
22 | """
23 |
24 | def __init__(
25 | self,
26 | block,
27 | inplanes,
28 | planes,
29 | num_blocks,
30 | stride=1,
31 | avg_down=False,
32 | conv_cfg=None,
33 | norm_cfg=dict(type="BN"),
34 | downsample_first=True,
35 | **kwargs
36 | ):
37 | self.block = block
38 |
39 | downsample = None
40 | if stride != 1 or inplanes != planes * block.expansion:
41 | downsample = []
42 | conv_stride = stride
43 | if avg_down:
44 | conv_stride = 1
45 | downsample.append(
46 | nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False)
47 | )
48 | downsample.extend(
49 | [
50 | build_conv_layer(
51 | conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False
52 | ),
53 | build_norm_layer(norm_cfg, planes * block.expansion)[1],
54 | ]
55 | )
56 | downsample = nn.Sequential(*downsample)
57 |
58 | layers = []
59 | if downsample_first:
60 | layers.append(
61 | block(
62 | inplanes=inplanes,
63 | planes=planes,
64 | stride=stride,
65 | downsample=downsample,
66 | conv_cfg=conv_cfg,
67 | norm_cfg=norm_cfg,
68 | **kwargs
69 | )
70 | )
71 | inplanes = planes * block.expansion
72 | for _ in range(1, num_blocks):
73 | layers.append(
74 | block(inplanes=inplanes, planes=planes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)
75 | )
76 |
77 | else: # downsample_first=False is for HourglassModule
78 | for _ in range(num_blocks - 1):
79 | layers.append(
80 | block(inplanes=inplanes, planes=inplanes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)
81 | )
82 | layers.append(
83 | block(
84 | inplanes=inplanes,
85 | planes=planes,
86 | stride=stride,
87 | downsample=downsample,
88 | conv_cfg=conv_cfg,
89 | norm_cfg=norm_cfg,
90 | **kwargs
91 | )
92 | )
93 | super(ResLayer, self).__init__(*layers)
94 |
--------------------------------------------------------------------------------
/mapmaster/models/backbone/swin_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from .model import SwinTransformer as _SwinTransformer
4 | from torch.utils import model_zoo
5 |
6 | model_urls = {
7 | "tiny": "https://github.com/SwinTransformer/storage/releases/download/v1.0.1/upernet_swin_tiny_patch4_window7_512x512.pth",
8 | "base": "https://github.com/SwinTransformer/storage/releases/download/v1.0.1/upernet_swin_base_patch4_window7_512x512.pth",
9 | }
10 |
11 |
12 | class SwinTransformer(_SwinTransformer):
13 | def __init__(
14 | self,
15 | arch="tiny",
16 | pretrained=False,
17 | window_size=7,
18 | shift_mode=1,
19 | mlp_ratio=4.0,
20 | qkv_bias=True,
21 | qk_scale=None,
22 | drop_rate=0.0,
23 | attn_drop_rate=0.0,
24 | drop_path_rate=0.3,
25 | ape=False,
26 | patch_norm=True,
27 | out_indices=(0, 1, 2, 3),
28 | use_checkpoint=False,
29 | **kwargs
30 | ):
31 | if arch == "tiny":
32 | embed_dim = 96
33 | depths = (2, 2, 6, 2)
34 | num_heads = (3, 6, 12, 24)
35 | elif arch == "small":
36 | embed_dim = 96
37 | depths = (2, 2, 18, 2)
38 | num_heads = (3, 6, 12, 24)
39 | elif arch == "base":
40 | embed_dim = 128
41 | depths = (2, 2, 18, 2)
42 | num_heads = (4, 8, 16, 32)
43 | else:
44 | raise NotImplementedError
45 |
46 | super(SwinTransformer, self).__init__(
47 | embed_dim=embed_dim,
48 | depths=depths,
49 | num_heads=num_heads,
50 | window_size=window_size,
51 | shift_mode=shift_mode,
52 | mlp_ratio=mlp_ratio,
53 | qkv_bias=qkv_bias,
54 | qk_scale=qk_scale,
55 | drop_rate=drop_rate,
56 | attn_drop_rate=attn_drop_rate,
57 | drop_path_rate=drop_path_rate,
58 | ape=ape,
59 | patch_norm=patch_norm,
60 | out_indices=out_indices,
61 | use_checkpoint=use_checkpoint,
62 | **kwargs
63 | )
64 | if isinstance(pretrained, bool):
65 | assert pretrained is True
66 | print(model_urls[arch])
67 | state_dict = model_zoo.load_url(model_urls[arch])["state_dict"]
68 | elif isinstance(pretrained, str):
69 | assert os.path.exists(pretrained)
70 | print(pretrained)
71 | state_dict = torch.load(pretrained)["state_dict"]
72 | else:
73 | raise NotImplementedError
74 |
75 | self.arch = arch
76 | self.init_weights(state_dict=state_dict)
77 |
78 | def init_weights(self, state_dict):
79 | new_state_dict = {}
80 | for key, value in state_dict.items():
81 | if "backbone" in key:
82 | new_state_dict[key.replace("backbone.", "")] = value
83 | ret = self.load_state_dict(new_state_dict, strict=False)
84 | print("Backbone missing_keys: {}".format(ret.missing_keys))
85 | print("Backbone unexpected_keys: {}".format(ret.unexpected_keys))
86 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import TransformerBEVDecoder, DeformTransformerBEVEncoder
2 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | from .deform_transformer import DeformTransformer
2 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .modules import MSDeformAttn
2 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/functions/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5 | # ------------------------------------------------------------------------------------------------
6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7 | # ------------------------------------------------------------------------------------------------
8 |
9 | from .ms_deform_attn_func import MSDeformAttnFunction
10 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/functions/ms_deform_attn_func.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5 | # ------------------------------------------------------------------------------------------------
6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7 | # ------------------------------------------------------------------------------------------------
8 |
9 | from __future__ import absolute_import
10 | from __future__ import print_function
11 | from __future__ import division
12 |
13 | import torch
14 | import torch.nn.functional as F
15 | from torch.autograd import Function
16 | from torch.autograd.function import once_differentiable
17 |
18 | import MultiScaleDeformableAttention as MSDA
19 |
20 |
21 | class MSDeformAttnFunction(Function):
22 | @staticmethod
23 | def forward(
24 | ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step
25 | ):
26 | ctx.im2col_step = im2col_step
27 | output = MSDA.ms_deform_attn_forward(
28 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step
29 | )
30 | ctx.save_for_backward(
31 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights
32 | )
33 | return output
34 |
35 | @staticmethod
36 | @once_differentiable
37 | def backward(ctx, grad_output):
38 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
39 | grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward(
40 | value,
41 | value_spatial_shapes,
42 | value_level_start_index,
43 | sampling_locations,
44 | attention_weights,
45 | grad_output,
46 | ctx.im2col_step,
47 | )
48 |
49 | return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
50 |
51 |
52 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights):
53 | # for debug and test only,
54 | # need to use cuda version instead
55 | N_, S_, M_, D_ = value.shape
56 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape
57 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
58 | sampling_grids = 2 * sampling_locations - 1
59 | sampling_value_list = []
60 | for lid_, (H_, W_) in enumerate(value_spatial_shapes):
61 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
62 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_ * M_, D_, H_, W_)
63 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
64 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
65 | # N_*M_, D_, Lq_, P_
66 | sampling_value_l_ = F.grid_sample(
67 | value_l_, sampling_grid_l_, mode="bilinear", padding_mode="zeros", align_corners=False
68 | )
69 | sampling_value_list.append(sampling_value_l_)
70 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_)
71 | attention_weights = attention_weights.transpose(1, 2).reshape(N_ * M_, 1, Lq_, L_ * P_)
72 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_ * D_, Lq_)
73 | return output.transpose(1, 2).contiguous()
74 |
--------------------------------------------------------------------------------
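
ms_deform_attn_core_pytorch above is the pure-PyTorch reference used to validate the CUDA kernel (see ops/test.py). A minimal sketch of the tensor shapes it expects; all sizes are illustrative:

import torch
from mapmaster.models.bev_decoder.deform_transformer.ops.functions.ms_deform_attn_func import (
    ms_deform_attn_core_pytorch,
)

N, M, D = 1, 8, 32                                              # batch, heads, channels per head
shapes = torch.as_tensor([(16, 16), (8, 8)], dtype=torch.long)  # (n_levels, 2) of per-level (H, W)
S = int(sum(h * w for h, w in shapes))                          # total number of value tokens
Lq, L, P = 100, len(shapes), 4                                  # queries, levels, points per level

value = torch.rand(N, S, M, D)
sampling_locations = torch.rand(N, Lq, M, L, P, 2)              # normalized to [0, 1]
attention_weights = torch.rand(N, Lq, M, L, P)
attention_weights = attention_weights / attention_weights.sum((-1, -2), keepdim=True)

out = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights)
assert out.shape == (N, Lq, M * D)                              # one d_model-sized feature per query
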
/mapmaster/models/bev_decoder/deform_transformer/ops/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # ------------------------------------------------------------------------------------------------
3 | # Deformable DETR
4 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | # ------------------------------------------------------------------------------------------------
7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | # ------------------------------------------------------------------------------------------------
9 |
10 | python setup.py build install
11 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5 | # ------------------------------------------------------------------------------------------------
6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7 | # ------------------------------------------------------------------------------------------------
8 |
9 | from .ms_deform_attn import MSDeformAttn
10 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/modules/ms_deform_attn.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5 | # ------------------------------------------------------------------------------------------------
6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7 | # ------------------------------------------------------------------------------------------------
8 |
9 | from __future__ import absolute_import
10 | from __future__ import print_function
11 | from __future__ import division
12 |
13 | import warnings
14 | import math
15 |
16 | import torch
17 | from torch import nn
18 | import torch.nn.functional as F
19 | from torch.nn.init import xavier_uniform_, constant_
20 |
21 | from ..functions import MSDeformAttnFunction
22 |
23 |
24 | def _is_power_of_2(n):
25 | if (not isinstance(n, int)) or (n < 0):
26 | raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)))
27 | return (n & (n - 1) == 0) and n != 0
28 |
29 |
30 | class MSDeformAttn(nn.Module):
31 | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
32 | """
33 | Multi-Scale Deformable Attention Module
34 | :param d_model hidden dimension
35 | :param n_levels number of feature levels
36 | :param n_heads number of attention heads
37 | :param n_points number of sampling points per attention head per feature level
38 | """
39 | super().__init__()
40 | if d_model % n_heads != 0:
41 | raise ValueError("d_model must be divisible by n_heads, but got {} and {}".format(d_model, n_heads))
42 | _d_per_head = d_model // n_heads
43 | # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
44 | if not _is_power_of_2(_d_per_head):
45 | warnings.warn(
46 | "You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 "
47 | "which is more efficient in our CUDA implementation."
48 | )
49 |
50 | self.im2col_step = 64
51 |
52 | self.d_model = d_model
53 | self.n_levels = n_levels
54 | self.n_heads = n_heads
55 | self.n_points = n_points
56 |
57 | self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2)
58 | self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points)
59 | self.value_proj = nn.Linear(d_model, d_model)
60 | self.output_proj = nn.Linear(d_model, d_model)
61 |
62 | self._reset_parameters()
63 |
64 | def _reset_parameters(self):
65 | constant_(self.sampling_offsets.weight.data, 0.0)
66 | thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
67 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
68 | grid_init = (
69 | (grid_init / grid_init.abs().max(-1, keepdim=True)[0])
70 | .view(self.n_heads, 1, 1, 2)
71 | .repeat(1, self.n_levels, self.n_points, 1)
72 | )
73 | for i in range(self.n_points):
74 | grid_init[:, :, i, :] *= i + 1
75 | with torch.no_grad():
76 | self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))
77 | constant_(self.attention_weights.weight.data, 0.0)
78 | constant_(self.attention_weights.bias.data, 0.0)
79 | xavier_uniform_(self.value_proj.weight.data)
80 | constant_(self.value_proj.bias.data, 0.0)
81 | xavier_uniform_(self.output_proj.weight.data)
82 | constant_(self.output_proj.bias.data, 0.0)
83 |
84 | def forward(
85 | self,
86 | query,
87 | reference_points,
88 | input_flatten,
89 | input_spatial_shapes,
90 | input_level_start_index,
91 | input_padding_mask=None,
92 | ):
93 | """
94 | :param query (N, Length_{query}, C)
95 | :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area
96 | or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes
97 | :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C)
98 | :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]
99 | :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}]
100 | :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements
101 |
102 | :return output (N, Length_{query}, C)
103 | """
104 | N, Len_q, _ = query.shape
105 | N, Len_in, _ = input_flatten.shape
106 | assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in
107 |
108 | value = self.value_proj(input_flatten)
109 | if input_padding_mask is not None:
110 | value = value.masked_fill(input_padding_mask[..., None], float(0))
111 | value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
112 | sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2)
113 | attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
114 | attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
115 | # N, Len_q, n_heads, n_levels, n_points, 2
116 | if reference_points.shape[-1] == 2:
117 | offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1)
118 | sampling_locations = (
119 | reference_points[:, :, None, :, None, :]
120 | + sampling_offsets / offset_normalizer[None, None, None, :, None, :]
121 | )
122 | elif reference_points.shape[-1] == 4:
123 | sampling_locations = (
124 | reference_points[:, :, None, :, None, :2]
125 | + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5
126 | )
127 | else:
128 | raise ValueError(
129 | "Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1])
130 | )
131 | output = MSDeformAttnFunction.apply(
132 | value,
133 | input_spatial_shapes,
134 | input_level_start_index,
135 | sampling_locations,
136 | attention_weights,
137 | self.im2col_step,
138 | )
139 | output = self.output_proj(output)
140 | return output
141 |
--------------------------------------------------------------------------------
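
MSDeformAttn above consumes image features that have been flattened and concatenated across levels, plus the per-level spatial shapes and start offsets described in its forward() docstring. A minimal usage sketch, assuming the MultiScaleDeformableAttention extension has already been built (ops/make.sh) and a CUDA device is available; all sizes are illustrative:

import torch
from mapmaster.models.bev_decoder.deform_transformer.ops.modules import MSDeformAttn

attn = MSDeformAttn(d_model=256, n_levels=2, n_heads=8, n_points=4).cuda()

spatial_shapes = torch.as_tensor([(32, 32), (16, 16)], dtype=torch.long).cuda()
level_start_index = torch.cat((spatial_shapes.new_zeros((1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
src = torch.rand(2, int(spatial_shapes.prod(1).sum()), 256).cuda()   # flattened multi-level features
query = torch.rand(2, 100, 256).cuda()
reference_points = torch.rand(2, 100, 2, 2).cuda()                   # (N, Len_q, n_levels, 2) in [0, 1]

out = attn(query, reference_points, src, spatial_shapes, level_start_index)
assert out.shape == (2, 100, 256)
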
/mapmaster/models/bev_decoder/deform_transformer/ops/setup.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5 | # ------------------------------------------------------------------------------------------------
6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7 | # ------------------------------------------------------------------------------------------------
8 |
9 | import os
10 | import glob
11 |
12 | import torch
13 |
14 | from torch.utils.cpp_extension import CUDA_HOME
15 | from torch.utils.cpp_extension import CppExtension
16 | from torch.utils.cpp_extension import CUDAExtension
17 |
18 | from setuptools import find_packages
19 | from setuptools import setup
20 |
21 | requirements = ["torch", "torchvision"]
22 |
23 |
24 | def get_extensions():
25 | this_dir = os.path.dirname(os.path.abspath(__file__))
26 | extensions_dir = os.path.join(this_dir, "src")
27 |
28 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
29 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
30 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
31 |
32 | sources = main_file + source_cpu
33 | extension = CppExtension
34 | extra_compile_args = {"cxx": []}
35 | define_macros = []
36 |
37 | if torch.cuda.is_available() and CUDA_HOME is not None:
38 | extension = CUDAExtension
39 | sources += source_cuda
40 | define_macros += [("WITH_CUDA", None)]
41 | extra_compile_args["nvcc"] = [
42 | "-DCUDA_HAS_FP16=1",
43 | "-D__CUDA_NO_HALF_OPERATORS__",
44 | "-D__CUDA_NO_HALF_CONVERSIONS__",
45 | "-D__CUDA_NO_HALF2_OPERATORS__",
46 | "-arch=sm_60",
47 | "-gencode=arch=compute_60,code=sm_60",
48 | "-gencode=arch=compute_61,code=sm_61",
49 | "-gencode=arch=compute_70,code=sm_70",
50 | "-gencode=arch=compute_75,code=sm_75",
51 | # "-gencode=arch=compute_80,code=sm_80",
52 | ]
53 | else:
54 |         raise NotImplementedError("CUDA is not available")
55 |
56 | sources = [os.path.join(extensions_dir, s) for s in sources]
57 | include_dirs = [extensions_dir]
58 | ext_modules = [
59 | extension(
60 | "MultiScaleDeformableAttention",
61 | sources,
62 | include_dirs=include_dirs,
63 | define_macros=define_macros,
64 | extra_compile_args=extra_compile_args,
65 | )
66 | ]
67 | return ext_modules
68 |
69 |
70 | setup(
71 | name="MultiScaleDeformableAttention",
72 | version="1.0",
73 | author="Weijie Su",
74 | url="https://github.com/fundamentalvision/Deformable-DETR",
75 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention",
76 | packages=find_packages(
77 | exclude=(
78 | "configs",
79 | "tests",
80 | )
81 | ),
82 | ext_modules=get_extensions(),
83 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
84 | )
85 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/src/cpu/ms_deform_attn_cpu.cpp:
--------------------------------------------------------------------------------
1 | /*!
2 | **************************************************************************************************
3 | * Deformable DETR
4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | **************************************************************************************************
7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | **************************************************************************************************
9 | */
10 |
11 | #include <vector>
12 |
13 | #include <ATen/ATen.h>
14 | #include <ATen/cuda/CUDAContext.h>
15 |
16 |
17 | at::Tensor
18 | ms_deform_attn_cpu_forward(
19 | const at::Tensor &value,
20 | const at::Tensor &spatial_shapes,
21 | const at::Tensor &level_start_index,
22 | const at::Tensor &sampling_loc,
23 | const at::Tensor &attn_weight,
24 | const int im2col_step)
25 | {
26 | AT_ERROR("Not implement on cpu");
27 | }
28 |
29 | std::vector<at::Tensor>
30 | ms_deform_attn_cpu_backward(
31 | const at::Tensor &value,
32 | const at::Tensor &spatial_shapes,
33 | const at::Tensor &level_start_index,
34 | const at::Tensor &sampling_loc,
35 | const at::Tensor &attn_weight,
36 | const at::Tensor &grad_output,
37 | const int im2col_step)
38 | {
39 | AT_ERROR("Not implement on cpu");
40 | }
41 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/src/cpu/ms_deform_attn_cpu.h:
--------------------------------------------------------------------------------
1 | /*!
2 | **************************************************************************************************
3 | * Deformable DETR
4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | **************************************************************************************************
7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | **************************************************************************************************
9 | */
10 |
11 | #pragma once
12 | #include <torch/extension.h>
13 |
14 | at::Tensor
15 | ms_deform_attn_cpu_forward(
16 | const at::Tensor &value,
17 | const at::Tensor &spatial_shapes,
18 | const at::Tensor &level_start_index,
19 | const at::Tensor &sampling_loc,
20 | const at::Tensor &attn_weight,
21 | const int im2col_step);
22 |
23 | std::vector<at::Tensor>
24 | ms_deform_attn_cpu_backward(
25 | const at::Tensor &value,
26 | const at::Tensor &spatial_shapes,
27 | const at::Tensor &level_start_index,
28 | const at::Tensor &sampling_loc,
29 | const at::Tensor &attn_weight,
30 | const at::Tensor &grad_output,
31 | const int im2col_step);
32 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/src/cuda/ms_deform_attn_cuda.cu:
--------------------------------------------------------------------------------
1 | /*!
2 | **************************************************************************************************
3 | * Deformable DETR
4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | **************************************************************************************************
7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | **************************************************************************************************
9 | */
10 |
11 | #include <vector>
12 | #include "cuda/ms_deform_im2col_cuda.cuh"
13 |
14 | #include <ATen/ATen.h>
15 | #include <ATen/cuda/CUDAContext.h>
16 | #include <cuda.h>
17 | #include <cuda_runtime.h>
18 |
19 |
20 | at::Tensor ms_deform_attn_cuda_forward(
21 | const at::Tensor &value,
22 | const at::Tensor &spatial_shapes,
23 | const at::Tensor &level_start_index,
24 | const at::Tensor &sampling_loc,
25 | const at::Tensor &attn_weight,
26 | const int im2col_step)
27 | {
28 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
29 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
30 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
31 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
32 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
33 |
34 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
35 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
36 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
37 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
38 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
39 |
40 | const int batch = value.size(0);
41 | const int spatial_size = value.size(1);
42 | const int num_heads = value.size(2);
43 | const int channels = value.size(3);
44 |
45 | const int num_levels = spatial_shapes.size(0);
46 |
47 | const int num_query = sampling_loc.size(1);
48 | const int num_point = sampling_loc.size(4);
49 |
50 | const int im2col_step_ = std::min(batch, im2col_step);
51 |
52 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
53 |
54 | auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
55 |
56 | const int batch_n = im2col_step_;
57 | auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
58 | auto per_value_size = spatial_size * num_heads * channels;
59 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
60 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
61 | for (int n = 0; n < batch/im2col_step_; ++n)
62 | {
63 | auto columns = output_n.select(0, n);
64 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
65 | ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
66 |             value.data<scalar_t>() + n * im2col_step_ * per_value_size,
67 |             spatial_shapes.data<int64_t>(),
68 |             level_start_index.data<int64_t>(),
69 |             sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
70 |             attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
71 |             batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
72 |             columns.data<scalar_t>());
73 |
74 | }));
75 | }
76 |
77 | output = output.view({batch, num_query, num_heads*channels});
78 |
79 | return output;
80 | }
81 |
82 |
83 | std::vector<at::Tensor> ms_deform_attn_cuda_backward(
84 | const at::Tensor &value,
85 | const at::Tensor &spatial_shapes,
86 | const at::Tensor &level_start_index,
87 | const at::Tensor &sampling_loc,
88 | const at::Tensor &attn_weight,
89 | const at::Tensor &grad_output,
90 | const int im2col_step)
91 | {
92 |
93 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
94 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
95 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
96 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
97 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
98 | AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");
99 |
100 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
101 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
102 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
103 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
104 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
105 | AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor");
106 |
107 | const int batch = value.size(0);
108 | const int spatial_size = value.size(1);
109 | const int num_heads = value.size(2);
110 | const int channels = value.size(3);
111 |
112 | const int num_levels = spatial_shapes.size(0);
113 |
114 | const int num_query = sampling_loc.size(1);
115 | const int num_point = sampling_loc.size(4);
116 |
117 | const int im2col_step_ = std::min(batch, im2col_step);
118 |
119 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
120 |
121 | auto grad_value = at::zeros_like(value);
122 | auto grad_sampling_loc = at::zeros_like(sampling_loc);
123 | auto grad_attn_weight = at::zeros_like(attn_weight);
124 |
125 | const int batch_n = im2col_step_;
126 | auto per_value_size = spatial_size * num_heads * channels;
127 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
128 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
129 | auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
130 |
131 | for (int n = 0; n < batch/im2col_step_; ++n)
132 | {
133 | auto grad_output_g = grad_output_n.select(0, n);
134 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] {
135 | ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(),
136 |                                         grad_output_g.data<scalar_t>(),
137 |                                         value.data<scalar_t>() + n * im2col_step_ * per_value_size,
138 |                                         spatial_shapes.data<int64_t>(),
139 |                                         level_start_index.data<int64_t>(),
140 |                                         sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
141 |                                         attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
142 |                                         batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
143 |                                         grad_value.data<scalar_t>() + n * im2col_step_ * per_value_size,
144 |                                         grad_sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
145 |                                         grad_attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size);
146 |
147 | }));
148 | }
149 |
150 | return {
151 | grad_value, grad_sampling_loc, grad_attn_weight
152 | };
153 | }
154 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/src/cuda/ms_deform_attn_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | **************************************************************************************************
3 | * Deformable DETR
4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | **************************************************************************************************
7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | **************************************************************************************************
9 | */
10 |
11 | #pragma once
12 | #include <torch/extension.h>
13 |
14 | at::Tensor ms_deform_attn_cuda_forward(
15 | const at::Tensor &value,
16 | const at::Tensor &spatial_shapes,
17 | const at::Tensor &level_start_index,
18 | const at::Tensor &sampling_loc,
19 | const at::Tensor &attn_weight,
20 | const int im2col_step);
21 |
22 | std::vector<at::Tensor> ms_deform_attn_cuda_backward(
23 | const at::Tensor &value,
24 | const at::Tensor &spatial_shapes,
25 | const at::Tensor &level_start_index,
26 | const at::Tensor &sampling_loc,
27 | const at::Tensor &attn_weight,
28 | const at::Tensor &grad_output,
29 | const int im2col_step);
30 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/src/ms_deform_attn.h:
--------------------------------------------------------------------------------
1 | /*!
2 | **************************************************************************************************
3 | * Deformable DETR
4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | **************************************************************************************************
7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | **************************************************************************************************
9 | */
10 |
11 | #pragma once
12 |
13 | #include "cpu/ms_deform_attn_cpu.h"
14 |
15 | #ifdef WITH_CUDA
16 | #include "cuda/ms_deform_attn_cuda.h"
17 | #endif
18 |
19 |
20 | at::Tensor
21 | ms_deform_attn_forward(
22 | const at::Tensor &value,
23 | const at::Tensor &spatial_shapes,
24 | const at::Tensor &level_start_index,
25 | const at::Tensor &sampling_loc,
26 | const at::Tensor &attn_weight,
27 | const int im2col_step)
28 | {
29 | if (value.type().is_cuda())
30 | {
31 | #ifdef WITH_CUDA
32 | return ms_deform_attn_cuda_forward(
33 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
34 | #else
35 | AT_ERROR("Not compiled with GPU support");
36 | #endif
37 | }
38 | AT_ERROR("Not implemented on the CPU");
39 | }
40 |
41 | std::vector<at::Tensor>
42 | ms_deform_attn_backward(
43 | const at::Tensor &value,
44 | const at::Tensor &spatial_shapes,
45 | const at::Tensor &level_start_index,
46 | const at::Tensor &sampling_loc,
47 | const at::Tensor &attn_weight,
48 | const at::Tensor &grad_output,
49 | const int im2col_step)
50 | {
51 | if (value.type().is_cuda())
52 | {
53 | #ifdef WITH_CUDA
54 | return ms_deform_attn_cuda_backward(
55 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
56 | #else
57 | AT_ERROR("Not compiled with GPU support");
58 | #endif
59 | }
60 | AT_ERROR("Not implemented on the CPU");
61 | }
62 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/src/vision.cpp:
--------------------------------------------------------------------------------
1 | /*!
2 | **************************************************************************************************
3 | * Deformable DETR
4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6 | **************************************************************************************************
7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8 | **************************************************************************************************
9 | */
10 |
11 | #include "ms_deform_attn.h"
12 |
13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward");
15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward");
16 | }
17 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/ops/test.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------------------------
2 | # Deformable DETR
3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5 | # ------------------------------------------------------------------------------------------------
6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7 | # ------------------------------------------------------------------------------------------------
8 |
9 | from __future__ import absolute_import
10 | from __future__ import print_function
11 | from __future__ import division
12 |
13 | import torch
14 | from torch.autograd import gradcheck
15 |
16 | from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch
17 |
18 |
19 | N, M, D = 1, 2, 2
20 | Lq, L, P = 2, 2, 2
21 | shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()
22 | level_start_index = torch.cat((shapes.new_zeros((1,)), shapes.prod(1).cumsum(0)[:-1]))
23 | S = sum([(H * W).item() for H, W in shapes])
24 |
25 |
26 | torch.manual_seed(3)
27 |
28 |
29 | @torch.no_grad()
30 | def check_forward_equal_with_pytorch_double():
31 | value = torch.rand(N, S, M, D).cuda() * 0.01
32 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
33 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
34 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
35 | im2col_step = 2
36 | output_pytorch = (
37 | ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double())
38 | .detach()
39 | .cpu()
40 | )
41 | output_cuda = (
42 | MSDeformAttnFunction.apply(
43 | value.double(),
44 | shapes,
45 | level_start_index,
46 | sampling_locations.double(),
47 | attention_weights.double(),
48 | im2col_step,
49 | )
50 | .detach()
51 | .cpu()
52 | )
53 | fwdok = torch.allclose(output_cuda, output_pytorch)
54 | max_abs_err = (output_cuda - output_pytorch).abs().max()
55 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()
56 |
57 | print(
58 | f"* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}"
59 | )
60 |
61 |
62 | @torch.no_grad()
63 | def check_forward_equal_with_pytorch_float():
64 | value = torch.rand(N, S, M, D).cuda() * 0.01
65 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
66 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
67 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
68 | im2col_step = 2
69 | output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu()
70 | output_cuda = (
71 | MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step)
72 | .detach()
73 | .cpu()
74 | )
75 | fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)
76 | max_abs_err = (output_cuda - output_pytorch).abs().max()
77 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()
78 |
79 | print(
80 | f"* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}"
81 | )
82 |
83 |
84 | def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True):
85 |
86 | value = torch.rand(N, S, M, channels).cuda() * 0.01
87 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
88 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
89 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
90 | im2col_step = 2
91 | func = MSDeformAttnFunction.apply
92 |
93 | value.requires_grad = grad_value
94 | sampling_locations.requires_grad = grad_sampling_loc
95 | attention_weights.requires_grad = grad_attn_weight
96 |
97 | gradok = gradcheck(
98 | func,
99 | (
100 | value.double(),
101 | shapes,
102 | level_start_index,
103 | sampling_locations.double(),
104 | attention_weights.double(),
105 | im2col_step,
106 | ),
107 | )
108 |
109 | print(f"* {gradok} check_gradient_numerical(D={channels})")
110 |
111 |
112 | if __name__ == "__main__":
113 | check_forward_equal_with_pytorch_double()
114 | check_forward_equal_with_pytorch_float()
115 |
116 | for channels in [30, 32, 64, 71, 1025, 2048, 3096]:
117 | check_gradient_numerical(channels, True, True, True)
118 |
--------------------------------------------------------------------------------
/mapmaster/models/bev_decoder/deform_transformer/position_encoding.py:
--------------------------------------------------------------------------------
1 | """
2 | Various positional encodings for the transformer.
3 | """
4 | import math
5 | import torch
6 | from torch import nn
7 |
8 | class PositionEmbeddingSine(nn.Module):
9 | """
10 | This is a more standard version of the position embedding, very similar to the one
11 | used by the Attention is all you need paper, generalized to work on images.
12 | """
13 |
14 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None):
15 | super().__init__()
16 | self.num_pos_feats = num_pos_feats
17 | self.temperature = temperature
18 | self.normalize = normalize
19 | if scale is not None and normalize is False:
20 | raise ValueError("normalize should be True if scale is passed")
21 | if scale is None:
22 | scale = 2 * math.pi
23 | self.scale = scale
24 |
25 | def forward(self, mask):
26 | assert mask is not None
27 | not_mask = ~mask
28 | y_embed = not_mask.cumsum(1, dtype=torch.float32)
29 | x_embed = not_mask.cumsum(2, dtype=torch.float32)
30 | if self.normalize:
31 | eps = 1e-6
32 | y_embed = (y_embed - 0.5) / (y_embed[:, -1:, :] + eps) * self.scale
33 | x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale
34 |
35 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=mask.device)
36 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2))
37 |
38 | pos_x = x_embed[:, :, :, None] / dim_t
39 | pos_y = y_embed[:, :, :, None] / dim_t
40 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
41 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
42 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
43 | return pos
44 |
45 | class PositionEmbeddingLearned(nn.Module):
46 | """
47 | Absolute pos embedding, learned.
48 | """
49 |
50 | def __init__(self, num_pos=(50, 50), num_pos_feats=256):
51 | super().__init__()
52 | self.num_pos = num_pos
53 | self.pos_embed = nn.Embedding(num_pos[0] * num_pos[1], num_pos_feats)
54 | self.reset_parameters()
55 |
56 | def reset_parameters(self):
57 | nn.init.normal_(self.pos_embed.weight)
58 |
59 | def forward(self, mask):
60 | h, w = mask.shape[-2:]
61 | pos = self.pos_embed.weight.view(*self.num_pos, -1)[:h, :w]
62 | pos = pos.permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1)
63 | return pos
64 |
--------------------------------------------------------------------------------
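A minimal shape check for the two embeddings above (not part of the repo; it assumes PositionEmbeddingSine and PositionEmbeddingLearned from this file are in scope):

import torch

pe_sine = PositionEmbeddingSine(num_pos_feats=64)
mask = torch.zeros(2, 32, 16, dtype=torch.bool)      # (B, H, W); True marks padded pixels
print(pe_sine(mask).shape)                           # torch.Size([2, 64, 32, 16])

pe_learned = PositionEmbeddingLearned(num_pos=(50, 50), num_pos_feats=64)
print(pe_learned(mask).shape)                        # torch.Size([2, 64, 32, 16])
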
/mapmaster/models/bev_decoder/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | from mapmaster.models.bev_decoder.transformer import Transformer
5 | from mapmaster.models.bev_decoder.deform_transformer import DeformTransformer
6 |
7 | class TransformerBEVDecoder(nn.Module):
8 | def __init__(self, key='im_bkb_features', **kwargs):
9 | super(TransformerBEVDecoder, self).__init__()
10 | self.bev_encoder = Transformer(**kwargs)
11 | self.key = key
12 |
13 | def forward(self, inputs):
14 | assert self.key in inputs
15 | feats = inputs[self.key]
16 | fuse_feats = feats[-1]
17 | fuse_feats = fuse_feats.reshape(*inputs['images'].shape[:2], *fuse_feats.shape[-3:])
18 | fuse_feats = torch.cat(torch.unbind(fuse_feats, dim=1), dim=-1)
19 |
20 | cameras_info = {
21 | 'extrinsic': inputs.get('extrinsic', None),
22 | 'intrinsic': inputs.get('intrinsic', None),
23 | 'ida_mats': inputs.get('ida_mats', None),
24 | 'do_flip': inputs['extra_infos'].get('do_flip', None)
25 | }
26 |
27 | _, _, bev_feats = self.bev_encoder(fuse_feats, cameras_info=cameras_info)
28 |
29 | return {"bev_enc_features": list(bev_feats)}
30 |
31 | class DeformTransformerBEVEncoder(nn.Module):
32 | def __init__(self, **kwargs):
33 | super(DeformTransformerBEVEncoder, self).__init__()
34 | self.bev_encoder = DeformTransformer(**kwargs)
35 |
36 | def forward(self, inputs):
37 | assert "im_bkb_features" in inputs
38 | feats = inputs["im_bkb_features"]
39 | for i in range(len(feats)):
40 | feats[i] = feats[i].reshape(*inputs["images"].shape[:2], *feats[i].shape[-3:])
41 | feats[i] = feats[i].permute(0, 2, 3, 1, 4)
42 | feats[i] = feats[i].reshape(*feats[i].shape[:3], -1)
43 | cameras_info = {
44 | 'extrinsic': inputs.get('extrinsic', None),
45 | 'intrinsic': inputs.get('intrinsic', None),
46 | 'do_flip': inputs['extra_infos'].get('do_flip', None)
47 | }
48 | # src_feats: (N, H1 * W1, C) tgt_feats: # (M, N, H2 * W2, C)
49 | _, _, bev_feats = self.bev_encoder(feats, cameras_info=cameras_info)
50 |
51 | return {
52 | "bev_enc_features": list(bev_feats),
53 | }
54 |
--------------------------------------------------------------------------------
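A shape walk-through of the multi-camera fusion performed in TransformerBEVDecoder.forward above (an illustrative sketch; the sizes are made up, not taken from the configs):

import torch

B, N, C, H, W = 2, 6, 256, 16, 28            # batch, cameras, channels, feature height/width
feat = torch.randn(B * N, C, H, W)           # backbone output with cameras folded into the batch dim
feat = feat.reshape(B, N, C, H, W)           # split cameras out again (uses inputs['images'].shape[:2])
fused = torch.cat(torch.unbind(feat, dim=1), dim=-1)
print(fused.shape)                           # torch.Size([2, 256, 16, 168]): one wide multi-view strip per sample
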
/mapmaster/models/ins_decoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import Mask2formerINSDecoder, PointMask2formerINSDecoder
2 |
--------------------------------------------------------------------------------
/mapmaster/models/ins_decoder/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from mapmaster.models.ins_decoder.mask2former import MultiScaleMaskedTransformerDecoder
4 | from mapmaster.models.ins_decoder.pointmask2former import PointMask2TransformerDecoder
5 |
6 |
7 | class INSDecoderBase(nn.Module):
8 | def __init__(self, decoder_ids=(5, ), tgt_shape=None):
9 | super(INSDecoderBase, self).__init__()
10 | self.decoder_ids = tuple(decoder_ids) # [0, 1, 2, 3, 4, 5]
11 | self.tgt_shape = tgt_shape
12 | self.bev_decoder = None
13 |
14 | def forward(self, inputs):
15 | assert "bev_enc_features" in inputs
16 | bev_enc_features = inputs["bev_enc_features"]
17 | if self.tgt_shape is not None:
18 | bev_enc_features = [self.up_sample(x) for x in inputs["bev_enc_features"]]
19 | out = self.bev_decoder(bev_enc_features[-1:], bev_enc_features[-1])
20 | return {"mask_features": [out["pred_masks"][1:][i] for i in self.decoder_ids],
21 | "obj_scores": [out["pred_logits"][1:][i] for i in self.decoder_ids],
22 | "decoder_outputs": [out["decoder_outputs"][1:][i] for i in self.decoder_ids],
23 | "bev_enc_features": bev_enc_features}
24 |
25 | def up_sample(self, x, tgt_shape=None):
26 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape
27 | if tuple(x.shape[-2:]) == tuple(tgt_shape):
28 | return x
29 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True)
30 |
31 | class Mask2formerINSDecoder(INSDecoderBase):
32 | def __init__(self, decoder_ids=(5, ), tgt_shape=None, **kwargs):
33 | super(Mask2formerINSDecoder, self).__init__(decoder_ids, tgt_shape)
34 | self.bev_decoder = MultiScaleMaskedTransformerDecoder(**kwargs)
35 |
36 | class PointMask2formerINSDecoder(INSDecoderBase):
37 | def __init__(self, decoder_ids=(5, ), tgt_shape=None, **kwargs):
38 | super(PointMask2formerINSDecoder, self).__init__(decoder_ids, tgt_shape)
39 | self.bev_decoder = PointMask2TransformerDecoder(**kwargs)
40 |
--------------------------------------------------------------------------------
/mapmaster/models/network.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mapmaster.models import backbone, bev_decoder, ins_decoder, output_head
3 | # os.environ['TORCH_DISTRIBUTED_DEBUG'] = "INFO"
4 | # warnings.filterwarnings('ignore')
5 |
6 |
7 | class MapMaster(nn.Module):
8 | def __init__(self, model_config, *args, **kwargs):
9 | super(MapMaster, self).__init__()
10 | self.im_backbone = self.create_backbone(**model_config["im_backbone"])
11 | self.bev_decoder = self.create_bev_decoder(**model_config["bev_decoder"])
12 | self.ins_decoder = self.create_ins_decoder(**model_config["ins_decoder"])
13 | self.output_head = self.create_output_head(**model_config["output_head"])
14 | self.post_processor = self.create_post_processor(**model_config["post_processor"])
15 |
16 | def forward(self, inputs):
17 | outputs = {}
18 | outputs.update({k: inputs[k] for k in ["images", "extra_infos"]})
19 | outputs.update({k: inputs[k].float() for k in ["extrinsic", "intrinsic"]})
20 | if "ida_mats" in inputs:
21 | outputs.update({"ida_mats": inputs["ida_mats"].float()})
22 | outputs.update(self.im_backbone(outputs))
23 | outputs.update(self.bev_decoder(outputs))
24 | outputs.update(self.ins_decoder(outputs))
25 | outputs.update(self.output_head(outputs))
26 | return outputs
27 |
28 | @staticmethod
29 | def create_backbone(arch_name, ret_layers, bkb_kwargs, fpn_kwargs, up_shape=None):
30 | __factory_dict__ = {
31 | "resnet": backbone.ResNetBackbone,
32 | "efficient_net": backbone.EfficientNetBackbone,
33 | "swin_transformer": backbone.SwinTRBackbone,
34 | }
35 | return __factory_dict__[arch_name](bkb_kwargs, fpn_kwargs, up_shape, ret_layers)
36 |
37 | @staticmethod
38 | def create_bev_decoder(arch_name, net_kwargs):
39 | __factory_dict__ = {
40 | "transformer": bev_decoder.TransformerBEVDecoder,
41 | "ipm_deformable_transformer": bev_decoder.DeformTransformerBEVEncoder,
42 | }
43 | return __factory_dict__[arch_name](**net_kwargs)
44 |
45 | @staticmethod
46 | def create_ins_decoder(arch_name, net_kwargs):
47 | __factory_dict__ = {
48 | "mask2former": ins_decoder.Mask2formerINSDecoder,
49 | "line_aware_decoder": ins_decoder.PointMask2formerINSDecoder,
50 | }
51 |
52 | return __factory_dict__[arch_name](**net_kwargs)
53 |
54 | @staticmethod
55 | def create_output_head(arch_name, net_kwargs):
56 | __factory_dict__ = {
57 | "bezier_output_head": output_head.PiecewiseBezierMapOutputHead,
58 | "pivot_point_predictor": output_head.PivotMapOutputHead,
59 | }
60 | return __factory_dict__[arch_name](**net_kwargs)
61 |
62 | @staticmethod
63 | def create_post_processor(arch_name, net_kwargs):
64 | __factory_dict__ = {
65 | "bezier_post_processor": output_head.PiecewiseBezierMapPostProcessor,
66 | "pivot_post_processor": output_head.PivotMapPostProcessor,
67 | }
68 | return __factory_dict__[arch_name](**net_kwargs)
69 |
--------------------------------------------------------------------------------
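The factory methods above all expect model_config to carry one sub-dict per stage, holding an arch_name plus its kwargs. A skeleton of that layout (values are placeholders; the real settings live in configs/*.py):

model_config = dict(
    im_backbone=dict(arch_name="resnet", ret_layers=..., bkb_kwargs={}, fpn_kwargs={}, up_shape=None),
    bev_decoder=dict(arch_name="transformer", net_kwargs={}),
    ins_decoder=dict(arch_name="line_aware_decoder", net_kwargs={}),
    output_head=dict(arch_name="pivot_point_predictor", net_kwargs={}),
    post_processor=dict(arch_name="pivot_post_processor", net_kwargs={}),
)
# model = MapMaster(model_config)   # instantiated like this once the placeholder kwargs are filled in
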
/mapmaster/models/output_head/__init__.py:
--------------------------------------------------------------------------------
1 | from .bezier_outputs import PiecewiseBezierMapOutputHead
2 | from .bezier_post_processor import PiecewiseBezierMapPostProcessor
3 | from .pivot_outputs import PivotMapOutputHead
4 | from .pivot_post_processor import PivotMapPostProcessor
--------------------------------------------------------------------------------
/mapmaster/models/output_head/bezier_outputs.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class FFN(nn.Module):
7 | """ Very simple multi-layer perceptron (also called FFN)"""
8 |
9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, basic_type='linear'):
10 | super().__init__()
11 | self.basic_type = basic_type
12 | if output_dim == 0:
13 | self.basic_type = "identity"
14 | self.num_layers = num_layers
15 | h = [hidden_dim] * (num_layers - 1)
16 | self.layers = nn.ModuleList(self.basic_layer(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
17 |
18 | def forward(self, x):
19 | for i, layer in enumerate(self.layers):
20 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
21 | return x
22 |
23 | def basic_layer(self, n, k):
24 | if self.basic_type == 'linear':
25 | return nn.Linear(n, k)
26 | elif self.basic_type == 'conv':
27 | return nn.Conv2d(n, k, kernel_size=1, stride=1)
28 | elif self.basic_type == 'identity':
29 | return nn.Identity()
30 | else:
31 | raise NotImplementedError
32 |
33 |
34 | class PiecewiseBezierMapOutputHead(nn.Module):
35 | def __init__(self, in_channel, num_queries, tgt_shape, num_degree, max_pieces, bev_channels=-1, ins_channel=64):
36 | super(PiecewiseBezierMapOutputHead, self).__init__()
37 | self.num_queries = num_queries
38 | self.num_classes = len(num_queries)
39 | self.tgt_shape = tgt_shape
40 | self.bev_channels = bev_channels
41 | self.semantic_heads = None
42 | if self.bev_channels > 0:
43 | self.semantic_heads = nn.ModuleList(
44 | nn.Sequential(nn.Conv2d(bev_channels, 2, kernel_size=1, stride=1)) for _ in range(self.num_classes)
45 | )
46 | self.num_degree = num_degree
47 | self.max_pieces = max_pieces
48 | self.num_ctr_im = [(n + 1) for n in self.max_pieces]
49 | self.num_ctr_ex = [n * (d - 1) for n, d in zip(self.max_pieces, self.num_degree)]
50 | _N = self.num_classes
51 |
52 | _C = ins_channel
53 | self.im_ctr_heads = nn.ModuleList(FFN(in_channel, 256, (self.num_ctr_im[i] * 2) * _C, 3) for i in range(_N))
54 | self.ex_ctr_heads = nn.ModuleList(FFN(in_channel, 256, (self.num_ctr_ex[i] * 2) * _C, 3) for i in range(_N))
55 | self.npiece_heads = nn.ModuleList(FFN(in_channel, 256, self.max_pieces[i], 3) for i in range(_N))
56 | self.gap_layer = nn.AdaptiveAvgPool2d((1, 1))
57 | self.coords = self.compute_locations(device='cuda')
58 | self.coords_head = FFN(2, 256, _C, 3, 'conv')
59 |
60 | def forward(self, inputs):
61 | num_decoders = len(inputs["mask_features"])
62 | dt_obj_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
63 | dt_ins_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
64 | im_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
65 | ex_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
66 | dt_end_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
67 | coords_feats = self.coords_head.forward(self.coords.repeat((inputs["mask_features"][0].shape[0], 1, 1, 1)))
68 | for i in range(num_decoders):
69 | x_ins_cw = inputs["mask_features"][i].split(self.num_queries, dim=1)
70 | x_obj_cw = inputs["obj_scores"][i].split(self.num_queries, dim=1)
71 | x_qry_cw = inputs["decoder_outputs"][i].split(self.num_queries, dim=1)
72 | batch_size = x_qry_cw[0].shape[0]
73 | for j in range(self.num_classes):
74 | num_qry = self.num_queries[j]
75 | # if self.training:
76 | dt_ins_masks[i][j] = self.up_sample(x_ins_cw[j])
77 | dt_obj_logit[i][j] = x_obj_cw[j]
78 | dt_end_logit[i][j] = self.npiece_heads[j](x_qry_cw[j])
79 | # im
80 | im_feats = self.im_ctr_heads[j](x_qry_cw[j])
81 | im_feats = im_feats.reshape(batch_size, num_qry, self.num_ctr_im[j] * 2, -1).flatten(1, 2)
82 | im_coords_map = torch.einsum("bqc,bchw->bqhw", im_feats, coords_feats)
83 | im_coords = self.gap_layer(im_coords_map)
84 | im_ctr_coord[i][j] = im_coords.reshape(batch_size, num_qry, self.max_pieces[j] + 1, 2)
85 | # ex
86 | if self.num_ctr_ex[j] == 0:
87 | ex_ctr_coord[i][j] = torch.zeros(batch_size, num_qry, self.max_pieces[j], 0, 2).cuda()
88 | else:
89 | ex_feats = self.ex_ctr_heads[j](x_qry_cw[j])
90 | ex_feats = ex_feats.reshape(batch_size, num_qry, self.num_ctr_ex[j] * 2, -1).flatten(1, 2)
91 | ex_coords_map = torch.einsum("bqc,bchw->bqhw", ex_feats, coords_feats)
92 | ex_coords = self.gap_layer(ex_coords_map)
93 | ex_ctr_coord[i][j] = ex_coords.reshape(batch_size, num_qry, self.max_pieces[j], self.num_degree[j] - 1, 2)
94 | ret = {"outputs": {"obj_logits": dt_obj_logit, "ins_masks": dt_ins_masks,
95 | "ctr_im": im_ctr_coord, "ctr_ex": ex_ctr_coord, "end_logits": dt_end_logit}}
96 | if self.semantic_heads is not None:
97 | num_decoders = len(inputs["bev_enc_features"])
98 | dt_sem_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
99 | for i in range(num_decoders):
100 | x_sem = inputs["bev_enc_features"][i]
101 | for j in range(self.num_classes):
102 | dt_sem_masks[i][j] = self.up_sample(self.semantic_heads[j](x_sem))
103 | ret["outputs"].update({"sem_masks": dt_sem_masks})
104 | return ret
105 |
106 | def up_sample(self, x, tgt_shape=None):
107 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape
108 | if tuple(x.shape[-2:]) == tuple(tgt_shape):
109 | return x
110 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True)
111 |
112 | def compute_locations(self, stride=1, device='cpu'):
113 |
114 | fh, fw = self.tgt_shape
115 |
116 | shifts_x = torch.arange(0, fw * stride, step=stride, dtype=torch.float32, device=device)
117 | shifts_y = torch.arange(0, fh * stride, step=stride, dtype=torch.float32, device=device)
118 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
119 | shift_x = shift_x.reshape(-1)
120 | shift_y = shift_y.reshape(-1)
121 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2
122 |
123 | locations = locations.unsqueeze(0).permute(0, 2, 1).contiguous().float().view(1, 2, fh, fw)
124 | locations[:, 0, :, :] /= fw
125 | locations[:, 1, :, :] /= fh
126 |
127 | return locations
128 |
--------------------------------------------------------------------------------
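compute_locations above builds a (1, 2, H, W) grid of pixel coordinates normalised to [0, 1); the control-point heads then read coordinates out of this grid via an einsum with coords_head features followed by global average pooling. A miniature of the grid itself (illustrative, not part of the repo):

import torch

fh, fw = 2, 3                                          # pretend tgt_shape is (2, 3)
sy, sx = torch.meshgrid(torch.arange(fh, dtype=torch.float32),
                        torch.arange(fw, dtype=torch.float32))
grid = torch.stack((sx.reshape(-1), sy.reshape(-1)), dim=1)   # (fh*fw, 2), x before y
grid = grid.t().reshape(1, 2, fh, fw)
grid[:, 0] /= fw                                       # x normalised by width
grid[:, 1] /= fh                                       # y normalised by height
print(grid[0, 0])   # tensor([[0.0000, 0.3333, 0.6667], [0.0000, 0.3333, 0.6667]])
print(grid[0, 1])   # tensor([[0.0000, 0.0000, 0.0000], [0.5000, 0.5000, 0.5000]])
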
/mapmaster/models/output_head/line_matching.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def seq_matching_dist_parallel(cost, gt_lens, coe_endpts=0):
4 | # Time complexity: O(m*n)
5 | bs, m, n = cost.shape
6 | assert m <= n
7 | min_cost = np.ones((bs, m, n)) * np.inf
8 | mem_sort_value = np.ones((bs, m, n)) * np.inf # v[i][j] = np.min(min_cost[i][:j+1])
9 |
10 | # initialization
11 | for j in range(0, n):
12 | if j == 0:
13 | min_cost[:, 0, j] = cost[:, 0, j]
14 | mem_sort_value[:, 0, j] = min_cost[:, 0, 0]
15 |
16 | for i in range(1, m):
17 | for j in range(i, n):
18 | min_cost[:, i, j] = mem_sort_value[:, i-1, j-1] + cost[:, i, j]
19 | indexes = (min_cost[:, i, j] < mem_sort_value[:, i, j-1])
20 |             indexes_inv = np.array(1 - indexes, dtype=bool)
21 | mem_sort_value[indexes, i, j] = min_cost[indexes, i, j]
22 | mem_sort_value[indexes_inv, i, j] = mem_sort_value[indexes_inv, i, j-1]
23 |
24 | indexes = []
25 | for i, ll in enumerate(gt_lens):
26 | indexes.append([i, ll-1, n-1])
27 | indexes = np.array(indexes)
28 | xs, ys, zs = indexes[:, 0], indexes[:, 1], indexes[:, 2]
29 | res_cost = min_cost[xs, ys, zs] + (cost[xs, 0, 0] + cost[xs, ys, zs]) * coe_endpts
30 | return res_cost / (indexes[:, 1]+1+coe_endpts*2)
31 |
32 | def pivot_dynamic_matching(cost: np.ndarray):
33 | # Time complexity: O(m*n)
34 | m, n = cost.shape
35 | assert m <= n
36 |
37 | min_cost = np.ones((m, n)) * np.inf
38 | mem_sort_value = np.ones((m, n)) * np.inf
39 | match_res1 = [[] for _ in range(n)]
40 | match_res2 = [[] for _ in range(n)]
41 |
42 | # initialization
43 | for j in range(0, n-m+1):
44 | match_res1[j] = [0]
45 | mem_sort_value[0][j] = cost[0][0]
46 | if j == 0:
47 | min_cost[0][j] = cost[0][0]
48 |
49 | for i in range(1, m):
50 | for j in range(i, n-m + i+1):
51 | min_cost[i][j] = mem_sort_value[i-1][j-1] + cost[i][j]
52 | if min_cost[i][j] < mem_sort_value[i][j-1]:
53 | mem_sort_value[i][j] = min_cost[i][j]
54 | if i < m-1:
55 | match_res2[j] = match_res1[j-1] + [j]
56 | else:
57 | mem_sort_value[i][j] = mem_sort_value[i][j-1]
58 | if i < m -1:
59 | match_res2[j] = match_res2[j-1]
60 | if i < m-1:
61 | match_res1, match_res2 = match_res2.copy(), [[] for _ in range(n)]
62 |
63 | total_cost = min_cost[-1][-1]
64 | final_match_res = match_res1[-2] + [n-1]
65 | return total_cost, final_match_res
--------------------------------------------------------------------------------
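pivot_dynamic_matching above is an O(m*n) dynamic program that finds the cheapest order-preserving assignment of m predicted points to n target points (m <= n), with the two endpoints pinned to each other. A toy run (illustrative; assumes numpy and the function above are in scope):

import numpy as np

# cost[i, j] = distance between predicted point i and target point j (3 preds, 5 targets)
cost = np.array([
    [0.1, 0.8, 0.9, 1.0, 1.2],
    [0.9, 0.7, 0.2, 0.6, 1.1],
    [1.3, 1.2, 0.9, 0.5, 0.1],
])
total, match = pivot_dynamic_matching(cost)
print(total, match)   # about 0.4, [0, 2, 4]: strictly increasing column indices, endpoint to endpoint
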
/mapmaster/models/output_head/pivot_outputs.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class FFN(nn.Module):
7 | """ Very simple multi-layer perceptron (also called FFN)"""
8 |
9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, basic_type='linear'):
10 | super().__init__()
11 | self.basic_type = basic_type
12 | self.num_layers = num_layers
13 | h = [hidden_dim] * (num_layers - 1)
14 | self.layers = nn.ModuleList(self.basic_layer(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
15 |
16 | def forward(self, x):
17 | for i, layer in enumerate(self.layers):
18 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
19 | return x
20 |
21 | def basic_layer(self, n, k):
22 | if self.basic_type == 'linear':
23 | return nn.Linear(n, k)
24 | elif self.basic_type == 'conv':
25 | return nn.Conv2d(n, k, kernel_size=1, stride=1)
26 | else:
27 | raise NotImplementedError
28 |
29 | class PivotMapOutputHead(nn.Module):
30 | def __init__(self, in_channel, num_queries, tgt_shape, max_pieces, bev_channels=-1, ins_channel=64):
31 | super(PivotMapOutputHead, self).__init__()
32 | self.num_queries = num_queries
33 | self.num_classes = len(num_queries)
34 | self.tgt_shape = tgt_shape
35 | self.bev_channels = bev_channels
36 | self.semantic_heads = None
37 | if self.bev_channels > 0:
38 | self.semantic_heads = nn.ModuleList(
39 | nn.Sequential(nn.Conv2d(bev_channels, 2, kernel_size=1, stride=1)) for _ in range(self.num_classes)
40 | )
41 |
42 | self.max_pieces = max_pieces # [10, 2, 30]
43 | self.pts_split = [num_queries[i]*max_pieces[i] for i in range(len(num_queries))]
44 | _N = self.num_classes
45 | _C = ins_channel
46 | self.im_ctr_heads = nn.ModuleList(FFN(in_channel, 256, 2 * _C, 3) for _ in range(_N))
47 | self.pts_cls_heads = nn.ModuleList(FFN((_C)*2, _C*2, 2, 3) for i in range(_N))
48 | self.gap_layer = nn.AdaptiveAvgPool2d((1, 1))
49 | self.coords = self.compute_locations(device='cuda') # (1, 2, h, w)
50 | self.coords_head = FFN(2, 256, _C, 3, 'conv')
51 |
52 | def forward(self, inputs):
53 | num_decoders = len(inputs["mask_features"])
54 | dt_obj_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
55 | dt_ins_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
56 | im_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
57 | dt_pivots_logits = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
58 | coords_feats = self.coords_head.forward(self.coords.repeat((inputs["mask_features"][0].shape[0], 1, 1, 1)))
59 |
60 | for i in range(num_decoders):
61 | x_ins_cw = inputs["mask_features"][i].split(self.num_queries, dim=1)
62 | x_obj_cw = inputs["obj_scores"][i].split(self.num_queries, dim=1)
63 | x_qry_cw = inputs["decoder_outputs"][i].split(self.pts_split, dim=1) # [(b, 200, c), (b, 50, c), (b, 450, c)]
64 | batch_size = x_qry_cw[0].shape[0]
65 | for j in range(self.num_classes):
66 | dt_ins_masks[i][j] = self.up_sample(x_ins_cw[j]) # (B, P, H, W)
67 | dt_obj_logit[i][j] = x_obj_cw[j] # (B, P, 2)
68 | # im
69 | num_qry, n_pts = self.num_queries[j], self.max_pieces[j]
70 | im_feats = self.im_ctr_heads[j](x_qry_cw[j]) # (bs, n_q * n_pts, 2*c)
71 | im_feats_tmp = im_feats.reshape(batch_size, num_qry*n_pts*2, -1) # (bs, n_q*n_pts*2, c)
72 | im_coords_map = torch.einsum("bqc,bchw->bqhw", im_feats_tmp, coords_feats) # [bs, n_q*n_pts*2, h, w]
73 | im_coords = self.gap_layer(im_coords_map) # [bs, n_q * n_pts]
74 | im_coords = im_coords.reshape(batch_size, num_qry, self.max_pieces[j], 2).sigmoid()
75 | im_ctr_coord[i][j] = im_coords
76 |
77 | pt_feats = im_feats.reshape(batch_size, num_qry, self.max_pieces[j], -1).flatten(1, 2) # [bs, n_q * n_pts, 2*C]
78 | pt_logits = self.pts_cls_heads[j](pt_feats)
79 | dt_pivots_logits[i][j] = pt_logits.reshape(batch_size, num_qry, self.max_pieces[j], 2)
80 |
81 | ret = {"outputs": {"obj_logits": dt_obj_logit, "ins_masks": dt_ins_masks,
82 | "ctr_im": im_ctr_coord, "pts_logits": dt_pivots_logits}}
83 |
84 | if self.semantic_heads is not None:
85 | num_decoders = len(inputs["bev_enc_features"])
86 | dt_sem_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)]
87 | for i in range(num_decoders):
88 | x_sem = inputs["bev_enc_features"][i]
89 | for j in range(self.num_classes):
90 | dt_sem_masks[i][j] = self.up_sample(self.semantic_heads[j](x_sem)) # (B, P, 2, H, W)
91 | ret["outputs"].update({"sem_masks": dt_sem_masks})
92 | return ret
93 |
94 | def up_sample(self, x, tgt_shape=None):
95 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape
96 | if tuple(x.shape[-2:]) == tuple(tgt_shape):
97 | return x
98 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True)
99 |
100 | def compute_locations(self, stride=1, device='cpu'):
101 |
102 | fh, fw = self.tgt_shape
103 |
104 | shifts_x = torch.arange(0, fw * stride, step=stride, dtype=torch.float32, device=device)
105 | shifts_y = torch.arange(0, fh * stride, step=stride, dtype=torch.float32, device=device)
106 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
107 | shift_x = shift_x.reshape(-1)
108 | shift_y = shift_y.reshape(-1)
109 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2
110 |
111 | locations = locations.unsqueeze(0).permute(0, 2, 1).contiguous().float().view(1, 2, fh, fw)
112 | locations[:, 0, :, :] /= fw
113 | locations[:, 1, :, :] /= fh
114 |
115 | return locations
116 |
--------------------------------------------------------------------------------
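The coordinate readout in PivotMapOutputHead.forward above is an einsum between per-point query features and the coords_head feature map, followed by global average pooling and a sigmoid, so every pivot point reads a normalised (x, y) from the BEV grid. A shape-only sketch (all sizes illustrative):

import torch

bs, n_q, n_pts, C, h, w = 2, 10, 30, 64, 50, 25
im_feats = torch.randn(bs, n_q * n_pts * 2, C)            # one feature per (query, point, x/y) triple
coords_feats = torch.randn(bs, C, h, w)                    # coords_head applied to the (1, 2, h, w) grid
maps = torch.einsum("bqc,bchw->bqhw", im_feats, coords_feats)
coords = torch.nn.AdaptiveAvgPool2d((1, 1))(maps)          # (bs, n_q*n_pts*2, 1, 1)
coords = coords.reshape(bs, n_q, n_pts, 2).sigmoid()       # normalised pivot coordinates
print(coords.shape)                                        # torch.Size([2, 10, 30, 2])
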
/mapmaster/models/utils/mask_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from detectron2.projects.point_rend.point_features import point_sample
6 | from detectron2.projects.point_rend.point_features import get_uncertain_point_coords_with_randomness
7 |
8 |
9 | class SegmentationLoss(nn.Module):
10 |
11 | def __init__(self, ce_weight, dice_weight, use_point_render=False, num_points=8000, oversample=3.0, importance=0.75):
12 | super(SegmentationLoss, self).__init__()
13 | self.ce_weight = ce_weight
14 | self.dice_weight = dice_weight
15 | self.use_point_render = use_point_render
16 | self.num_points = num_points
17 | self.oversample = oversample
18 | self.importance = importance
19 |
20 | def forward(self, dt_masks, gt_masks, stage="loss"):
21 | loss = 0
22 | if self.use_point_render:
23 | dt_masks, gt_masks = self.points_render(dt_masks, gt_masks, stage)
24 | if self.ce_weight > 0:
25 | loss += self.ce_weight * self.forward_sigmoid_ce_loss(dt_masks, gt_masks)
26 | if self.dice_weight > 0:
27 | loss += self.dice_weight * self.forward_dice_loss(dt_masks, gt_masks)
28 | return loss
29 |
30 | @staticmethod
31 | def forward_dice_loss(inputs, targets):
32 | inputs = inputs.sigmoid()
33 | inputs = inputs.flatten(1)
34 | targets = targets.flatten(1)
35 | numerator = 2 * (inputs * targets).sum(-1)
36 | denominator = inputs.sum(-1) + targets.sum(-1)
37 | loss = 1 - (numerator + 1) / (denominator + 1)
38 | return loss
39 |
40 | @staticmethod
41 | def forward_sigmoid_ce_loss(inputs, targets):
42 | inputs = inputs.flatten(1)
43 | targets = targets.flatten(1)
44 | loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
45 | return loss.mean(1)
46 |
47 | def points_render(self, src_masks, tgt_masks, stage):
48 | assert stage in ["loss", "matcher"]
49 | assert src_masks.shape == tgt_masks.shape
50 |
51 | src_masks = src_masks[:, None]
52 | tgt_masks = tgt_masks[:, None]
53 |
54 | if stage == "matcher":
55 | point_coords = torch.rand(1, self.num_points, 2, device=src_masks.device)
56 | point_coords_src = point_coords.repeat(src_masks.shape[0], 1, 1)
57 | point_coords_tgt = point_coords.repeat(tgt_masks.shape[0], 1, 1)
58 | else:
59 | point_coords = get_uncertain_point_coords_with_randomness(
60 | src_masks,
61 | lambda logits: self.calculate_uncertainty(logits),
62 | self.num_points,
63 | self.oversample,
64 | self.importance,
65 | )
66 | point_coords_src = point_coords.clone()
67 | point_coords_tgt = point_coords.clone()
68 |
69 | src_masks = point_sample(src_masks, point_coords_src, align_corners=False).squeeze(1)
70 | tgt_masks = point_sample(tgt_masks, point_coords_tgt, align_corners=False).squeeze(1)
71 |
72 | return src_masks, tgt_masks
73 |
74 | @staticmethod
75 | def calculate_uncertainty(logits):
76 | """
77 |         We estimate uncertainty as L1 distance between 0.0 and the logit prediction in 'logits' for the
78 | foreground class in `classes`.
79 | Args:
80 | logits (Tensor): A tensor of shape (R, 1, ...) for class-specific or
81 | class-agnostic, where R is the total number of predicted masks in all images and C is
82 | the number of foreground classes. The values are logits.
83 | Returns:
84 | scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with
85 | the most uncertain locations having the highest uncertainty score.
86 | """
87 | assert logits.shape[1] == 1
88 | gt_class_logits = logits.clone()
89 | return -(torch.abs(gt_class_logits))
90 |
--------------------------------------------------------------------------------
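forward_dice_loss above is the soft Dice loss 1 - (2*|X intersect Y| + 1) / (|X| + |Y| + 1) computed on sigmoid probabilities, one value per mask. A tiny numeric check (illustrative; assumes the class above is importable):

import torch

logits = torch.tensor([[10.0, -10.0, 10.0, -10.0]])   # roughly [1, 0, 1, 0] after sigmoid
target = torch.tensor([[1.0, 0.0, 0.0, 0.0]])
print(SegmentationLoss.forward_dice_loss(logits, target))
# intersection ~ 1, |X| ~ 2, |Y| = 1  ->  1 - (2 + 1) / (3 + 1) ~ 0.25
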
/mapmaster/models/utils/misc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 | import warnings
4 | import torch.nn as nn
5 | from torch.nn import functional as F
6 |
7 |
8 | def c2_xavier_fill(module: nn.Module) -> None:
9 | """
10 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2.
11 | Also initializes `module.bias` to 0.
12 | Args:
13 | module (torch.nn.Module): module to initialize.
14 | """
15 | # Caffe2 implementation of XavierFill in fact
16 | # corresponds to kaiming_uniform_ in PyTorch
17 | # pyre-fixme[6]: For 1st param expected `Tensor` but got `Union[Module, Tensor]`.
18 | nn.init.kaiming_uniform_(module.weight, a=1)
19 | if module.bias is not None:
20 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module,
21 | # torch.Tensor]`.
22 | nn.init.constant_(module.bias, 0)
23 |
24 |
25 | class Conv2d(torch.nn.Conv2d):
26 | """
27 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features.
28 | """
29 |
30 | def __init__(self, *args, **kwargs):
31 | """
32 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
33 | Args:
34 | norm (nn.Module, optional): a normalization layer
35 | activation (callable(Tensor) -> Tensor): a callable activation function
36 | It assumes that norm layer is used before activation.
37 | """
38 | norm = kwargs.pop("norm", None)
39 | activation = kwargs.pop("activation", None)
40 | super().__init__(*args, **kwargs)
41 |
42 | self.norm = norm
43 | self.activation = activation
44 |
45 | def forward(self, x):
46 | # torchscript does not support SyncBatchNorm yet
47 | # https://github.com/pytorch/pytorch/issues/40507
48 | # and we skip these codes in torchscript since:
49 | # 1. currently we only support torchscript in evaluation mode
50 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or
51 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs.
52 | if not torch.jit.is_scripting():
53 | with warnings.catch_warnings(record=True):
54 | if x.numel() == 0 and self.training:
55 | # https://github.com/pytorch/pytorch/issues/12013
56 | assert not isinstance(
57 | self.norm, torch.nn.SyncBatchNorm
58 | ), "SyncBatchNorm does not support empty inputs!"
59 |
60 | x = F.conv2d(
61 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups
62 | )
63 | if self.norm is not None:
64 | x = self.norm(x)
65 | if self.activation is not None:
66 | x = self.activation(x)
67 | return x
68 |
69 |
70 | def get_activation_fn(activation):
71 | """Return an activation function given a string"""
72 | if activation == "relu":
73 | return F.relu
74 | if activation == "gelu":
75 | return F.gelu
76 | if activation == "glu":
77 | return F.glu
78 |     raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")
79 |
--------------------------------------------------------------------------------
/mapmaster/models/utils/position_encoding.py:
--------------------------------------------------------------------------------
1 | """
2 | Various positional encodings for the transformer.
3 | """
4 | import math
5 | import torch
6 | from torch import nn
7 | import torch.nn.functional as F
8 |
9 |
10 | class PositionEmbeddingSine(nn.Module):
11 | """
12 | This is a more standard version of the position embedding, very similar to the one
13 | used by the Attention is all you need paper, generalized to work on images.
14 | """
15 |
16 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None):
17 | super().__init__()
18 | self.num_pos_feats = num_pos_feats
19 | self.temperature = temperature
20 | self.normalize = normalize
21 | if scale is not None and normalize is False:
22 | raise ValueError("normalize should be True if scale is passed")
23 | if scale is None:
24 | scale = 2 * math.pi
25 | self.scale = scale
26 |
27 | def forward(self, mask):
28 | assert mask is not None
29 | not_mask = ~mask
30 | y_embed = not_mask.cumsum(1, dtype=torch.float32)
31 | x_embed = not_mask.cumsum(2, dtype=torch.float32)
32 | if self.normalize:
33 | eps = 1e-6
34 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
35 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
36 |
37 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=mask.device)
38 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
39 |
40 | pos_x = x_embed[:, :, :, None] / dim_t
41 | pos_y = y_embed[:, :, :, None] / dim_t
42 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
43 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
44 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
45 | return pos
46 |
47 |
48 | class PositionEmbeddingLearned(nn.Module):
49 | """
50 | Absolute pos embedding, learned.
51 | """
52 |
53 | def __init__(self, num_pos=(50, 50), num_pos_feats=256):
54 | super().__init__()
55 | self.num_pos = num_pos
56 | self.pos_embed = nn.Embedding(num_pos[0] * num_pos[1], num_pos_feats)
57 | self.reset_parameters()
58 |
59 | def reset_parameters(self):
60 | nn.init.normal_(self.pos_embed.weight)
61 |
62 | def forward(self, mask):
63 | h, w = mask.shape[-2:]
64 | pos = self.pos_embed.weight.view(*self.num_pos, -1)[:h, :w]
65 | pos = pos.permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1)
66 | return pos
67 |
68 |
69 | class PositionEmbeddingIPM(nn.Module):
70 |
71 | def __init__(self,
72 | encoder=None,
73 | num_pos=(16, 168),
74 | input_shape=(512, 896),
75 | num_pos_feats=64,
76 | sine_encoding=False,
77 | temperature=10000):
78 | super().__init__()
79 |
80 | h, w_expand = num_pos
81 | self.current_shape = (h, w_expand // 6)
82 | self.input_shape = input_shape
83 |
84 | self.num_pos_feats = num_pos_feats
85 | self.temperature = temperature
86 | self.encoder = encoder
87 | self.sine_encoding = sine_encoding
88 |
89 | def get_embedding(self, extrinsic, intrinsic, ida_mats):
90 | """
91 | Get the BeV Coordinate for Image
92 |
93 | Return
94 | xy_world_coord (N, H, W, 2) Ego x, y coordinate
95 | Valid (N, H, W, 1) -- Valid Points or Not 1 -- valid; 0 -- invalid
96 | """
97 | # extrinsic -> (B, M, 4, 4)
98 | device, b, n = extrinsic.device, extrinsic.shape[0], extrinsic.shape[1]
99 |
100 | x = torch.linspace(0, self.input_shape[1] - 1, self.current_shape[1], dtype=torch.float)
101 | y = torch.linspace(0, self.input_shape[0] - 1, self.current_shape[0], dtype=torch.float)
102 | y_grid, x_grid = torch.meshgrid(y, x)
103 | z = torch.ones(self.current_shape)
104 | feat_coords = torch.stack([x_grid, y_grid, z], dim=-1).to(device) # (H, W, 3)
105 | feat_coords = feat_coords.unsqueeze(0).repeat(n, 1, 1, 1).unsqueeze(0).repeat(b, 1, 1, 1, 1) # (B, N, H, W, 3)
106 |
107 | ida_mats = ida_mats.view(b, n, 1, 1, 3, 3)
108 | image_coords = ida_mats.inverse().matmul(feat_coords.unsqueeze(-1)) # (B, N, H, W, 3, 1)
109 |
110 | intrinsic = intrinsic.view(b, n, 1, 1, 3, 3) # (B, N, 1, 1, 3, 3)
111 | normed_coords = torch.linalg.inv(intrinsic) @ image_coords # (B, N, H, W, 3, 1)
112 |
113 | ext_rots = extrinsic[:, :, :3, :3] # (B, N, 3, 3)
114 | ext_trans = extrinsic[:, :, :3, 3] # (B, N, 3)
115 |
116 | ext_rots = ext_rots.view(b, n, 1, 1, 3, 3) # (B, N, 1, 1, 3, 3)
117 | world_coords = (ext_rots @ normed_coords).squeeze(-1) # (B, N, H, W, 3)
118 | world_coords = F.normalize(world_coords, p=2, dim=-1)
119 | z_coord = world_coords[:, :, :, :, 2] # (B, N, H, W)
120 |
121 | trans_z = ext_trans[:, :, 2].unsqueeze(-1).unsqueeze(-1) # (B, N, 1, 1)
122 | depth = - trans_z / z_coord # (B, N, H, W)
123 | valid = depth > 0 # (B, N, H, W)
124 |
125 | xy_world_coords = world_coords[:, :, :, :, :2] # (B, N, H, W, 2)
126 | xy_world_coords = xy_world_coords * depth.unsqueeze(-1)
127 | valid = valid.unsqueeze(-1) # (B, N, H, W, 1)
128 |
129 | return xy_world_coords, valid
130 |
131 | def forward(self, extrinsic, intrinsic, ida_mats, do_flip):
132 | """
133 | extrinsic (N, 6, 4, 4) torch.Tensor
134 | intrinsic (N, 6, 3, 3)
135 | """
136 | device = extrinsic.device
137 | xy_pos_embed, valid = self.get_embedding(extrinsic, intrinsic, ida_mats)
138 | if do_flip:
139 | xy_pos_embed[:, :, :, :, 1] = -1 * xy_pos_embed[:, :, :, :, 1]
140 | # along with w
141 | xy_pos_embed = torch.cat(torch.unbind(xy_pos_embed, dim=1), dim=-2) # (B, H, N*W, 2)
142 | valid = torch.cat(torch.unbind(valid, dim=1), dim=-2) # (B, H, N*W, 2)
143 | if self.sine_encoding:
144 | # Use Sine encoding to get 256 dim embeddings
145 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=device)
146 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2))
147 | pos_embed = xy_pos_embed[:, :, :, :, None] / dim_t
148 | pos_x = torch.stack((pos_embed[:, :, :, 0, 0::2].sin(), pos_embed[:, :, :, 0, 1::2].cos()), dim=4)
149 | pos_y = torch.stack((pos_embed[:, :, :, 1, 0::2].sin(), pos_embed[:, :, :, 1, 1::2].cos()), dim=4)
150 | pos_full_embed = torch.cat((pos_y.flatten(3), pos_x.flatten(3)), dim=3)
151 | pos_combined = torch.where(valid, pos_full_embed, torch.tensor(0., dtype=torch.float32, device=device))
152 | pos_combined = pos_combined.permute(0, 3, 1, 2) # (B, 2, H, W')
153 | else:
154 |             raise NotImplementedError("PositionEmbeddingIPM currently requires sine_encoding=True")
155 | # pos_combined = torch.where(valid, xy_pos_embed, torch.tensor(0., dtype=torch.float32, device=device))
156 | # pos_combined = pos_combined.permute(0, 3, 1, 2)
157 |
158 | if self.encoder is None:
159 | return pos_combined, valid.squeeze(-1)
160 | else:
161 | pos_embed_contiguous = pos_combined.contiguous()
162 | return self.encoder(pos_embed_contiguous), valid.squeeze(-1)
163 |
164 |
165 | class PositionEmbeddingTgt(nn.Module):
166 | def __init__(self,
167 | encoder=None,
168 | tgt_shape=(40, 20),
169 | map_size=(400, 200),
170 | map_resolution=0.15,
171 | num_pos_feats=64,
172 | sine_encoding=False,
173 | temperature=10000):
174 | super().__init__()
175 | self.tgt_shape = tgt_shape
176 | self.encoder = encoder
177 | self.map_size = map_size
178 | self.map_resolution = map_resolution
179 | self.num_pos_feats = num_pos_feats
180 | self.temperature = temperature
181 | self.sine_encoding = sine_encoding
182 |
183 | def forward(self, mask):
184 | B = mask.shape[0]
185 |
186 | map_forward_ratio = self.tgt_shape[0] / self.map_size[0]
187 | map_lateral_ratio = self.tgt_shape[1] / self.map_size[1]
188 |
189 | map_forward_res = self.map_resolution / map_forward_ratio
190 | map_lateral_res = self.map_resolution / map_lateral_ratio
191 |
192 | X = (torch.arange(self.tgt_shape[0] - 1, -1, -1, device=mask.device) + 0.5 - self.tgt_shape[
193 | 0] / 2) * map_forward_res
194 | Y = (torch.arange(self.tgt_shape[1] - 1, -1, -1, device=mask.device) + 0.5 - self.tgt_shape[
195 | 1] / 2) * map_lateral_res
196 |
197 | grid_X, grid_Y = torch.meshgrid(X, Y)
198 | pos_embed = torch.stack([grid_X, grid_Y], dim=-1) # (H, W, 2)
199 |
200 | if self.sine_encoding:
201 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=mask.device)
202 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2))
203 |
204 | pos_embed = pos_embed[:, :, :, None] / dim_t
205 | pos_x = torch.stack((pos_embed[:, :, 0, 0::2].sin(), pos_embed[:, :, 0, 1::2].cos()), dim=3).flatten(2)
206 | pos_y = torch.stack((pos_embed[:, :, 1, 0::2].sin(), pos_embed[:, :, 1, 1::2].cos()), dim=3).flatten(2)
207 | pos_full_embed = torch.cat((pos_y, pos_x), dim=2)
208 |
209 | pos_embed = pos_full_embed.unsqueeze(0).repeat(B, 1, 1, 1).permute(0, 3, 1, 2)
210 | else:
211 | pos_embed = pos_embed.unsqueeze(0).repeat(B, 1, 1, 1).permute(0, 3, 1, 2)
212 |
213 | if self.encoder is None:
214 | return pos_embed
215 | else:
216 | pos_embed_contiguous = pos_embed.contiguous()
217 | return self.encoder(pos_embed_contiguous)
--------------------------------------------------------------------------------
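PositionEmbeddingIPM.get_embedding above back-projects every feature-map pixel onto the ground plane: the pixel is mapped through the inverse augmentation (ida) and intrinsic matrices, rotated into the ego frame, and scaled so the ray meets z = 0, giving depth = -t_z / ray_z; rays that never reach the ground (depth <= 0) are flagged invalid. A single-ray sketch of that math (all matrices below are made-up examples, not repo values; as in the code above, the camera's own x/y offset is not added):

import torch
import torch.nn.functional as F

K = torch.tensor([[1000.,    0., 800.],
                  [   0., 1000., 450.],
                  [   0.,    0.,   1.]])                 # toy intrinsics
R = torch.tensor([[ 0.,  0., 1.],                        # camera-to-ego rotation for a front camera:
                  [-1.,  0., 0.],                        # ego x = cam z, ego y = -cam x, ego z = -cam y
                  [ 0., -1., 0.]])
cam_height = 1.6                                         # z component of ext_trans

pix = torch.tensor([800.0, 700.0, 1.0])                  # homogeneous pixel below the principal point
ray = F.normalize(R @ torch.linalg.inv(K) @ pix, p=2, dim=-1)
depth = -cam_height / ray[2]                             # scale at which the ray reaches the ground
xy = ray[:2] * depth                                     # ego-frame (x, y)
print(depth > 0, xy)                                     # tensor(True) tensor([6.4, 0.0]) approximately
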
/mapmaster/models/utils/recovery_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from detectron2.projects.point_rend.point_features import point_sample
6 |
7 |
8 | class PointRecoveryLoss(nn.Module):
9 |
10 | def __init__(self, ce_weight, dice_weight, curve_width, tgt_shape):
11 | super(PointRecoveryLoss, self).__init__()
12 | self.ce_weight = ce_weight
13 | self.dice_weight = dice_weight
14 | self.kernel = self.generate_kernel(curve_width, tgt_shape)
15 |
16 | def forward(self, points, gt_masks):
17 | points_expanded = points.unsqueeze(2) - self.kernel.repeat(points.shape[0], 1, 1, 1)
18 | points_expanded = torch.clamp(points_expanded.flatten(1, 2), min=0, max=1) # (N, P*w*w, 2) [0, 1]
19 | dt_points = point_sample(gt_masks[:, None], points_expanded, align_corners=False).squeeze(1).flatten(1)
20 | gt_points = torch.ones_like(dt_points)
21 | loss = 0
22 | if self.ce_weight > 0:
23 | loss += self.ce_weight * self.forward_ce_loss(dt_points, gt_points)
24 | if self.dice_weight > 0:
25 | loss += self.dice_weight * self.forward_dice_loss(dt_points, gt_points)
26 | return loss
27 |
28 | @staticmethod
29 | def generate_kernel(curve_width, tgt_shape, device='cuda'):
30 | width = torch.tensor(list(range(curve_width)))
31 | kernel = torch.stack(torch.meshgrid(width, width), dim=-1).float()
32 | kernel = kernel - curve_width // 2
33 | kernel[..., 0] = kernel[..., 0] / tgt_shape[1]
34 | kernel[..., 1] = kernel[..., 1] / tgt_shape[0]
35 | kernel = kernel.flatten(0, 1).unsqueeze(0).unsqueeze(0) # (1, 1, w*w, 2)
36 | kernel = kernel.cuda() if device == 'cuda' else kernel
37 | return kernel
38 |
39 | @staticmethod
40 | def forward_dice_loss(inputs, targets):
41 | numerator = 2 * (inputs * targets).sum(-1)
42 | denominator = inputs.sum(-1) + targets.sum(-1)
43 | loss = 1 - (numerator + 1) / (denominator + 1)
44 | return loss
45 |
46 | @staticmethod
47 | def forward_ce_loss(inputs, targets):
48 | loss = F.binary_cross_entropy(inputs, targets, reduction="none")
49 | return loss.mean(1)
50 |
--------------------------------------------------------------------------------
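generate_kernel above produces a (1, 1, curve_width^2, 2) block of offsets, normalised by the BEV width and height, which forward subtracts from each predicted point before point-sampling the rasterised GT mask; every sampled value is then pushed towards 1 so the predicted polyline is pulled back onto the curve. A quick look at the kernel (illustrative; assumes the class above is importable, and device='cpu' keeps it off the GPU):

import torch

kernel = PointRecoveryLoss.generate_kernel(curve_width=3, tgt_shape=(400, 200), device='cpu')
print(kernel.shape)        # torch.Size([1, 1, 9, 2])
print(kernel[0, 0, :, 0])  # offsets in {-1, 0, 1} / 200 along the first (width-normalised) channel
print(kernel[0, 0, :, 1])  # offsets in {-1, 0, 1} / 400 along the second (height-normalised) channel
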
/mapmaster/utils/env.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import sys
4 | import PIL
5 | import importlib
6 | import warnings
7 | import subprocess
8 | import torch
9 | import torchvision
10 | import numpy as np
11 | from tabulate import tabulate
12 | from collections import defaultdict
13 |
14 | __all__ = ["collect_env_info"]
15 |
16 |
17 | def collect_torch_env():
18 | import torch.__config__
19 | return torch.__config__.show()
20 |
21 |
22 | def collect_git_info():
23 | try:
24 | import git
25 | from git import InvalidGitRepositoryError
26 | except ImportError:
27 |         warnings.warn("Please consider installing gitpython for git info collection: 'pip install gitpython'.")
28 | return "Git status: unknown\n"
29 |
30 | try:
31 | repo = git.Repo(get_root_dir())
32 | except InvalidGitRepositoryError:
33 | warnings.warn("Current path is possibly not a valid git repository.")
34 | return "Git status: unknown\n"
35 |
36 | msg = "***Git status:***\n{}\nHEAD Commit-id: {}\n".format(repo.git.status().replace("<", "\<"), repo.head.commit)
37 | msg = "{}\n{}".format(msg, "***Git Diff:***\n{}\n".format(repo.git.diff().replace("<", "\<")))
38 | return msg
39 |
40 |
41 | def detect_compute_compatibility(CUDA_HOME, so_file):
42 | try:
43 | cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump")
44 | if os.path.isfile(cuobjdump):
45 | output = subprocess.check_output("'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True)
46 | output = output.decode("utf-8").strip().split("\n")
47 | sm = []
48 | for line in output:
49 | line = re.findall(r"\.sm_[0-9]*\.", line)[0]
50 | sm.append(line.strip("."))
51 | sm = sorted(set(sm))
52 | return ", ".join(sm)
53 | else:
54 | return so_file + "; cannot find cuobjdump"
55 | except Exception:
56 | # unhandled failure
57 | return so_file
58 |
59 |
60 | def collect_env_info():
61 | data = []
62 | data.append(("sys.platform", sys.platform))
63 | data.append(("Python", sys.version.replace("\n", "")))
64 | data.append(("numpy", np.__version__))
65 | data.append(("Pillow", PIL.__version__))
66 |
67 | data.append(("PyTorch", torch.__version__ + " @" + os.path.dirname(torch.__file__)))
68 | data.append(("PyTorch debug build", torch.version.debug))
69 |
70 | has_cuda = torch.cuda.is_available()
71 |
72 | data.append(("CUDA available", has_cuda))
73 | if has_cuda:
74 | devices = defaultdict(list)
75 | for k in range(torch.cuda.device_count()):
76 | devices[torch.cuda.get_device_name(k)].append(str(k))
77 | for name, devids in devices.items():
78 | data.append(("GPU " + ",".join(devids), name))
79 |
80 | from torch.utils.cpp_extension import CUDA_HOME
81 |
82 | data.append(("CUDA_HOME", str(CUDA_HOME)))
83 |
84 | if CUDA_HOME is not None and os.path.isdir(CUDA_HOME):
85 | try:
86 | nvcc = os.path.join(CUDA_HOME, "bin", "nvcc")
87 | nvcc = subprocess.check_output("'{}' -V | tail -n1".format(nvcc), shell=True)
88 | nvcc = nvcc.decode("utf-8").strip()
89 | except subprocess.SubprocessError:
90 | nvcc = "Not Available"
91 | data.append(("NVCC", nvcc))
92 |
93 | cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
94 | if cuda_arch_list:
95 | data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list))
96 |
97 | try:
98 | data.append(
99 | (
100 | "torchvision",
101 | str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__),
102 | )
103 | )
104 | if has_cuda:
105 | try:
106 | torchvision_C = importlib.util.find_spec("torchvision._C").origin
107 | msg = detect_compute_compatibility(CUDA_HOME, torchvision_C)
108 | data.append(("torchvision arch flags", msg))
109 | except ImportError:
110 | data.append(("torchvision._C", "failed to find"))
111 | except AttributeError:
112 | data.append(("torchvision", "unknown"))
113 |
114 | try:
115 | import cv2
116 |
117 | data.append(("cv2", cv2.__version__))
118 | except ImportError:
119 | pass
120 |
121 | env_str = tabulate(data) + "\n"
122 | env_str += collect_git_info()
123 | env_str += "-" * 100 + "\n"
124 | env_str += collect_torch_env()
125 | return env_str
126 |
127 |
128 | def get_root_dir():
129 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
130 |
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | clearml
2 | loguru
3 | Ninja
4 | numba
5 | opencv-contrib-python
6 | pandas
7 | scikit-image
8 | tabulate
9 | tensorboardX
10 | Pillow==9.4.0
11 | numpy==1.23.5
12 | visvalingamwyatt==0.2.0
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | export PYTHONPATH=$(pwd)
4 |
5 | case "$1" in
6 | "train")
7 | CONFIG_NAME=$2
8 | NUM_EPOCHS=$3
9 | python3 configs/"${CONFIG_NAME}".py -d 0-7 -b 1 -e ${NUM_EPOCHS} --sync_bn 8 --no-clearml
10 | ;;
11 | "test")
12 | CONFIG_NAME=$2
13 | CKPT=$3
14 | python3 configs/"${CONFIG_NAME}".py -d 0-7 --eval --ckpt "${CKPT}"
15 | ;;
16 | "train-continue")
17 | CONFIG_NAME=$2
18 | CKPT=$3
19 | python3 configs/"${CONFIG_NAME}".py -d 0-7 -b 1 -e 30 --sync_bn 8 --no-clearml --ckpt "${CKPT}"
20 | ;;
21 | "pipeline")
22 | CONFIG_NAME=$2
23 | NUM_EPOCHS=$3
24 | CKPT_ID=$((NUM_EPOCHS-1))
25 | bash run.sh train ${CONFIG_NAME} ${NUM_EPOCHS}
26 | bash run.sh test ${CONFIG_NAME} outputs/${CONFIG_NAME}/latest/dump_model/checkpoint_epoch_${CKPT_ID}.pth
27 | ;;
28 | "reproduce")
29 | CONFIG_NAME=$2
30 | bash run.sh pipeline ${CONFIG_NAME} 30
31 | bash run.sh pipeline ${CONFIG_NAME} 110
32 | ;;
33 | *)
34 |     echo "Usage: bash run.sh {train|test|train-continue|pipeline|reproduce} CONFIG_NAME [NUM_EPOCHS|CKPT]"
35 | esac
36 |
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/__init__.py
--------------------------------------------------------------------------------
/tools/anno_converter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/anno_converter/__init__.py
--------------------------------------------------------------------------------
/tools/anno_converter/bezier.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from shapely.geometry import LineString
4 | from scipy.special import comb as n_over_k
5 |
6 |
7 | class PiecewiseBezierCurve(object):
8 | def __init__(self, num_points=100, num_degree=2, margin=0.05, threshold=0.1):
9 | super().__init__()
10 | self.num_points = num_points
11 | self.num_degree = num_degree
12 | self.margin = margin
13 | self.bezier_coefficient = self._get_bezier_coefficients(np.linspace(0, 1, self.num_points))
14 | self.threshold = threshold
15 |
16 | def _get_bezier_coefficients(self, t_list):
17 | bernstein_fn = lambda n, t, k: (t ** k) * ((1 - t) ** (n - k)) * n_over_k(n, k)
18 | bezier_coefficient_fn = \
19 | lambda ts: [[bernstein_fn(self.num_degree, t, k) for k in range(self.num_degree + 1)] for t in t_list]
20 | return np.array(bezier_coefficient_fn(t_list))
21 |
22 | def _get_interpolated_points(self, points):
23 | line = LineString(points)
24 | distances = np.linspace(0, line.length, self.num_points)
25 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
26 | return sampled_points
27 |
28 | def _get_chamfer_distance(self, points_before, points_after):
29 | points_before = torch.from_numpy(points_before).float()
30 | points_after = torch.from_numpy(points_after).float()
31 | dist = torch.cdist(points_before, points_after)
32 | dist1, _ = torch.min(dist, 2)
33 | dist1 = (dist1 * (dist1 > self.margin).float())
34 | dist2, _ = torch.min(dist, 1)
35 | dist2 = (dist2 * (dist2 > self.margin).float())
36 | return (dist1.mean(-1) + dist2.mean(-1)) / 2
37 |
38 | def bezier_fitting(self, curve_pts):
39 | curve_pts_intered = self._get_interpolated_points(curve_pts)
40 | bezier_ctrl_pts = np.linalg.pinv(self.bezier_coefficient).dot(curve_pts_intered)
41 | bezier_ctrl_pts = np.concatenate([curve_pts[0:1], bezier_ctrl_pts[1:-1], curve_pts[-1:]], axis=0)
42 | curve_pts_recovery = self.bezier_coefficient.dot(bezier_ctrl_pts)
43 | criterion = self._get_chamfer_distance(curve_pts_intered[None, :, :], curve_pts_recovery[None, :, :]).item()
44 | return bezier_ctrl_pts, criterion
45 |
46 | @staticmethod
47 | def sequence_reverse(ctr_points):
48 | ctr_points = np.array(ctr_points)
49 | (xs, ys), (xe, ye) = ctr_points[0], ctr_points[-1]
50 | if ys > ye:
51 | ctr_points = ctr_points[::-1]
52 | return ctr_points
53 |
54 | def __call__(self, curve_pts):
55 | ctr_points_piecewise = []
56 | num_points = curve_pts.shape[0]
57 | start, end = 0, num_points - 1
58 | while start < end:
59 | ctr_points, loss = self.bezier_fitting(curve_pts[start: end + 1])
60 | if loss < self.threshold:
61 | start, end = end, num_points - 1
62 | if start >= end:
63 | ctr_points_piecewise += ctr_points.tolist()
64 | else:
65 | ctr_points_piecewise += ctr_points.tolist()[:-1]
66 | else:
67 | end = end - 1
68 | ctr_points_piecewise = self.sequence_reverse(ctr_points_piecewise)
69 | return ctr_points_piecewise
70 |
--------------------------------------------------------------------------------
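A minimal usage sketch of the piecewise fitting above, on an assumed toy polyline (a quarter arc); the repository root is assumed to be on PYTHONPATH, as run.sh sets it:

import numpy as np
from tools.anno_converter.bezier import PiecewiseBezierCurve

# Toy input: a quarter arc of radius 10 m sampled at 50 points (assumption).
theta = np.linspace(0, np.pi / 2, 50)
polyline = np.stack([10 * np.cos(theta), 10 * np.sin(theta)], axis=-1)

pbc = PiecewiseBezierCurve(num_points=100, num_degree=2, margin=0.05, threshold=0.1)
ctrl_points = pbc(polyline)          # control points of all pieces, ordered by sequence_reverse
print(np.array(ctrl_points).shape)   # (K, 2)
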
/tools/anno_converter/generate_pivots.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import visvalingamwyatt as vw
3 |
4 | class GenPivots:
5 | def __init__(self, max_pts=[10, 2, 30], map_region=(30, -30, 15, -15), vm_thre=2.0, resolution=0.15):
6 | self.max_pts = max_pts
7 | self.map_region = map_region
8 | self.vm_thre = vm_thre
9 | self.resolution = resolution
10 |
11 | def pivots_generate(self, map_vectors):
12 | pivots_single_frame = {0:[], 1:[], 2:[]}
13 | lengths_single_frame = {0:[], 1:[], 2:[]}
14 | for ii, vec in enumerate(map_vectors):
15 |             pts = np.array(vec["pts"]) * self.resolution # convert to meters
16 | pts = pts[:, ::-1]
17 | cls = vec["type"]
18 |
19 | # If the difference in x is obvious (greater than 1m), then rank according to x.
20 | # If the difference in x is not obvious, rank according to y.
21 | if (np.abs(pts[0][0]-pts[-1][0])>1 and pts[0][0] 0:
35 | new_pts[:, :, 0] = new_pts[:, :, 0] / (2 * self.map_region[0]) # normalize
36 | new_pts[:, :, 1] = new_pts[:, :, 1] / (2 * self.map_region[2])
37 | pivots_single_frame[cls] = new_pts
38 | lengths_single_frame[cls] = np.array(lengths_single_frame[cls])
39 |
40 | return pivots_single_frame, lengths_single_frame
41 |
42 | def pad_pts(self, pts, tgt_length):
43 | if len(pts) >= tgt_length:
44 | return pts[:tgt_length]
45 | pts = np.concatenate([pts, np.zeros((tgt_length-len(pts), 2))], axis=0)
46 | return pts
47 |
--------------------------------------------------------------------------------
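A small sketch of the padding helper with made-up inputs (the constructor arguments mirror the defaults above; repo root assumed on PYTHONPATH):

import numpy as np
from tools.anno_converter.generate_pivots import GenPivots

gen = GenPivots(max_pts=[10, 2, 30], map_region=(30, -30, 15, -15), vm_thre=2.0, resolution=0.15)
pts = np.array([[1.0, 2.0], [3.0, 4.0]])   # two toy pivot points
padded = gen.pad_pts(pts, 5)               # truncated or zero-padded to the target length
print(padded.shape)                        # (5, 2)
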
/tools/anno_converter/nuscenes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/anno_converter/nuscenes/__init__.py
--------------------------------------------------------------------------------
/tools/anno_converter/nuscenes/convert.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import numpy as np
4 | from tqdm import tqdm
5 | from nuscenes import NuScenes
6 | from pyquaternion import Quaternion
7 | from torch.utils.data import Dataset
8 | from rasterize import RasterizedLocalMap
9 | from vectorize import VectorizedLocalMap
10 | from tools.anno_converter.generate_pivots import GenPivots
11 |
12 |
13 | class NuScenesDataset(Dataset):
14 | def __init__(self, version, dataroot, xbound=(-30., 30., 0.15), ybound=(-15., 15., 0.15)):
15 | super(NuScenesDataset, self).__init__()
16 | patch_h = ybound[1] - ybound[0]
17 | patch_w = xbound[1] - xbound[0]
18 | canvas_h = int(patch_h / ybound[2])
19 | canvas_w = int(patch_w / xbound[2])
20 | self.patch_size = (patch_h, patch_w)
21 | self.canvas_size = (canvas_h, canvas_w)
22 | self.nusc = NuScenes(version=version, dataroot=dataroot, verbose=False)
23 | self.vector_map = VectorizedLocalMap(dataroot, patch_size=self.patch_size, canvas_size=self.canvas_size)
24 |
25 | def __len__(self):
26 | return len(self.nusc.sample)
27 |
28 | def __getitem__(self, idx):
29 | record = self.nusc.sample[idx]
30 | location = self.nusc.get('log', self.nusc.get('scene', record['scene_token'])['log_token'])['location']
31 | ego_pose = self.nusc.get('ego_pose',
32 | self.nusc.get('sample_data', record['data']['LIDAR_TOP'])['ego_pose_token'])
33 | vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation'])
34 | imgs, trans, rots, intrins = self.get_data_info(record)
35 | return imgs, np.stack(trans), np.stack(rots), np.stack(intrins), vectors
36 |
37 | def get_data_info(self, record):
38 | imgs, trans, rots, intrins = [], [], [], []
39 | for cam in ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']:
40 | samp = self.nusc.get('sample_data', record['data'][cam])
41 | imgs.append(samp['filename'])
42 | sens = self.nusc.get('calibrated_sensor', samp['calibrated_sensor_token'])
43 | trans.append(sens['translation'])
44 | rots.append(Quaternion(sens['rotation']).rotation_matrix)
45 | intrins.append(sens['camera_intrinsic'])
46 | return imgs, trans, rots, intrins
47 |
48 |
49 | class NuScenesSemanticDataset(NuScenesDataset):
50 | def __init__(self, version, dataroot, xbound, ybound, thickness, num_degrees, max_channel=3, bezier=False):
51 | super(NuScenesSemanticDataset, self).__init__(version, dataroot, xbound, ybound)
52 | self.raster_map = RasterizedLocalMap(self.patch_size, self.canvas_size, num_degrees, max_channel, thickness, bezier=bezier)
53 | self.pivot_gen = GenPivots(map_region=(xbound[1], xbound[0], ybound[1], ybound[0]), resolution=xbound[2])
54 |
55 | def __getitem__(self, idx):
56 | record = self.nusc.sample[idx]
57 | location = self.nusc.get('log', self.nusc.get('scene', record['scene_token'])['log_token'])['location']
58 | ego_pose = self.nusc.get('ego_pose', self.nusc.get('sample_data', record['data']['LIDAR_TOP'])['ego_pose_token'])
59 | vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation'])
60 | imgs, trans, rots, intrins = self.get_data_info(record)
61 | semantic_masks, instance_masks, instance_vec_points, instance_ctr_points = \
62 | self.raster_map.convert_vec_to_mask(vectors)
63 | pivots, pivot_lengths = self.pivot_gen.pivots_generate(instance_vec_points)
64 |
65 | return imgs, np.stack(trans), np.stack(rots), np.stack(intrins), semantic_masks, instance_masks, \
66 | vectors, instance_vec_points, instance_ctr_points, pivots, pivot_lengths
67 |
68 |
69 | def main():
70 | parser = argparse.ArgumentParser(description='Pivot-Bezier GT Generator.')
71 | parser.add_argument('-d', '--data_root', type=str, default='/data/dataset/public/nuScenes-tt')
72 | parser.add_argument('-v', '--version', nargs='+', type=str, default=['v1.0-trainval'])
73 | parser.add_argument("--num_degrees", nargs='+', type=int, default=[2, 1, 3])
74 | parser.add_argument("--thickness", nargs='+', type=int, default=[1, 8])
75 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15])
76 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15])
77 | parser.add_argument("--bezier", default=False, action='store_true') # whether to generate bezier GT
78 | args = parser.parse_args()
79 |
80 | n_classes = len(args.num_degrees) # 0 --> divider(d=2), 1 --> crossing(d=1), 2--> contour(d=3)
81 | save_dir = os.path.join(args.data_root, 'customer', "pivot-bezier")
82 | os.makedirs(save_dir, exist_ok=True)
83 | for version in args.version:
84 | dataset = NuScenesSemanticDataset(
85 | version, args.data_root, args.xbound, args.ybound, args.thickness, args.num_degrees, max_channel=n_classes, bezier=args.bezier)
86 | for idx in tqdm(range(dataset.__len__())):
87 | file_path = os.path.join(save_dir, dataset.nusc.sample[idx]['token'] + '.npz')
88 | # if os.path.exists(file_path):
89 | # continue
90 | item = dataset.__getitem__(idx)
91 | np.savez_compressed(
92 | file_path, image_paths=np.array(item[0]), trans=item[1], rots=item[2], intrins=item[3],
93 | semantic_mask=item[4][0], instance_mask=item[5][0], instance_mask8=item[5][1],
94 | ego_vectors=item[6], map_vectors=item[7], ctr_points=item[8], pivot_pts=item[9], pivot_length=item[10],
95 | )
96 |
97 |
98 | if __name__ == '__main__':
99 | main()
100 |
--------------------------------------------------------------------------------
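The converter is run once per split, e.g. python tools/anno_converter/nuscenes/convert.py -d /path/to/nuScenes (the data path is a placeholder), and writes one .npz per sample token under <data_root>/customer/pivot-bezier. A minimal sketch of reading one generated file back (the file path is hypothetical):

import numpy as np

item = np.load("/path/to/nuScenes/customer/pivot-bezier/SAMPLE_TOKEN.npz", allow_pickle=True)
print(item["semantic_mask"].shape)   # (3, H, W): one channel per map class
pivots = item["pivot_pts"].item()    # dict keyed by class id 0/1/2
print({k: np.asarray(v).shape for k, v in pivots.items()})
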
/tools/anno_converter/nuscenes/rasterize.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from shapely import affinity
4 | from shapely.geometry import LineString, box
5 | from tools.anno_converter.bezier import PiecewiseBezierCurve
6 |
7 |
8 | class RasterizedLocalMap(object):
9 | def __init__(self, patch_size, canvas_size, num_degrees, max_channel, thickness, patch_angle=0.0, bezier=False):
10 | super().__init__()
11 | self.patch_size = patch_size
12 | self.canvas_size = canvas_size
13 | self.max_channel = max_channel
14 | self.num_degrees = num_degrees
15 | self.thickness = thickness
16 | assert self.thickness[0] == 1
17 | self.patch_box = (0.0, 0.0, self.patch_size[0], self.patch_size[1])
18 | self.patch_angle = patch_angle
19 | self.patch = self.get_patch_coord()
20 | self.bezier = bezier
21 | if bezier:
22 | self.pbc_funcs = {
23 | d: PiecewiseBezierCurve(num_points=100, num_degree=d, margin=0.05, threshold=0.1) for d in num_degrees
24 | }
25 |
26 | def convert_vec_to_mask(self, vectors):
27 | vector_num_list = {cls_idx: [] for cls_idx in range(self.max_channel)} # map-type -> list
28 | for vector in vectors:
29 | if vector['pts_num'] >= 2:
30 | vector_num_list[vector['type']].append(LineString(vector['pts'][:vector['pts_num']]))
31 | ins_idx = 1 # instance-index
32 | instance_masks = np.zeros(
33 | (len(self.thickness), self.max_channel, self.canvas_size[1], self.canvas_size[0]), np.uint8)
34 | instance_vec_points, instance_ctr_points = [], []
35 | for cls_idx in range(self.max_channel):
36 | if self.bezier:
37 | pbc_func = self.pbc_funcs[self.num_degrees[cls_idx]]
38 | else:
39 | pbc_func = None
40 | masks, map_points, ctr_points, ins_idx = self.line_geom_to_mask(vector_num_list[cls_idx], ins_idx, pbc_func)
41 | instance_masks[:, cls_idx, :, :] = masks
42 | for pts in map_points:
43 | instance_vec_points.append({'pts': pts, 'pts_num': len(pts), 'type': cls_idx})
44 | for pts in ctr_points:
45 | instance_ctr_points.append({'pts': pts, 'pts_num': len(pts), 'type': cls_idx})
46 | instance_masks = np.stack(instance_masks).astype(np.uint8)
47 | semantic_masks = (instance_masks != 0).astype(np.uint8)
48 | return semantic_masks, instance_masks, instance_vec_points, instance_ctr_points
49 |
50 | def line_geom_to_mask(self, layer_geom, idx, pbc_func, trans_type='index'):
51 | patch_x, patch_y, patch_h, patch_w = self.patch_box
52 | canvas_h = self.canvas_size[0]
53 | canvas_w = self.canvas_size[1]
54 | scale_height = canvas_h / patch_h
55 | scale_width = canvas_w / patch_w
56 | trans_x = -patch_x + patch_w / 2.0
57 | trans_y = -patch_y + patch_h / 2.0
58 | map_masks = np.zeros((len(self.thickness), *self.canvas_size), np.uint8)
59 | map_points, ctr_points = [], []
60 | for line in layer_geom:
61 | new_line = line.intersection(self.patch)
62 | if not new_line.is_empty:
63 | new_line = affinity.affine_transform(new_line, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
64 | if new_line.geom_type == 'MultiLineString':
65 |                     for single_line in new_line.geoms:
66 | pts2 = self.patch_size - np.array(single_line.coords[:])[:, ::-1]
67 | if pbc_func is not None:
68 | ctr_points.append(pbc_func(pts2))
69 | single_line = affinity.scale(single_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
70 | map_masks, idx = self.mask_for_lines(single_line, map_masks, self.thickness, idx, trans_type)
71 | pts = self.canvas_size - np.array(single_line.coords[:])[:, ::-1]
72 | map_points.append(pts.tolist())
73 | else:
74 | pts2 = self.patch_size - np.array(new_line.coords[:])[:, ::-1]
75 | if pbc_func is not None:
76 | ctr_points.append(pbc_func(pts2))
77 | new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))
78 | map_masks, idx = self.mask_for_lines(new_line, map_masks, self.thickness, idx, trans_type)
79 | pts = self.canvas_size - np.array(new_line.coords[:])[:, ::-1]
80 | map_points.append(pts.tolist())
81 | map_masks_ret = []
82 | for i in range(len(self.thickness)):
83 | map_masks_ret.append(np.flip(np.rot90(map_masks[i][None], k=1, axes=(1, 2)), axis=2)[0])
84 | map_masks_ret = np.array(map_masks_ret)
85 | return map_masks_ret, map_points, ctr_points, idx
86 |
87 | @staticmethod
88 | def mask_for_lines(lines, mask, thickness, idx, trans_type='index'):
89 | coords = np.asarray(list(lines.coords), np.int32)
90 | coords = coords.reshape((-1, 2))
91 | if len(coords) < 2:
92 | return mask, idx
93 | for i, t in enumerate(thickness):
94 | if trans_type == 'index':
95 | cv2.polylines(mask[i], [coords], False, color=idx, thickness=t)
96 | idx += 1
97 | return mask, idx
98 |
99 | def get_patch_coord(self):
100 | patch_x, patch_y, patch_h, patch_w = self.patch_box
101 | x_min = patch_x - patch_w / 2.0
102 | y_min = patch_y - patch_h / 2.0
103 | x_max = patch_x + patch_w / 2.0
104 | y_max = patch_y + patch_h / 2.0
105 | patch = box(x_min, y_min, x_max, y_max)
106 | patch = affinity.rotate(patch, self.patch_angle, origin=(patch_x, patch_y), use_radians=False)
107 | return patch
108 |
--------------------------------------------------------------------------------
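A sketch of the rasterizer on one synthetic divider, with parameter values that mirror the converter defaults (the input vector is an assumed toy line in ego coordinates, in meters; repo root assumed on PYTHONPATH):

import numpy as np
from tools.anno_converter.nuscenes.rasterize import RasterizedLocalMap

raster = RasterizedLocalMap(
    patch_size=(30.0, 60.0), canvas_size=(200, 400),
    num_degrees=[2, 1, 3], max_channel=3, thickness=[1, 8], bezier=False)

vectors = [{"pts": np.array([[-20.0, 0.0], [20.0, 0.0]]), "pts_num": 2, "type": 0}]
semantic, instance, vec_pts, ctr_pts = raster.convert_vec_to_mask(vectors)
print(semantic.shape, instance.shape)   # (2, 3, 400, 200): masks for thickness 1 and 8
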
/tools/anno_converter/nuscenes/vectorize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from shapely import affinity, ops
3 | from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
4 | from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer
5 | from shapely.geometry import LineString, box, MultiPolygon, MultiLineString
6 |
7 |
8 | class VectorizedLocalMap(object):
9 | def __init__(self,
10 | data_root,
11 | patch_size,
12 | canvas_size,
13 | line_classes=('road_divider', 'lane_divider'),
14 | ped_crossing_classes=('ped_crossing', ),
15 | contour_classes=('road_segment', 'lane'),
16 | sample_dist=1,
17 | num_samples=250,
18 | padding=False,
19 | normalize=False,
20 | fixed_num=-1,
21 | class2label={
22 | 'road_divider': 0,
23 | 'lane_divider': 0,
24 | 'ped_crossing': 1,
25 | 'contours': 2,
26 | 'others': -1,
27 | }):
28 | super().__init__()
29 | self.data_root = data_root
30 | self.MAPS = ['boston-seaport', 'singapore-hollandvillage', 'singapore-onenorth', 'singapore-queenstown']
31 | self.line_classes = line_classes
32 | self.ped_crossing_classes = ped_crossing_classes
33 | self.polygon_classes = contour_classes
34 | self.class2label = class2label
35 | self.nusc_maps = {}
36 | self.map_explorer = {}
37 | for loc in self.MAPS:
38 | self.nusc_maps[loc] = NuScenesMap(dataroot=self.data_root, map_name=loc)
39 | self.map_explorer[loc] = NuScenesMapExplorer(self.nusc_maps[loc])
40 | self.patch_size = patch_size
41 | self.canvas_size = canvas_size
42 | self.sample_dist = sample_dist
43 | self.num_samples = num_samples
44 | self.padding = padding
45 | self.normalize = normalize
46 | self.fixed_num = fixed_num
47 |
48 | def gen_vectorized_samples(self, location, ego2global_translation, ego2global_rotation):
49 | map_pose = ego2global_translation[:2] # T
50 | rotation = Quaternion(ego2global_rotation) # R
51 |
52 | patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])
53 | patch_angle = quaternion_yaw(rotation) / np.pi * 180
54 |
55 | line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes, location)
56 | line_vector_dict = self.line_geoms_to_vectors(line_geom)
57 |
58 | ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes, location)
59 | ped_vector_list = self.line_geoms_to_vectors(ped_geom)['ped_crossing']
60 |
61 | polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes, location)
62 | poly_bound_list = self.poly_geoms_to_vectors(polygon_geom)
63 |
64 | vectors = []
65 | for line_type, vects in line_vector_dict.items():
66 | for line, length in vects:
67 | vectors.append((line.astype(float), length, self.class2label.get(line_type, -1)))
68 |
69 | for ped_line, length in ped_vector_list:
70 | vectors.append((ped_line.astype(float), length, self.class2label.get('ped_crossing', -1)))
71 |
72 | for contour, length in poly_bound_list:
73 | vectors.append((contour.astype(float), length, self.class2label.get('contours', -1)))
74 |
75 | # filter out -1
76 | filtered_vectors = []
77 | for pts, pts_num, _type in vectors:
78 | if _type != -1:
79 | filtered_vectors.append({'pts': pts, 'pts_num': pts_num, 'type': _type})
80 |
81 | return filtered_vectors
82 |
83 | def get_map_geom(self, patch_box, patch_angle, layer_names, location):
84 | map_geom = []
85 | for layer_name in layer_names:
86 | if layer_name in self.line_classes:
87 | map_explorer_x = self.map_explorer[location]
88 | geoms = map_explorer_x._get_layer_line(patch_box, patch_angle, layer_name)
89 | elif layer_name in self.polygon_classes:
90 | map_explorer_x = self.map_explorer[location]
91 | geoms = map_explorer_x._get_layer_polygon(patch_box, patch_angle, layer_name)
92 | elif layer_name in self.ped_crossing_classes:
93 | geoms = self.get_ped_crossing_line(patch_box, patch_angle, location)
94 | else:
95 | raise NotImplementedError
96 | map_geom.append((layer_name, geoms))
97 | return map_geom
98 |
99 | def _one_type_line_geom_to_vectors(self, line_geom):
100 | line_vectors = []
101 | for line in line_geom:
102 | if not line.is_empty:
103 | if line.geom_type == 'MultiLineString':
104 | for l in line.geoms:
105 | line_vectors.append(self.sample_pts_from_line(l))
106 | elif line.geom_type == 'LineString':
107 | line_vectors.append(self.sample_pts_from_line(line))
108 | else:
109 | raise NotImplementedError
110 | return line_vectors
111 |
112 | def poly_geoms_to_vectors(self, polygon_geom):
113 | roads = polygon_geom[0][1]
114 | lanes = polygon_geom[1][1]
115 | union_roads = ops.unary_union(roads)
116 | union_lanes = ops.unary_union(lanes)
117 | union_segments = ops.unary_union([union_roads, union_lanes])
118 | max_x = self.patch_size[1] / 2
119 | max_y = self.patch_size[0] / 2
120 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)
121 | exteriors = []
122 | interiors = []
123 | if union_segments.geom_type != 'MultiPolygon':
124 | union_segments = MultiPolygon([union_segments])
125 | for poly in union_segments.geoms:
126 | exteriors.append(poly.exterior)
127 | for inter in poly.interiors:
128 | interiors.append(inter)
129 |
130 | results = []
131 | for ext in exteriors:
132 | if ext.is_ccw:
133 | ext.coords = list(ext.coords)[::-1]
134 | lines = ext.intersection(local_patch)
135 | if isinstance(lines, MultiLineString):
136 | lines = ops.linemerge(lines)
137 | results.append(lines)
138 |
139 | for inter in interiors:
140 | if not inter.is_ccw:
141 | inter.coords = list(inter.coords)[::-1]
142 | lines = inter.intersection(local_patch)
143 | if isinstance(lines, MultiLineString):
144 | lines = ops.linemerge(lines)
145 | results.append(lines)
146 |
147 | return self._one_type_line_geom_to_vectors(results)
148 |
149 | def line_geoms_to_vectors(self, line_geom):
150 | line_vectors_dict = dict()
151 | for line_type, a_type_of_lines in line_geom:
152 | one_type_vectors = self._one_type_line_geom_to_vectors(a_type_of_lines)
153 | line_vectors_dict[line_type] = one_type_vectors
154 |
155 | return line_vectors_dict
156 |
157 | def ped_geoms_to_vectors(self, ped_geom):
158 | ped_geom = ped_geom[0][1]
159 | union_ped = ops.unary_union(ped_geom)
160 | if union_ped.geom_type != 'MultiPolygon':
161 | union_ped = MultiPolygon([union_ped])
162 |
163 | max_x = self.patch_size[1] / 2
164 | max_y = self.patch_size[0] / 2
165 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)
166 | results = []
167 |         for ped_poly in union_ped.geoms:
168 | # rect = ped_poly.minimum_rotated_rectangle
169 | ext = ped_poly.exterior
170 | if not ext.is_ccw:
171 | ext.coords = list(ext.coords)[::-1]
172 | lines = ext.intersection(local_patch)
173 | results.append(lines)
174 |
175 | return self._one_type_line_geom_to_vectors(results)
176 |
177 | def get_ped_crossing_line(self, patch_box, patch_angle, location):
178 | def add_line(poly_xy, idx, patch, patch_angle, patch_x, patch_y, line_list):
179 | points = [(p0, p1) for p0, p1 in zip(poly_xy[0, idx:idx + 2], poly_xy[1, idx:idx + 2])]
180 | line = LineString(points)
181 | line = line.intersection(patch)
182 | if not line.is_empty:
183 | line = affinity.rotate(line, -patch_angle, origin=(patch_x, patch_y), use_radians=False)
184 | line = affinity.affine_transform(line, [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])
185 | line_list.append(line)
186 |
187 | patch_x = patch_box[0]
188 | patch_y = patch_box[1]
189 |
190 | patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
191 | line_list = []
192 | records = getattr(self.nusc_maps[location], 'ped_crossing')
193 | for record in records:
194 | polygon = self.map_explorer[location].extract_polygon(record['polygon_token'])
195 | poly_xy = np.array(polygon.exterior.xy)
196 | dist = np.square(poly_xy[:, 1:] - poly_xy[:, :-1]).sum(0)
197 | x1, x2 = np.argsort(dist)[-2:]
198 |
199 | add_line(poly_xy, x1, patch, patch_angle, patch_x, patch_y, line_list)
200 | add_line(poly_xy, x2, patch, patch_angle, patch_x, patch_y, line_list)
201 |
202 | return line_list
203 |
204 | def sample_pts_from_line(self, line):
205 | if self.fixed_num < 0:
206 | distances = np.arange(0, line.length, self.sample_dist)
207 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
208 | else:
209 | # fixed number of points, so distance is line.length / self.fixed_num
210 | distances = np.linspace(0, line.length, self.fixed_num)
211 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
212 |
213 | if self.normalize:
214 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]])
215 |
216 | num_valid = len(sampled_points)
217 |
218 | if not self.padding or self.fixed_num > 0:
219 | # fixed num sample can return now!
220 | return sampled_points, num_valid
221 |
222 | # fixed distance sampling need padding!
223 | num_valid = len(sampled_points)
224 |
225 | if self.fixed_num < 0:
226 | if num_valid < self.num_samples:
227 | padding = np.zeros((self.num_samples - len(sampled_points), 2))
228 | sampled_points = np.concatenate([sampled_points, padding], axis=0)
229 | else:
230 | sampled_points = sampled_points[:self.num_samples, :]
231 | num_valid = self.num_samples
232 |
233 | if self.normalize:
234 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]])
235 | num_valid = len(sampled_points)
236 |
237 | return sampled_points, num_valid
238 |
--------------------------------------------------------------------------------
/tools/evaluation/ap.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from cd import chamfer_distance
3 |
4 |
5 | def get_line_instances_from_mask(mask, scale_x, scale_y):
6 | # mask: H, W
7 | # instance: {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)}
8 | indices = torch.unique(mask)
9 | instances = {}
10 | for idx in indices:
11 | idx = idx.item()
12 | if idx == 0:
13 | continue
14 | pc_x, pc_y = torch.where(mask == idx)
15 | pc_x = pc_x.float() * scale_x
16 | pc_y = pc_y.float() * scale_y
17 | coords = torch.stack([pc_x, pc_y], -1)
18 | instances[idx] = coords
19 | return instances
20 |
21 |
22 | def line_matching_by_CD(inst_pred_lines, inst_pred_confidence, inst_label_lines, threshold):
23 | # inst_pred_line: a list of points {1: (M1, 2), 2: (M2, 2), ..., k2: (M_k2, 2)}
24 | # inst_pred_confidence: a list of confidence [c1, c2, ..., ck2]
25 | # inst_label_line: a list of points {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)}
26 | # return: a list of {'pred': (M, 2), 'label': (N, 2), 'confidence': scalar}
27 | pred_num = len(inst_pred_lines)
28 | label_num = len(inst_label_lines)
29 | CD = torch.zeros((pred_num, label_num)).cuda()
30 |
31 | inst_pred_lines_keys = [*inst_pred_lines]
32 | inst_label_lines_keys = [*inst_label_lines]
33 | for i, key_pred in enumerate(inst_pred_lines_keys):
34 | for j, key_label in enumerate(inst_label_lines_keys):
35 | CD[i, j] = chamfer_distance(
36 | inst_pred_lines[key_pred][None],
37 | inst_label_lines[key_label][None],
38 | bidirectional=True,
39 | threshold=threshold,
40 | )
41 |
42 | pred_taken = torch.zeros(pred_num, dtype=torch.bool).cuda()
43 | label_taken = torch.zeros(label_num, dtype=torch.bool).cuda()
44 | matched_list = []
45 | if pred_num > 0 and label_num > 0:
46 | while True:
47 | idx = torch.argmin(CD)
48 | i = torch.div(idx, CD.shape[1], rounding_mode="floor")
49 | j = (idx % CD.shape[1]).item()
50 | # i, j = (idx // CD.shape[1]).item(), (idx % CD.shape[1]).item()
51 | if CD[i, j] >= threshold:
52 | break
53 | matched_list.append(
54 | {
55 | "pred": inst_pred_lines[inst_pred_lines_keys[i]],
56 | "confidence": inst_pred_confidence[inst_pred_lines_keys[i]],
57 | "label": inst_label_lines[inst_label_lines_keys[j]],
58 | "CD": CD[i, j].item(),
59 | }
60 | )
61 | pred_taken[i] = True
62 | label_taken[j] = True
63 | CD[i, :] = threshold
64 | CD[:, j] = threshold
65 |
66 | for i in range(pred_num):
67 | if not pred_taken[i]:
68 | matched_list.append(
69 | {
70 | "pred": inst_pred_lines[inst_pred_lines_keys[i]],
71 | "confidence": inst_pred_confidence[inst_pred_lines_keys[i]],
72 | "label": None,
73 | "CD": threshold,
74 | }
75 | )
76 |
77 | for j in range(label_num):
78 | if not label_taken[j]:
79 | matched_list.append(
80 | {
81 | "pred": None,
82 | "confidence": 0,
83 | "label": inst_label_lines[inst_label_lines_keys[j]],
84 | "CD": threshold,
85 | }
86 | )
87 |
88 | return matched_list
89 |
90 |
91 | def instance_mask_ap(
92 | AP_matrix,
93 | AP_count_matrix,
94 | inst_pred_mask,
95 | inst_label_mask,
96 | scale_x,
97 | scale_y,
98 | confidence,
99 | thresholds,
100 | sampled_recalls,
101 | ):
102 | # inst_pred: N, C, H, W
103 | # inst_label: N, C, H, W
104 | # confidence: N, max_instance_num
105 | N, C, H, W = inst_label_mask.shape
106 | for n in range(N):
107 | for c in range(C):
108 | inst_pred_lines = get_line_instances_from_mask(inst_pred_mask[n, c], scale_x, scale_y)
109 | inst_label_lines = get_line_instances_from_mask(inst_label_mask[n, c], scale_x, scale_y)
110 | if len(inst_pred_lines) == 0 and len(inst_label_lines) == 0:
111 | continue
112 | AP_matrix[c] += single_instance_line_AP(
113 | inst_pred_lines, confidence[n], inst_label_lines, thresholds, sampled_recalls=sampled_recalls
114 | )
115 | AP_count_matrix[c] += 1
116 | return AP_matrix, AP_count_matrix
117 |
118 |
119 | def single_instance_line_AP(inst_pred_lines, inst_pred_confidence, inst_label_lines, thresholds, sampled_recalls):
120 | # inst_pred_line: a list of points {1: (M1, 2), 2: (M2, 2), ..., k2: (M_k2, 2)}
121 | # inst_pred_confidence: a list of confidence [c1, c2, ..., ck2]
122 | # inst_label_line: a list of points {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)}
123 | # thresholds: threshold of chamfer distance to identify TP
124 | num_thres = len(thresholds)
125 | AP_thres = torch.zeros(num_thres).cuda()
126 | for t in range(num_thres):
127 | matching_list = line_matching_by_CD(inst_pred_lines, inst_pred_confidence, inst_label_lines, thresholds[t])
128 | precision, recall = get_precision_recall_curve_by_confidence(
129 | matching_list, len(inst_label_lines), thresholds[t]
130 | )
131 | precision, recall = smooth_PR_curve(precision, recall)
132 | AP = calc_AP_from_precision_recall(precision, recall, sampled_recalls)
133 | AP_thres[t] = AP
134 | return AP_thres
135 |
136 |
137 | def get_precision_recall_curve_by_confidence(matching_list, num_gt, threshold):
138 | matching_list = sorted(matching_list, key=lambda x: x["confidence"])
139 |
140 | TP = [0]
141 | FP = [0]
142 | for match_item in matching_list:
143 | pred = match_item["pred"]
144 | label = match_item["label"]
145 | dist = match_item["CD"]
146 |
147 | if pred is None:
148 | continue
149 |
150 | if label is None:
151 | TP.append(TP[-1])
152 | FP.append(FP[-1] + 1)
153 | continue
154 |
155 | if dist < threshold:
156 | TP.append(TP[-1] + 1)
157 | FP.append(FP[-1])
158 | else:
159 | TP.append(TP[-1])
160 | FP.append(FP[-1] + 1)
161 |
162 | TP = torch.tensor(TP[1:]).cuda()
163 | FP = torch.tensor(FP[1:]).cuda()
164 |
165 | precision = TP / (TP + FP)
166 | recall = TP / num_gt
167 | return precision, recall
168 |
169 |
170 | def smooth_PR_curve(precision, recall):
171 | idx = torch.argsort(recall)
172 | recall = recall[idx]
173 | precision = precision[idx]
174 | length = len(precision)
175 | for i in range(length - 1, 0, -1):
176 | precision[:i][precision[:i] < precision[i]] = precision[i]
177 | return precision, recall
178 |
179 |
180 | def calc_AP_from_precision_recall(precision, recall, sampled_recalls):
181 | acc_precision = 0.0
182 | total = len(sampled_recalls)
183 | for r in sampled_recalls:
184 | idx = torch.where(recall >= r)[0]
185 | if len(idx) == 0:
186 | continue
187 | idx = idx[0]
188 | acc_precision += precision[idx]
189 | return acc_precision / total
190 |
--------------------------------------------------------------------------------
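A toy walk-through of the PR smoothing and AP sampling above (the precision/recall values are made up; the sys.path line mirrors the flat, script-style imports used by eval.py):

import sys
import torch

sys.path.append("tools/evaluation")
from ap import smooth_PR_curve, calc_AP_from_precision_recall

precision = torch.tensor([1.0, 0.8, 0.9, 0.6])
recall = torch.tensor([0.1, 0.4, 0.3, 0.8])
precision, recall = smooth_PR_curve(precision, recall)   # precision made non-increasing over recall
ap = calc_AP_from_precision_recall(precision, recall, torch.linspace(0.1, 1, 10))
print(float(ap))   # 0.6 for these toy values
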
/tools/evaluation/cd.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def chamfer_distance(source_pc, target_pc, threshold, cum=False, bidirectional=True):
4 | torch.backends.cuda.matmul.allow_tf32 = False
5 | torch.backends.cudnn.allow_tf32 = False
6 | # dist = torch.cdist(source_pc.float(), target_pc.float())
7 | # dist = torch.cdist(source_pc.float(), target_pc.float(), compute_mode='donot_use_mm_for_euclid_dist')
8 | dist = torch.cdist(source_pc.type(torch.float64), target_pc.type(torch.float64))
9 | dist1, _ = torch.min(dist, 2)
10 | dist2, _ = torch.min(dist, 1)
11 | if cum:
12 | len1 = dist1.shape[-1]
13 | len2 = dist2.shape[-1]
14 | dist1 = dist1.sum(-1)
15 | dist2 = dist2.sum(-1)
16 | return dist1, dist2, len1, len2
17 | dist1 = dist1.mean(-1)
18 | dist2 = dist2.mean(-1)
19 | if bidirectional:
20 | return min((dist1 + dist2) / 2, threshold)
21 | else:
22 | return min(dist1, threshold), min(dist2, threshold)
23 |
24 |
25 | def semantic_mask_chamfer_dist_cum(seg_pred, seg_label, scale_x, scale_y, threshold):
26 | # seg_label: N, C, H, W
27 | # seg_pred: N, C, H, W
28 | N, C, H, W = seg_label.shape
29 |
30 | cum_CD1 = torch.zeros(C, device=seg_label.device)
31 | cum_CD2 = torch.zeros(C, device=seg_label.device)
32 | cum_num1 = torch.zeros(C, device=seg_label.device)
33 | cum_num2 = torch.zeros(C, device=seg_label.device)
34 | for n in range(N):
35 | for c in range(C):
36 | pred_pc_x, pred_pc_y = torch.where(seg_pred[n, c] != 0)
37 | label_pc_x, label_pc_y = torch.where(seg_label[n, c] != 0)
38 | pred_pc_x = pred_pc_x.float() * scale_x
39 | pred_pc_y = pred_pc_y.float() * scale_y
40 | label_pc_x = label_pc_x.float() * scale_x
41 | label_pc_y = label_pc_y.float() * scale_y
42 | if len(pred_pc_x) == 0 and len(label_pc_x) == 0:
43 | continue
44 |
45 | if len(label_pc_x) == 0:
46 | cum_CD1[c] += len(pred_pc_x) * threshold
47 | cum_num1[c] += len(pred_pc_x)
48 | continue
49 |
50 | if len(pred_pc_x) == 0:
51 | cum_CD2[c] += len(label_pc_x) * threshold
52 | cum_num2[c] += len(label_pc_x)
53 | continue
54 |
55 | pred_pc_coords = torch.stack([pred_pc_x, pred_pc_y], -1).float()
56 | label_pc_coords = torch.stack([label_pc_x, label_pc_y], -1).float()
57 | CD1, CD2, len1, len2 = chamfer_distance(
58 | pred_pc_coords[None], label_pc_coords[None], threshold=threshold, cum=True
59 | )
60 | cum_CD1[c] += CD1.item()
61 | cum_CD2[c] += CD2.item()
62 | cum_num1[c] += len1
63 | cum_num2[c] += len2
64 | return cum_CD1, cum_CD2, cum_num1, cum_num2
65 |
--------------------------------------------------------------------------------
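A quick numeric check of the bidirectional chamfer distance on two assumed toy point sets (same flat import style as eval.py):

import sys
import torch

sys.path.append("tools/evaluation")
from cd import chamfer_distance

source = torch.tensor([[[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]]])   # (1, 3, 2)
target = torch.tensor([[[0.0, 0.5], [2.0, 0.5]]])               # (1, 2, 2)
print(chamfer_distance(source, target, threshold=5.0, bidirectional=True))   # ~0.60, clipped at threshold
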
/tools/evaluation/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import numpy as np
5 | import pickle as pkl
6 | from tqdm import tqdm
7 | from tabulate import tabulate
8 | from torch.utils.data import Dataset, DataLoader
9 | from ap import instance_mask_ap as get_batch_ap
10 |
11 |
12 | class BeMapNetResultForNuScenes(Dataset):
13 | def __init__(self, gt_dir, dt_dir, val_txt):
14 | self.gt_dir, self.dt_dir = gt_dir, dt_dir
15 | self.tokens = [fname.strip().split('.')[0] for fname in open(val_txt).readlines()]
16 | self.max_line_count = 100
17 |
18 | def __getitem__(self, idx):
19 | token = self.tokens[idx]
20 | gt_path = os.path.join(self.gt_dir, f"{token}.npz")
21 | gt_masks = np.load(open(gt_path, "rb"), allow_pickle=True)["instance_mask"]
22 | dt_item = np.load(os.path.join(self.dt_dir, f"{token}.npz"), allow_pickle=True)
23 | dt_masks = dt_item["dt_mask"]
24 | dt_scores = dt_item['dt_res'].item()["confidence_level"]
25 | dt_scores = np.array(list(dt_scores) + [-1] * (self.max_line_count - len(dt_scores)))
26 | return torch.from_numpy(dt_masks), torch.from_numpy(dt_scores).float(), torch.from_numpy(gt_masks)
27 |
28 | def __len__(self):
29 | return len(self.tokens)
30 |
31 |
32 | class BeMapNetEvaluatorForNuScenes(object):
33 | def __init__(self, gt_dir, dt_dir, val_txt, batch_size=4, num_classes=3, map_resolution=(0.15, 0.15)):
34 |
35 | self.THRESHOLDS = [0.2, 0.5, 1.0, 1.5]
36 | self.CLASS_NAMES = ["Divider", "PedCross", "Contour"]
37 | self.SAMPLED_RECALLS = torch.linspace(0.1, 1, 10).cuda()
38 | self.res_dataloader = DataLoader(
39 | BeMapNetResultForNuScenes(gt_dir, dt_dir, val_txt),
40 | batch_size=batch_size, shuffle=False, drop_last=False, num_workers=8
41 | )
42 | self.map_resolution = map_resolution
43 | self.ap_matrix = torch.zeros((num_classes, len(self.THRESHOLDS))).cuda()
44 | self.ap_count_matrix = torch.zeros((num_classes, len(self.THRESHOLDS))).cuda()
45 |
46 | def execute(self):
47 |
48 | for dt_masks, dt_scores, gt_masks in tqdm(self.res_dataloader):
49 | self.ap_matrix, self.ap_count_matrix = get_batch_ap(
50 | self.ap_matrix,
51 | self.ap_count_matrix,
52 | dt_masks.cuda(),
53 | gt_masks.cuda(),
54 | *self.map_resolution,
55 | dt_scores.cuda(),
56 | self.THRESHOLDS,
57 | self.SAMPLED_RECALLS,
58 | )
59 | ap = (self.ap_matrix / self.ap_count_matrix).cpu().data.numpy()
60 | self._format_print(ap)
61 |
62 | def _format_print(self, ap):
63 | res_matrix = []
64 | table_header = ["Class", "AP@.2", "AP@.5", "AP@1.", "AP@1.5", "mAP@HARD", "mAP@EASY"]
65 | table_values = []
66 | for i, cls_name in enumerate(self.CLASS_NAMES):
67 | res_matrix_line = [ap[i][0], ap[i][1], ap[i][2], ap[i][3], np.mean(ap[i][:-1]), np.mean(ap[i][1:])]
68 | res_matrix.append(res_matrix_line)
69 | table_values.append([cls_name] + self.line_data_to_str(*res_matrix_line))
70 | avg = np.mean(np.array(res_matrix), axis=0)
71 | table_values.append(["Average", *self.line_data_to_str(*avg)])
72 | table_str = tabulate(table_values, headers=table_header, tablefmt="grid")
73 | print(table_str)
74 | return table_str
75 |
76 | @staticmethod
77 | def line_data_to_str(ap0, ap1, ap2, ap3, map1, map2):
78 | return [
79 | "{:.1f}".format(ap0 * 100),
80 | "{:.1f}".format(ap1 * 100),
81 | "{:.1f}".format(ap2 * 100),
82 | "{:.1f}".format(ap3 * 100),
83 | "{:.1f}".format(map1 * 100),
84 | "{:.1f}".format(map2 * 100),
85 | ]
86 |
87 |
88 | evaluator = BeMapNetEvaluatorForNuScenes(
89 | gt_dir=sys.argv[1],
90 | dt_dir=sys.argv[2],
91 | val_txt=sys.argv[3],
92 | batch_size=4,
93 | num_classes=3,
94 | map_resolution=(0.15, 0.15),
95 | )
96 |
97 | evaluator.execute()
98 |
--------------------------------------------------------------------------------
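The evaluator is meant to be launched directly, e.g. python tools/evaluation/eval.py <gt_dir> <dt_dir> <val_txt> (all three arguments are placeholders): the first is the directory of ground-truth .npz files produced by the annotation converter, the second the directory of dumped prediction .npz files, and the third a split file such as splits/nuscenes/val.txt. Running it as a script keeps the flat imports of ap.py and cd.py working, and a CUDA device is required since the AP and chamfer computations run on the GPU.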