├── .gitignore ├── LICENSE ├── README.md ├── assets ├── figures │ ├── arch.png │ ├── demo.gif │ ├── pivot-title.png │ ├── pivotnet-arch.png │ ├── pivotnet-logo.png │ └── title.png ├── splits │ └── nuscenes │ │ ├── all.txt │ │ ├── cloudy.txt │ │ ├── day.txt │ │ ├── night.txt │ │ ├── rainy.txt │ │ ├── sunny.txt │ │ ├── test.txt │ │ ├── train.txt │ │ └── val.txt └── weights │ └── README.md ├── configs ├── bemapnet_nuscenes_effb0.py ├── bemapnet_nuscenes_res50.py ├── bemapnet_nuscenes_swint.py ├── pivotnet_nuscenes_effb0.py ├── pivotnet_nuscenes_res50.py └── pivotnet_nuscenes_swint.py ├── mapmaster ├── __init__.py ├── dataset │ ├── nuscenes_bemapnet.py │ ├── nuscenes_pivotnet.py │ ├── sampler.py │ └── transform.py ├── engine │ ├── callbacks.py │ ├── core.py │ ├── environ.py │ ├── executor.py │ └── experiment.py ├── models │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── bifpn │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── efficientnet │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── model.py │ │ ├── resnet │ │ │ ├── __init__.py │ │ │ ├── resnet.py │ │ │ └── utils.py │ │ └── swin_transformer │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── utils.py │ ├── bev_decoder │ │ ├── __init__.py │ │ ├── deform_transformer │ │ │ ├── __init__.py │ │ │ ├── deform_transformer.py │ │ │ ├── ops │ │ │ │ ├── __init__.py │ │ │ │ ├── functions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn_func.py │ │ │ │ ├── make.sh │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ms_deform_attn.py │ │ │ │ ├── setup.py │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ └── ms_deform_attn_cpu.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ └── vision.cpp │ │ │ │ └── test.py │ │ │ └── position_encoding.py │ │ ├── model.py │ │ └── transformer.py │ ├── ins_decoder │ │ ├── __init__.py │ │ ├── mask2former.py │ │ ├── model.py │ │ └── pointmask2former.py │ ├── network.py │ ├── output_head │ │ ├── __init__.py │ │ ├── bezier_outputs.py │ │ ├── bezier_post_processor.py │ │ ├── line_matching.py │ │ ├── pivot_outputs.py │ │ └── pivot_post_processor.py │ └── utils │ │ ├── mask_loss.py │ │ ├── misc.py │ │ ├── position_encoding.py │ │ └── recovery_loss.py └── utils │ ├── env.py │ └── misc.py ├── requirement.txt ├── run.sh └── tools ├── __init__.py ├── anno_converter ├── __init__.py ├── bezier.py ├── generate_pivots.py └── nuscenes │ ├── __init__.py │ ├── convert.py │ ├── rasterize.py │ └── vectorize.py └── evaluation ├── ap.py ├── cd.py └── eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Wenjie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 | 
 4 | # 5 | ### NEWS !!! 6 | * **`Jan. 15th, 2024`:** :rocket: :rocket: :rocket: The official implementation of our PivotNet is now released. Enjoy it! 7 | * **`Jul. 14th, 2023`:** :clap: Our **PivotNet** is accepted by ICCV 2023! Refer to the **[Long-version Paper](https://arxiv.org/pdf/2308.16477)** for more details. 8 | * **`May. 26th, 2023`:** :trophy: Our team won the **Championship** and the **Innovation Award** of the CVPR23 *Online HD Map Construction Challenge*! [**[Leaderboard](https://opendrivelab.com/AD23Challenge.html#online_hd_map_construction)** / **[Tech-Report](https://arxiv.org/abs/2306.10301)**] 9 | * **`Feb. 28th, 2023`:** :clap: Our **BeMapNet** is accepted by CVPR 2023! Refer to the **[Paper](https://openaccess.thecvf.com/content/CVPR2023/html/Qiao_End-to-End_Vectorized_HD-Map_Construction_With_Piecewise_Bezier_Curve_CVPR_2023_paper.html)** for more details. [**[Long version on Arxiv](https://arxiv.org/pdf/2306.09700)** / **[Code](https://github.com/er-muyue/BeMapNet/tree/main)**] 10 | 11 | ##
Introduction
12 | **Vectorized high-definition map (HD-map) construction** has garnered considerable attention in autonomous driving research. Towards precise map element learning, we propose a simple yet effective architecture named **PivotNet**, which adopts a unified pivot-based map representation and is formulated as a direct set prediction paradigm. Concretely, we first propose a novel *Point-to-Line Mask module* to encode both the subordinate and geometrical point-line priors in the network. Then, a well-designed *Pivot Dynamic Matching module* models the topology of dynamic point sequences by introducing the concept of sequence matching. Furthermore, to supervise both the position and the topology of the vectorized point predictions, we propose a *Dynamic Vectorized Sequence loss*. PivotNet consists of four primary components: a **Camera Feature Extractor**, a **BEV Feature Decoder**, a **Line-aware Point Decoder**, and a **Pivotal Point Predictor**. It takes RGB images as input and generates a flexible and compact vectorized representation without any post-processing. 13 | 14 |
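The four components above correspond to the sub-packages of `mapmaster/models` (`backbone`, `bev_decoder`, `ins_decoder`, `output_head`) and are wired together through the `model_setup` dictionary in `configs/pivotnet_nuscenes_*.py`. The sketch below is for orientation only: the `im_backbone` and `bev_decoder` entries mirror the overrides visible in the ResNet-50 config in this repo, while the remaining keys and all placeholder values are illustrative, not the exact configuration.

```python
# Illustrative sketch only -- the authoritative dictionaries live in configs/pivotnet_nuscenes_*.py.
model_setup = dict(
    im_backbone=dict(arch_name="resnet", ret_layers=2, bkb_kwargs=dict(depth=50)),  # Camera Feature Extractor
    bev_decoder=dict(net_kwargs=dict(in_channels=[1024, 2048])),                    # BEV Feature Decoder
    ins_decoder=dict(),                                                             # Line-aware Point Decoder (placeholder)
    output_head=dict(),                                                             # Pivotal Point Predictor (placeholder)
)
```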
15 | 16 | ##
Documentation
17 | 18 | We build the released version of **PivotNet** upon the [BeMapNet](https://github.com/er-muyue/BeMapNet/tree/main) project, so this repository supports reproducing **both** PivotNet and BeMapNet. 19 | 20 |
21 | Step-by-step Installation 22 | <br>
23 | 24 | - **a. Check Environment** 25 | ```shell 26 | Python >= 3.8 27 | CUDA 11.1 28 | # other Python/CUDA versions have not been fully tested, but they are expected to work as well. 29 | ``` 30 | 31 | - **b. Create a conda virtual environment and activate it. (Optional)** 32 | ```shell 33 | conda create -n pivotnet python=3.8 -y 34 | conda activate pivotnet 35 | ``` 36 | 37 | - **c. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/get-started/previous-versions/).** 38 | ```shell 39 | pip3 install torch==1.10.1+cu111 torchvision==0.11.2+cu111 -f https://download.pytorch.org/whl/torch_stable.html 40 | ``` 41 | 42 | - **d. Install MMCV following the [official instructions](https://github.com/open-mmlab/mmcv). (requires a GPU)** 43 | ```shell 44 | pip3 install -U openmim 45 | mim install mmcv==1.7.1 46 | ``` 47 | 48 | - **e. Install Detectron2 following the [official instructions](https://detectron2.readthedocs.io/en/latest/tutorials/install.html).** 49 | ```shell 50 | python3 -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html 51 | ``` 52 | 53 | - **f. Compile the CUDA operators for the Deformable Transformer.** 54 | ```shell 55 | cd mapmaster/models/bev_decoder/deform_transformer/ops 56 | bash make.sh 57 | ``` 58 | 59 | - **g. Install PivotNet.** 60 | ```shell 61 | git clone git@github.com:wenjie710/PivotNet.git 62 | cd PivotNet 63 | pip3 install -r requirement.txt 64 | ``` 65 | 66 |
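After step g, a quick import check can confirm that the GPU toolchain and the installed libraries are visible before compiling the deformable-attention ops or starting training. This is a minimal sketch assuming the versions from steps c-e; it is not part of the repository.

```python
# Minimal environment sanity check (not part of the repo; version numbers follow steps c-e above).
import torch
import torchvision
import mmcv
import detectron2

print("torch      :", torch.__version__)        # expected 1.10.1+cu111
print("torchvision:", torchvision.__version__)  # expected 0.11.2+cu111
print("mmcv       :", mmcv.__version__)         # expected 1.7.1
print("detectron2 :", detectron2.__version__)
assert torch.cuda.is_available(), "a CUDA-capable GPU is required (e.g. for the deformable attention ops)"
```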
67 | 68 | 69 |
70 | Material Preparation 71 | <br>
72 | 73 | - **a. Data: NuScenes** 74 | - Download and unzip the [NuScenes](https://www.nuscenes.org/nuscenes#download) dataset on your server and link it to the desired path. 75 | ```shell 76 | cd /path/to/pivotnet 77 | mkdir data 78 | ln -s /any/path/to/your/nuscenes data/nuscenes 79 | ``` 80 | - Generate pivot/Bezier annotations from the raw NuScenes annotations. 81 | ```shell 82 | cd /path/to/pivotnet 83 | python3 tools/anno_converter/nuscenes/convert.py -d ./data # generate pivot annotations only 84 | python3 tools/anno_converter/nuscenes/convert.py -d ./data --bezier # OR: generate both pivot and bezier annotations (takes longer) 85 | ``` 86 | 87 | - **b. Weights: Public Pretrained Models** 88 | - Download the public pretrained weights used for backbone initialization. 89 | ```shell 90 | cd /path/to/pivotnet 91 | cd assets/weights 92 | wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/efficientnet-b0-355c32eb.pth 93 | wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/resnet50-0676ba61.pth 94 | wget https://github.com/wenjie710/PivotNet/releases/download/v1.0/upernet_swin_tiny_patch4_window7_512x512.pth 95 | ``` 96 | 97 | - **c. Check: Project Structure** 98 | - Your project directory should look like this: 99 | ```shell 100 | assets 101 | | -- weights (resnet, swin-t, efficient-b0, ...) 102 | | -- ... 103 | mapmaster 104 | configs 105 | data 106 | | -- nuscenes 107 | | -- samples (CAM_FRONT, CAM_FRONT_LEFT, CAM_FRONT_RIGHT, ...) 108 | | -- annotations 109 | | -- v1.0-trainval 110 | | -- ... 111 | | -- customer 112 | | -- pivot-bezier 113 | | -- *.npz 114 | tools 115 | ``` 116 | 117 |
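Each token listed under `assets/splits/nuscenes/*.txt` corresponds to one `.npz` file written by the converter and later loaded by `mapmaster/dataset/nuscenes_pivotnet.py` / `nuscenes_bemapnet.py`. The snippet below is a hedged way to inspect one converted sample; the field names come from those dataset loaders, while the exact annotation directory (shown here as `data/nuscenes/customer/pivot-bezier`) depends on the converter options and your local setup.

```python
# Inspect one converted annotation file (illustrative; adjust paths to your setup).
import numpy as np

token = open("assets/splits/nuscenes/val.txt").readline().strip()
sample = np.load(f"data/nuscenes/customer/pivot-bezier/{token}.npz", allow_pickle=True)

print(sample.files)                       # stored arrays, e.g. image_paths, trans, rots, intrins, pivot_pts, ...
print(sample["image_paths"])              # per-camera image paths relative to the nuScenes root
print(sample["trans"].shape, sample["rots"].shape, sample["intrins"].shape)  # camera poses / intrinsics
print(type(sample["pivot_pts"].item()))   # pivot-point targets consumed by nuscenes_pivotnet.py
```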
118 | 119 |
120 | 
 121 | Training and Evaluation 122 | <br>
123 | 124 | - **a. Model Training** 125 | ```shell 126 | bash run.sh train pivotnet_nuscenes_swint 30 # pivotnet, default: 8GPUs, bs=1, epochs=30 127 | bash run.sh train bemapnet_nuscenes_swint 30 # bemapnet, default: 8GPUs, bs=1, epochs=30 128 | ``` 129 | 130 | - **b. Model Evaluation** 131 | ```shell 132 | bash run.sh test pivotnet_nuscenes_swint ${checkpoint-path} # for pivotnet 133 | bash run.sh test bemapnet_nuscenes_swint ${checkpoint-path} # for bemapnet 134 | ``` 135 | 136 | - **c. Reproduce with one command** 137 | ```shell 138 | bash run.sh reproduce pivotnet_nuscenes_swint # for pivotnet 139 | bash run.sh reproduce bemapnet_nuscenes_swint # for bemapnet 140 | ``` 141 |
142 | 143 | ##
Models & Results
144 | 145 |
146 | 
 147 | Results on NuScenes Val Set 148 | <br>
149 | 150 | - **a. Easy-Setting --> AP-threshold is `0.5m, 1.0m, 1.5m` (same as [VectorMapNet](https://arxiv.org/abs/2206.08920.pdf) / [MapTR](https://arxiv.org/abs/2208.14437.pdf))** 151 | 152 | | Model | Config | Schd | mAPdivider | mAPpedcross |mAPboundary | mAPavg | Download | 153 | | :---: | :---: | :---: | :---: | :---:|:---:| :---: | :---: | 154 | |PivotNet-Effb0| [config](configs/pivotnet_nuscenes_effb0.py) | 30ep | 59.3 | 54.1 | 60.0 | 57.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30_train.log)| 155 | |PivotNet-Res50| [config](configs/pivotnet_nuscenes_res50.py) | 30ep | 58.0 | 53.5 | 59.7 | 57.1 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30_train.log)| 156 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 30ep | 62.9 | 57.9 | 64.0 | 61.6 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30_train.log)| 157 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 110ep | 67.8 | 62.1 | 69.2 | 66.4 | [model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110_train.log) | 158 | 159 | 160 | - **b. Hard-Setting --> AP-threshold is `0.2m, 0.5m, 1.0m` (Recommended as a more practical HD map evaluation protocol)** 161 | 162 | | Model | Config | Schd | mAPdivider | mAPpedcross |mAPboundary | mAPavg | Download | 163 | | :---: | :---: | :---: | :---: | :---:|:---:| :---: | :---: | 164 | |PivotNet-Effb0| [config](configs/pivotnet_nuscenes_effb0.py) | 30ep | 44.0 | 35.9 | 39.7 | 39.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_effb0_ep30_train.log) | 165 | |PivotNet-Res50| [config](configs/pivotnet_nuscenes_res50.py) | 30ep | 43.5 | 35.6 | 40.4 | 39.8 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_res50_ep30_train.log) | 166 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 30ep | 47.7 | 39.4 | 43.7 | 43.6 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep30_train.log) | 167 | |PivotNet-SwinT| [config](configs/pivotnet_nuscenes_swint.py) | 110ep | 54.1 | 43.3 | 50.3 | 49.3 |[model](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110.pth)/[train and eval](https://github.com/wenjie710/PivotNet/releases/download/v1.0/pivotnet_nuscenes_swint_ep110_train.log) | 168 | 169 | 170 |
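In both tables, each per-class score is an AP averaged over the three distance thresholds of the chosen setting (the VectorMapNet/MapTR-style protocol referenced above), and the mAPavg column is simply the arithmetic mean of the three per-class scores. A small worked check for PivotNet-Effb0 under the easy setting:

```python
# mAP_avg is the mean of the per-class scores (PivotNet-Effb0, easy setting, values from the table above).
per_class = {"divider": 59.3, "pedcross": 54.1, "boundary": 60.0}
map_avg = sum(per_class.values()) / len(per_class)
print(round(map_avg, 1))  # -> 57.8
```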
171 | 172 | # 173 | ## Citation 174 | If you find PivotNet/BeMapNet/MachMap is useful in your research or applications, please consider giving us a star :star: and citing them by the following BibTeX entries: 175 | ``` 176 | @inproceedings{ding2023pivotnet, 177 | title={Pivotnet: Vectorized pivot learning for end-to-end hd map construction}, 178 | author={Ding, Wenjie and Qiao, Limeng and Qiu, Xi and Zhang, Chi}, 179 | booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, 180 | pages={3672--3682}, 181 | year={2023} 182 | } 183 | 184 | @InProceedings{Qiao_2023_CVPR, 185 | author = {Qiao, Limeng and Ding, Wenjie and Qiu, Xi and Zhang, Chi}, 186 | title = {End-to-End Vectorized HD-Map Construction With Piecewise Bezier Curve}, 187 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 188 | month = {June}, 189 | year = {2023}, 190 | pages = {13218-13228} 191 | } 192 | 193 | @article{qiao2023machmap, 194 | author={Limeng Qiao and Yongchao Zheng and Peng Zhang and Wenjie Ding and Xi Qiu and Xing Wei and Chi Zhang}, 195 | title={MachMap: End-to-End Vectorized Solution for Compact HD-Map Construction}, 196 | journal={arXiv preprint arXiv:2306.10301}, 197 | year={2023}, 198 | } 199 | 200 | ``` 201 | -------------------------------------------------------------------------------- /assets/figures/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/arch.png -------------------------------------------------------------------------------- /assets/figures/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/demo.gif -------------------------------------------------------------------------------- /assets/figures/pivot-title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivot-title.png -------------------------------------------------------------------------------- /assets/figures/pivotnet-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivotnet-arch.png -------------------------------------------------------------------------------- /assets/figures/pivotnet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/pivotnet-logo.png -------------------------------------------------------------------------------- /assets/figures/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/figures/title.png -------------------------------------------------------------------------------- /assets/weights/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/assets/weights/README.md 
-------------------------------------------------------------------------------- /configs/pivotnet_nuscenes_effb0.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mapmaster.engine.core import MapMasterCli 3 | from pivotnet_nuscenes_swint import EXPConfig, Exp 4 | 5 | EXPConfig.model_setup["im_backbone"] = dict( 6 | arch_name="efficient_net", 7 | ret_layers=2, 8 | fpn_kwargs=None, 9 | bkb_kwargs=dict( 10 | model_name='efficientnet-b0', 11 | in_channels=3, 12 | out_stride=32, 13 | with_head=False, 14 | with_cp=True, 15 | norm_layer=nn.SyncBatchNorm, 16 | weights_path="assets/weights/efficientnet-b0-355c32eb.pth", 17 | ), 18 | ) 19 | 20 | EXPConfig.model_setup['bev_decoder']["net_kwargs"].update( 21 | dict( 22 | in_channels=[112, 320], 23 | ) 24 | ) 25 | 26 | class ExpDev(Exp): 27 | def __init__(self, batch_size_per_device=1, total_devices=8, max_epoch=60, **kwargs): 28 | super(ExpDev, self).__init__(batch_size_per_device, total_devices, max_epoch, **kwargs) 29 | self.exp_config = EXPConfig() 30 | 31 | if __name__ == "__main__": 32 | MapMasterCli(ExpDev).run() 33 | 34 | -------------------------------------------------------------------------------- /configs/pivotnet_nuscenes_res50.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mapmaster.engine.core import MapMasterCli 3 | from pivotnet_nuscenes_swint import EXPConfig, Exp 4 | 5 | 6 | EXPConfig.model_setup["im_backbone"] = dict( 7 | arch_name="resnet", 8 | ret_layers=2, 9 | fpn_kwargs=None, 10 | bkb_kwargs=dict( 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(2, 3), 14 | frozen_stages=-1, # do not freeze any layers 15 | norm_cfg=dict(type='SyncBN', requires_grad=True), 16 | norm_eval=True, 17 | style='pytorch', 18 | init_cfg=dict( 19 | type='Pretrained', 20 | checkpoint='assets/weights/resnet50-0676ba61.pth'), # from pytorch 21 | with_cp=True, 22 | ), 23 | ) 24 | 25 | EXPConfig.model_setup['bev_decoder']["net_kwargs"].update( 26 | dict( 27 | in_channels=[1024, 2048], 28 | ) 29 | ) 30 | 31 | class ExpDev(Exp): 32 | def __init__(self, batch_size_per_device=1, total_devices=8, max_epoch=60, **kwargs): 33 | super(ExpDev, self).__init__(batch_size_per_device, total_devices, max_epoch, **kwargs) 34 | self.exp_config = EXPConfig() 35 | 36 | if __name__ == "__main__": 37 | MapMasterCli(ExpDev).run() 38 | 39 | -------------------------------------------------------------------------------- /mapmaster/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/mapmaster/__init__.py -------------------------------------------------------------------------------- /mapmaster/dataset/nuscenes_bemapnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | from copy import deepcopy 6 | from skimage import io as skimage_io 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class NuScenesMapDataset(Dataset): 11 | def __init__(self, img_key_list, map_conf, ida_conf, bezier_conf, transforms, data_split="training"): 12 | super().__init__() 13 | self.img_key_list = img_key_list 14 | self.map_conf = map_conf 15 | self.ida_conf = ida_conf 16 | self.bez_conf = bezier_conf 17 | self.ego_size = map_conf["ego_size"] 18 | self.mask_key = map_conf["mask_key"] 19 | self.nusc_root = 
map_conf["nusc_root"] 20 | self.anno_root = map_conf["anno_root"] 21 | self.split_dir = map_conf["split_dir"] 22 | self.num_degree = bezier_conf["num_degree"] 23 | self.max_pieces = bezier_conf["max_pieces"] 24 | self.max_instances = bezier_conf["max_instances"] 25 | self.split_mode = 'train' if data_split == "training" else 'val' 26 | split_path = os.path.join(self.split_dir, f'{self.split_mode}.txt') 27 | self.tokens = [token.strip() for token in open(split_path).readlines()] 28 | self.transforms = transforms 29 | 30 | def __getitem__(self, idx: int): 31 | token = self.tokens[idx] 32 | sample = np.load(os.path.join(self.anno_root, f'{token}.npz'), allow_pickle=True) 33 | resize_dims, crop, flip, rotate = self.sample_ida_augmentation() 34 | images, ida_mats = [], [] 35 | for im_view in self.img_key_list: 36 | for im_path in sample['image_paths']: 37 | if im_path.startswith(f'samples/{im_view}/'): 38 | im_path = os.path.join(self.nusc_root, im_path) 39 | img = skimage_io.imread(im_path) 40 | img, ida_mat = self.img_transform(img, resize_dims, crop, flip, rotate) 41 | images.append(img) 42 | ida_mats.append(ida_mat) 43 | extrinsic = np.stack([np.eye(4) for _ in range(sample["trans"].shape[0])], axis=0) 44 | extrinsic[:, :3, :3] = sample["rots"] 45 | extrinsic[:, :3, 3] = sample["trans"] 46 | intrinsic = sample['intrins'] 47 | ctr_points = np.zeros((self.max_instances, max(self.max_pieces) * max(self.num_degree) + 1, 2), dtype=np.float) 48 | ins_labels = np.zeros((self.max_instances, 3), dtype=np.int16) - 1 49 | for ins_id, ctr_info in enumerate(sample['ctr_points']): 50 | cls_id = int(ctr_info['type']) 51 | ctr_pts_raw = np.array(ctr_info['pts']) 52 | max_points = self.max_pieces[cls_id] * self.num_degree[cls_id] + 1 53 | num_points = max_points if max_points <= ctr_pts_raw.shape[0] else ctr_pts_raw.shape[0] 54 | assert num_points >= self.num_degree[cls_id] + 1 55 | ctr_points[ins_id][:num_points] = np.array(ctr_pts_raw[:num_points]) 56 | ins_labels[ins_id] = [cls_id, (num_points - 1) // self.num_degree[cls_id] - 1, num_points] 57 | masks = sample[self.mask_key] 58 | if flip: 59 | new_order = [2, 1, 0, 5, 4, 3] 60 | img_key_list = [self.img_key_list[i] for i in new_order] 61 | images = [images[i] for i in new_order] 62 | ida_mats = [ida_mats[i] for i in new_order] 63 | extrinsic = [extrinsic[i] for i in new_order] 64 | intrinsic = [intrinsic[i] for i in new_order] 65 | masks = [np.flip(mask, axis=1) for mask in masks] 66 | ctr_points = self.point_flip(ctr_points, ins_labels, self.ego_size) 67 | item = dict( 68 | images=images, targets=dict(masks=masks, points=ctr_points, labels=ins_labels), 69 | extrinsic=np.stack(extrinsic), intrinsic=np.stack(intrinsic), ida_mats=np.stack(ida_mats), 70 | extra_infos=dict(token=token, img_key_list=self.img_key_list, map_size=self.ego_size, do_flip=flip) 71 | ) 72 | if self.transforms is not None: 73 | item = self.transforms(item) 74 | return item 75 | 76 | def __len__(self): 77 | return len(self.tokens) 78 | 79 | def sample_ida_augmentation(self): 80 | """Generate ida augmentation values based on ida_config.""" 81 | resize_dims = w, h = self.ida_conf["resize_dims"] 82 | crop = (0, 0, w, h) 83 | if self.ida_conf["up_crop_ratio"] > 0: 84 | crop = (0, int(self.ida_conf["up_crop_ratio"] * h), w, h) 85 | flip, color, rotate_ida = False, False, 0 86 | if self.split_mode == "train": 87 | if self.ida_conf["rand_flip"] and np.random.choice([0, 1]): 88 | flip = True 89 | if self.ida_conf["rot_lim"]: 90 | assert isinstance(self.ida_conf["rot_lim"], (tuple, list)) 91 
| rotate_ida = np.random.uniform(*self.ida_conf["rot_lim"]) 92 | return resize_dims, crop, flip, rotate_ida 93 | 94 | def img_transform(self, img, resize_dims, crop, flip, rotate): 95 | img = Image.fromarray(img) 96 | ida_rot = torch.eye(2) 97 | ida_tran = torch.zeros(2) 98 | W, H = img.size 99 | img = img.resize(resize_dims) 100 | img = img.crop(crop) 101 | if flip: 102 | img = img.transpose(method=Image.FLIP_LEFT_RIGHT) 103 | img = img.rotate(rotate) 104 | 105 | # post-homography transformation 106 | scales = torch.tensor([resize_dims[0] / W, resize_dims[1] / H]) 107 | ida_rot *= torch.Tensor(scales) 108 | ida_tran -= torch.Tensor(crop[:2]) 109 | if flip: 110 | A = torch.Tensor([[-1, 0], [0, 1]]) 111 | b = torch.Tensor([crop[2] - crop[0], 0]) 112 | ida_rot = A.matmul(ida_rot) 113 | ida_tran = A.matmul(ida_tran) + b 114 | A = self.get_rot(rotate / 180 * np.pi) 115 | b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2 116 | b = A.matmul(-b) + b 117 | ida_rot = A.matmul(ida_rot) 118 | ida_tran = A.matmul(ida_tran) + b 119 | ida_mat = ida_rot.new_zeros(3, 3) 120 | ida_mat[2, 2] = 1 121 | ida_mat[:2, :2] = ida_rot 122 | ida_mat[:2, 2] = ida_tran 123 | return np.asarray(img), ida_mat 124 | 125 | @staticmethod 126 | def point_flip(points, labels, map_shape): 127 | 128 | def _flip(pts): 129 | pts[:, 0] = map_shape[1] - pts[:, 0] 130 | return pts.copy() 131 | 132 | points_ret = deepcopy(points) 133 | for ins_id in range(points.shape[0]): 134 | end = labels[ins_id, 2] 135 | points_ret[ins_id][:end] = _flip(points[ins_id][:end]) 136 | 137 | return points_ret 138 | 139 | @staticmethod 140 | def get_rot(h): 141 | return torch.Tensor([[np.cos(h), np.sin(h)], [-np.sin(h), np.cos(h)]]) 142 | -------------------------------------------------------------------------------- /mapmaster/dataset/nuscenes_pivotnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle as pkl 4 | from PIL import Image 5 | from torch.utils.data import Dataset 6 | 7 | class NuScenesMapDataset(Dataset): 8 | def __init__(self, img_key_list, map_conf, transforms, data_split="training"): 9 | super().__init__() 10 | self.img_key_list = img_key_list 11 | self.map_conf = map_conf 12 | 13 | self.ego_size = map_conf["ego_size"] 14 | self.mask_key = map_conf["mask_key"] 15 | self.nusc_root = map_conf["nusc_root"] 16 | self.anno_root = map_conf["anno_root"] 17 | self.split_dir = map_conf["split_dir"] # instance_mask/instance_mask8 18 | 19 | self.split_mode = 'train' if data_split == "training" else 'val' 20 | split_path = os.path.join(self.split_dir, f'{self.split_mode}.txt') 21 | self.tokens = [token.strip() for token in open(split_path).readlines()] 22 | self.transforms = transforms 23 | 24 | def __getitem__(self, idx: int): 25 | token = self.tokens[idx] 26 | sample = np.load(os.path.join(self.anno_root, f'{token}.npz'), allow_pickle=True) 27 | # images 28 | images = [] 29 | for im_view in self.img_key_list: 30 | for im_path in sample['image_paths']: 31 | if im_path.startswith(f'samples/{im_view}/'): 32 | im_path = os.path.join(self.nusc_root, im_path) 33 | img = np.asarray(Image.open(im_path)) 34 | images.append(img) 35 | # pivot pts 36 | pivot_pts = sample["pivot_pts"].item() 37 | valid_length = sample["pivot_length"].item() 38 | # targets 39 | masks=sample[self.mask_key] 40 | targets = dict(masks=masks, points=pivot_pts, valid_len=valid_length) 41 | # pose 42 | extrinsic = np.stack([np.eye(4) for _ in range(sample["trans"].shape[0])], axis=0) 
43 | extrinsic[:, :3, :3] = sample["rots"] 44 | extrinsic[:, :3, 3] = sample["trans"] 45 | intrinsic = sample['intrins'] 46 | # transform 47 | item = dict(images=images, targets=targets, 48 | extra_infos=dict(token=token, map_size=self.ego_size), 49 | extrinsic=np.stack(extrinsic, axis=0), intrinsic=np.stack(intrinsic, axis=0)) 50 | if self.transforms is not None: 51 | item = self.transforms(item) 52 | 53 | return item 54 | 55 | def __len__(self): 56 | return len(self.tokens) 57 | -------------------------------------------------------------------------------- /mapmaster/dataset/sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import itertools 3 | import torch.distributed as dist 4 | from typing import Optional 5 | from torch.utils.data.sampler import Sampler 6 | 7 | 8 | class InfiniteSampler(Sampler): 9 | """ 10 | In training, we only care about the "infinite stream" of training data. 11 | So this sampler produces an infinite stream of indices and 12 | all workers cooperate to correctly shuffle the indices and sample different indices. 13 | The samplers in each worker effectively produces `indices[worker_id::num_workers]` 14 | where `indices` is an infinite stream of indices consisting of 15 | `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) 16 | or `range(size) + range(size) + ...` (if shuffle is False) 17 | """ 18 | 19 | def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = 0, rank=0, world_size=1, drop_last=False): 20 | """ 21 | Args: 22 | size (int): the total number of data of the underlying dataset to sample from 23 | shuffle (bool): whether to shuffle the indices or not 24 | seed (int): the initial seed of the shuffle. Must be the same 25 | across all workers. If None, will use a random seed shared 26 | among workers (require synchronization among all workers). 
27 | """ 28 | self._size = size 29 | assert size > 0 30 | self._shuffle = shuffle 31 | self._seed = int(seed) 32 | self.drop_last = drop_last 33 | 34 | if dist.is_available() and dist.is_initialized(): 35 | self._rank = dist.get_rank() 36 | self._world_size = dist.get_world_size() 37 | else: 38 | self._rank = rank 39 | self._world_size = world_size 40 | 41 | def set_epoch(self, epoch): 42 | pass 43 | 44 | def __iter__(self): 45 | start = self._rank 46 | yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) 47 | 48 | def _infinite_indices(self): 49 | g = torch.Generator() 50 | g.manual_seed(self._seed) 51 | while True: 52 | if self._shuffle: 53 | yield from torch.randperm(self._size, generator=g).tolist() 54 | else: 55 | yield from list(range(self._size)) 56 | 57 | def __len__(self): 58 | if self.drop_last: 59 | return self._size // self._world_size 60 | else: 61 | return (self._size + self._world_size - 1) // self._world_size 62 | -------------------------------------------------------------------------------- /mapmaster/dataset/transform.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import mmcv 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | from collections.abc import Sequence 7 | 8 | class Resize(object): 9 | def __init__(self, img_scale=None, backend="cv2", interpolation="bilinear"): 10 | self.size = img_scale 11 | self.backend = backend 12 | self.interpolation = interpolation 13 | self.cv2_interp_codes = { 14 | "nearest": cv2.INTER_NEAREST, 15 | "bilinear": cv2.INTER_LINEAR, 16 | "bicubic": cv2.INTER_CUBIC, 17 | "area": cv2.INTER_AREA, 18 | "lanczos": cv2.INTER_LANCZOS4, 19 | } 20 | self.pillow_interp_codes = { 21 | "nearest": Image.NEAREST, 22 | "bilinear": Image.BILINEAR, 23 | "bicubic": Image.BICUBIC, 24 | "box": Image.BOX, 25 | "lanczos": Image.LANCZOS, 26 | "hamming": Image.HAMMING, 27 | } 28 | 29 | def __call__(self, data_dict): 30 | """Call function to resize images. 31 | 32 | Args: 33 | data_dict (dict): Result dict from loading pipeline. 34 | 35 | Returns: 36 | dict: Resized data_dict, 'scale_factor' keys are added into result dict. 37 | """ 38 | 39 | imgs = [] 40 | for img in data_dict["images"]: 41 | img = self.im_resize(img, self.size, backend=self.backend) 42 | imgs.append(img) 43 | data_dict["images"] = imgs 44 | 45 | new_h, new_w = imgs[0].shape[:2] 46 | h, w = data_dict["images"][0].shape[:2] 47 | w_scale = new_w / w 48 | h_scale = new_h / h 49 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) 50 | data_dict["extra_infos"].update({"scale_factor": scale_factor}) 51 | 52 | return data_dict 53 | 54 | def im_resize(self, img, size, return_scale=False, interpolation="bilinear", out=None, backend="cv2"): 55 | """Resize image to a given size. 56 | Args: 57 | img (ndarray): The input image. 58 | size (tuple[int]): Target size (w, h). 59 | return_scale (bool): Whether to return `w_scale` and `h_scale`. 60 | interpolation (str): Interpolation method, accepted values are 61 | "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' 62 | backend, "nearest", "bilinear" for 'pillow' backend. 63 | out (ndarray): The output destination. 64 | backend (str | None): The image resize backend type. Options are `cv2`, 65 | `pillow`, `None`. 66 | Returns: 67 | tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or 68 | `resized_img`. 
69 | """ 70 | h, w = img.shape[:2] 71 | if backend not in ["cv2", "pillow"]: 72 | raise ValueError( 73 | f"backend: {backend} is not supported for resize." f"Supported backends are 'cv2', 'pillow'" 74 | ) 75 | 76 | if backend == "pillow": 77 | assert img.dtype == np.uint8, "Pillow backend only support uint8 type" 78 | pil_image = Image.fromarray(img) 79 | pil_image = pil_image.resize(size, self.pillow_interp_codes[interpolation]) 80 | resized_img = np.array(pil_image) 81 | else: 82 | resized_img = cv2.resize(img, size, dst=out, interpolation=self.cv2_interp_codes[interpolation]) 83 | if not return_scale: 84 | return resized_img 85 | else: 86 | w_scale = size[0] / w 87 | h_scale = size[1] / h 88 | return resized_img, w_scale, h_scale 89 | 90 | class Normalize(object): 91 | """Normalize the image. 92 | 93 | Added key is "img_norm_cfg". 94 | 95 | Args: 96 | mean (sequence): Mean values of 3 channels. 97 | std (sequence): Std values of 3 channels. 98 | to_rgb (bool): Whether to convert the image from BGR to RGB, 99 | default is true. 100 | """ 101 | 102 | def __init__(self, mean, std, to_rgb=True): 103 | self.mean = np.array(mean, dtype=np.float32) 104 | self.std = np.array(std, dtype=np.float32) 105 | self.to_rgb = to_rgb 106 | 107 | def __call__(self, data_dict): 108 | imgs = [] 109 | for img in data_dict["images"]: 110 | if self.to_rgb: 111 | img = img.astype(np.float32) / 255.0 112 | img = self.im_normalize(img, self.mean, self.std, self.to_rgb) 113 | imgs.append(img) 114 | data_dict["images"] = imgs 115 | data_dict["extra_infos"]["img_norm_cfg"] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb) 116 | return data_dict 117 | 118 | @staticmethod 119 | def im_normalize(img, mean, std, to_rgb=True): 120 | img = img.copy().astype(np.float32) 121 | assert img.dtype != np.uint8 # cv2 inplace normalization does not accept uint8 122 | mean = np.float64(mean.reshape(1, -1)) 123 | stdinv = 1 / np.float64(std.reshape(1, -1)) 124 | if to_rgb: 125 | cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace 126 | cv2.subtract(img, mean, img) # inplace 127 | cv2.multiply(img, stdinv, img) # inplace 128 | return img 129 | 130 | 131 | class ToTensor(object): 132 | """Default formatting bundle.""" 133 | 134 | def __call__(self, data_dict): 135 | """Call function to transform and format common fields in data_dict. 136 | 137 | Args: 138 | data_dict (dict): Data dict contains the data to convert. 139 | 140 | Returns: 141 | dict: The result dict contains the data that is formatted with default bundle. 
142 | """ 143 | 144 | for k in ["images", "extrinsic", "intrinsic", "ida_mats"]: 145 | if k == "images": 146 | data_dict[k] = np.stack([img.transpose(2, 0, 1) for img in data_dict[k]], axis=0) 147 | data_dict[k] = self.to_tensor(np.ascontiguousarray(data_dict[k])) 148 | 149 | for k in ["masks", "points", "labels"]: 150 | data_dict["targets"][k] = self.to_tensor(np.ascontiguousarray(data_dict["targets"][k])) 151 | 152 | return data_dict 153 | 154 | @staticmethod 155 | def to_tensor(data): 156 | if isinstance(data, torch.Tensor): 157 | return data 158 | elif isinstance(data, np.ndarray): 159 | return torch.from_numpy(data) 160 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 161 | return torch.tensor(data) 162 | elif isinstance(data, int): 163 | return torch.LongTensor([data]) 164 | elif isinstance(data, float): 165 | return torch.FloatTensor([data]) 166 | else: 167 | raise TypeError(f"type {type(data)} cannot be converted to tensor.") 168 | 169 | class ToTensor_Pivot(object): 170 | """Default formatting bundle.""" 171 | 172 | def __call__(self, data_dict): 173 | """Call function to transform and format common fields in data_dict. 174 | 175 | Args: 176 | data_dict (dict): Data dict contains the data to convert. 177 | 178 | Returns: 179 | dict: The result dict contains the data that is formatted with default bundle. 180 | """ 181 | if "images" in data_dict: 182 | if isinstance(data_dict["images"], list): 183 | # process multiple imgs in single frame 184 | imgs = [img.transpose(2, 0, 1) for img in data_dict["images"]] 185 | imgs = np.ascontiguousarray(np.stack(imgs, axis=0)) 186 | data_dict["images"] = self.to_tensor(imgs) 187 | else: 188 | img = np.ascontiguousarray(data_dict["img"].transpose(2, 0, 1)) 189 | data_dict["images"] = self.to_tensor(img) 190 | 191 | for k in ["masks"]: 192 | data_dict["targets"][k] = self.to_tensor(np.ascontiguousarray(data_dict["targets"][k])) 193 | 194 | return data_dict 195 | 196 | @staticmethod 197 | def to_tensor(data): 198 | """Convert objects of various python types to :obj:`torch.Tensor`. 199 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 200 | :class:`Sequence`, :class:`int` and :class:`float`. 201 | Args: 202 | data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to 203 | be converted. 204 | """ 205 | 206 | if isinstance(data, torch.Tensor): 207 | return data 208 | elif isinstance(data, np.ndarray): 209 | return torch.from_numpy(data) 210 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 211 | return torch.tensor(data) 212 | elif isinstance(data, int): 213 | return torch.LongTensor([data]) 214 | elif isinstance(data, float): 215 | return torch.FloatTensor([data]) 216 | else: 217 | raise TypeError(f"type {type(data)} cannot be converted to tensor.") 218 | 219 | 220 | 221 | class Pad(object): 222 | """Pad the image & mask. 223 | 224 | There are two padding modes: (1) pad to a fixed size and (2) pad to the 225 | minimum size that is divisible by some number. 226 | Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", 227 | 228 | Args: 229 | size (tuple, optional): Fixed padding size. 230 | size_divisor (int, optional): The divisor of padded size. 231 | pad_val (float, optional): Padding value, 0 by default. 
232 | """ 233 | 234 | def __init__(self, size_divisor=None, pad_val=0): 235 | self.size_divisor = size_divisor 236 | self.pad_val = pad_val 237 | # only one of size and size_divisor should be valid 238 | assert size_divisor is not None 239 | 240 | def __call__(self, data_dict): 241 | """Call function to pad images, masks, semantic segmentation maps. 242 | 243 | Args: 244 | data_dict (dict): Result dict from loading pipeline. 245 | 246 | Returns: 247 | dict: Updated result dict. 248 | """ 249 | padded_img = None 250 | padded_imgs = [] 251 | for img in data_dict["images"]: 252 | padded_img = self.im_pad_to_multiple(img, self.size_divisor, pad_val=self.pad_val) 253 | padded_imgs.append(padded_img) 254 | data_dict["images"] = padded_imgs 255 | data_dict["extra_infos"].update( 256 | { 257 | "pad_shape": padded_img.shape, 258 | "pad_size_divisor": self.size_divisor if self.size_divisor is not None else "None", 259 | } 260 | ) 261 | return data_dict 262 | 263 | def im_pad_to_multiple(self, img, divisor, pad_val=0): 264 | """Pad an image to ensure each edge to be multiple to some number. 265 | Args: 266 | img (ndarray): Image to be padded. 267 | divisor (int): Padded image edges will be multiple to divisor. 268 | pad_val (Number | Sequence[Number]): Same as :func:`impad`. 269 | Returns: 270 | ndarray: The padded image. 271 | """ 272 | pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor 273 | pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor 274 | return self.im_pad(img, shape=(pad_h, pad_w), pad_val=pad_val) 275 | -------------------------------------------------------------------------------- /mapmaster/engine/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import datetime 5 | import warnings 6 | import subprocess 7 | from mapmaster.engine.executor import Trainer, BeMapNetEvaluator 8 | from mapmaster.engine.environ import ShareFSUUIDNameServer, RlaunchReplicaEnv 9 | from mapmaster.engine.callbacks import CheckPointLoader, CheckPointSaver, ClearMLCallback, ProgressBar 10 | from mapmaster.engine.callbacks import TensorBoardMonitor, TextMonitor, ClipGrad 11 | from mapmaster.utils.env import collect_env_info, get_root_dir 12 | from mapmaster.utils.misc import setup_logger, sanitize_filename, PyDecorator, all_gather_object 13 | 14 | 15 | __all__ = ["BaseCli", "BeMapNetCli"] 16 | 17 | 18 | class BaseCli: 19 | """Command line tools for any exp.""" 20 | 21 | def __init__(self, Exp): 22 | """Make sure the order of initialization is: build_args --> build_env --> build_exp, 23 | since experiments depend on the environment and the environment depends on args. 
24 | 25 | Args: 26 | Exp : experiment description class 27 | """ 28 | self.ExpCls = Exp 29 | self.args = self._get_parser(Exp).parse_args() 30 | self.env = RlaunchReplicaEnv(self.args.sync_bn, self.args.devices, self.args.find_unused_parameters) 31 | 32 | @property 33 | def exp(self): 34 | if not hasattr(self, "_exp"): 35 | exp = self.ExpCls( 36 | **{x if y is not None else "none": y for (x, y) in vars(self.args).items()}, 37 | total_devices=self.env.world_size(), 38 | ) 39 | self.exp_updated_cfg_msg = exp.update_attr(self.args.exp_options) 40 | self._exp = exp 41 | return self._exp 42 | 43 | def _get_parser(self, Exp): 44 | parser = argparse.ArgumentParser() 45 | parser = Exp.add_argparse_args(parser) 46 | parser = self.add_argparse_args(parser) 47 | return parser 48 | 49 | @staticmethod 50 | def add_argparse_args(parser: argparse.ArgumentParser): 51 | parser.add_argument("--eval", dest="eval", action="store_true", help="conduct evaluation only") 52 | parser.add_argument("-te", "--train_and_eval", dest="train_and_eval", action="store_true", help="train+eval") 53 | parser.add_argument("--find_unused_parameters", dest="find_unused_parameters", action="store_true") 54 | parser.add_argument("-d", "--devices", default="0-7", type=str, help="device for training") 55 | parser.add_argument("--ckpt", type=str, default=None, help="checkpoint to start from or be evaluated") 56 | parser.add_argument("--pretrained_model", type=str, default=None, help="pretrained_model used by training") 57 | parser.add_argument("--sync_bn", type=int, default=0, help="0-> disable sync_bn, 1-> whole world") 58 | clearml_parser = parser.add_mutually_exclusive_group(required=False) 59 | clearml_parser.add_argument("--clearml", dest="clearml", action="store_true", help="enabel clearml for train") 60 | clearml_parser.add_argument("--no-clearml", dest="clearml", action="store_false", help="disable clearml") 61 | parser.set_defaults(clearml=True) 62 | return parser 63 | 64 | def _get_exp_output_dir(self): 65 | exp_dir = os.path.join(os.path.join(get_root_dir(), "outputs"), sanitize_filename(self.exp.exp_name)) 66 | os.makedirs(exp_dir, exist_ok=True) 67 | output_dir = None 68 | if self.args.ckpt: 69 | output_dir = os.path.dirname(os.path.dirname(os.path.abspath(self.args.ckpt))) 70 | elif self.env.global_rank() == 0: 71 | output_dir = os.path.join(exp_dir, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")) 72 | os.makedirs(output_dir, exist_ok=True) 73 | # make a symlink "latest" 74 | symlink, symlink_tmp = os.path.join(exp_dir, "latest"), os.path.join(exp_dir, "latest_tmp") 75 | if os.path.exists(symlink_tmp): 76 | os.remove(symlink_tmp) 77 | os.symlink(os.path.relpath(output_dir, exp_dir), symlink_tmp) 78 | os.rename(symlink_tmp, symlink) 79 | output_dir = all_gather_object(output_dir)[0] 80 | return output_dir 81 | 82 | def get_evaluator(self, callbacks=None): 83 | exp = self.exp 84 | if self.args.ckpt is None: 85 | warnings.warn("No checkpoint is specified for evaluation") 86 | if exp.eval_executor_class is None: 87 | sys.exit("No evaluator is specified for evaluation") 88 | 89 | output_dir = self._get_exp_output_dir() 90 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="eval.log") 91 | self._set_basic_log_message(logger) 92 | if callbacks is None: 93 | callbacks = [self.env, CheckPointLoader(self.args.ckpt)] 94 | evaluator = exp.eval_executor_class(exp=exp, callbacks=callbacks, logger=logger) 95 | return evaluator 96 | 97 | def _set_basic_log_message(self, logger): 98 | 
logger.opt(ansi=True).info("Cli arguments:\n{}".format(self.args)) 99 | logger.info(f"exp_name: {self.exp.exp_name}") 100 | logger.opt(ansi=True).info( 101 | "Used experiment configs:\n{}".format(self.exp.get_cfg_as_str()) 102 | ) 103 | if self.exp_updated_cfg_msg: 104 | logger.opt(ansi=True).info( 105 | "List of override configs:\n{}".format(self.exp_updated_cfg_msg) 106 | ) 107 | logger.opt(ansi=True).info("Environment info:\n{}".format(collect_env_info())) 108 | 109 | def get_trainer(self, callbacks=None, evaluator=None): 110 | args = self.args 111 | exp = self.exp 112 | if evaluator is not None: 113 | output_dir = self.exp.output_dir 114 | else: 115 | output_dir = self._get_exp_output_dir() 116 | 117 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="train.log") 118 | self._set_basic_log_message(logger) 119 | 120 | if callbacks is None: 121 | callbacks = [ 122 | self.env, 123 | ProgressBar(logger=logger), 124 | TextMonitor(interval=exp.print_interval), 125 | TensorBoardMonitor(os.path.join(output_dir, "tensorboard"), interval=exp.print_interval), 126 | CheckPointSaver( 127 | local_path=os.path.join(output_dir, "dump_model"), 128 | remote_path=exp.ckpt_oss_save_dir, 129 | save_interval=exp.dump_interval, 130 | num_keep_latest=exp.num_keep_latest_ckpt, 131 | ), 132 | ] 133 | if "grad_clip_value" in exp.__dict__: 134 | callbacks.append(ClipGrad(exp.grad_clip_value)) 135 | if args.clearml: 136 | callbacks.append(ClearMLCallback()) 137 | if args.ckpt: 138 | callbacks.append(CheckPointLoader(args.ckpt)) 139 | if args.pretrained_model: 140 | callbacks.append(CheckPointLoader(args.pretrained_model, weight_only=True)) 141 | callbacks.extend(exp.callbacks) 142 | 143 | trainer = Trainer(exp=exp, callbacks=callbacks, logger=logger, evaluator=evaluator) 144 | return trainer 145 | 146 | def executor(self): 147 | if self.args.eval: 148 | self.get_evaluator().eval() 149 | elif self.args.train_and_eval: 150 | evaluator = self.get_evaluator(callbacks=[]) 151 | self.get_trainer(evaluator=evaluator).train() 152 | else: 153 | self.get_trainer().train() 154 | 155 | def dispatch(self, executor_func): 156 | is_master = self.env.global_rank() == 0 157 | with ShareFSUUIDNameServer(is_master) as ns: 158 | self.env.set_master_uri(ns) 159 | self.env.setup_nccl() 160 | if self.env.local_rank() == 0: 161 | command = sys.argv.copy() 162 | command[0] = os.path.abspath(command[0]) 163 | command = [sys.executable] + command 164 | for local_rank in range(1, self.env.nr_gpus): 165 | env_copy = os.environ.copy() 166 | env_copy["LOCAL_RANK"] = f"{local_rank}" 167 | subprocess.Popen(command, env=env_copy) 168 | self.env.init_dist() 169 | executor_func() 170 | 171 | def run(self): 172 | self.dispatch(self.executor) 173 | 174 | 175 | class MapMasterCli(BaseCli): 176 | @PyDecorator.overrides(BaseCli) 177 | def get_evaluator(self, callbacks=None): 178 | exp = self.exp 179 | 180 | output_dir = self._get_exp_output_dir() 181 | self.exp.output_dir = output_dir 182 | logger = setup_logger(output_dir, distributed_rank=self.env.global_rank(), filename="eval.log") 183 | self._set_basic_log_message(logger) 184 | if callbacks is None: 185 | callbacks = [ 186 | self.env, 187 | CheckPointLoader(self.args.ckpt), 188 | ] 189 | 190 | evaluator = BeMapNetEvaluator(exp=exp, callbacks=callbacks, logger=logger) 191 | return evaluator 192 | -------------------------------------------------------------------------------- /mapmaster/engine/environ.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import uuid 4 | import torch 5 | import subprocess 6 | import numpy as np 7 | from torch import nn 8 | from loguru import logger 9 | import torch.distributed as dist 10 | from mapmaster.utils.env import get_root_dir 11 | from mapmaster.utils.misc import parse_devices 12 | from mapmaster.engine.callbacks import Callback 13 | 14 | 15 | __all__ = ["ShareFSUUIDNameServer", "RlaunchReplicaEnv"] 16 | output_root_dir = os.path.join(get_root_dir(), "outputs") 17 | 18 | 19 | class ShareFSUUIDNameServer: 20 | def __init__(self, is_master): 21 | self.exp_id = self._get_exp_id() 22 | self.is_master = is_master 23 | os.makedirs(os.path.dirname(self.filepath), exist_ok=True) 24 | 25 | def _get_exp_id(self): 26 | if "DET3D_EXPID" not in os.environ: 27 | if int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) == 1: 28 | return str(uuid.uuid4()) 29 | msg = """cannot find DET3D_EXPID in environ please use following 30 | command DET3D_EXPID=$(cat /proc/sys/kernel/random/uuid) rlaunch ... 31 | """ 32 | logger.error(msg) 33 | raise RuntimeError 34 | return str(os.environ["DET3D_EXPID"]) 35 | 36 | @property 37 | def filepath(self): 38 | return os.path.join(output_root_dir, f"master_ip_{self.exp_id}.txt") 39 | 40 | def __enter__(self): 41 | if self.is_master: 42 | self.set_master() 43 | return self 44 | 45 | def __exit__(self, exc_type, exc_value, exc_tb): 46 | if self.is_master: 47 | os.remove(self.filepath) 48 | 49 | def set_master(self): 50 | assert not os.path.exists(self.filepath) 51 | hostname = "Host" 52 | with open(self.filepath, "w") as f: 53 | f.write(hostname) 54 | 55 | def get_master(self): 56 | while True: 57 | if os.path.exists(self.filepath): 58 | with open(self.filepath, "r") as f: 59 | return f.read() 60 | else: 61 | time.sleep(5) 62 | 63 | 64 | class _DDPEnv(Callback): 65 | def __init__(self, sync_bn=0, devices=None, find_unused_parameters=False): 66 | if devices: 67 | devices = parse_devices(devices) 68 | os.environ["CUDA_VISIBLE_DEVICES"] = devices 69 | self.nr_gpus = torch.cuda.device_count() 70 | self.sync_bn = sync_bn 71 | self.find_unused_parameters = find_unused_parameters 72 | 73 | @staticmethod 74 | def setup_nccl(): 75 | ifname = filter(lambda x: x not in ("lo",), os.listdir("/sys/class/net/")) 76 | os.environ["NCCL_SOCKET_IFNAME"] = ",".join(ifname) 77 | os.environ["NCCL_IB_DISABLE"] = "1" 78 | 79 | # os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL" 80 | os.environ["NCCL_IB_HCA"] = subprocess.getoutput( 81 | "cd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; " 82 | "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null " 83 | "| grep v >/dev/null && echo $i ; done; > /dev/null" 84 | ) 85 | os.environ["NCCL_IB_GID_INDEX"] = "3" 86 | os.environ["NCCL_IB_TC"] = "106" 87 | 88 | def after_init(self, trainer): 89 | trainer.model.cuda() 90 | if int(self.sync_bn) > 1: 91 | ranks = np.arange(self.world_size()).reshape(-1, self.sync_bn) 92 | process_groups = [torch.distributed.new_group(list(pids)) for pids in ranks] 93 | trainer.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm( 94 | trainer.model, process_groups[self.global_rank() // self.sync_bn] 95 | ) 96 | elif int(self.sync_bn) == 1: 97 | trainer.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(trainer.model) 98 | trainer.model = nn.parallel.DistributedDataParallel( 99 | trainer.model, device_ids=[self.local_rank()], find_unused_parameters=self.find_unused_parameters 100 | ) 101 | 102 | def cleanup(self): 103 | 
dist.destroy_process_group() 104 | 105 | def init_dist(self): 106 | torch.cuda.set_device(self.local_rank()) 107 | dist.init_process_group( 108 | backend="nccl", 109 | init_method=self._master_uri, 110 | rank=self.global_rank(), 111 | world_size=self.world_size(), 112 | ) 113 | dist.barrier() 114 | 115 | 116 | class RlaunchReplicaEnv(_DDPEnv): 117 | def __init__(self, sync_bn=0, devices=None, find_unused_parameters=False): 118 | super().__init__(sync_bn, devices, find_unused_parameters) 119 | 120 | def set_master_uri(self, ns): 121 | self._master_uri = f"tcp://{self.master_address(ns)}:{self.master_port()}" 122 | logger.info(self._master_uri) 123 | 124 | @staticmethod 125 | def is_brainpp_mm_env(): 126 | return int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) > 1 127 | 128 | def master_address(self, ns) -> str: 129 | if self.node_rank() == 0: 130 | root_node = "localhost" 131 | else: 132 | root_node = ns.get_master() 133 | os.environ["MASTER_ADDR"] = root_node 134 | return root_node 135 | 136 | def master_port(self) -> int: 137 | port = os.environ.get("MASTER_PORT", 12345) 138 | os.environ["MASTER_PORT"] = str(port) 139 | return int(port) 140 | 141 | def world_size(self) -> int: 142 | return int(os.environ.get("RLAUNCH_REPLICA_TOTAL", 1)) * int(self.nr_gpus) 143 | 144 | def global_rank(self) -> int: 145 | return int(self.nr_gpus) * self.node_rank() + self.local_rank() 146 | 147 | def local_rank(self) -> int: 148 | return int(os.environ.get("LOCAL_RANK", 0)) 149 | 150 | def node_rank(self) -> int: 151 | return int(os.environ.get("RLAUNCH_REPLICA", 0)) 152 | -------------------------------------------------------------------------------- /mapmaster/engine/executor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from tqdm import tqdm 4 | from typing import Sequence 5 | from mapmaster.engine.experiment import BaseExp 6 | from mapmaster.utils.misc import get_rank, synchronize 7 | 8 | 9 | __all__ = ["Callback", "BaseExecutor", "Trainer", "BeMapNetEvaluator"] 10 | 11 | 12 | class Callback: 13 | 14 | # callback enabled rank list 15 | # None means callback is always enabled 16 | enabled_rank = None 17 | 18 | def setup(self, executor): 19 | pass 20 | 21 | def load_checkpoint(self, executor): 22 | pass 23 | 24 | def after_init(self, executor): 25 | pass 26 | 27 | def before_train(self, executor): 28 | pass 29 | 30 | def before_epoch(self, executor, epoch: int): 31 | pass 32 | 33 | def before_step(self, executor, step, data_dict): 34 | pass 35 | 36 | def before_backward(self, executor): 37 | pass 38 | 39 | def before_optimize(self, executor): 40 | pass 41 | 42 | def after_step(self, executor, step, data_dict, *args, **kwargs): 43 | pass 44 | 45 | def after_epoch(self, executor, epoch: int, update_best_ckpt: bool = False): 46 | pass 47 | 48 | def after_train(self, executor): 49 | pass 50 | 51 | 52 | class BaseExecutor: 53 | def __init__(self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None) -> None: 54 | self.exp = exp 55 | self.logger = logger 56 | self.callbacks = callbacks 57 | self._invoke_callback("setup") 58 | 59 | self.epoch = 0 60 | self.global_step = 0 61 | self._invoke_callback("load_checkpoint") 62 | self._invoke_callback("after_init") 63 | 64 | @property 65 | def train_dataloader(self): 66 | return self.exp.train_dataloader 67 | 68 | @property 69 | def val_dataloader(self): 70 | return self.exp.val_dataloader 71 | 72 | @property 73 | def model(self): 74 | return self.exp.model 75 | 76 | @model.setter 77 | def 
model(self, value): 78 | self.exp.model = value 79 | 80 | @property 81 | def optimizer(self): 82 | return self.exp.optimizer 83 | 84 | @property 85 | def lr_scheduler(self): 86 | return self.exp.lr_scheduler 87 | 88 | def _invoke_callback(self, callback_name, *args, **kwargs): 89 | for cb in self.callbacks: 90 | if cb.enabled_rank is None or self.global_rank in cb.enabled_rank: 91 | func = getattr(cb, callback_name, None) 92 | if func: 93 | func(self, *args, **kwargs) 94 | 95 | @property 96 | def global_rank(self): 97 | return get_rank() 98 | 99 | 100 | class Trainer(BaseExecutor): 101 | def __init__( 102 | self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None, use_amp=False, evaluator=None 103 | ) -> None: 104 | super(Trainer, self).__init__(exp, callbacks, logger) 105 | self.use_amp = use_amp 106 | self.evaluator = evaluator 107 | if self.use_amp: 108 | self.grad_scaler = torch.cuda.amp.GradScaler() 109 | 110 | def train(self): 111 | self.train_iter = iter(self.train_dataloader) 112 | self._invoke_callback("before_train") 113 | self.model.cuda() 114 | self.model.train() 115 | self.optimizer_to(self.optimizer, next(self.model.parameters()).device) 116 | start_epoch = self.epoch 117 | for epoch in range(start_epoch, self.exp.max_epoch): 118 | self.epoch = epoch 119 | self.model.train() 120 | self.train_epoch(epoch) 121 | self._invoke_callback("after_train") 122 | 123 | def train_epoch(self, epoch): 124 | self._invoke_callback("before_epoch", epoch) 125 | sampler = self.train_dataloader.sampler 126 | if hasattr(sampler, "set_epoch"): 127 | sampler.set_epoch(epoch) 128 | for step in range(len(self.train_dataloader)): 129 | try: 130 | data = next(self.train_iter) 131 | except StopIteration: 132 | self.train_iter = iter(self.train_dataloader) 133 | data = next(self.train_iter) 134 | self.train_step(data, step) 135 | if self.evaluator is not None: 136 | self.evaluator.eval() 137 | self._invoke_callback("after_epoch", epoch, update_best_ckpt=False) 138 | 139 | def train_step(self, data, step): 140 | self._invoke_callback("before_step", step, data) 141 | self.lr_scheduler.step(self.global_step) 142 | self.model.train() 143 | self.optimizer.zero_grad() 144 | if not self.use_amp: 145 | ret = self.exp.training_step(data) 146 | else: 147 | with torch.cuda.amp.autocast(): 148 | ret = self.exp.training_step(data) 149 | if isinstance(ret, torch.Tensor): 150 | loss = ret 151 | ext_dict = None 152 | elif isinstance(ret, tuple): 153 | loss, ext_dict = ret 154 | ext_dict = {k: v.detach() if isinstance(v, torch.Tensor) else v for k, v in ext_dict.items()} 155 | else: 156 | raise TypeError 157 | self._invoke_callback("before_backward") 158 | if not self.use_amp: 159 | loss.backward() 160 | self._invoke_callback("before_optimize") 161 | self.optimizer.step() 162 | else: 163 | self.grad_scaler.scale(loss).backward() 164 | self.grad_scaler.unscale_(self.optimizer) # NOTE: grads are unscaled before "before_optimize" callbacks 165 | self._invoke_callback("before_optimize") 166 | self.grad_scaler.step(self.optimizer) 167 | self.grad_scaler.update() 168 | self._invoke_callback("after_step", step, data, loss=loss.detach(), extra=ext_dict) 169 | self.global_step += 1 170 | 171 | # refer to: https://github.com/pytorch/pytorch/issues/8741 172 | @staticmethod 173 | def optimizer_to(optim, device): 174 | for param in optim.state.values(): 175 | # Not sure there are any global tensors in the state dict 176 | if isinstance(param, torch.Tensor): 177 | param.data = param.data.to(device) 178 | if param._grad is not 
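# A small illustrative Callback (not part of this repo) showing the hook order
# Trainer.train_step drives: before_step -> before_backward -> before_optimize
# -> after_step. `enabled_rank` makes _invoke_callback skip it on non-zero ranks.
from mapmaster.engine.executor import Callback, Trainer

class LossPrinterCallback(Callback):

    enabled_rank = [0]  # only invoked where global_rank is in this list

    def after_step(self, executor, step, data_dict, *args, **kwargs):
        loss = kwargs.get("loss")          # Trainer passes loss=loss.detach()
        if loss is not None and step % 100 == 0:
            print(f"epoch={executor.epoch} step={step} loss={loss.item():.4f}")

# usage sketch, given a configured experiment `exp`:
# trainer = Trainer(exp, callbacks=[LossPrinterCallback()], use_amp=True)
# trainer.train()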
None: 179 | param._grad.data = param._grad.data.to(device) 180 | elif isinstance(param, dict): 181 | for subparam in param.values(): 182 | if isinstance(subparam, torch.Tensor): 183 | subparam.data = subparam.data.to(device) 184 | if subparam._grad is not None: 185 | subparam._grad.data = subparam._grad.data.to(device) 186 | 187 | 188 | class BeMapNetEvaluator(BaseExecutor): 189 | def __init__(self, exp: BaseExp, callbacks: Sequence["Callback"], logger=None) -> None: 190 | super(BeMapNetEvaluator, self).__init__(exp, callbacks, logger) 191 | 192 | def eval(self, ckpt_name=None): 193 | 194 | exp = self.exp 195 | val_iter = iter(self.val_dataloader) 196 | 197 | self._invoke_callback("before_eval") 198 | 199 | if ckpt_name is not None: 200 | if get_rank() == 0: 201 | self.logger.info("Eval with best checkpoint!") 202 | path = os.path.join(exp.output_dir, 'dump_model', ckpt_name) 203 | checkpoint = torch.load(open(path, "rb"), map_location=torch.device("cpu")) 204 | self.model.load_state_dict(checkpoint["model_state"], strict=False) 205 | 206 | self.model.cuda() 207 | self.model.eval() 208 | 209 | for step in tqdm(range(len(self.val_dataloader))): 210 | batch_data = next(val_iter) 211 | with torch.no_grad(): 212 | exp.test_step(batch_data) 213 | self._invoke_callback("after_step", step, {}) 214 | 215 | synchronize() 216 | 217 | if get_rank() == 0: 218 | self.logger.info("Done with inference, start evaluation later!") 219 | gt_dir = exp.exp_config.map_conf['anno_root'] 220 | dt_dir = exp.evaluation_save_dir 221 | val_txts = exp.exp_config.VAL_TXT 222 | 223 | for val_txt in val_txts: 224 | ap_table = "".join(os.popen(f"python3 tools/evaluation/eval.py {gt_dir} {dt_dir} {val_txt}").readlines()) 225 | self.logger.info(" AP-Performance with HDMapNetAPI: \n" + val_txt + "\n" + ap_table) 226 | 227 | self._invoke_callback("after_eval") 228 | -------------------------------------------------------------------------------- /mapmaster/engine/experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import functools 5 | import numpy as np 6 | from torch.nn import Module 7 | from tabulate import tabulate 8 | from abc import ABCMeta, abstractmethod 9 | from mapmaster.utils.misc import DictAction 10 | 11 | 12 | class BaseExp(metaclass=ABCMeta): 13 | """Basic class for any experiment in Perceptron. 
14 | 15 | Args: 16 | batch_size_per_device (int): 17 | batch_size of each device 18 | 19 | total_devices (int): 20 | number of devices to use 21 | 22 | max_epoch (int): 23 | total training epochs, the reason why we need to give max_epoch 24 | is that lr_scheduler may need to be adapted according to max_epoch 25 | """ 26 | 27 | def __init__(self, batch_size_per_device, total_devices, max_epoch): 28 | self._batch_size_per_device = batch_size_per_device 29 | self._max_epoch = max_epoch 30 | self._total_devices = total_devices 31 | # ----------------------------------------------- extra configure ------------------------- # 32 | self.seed = None 33 | self.exp_name = os.path.splitext(os.path.basename(sys.argv.copy()[0]))[0] # entrypoint filename as exp_name 34 | self.print_interval = 100 35 | self.dump_interval = 10 36 | self.eval_interval = 10 37 | self.num_keep_latest_ckpt = 10 38 | self.ckpt_oss_save_dir = None 39 | self.enable_tensorboard = False 40 | self.eval_executor_class = None 41 | 42 | @property 43 | def train_dataloader(self): 44 | if "_train_dataloader" not in self.__dict__: 45 | self._train_dataloader = self._configure_train_dataloader() 46 | return self._train_dataloader 47 | 48 | @property 49 | def val_dataloader(self): 50 | if "_val_dataloader" not in self.__dict__: 51 | self._val_dataloader = self._configure_val_dataloader() 52 | return self._val_dataloader 53 | 54 | @property 55 | def test_dataloader(self): 56 | if "_test_dataloader" not in self.__dict__: 57 | self._test_dataloader = self._configure_test_dataloader() 58 | return self._test_dataloader 59 | 60 | @property 61 | def model(self): 62 | if "_model" not in self.__dict__: 63 | self._model = self._configure_model() 64 | return self._model 65 | 66 | @model.setter 67 | def model(self, value): 68 | self._model = value 69 | 70 | @property 71 | def callbacks(self): 72 | if not hasattr(self, "_callbacks"): 73 | self._callbacks = self._configure_callbacks() 74 | return self._callbacks 75 | 76 | @property 77 | def optimizer(self): 78 | if "_optimizer" not in self.__dict__: 79 | self._optimizer = self._configure_optimizer() 80 | return self._optimizer 81 | 82 | @property 83 | def lr_scheduler(self): 84 | if "_lr_scheduler" not in self.__dict__: 85 | self._lr_scheduler = self._configure_lr_scheduler() 86 | return self._lr_scheduler 87 | 88 | @property 89 | def batch_size_per_device(self): 90 | return self._batch_size_per_device 91 | 92 | @property 93 | def max_epoch(self): 94 | return self._max_epoch 95 | 96 | @property 97 | def total_devices(self): 98 | return self._total_devices 99 | 100 | @abstractmethod 101 | def _configure_model(self) -> Module: 102 | pass 103 | 104 | @abstractmethod 105 | def _configure_train_dataloader(self): 106 | """""" 107 | 108 | def _configure_callbacks(self): 109 | return [] 110 | 111 | @abstractmethod 112 | def _configure_val_dataloader(self): 113 | """""" 114 | 115 | @abstractmethod 116 | def _configure_test_dataloader(self): 117 | """""" 118 | 119 | def training_step(self, *args, **kwargs): 120 | pass 121 | 122 | @abstractmethod 123 | def _configure_optimizer(self) -> torch.optim.Optimizer: 124 | pass 125 | 126 | @abstractmethod 127 | def _configure_lr_scheduler(self, **kwargs): 128 | pass 129 | 130 | def update_attr(self, options: dict) -> str: 131 | if options is None: 132 | return "" 133 | assert isinstance(options, dict) 134 | msg = "" 135 | for k, v in options.items(): 136 | if k in self.__dict__: 137 | old_v = self.__getattribute__(k) 138 | if not v == old_v: 139 | self.__setattr__(k, v) 
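# A skeletal BaseExp subclass (illustrative only; the toy model and dataset are
# stand-ins, not the real BeMapNet/PivotNet experiments) showing which hooks a
# concrete experiment must implement. Properties such as exp.model and
# exp.optimizer are built lazily on first access and then cached.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from mapmaster.engine.experiment import BaseExp

class ToyExp(BaseExp):
    def _configure_model(self):
        return nn.Linear(8, 2)

    def _configure_train_dataloader(self):
        dataset = TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
        return DataLoader(dataset, batch_size=self.batch_size_per_device)

    def _configure_val_dataloader(self):
        return self._configure_train_dataloader()

    def _configure_test_dataloader(self):
        return self._configure_train_dataloader()

    def _configure_optimizer(self):
        return torch.optim.AdamW(self.model.parameters(), lr=1e-3)

    def _configure_lr_scheduler(self):
        return torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[int(0.7 * self.max_epoch)])

    def training_step(self, batch):
        # Trainer accepts either a scalar loss tensor or (loss, dict-of-extras)
        x, y = batch
        return nn.functional.cross_entropy(self.model(x), y)

exp = ToyExp(batch_size_per_device=4, total_devices=1, max_epoch=10)
_ = exp.model            # lazily built here, cached in exp._model
print(exp.exp_name)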
140 | msg = "{}\n'{}' is overriden from '{}' to '{}'".format(msg, k, old_v, v) 141 | else: 142 | self.__setattr__(k, v) 143 | msg = "{}\n'{}' is set to '{}'".format(msg, k, v) 144 | 145 | # update exp_name 146 | exp_name_suffix = "-".join(sorted([f"{k}-{v}" for k, v in options.items()])) 147 | self.exp_name = f"{self.exp_name}--{exp_name_suffix}" 148 | return msg 149 | 150 | def get_cfg_as_str(self) -> str: 151 | config_table = [] 152 | for c, v in self.__dict__.items(): 153 | if not isinstance(v, (int, float, str, list, tuple, dict, np.ndarray)): 154 | if hasattr(v, "__name__"): 155 | v = v.__name__ 156 | elif hasattr(v, "__class__"): 157 | v = v.__class__ 158 | elif type(v) == functools.partial: 159 | v = v.func.__name__ 160 | if c[0] == "_": 161 | c = c[1:] 162 | config_table.append((str(c), str(v))) 163 | 164 | headers = ["config key", "value"] 165 | config_table = tabulate(config_table, headers, tablefmt="plain") 166 | return config_table 167 | 168 | def __str__(self): 169 | return self.get_cfg_as_str() 170 | 171 | def to_onnx(self): 172 | pass 173 | 174 | @classmethod 175 | def add_argparse_args(cls, parser): # pragma: no-cover 176 | parser.add_argument( 177 | "--exp_options", 178 | nargs="+", 179 | action=DictAction, 180 | help="override some settings in the exp, the key-value pair in xxx=yyy format will be merged into exp. " 181 | 'If the value to be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 182 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 183 | "Note that the quotation marks are necessary and that no white space is allowed.", 184 | ) 185 | parser.add_argument("-b", "--batch-size-per-device", type=int, default=None) 186 | parser.add_argument("-e", "--max-epoch", type=int, default=None) 187 | return parser 188 | -------------------------------------------------------------------------------- /mapmaster/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .network import MapMaster 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import ResNetBackbone, EfficientNetBackbone, SwinTRBackbone 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/bifpn/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import BiFPN 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/bifpn/utils.py: -------------------------------------------------------------------------------- 1 | # Author: Zylo117 2 | 3 | import math 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Swish(nn.Module): 10 | def forward(self, x): 11 | return x * torch.sigmoid(x) 12 | 13 | 14 | class Conv2dStaticSamePadding(nn.Module): 15 | """ 16 | created by Zylo117 17 | The real keras/tensorflow conv2d with same padding 18 | """ 19 | 20 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, groups=1, dilation=1, **kwargs): 21 | super().__init__() 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, bias=bias, groups=groups) 23 | self.stride = self.conv.stride 24 | self.kernel_size = self.conv.kernel_size 25 | self.dilation = self.conv.dilation 26 | 27 | if isinstance(self.stride, int): 28 | 
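# Illustrative wiring of the --exp_options flag defined above. DictAction
# (from mapmaster.utils.misc) is assumed to turn the space-separated key=value
# pairs into a dict; update_attr() then merges that dict into the experiment.
import argparse
from mapmaster.engine.experiment import BaseExp

parser = argparse.ArgumentParser()
parser = BaseExp.add_argparse_args(parser)
args = parser.parse_args(["--exp_options", "print_interval=50", "eval_interval=5", "-b", "2", "-e", "30"])
print(args.batch_size_per_device, args.max_epoch)   # 2 30

# exp.update_attr(args.exp_options) would then override print_interval and
# eval_interval on a concrete experiment and append a matching suffix to exp.exp_name.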
self.stride = [self.stride] * 2 29 | elif len(self.stride) == 1: 30 | self.stride = [self.stride[0]] * 2 31 | 32 | if isinstance(self.kernel_size, int): 33 | self.kernel_size = [self.kernel_size] * 2 34 | elif len(self.kernel_size) == 1: 35 | self.kernel_size = [self.kernel_size[0]] * 2 36 | 37 | def forward(self, x): 38 | h, w = x.shape[-2:] 39 | 40 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1] 41 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0] 42 | 43 | left = extra_h // 2 44 | right = extra_h - left 45 | top = extra_v // 2 46 | bottom = extra_v - top 47 | 48 | x = F.pad(x, [left, right, top, bottom]) 49 | 50 | x = self.conv(x) 51 | return x 52 | 53 | 54 | class MaxPool2dStaticSamePadding(nn.Module): 55 | """ 56 | created by Zylo117 57 | The real keras/tensorflow MaxPool2d with same padding 58 | """ 59 | 60 | def __init__(self, *args, **kwargs): 61 | super().__init__() 62 | self.pool = nn.MaxPool2d(*args, **kwargs) 63 | self.stride = self.pool.stride 64 | self.kernel_size = self.pool.kernel_size 65 | 66 | if isinstance(self.stride, int): 67 | self.stride = [self.stride] * 2 68 | elif len(self.stride) == 1: 69 | self.stride = [self.stride[0]] * 2 70 | 71 | if isinstance(self.kernel_size, int): 72 | self.kernel_size = [self.kernel_size] * 2 73 | elif len(self.kernel_size) == 1: 74 | self.kernel_size = [self.kernel_size[0]] * 2 75 | 76 | def forward(self, x): 77 | h, w = x.shape[-2:] 78 | 79 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1] 80 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0] 81 | 82 | left = extra_h // 2 83 | right = extra_h - left 84 | top = extra_v // 2 85 | bottom = extra_v - top 86 | 87 | x = F.pad(x, [left, right, top, bottom]) 88 | 89 | x = self.pool(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EfficientNet 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mapmaster.models.backbone.resnet import ResNet 5 | from mapmaster.models.backbone.efficientnet import EfficientNet 6 | from mapmaster.models.backbone.swin_transformer import SwinTransformer 7 | from mapmaster.models.backbone.bifpn import BiFPN 8 | 9 | 10 | class ResNetBackbone(nn.Module): 11 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1): 12 | super(ResNetBackbone, self).__init__() 13 | assert 0 < ret_layers < 4 14 | self.ret_layers = ret_layers 15 | self.bkb = ResNet(**bkb_kwargs) 16 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg) 17 | self.up_shape = None if up_shape is None else up_shape 18 | self.bkb.init_weights() 19 | 20 | def forward(self, inputs): 21 | images = inputs["images"] 22 | images = images.view(-1, *images.shape[-3:]) 23 | bkb_features = list(self.bkb(images)[-self.ret_layers:]) 24 | nek_features = self.fpn(bkb_features) if self.fpn is not None else None 25 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features} 26 | 27 | 28 | class EfficientNetBackbone(nn.Module): 29 | def __init__(self, bkb_kwargs, fpn_kwarg=None, 
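# A quick numeric check (illustrative values) of the "same" padding computed in
# Conv2dStaticSamePadding.forward above: with kernel 3, stride 2 and a 15x15
# input, 2 extra pixels are padded along each spatial dim, so the output is
# ceil(15 / 2) = 8, matching the TensorFlow/Keras "same" behaviour the docstring refers to.
import math
import torch
from mapmaster.models.backbone.bifpn.utils import Conv2dStaticSamePadding

conv = Conv2dStaticSamePadding(in_channels=3, out_channels=8, kernel_size=3, stride=2)
x = torch.randn(1, 3, 15, 15)
extra = (math.ceil(15 / 2) - 1) * 2 - 15 + 3    # = 2 -> pad 1 on the left/top, 1 on the right/bottom
print(extra, conv(x).shape)                      # 2 torch.Size([1, 8, 8, 8])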
up_shape=None, ret_layers=1): 30 | super(EfficientNetBackbone, self).__init__() 31 | assert 0 < ret_layers < 4 32 | self.ret_layers = ret_layers 33 | self.bkb = EfficientNet.from_pretrained(**bkb_kwargs) 34 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg) 35 | self.up_shape = None if up_shape is None else up_shape 36 | del self.bkb._conv_head 37 | del self.bkb._bn1 38 | del self.bkb._avg_pooling 39 | del self.bkb._dropout 40 | del self.bkb._fc 41 | 42 | def forward(self, inputs): 43 | images = inputs["images"] 44 | images = images.view(-1, *images.shape[-3:]) 45 | endpoints = self.bkb.extract_endpoints(images) 46 | bkb_features = [] 47 | for i, (key, value) in enumerate(endpoints.items()): 48 | if i > 0: 49 | bkb_features.append(value) 50 | bkb_features = list(bkb_features[-self.ret_layers:]) 51 | nek_features = self.fpn(bkb_features) if self.fpn is not None else None 52 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features} 53 | 54 | 55 | class SwinTRBackbone(nn.Module): 56 | def __init__(self, bkb_kwargs, fpn_kwarg=None, up_shape=None, ret_layers=1): 57 | super(SwinTRBackbone, self).__init__() 58 | assert 0 < ret_layers < 4 59 | self.ret_layers = ret_layers 60 | self.bkb = SwinTransformer(**bkb_kwargs) 61 | self.fpn = None if fpn_kwarg is None else BiFPN(**fpn_kwarg) 62 | self.up_shape = None if up_shape is None else up_shape 63 | 64 | def forward(self, inputs): 65 | images = inputs["images"] 66 | images = images.view(-1, *images.shape[-3:]) 67 | bkb_features = list(self.bkb(images)[-self.ret_layers:]) 68 | nek_features = None 69 | if self.fpn is not None: 70 | nek_features = self.fpn(bkb_features) 71 | else: 72 | if self.up_shape is not None: 73 | nek_features = [torch.cat([self.up_sample(x, self.up_shape) for x in bkb_features], dim=1)] 74 | 75 | return {"im_bkb_features": bkb_features, "im_nek_features": nek_features} 76 | 77 | def up_sample(self, x, tgt_shape=None): 78 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 79 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 80 | return x 81 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 82 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/resnet/utils.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_conv_layer, build_norm_layer 2 | from mmcv.runner import Sequential 3 | from torch import nn as nn 4 | 5 | 6 | class ResLayer(Sequential): 7 | """ResLayer to build ResNet style backbone. 8 | Args: 9 | block (nn.Module): block used to build ResLayer. 10 | inplanes (int): inplanes of block. 11 | planes (int): planes of block. 12 | num_blocks (int): number of blocks. 13 | stride (int): stride of the first block. Default: 1 14 | avg_down (bool): Use AvgPool instead of stride conv when 15 | downsampling in the bottleneck. Default: False 16 | conv_cfg (dict): dictionary to construct and config conv layer. 17 | Default: None 18 | norm_cfg (dict): dictionary to construct and config norm layer. 19 | Default: dict(type='BN') 20 | downsample_first (bool): Downsample at the first block or last block. 21 | False for Hourglass, True for ResNet. 
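# Shape bookkeeping shared by the three backbone wrappers above (sizes are
# illustrative): "images" holds all surround-view cameras, the camera axis is
# folded into the batch before the 2D backbone runs, and per-level features
# come back in a dict.
import torch

batch, n_cam = 2, 6                                # 6 cameras as in nuScenes
images = torch.randn(batch, n_cam, 3, 512, 896)    # (B, N_cam, 3, H, W); H, W illustrative
flat = images.view(-1, *images.shape[-3:])         # (B * N_cam, 3, H, W) -> torch.Size([12, 3, 512, 896])
print(flat.shape)

# outputs = backbone({"images": images})
# outputs["im_bkb_features"]: last `ret_layers` backbone stages, each (B * N_cam, C_i, H_i, W_i)
# outputs["im_nek_features"]: BiFPN-fused features, or None when no neck / up-sampling is configured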
Default: True 22 | """ 23 | 24 | def __init__( 25 | self, 26 | block, 27 | inplanes, 28 | planes, 29 | num_blocks, 30 | stride=1, 31 | avg_down=False, 32 | conv_cfg=None, 33 | norm_cfg=dict(type="BN"), 34 | downsample_first=True, 35 | **kwargs 36 | ): 37 | self.block = block 38 | 39 | downsample = None 40 | if stride != 1 or inplanes != planes * block.expansion: 41 | downsample = [] 42 | conv_stride = stride 43 | if avg_down: 44 | conv_stride = 1 45 | downsample.append( 46 | nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False) 47 | ) 48 | downsample.extend( 49 | [ 50 | build_conv_layer( 51 | conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False 52 | ), 53 | build_norm_layer(norm_cfg, planes * block.expansion)[1], 54 | ] 55 | ) 56 | downsample = nn.Sequential(*downsample) 57 | 58 | layers = [] 59 | if downsample_first: 60 | layers.append( 61 | block( 62 | inplanes=inplanes, 63 | planes=planes, 64 | stride=stride, 65 | downsample=downsample, 66 | conv_cfg=conv_cfg, 67 | norm_cfg=norm_cfg, 68 | **kwargs 69 | ) 70 | ) 71 | inplanes = planes * block.expansion 72 | for _ in range(1, num_blocks): 73 | layers.append( 74 | block(inplanes=inplanes, planes=planes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs) 75 | ) 76 | 77 | else: # downsample_first=False is for HourglassModule 78 | for _ in range(num_blocks - 1): 79 | layers.append( 80 | block(inplanes=inplanes, planes=inplanes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs) 81 | ) 82 | layers.append( 83 | block( 84 | inplanes=inplanes, 85 | planes=planes, 86 | stride=stride, 87 | downsample=downsample, 88 | conv_cfg=conv_cfg, 89 | norm_cfg=norm_cfg, 90 | **kwargs 91 | ) 92 | ) 93 | super(ResLayer, self).__init__(*layers) 94 | -------------------------------------------------------------------------------- /mapmaster/models/backbone/swin_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from .model import SwinTransformer as _SwinTransformer 4 | from torch.utils import model_zoo 5 | 6 | model_urls = { 7 | "tiny": "https://github.com/SwinTransformer/storage/releases/download/v1.0.1/upernet_swin_tiny_patch4_window7_512x512.pth", 8 | "base": "https://github.com/SwinTransformer/storage/releases/download/v1.0.1/upernet_swin_base_patch4_window7_512x512.pth", 9 | } 10 | 11 | 12 | class SwinTransformer(_SwinTransformer): 13 | def __init__( 14 | self, 15 | arch="tiny", 16 | pretrained=False, 17 | window_size=7, 18 | shift_mode=1, 19 | mlp_ratio=4.0, 20 | qkv_bias=True, 21 | qk_scale=None, 22 | drop_rate=0.0, 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0.3, 25 | ape=False, 26 | patch_norm=True, 27 | out_indices=(0, 1, 2, 3), 28 | use_checkpoint=False, 29 | **kwargs 30 | ): 31 | if arch == "tiny": 32 | embed_dim = 96 33 | depths = (2, 2, 6, 2) 34 | num_heads = (3, 6, 12, 24) 35 | elif arch == "small": 36 | embed_dim = 96 37 | depths = (2, 2, 18, 2) 38 | num_heads = (3, 6, 12, 24) 39 | elif arch == "base": 40 | embed_dim = 128 41 | depths = (2, 2, 18, 2) 42 | num_heads = (4, 8, 16, 32) 43 | else: 44 | raise NotImplementedError 45 | 46 | super(SwinTransformer, self).__init__( 47 | embed_dim=embed_dim, 48 | depths=depths, 49 | num_heads=num_heads, 50 | window_size=window_size, 51 | shift_mode=shift_mode, 52 | mlp_ratio=mlp_ratio, 53 | qkv_bias=qkv_bias, 54 | qk_scale=qk_scale, 55 | drop_rate=drop_rate, 56 | attn_drop_rate=attn_drop_rate, 57 | drop_path_rate=drop_path_rate, 58 | 
ape=ape, 59 | patch_norm=patch_norm, 60 | out_indices=out_indices, 61 | use_checkpoint=use_checkpoint, 62 | **kwargs 63 | ) 64 | if isinstance(pretrained, bool): 65 | assert pretrained is True 66 | print(model_urls[arch]) 67 | state_dict = model_zoo.load_url(model_urls[arch])["state_dict"] 68 | elif isinstance(pretrained, str): 69 | assert os.path.exists(pretrained) 70 | print(pretrained) 71 | state_dict = torch.load(pretrained)["state_dict"] 72 | else: 73 | raise NotImplementedError 74 | 75 | self.arch = arch 76 | self.init_weights(state_dict=state_dict) 77 | 78 | def init_weights(self, state_dict): 79 | new_state_dict = {} 80 | for key, value in state_dict.items(): 81 | if "backbone" in key: 82 | new_state_dict[key.replace("backbone.", "")] = value 83 | ret = self.load_state_dict(new_state_dict, strict=False) 84 | print("Backbone missing_keys: {}".format(ret.missing_keys)) 85 | print("Backbone unexpected_keys: {}".format(ret.unexpected_keys)) 86 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import TransformerBEVDecoder, DeformTransformerBEVEncoder 2 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_transformer import DeformTransformer 2 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import MSDeformAttn 2 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn_func import MSDeformAttnFunction 10 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/functions/ms_deform_attn_func.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
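# init_weights() above only keeps checkpoint entries whose key contains
# "backbone." and strips that prefix before loading; a toy illustration of the
# remapping (keys and values here are made up):
state_dict = {
    "backbone.patch_embed.proj.weight": "w0",
    "decode_head.conv_seg.weight": "w1",         # non-backbone keys are simply dropped
}
remapped = {k.replace("backbone.", ""): v for k, v in state_dict.items() if "backbone" in k}
print(remapped)                                   # {'patch_embed.proj.weight': 'w0'}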
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import torch 14 | import torch.nn.functional as F 15 | from torch.autograd import Function 16 | from torch.autograd.function import once_differentiable 17 | 18 | import MultiScaleDeformableAttention as MSDA 19 | 20 | 21 | class MSDeformAttnFunction(Function): 22 | @staticmethod 23 | def forward( 24 | ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step 25 | ): 26 | ctx.im2col_step = im2col_step 27 | output = MSDA.ms_deform_attn_forward( 28 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step 29 | ) 30 | ctx.save_for_backward( 31 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights 32 | ) 33 | return output 34 | 35 | @staticmethod 36 | @once_differentiable 37 | def backward(ctx, grad_output): 38 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors 39 | grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward( 40 | value, 41 | value_spatial_shapes, 42 | value_level_start_index, 43 | sampling_locations, 44 | attention_weights, 45 | grad_output, 46 | ctx.im2col_step, 47 | ) 48 | 49 | return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None 50 | 51 | 52 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): 53 | # for debug and test only, 54 | # need to use cuda version instead 55 | N_, S_, M_, D_ = value.shape 56 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 57 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 58 | sampling_grids = 2 * sampling_locations - 1 59 | sampling_value_list = [] 60 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 61 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 62 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_ * M_, D_, H_, W_) 63 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 64 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 65 | # N_*M_, D_, Lq_, P_ 66 | sampling_value_l_ = F.grid_sample( 67 | value_l_, sampling_grid_l_, mode="bilinear", padding_mode="zeros", align_corners=False 68 | ) 69 | sampling_value_list.append(sampling_value_l_) 70 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 71 | attention_weights = attention_weights.transpose(1, 2).reshape(N_ * M_, 1, Lq_, L_ * P_) 72 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_ * D_, Lq_) 73 | return output.transpose(1, 2).contiguous() 74 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 
------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | python setup.py build install 11 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from .ms_deform_attn import MSDeformAttn 10 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/modules/ms_deform_attn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import warnings 14 | import math 15 | 16 | import torch 17 | from torch import nn 18 | import torch.nn.functional as F 19 | from torch.nn.init import xavier_uniform_, constant_ 20 | 21 | from ..functions import MSDeformAttnFunction 22 | 23 | 24 | def _is_power_of_2(n): 25 | if (not isinstance(n, int)) or (n < 0): 26 | raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) 27 | return (n & (n - 1) == 0) and n != 0 28 | 29 | 30 | class MSDeformAttn(nn.Module): 31 | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): 32 | """ 33 | Multi-Scale Deformable Attention Module 34 | :param d_model hidden dimension 35 | :param n_levels number of feature levels 36 | :param n_heads number of attention heads 37 | :param n_points number of sampling points per attention head per feature level 38 | """ 39 | super().__init__() 40 | if d_model % n_heads != 0: 41 | raise ValueError("d_model must be divisible by n_heads, but got {} and {}".format(d_model, n_heads)) 42 | _d_per_head = d_model // n_heads 43 | # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation 44 | if not _is_power_of_2(_d_per_head): 45 | warnings.warn( 46 | "You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " 47 | "which is more efficient in our CUDA implementation." 
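# Illustrative check of the head-dimension constraint enforced above. Importing
# MSDeformAttn requires the compiled MultiScaleDeformableAttention extension,
# since the ops package imports it at load time.
from mapmaster.models.bev_decoder.deform_transformer.ops.modules import MSDeformAttn

MSDeformAttn(d_model=256, n_heads=8)     # 256 / 8 = 32, a power of two -> no warning
MSDeformAttn(d_model=240, n_heads=8)     # 240 / 8 = 30 -> UserWarning about CUDA efficiency
# MSDeformAttn(d_model=250, n_heads=8)   # 250 % 8 != 0 -> ValueError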
48 | ) 49 | 50 | self.im2col_step = 64 51 | 52 | self.d_model = d_model 53 | self.n_levels = n_levels 54 | self.n_heads = n_heads 55 | self.n_points = n_points 56 | 57 | self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) 58 | self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) 59 | self.value_proj = nn.Linear(d_model, d_model) 60 | self.output_proj = nn.Linear(d_model, d_model) 61 | 62 | self._reset_parameters() 63 | 64 | def _reset_parameters(self): 65 | constant_(self.sampling_offsets.weight.data, 0.0) 66 | thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) 67 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 68 | grid_init = ( 69 | (grid_init / grid_init.abs().max(-1, keepdim=True)[0]) 70 | .view(self.n_heads, 1, 1, 2) 71 | .repeat(1, self.n_levels, self.n_points, 1) 72 | ) 73 | for i in range(self.n_points): 74 | grid_init[:, :, i, :] *= i + 1 75 | with torch.no_grad(): 76 | self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) 77 | constant_(self.attention_weights.weight.data, 0.0) 78 | constant_(self.attention_weights.bias.data, 0.0) 79 | xavier_uniform_(self.value_proj.weight.data) 80 | constant_(self.value_proj.bias.data, 0.0) 81 | xavier_uniform_(self.output_proj.weight.data) 82 | constant_(self.output_proj.bias.data, 0.0) 83 | 84 | def forward( 85 | self, 86 | query, 87 | reference_points, 88 | input_flatten, 89 | input_spatial_shapes, 90 | input_level_start_index, 91 | input_padding_mask=None, 92 | ): 93 | """ 94 | :param query (N, Length_{query}, C) 95 | :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area 96 | or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes 97 | :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) 98 | :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] 99 | :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] 100 | :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements 101 | 102 | :return output (N, Length_{query}, C) 103 | """ 104 | N, Len_q, _ = query.shape 105 | N, Len_in, _ = input_flatten.shape 106 | assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in 107 | 108 | value = self.value_proj(input_flatten) 109 | if input_padding_mask is not None: 110 | value = value.masked_fill(input_padding_mask[..., None], float(0)) 111 | value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) 112 | sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) 113 | attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) 114 | attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) 115 | # N, Len_q, n_heads, n_levels, n_points, 2 116 | if reference_points.shape[-1] == 2: 117 | offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) 118 | sampling_locations = ( 119 | reference_points[:, :, None, :, None, :] 120 | + sampling_offsets / offset_normalizer[None, None, None, :, None, :] 121 | ) 122 | elif reference_points.shape[-1] == 4: 123 | sampling_locations = ( 124 | reference_points[:, :, None, :, None, :2] 125 | 
+ sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 126 | ) 127 | else: 128 | raise ValueError( 129 | "Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1]) 130 | ) 131 | output = MSDeformAttnFunction.apply( 132 | value, 133 | input_spatial_shapes, 134 | input_level_start_index, 135 | sampling_locations, 136 | attention_weights, 137 | self.im2col_step, 138 | ) 139 | output = self.output_proj(output) 140 | return output 141 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | import os 10 | import glob 11 | 12 | import torch 13 | 14 | from torch.utils.cpp_extension import CUDA_HOME 15 | from torch.utils.cpp_extension import CppExtension 16 | from torch.utils.cpp_extension import CUDAExtension 17 | 18 | from setuptools import find_packages 19 | from setuptools import setup 20 | 21 | requirements = ["torch", "torchvision"] 22 | 23 | 24 | def get_extensions(): 25 | this_dir = os.path.dirname(os.path.abspath(__file__)) 26 | extensions_dir = os.path.join(this_dir, "src") 27 | 28 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 29 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 30 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 31 | 32 | sources = main_file + source_cpu 33 | extension = CppExtension 34 | extra_compile_args = {"cxx": []} 35 | define_macros = [] 36 | 37 | if torch.cuda.is_available() and CUDA_HOME is not None: 38 | extension = CUDAExtension 39 | sources += source_cuda 40 | define_macros += [("WITH_CUDA", None)] 41 | extra_compile_args["nvcc"] = [ 42 | "-DCUDA_HAS_FP16=1", 43 | "-D__CUDA_NO_HALF_OPERATORS__", 44 | "-D__CUDA_NO_HALF_CONVERSIONS__", 45 | "-D__CUDA_NO_HALF2_OPERATORS__", 46 | "-arch=sm_60", 47 | "-gencode=arch=compute_60,code=sm_60", 48 | "-gencode=arch=compute_61,code=sm_61", 49 | "-gencode=arch=compute_70,code=sm_70", 50 | "-gencode=arch=compute_75,code=sm_75", 51 | # "-gencode=arch=compute_80,code=sm_80", 52 | ] 53 | else: 54 | raise NotImplementedError("Cuda is not availabel") 55 | 56 | sources = [os.path.join(extensions_dir, s) for s in sources] 57 | include_dirs = [extensions_dir] 58 | ext_modules = [ 59 | extension( 60 | "MultiScaleDeformableAttention", 61 | sources, 62 | include_dirs=include_dirs, 63 | define_macros=define_macros, 64 | extra_compile_args=extra_compile_args, 65 | ) 66 | ] 67 | return ext_modules 68 | 69 | 70 | setup( 71 | name="MultiScaleDeformableAttention", 72 | version="1.0", 73 | author="Weijie Su", 74 | url="https://github.com/fundamentalvision/Deformable-DETR", 75 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", 76 | packages=find_packages( 77 | exclude=( 78 | "configs", 79 | "tests", 80 | ) 81 | ), 82 | 
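# A minimal usage sketch for MSDeformAttn.forward above; all shapes are
# illustrative. Running it needs a GPU and the compiled
# MultiScaleDeformableAttention extension (built via make.sh / setup.py in this
# ops/ directory), because the wrapper dispatches to the CUDA kernel.
import torch
from mapmaster.models.bev_decoder.deform_transformer.ops.modules import MSDeformAttn

n, len_q, d_model = 2, 300, 256
spatial_shapes = torch.as_tensor([[32, 32], [16, 16]], dtype=torch.long).cuda()   # two feature levels
level_start_index = torch.cat((spatial_shapes.new_zeros((1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
len_in = int(spatial_shapes.prod(1).sum())                                         # 32*32 + 16*16 = 1280

attn = MSDeformAttn(d_model=d_model, n_levels=2, n_heads=8, n_points=4).cuda()
query = torch.randn(n, len_q, d_model).cuda()
input_flatten = torch.randn(n, len_in, d_model).cuda()                             # levels flattened and concatenated
reference_points = torch.rand(n, len_q, 2, 2).cuda()                               # (N, Len_q, n_levels, 2) in [0, 1]

out = attn(query, reference_points, input_flatten, spatial_shapes, level_start_index)
print(out.shape)                                                                    # torch.Size([2, 300, 256])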
ext_modules=get_extensions(), 83 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 84 | ) 85 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cpu/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include <vector> 12 | 13 | #include <ATen/ATen.h> 14 | #include <ATen/cuda/CUDAContext.h> 15 | 16 | 17 | at::Tensor 18 | ms_deform_attn_cpu_forward( 19 | const at::Tensor &value, 20 | const at::Tensor &spatial_shapes, 21 | const at::Tensor &level_start_index, 22 | const at::Tensor &sampling_loc, 23 | const at::Tensor &attn_weight, 24 | const int im2col_step) 25 | { 26 | AT_ERROR("Not implemented on the CPU"); 27 | } 28 | 29 | std::vector<at::Tensor> 30 | ms_deform_attn_cpu_backward( 31 | const at::Tensor &value, 32 | const at::Tensor &spatial_shapes, 33 | const at::Tensor &level_start_index, 34 | const at::Tensor &sampling_loc, 35 | const at::Tensor &attn_weight, 36 | const at::Tensor &grad_output, 37 | const int im2col_step) 38 | { 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | 14 | at::Tensor 15 | ms_deform_attn_cpu_forward( 16 | const at::Tensor &value, 17 | const at::Tensor &spatial_shapes, 18 | const at::Tensor &level_start_index, 19 | const at::Tensor &sampling_loc, 20 | const at::Tensor &attn_weight, 21 | const int im2col_step); 22 | 23 | std::vector<at::Tensor> 24 | ms_deform_attn_cpu_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cuda/ms_deform_attn_cuda.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime.
All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | #include "cuda/ms_deform_im2col_cuda.cuh" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | at::Tensor ms_deform_attn_cuda_forward( 21 | const at::Tensor &value, 22 | const at::Tensor &spatial_shapes, 23 | const at::Tensor &level_start_index, 24 | const at::Tensor &sampling_loc, 25 | const at::Tensor &attn_weight, 26 | const int im2col_step) 27 | { 28 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); 29 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); 30 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); 31 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); 32 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); 33 | 34 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); 35 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); 36 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); 37 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); 38 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); 39 | 40 | const int batch = value.size(0); 41 | const int spatial_size = value.size(1); 42 | const int num_heads = value.size(2); 43 | const int channels = value.size(3); 44 | 45 | const int num_levels = spatial_shapes.size(0); 46 | 47 | const int num_query = sampling_loc.size(1); 48 | const int num_point = sampling_loc.size(4); 49 | 50 | const int im2col_step_ = std::min(batch, im2col_step); 51 | 52 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); 53 | 54 | auto output = at::zeros({batch, num_query, num_heads, channels}, value.options()); 55 | 56 | const int batch_n = im2col_step_; 57 | auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); 58 | auto per_value_size = spatial_size * num_heads * channels; 59 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; 60 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; 61 | for (int n = 0; n < batch/im2col_step_; ++n) 62 | { 63 | auto columns = output_n.select(0, n); 64 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] { 65 | ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(), 66 | value.data() + n * im2col_step_ * per_value_size, 67 | spatial_shapes.data(), 68 | level_start_index.data(), 69 | sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 70 | attn_weight.data() + n * im2col_step_ * per_attn_weight_size, 71 | batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, 72 | columns.data()); 73 | 74 | })); 75 | } 76 | 77 | output = output.view({batch, num_query, num_heads*channels}); 78 | 79 | return output; 80 | } 81 | 82 | 83 | std::vector ms_deform_attn_cuda_backward( 84 | const at::Tensor &value, 85 | const at::Tensor &spatial_shapes, 86 | const at::Tensor 
&level_start_index, 87 | const at::Tensor &sampling_loc, 88 | const at::Tensor &attn_weight, 89 | const at::Tensor &grad_output, 90 | const int im2col_step) 91 | { 92 | 93 | AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous"); 94 | AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous"); 95 | AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous"); 96 | AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous"); 97 | AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous"); 98 | AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous"); 99 | 100 | AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor"); 101 | AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor"); 102 | AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor"); 103 | AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor"); 104 | AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor"); 105 | AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor"); 106 | 107 | const int batch = value.size(0); 108 | const int spatial_size = value.size(1); 109 | const int num_heads = value.size(2); 110 | const int channels = value.size(3); 111 | 112 | const int num_levels = spatial_shapes.size(0); 113 | 114 | const int num_query = sampling_loc.size(1); 115 | const int num_point = sampling_loc.size(4); 116 | 117 | const int im2col_step_ = std::min(batch, im2col_step); 118 | 119 | AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_); 120 | 121 | auto grad_value = at::zeros_like(value); 122 | auto grad_sampling_loc = at::zeros_like(sampling_loc); 123 | auto grad_attn_weight = at::zeros_like(attn_weight); 124 | 125 | const int batch_n = im2col_step_; 126 | auto per_value_size = spatial_size * num_heads * channels; 127 | auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2; 128 | auto per_attn_weight_size = num_query * num_heads * num_levels * num_point; 129 | auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels}); 130 | 131 | for (int n = 0; n < batch/im2col_step_; ++n) 132 | { 133 | auto grad_output_g = grad_output_n.select(0, n); 134 | AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] { 135 | ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(), 136 | grad_output_g.data(), 137 | value.data() + n * im2col_step_ * per_value_size, 138 | spatial_shapes.data(), 139 | level_start_index.data(), 140 | sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 141 | attn_weight.data() + n * im2col_step_ * per_attn_weight_size, 142 | batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point, 143 | grad_value.data() + n * im2col_step_ * per_value_size, 144 | grad_sampling_loc.data() + n * im2col_step_ * per_sample_loc_size, 145 | grad_attn_weight.data() + n * im2col_step_ * per_attn_weight_size); 146 | 147 | })); 148 | } 149 | 150 | return { 151 | grad_value, grad_sampling_loc, grad_attn_weight 152 | }; 153 | } 154 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | at::Tensor ms_deform_attn_cuda_forward( 15 | const at::Tensor &value, 16 | const at::Tensor &spatial_shapes, 17 | const at::Tensor &level_start_index, 18 | const at::Tensor &sampling_loc, 19 | const at::Tensor &attn_weight, 20 | const int im2col_step); 21 | 22 | std::vector ms_deform_attn_cuda_backward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const at::Tensor &grad_output, 29 | const int im2col_step); 30 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "cpu/ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "cuda/ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | 20 | at::Tensor 21 | ms_deform_attn_forward( 22 | const at::Tensor &value, 23 | const at::Tensor &spatial_shapes, 24 | const at::Tensor &level_start_index, 25 | const at::Tensor &sampling_loc, 26 | const at::Tensor &attn_weight, 27 | const int im2col_step) 28 | { 29 | if (value.type().is_cuda()) 30 | { 31 | #ifdef WITH_CUDA 32 | return ms_deform_attn_cuda_forward( 33 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector 42 | ms_deform_attn_backward( 43 | const at::Tensor &value, 44 | const at::Tensor &spatial_shapes, 45 | const at::Tensor &level_start_index, 46 | const at::Tensor &sampling_loc, 47 | const at::Tensor &attn_weight, 48 | const at::Tensor &grad_output, 49 | const int im2col_step) 50 | { 51 | if (value.type().is_cuda()) 52 | { 53 | #ifdef WITH_CUDA 54 | return ms_deform_attn_cuda_backward( 55 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 56 | #else 57 | AT_ERROR("Not compiled with GPU support"); 58 | #endif 59 | } 60 | AT_ERROR("Not implemented on the CPU"); 61 | } 62 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/src/vision.cpp: 
-------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include "ms_deform_attn.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 15 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 16 | } 17 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/ops/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | import torch 14 | from torch.autograd import gradcheck 15 | 16 | from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch 17 | 18 | 19 | N, M, D = 1, 2, 2 20 | Lq, L, P = 2, 2, 2 21 | shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() 22 | level_start_index = torch.cat((shapes.new_zeros((1,)), shapes.prod(1).cumsum(0)[:-1])) 23 | S = sum([(H * W).item() for H, W in shapes]) 24 | 25 | 26 | torch.manual_seed(3) 27 | 28 | 29 | @torch.no_grad() 30 | def check_forward_equal_with_pytorch_double(): 31 | value = torch.rand(N, S, M, D).cuda() * 0.01 32 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 33 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 34 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 35 | im2col_step = 2 36 | output_pytorch = ( 37 | ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()) 38 | .detach() 39 | .cpu() 40 | ) 41 | output_cuda = ( 42 | MSDeformAttnFunction.apply( 43 | value.double(), 44 | shapes, 45 | level_start_index, 46 | sampling_locations.double(), 47 | attention_weights.double(), 48 | im2col_step, 49 | ) 50 | .detach() 51 | .cpu() 52 | ) 53 | fwdok = torch.allclose(output_cuda, output_pytorch) 54 | max_abs_err = (output_cuda - output_pytorch).abs().max() 55 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 56 | 57 | print( 58 | f"* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}" 59 | ) 60 | 61 | 62 | @torch.no_grad() 63 | def 
check_forward_equal_with_pytorch_float(): 64 | value = torch.rand(N, S, M, D).cuda() * 0.01 65 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 66 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 67 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 68 | im2col_step = 2 69 | output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() 70 | output_cuda = ( 71 | MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step) 72 | .detach() 73 | .cpu() 74 | ) 75 | fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) 76 | max_abs_err = (output_cuda - output_pytorch).abs().max() 77 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 78 | 79 | print( 80 | f"* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}" 81 | ) 82 | 83 | 84 | def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): 85 | 86 | value = torch.rand(N, S, M, channels).cuda() * 0.01 87 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 88 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 89 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 90 | im2col_step = 2 91 | func = MSDeformAttnFunction.apply 92 | 93 | value.requires_grad = grad_value 94 | sampling_locations.requires_grad = grad_sampling_loc 95 | attention_weights.requires_grad = grad_attn_weight 96 | 97 | gradok = gradcheck( 98 | func, 99 | ( 100 | value.double(), 101 | shapes, 102 | level_start_index, 103 | sampling_locations.double(), 104 | attention_weights.double(), 105 | im2col_step, 106 | ), 107 | ) 108 | 109 | print(f"* {gradok} check_gradient_numerical(D={channels})") 110 | 111 | 112 | if __name__ == "__main__": 113 | check_forward_equal_with_pytorch_double() 114 | check_forward_equal_with_pytorch_float() 115 | 116 | for channels in [30, 32, 64, 71, 1025, 2048, 3096]: 117 | check_gradient_numerical(channels, True, True, True) 118 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/deform_transformer/position_encoding.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various positional encodings for the transformer. 3 | """ 4 | import math 5 | import torch 6 | from torch import nn 7 | 8 | class PositionEmbeddingSine(nn.Module): 9 | """ 10 | This is a more standard version of the position embedding, very similar to the one 11 | used by the Attention is all you need paper, generalized to work on images. 
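    Concretely (a summary of the code below, assuming normalize=True): the masked cumulative
    sums are shifted by 0.5 and normalized to roughly (0, scale]; with d = num_pos_feats and
    T = temperature, each axis coordinate c is then expanded into d/2 channels
        sin(c / T**(2*i / (d/2))), cos(c / T**(2*i / (d/2))),  i = 0, ..., d/4 - 1,
    and the y-half and x-half are concatenated, so the returned embedding has d channels in
    total. (The otherwise similar PositionEmbeddingSine in mapmaster/models/utils/position_encoding.py
    gives each axis num_pos_feats channels, i.e. 2 * num_pos_feats channels in total.)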
12 | """ 13 | 14 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None): 15 | super().__init__() 16 | self.num_pos_feats = num_pos_feats 17 | self.temperature = temperature 18 | self.normalize = normalize 19 | if scale is not None and normalize is False: 20 | raise ValueError("normalize should be True if scale is passed") 21 | if scale is None: 22 | scale = 2 * math.pi 23 | self.scale = scale 24 | 25 | def forward(self, mask): 26 | assert mask is not None 27 | not_mask = ~mask 28 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 29 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 30 | if self.normalize: 31 | eps = 1e-6 32 | y_embed = (y_embed - 0.5) / (y_embed[:, -1:, :] + eps) * self.scale 33 | x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale 34 | 35 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=mask.device) 36 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2)) 37 | 38 | pos_x = x_embed[:, :, :, None] / dim_t 39 | pos_y = y_embed[:, :, :, None] / dim_t 40 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) 41 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) 42 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 43 | return pos 44 | 45 | class PositionEmbeddingLearned(nn.Module): 46 | """ 47 | Absolute pos embedding, learned. 48 | """ 49 | 50 | def __init__(self, num_pos=(50, 50), num_pos_feats=256): 51 | super().__init__() 52 | self.num_pos = num_pos 53 | self.pos_embed = nn.Embedding(num_pos[0] * num_pos[1], num_pos_feats) 54 | self.reset_parameters() 55 | 56 | def reset_parameters(self): 57 | nn.init.normal_(self.pos_embed.weight) 58 | 59 | def forward(self, mask): 60 | h, w = mask.shape[-2:] 61 | pos = self.pos_embed.weight.view(*self.num_pos, -1)[:h, :w] 62 | pos = pos.permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 63 | return pos 64 | -------------------------------------------------------------------------------- /mapmaster/models/bev_decoder/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | from mapmaster.models.bev_decoder.transformer import Transformer 5 | from mapmaster.models.bev_decoder.deform_transformer import DeformTransformer 6 | 7 | class TransformerBEVDecoder(nn.Module): 8 | def __init__(self, key='im_bkb_features', **kwargs): 9 | super(TransformerBEVDecoder, self).__init__() 10 | self.bev_encoder = Transformer(**kwargs) 11 | self.key = key 12 | 13 | def forward(self, inputs): 14 | assert self.key in inputs 15 | feats = inputs[self.key] 16 | fuse_feats = feats[-1] 17 | fuse_feats = fuse_feats.reshape(*inputs['images'].shape[:2], *fuse_feats.shape[-3:]) 18 | fuse_feats = torch.cat(torch.unbind(fuse_feats, dim=1), dim=-1) 19 | 20 | cameras_info = { 21 | 'extrinsic': inputs.get('extrinsic', None), 22 | 'intrinsic': inputs.get('intrinsic', None), 23 | 'ida_mats': inputs.get('ida_mats', None), 24 | 'do_flip': inputs['extra_infos'].get('do_flip', None) 25 | } 26 | 27 | _, _, bev_feats = self.bev_encoder(fuse_feats, cameras_info=cameras_info) 28 | 29 | return {"bev_enc_features": list(bev_feats)} 30 | 31 | class DeformTransformerBEVEncoder(nn.Module): 32 | def __init__(self, **kwargs): 33 | super(DeformTransformerBEVEncoder, self).__init__() 34 | self.bev_encoder = DeformTransformer(**kwargs) 35 | 36 | def forward(self, inputs): 37 | assert 
"im_bkb_features" in inputs 38 | feats = inputs["im_bkb_features"] 39 | for i in range(len(feats)): 40 | feats[i] = feats[i].reshape(*inputs["images"].shape[:2], *feats[i].shape[-3:]) 41 | feats[i] = feats[i].permute(0, 2, 3, 1, 4) 42 | feats[i] = feats[i].reshape(*feats[i].shape[:3], -1) 43 | cameras_info = { 44 | 'extrinsic': inputs.get('extrinsic', None), 45 | 'intrinsic': inputs.get('intrinsic', None), 46 | 'do_flip': inputs['extra_infos'].get('do_flip', None) 47 | } 48 | # src_feats: (N, H1 * W1, C) tgt_feats: # (M, N, H2 * W2, C) 49 | _, _, bev_feats = self.bev_encoder(feats, cameras_info=cameras_info) 50 | 51 | return { 52 | "bev_enc_features": list(bev_feats), 53 | } 54 | -------------------------------------------------------------------------------- /mapmaster/models/ins_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import Mask2formerINSDecoder, PointMask2formerINSDecoder 2 | -------------------------------------------------------------------------------- /mapmaster/models/ins_decoder/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mapmaster.models.ins_decoder.mask2former import MultiScaleMaskedTransformerDecoder 4 | from mapmaster.models.ins_decoder.pointmask2former import PointMask2TransformerDecoder 5 | 6 | 7 | class INSDecoderBase(nn.Module): 8 | def __init__(self, decoder_ids=(5, ), tgt_shape=None): 9 | super(INSDecoderBase, self).__init__() 10 | self.decoder_ids = tuple(decoder_ids) # [0, 1, 2, 3, 4, 5] 11 | self.tgt_shape = tgt_shape 12 | self.bev_decoder = None 13 | 14 | def forward(self, inputs): 15 | assert "bev_enc_features" in inputs 16 | bev_enc_features = inputs["bev_enc_features"] 17 | if self.tgt_shape is not None: 18 | bev_enc_features = [self.up_sample(x) for x in inputs["bev_enc_features"]] 19 | out = self.bev_decoder(bev_enc_features[-1:], bev_enc_features[-1]) 20 | return {"mask_features": [out["pred_masks"][1:][i] for i in self.decoder_ids], 21 | "obj_scores": [out["pred_logits"][1:][i] for i in self.decoder_ids], 22 | "decoder_outputs": [out["decoder_outputs"][1:][i] for i in self.decoder_ids], 23 | "bev_enc_features": bev_enc_features} 24 | 25 | def up_sample(self, x, tgt_shape=None): 26 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 27 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 28 | return x 29 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 30 | 31 | class Mask2formerINSDecoder(INSDecoderBase): 32 | def __init__(self, decoder_ids=(5, ), tgt_shape=None, **kwargs): 33 | super(Mask2formerINSDecoder, self).__init__(decoder_ids, tgt_shape) 34 | self.bev_decoder = MultiScaleMaskedTransformerDecoder(**kwargs) 35 | 36 | class PointMask2formerINSDecoder(INSDecoderBase): 37 | def __init__(self, decoder_ids=(5, ), tgt_shape=None, **kwargs): 38 | super(PointMask2formerINSDecoder, self).__init__(decoder_ids, tgt_shape) 39 | self.bev_decoder = PointMask2TransformerDecoder(**kwargs) 40 | -------------------------------------------------------------------------------- /mapmaster/models/network.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mapmaster.models import backbone, bev_decoder, ins_decoder, output_head 3 | # os.environ['TORCH_DISTRIBUTED_DEBUG'] = "INFO" 4 | # warnings.filterwarnings('ignore') 5 | 6 | 7 | class MapMaster(nn.Module): 8 | def __init__(self, 
model_config, *args, **kwargs): 9 | super(MapMaster, self).__init__() 10 | self.im_backbone = self.create_backbone(**model_config["im_backbone"]) 11 | self.bev_decoder = self.create_bev_decoder(**model_config["bev_decoder"]) 12 | self.ins_decoder = self.create_ins_decoder(**model_config["ins_decoder"]) 13 | self.output_head = self.create_output_head(**model_config["output_head"]) 14 | self.post_processor = self.create_post_processor(**model_config["post_processor"]) 15 | 16 | def forward(self, inputs): 17 | outputs = {} 18 | outputs.update({k: inputs[k] for k in ["images", "extra_infos"]}) 19 | outputs.update({k: inputs[k].float() for k in ["extrinsic", "intrinsic"]}) 20 | if "ida_mats" in inputs: 21 | outputs.update({"ida_mats": inputs["ida_mats"].float()}) 22 | outputs.update(self.im_backbone(outputs)) 23 | outputs.update(self.bev_decoder(outputs)) 24 | outputs.update(self.ins_decoder(outputs)) 25 | outputs.update(self.output_head(outputs)) 26 | return outputs 27 | 28 | @staticmethod 29 | def create_backbone(arch_name, ret_layers, bkb_kwargs, fpn_kwargs, up_shape=None): 30 | __factory_dict__ = { 31 | "resnet": backbone.ResNetBackbone, 32 | "efficient_net": backbone.EfficientNetBackbone, 33 | "swin_transformer": backbone.SwinTRBackbone, 34 | } 35 | return __factory_dict__[arch_name](bkb_kwargs, fpn_kwargs, up_shape, ret_layers) 36 | 37 | @staticmethod 38 | def create_bev_decoder(arch_name, net_kwargs): 39 | __factory_dict__ = { 40 | "transformer": bev_decoder.TransformerBEVDecoder, 41 | "ipm_deformable_transformer": bev_decoder.DeformTransformerBEVEncoder, 42 | } 43 | return __factory_dict__[arch_name](**net_kwargs) 44 | 45 | @staticmethod 46 | def create_ins_decoder(arch_name, net_kwargs): 47 | __factory_dict__ = { 48 | "mask2former": ins_decoder.Mask2formerINSDecoder, 49 | "line_aware_decoder": ins_decoder.PointMask2formerINSDecoder, 50 | } 51 | 52 | return __factory_dict__[arch_name](**net_kwargs) 53 | 54 | @staticmethod 55 | def create_output_head(arch_name, net_kwargs): 56 | __factory_dict__ = { 57 | "bezier_output_head": output_head.PiecewiseBezierMapOutputHead, 58 | "pivot_point_predictor": output_head.PivotMapOutputHead, 59 | } 60 | return __factory_dict__[arch_name](**net_kwargs) 61 | 62 | @staticmethod 63 | def create_post_processor(arch_name, net_kwargs): 64 | __factory_dict__ = { 65 | "bezier_post_processor": output_head.PiecewiseBezierMapPostProcessor, 66 | "pivot_post_processor": output_head.PivotMapPostProcessor, 67 | } 68 | return __factory_dict__[arch_name](**net_kwargs) 69 | -------------------------------------------------------------------------------- /mapmaster/models/output_head/__init__.py: -------------------------------------------------------------------------------- 1 | from .bezier_outputs import PiecewiseBezierMapOutputHead 2 | from .bezier_post_processor import PiecewiseBezierMapPostProcessor 3 | from .pivot_outputs import PivotMapOutputHead 4 | from .pivot_post_processor import PivotMapPostProcessor -------------------------------------------------------------------------------- /mapmaster/models/output_head/bezier_outputs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FFN(nn.Module): 7 | """ Very simple multi-layer perceptron (also called FFN)""" 8 | 9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, basic_type='linear'): 10 | super().__init__() 11 | self.basic_type = basic_type 12 | if output_dim == 0: 
13 | self.basic_type = "identity" 14 | self.num_layers = num_layers 15 | h = [hidden_dim] * (num_layers - 1) 16 | self.layers = nn.ModuleList(self.basic_layer(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) 17 | 18 | def forward(self, x): 19 | for i, layer in enumerate(self.layers): 20 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 21 | return x 22 | 23 | def basic_layer(self, n, k): 24 | if self.basic_type == 'linear': 25 | return nn.Linear(n, k) 26 | elif self.basic_type == 'conv': 27 | return nn.Conv2d(n, k, kernel_size=1, stride=1) 28 | elif self.basic_type == 'identity': 29 | return nn.Identity() 30 | else: 31 | raise NotImplementedError 32 | 33 | 34 | class PiecewiseBezierMapOutputHead(nn.Module): 35 | def __init__(self, in_channel, num_queries, tgt_shape, num_degree, max_pieces, bev_channels=-1, ins_channel=64): 36 | super(PiecewiseBezierMapOutputHead, self).__init__() 37 | self.num_queries = num_queries 38 | self.num_classes = len(num_queries) 39 | self.tgt_shape = tgt_shape 40 | self.bev_channels = bev_channels 41 | self.semantic_heads = None 42 | if self.bev_channels > 0: 43 | self.semantic_heads = nn.ModuleList( 44 | nn.Sequential(nn.Conv2d(bev_channels, 2, kernel_size=1, stride=1)) for _ in range(self.num_classes) 45 | ) 46 | self.num_degree = num_degree 47 | self.max_pieces = max_pieces 48 | self.num_ctr_im = [(n + 1) for n in self.max_pieces] 49 | self.num_ctr_ex = [n * (d - 1) for n, d in zip(self.max_pieces, self.num_degree)] 50 | _N = self.num_classes 51 | 52 | _C = ins_channel 53 | self.im_ctr_heads = nn.ModuleList(FFN(in_channel, 256, (self.num_ctr_im[i] * 2) * _C, 3) for i in range(_N)) 54 | self.ex_ctr_heads = nn.ModuleList(FFN(in_channel, 256, (self.num_ctr_ex[i] * 2) * _C, 3) for i in range(_N)) 55 | self.npiece_heads = nn.ModuleList(FFN(in_channel, 256, self.max_pieces[i], 3) for i in range(_N)) 56 | self.gap_layer = nn.AdaptiveAvgPool2d((1, 1)) 57 | self.coords = self.compute_locations(device='cuda') 58 | self.coords_head = FFN(2, 256, _C, 3, 'conv') 59 | 60 | def forward(self, inputs): 61 | num_decoders = len(inputs["mask_features"]) 62 | dt_obj_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 63 | dt_ins_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 64 | im_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 65 | ex_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 66 | dt_end_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 67 | coords_feats = self.coords_head.forward(self.coords.repeat((inputs["mask_features"][0].shape[0], 1, 1, 1))) 68 | for i in range(num_decoders): 69 | x_ins_cw = inputs["mask_features"][i].split(self.num_queries, dim=1) 70 | x_obj_cw = inputs["obj_scores"][i].split(self.num_queries, dim=1) 71 | x_qry_cw = inputs["decoder_outputs"][i].split(self.num_queries, dim=1) 72 | batch_size = x_qry_cw[0].shape[0] 73 | for j in range(self.num_classes): 74 | num_qry = self.num_queries[j] 75 | # if self.training: 76 | dt_ins_masks[i][j] = self.up_sample(x_ins_cw[j]) 77 | dt_obj_logit[i][j] = x_obj_cw[j] 78 | dt_end_logit[i][j] = self.npiece_heads[j](x_qry_cw[j]) 79 | # im 80 | im_feats = self.im_ctr_heads[j](x_qry_cw[j]) 81 | im_feats = im_feats.reshape(batch_size, num_qry, self.num_ctr_im[j] * 2, -1).flatten(1, 2) 82 | im_coords_map = torch.einsum("bqc,bchw->bqhw", im_feats, coords_feats) 83 | im_coords = self.gap_layer(im_coords_map) 84 | im_ctr_coord[i][j] = 
im_coords.reshape(batch_size, num_qry, self.max_pieces[j] + 1, 2) 85 | # ex 86 | if self.num_ctr_ex[j] == 0: 87 | ex_ctr_coord[i][j] = torch.zeros(batch_size, num_qry, self.max_pieces[j], 0, 2).cuda() 88 | else: 89 | ex_feats = self.ex_ctr_heads[j](x_qry_cw[j]) 90 | ex_feats = ex_feats.reshape(batch_size, num_qry, self.num_ctr_ex[j] * 2, -1).flatten(1, 2) 91 | ex_coords_map = torch.einsum("bqc,bchw->bqhw", ex_feats, coords_feats) 92 | ex_coords = self.gap_layer(ex_coords_map) 93 | ex_ctr_coord[i][j] = ex_coords.reshape(batch_size, num_qry, self.max_pieces[j], self.num_degree[j] - 1, 2) 94 | ret = {"outputs": {"obj_logits": dt_obj_logit, "ins_masks": dt_ins_masks, 95 | "ctr_im": im_ctr_coord, "ctr_ex": ex_ctr_coord, "end_logits": dt_end_logit}} 96 | if self.semantic_heads is not None: 97 | num_decoders = len(inputs["bev_enc_features"]) 98 | dt_sem_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 99 | for i in range(num_decoders): 100 | x_sem = inputs["bev_enc_features"][i] 101 | for j in range(self.num_classes): 102 | dt_sem_masks[i][j] = self.up_sample(self.semantic_heads[j](x_sem)) 103 | ret["outputs"].update({"sem_masks": dt_sem_masks}) 104 | return ret 105 | 106 | def up_sample(self, x, tgt_shape=None): 107 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 108 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 109 | return x 110 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 111 | 112 | def compute_locations(self, stride=1, device='cpu'): 113 | 114 | fh, fw = self.tgt_shape 115 | 116 | shifts_x = torch.arange(0, fw * stride, step=stride, dtype=torch.float32, device=device) 117 | shifts_y = torch.arange(0, fh * stride, step=stride, dtype=torch.float32, device=device) 118 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 119 | shift_x = shift_x.reshape(-1) 120 | shift_y = shift_y.reshape(-1) 121 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 122 | 123 | locations = locations.unsqueeze(0).permute(0, 2, 1).contiguous().float().view(1, 2, fh, fw) 124 | locations[:, 0, :, :] /= fw 125 | locations[:, 1, :, :] /= fh 126 | 127 | return locations 128 | -------------------------------------------------------------------------------- /mapmaster/models/output_head/line_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def seq_matching_dist_parallel(cost, gt_lens, coe_endpts=0): 4 | # Time complexity: O(m*n) 5 | bs, m, n = cost.shape 6 | assert m <= n 7 | min_cost = np.ones((bs, m, n)) * np.inf 8 | mem_sort_value = np.ones((bs, m, n)) * np.inf # v[i][j] = np.min(min_cost[i][:j+1]) 9 | 10 | # initialization 11 | for j in range(0, n): 12 | if j == 0: 13 | min_cost[:, 0, j] = cost[:, 0, j] 14 | mem_sort_value[:, 0, j] = min_cost[:, 0, 0] 15 | 16 | for i in range(1, m): 17 | for j in range(i, n): 18 | min_cost[:, i, j] = mem_sort_value[:, i-1, j-1] + cost[:, i, j] 19 | indexes = (min_cost[:, i, j] < mem_sort_value[:, i, j-1]) 20 | indexes_inv = np.array(1-indexes, dtype=np.bool) 21 | mem_sort_value[indexes, i, j] = min_cost[indexes, i, j] 22 | mem_sort_value[indexes_inv, i, j] = mem_sort_value[indexes_inv, i, j-1] 23 | 24 | indexes = [] 25 | for i, ll in enumerate(gt_lens): 26 | indexes.append([i, ll-1, n-1]) 27 | indexes = np.array(indexes) 28 | xs, ys, zs = indexes[:, 0], indexes[:, 1], indexes[:, 2] 29 | res_cost = min_cost[xs, ys, zs] + (cost[xs, 0, 0] + cost[xs, ys, zs]) * coe_endpts 30 | return res_cost / (indexes[:, 
1]+1+coe_endpts*2) 31 | 32 | def pivot_dynamic_matching(cost: np.array): 33 | # Time complexity: O(m*n) 34 | m, n = cost.shape 35 | assert m <= n 36 | 37 | min_cost = np.ones((m, n)) * np.inf 38 | mem_sort_value = np.ones((m, n)) * np.inf 39 | match_res1 = [[] for _ in range(n)] 40 | match_res2 = [[] for _ in range(n)] 41 | 42 | # initialization 43 | for j in range(0, n-m+1): 44 | match_res1[j] = [0] 45 | mem_sort_value[0][j] = cost[0][0] 46 | if j == 0: 47 | min_cost[0][j] = cost[0][0] 48 | 49 | for i in range(1, m): 50 | for j in range(i, n-m + i+1): 51 | min_cost[i][j] = mem_sort_value[i-1][j-1] + cost[i][j] 52 | if min_cost[i][j] < mem_sort_value[i][j-1]: 53 | mem_sort_value[i][j] = min_cost[i][j] 54 | if i < m-1: 55 | match_res2[j] = match_res1[j-1] + [j] 56 | else: 57 | mem_sort_value[i][j] = mem_sort_value[i][j-1] 58 | if i < m -1: 59 | match_res2[j] = match_res2[j-1] 60 | if i < m-1: 61 | match_res1, match_res2 = match_res2.copy(), [[] for _ in range(n)] 62 | 63 | total_cost = min_cost[-1][-1] 64 | final_match_res = match_res1[-2] + [n-1] 65 | return total_cost, final_match_res -------------------------------------------------------------------------------- /mapmaster/models/output_head/pivot_outputs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FFN(nn.Module): 7 | """ Very simple multi-layer perceptron (also called FFN)""" 8 | 9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, basic_type='linear'): 10 | super().__init__() 11 | self.basic_type = basic_type 12 | self.num_layers = num_layers 13 | h = [hidden_dim] * (num_layers - 1) 14 | self.layers = nn.ModuleList(self.basic_layer(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) 15 | 16 | def forward(self, x): 17 | for i, layer in enumerate(self.layers): 18 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 19 | return x 20 | 21 | def basic_layer(self, n, k): 22 | if self.basic_type == 'linear': 23 | return nn.Linear(n, k) 24 | elif self.basic_type == 'conv': 25 | return nn.Conv2d(n, k, kernel_size=1, stride=1) 26 | else: 27 | raise NotImplementedError 28 | 29 | class PivotMapOutputHead(nn.Module): 30 | def __init__(self, in_channel, num_queries, tgt_shape, max_pieces, bev_channels=-1, ins_channel=64): 31 | super(PivotMapOutputHead, self).__init__() 32 | self.num_queries = num_queries 33 | self.num_classes = len(num_queries) 34 | self.tgt_shape = tgt_shape 35 | self.bev_channels = bev_channels 36 | self.semantic_heads = None 37 | if self.bev_channels > 0: 38 | self.semantic_heads = nn.ModuleList( 39 | nn.Sequential(nn.Conv2d(bev_channels, 2, kernel_size=1, stride=1)) for _ in range(self.num_classes) 40 | ) 41 | 42 | self.max_pieces = max_pieces # [10, 2, 30] 43 | self.pts_split = [num_queries[i]*max_pieces[i] for i in range(len(num_queries))] 44 | _N = self.num_classes 45 | _C = ins_channel 46 | self.im_ctr_heads = nn.ModuleList(FFN(in_channel, 256, 2 * _C, 3) for _ in range(_N)) 47 | self.pts_cls_heads = nn.ModuleList(FFN((_C)*2, _C*2, 2, 3) for i in range(_N)) 48 | self.gap_layer = nn.AdaptiveAvgPool2d((1, 1)) 49 | self.coords = self.compute_locations(device='cuda') # (1, 2, h, w) 50 | self.coords_head = FFN(2, 256, _C, 3, 'conv') 51 | 52 | def forward(self, inputs): 53 | num_decoders = len(inputs["mask_features"]) 54 | dt_obj_logit = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 55 | dt_ins_masks = [[[] for _ in 
range(self.num_classes)] for _ in range(num_decoders)] 56 | im_ctr_coord = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 57 | dt_pivots_logits = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 58 | coords_feats = self.coords_head.forward(self.coords.repeat((inputs["mask_features"][0].shape[0], 1, 1, 1))) 59 | 60 | for i in range(num_decoders): 61 | x_ins_cw = inputs["mask_features"][i].split(self.num_queries, dim=1) 62 | x_obj_cw = inputs["obj_scores"][i].split(self.num_queries, dim=1) 63 | x_qry_cw = inputs["decoder_outputs"][i].split(self.pts_split, dim=1) # [(b, 200, c), (b, 50, c), (b, 450, c)] 64 | batch_size = x_qry_cw[0].shape[0] 65 | for j in range(self.num_classes): 66 | dt_ins_masks[i][j] = self.up_sample(x_ins_cw[j]) # (B, P, H, W) 67 | dt_obj_logit[i][j] = x_obj_cw[j] # (B, P, 2) 68 | # im 69 | num_qry, n_pts = self.num_queries[j], self.max_pieces[j] 70 | im_feats = self.im_ctr_heads[j](x_qry_cw[j]) # (bs, n_q * n_pts, 2*c) 71 | im_feats_tmp = im_feats.reshape(batch_size, num_qry*n_pts*2, -1) # (bs, n_q*n_pts*2, c) 72 | im_coords_map = torch.einsum("bqc,bchw->bqhw", im_feats_tmp, coords_feats) # [bs, n_q*n_pts*2, h, w] 73 | im_coords = self.gap_layer(im_coords_map) # [bs, n_q * n_pts] 74 | im_coords = im_coords.reshape(batch_size, num_qry, self.max_pieces[j], 2).sigmoid() 75 | im_ctr_coord[i][j] = im_coords 76 | 77 | pt_feats = im_feats.reshape(batch_size, num_qry, self.max_pieces[j], -1).flatten(1, 2) # [bs, n_q * n_pts, 2*C] 78 | pt_logits = self.pts_cls_heads[j](pt_feats) 79 | dt_pivots_logits[i][j] = pt_logits.reshape(batch_size, num_qry, self.max_pieces[j], 2) 80 | 81 | ret = {"outputs": {"obj_logits": dt_obj_logit, "ins_masks": dt_ins_masks, 82 | "ctr_im": im_ctr_coord, "pts_logits": dt_pivots_logits}} 83 | 84 | if self.semantic_heads is not None: 85 | num_decoders = len(inputs["bev_enc_features"]) 86 | dt_sem_masks = [[[] for _ in range(self.num_classes)] for _ in range(num_decoders)] 87 | for i in range(num_decoders): 88 | x_sem = inputs["bev_enc_features"][i] 89 | for j in range(self.num_classes): 90 | dt_sem_masks[i][j] = self.up_sample(self.semantic_heads[j](x_sem)) # (B, P, 2, H, W) 91 | ret["outputs"].update({"sem_masks": dt_sem_masks}) 92 | return ret 93 | 94 | def up_sample(self, x, tgt_shape=None): 95 | tgt_shape = self.tgt_shape if tgt_shape is None else tgt_shape 96 | if tuple(x.shape[-2:]) == tuple(tgt_shape): 97 | return x 98 | return F.interpolate(x, size=tgt_shape, mode="bilinear", align_corners=True) 99 | 100 | def compute_locations(self, stride=1, device='cpu'): 101 | 102 | fh, fw = self.tgt_shape 103 | 104 | shifts_x = torch.arange(0, fw * stride, step=stride, dtype=torch.float32, device=device) 105 | shifts_y = torch.arange(0, fh * stride, step=stride, dtype=torch.float32, device=device) 106 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 107 | shift_x = shift_x.reshape(-1) 108 | shift_y = shift_y.reshape(-1) 109 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 110 | 111 | locations = locations.unsqueeze(0).permute(0, 2, 1).contiguous().float().view(1, 2, fh, fw) 112 | locations[:, 0, :, :] /= fw 113 | locations[:, 1, :, :] /= fh 114 | 115 | return locations 116 | -------------------------------------------------------------------------------- /mapmaster/models/utils/mask_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from 
detectron2.projects.point_rend.point_features import point_sample 6 | from detectron2.projects.point_rend.point_features import get_uncertain_point_coords_with_randomness 7 | 8 | 9 | class SegmentationLoss(nn.Module): 10 | 11 | def __init__(self, ce_weight, dice_weight, use_point_render=False, num_points=8000, oversample=3.0, importance=0.75): 12 | super(SegmentationLoss, self).__init__() 13 | self.ce_weight = ce_weight 14 | self.dice_weight = dice_weight 15 | self.use_point_render = use_point_render 16 | self.num_points = num_points 17 | self.oversample = oversample 18 | self.importance = importance 19 | 20 | def forward(self, dt_masks, gt_masks, stage="loss"): 21 | loss = 0 22 | if self.use_point_render: 23 | dt_masks, gt_masks = self.points_render(dt_masks, gt_masks, stage) 24 | if self.ce_weight > 0: 25 | loss += self.ce_weight * self.forward_sigmoid_ce_loss(dt_masks, gt_masks) 26 | if self.dice_weight > 0: 27 | loss += self.dice_weight * self.forward_dice_loss(dt_masks, gt_masks) 28 | return loss 29 | 30 | @staticmethod 31 | def forward_dice_loss(inputs, targets): 32 | inputs = inputs.sigmoid() 33 | inputs = inputs.flatten(1) 34 | targets = targets.flatten(1) 35 | numerator = 2 * (inputs * targets).sum(-1) 36 | denominator = inputs.sum(-1) + targets.sum(-1) 37 | loss = 1 - (numerator + 1) / (denominator + 1) 38 | return loss 39 | 40 | @staticmethod 41 | def forward_sigmoid_ce_loss(inputs, targets): 42 | inputs = inputs.flatten(1) 43 | targets = targets.flatten(1) 44 | loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 45 | return loss.mean(1) 46 | 47 | def points_render(self, src_masks, tgt_masks, stage): 48 | assert stage in ["loss", "matcher"] 49 | assert src_masks.shape == tgt_masks.shape 50 | 51 | src_masks = src_masks[:, None] 52 | tgt_masks = tgt_masks[:, None] 53 | 54 | if stage == "matcher": 55 | point_coords = torch.rand(1, self.num_points, 2, device=src_masks.device) 56 | point_coords_src = point_coords.repeat(src_masks.shape[0], 1, 1) 57 | point_coords_tgt = point_coords.repeat(tgt_masks.shape[0], 1, 1) 58 | else: 59 | point_coords = get_uncertain_point_coords_with_randomness( 60 | src_masks, 61 | lambda logits: self.calculate_uncertainty(logits), 62 | self.num_points, 63 | self.oversample, 64 | self.importance, 65 | ) 66 | point_coords_src = point_coords.clone() 67 | point_coords_tgt = point_coords.clone() 68 | 69 | src_masks = point_sample(src_masks, point_coords_src, align_corners=False).squeeze(1) 70 | tgt_masks = point_sample(tgt_masks, point_coords_tgt, align_corners=False).squeeze(1) 71 | 72 | return src_masks, tgt_masks 73 | 74 | @staticmethod 75 | def calculate_uncertainty(logits): 76 | """ 77 | We estimate uncerainty as L1 distance between 0.0 and the logit prediction in 'logits' for the 78 | foreground class in `classes`. 79 | Args: 80 | logits (Tensor): A tensor of shape (R, 1, ...) for class-specific or 81 | class-agnostic, where R is the total number of predicted masks in all images and C is 82 | the number of foreground classes. The values are logits. 83 | Returns: 84 | scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with 85 | the most uncertain locations having the highest uncertainty score. 
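        Example (illustrative only, using the formula implemented below):
            >>> logits = torch.randn(8, 1, 64, 64)   # per-point mask logits
            >>> scores = -(torch.abs(logits))        # largest where |logit| ~ 0, i.e. closest
            ...                                      # to the predicted decision boundary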
86 | """ 87 | assert logits.shape[1] == 1 88 | gt_class_logits = logits.clone() 89 | return -(torch.abs(gt_class_logits)) 90 | -------------------------------------------------------------------------------- /mapmaster/models/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import warnings 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | 8 | def c2_xavier_fill(module: nn.Module) -> None: 9 | """ 10 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2. 11 | Also initializes `module.bias` to 0. 12 | Args: 13 | module (torch.nn.Module): module to initialize. 14 | """ 15 | # Caffe2 implementation of XavierFill in fact 16 | # corresponds to kaiming_uniform_ in PyTorch 17 | # pyre-fixme[6]: For 1st param expected `Tensor` but got `Union[Module, Tensor]`. 18 | nn.init.kaiming_uniform_(module.weight, a=1) 19 | if module.bias is not None: 20 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module, 21 | # torch.Tensor]`. 22 | nn.init.constant_(module.bias, 0) 23 | 24 | 25 | class Conv2d(torch.nn.Conv2d): 26 | """ 27 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 28 | """ 29 | 30 | def __init__(self, *args, **kwargs): 31 | """ 32 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 33 | Args: 34 | norm (nn.Module, optional): a normalization layer 35 | activation (callable(Tensor) -> Tensor): a callable activation function 36 | It assumes that norm layer is used before activation. 37 | """ 38 | norm = kwargs.pop("norm", None) 39 | activation = kwargs.pop("activation", None) 40 | super().__init__(*args, **kwargs) 41 | 42 | self.norm = norm 43 | self.activation = activation 44 | 45 | def forward(self, x): 46 | # torchscript does not support SyncBatchNorm yet 47 | # https://github.com/pytorch/pytorch/issues/40507 48 | # and we skip these codes in torchscript since: 49 | # 1. currently we only support torchscript in evaluation mode 50 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or 51 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 52 | if not torch.jit.is_scripting(): 53 | with warnings.catch_warnings(record=True): 54 | if x.numel() == 0 and self.training: 55 | # https://github.com/pytorch/pytorch/issues/12013 56 | assert not isinstance( 57 | self.norm, torch.nn.SyncBatchNorm 58 | ), "SyncBatchNorm does not support empty inputs!" 59 | 60 | x = F.conv2d( 61 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 62 | ) 63 | if self.norm is not None: 64 | x = self.norm(x) 65 | if self.activation is not None: 66 | x = self.activation(x) 67 | return x 68 | 69 | 70 | def get_activation_fn(activation): 71 | """Return an activation function given a string""" 72 | if activation == "relu": 73 | return F.relu 74 | if activation == "gelu": 75 | return F.gelu 76 | if activation == "glu": 77 | return F.glu 78 | raise RuntimeError(F"activation should be relu/gelu, not {activation}.") 79 | -------------------------------------------------------------------------------- /mapmaster/models/utils/position_encoding.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various positional encodings for the transformer. 
3 | """ 4 | import math 5 | import torch 6 | from torch import nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class PositionEmbeddingSine(nn.Module): 11 | """ 12 | This is a more standard version of the position embedding, very similar to the one 13 | used by the Attention is all you need paper, generalized to work on images. 14 | """ 15 | 16 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=True, scale=None): 17 | super().__init__() 18 | self.num_pos_feats = num_pos_feats 19 | self.temperature = temperature 20 | self.normalize = normalize 21 | if scale is not None and normalize is False: 22 | raise ValueError("normalize should be True if scale is passed") 23 | if scale is None: 24 | scale = 2 * math.pi 25 | self.scale = scale 26 | 27 | def forward(self, mask): 28 | assert mask is not None 29 | not_mask = ~mask 30 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 31 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 32 | if self.normalize: 33 | eps = 1e-6 34 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 35 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 36 | 37 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=mask.device) 38 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 39 | 40 | pos_x = x_embed[:, :, :, None] / dim_t 41 | pos_y = y_embed[:, :, :, None] / dim_t 42 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) 43 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) 44 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 45 | return pos 46 | 47 | 48 | class PositionEmbeddingLearned(nn.Module): 49 | """ 50 | Absolute pos embedding, learned. 51 | """ 52 | 53 | def __init__(self, num_pos=(50, 50), num_pos_feats=256): 54 | super().__init__() 55 | self.num_pos = num_pos 56 | self.pos_embed = nn.Embedding(num_pos[0] * num_pos[1], num_pos_feats) 57 | self.reset_parameters() 58 | 59 | def reset_parameters(self): 60 | nn.init.normal_(self.pos_embed.weight) 61 | 62 | def forward(self, mask): 63 | h, w = mask.shape[-2:] 64 | pos = self.pos_embed.weight.view(*self.num_pos, -1)[:h, :w] 65 | pos = pos.permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 66 | return pos 67 | 68 | 69 | class PositionEmbeddingIPM(nn.Module): 70 | 71 | def __init__(self, 72 | encoder=None, 73 | num_pos=(16, 168), 74 | input_shape=(512, 896), 75 | num_pos_feats=64, 76 | sine_encoding=False, 77 | temperature=10000): 78 | super().__init__() 79 | 80 | h, w_expand = num_pos 81 | self.current_shape = (h, w_expand // 6) 82 | self.input_shape = input_shape 83 | 84 | self.num_pos_feats = num_pos_feats 85 | self.temperature = temperature 86 | self.encoder = encoder 87 | self.sine_encoding = sine_encoding 88 | 89 | def get_embedding(self, extrinsic, intrinsic, ida_mats): 90 | """ 91 | Get the BeV Coordinate for Image 92 | 93 | Return 94 | xy_world_coord (N, H, W, 2) Ego x, y coordinate 95 | Valid (N, H, W, 1) -- Valid Points or Not 1 -- valid; 0 -- invalid 96 | """ 97 | # extrinsic -> (B, M, 4, 4) 98 | device, b, n = extrinsic.device, extrinsic.shape[0], extrinsic.shape[1] 99 | 100 | x = torch.linspace(0, self.input_shape[1] - 1, self.current_shape[1], dtype=torch.float) 101 | y = torch.linspace(0, self.input_shape[0] - 1, self.current_shape[0], dtype=torch.float) 102 | y_grid, x_grid = torch.meshgrid(y, x) 103 | z = torch.ones(self.current_shape) 104 | feat_coords = torch.stack([x_grid, y_grid, z], dim=-1).to(device) # (H, 
W, 3) 105 | feat_coords = feat_coords.unsqueeze(0).repeat(n, 1, 1, 1).unsqueeze(0).repeat(b, 1, 1, 1, 1) # (B, N, H, W, 3) 106 | 107 | ida_mats = ida_mats.view(b, n, 1, 1, 3, 3) 108 | image_coords = ida_mats.inverse().matmul(feat_coords.unsqueeze(-1)) # (B, N, H, W, 3, 1) 109 | 110 | intrinsic = intrinsic.view(b, n, 1, 1, 3, 3) # (B, N, 1, 1, 3, 3) 111 | normed_coords = torch.linalg.inv(intrinsic) @ image_coords # (B, N, H, W, 3, 1) 112 | 113 | ext_rots = extrinsic[:, :, :3, :3] # (B, N, 3, 3) 114 | ext_trans = extrinsic[:, :, :3, 3] # (B, N, 3) 115 | 116 | ext_rots = ext_rots.view(b, n, 1, 1, 3, 3) # (B, N, 1, 1, 3, 3) 117 | world_coords = (ext_rots @ normed_coords).squeeze(-1) # (B, N, H, W, 3) 118 | world_coords = F.normalize(world_coords, p=2, dim=-1) 119 | z_coord = world_coords[:, :, :, :, 2] # (B, N, H, W) 120 | 121 | trans_z = ext_trans[:, :, 2].unsqueeze(-1).unsqueeze(-1) # (B, N, 1, 1) 122 | depth = - trans_z / z_coord # (B, N, H, W) 123 | valid = depth > 0 # (B, N, H, W) 124 | 125 | xy_world_coords = world_coords[:, :, :, :, :2] # (B, N, H, W, 2) 126 | xy_world_coords = xy_world_coords * depth.unsqueeze(-1) 127 | valid = valid.unsqueeze(-1) # (B, N, H, W, 1) 128 | 129 | return xy_world_coords, valid 130 | 131 | def forward(self, extrinsic, intrinsic, ida_mats, do_flip): 132 | """ 133 | extrinsic (N, 6, 4, 4) torch.Tensor 134 | intrinsic (N, 6, 3, 3) 135 | """ 136 | device = extrinsic.device 137 | xy_pos_embed, valid = self.get_embedding(extrinsic, intrinsic, ida_mats) 138 | if do_flip: 139 | xy_pos_embed[:, :, :, :, 1] = -1 * xy_pos_embed[:, :, :, :, 1] 140 | # along with w 141 | xy_pos_embed = torch.cat(torch.unbind(xy_pos_embed, dim=1), dim=-2) # (B, H, N*W, 2) 142 | valid = torch.cat(torch.unbind(valid, dim=1), dim=-2) # (B, H, N*W, 2) 143 | if self.sine_encoding: 144 | # Use Sine encoding to get 256 dim embeddings 145 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=device) 146 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2)) 147 | pos_embed = xy_pos_embed[:, :, :, :, None] / dim_t 148 | pos_x = torch.stack((pos_embed[:, :, :, 0, 0::2].sin(), pos_embed[:, :, :, 0, 1::2].cos()), dim=4) 149 | pos_y = torch.stack((pos_embed[:, :, :, 1, 0::2].sin(), pos_embed[:, :, :, 1, 1::2].cos()), dim=4) 150 | pos_full_embed = torch.cat((pos_y.flatten(3), pos_x.flatten(3)), dim=3) 151 | pos_combined = torch.where(valid, pos_full_embed, torch.tensor(0., dtype=torch.float32, device=device)) 152 | pos_combined = pos_combined.permute(0, 3, 1, 2) # (B, 2, H, W') 153 | else: 154 | assert None 155 | # pos_combined = torch.where(valid, xy_pos_embed, torch.tensor(0., dtype=torch.float32, device=device)) 156 | # pos_combined = pos_combined.permute(0, 3, 1, 2) 157 | 158 | if self.encoder is None: 159 | return pos_combined, valid.squeeze(-1) 160 | else: 161 | pos_embed_contiguous = pos_combined.contiguous() 162 | return self.encoder(pos_embed_contiguous), valid.squeeze(-1) 163 | 164 | 165 | class PositionEmbeddingTgt(nn.Module): 166 | def __init__(self, 167 | encoder=None, 168 | tgt_shape=(40, 20), 169 | map_size=(400, 200), 170 | map_resolution=0.15, 171 | num_pos_feats=64, 172 | sine_encoding=False, 173 | temperature=10000): 174 | super().__init__() 175 | self.tgt_shape = tgt_shape 176 | self.encoder = encoder 177 | self.map_size = map_size 178 | self.map_resolution = map_resolution 179 | self.num_pos_feats = num_pos_feats 180 | self.temperature = temperature 181 | self.sine_encoding = sine_encoding 182 | 183 | def forward(self, mask): 184 | B = 
mask.shape[0] 185 | 186 | map_forward_ratio = self.tgt_shape[0] / self.map_size[0] 187 | map_lateral_ratio = self.tgt_shape[1] / self.map_size[1] 188 | 189 | map_forward_res = self.map_resolution / map_forward_ratio 190 | map_lateral_res = self.map_resolution / map_lateral_ratio 191 | 192 | X = (torch.arange(self.tgt_shape[0] - 1, -1, -1, device=mask.device) + 0.5 - self.tgt_shape[ 193 | 0] / 2) * map_forward_res 194 | Y = (torch.arange(self.tgt_shape[1] - 1, -1, -1, device=mask.device) + 0.5 - self.tgt_shape[ 195 | 1] / 2) * map_lateral_res 196 | 197 | grid_X, grid_Y = torch.meshgrid(X, Y) 198 | pos_embed = torch.stack([grid_X, grid_Y], dim=-1) # (H, W, 2) 199 | 200 | if self.sine_encoding: 201 | dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=mask.device) 202 | dim_t = self.temperature ** (2 * (dim_t // 2) / (self.num_pos_feats // 2)) 203 | 204 | pos_embed = pos_embed[:, :, :, None] / dim_t 205 | pos_x = torch.stack((pos_embed[:, :, 0, 0::2].sin(), pos_embed[:, :, 0, 1::2].cos()), dim=3).flatten(2) 206 | pos_y = torch.stack((pos_embed[:, :, 1, 0::2].sin(), pos_embed[:, :, 1, 1::2].cos()), dim=3).flatten(2) 207 | pos_full_embed = torch.cat((pos_y, pos_x), dim=2) 208 | 209 | pos_embed = pos_full_embed.unsqueeze(0).repeat(B, 1, 1, 1).permute(0, 3, 1, 2) 210 | else: 211 | pos_embed = pos_embed.unsqueeze(0).repeat(B, 1, 1, 1).permute(0, 3, 1, 2) 212 | 213 | if self.encoder is None: 214 | return pos_embed 215 | else: 216 | pos_embed_contiguous = pos_embed.contiguous() 217 | return self.encoder(pos_embed_contiguous) -------------------------------------------------------------------------------- /mapmaster/models/utils/recovery_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from detectron2.projects.point_rend.point_features import point_sample 6 | 7 | 8 | class PointRecoveryLoss(nn.Module): 9 | 10 | def __init__(self, ce_weight, dice_weight, curve_width, tgt_shape): 11 | super(PointRecoveryLoss, self).__init__() 12 | self.ce_weight = ce_weight 13 | self.dice_weight = dice_weight 14 | self.kernel = self.generate_kernel(curve_width, tgt_shape) 15 | 16 | def forward(self, points, gt_masks): 17 | points_expanded = points.unsqueeze(2) - self.kernel.repeat(points.shape[0], 1, 1, 1) 18 | points_expanded = torch.clamp(points_expanded.flatten(1, 2), min=0, max=1) # (N, P*w*w, 2) [0, 1] 19 | dt_points = point_sample(gt_masks[:, None], points_expanded, align_corners=False).squeeze(1).flatten(1) 20 | gt_points = torch.ones_like(dt_points) 21 | loss = 0 22 | if self.ce_weight > 0: 23 | loss += self.ce_weight * self.forward_ce_loss(dt_points, gt_points) 24 | if self.dice_weight > 0: 25 | loss += self.dice_weight * self.forward_dice_loss(dt_points, gt_points) 26 | return loss 27 | 28 | @staticmethod 29 | def generate_kernel(curve_width, tgt_shape, device='cuda'): 30 | width = torch.tensor(list(range(curve_width))) 31 | kernel = torch.stack(torch.meshgrid(width, width), dim=-1).float() 32 | kernel = kernel - curve_width // 2 33 | kernel[..., 0] = kernel[..., 0] / tgt_shape[1] 34 | kernel[..., 1] = kernel[..., 1] / tgt_shape[0] 35 | kernel = kernel.flatten(0, 1).unsqueeze(0).unsqueeze(0) # (1, 1, w*w, 2) 36 | kernel = kernel.cuda() if device == 'cuda' else kernel 37 | return kernel 38 | 39 | @staticmethod 40 | def forward_dice_loss(inputs, targets): 41 | numerator = 2 * (inputs * targets).sum(-1) 42 | denominator = inputs.sum(-1) + 
targets.sum(-1) 43 | loss = 1 - (numerator + 1) / (denominator + 1) 44 | return loss 45 | 46 | @staticmethod 47 | def forward_ce_loss(inputs, targets): 48 | loss = F.binary_cross_entropy(inputs, targets, reduction="none") 49 | return loss.mean(1) 50 | -------------------------------------------------------------------------------- /mapmaster/utils/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import PIL 5 | import importlib 6 | import warnings 7 | import subprocess 8 | import torch 9 | import torchvision 10 | import numpy as np 11 | from tabulate import tabulate 12 | from collections import defaultdict 13 | 14 | __all__ = ["collect_env_info"] 15 | 16 | 17 | def collect_torch_env(): 18 | import torch.__config__ 19 | return torch.__config__.show() 20 | 21 | 22 | def collect_git_info(): 23 | try: 24 | import git 25 | from git import InvalidGitRepositoryError 26 | except ImportError: 27 | warnings.warn("Please consider to install gitpython for git info collection by 'pip install gitpython'.") 28 | return "Git status: unknown\n" 29 | 30 | try: 31 | repo = git.Repo(get_root_dir()) 32 | except InvalidGitRepositoryError: 33 | warnings.warn("Current path is possibly not a valid git repository.") 34 | return "Git status: unknown\n" 35 | 36 | msg = "***Git status:***\n{}\nHEAD Commit-id: {}\n".format(repo.git.status().replace("<", "\<"), repo.head.commit) 37 | msg = "{}\n{}".format(msg, "***Git Diff:***\n{}\n".format(repo.git.diff().replace("<", "\<"))) 38 | return msg 39 | 40 | 41 | def detect_compute_compatibility(CUDA_HOME, so_file): 42 | try: 43 | cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") 44 | if os.path.isfile(cuobjdump): 45 | output = subprocess.check_output("'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True) 46 | output = output.decode("utf-8").strip().split("\n") 47 | sm = [] 48 | for line in output: 49 | line = re.findall(r"\.sm_[0-9]*\.", line)[0] 50 | sm.append(line.strip(".")) 51 | sm = sorted(set(sm)) 52 | return ", ".join(sm) 53 | else: 54 | return so_file + "; cannot find cuobjdump" 55 | except Exception: 56 | # unhandled failure 57 | return so_file 58 | 59 | 60 | def collect_env_info(): 61 | data = [] 62 | data.append(("sys.platform", sys.platform)) 63 | data.append(("Python", sys.version.replace("\n", ""))) 64 | data.append(("numpy", np.__version__)) 65 | data.append(("Pillow", PIL.__version__)) 66 | 67 | data.append(("PyTorch", torch.__version__ + " @" + os.path.dirname(torch.__file__))) 68 | data.append(("PyTorch debug build", torch.version.debug)) 69 | 70 | has_cuda = torch.cuda.is_available() 71 | 72 | data.append(("CUDA available", has_cuda)) 73 | if has_cuda: 74 | devices = defaultdict(list) 75 | for k in range(torch.cuda.device_count()): 76 | devices[torch.cuda.get_device_name(k)].append(str(k)) 77 | for name, devids in devices.items(): 78 | data.append(("GPU " + ",".join(devids), name)) 79 | 80 | from torch.utils.cpp_extension import CUDA_HOME 81 | 82 | data.append(("CUDA_HOME", str(CUDA_HOME))) 83 | 84 | if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): 85 | try: 86 | nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") 87 | nvcc = subprocess.check_output("'{}' -V | tail -n1".format(nvcc), shell=True) 88 | nvcc = nvcc.decode("utf-8").strip() 89 | except subprocess.SubprocessError: 90 | nvcc = "Not Available" 91 | data.append(("NVCC", nvcc)) 92 | 93 | cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) 94 | if cuda_arch_list: 95 | 
data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) 96 | 97 | try: 98 | data.append( 99 | ( 100 | "torchvision", 101 | str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), 102 | ) 103 | ) 104 | if has_cuda: 105 | try: 106 | torchvision_C = importlib.util.find_spec("torchvision._C").origin 107 | msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) 108 | data.append(("torchvision arch flags", msg)) 109 | except ImportError: 110 | data.append(("torchvision._C", "failed to find")) 111 | except AttributeError: 112 | data.append(("torchvision", "unknown")) 113 | 114 | try: 115 | import cv2 116 | 117 | data.append(("cv2", cv2.__version__)) 118 | except ImportError: 119 | pass 120 | 121 | env_str = tabulate(data) + "\n" 122 | env_str += collect_git_info() 123 | env_str += "-" * 100 + "\n" 124 | env_str += collect_torch_env() 125 | return env_str 126 | 127 | 128 | def get_root_dir(): 129 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 130 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | clearml 2 | loguru 3 | Ninja 4 | numba 5 | opencv-contrib-python 6 | pandas 7 | scikit-image 8 | tabulate 9 | tensorboardX 10 | Pillow==9.4.0 11 | numpy==1.23.5 12 | visvalingamwyatt=0.2.0 -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export PYTHONPATH=$(pwd) 4 | 5 | case "$1" in 6 | "train") 7 | CONFIG_NAME=$2 8 | NUM_EPOCHS=$3 9 | python3 configs/"${CONFIG_NAME}".py -d 0-7 -b 1 -e ${NUM_EPOCHS} --sync_bn 8 --no-clearml 10 | ;; 11 | "test") 12 | CONFIG_NAME=$2 13 | CKPT=$3 14 | python3 configs/"${CONFIG_NAME}".py -d 0-7 --eval --ckpt "${CKPT}" 15 | ;; 16 | "train-continue") 17 | CONFIG_NAME=$2 18 | CKPT=$3 19 | python3 configs/"${CONFIG_NAME}".py -d 0-7 -b 1 -e 30 --sync_bn 8 --no-clearml --ckpt "${CKPT}" 20 | ;; 21 | "pipeline") 22 | CONFIG_NAME=$2 23 | NUM_EPOCHS=$3 24 | CKPT_ID=$((NUM_EPOCHS-1)) 25 | bash run.sh train ${CONFIG_NAME} ${NUM_EPOCHS} 26 | bash run.sh test ${CONFIG_NAME} outputs/${CONFIG_NAME}/latest/dump_model/checkpoint_epoch_${CKPT_ID}.pth 27 | ;; 28 | "reproduce") 29 | CONFIG_NAME=$2 30 | bash run.sh pipeline ${CONFIG_NAME} 30 31 | bash run.sh pipeline ${CONFIG_NAME} 110 32 | ;; 33 | *) 34 | echo "error" 35 | esac 36 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/__init__.py -------------------------------------------------------------------------------- /tools/anno_converter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/anno_converter/__init__.py -------------------------------------------------------------------------------- /tools/anno_converter/bezier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from shapely.geometry import LineString 4 | from scipy.special import comb as n_over_k 5 | 6 | 7 | class PiecewiseBezierCurve(object): 8 | def __init__(self, num_points=100, 
num_degree=2, margin=0.05, threshold=0.1): 9 | super().__init__() 10 | self.num_points = num_points 11 | self.num_degree = num_degree 12 | self.margin = margin 13 | self.bezier_coefficient = self._get_bezier_coefficients(np.linspace(0, 1, self.num_points)) 14 | self.threshold = threshold 15 | 16 | def _get_bezier_coefficients(self, t_list): 17 | bernstein_fn = lambda n, t, k: (t ** k) * ((1 - t) ** (n - k)) * n_over_k(n, k) 18 | bezier_coefficient_fn = \ 19 | lambda ts: [[bernstein_fn(self.num_degree, t, k) for k in range(self.num_degree + 1)] for t in t_list] 20 | return np.array(bezier_coefficient_fn(t_list)) 21 | 22 | def _get_interpolated_points(self, points): 23 | line = LineString(points) 24 | distances = np.linspace(0, line.length, self.num_points) 25 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 26 | return sampled_points 27 | 28 | def _get_chamfer_distance(self, points_before, points_after): 29 | points_before = torch.from_numpy(points_before).float() 30 | points_after = torch.from_numpy(points_after).float() 31 | dist = torch.cdist(points_before, points_after) 32 | dist1, _ = torch.min(dist, 2) 33 | dist1 = (dist1 * (dist1 > self.margin).float()) 34 | dist2, _ = torch.min(dist, 1) 35 | dist2 = (dist2 * (dist2 > self.margin).float()) 36 | return (dist1.mean(-1) + dist2.mean(-1)) / 2 37 | 38 | def bezier_fitting(self, curve_pts): 39 | curve_pts_intered = self._get_interpolated_points(curve_pts) 40 | bezier_ctrl_pts = np.linalg.pinv(self.bezier_coefficient).dot(curve_pts_intered) 41 | bezier_ctrl_pts = np.concatenate([curve_pts[0:1], bezier_ctrl_pts[1:-1], curve_pts[-1:]], axis=0) 42 | curve_pts_recovery = self.bezier_coefficient.dot(bezier_ctrl_pts) 43 | criterion = self._get_chamfer_distance(curve_pts_intered[None, :, :], curve_pts_recovery[None, :, :]).item() 44 | return bezier_ctrl_pts, criterion 45 | 46 | @staticmethod 47 | def sequence_reverse(ctr_points): 48 | ctr_points = np.array(ctr_points) 49 | (xs, ys), (xe, ye) = ctr_points[0], ctr_points[-1] 50 | if ys > ye: 51 | ctr_points = ctr_points[::-1] 52 | return ctr_points 53 | 54 | def __call__(self, curve_pts): 55 | ctr_points_piecewise = [] 56 | num_points = curve_pts.shape[0] 57 | start, end = 0, num_points - 1 58 | while start < end: 59 | ctr_points, loss = self.bezier_fitting(curve_pts[start: end + 1]) 60 | if loss < self.threshold: 61 | start, end = end, num_points - 1 62 | if start >= end: 63 | ctr_points_piecewise += ctr_points.tolist() 64 | else: 65 | ctr_points_piecewise += ctr_points.tolist()[:-1] 66 | else: 67 | end = end - 1 68 | ctr_points_piecewise = self.sequence_reverse(ctr_points_piecewise) 69 | return ctr_points_piecewise 70 | -------------------------------------------------------------------------------- /tools/anno_converter/generate_pivots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import visvalingamwyatt as vw 3 | 4 | class GenPivots: 5 | def __init__(self, max_pts=[10, 2, 30], map_region=(30, -30, 15, -15), vm_thre=2.0, resolution=0.15): 6 | self.max_pts = max_pts 7 | self.map_region = map_region 8 | self.vm_thre = vm_thre 9 | self.resolution = resolution 10 | 11 | def pivots_generate(self, map_vectors): 12 | pivots_single_frame = {0:[], 1:[], 2:[]} 13 | lengths_single_frame = {0:[], 1:[], 2:[]} 14 | for ii, vec in enumerate(map_vectors): 15 | pts = np.array(vec["pts"]) * self.resolution # 转成 m 16 | pts = pts[:, ::-1] 17 | cls = vec["type"] 18 | 19 | # If the 
difference in x is obvious (greater than 1m), then rank according to x. 20 | # If the difference in x is not obvious, rank according to y. 21 | if (np.abs(pts[0][0]-pts[-1][0])>1 and pts[0][0] 0: 35 | new_pts[:, :, 0] = new_pts[:, :, 0] / (2 * self.map_region[0]) # normalize 36 | new_pts[:, :, 1] = new_pts[:, :, 1] / (2 * self.map_region[2]) 37 | pivots_single_frame[cls] = new_pts 38 | lengths_single_frame[cls] = np.array(lengths_single_frame[cls]) 39 | 40 | return pivots_single_frame, lengths_single_frame 41 | 42 | def pad_pts(self, pts, tgt_length): 43 | if len(pts) >= tgt_length: 44 | return pts[:tgt_length] 45 | pts = np.concatenate([pts, np.zeros((tgt_length-len(pts), 2))], axis=0) 46 | return pts 47 | -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenjie710/PivotNet/3f334e499bae6c9e2f3ed06cf77ac6cbd22d0ba8/tools/anno_converter/nuscenes/__init__.py -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/convert.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | from tqdm import tqdm 5 | from nuscenes import NuScenes 6 | from pyquaternion import Quaternion 7 | from torch.utils.data import Dataset 8 | from rasterize import RasterizedLocalMap 9 | from vectorize import VectorizedLocalMap 10 | from tools.anno_converter.generate_pivots import GenPivots 11 | 12 | 13 | class NuScenesDataset(Dataset): 14 | def __init__(self, version, dataroot, xbound=(-30., 30., 0.15), ybound=(-15., 15., 0.15)): 15 | super(NuScenesDataset, self).__init__() 16 | patch_h = ybound[1] - ybound[0] 17 | patch_w = xbound[1] - xbound[0] 18 | canvas_h = int(patch_h / ybound[2]) 19 | canvas_w = int(patch_w / xbound[2]) 20 | self.patch_size = (patch_h, patch_w) 21 | self.canvas_size = (canvas_h, canvas_w) 22 | self.nusc = NuScenes(version=version, dataroot=dataroot, verbose=False) 23 | self.vector_map = VectorizedLocalMap(dataroot, patch_size=self.patch_size, canvas_size=self.canvas_size) 24 | 25 | def __len__(self): 26 | return len(self.nusc.sample) 27 | 28 | def __getitem__(self, idx): 29 | record = self.nusc.sample[idx] 30 | location = self.nusc.get('log', self.nusc.get('scene', record['scene_token'])['log_token'])['location'] 31 | ego_pose = self.nusc.get('ego_pose', 32 | self.nusc.get('sample_data', record['data']['LIDAR_TOP'])['ego_pose_token']) 33 | vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation']) 34 | imgs, trans, rots, intrins = self.get_data_info(record) 35 | return imgs, np.stack(trans), np.stack(rots), np.stack(intrins), vectors 36 | 37 | def get_data_info(self, record): 38 | imgs, trans, rots, intrins = [], [], [], [] 39 | for cam in ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']: 40 | samp = self.nusc.get('sample_data', record['data'][cam]) 41 | imgs.append(samp['filename']) 42 | sens = self.nusc.get('calibrated_sensor', samp['calibrated_sensor_token']) 43 | trans.append(sens['translation']) 44 | rots.append(Quaternion(sens['rotation']).rotation_matrix) 45 | intrins.append(sens['camera_intrinsic']) 46 | return imgs, trans, rots, intrins 47 | 48 | 49 | class NuScenesSemanticDataset(NuScenesDataset): 50 | def __init__(self, version, dataroot, xbound, ybound, 
thickness, num_degrees, max_channel=3, bezier=False): 51 | super(NuScenesSemanticDataset, self).__init__(version, dataroot, xbound, ybound) 52 | self.raster_map = RasterizedLocalMap(self.patch_size, self.canvas_size, num_degrees, max_channel, thickness, bezier=bezier) 53 | self.pivot_gen = GenPivots(map_region=(xbound[1], xbound[0], ybound[1], ybound[0]), resolution=xbound[2]) 54 | 55 | def __getitem__(self, idx): 56 | record = self.nusc.sample[idx] 57 | location = self.nusc.get('log', self.nusc.get('scene', record['scene_token'])['log_token'])['location'] 58 | ego_pose = self.nusc.get('ego_pose', self.nusc.get('sample_data', record['data']['LIDAR_TOP'])['ego_pose_token']) 59 | vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation']) 60 | imgs, trans, rots, intrins = self.get_data_info(record) 61 | semantic_masks, instance_masks, instance_vec_points, instance_ctr_points = \ 62 | self.raster_map.convert_vec_to_mask(vectors) 63 | pivots, pivot_lengths = self.pivot_gen.pivots_generate(instance_vec_points) 64 | 65 | return imgs, np.stack(trans), np.stack(rots), np.stack(intrins), semantic_masks, instance_masks, \ 66 | vectors, instance_vec_points, instance_ctr_points, pivots, pivot_lengths 67 | 68 | 69 | def main(): 70 | parser = argparse.ArgumentParser(description='Pivot-Bezier GT Generator.') 71 | parser.add_argument('-d', '--data_root', type=str, default='/data/dataset/public/nuScenes-tt') 72 | parser.add_argument('-v', '--version', nargs='+', type=str, default=['v1.0-trainval']) 73 | parser.add_argument("--num_degrees", nargs='+', type=int, default=[2, 1, 3]) 74 | parser.add_argument("--thickness", nargs='+', type=int, default=[1, 8]) 75 | parser.add_argument("--xbound", nargs=3, type=float, default=[-30.0, 30.0, 0.15]) 76 | parser.add_argument("--ybound", nargs=3, type=float, default=[-15.0, 15.0, 0.15]) 77 | parser.add_argument("--bezier", default=False, action='store_true') # whether to generate bezier GT 78 | args = parser.parse_args() 79 | 80 | n_classes = len(args.num_degrees) # 0 --> divider(d=2), 1 --> crossing(d=1), 2--> contour(d=3) 81 | save_dir = os.path.join(args.data_root, 'customer', "pivot-bezier") 82 | os.makedirs(save_dir, exist_ok=True) 83 | for version in args.version: 84 | dataset = NuScenesSemanticDataset( 85 | version, args.data_root, args.xbound, args.ybound, args.thickness, args.num_degrees, max_channel=n_classes, bezier=args.bezier) 86 | for idx in tqdm(range(dataset.__len__())): 87 | file_path = os.path.join(save_dir, dataset.nusc.sample[idx]['token'] + '.npz') 88 | # if os.path.exists(file_path): 89 | # continue 90 | item = dataset.__getitem__(idx) 91 | np.savez_compressed( 92 | file_path, image_paths=np.array(item[0]), trans=item[1], rots=item[2], intrins=item[3], 93 | semantic_mask=item[4][0], instance_mask=item[5][0], instance_mask8=item[5][1], 94 | ego_vectors=item[6], map_vectors=item[7], ctr_points=item[8], pivot_pts=item[9], pivot_length=item[10], 95 | ) 96 | 97 | 98 | if __name__ == '__main__': 99 | main() 100 | -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/rasterize.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from shapely import affinity 4 | from shapely.geometry import LineString, box 5 | from tools.anno_converter.bezier import PiecewiseBezierCurve 6 | 7 | 8 | class RasterizedLocalMap(object): 9 | def __init__(self, patch_size, canvas_size, num_degrees, 
max_channel, thickness, patch_angle=0.0, bezier=False): 10 | super().__init__() 11 | self.patch_size = patch_size 12 | self.canvas_size = canvas_size 13 | self.max_channel = max_channel 14 | self.num_degrees = num_degrees 15 | self.thickness = thickness 16 | assert self.thickness[0] == 1 17 | self.patch_box = (0.0, 0.0, self.patch_size[0], self.patch_size[1]) 18 | self.patch_angle = patch_angle 19 | self.patch = self.get_patch_coord() 20 | self.bezier = bezier 21 | if bezier: 22 | self.pbc_funcs = { 23 | d: PiecewiseBezierCurve(num_points=100, num_degree=d, margin=0.05, threshold=0.1) for d in num_degrees 24 | } 25 | 26 | def convert_vec_to_mask(self, vectors): 27 | vector_num_list = {cls_idx: [] for cls_idx in range(self.max_channel)} # map-type -> list 28 | for vector in vectors: 29 | if vector['pts_num'] >= 2: 30 | vector_num_list[vector['type']].append(LineString(vector['pts'][:vector['pts_num']])) 31 | ins_idx = 1 # instance-index 32 | instance_masks = np.zeros( 33 | (len(self.thickness), self.max_channel, self.canvas_size[1], self.canvas_size[0]), np.uint8) 34 | instance_vec_points, instance_ctr_points = [], [] 35 | for cls_idx in range(self.max_channel): 36 | if self.bezier: 37 | pbc_func = self.pbc_funcs[self.num_degrees[cls_idx]] 38 | else: 39 | pbc_func = None 40 | masks, map_points, ctr_points, ins_idx = self.line_geom_to_mask(vector_num_list[cls_idx], ins_idx, pbc_func) 41 | instance_masks[:, cls_idx, :, :] = masks 42 | for pts in map_points: 43 | instance_vec_points.append({'pts': pts, 'pts_num': len(pts), 'type': cls_idx}) 44 | for pts in ctr_points: 45 | instance_ctr_points.append({'pts': pts, 'pts_num': len(pts), 'type': cls_idx}) 46 | instance_masks = np.stack(instance_masks).astype(np.uint8) 47 | semantic_masks = (instance_masks != 0).astype(np.uint8) 48 | return semantic_masks, instance_masks, instance_vec_points, instance_ctr_points 49 | 50 | def line_geom_to_mask(self, layer_geom, idx, pbc_func, trans_type='index'): 51 | patch_x, patch_y, patch_h, patch_w = self.patch_box 52 | canvas_h = self.canvas_size[0] 53 | canvas_w = self.canvas_size[1] 54 | scale_height = canvas_h / patch_h 55 | scale_width = canvas_w / patch_w 56 | trans_x = -patch_x + patch_w / 2.0 57 | trans_y = -patch_y + patch_h / 2.0 58 | map_masks = np.zeros((len(self.thickness), *self.canvas_size), np.uint8) 59 | map_points, ctr_points = [], [] 60 | for line in layer_geom: 61 | new_line = line.intersection(self.patch) 62 | if not new_line.is_empty: 63 | new_line = affinity.affine_transform(new_line, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y]) 64 | if new_line.geom_type == 'MultiLineString': 65 | for single_line in new_line: 66 | pts2 = self.patch_size - np.array(single_line.coords[:])[:, ::-1] 67 | if pbc_func is not None: 68 | ctr_points.append(pbc_func(pts2)) 69 | single_line = affinity.scale(single_line, xfact=scale_width, yfact=scale_height, origin=(0, 0)) 70 | map_masks, idx = self.mask_for_lines(single_line, map_masks, self.thickness, idx, trans_type) 71 | pts = self.canvas_size - np.array(single_line.coords[:])[:, ::-1] 72 | map_points.append(pts.tolist()) 73 | else: 74 | pts2 = self.patch_size - np.array(new_line.coords[:])[:, ::-1] 75 | if pbc_func is not None: 76 | ctr_points.append(pbc_func(pts2)) 77 | new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0)) 78 | map_masks, idx = self.mask_for_lines(new_line, map_masks, self.thickness, idx, trans_type) 79 | pts = self.canvas_size - np.array(new_line.coords[:])[:, ::-1] 80 | map_points.append(pts.tolist()) 81 | 
map_masks_ret = [] 82 | for i in range(len(self.thickness)): 83 | map_masks_ret.append(np.flip(np.rot90(map_masks[i][None], k=1, axes=(1, 2)), axis=2)[0]) 84 | map_masks_ret = np.array(map_masks_ret) 85 | return map_masks_ret, map_points, ctr_points, idx 86 | 87 | @staticmethod 88 | def mask_for_lines(lines, mask, thickness, idx, trans_type='index'): 89 | coords = np.asarray(list(lines.coords), np.int32) 90 | coords = coords.reshape((-1, 2)) 91 | if len(coords) < 2: 92 | return mask, idx 93 | for i, t in enumerate(thickness): 94 | if trans_type == 'index': 95 | cv2.polylines(mask[i], [coords], False, color=idx, thickness=t) 96 | idx += 1 97 | return mask, idx 98 | 99 | def get_patch_coord(self): 100 | patch_x, patch_y, patch_h, patch_w = self.patch_box 101 | x_min = patch_x - patch_w / 2.0 102 | y_min = patch_y - patch_h / 2.0 103 | x_max = patch_x + patch_w / 2.0 104 | y_max = patch_y + patch_h / 2.0 105 | patch = box(x_min, y_min, x_max, y_max) 106 | patch = affinity.rotate(patch, self.patch_angle, origin=(patch_x, patch_y), use_radians=False) 107 | return patch 108 | -------------------------------------------------------------------------------- /tools/anno_converter/nuscenes/vectorize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from shapely import affinity, ops 3 | from nuscenes.eval.common.utils import quaternion_yaw, Quaternion 4 | from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer 5 | from shapely.geometry import LineString, box, MultiPolygon, MultiLineString 6 | 7 | 8 | class VectorizedLocalMap(object): 9 | def __init__(self, 10 | data_root, 11 | patch_size, 12 | canvas_size, 13 | line_classes=('road_divider', 'lane_divider'), 14 | ped_crossing_classes=('ped_crossing', ), 15 | contour_classes=('road_segment', 'lane'), 16 | sample_dist=1, 17 | num_samples=250, 18 | padding=False, 19 | normalize=False, 20 | fixed_num=-1, 21 | class2label={ 22 | 'road_divider': 0, 23 | 'lane_divider': 0, 24 | 'ped_crossing': 1, 25 | 'contours': 2, 26 | 'others': -1, 27 | }): 28 | super().__init__() 29 | self.data_root = data_root 30 | self.MAPS = ['boston-seaport', 'singapore-hollandvillage', 'singapore-onenorth', 'singapore-queenstown'] 31 | self.line_classes = line_classes 32 | self.ped_crossing_classes = ped_crossing_classes 33 | self.polygon_classes = contour_classes 34 | self.class2label = class2label 35 | self.nusc_maps = {} 36 | self.map_explorer = {} 37 | for loc in self.MAPS: 38 | self.nusc_maps[loc] = NuScenesMap(dataroot=self.data_root, map_name=loc) 39 | self.map_explorer[loc] = NuScenesMapExplorer(self.nusc_maps[loc]) 40 | self.patch_size = patch_size 41 | self.canvas_size = canvas_size 42 | self.sample_dist = sample_dist 43 | self.num_samples = num_samples 44 | self.padding = padding 45 | self.normalize = normalize 46 | self.fixed_num = fixed_num 47 | 48 | def gen_vectorized_samples(self, location, ego2global_translation, ego2global_rotation): 49 | map_pose = ego2global_translation[:2] # T 50 | rotation = Quaternion(ego2global_rotation) # R 51 | 52 | patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1]) 53 | patch_angle = quaternion_yaw(rotation) / np.pi * 180 54 | 55 | line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes, location) 56 | line_vector_dict = self.line_geoms_to_vectors(line_geom) 57 | 58 | ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes, location) 59 | ped_vector_list = 
self.line_geoms_to_vectors(ped_geom)['ped_crossing'] 60 | 61 | polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes, location) 62 | poly_bound_list = self.poly_geoms_to_vectors(polygon_geom) 63 | 64 | vectors = [] 65 | for line_type, vects in line_vector_dict.items(): 66 | for line, length in vects: 67 | vectors.append((line.astype(float), length, self.class2label.get(line_type, -1))) 68 | 69 | for ped_line, length in ped_vector_list: 70 | vectors.append((ped_line.astype(float), length, self.class2label.get('ped_crossing', -1))) 71 | 72 | for contour, length in poly_bound_list: 73 | vectors.append((contour.astype(float), length, self.class2label.get('contours', -1))) 74 | 75 | # filter out -1 76 | filtered_vectors = [] 77 | for pts, pts_num, _type in vectors: 78 | if _type != -1: 79 | filtered_vectors.append({'pts': pts, 'pts_num': pts_num, 'type': _type}) 80 | 81 | return filtered_vectors 82 | 83 | def get_map_geom(self, patch_box, patch_angle, layer_names, location): 84 | map_geom = [] 85 | for layer_name in layer_names: 86 | if layer_name in self.line_classes: 87 | map_explorer_x = self.map_explorer[location] 88 | geoms = map_explorer_x._get_layer_line(patch_box, patch_angle, layer_name) 89 | elif layer_name in self.polygon_classes: 90 | map_explorer_x = self.map_explorer[location] 91 | geoms = map_explorer_x._get_layer_polygon(patch_box, patch_angle, layer_name) 92 | elif layer_name in self.ped_crossing_classes: 93 | geoms = self.get_ped_crossing_line(patch_box, patch_angle, location) 94 | else: 95 | raise NotImplementedError 96 | map_geom.append((layer_name, geoms)) 97 | return map_geom 98 | 99 | def _one_type_line_geom_to_vectors(self, line_geom): 100 | line_vectors = [] 101 | for line in line_geom: 102 | if not line.is_empty: 103 | if line.geom_type == 'MultiLineString': 104 | for l in line.geoms: 105 | line_vectors.append(self.sample_pts_from_line(l)) 106 | elif line.geom_type == 'LineString': 107 | line_vectors.append(self.sample_pts_from_line(line)) 108 | else: 109 | raise NotImplementedError 110 | return line_vectors 111 | 112 | def poly_geoms_to_vectors(self, polygon_geom): 113 | roads = polygon_geom[0][1] 114 | lanes = polygon_geom[1][1] 115 | union_roads = ops.unary_union(roads) 116 | union_lanes = ops.unary_union(lanes) 117 | union_segments = ops.unary_union([union_roads, union_lanes]) 118 | max_x = self.patch_size[1] / 2 119 | max_y = self.patch_size[0] / 2 120 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2) 121 | exteriors = [] 122 | interiors = [] 123 | if union_segments.geom_type != 'MultiPolygon': 124 | union_segments = MultiPolygon([union_segments]) 125 | for poly in union_segments.geoms: 126 | exteriors.append(poly.exterior) 127 | for inter in poly.interiors: 128 | interiors.append(inter) 129 | 130 | results = [] 131 | for ext in exteriors: 132 | if ext.is_ccw: 133 | ext.coords = list(ext.coords)[::-1] 134 | lines = ext.intersection(local_patch) 135 | if isinstance(lines, MultiLineString): 136 | lines = ops.linemerge(lines) 137 | results.append(lines) 138 | 139 | for inter in interiors: 140 | if not inter.is_ccw: 141 | inter.coords = list(inter.coords)[::-1] 142 | lines = inter.intersection(local_patch) 143 | if isinstance(lines, MultiLineString): 144 | lines = ops.linemerge(lines) 145 | results.append(lines) 146 | 147 | return self._one_type_line_geom_to_vectors(results) 148 | 149 | def line_geoms_to_vectors(self, line_geom): 150 | line_vectors_dict = dict() 151 | for line_type, a_type_of_lines in line_geom: 152 | 
one_type_vectors = self._one_type_line_geom_to_vectors(a_type_of_lines) 153 | line_vectors_dict[line_type] = one_type_vectors 154 | 155 | return line_vectors_dict 156 | 157 | def ped_geoms_to_vectors(self, ped_geom): 158 | ped_geom = ped_geom[0][1] 159 | union_ped = ops.unary_union(ped_geom) 160 | if union_ped.geom_type != 'MultiPolygon': 161 | union_ped = MultiPolygon([union_ped]) 162 | 163 | max_x = self.patch_size[1] / 2 164 | max_y = self.patch_size[0] / 2 165 | local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2) 166 | results = [] 167 | for ped_poly in union_ped: 168 | # rect = ped_poly.minimum_rotated_rectangle 169 | ext = ped_poly.exterior 170 | if not ext.is_ccw: 171 | ext.coords = list(ext.coords)[::-1] 172 | lines = ext.intersection(local_patch) 173 | results.append(lines) 174 | 175 | return self._one_type_line_geom_to_vectors(results) 176 | 177 | def get_ped_crossing_line(self, patch_box, patch_angle, location): 178 | def add_line(poly_xy, idx, patch, patch_angle, patch_x, patch_y, line_list): 179 | points = [(p0, p1) for p0, p1 in zip(poly_xy[0, idx:idx + 2], poly_xy[1, idx:idx + 2])] 180 | line = LineString(points) 181 | line = line.intersection(patch) 182 | if not line.is_empty: 183 | line = affinity.rotate(line, -patch_angle, origin=(patch_x, patch_y), use_radians=False) 184 | line = affinity.affine_transform(line, [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y]) 185 | line_list.append(line) 186 | 187 | patch_x = patch_box[0] 188 | patch_y = patch_box[1] 189 | 190 | patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle) 191 | line_list = [] 192 | records = getattr(self.nusc_maps[location], 'ped_crossing') 193 | for record in records: 194 | polygon = self.map_explorer[location].extract_polygon(record['polygon_token']) 195 | poly_xy = np.array(polygon.exterior.xy) 196 | dist = np.square(poly_xy[:, 1:] - poly_xy[:, :-1]).sum(0) 197 | x1, x2 = np.argsort(dist)[-2:] 198 | 199 | add_line(poly_xy, x1, patch, patch_angle, patch_x, patch_y, line_list) 200 | add_line(poly_xy, x2, patch, patch_angle, patch_x, patch_y, line_list) 201 | 202 | return line_list 203 | 204 | def sample_pts_from_line(self, line): 205 | if self.fixed_num < 0: 206 | distances = np.arange(0, line.length, self.sample_dist) 207 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 208 | else: 209 | # fixed number of points, so distance is line.length / self.fixed_num 210 | distances = np.linspace(0, line.length, self.fixed_num) 211 | sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2) 212 | 213 | if self.normalize: 214 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]]) 215 | 216 | num_valid = len(sampled_points) 217 | 218 | if not self.padding or self.fixed_num > 0: 219 | # fixed num sample can return now! 220 | return sampled_points, num_valid 221 | 222 | # fixed distance sampling need padding! 
223 | num_valid = len(sampled_points) 224 | 225 | if self.fixed_num < 0: 226 | if num_valid < self.num_samples: 227 | padding = np.zeros((self.num_samples - len(sampled_points), 2)) 228 | sampled_points = np.concatenate([sampled_points, padding], axis=0) 229 | else: 230 | sampled_points = sampled_points[:self.num_samples, :] 231 | num_valid = self.num_samples 232 | 233 | if self.normalize: 234 | sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]]) 235 | num_valid = len(sampled_points) 236 | 237 | return sampled_points, num_valid 238 | -------------------------------------------------------------------------------- /tools/evaluation/ap.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from cd import chamfer_distance 3 | 4 | 5 | def get_line_instances_from_mask(mask, scale_x, scale_y): 6 | # mask: H, W 7 | # instance: {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)} 8 | indices = torch.unique(mask) 9 | instances = {} 10 | for idx in indices: 11 | idx = idx.item() 12 | if idx == 0: 13 | continue 14 | pc_x, pc_y = torch.where(mask == idx) 15 | pc_x = pc_x.float() * scale_x 16 | pc_y = pc_y.float() * scale_y 17 | coords = torch.stack([pc_x, pc_y], -1) 18 | instances[idx] = coords 19 | return instances 20 | 21 | 22 | def line_matching_by_CD(inst_pred_lines, inst_pred_confidence, inst_label_lines, threshold): 23 | # inst_pred_line: a list of points {1: (M1, 2), 2: (M2, 2), ..., k2: (M_k2, 2)} 24 | # inst_pred_confidence: a list of confidence [c1, c2, ..., ck2] 25 | # inst_label_line: a list of points {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)} 26 | # return: a list of {'pred': (M, 2), 'label': (N, 2), 'confidence': scalar} 27 | pred_num = len(inst_pred_lines) 28 | label_num = len(inst_label_lines) 29 | CD = torch.zeros((pred_num, label_num)).cuda() 30 | 31 | inst_pred_lines_keys = [*inst_pred_lines] 32 | inst_label_lines_keys = [*inst_label_lines] 33 | for i, key_pred in enumerate(inst_pred_lines_keys): 34 | for j, key_label in enumerate(inst_label_lines_keys): 35 | CD[i, j] = chamfer_distance( 36 | inst_pred_lines[key_pred][None], 37 | inst_label_lines[key_label][None], 38 | bidirectional=True, 39 | threshold=threshold, 40 | ) 41 | 42 | pred_taken = torch.zeros(pred_num, dtype=torch.bool).cuda() 43 | label_taken = torch.zeros(label_num, dtype=torch.bool).cuda() 44 | matched_list = [] 45 | if pred_num > 0 and label_num > 0: 46 | while True: 47 | idx = torch.argmin(CD) 48 | i = torch.div(idx, CD.shape[1], rounding_mode="floor") 49 | j = (idx % CD.shape[1]).item() 50 | # i, j = (idx // CD.shape[1]).item(), (idx % CD.shape[1]).item() 51 | if CD[i, j] >= threshold: 52 | break 53 | matched_list.append( 54 | { 55 | "pred": inst_pred_lines[inst_pred_lines_keys[i]], 56 | "confidence": inst_pred_confidence[inst_pred_lines_keys[i]], 57 | "label": inst_label_lines[inst_label_lines_keys[j]], 58 | "CD": CD[i, j].item(), 59 | } 60 | ) 61 | pred_taken[i] = True 62 | label_taken[j] = True 63 | CD[i, :] = threshold 64 | CD[:, j] = threshold 65 | 66 | for i in range(pred_num): 67 | if not pred_taken[i]: 68 | matched_list.append( 69 | { 70 | "pred": inst_pred_lines[inst_pred_lines_keys[i]], 71 | "confidence": inst_pred_confidence[inst_pred_lines_keys[i]], 72 | "label": None, 73 | "CD": threshold, 74 | } 75 | ) 76 | 77 | for j in range(label_num): 78 | if not label_taken[j]: 79 | matched_list.append( 80 | { 81 | "pred": None, 82 | "confidence": 0, 83 | "label": inst_label_lines[inst_label_lines_keys[j]], 84 | "CD": threshold, 85 | } 
86 | ) 87 | 88 | return matched_list 89 | 90 | 91 | def instance_mask_ap( 92 | AP_matrix, 93 | AP_count_matrix, 94 | inst_pred_mask, 95 | inst_label_mask, 96 | scale_x, 97 | scale_y, 98 | confidence, 99 | thresholds, 100 | sampled_recalls, 101 | ): 102 | # inst_pred: N, C, H, W 103 | # inst_label: N, C, H, W 104 | # confidence: N, max_instance_num 105 | N, C, H, W = inst_label_mask.shape 106 | for n in range(N): 107 | for c in range(C): 108 | inst_pred_lines = get_line_instances_from_mask(inst_pred_mask[n, c], scale_x, scale_y) 109 | inst_label_lines = get_line_instances_from_mask(inst_label_mask[n, c], scale_x, scale_y) 110 | if len(inst_pred_lines) == 0 and len(inst_label_lines) == 0: 111 | continue 112 | AP_matrix[c] += single_instance_line_AP( 113 | inst_pred_lines, confidence[n], inst_label_lines, thresholds, sampled_recalls=sampled_recalls 114 | ) 115 | AP_count_matrix[c] += 1 116 | return AP_matrix, AP_count_matrix 117 | 118 | 119 | def single_instance_line_AP(inst_pred_lines, inst_pred_confidence, inst_label_lines, thresholds, sampled_recalls): 120 | # inst_pred_line: a list of points {1: (M1, 2), 2: (M2, 2), ..., k2: (M_k2, 2)} 121 | # inst_pred_confidence: a list of confidence [c1, c2, ..., ck2] 122 | # inst_label_line: a list of points {1: (N1, 2), 2: (N2, 2), ..., k1: (N_k1, 2)} 123 | # thresholds: threshold of chamfer distance to identify TP 124 | num_thres = len(thresholds) 125 | AP_thres = torch.zeros(num_thres).cuda() 126 | for t in range(num_thres): 127 | matching_list = line_matching_by_CD(inst_pred_lines, inst_pred_confidence, inst_label_lines, thresholds[t]) 128 | precision, recall = get_precision_recall_curve_by_confidence( 129 | matching_list, len(inst_label_lines), thresholds[t] 130 | ) 131 | precision, recall = smooth_PR_curve(precision, recall) 132 | AP = calc_AP_from_precision_recall(precision, recall, sampled_recalls) 133 | AP_thres[t] = AP 134 | return AP_thres 135 | 136 | 137 | def get_precision_recall_curve_by_confidence(matching_list, num_gt, threshold): 138 | matching_list = sorted(matching_list, key=lambda x: x["confidence"]) 139 | 140 | TP = [0] 141 | FP = [0] 142 | for match_item in matching_list: 143 | pred = match_item["pred"] 144 | label = match_item["label"] 145 | dist = match_item["CD"] 146 | 147 | if pred is None: 148 | continue 149 | 150 | if label is None: 151 | TP.append(TP[-1]) 152 | FP.append(FP[-1] + 1) 153 | continue 154 | 155 | if dist < threshold: 156 | TP.append(TP[-1] + 1) 157 | FP.append(FP[-1]) 158 | else: 159 | TP.append(TP[-1]) 160 | FP.append(FP[-1] + 1) 161 | 162 | TP = torch.tensor(TP[1:]).cuda() 163 | FP = torch.tensor(FP[1:]).cuda() 164 | 165 | precision = TP / (TP + FP) 166 | recall = TP / num_gt 167 | return precision, recall 168 | 169 | 170 | def smooth_PR_curve(precision, recall): 171 | idx = torch.argsort(recall) 172 | recall = recall[idx] 173 | precision = precision[idx] 174 | length = len(precision) 175 | for i in range(length - 1, 0, -1): 176 | precision[:i][precision[:i] < precision[i]] = precision[i] 177 | return precision, recall 178 | 179 | 180 | def calc_AP_from_precision_recall(precision, recall, sampled_recalls): 181 | acc_precision = 0.0 182 | total = len(sampled_recalls) 183 | for r in sampled_recalls: 184 | idx = torch.where(recall >= r)[0] 185 | if len(idx) == 0: 186 | continue 187 | idx = idx[0] 188 | acc_precision += precision[idx] 189 | return acc_precision / total 190 | -------------------------------------------------------------------------------- /tools/evaluation/cd.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def chamfer_distance(source_pc, target_pc, threshold, cum=False, bidirectional=True): 4 | torch.backends.cuda.matmul.allow_tf32 = False 5 | torch.backends.cudnn.allow_tf32 = False 6 | # dist = torch.cdist(source_pc.float(), target_pc.float()) 7 | # dist = torch.cdist(source_pc.float(), target_pc.float(), compute_mode='donot_use_mm_for_euclid_dist') 8 | dist = torch.cdist(source_pc.type(torch.float64), target_pc.type(torch.float64)) 9 | dist1, _ = torch.min(dist, 2) 10 | dist2, _ = torch.min(dist, 1) 11 | if cum: 12 | len1 = dist1.shape[-1] 13 | len2 = dist2.shape[-1] 14 | dist1 = dist1.sum(-1) 15 | dist2 = dist2.sum(-1) 16 | return dist1, dist2, len1, len2 17 | dist1 = dist1.mean(-1) 18 | dist2 = dist2.mean(-1) 19 | if bidirectional: 20 | return min((dist1 + dist2) / 2, threshold) 21 | else: 22 | return min(dist1, threshold), min(dist2, threshold) 23 | 24 | 25 | def semantic_mask_chamfer_dist_cum(seg_pred, seg_label, scale_x, scale_y, threshold): 26 | # seg_label: N, C, H, W 27 | # seg_pred: N, C, H, W 28 | N, C, H, W = seg_label.shape 29 | 30 | cum_CD1 = torch.zeros(C, device=seg_label.device) 31 | cum_CD2 = torch.zeros(C, device=seg_label.device) 32 | cum_num1 = torch.zeros(C, device=seg_label.device) 33 | cum_num2 = torch.zeros(C, device=seg_label.device) 34 | for n in range(N): 35 | for c in range(C): 36 | pred_pc_x, pred_pc_y = torch.where(seg_pred[n, c] != 0) 37 | label_pc_x, label_pc_y = torch.where(seg_label[n, c] != 0) 38 | pred_pc_x = pred_pc_x.float() * scale_x 39 | pred_pc_y = pred_pc_y.float() * scale_y 40 | label_pc_x = label_pc_x.float() * scale_x 41 | label_pc_y = label_pc_y.float() * scale_y 42 | if len(pred_pc_x) == 0 and len(label_pc_x) == 0: 43 | continue 44 | 45 | if len(label_pc_x) == 0: 46 | cum_CD1[c] += len(pred_pc_x) * threshold 47 | cum_num1[c] += len(pred_pc_x) 48 | continue 49 | 50 | if len(pred_pc_x) == 0: 51 | cum_CD2[c] += len(label_pc_x) * threshold 52 | cum_num2[c] += len(label_pc_x) 53 | continue 54 | 55 | pred_pc_coords = torch.stack([pred_pc_x, pred_pc_y], -1).float() 56 | label_pc_coords = torch.stack([label_pc_x, label_pc_y], -1).float() 57 | CD1, CD2, len1, len2 = chamfer_distance( 58 | pred_pc_coords[None], label_pc_coords[None], threshold=threshold, cum=True 59 | ) 60 | cum_CD1[c] += CD1.item() 61 | cum_CD2[c] += CD2.item() 62 | cum_num1[c] += len1 63 | cum_num2[c] += len2 64 | return cum_CD1, cum_CD2, cum_num1, cum_num2 65 | -------------------------------------------------------------------------------- /tools/evaluation/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import numpy as np 5 | import pickle as pkl 6 | from tqdm import tqdm 7 | from tabulate import tabulate 8 | from torch.utils.data import Dataset, DataLoader 9 | from ap import instance_mask_ap as get_batch_ap 10 | 11 | 12 | class BeMapNetResultForNuScenes(Dataset): 13 | def __init__(self, gt_dir, dt_dir, val_txt): 14 | self.gt_dir, self.dt_dir = gt_dir, dt_dir 15 | self.tokens = [fname.strip().split('.')[0] for fname in open(val_txt).readlines()] 16 | self.max_line_count = 100 17 | 18 | def __getitem__(self, idx): 19 | token = self.tokens[idx] 20 | gt_path = os.path.join(self.gt_dir, f"{token}.npz") 21 | gt_masks = np.load(open(gt_path, "rb"), allow_pickle=True)["instance_mask"] 22 | dt_item = np.load(os.path.join(self.dt_dir, f"{token}.npz"), allow_pickle=True) 23 | dt_masks = 
dt_item["dt_mask"] 24 | dt_scores = dt_item['dt_res'].item()["confidence_level"] 25 | dt_scores = np.array(list(dt_scores) + [-1] * (self.max_line_count - len(dt_scores))) 26 | return torch.from_numpy(dt_masks), torch.from_numpy(dt_scores).float(), torch.from_numpy(gt_masks) 27 | 28 | def __len__(self): 29 | return len(self.tokens) 30 | 31 | 32 | class BeMapNetEvaluatorForNuScenes(object): 33 | def __init__(self, gt_dir, dt_dir, val_txt, batch_size=4, num_classes=3, map_resolution=(0.15, 0.15)): 34 | 35 | self.THRESHOLDS = [0.2, 0.5, 1.0, 1.5] 36 | self.CLASS_NAMES = ["Divider", "PedCross", "Contour"] 37 | self.SAMPLED_RECALLS = torch.linspace(0.1, 1, 10).cuda() 38 | self.res_dataloader = DataLoader( 39 | BeMapNetResultForNuScenes(gt_dir, dt_dir, val_txt), 40 | batch_size=batch_size, shuffle=False, drop_last=False, num_workers=8 41 | ) 42 | self.map_resolution = map_resolution 43 | self.ap_matrix = torch.zeros((num_classes, len(self.THRESHOLDS))).cuda() 44 | self.ap_count_matrix = torch.zeros((num_classes, len(self.THRESHOLDS))).cuda() 45 | 46 | def execute(self): 47 | 48 | for dt_masks, dt_scores, gt_masks in tqdm(self.res_dataloader): 49 | self.ap_matrix, self.ap_count_matrix = get_batch_ap( 50 | self.ap_matrix, 51 | self.ap_count_matrix, 52 | dt_masks.cuda(), 53 | gt_masks.cuda(), 54 | *self.map_resolution, 55 | dt_scores.cuda(), 56 | self.THRESHOLDS, 57 | self.SAMPLED_RECALLS, 58 | ) 59 | ap = (self.ap_matrix / self.ap_count_matrix).cpu().data.numpy() 60 | self._format_print(ap) 61 | 62 | def _format_print(self, ap): 63 | res_matrix = [] 64 | table_header = ["Class", "AP@.2", "AP@.5", "AP@1.", "AP@1.5", "mAP@HARD", "mAP@EASY"] 65 | table_values = [] 66 | for i, cls_name in enumerate(self.CLASS_NAMES): 67 | res_matrix_line = [ap[i][0], ap[i][1], ap[i][2], ap[i][3], np.mean(ap[i][:-1]), np.mean(ap[i][1:])] 68 | res_matrix.append(res_matrix_line) 69 | table_values.append([cls_name] + self.line_data_to_str(*res_matrix_line)) 70 | avg = np.mean(np.array(res_matrix), axis=0) 71 | table_values.append(["Average", *self.line_data_to_str(*avg)]) 72 | table_str = tabulate(table_values, headers=table_header, tablefmt="grid") 73 | print(table_str) 74 | return table_str 75 | 76 | @staticmethod 77 | def line_data_to_str(ap0, ap1, ap2, ap3, map1, map2): 78 | return [ 79 | "{:.1f}".format(ap0 * 100), 80 | "{:.1f}".format(ap1 * 100), 81 | "{:.1f}".format(ap2 * 100), 82 | "{:.1f}".format(ap3 * 100), 83 | "{:.1f}".format(map1 * 100), 84 | "{:.1f}".format(map2 * 100), 85 | ] 86 | 87 | 88 | evaluator = BeMapNetEvaluatorForNuScenes( 89 | gt_dir=sys.argv[1], 90 | dt_dir=sys.argv[2], 91 | val_txt=sys.argv[3], 92 | batch_size=4, 93 | num_classes=3, 94 | map_resolution=(0.15, 0.15), 95 | ) 96 | 97 | evaluator.execute() 98 | --------------------------------------------------------------------------------