├── LICENSE ├── README.md ├── configs ├── bisenetv2.yml ├── bisenetv2_b32.yml ├── ddrnet.yml ├── deeplabv3p.yml ├── hrnet_w18_s.yml ├── hrsegnetb16.yml ├── hrsegnetb32.yml ├── hrsegnetb48.yml ├── hrsegnetb64_bs16.yml ├── ocrnet_hrnetw18.yml ├── ocrnet_hrsegb64_bs16.yml ├── pspnet.yml ├── rucnet_crackseg9k.yml ├── stdcseg.yml ├── u2cracknet_crackseg9k.yml ├── unet.yml └── unet_focalloss_adam_crackseg9k.yml ├── fig ├── fig1.png ├── fig5.png └── fig8.png └── models ├── bisenetv2.py ├── ddrnet.py ├── deeplabv3p.py ├── hrnet_w18_s.py ├── hrsegb64.py ├── hrsegnet_b16.py ├── hrsegnet_b16_d4.py ├── hrsegnet_b16_d5.py ├── hrsegnet_b32.py ├── hrsegnet_b48.py ├── hrsegnet_b64.py ├── ocrnet.py ├── pspnet.py ├── rucnet.py ├── stdcseg.py ├── u2cracknet.py └── unet.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HrSegNet4CrackSegmentation 2 | Real-time High-Resolution Neural Network with Semantic Guidance for Crack Segmentation 3 | 4 | # Abstract 5 | The current trend in crack detection methods is leaning towards the use of machine learning or deep learning. 
This is because deep learning-based methods can autonomously extract features from images, thereby avoiding the low stability caused by manually designed operators. However, there are still some problems with current deep learning-based crack segmentation algorithms. Firstly, the vast majority of research is based on the modification and improvement of commonly used scene segmentation algorithms, with none specifically designed for the crack segmentation task. Secondly, crack detection is increasingly reliant on edge devices, such as drones and vehicle-mounted cameras, so the model must be lightweight to achieve real-time segmentation efficiency; however, there is currently limited research in this area. We propose a high-resolution neural network with semantic guidance for real-time crack segmentation, named HrSegNet. 6 | 7 | # Update 8 | 2023-08-20 9 | 10 | A comparison of the following three models has been added. We implemented these three models; their files are in `models`. The trained models and logs are in [UNet_focal](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EcxRe1WntKdOtK-_O1pHIvEBwm-b9ohinTU-03JZ_Y4UMw?e=F7pqOM), [U2CrackNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdSbeqvE-KtAtHjZj2-iS30BetzzRl9f2ockiASAgyea8A?e=3TwbYB), and [RUCNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EUbnDwGhVOZNuRrhe1Ksf0IBn7xrCjgaMVp5S5ehRJIkFA?e=3FuMHM). 11 | * Liu, Zhenqing, et al. "Computer vision-based concrete crack detection using U-net fully convolutional networks." Automation in Construction 104 (2019): 129-139. 12 | * Shi, Pengfei, et al. "U2CrackNet: a deeper architecture with two-level nested U-structure for pavement crack detection." Structural Health Monitoring 22.4 (2023): 2910-2921. 13 | * Yu, Gui, et al. "RUC-Net: A Residual-Unet-Based Convolutional Neural Network for Pixel-Level Pavement Crack Segmentation." Sensors 23.1 (2022): 53. 14 | 15 | 16 | 17 | 2023-08-12 18 | 19 | Update [Concrete3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdzjOhykuQxDjRgs6k-5PU0BtJntPGtTo445f4lBv5HV4Q?e=MCOv5W). In the original Concrete3k, some of the images and labels did not match; we have corrected and re-uploaded them. The corresponding cross-dataset results will also be updated. 20 | 21 | 2023-07-17 22 | * Add new datasets: [Asphalt3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EVj4M3fxfcFEuUToiO1QODEBtUuSPXE5FQONgNYti7PDFQ?e=IwZgXT), [Concrete3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdzjOhykuQxDjRgs6k-5PU0BtJntPGtTo445f4lBv5HV4Q?e=MCOv5W). Asphalt3k is sourced from [Yang](https://www.mdpi.com/2076-3417/12/19/10089) and Concrete3k from [Wang](https://www.sciencedirect.com/science/article/pii/S0926580522001480). 23 | * Add weight files pre-trained on CrackSeg9k, along with their corresponding training logs. 24 | 25 | 2023-07-02 26 | 27 | We are conducting more comparative experiments using a new pavement dataset that is being manually annotated at the expert level. The results and data will be published soon. We will release the trained model parameters so that you can quickly test them. 
28 | ### Model Architecture 29 | ![Alt text](./fig/fig1.png) 30 | ### [Seg-Grad-CAM](https://arxiv.org/abs/2002.11434) 31 | ![Alt text](./fig/fig5.png) 32 | ### Comparisons with state-of-the-art 33 | ![Alt text](./fig/fig8.png) 34 | 35 | 36 | # Data 37 | * [CrackSeg9k](https://github.com/Dhananjay42/crackseg9k) 38 | * [Asphalt3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EVj4M3fxfcFEuUToiO1QODEBtUuSPXE5FQONgNYti7PDFQ?e=IwZgXT) 39 | * [Concrete3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdzjOhykuQxDjRgs6k-5PU0BtJntPGtTo445f4lBv5HV4Q?e=MCOv5W) 40 | 41 | We train the model on a comprehensive dataset (CrackSeg9k) and subsequently transfer it to two specific downstream scenarios: asphalt (Asphalt3k) and concrete (Concrete3k). 42 | # Installation 43 | The code requires python>=3.8, paddle=2.4.1, paddleseg=2.7.0, and OpenCV=4.7.0. You can follow the instructions for [paddle](https://github.com/PaddlePaddle/Paddle) and [paddleseg](https://github.com/PaddlePaddle/PaddleSeg) to install all the dependencies. To reproduce the results, you must install paddle with CUDA support. 44 | 45 | # How to use 46 | Once paddle and paddleseg are installed, you can use our published models very easily. 47 | 48 | We start by describing the contents of each directory. The directory `models` defines the high-resolution crack segmentation models we designed; the three model files are almost identical except for the parameter `base`. The files of the models we compare against are also included. The directory `configs` contains the configuration files for all models, i.e., the full training and testing parameters. 49 | 50 | The easiest way to use our models is through [paddleseg](https://github.com/PaddlePaddle/PaddleSeg). Put the files of the desired models into paddleseg's models directory and register each model with the `@manager.MODELS.add_component` decorator. To train a model, use the configuration files we provide in `configs`; minimal sketches of registration and engine inference follow the TensorRT notes below. 51 | 52 | All data are publicly available. 53 | 54 | 55 | # Trained models 56 | **On CrackSeg9k** 57 | * [HrSegNet-B16](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EZWMNQXFtTpPl-SnUyoKpS0B2EDCDZIn2SX00C0AI_U-Jg?e=o0gqxN) 58 | * [HrSegNet-B32](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EVaZjUC9tVNMoMkbNOdmemEBh6xPEBUzo2-0ddjGl3bfRQ?e=MWs6Z9) 59 | * [HrSegNet-B48](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdoG_do5oFdPmP6NDqWh8AEBh1CfTl6SxD6DX_smxl9WFA?e=WAr0Fi) 60 | * [HrSegNet-B64(bs=16)](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/ETzpUJ9FkN1CoTOO1PB1-68BNYNdqtB0gowlkjzuNJCtQw?e=rCkTGO) 61 | * [HRNet-W18(bs=16)](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EQcoB7KEbMZHidBi2JchS78BoeI35zALH0m6w3727u7HGA?e=nNDb39) 62 | 63 | 64 | # Model (TensorRT engine) 65 | We release all our models as TensorRT engines, including the SOTA models used for comparison in all experiments. Note that all inputs to the TensorRT engines are **1 × 3 × 400 × 400**. We use TensorRT 8.6.1. 
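As a complement to the *How to use* notes above, the snippet below sketches registering one of our models with PaddleSeg. Treat it as a minimal sketch rather than a definitive recipe: it assumes `models/hrsegnet_b48.py` from this repository has been copied somewhere importable by PaddleSeg, and the exact training CLI may differ between PaddleSeg versions.

```python
# Minimal sketch: register HrSegNet with PaddleSeg so that configs can
# resolve "type: HrSegNetB48". Calling add_component() directly is
# equivalent to decorating the class definition with
# @manager.MODELS.add_component.
from paddleseg.cvlibs import manager

from models.hrsegnet_b48 import HrSegNetB48  # class name as used in configs/hrsegnetb48.yml

manager.MODELS.add_component(HrSegNetB48)

# Training then uses the provided config via PaddleSeg's CLI, e.g. (version-dependent):
#   python train.py --config configs/hrsegnetb48.yml --do_eval --save_dir output
```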
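Before the engine list, a hedged inference sketch. The engine file name, the output tensor shape (assumed to be 1 × 2 × 400 × 400 two-class logits at input resolution), and the binding order are assumptions; query each engine for its actual I/O layout. It uses the TensorRT 8.x Python bindings plus `pycuda`.

```python
# Hedged sketch: run a released TensorRT engine on a single 1x3x400x400 input.
import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
with open("hrsegnet_b48.engine", "rb") as f:  # hypothetical engine file name
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

inp = np.zeros((1, 3, 400, 400), dtype=np.float32)  # a normalized image goes here
out = np.empty((1, 2, 400, 400), dtype=np.float32)  # assumed two-class logits shape

# Copy input to device, run, and copy the logits back.
d_inp, d_out = cuda.mem_alloc(inp.nbytes), cuda.mem_alloc(out.nbytes)
cuda.memcpy_htod(d_inp, inp)
# Classic bindings API (deprecated but still available in TensorRT 8.6);
# assumes binding 0 is the input and binding 1 is the output.
context.execute_v2(bindings=[int(d_inp), int(d_out)])
cuda.memcpy_dtoh(out, d_out)

mask = out.argmax(axis=1)  # per-pixel prediction: 0 = background, 1 = crack
```

The available engines are listed below.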
66 | | Model | 67 | | --- | 68 | | [U-Net](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EYoEi_aQczxOswVyAi8FQBgBYSYXalI8oZKRszWHgbzZwg?e=XuFGzf) | 69 | | [DDRNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EX-QSVExyFVLvasiouuvEwEBe4HPdK3N8HxklK5CAn07DQ?e=DfdBZz) | 70 | | [DeeplabV3+](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/ETkJ1rMqaqBGrfWNg5KCF0EBIxCfYlFk3t0IRD2Uk2cQcA?e=ISPLG0) | 71 | | [OCRNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/Ed0l6UAckEFGodrNz1W7aHgBOmoVN6-yZfNIKMTJOp4Fug?e=7u8ZOD) | 72 | | [STDCSeg](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EV1Rra3XuP5GqImDWMeYdbEBSt64lrmWnAQETKJe0NTO5Q?e=LN0VxD) | 73 | | [BiSeNetV2](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EfovCQdm_5FJoaySbnd2SBsB2becRV7KTQa7A9_oL7lkHA?e=TI8gZJ) | 74 | | [PSPNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/ERTJdaWfJ-9Ess81IwvnBE4Ba0pVnGgyqyZoHFC5hEe1pQ?e=ZzB5Xa) | 75 | | [HrSegNet-B16](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EYq7OVwYeRtJm0PtXmytSmoB-Ywu8PsC-9eS95V0M7GSpQ?e=1GgLOt) | 76 | | [HrSegNet-B32](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EURuJVQAW25GnJBvdwW76pgBZdZqyWwT_vifP7Ta98O8_w?e=kKZVLb) | 77 | | [HrSegNet-B48](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EcUUFXq9dbJHmAz1roiZCMUB3zeM49ILOwzFzHe0iAYS8w?e=SAGci7) | 78 | 79 | 80 | # Citation 81 | If you find this project helpful for your research, please consider citing the following BibTeX entry. 82 | ```bibtex 83 | @article{li2023real, 84 | title={Real-time high-resolution neural network with semantic guidance for crack segmentation}, 85 | author={Li, Yongshang and Ma, Ronggui and Liu, Han and Cheng, Gaoli}, 86 | journal={Automation in Construction}, 87 | volume={156}, 88 | pages={105112}, 89 | year={2023}, 90 | publisher={Elsevier} 91 | } 92 | 93 | 94 | ``` 95 | 96 | # Star History 97 | 98 | [![Star History Chart](https://api.star-history.com/svg?repos=CHDyshli/HrSegNet4CrackSegmentation&type=Date)](https://star-history.com/#CHDyshli/HrSegNet4CrackSegmentation&Date) 99 | 100 | -------------------------------------------------------------------------------- /configs/bisenetv2.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | 5 | train_dataset: 6 | type: Dataset 7 | dataset_root: data/crackseg9k 8 | train_path: data/crackseg9k/train.txt 9 | num_classes: 2 10 | mode: train 11 | transforms: 12 | - type: ResizeStepScaling 13 | min_scale_factor: 0.5 14 | max_scale_factor: 2.0 15 | scale_step_size: 0.25 16 | - type: RandomPaddingCrop 17 | crop_size: [400, 400] 18 | - type: RandomHorizontalFlip 19 | - type: RandomDistort 20 | brightness_range: 0.5 21 | contrast_range: 0.5 22 | saturation_range: 0.5 23 | - type: Normalize 24 | 25 | val_dataset: 26 | type: Dataset 27 | dataset_root: data/crackseg9k 28 | val_path: data/crackseg9k/val.txt 29 | num_classes: 2 30 | mode: val 31 | transforms: 32 | - type: Normalize 33 | 34 | 35 | model: 36 | type: BiSeNetV2 37 | num_classes: 2 38 | 39 | optimizer: 40 | type: sgd 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: CrossEntropyLoss 47 | - type: CrossEntropyLoss 48 | - type: CrossEntropyLoss 49 | - type: CrossEntropyLoss 50 | - type: CrossEntropyLoss 51 | coef: [1, 1, 1, 1, 1] 52 | 53 | lr_scheduler: 54 | type: 
PolynomialDecay 55 | learning_rate: 0.01 56 | end_lr: 0.0 57 | power: 0.9 -------------------------------------------------------------------------------- /configs/bisenetv2_b32.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | iters: 100000 3 | 4 | 5 | train_dataset: 6 | type: Dataset 7 | dataset_root: data/crackseg9k 8 | train_path: data/crackseg9k/train.txt 9 | num_classes: 2 10 | mode: train 11 | transforms: 12 | - type: ResizeStepScaling 13 | min_scale_factor: 0.5 14 | max_scale_factor: 2.0 15 | scale_step_size: 0.25 16 | - type: RandomPaddingCrop 17 | crop_size: [400, 400] 18 | - type: RandomHorizontalFlip 19 | - type: RandomDistort 20 | brightness_range: 0.5 21 | contrast_range: 0.5 22 | saturation_range: 0.5 23 | - type: Normalize 24 | 25 | val_dataset: 26 | type: Dataset 27 | dataset_root: data/crackseg9k 28 | val_path: data/crackseg9k/val.txt 29 | num_classes: 2 30 | mode: val 31 | transforms: 32 | - type: Normalize 33 | 34 | 35 | model: 36 | type: BiSeNetV2 37 | num_classes: 2 38 | 39 | optimizer: 40 | type: sgd 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: CrossEntropyLoss 47 | - type: CrossEntropyLoss 48 | - type: CrossEntropyLoss 49 | - type: CrossEntropyLoss 50 | - type: CrossEntropyLoss 51 | coef: [1, 1, 1, 1, 1] 52 | 53 | lr_scheduler: 54 | type: PolynomialDecay 55 | learning_rate: 0.01 56 | end_lr: 0.0 57 | power: 0.9 -------------------------------------------------------------------------------- /configs/ddrnet.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | 37 | model: 38 | type: DDRNet_23 39 | enable_auxiliary_loss: False 40 | 41 | 42 | loss: 43 | types: 44 | - type: OhemCrossEntropyLoss 45 | coef: [1] 46 | 47 | 48 | optimizer: 49 | type: sgd 50 | weight_decay: 0.0005 51 | 52 | 53 | lr_scheduler: 54 | type: PolynomialDecay 55 | learning_rate: 0.01 56 | end_lr: 0.0 57 | power: 0.9 -------------------------------------------------------------------------------- /configs/deeplabv3p.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 
29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | 37 | model: 38 | type: DeepLabV3P 39 | backbone: 40 | type: ResNet18_vd 41 | output_stride: 8 42 | multi_grid: [1, 2, 4] 43 | num_classes: 2 44 | backbone_indices: [0, 3] 45 | aspp_ratios: [1, 12, 24, 36] 46 | aspp_out_channels: 256 47 | align_corners: False 48 | pretrained: null 49 | 50 | 51 | loss: 52 | types: 53 | - type: CrossEntropyLoss 54 | coef: [1] 55 | 56 | 57 | optimizer: 58 | type: sgd 59 | weight_decay: 0.0005 60 | 61 | 62 | lr_scheduler: 63 | type: PolynomialDecay 64 | learning_rate: 0.01 65 | end_lr: 0.0 66 | power: 0.9 -------------------------------------------------------------------------------- /configs/hrnet_w18_s.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HRNet_W18_S 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | coef: [1] 48 | 49 | 50 | lr_scheduler: 51 | type: PolynomialDecay 52 | learning_rate: 0.01 53 | end_lr: 0.0 54 | power: 0.9 55 | warmup_iters: 2000 56 | warmup_start_lr: 1.0e-5 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /configs/hrsegnetb16.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HrSegNetB16 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/hrsegnetb32.yml: -------------------------------------------------------------------------------- 1 | 
batch_size: 32 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HrSegNetB32 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/hrsegnetb48.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HrSegNetB48 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/hrsegnetb64_bs16.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 
35 | type: HrSegNetB64 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/ocrnet_hrnetw18.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | optimizer: 34 | type: sgd 35 | 36 | 37 | lr_scheduler: 38 | type: PolynomialDecay 39 | learning_rate: 0.01 40 | power: 0.9 41 | 42 | loss: 43 | types: 44 | - type: MixedLoss 45 | losses: 46 | - type: CrossEntropyLoss 47 | - type: LovaszSoftmaxLoss 48 | coef: [0.8, 0.2] 49 | - type: MixedLoss 50 | losses: 51 | - type: CrossEntropyLoss 52 | - type: LovaszSoftmaxLoss 53 | coef: [0.8, 0.2] 54 | coef: [1, 0.4] 55 | 56 | 57 | model: 58 | type: OCRNet 59 | backbone: 60 | type: HRNet_W18 61 | backbone_indices: [0] -------------------------------------------------------------------------------- /configs/ocrnet_hrsegb64_bs16.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | 35 | optimizer: 36 | type: SGD 37 | momentum: 0.9 38 | weight_decay: 0.0005 39 | 40 | lr_scheduler: 41 | type: PolynomialDecay 42 | learning_rate: 0.01 43 | end_lr: 0.0 44 | power: 0.9 45 | warmup_iters: 2000 46 | warmup_start_lr: 1.0e-5 47 | 48 | 49 | 50 | loss: 51 | types: 52 | - type: MixedLoss 53 | losses: 54 | - type: CrossEntropyLoss 55 | - type: LovaszSoftmaxLoss 56 | coef: [0.8, 0.2] 57 | - type: MixedLoss 58 | losses: 59 | - type: CrossEntropyLoss 60 | - type: LovaszSoftmaxLoss 61 | coef: [0.8, 0.2] 62 | coef: [1, 0.4] 63 | 64 | 65 | model: 66 | type: OCRNet 67 | backbone: 68 | type: HrSegB64 69 | backbone_indices: [0] 
-------------------------------------------------------------------------------- /configs/pspnet.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | model: 37 | type: PSPNet 38 | num_classes: 2 39 | backbone: 40 | type: ResNet18_vd 41 | output_stride: 8 42 | enable_auxiliary_loss: True 43 | align_corners: False 44 | pretrained: null 45 | 46 | 47 | optimizer: 48 | type: sgd 49 | weight_decay: 0.0005 50 | 51 | loss: 52 | types: 53 | - type: CrossEntropyLoss 54 | coef: [1, 0.4] 55 | 56 | 57 | lr_scheduler: 58 | type: PolynomialDecay 59 | learning_rate: 0.01 60 | power: 0.9 61 | end_lr: 1.0e-5 -------------------------------------------------------------------------------- /configs/rucnet_crackseg9k.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: RUCNet 38 | num_classes: 2 39 | use_deconv: False 40 | pretrained: Null 41 | 42 | 43 | 44 | optimizer: 45 | type: adam 46 | 47 | 48 | loss: 49 | types: 50 | - type: FocalLoss 51 | coef: [1] 52 | 53 | 54 | lr_scheduler: 55 | type: PolynomialDecay 56 | learning_rate: 0.01 57 | end_lr: 0.0 58 | power: 0.9 59 | warmup_iters: 2000 60 | warmup_start_lr: 1.0e-5 61 | -------------------------------------------------------------------------------- /configs/stdcseg.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 
29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | 37 | model: 38 | type: STDCSeg 39 | backbone: 40 | type: STDC1 41 | pretrained: null 42 | 43 | loss: 44 | types: 45 | - type: OhemCrossEntropyLoss 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: DetailAggregateLoss 49 | coef: [1, 1, 1, 1] 50 | 51 | 52 | optimizer: 53 | type: sgd 54 | weight_decay: 0.0005 55 | 56 | 57 | lr_scheduler: 58 | type: PolynomialDecay 59 | learning_rate: 0.01 60 | end_lr: 0.0 61 | power: 0.9 -------------------------------------------------------------------------------- /configs/u2cracknet_crackseg9k.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: U2CrackNet 38 | num_classes: 2 39 | 40 | 41 | 42 | optimizer: 43 | type: adam 44 | 45 | loss: 46 | types: 47 | - type: CrossEntropyLoss 48 | coef: [1] 49 | 50 | 51 | lr_scheduler: 52 | type: PolynomialDecay 53 | learning_rate: 0.01 54 | end_lr: 0.0 55 | power: 0.9 56 | warmup_iters: 2000 57 | warmup_start_lr: 1.0e-5 58 | -------------------------------------------------------------------------------- /configs/unet.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: UNet 38 | num_classes: 2 39 | use_deconv: False 40 | pretrained: Null 41 | 42 | 43 | 44 | optimizer: 45 | type: sgd 46 | weight_decay: 0.0005 47 | 48 | loss: 49 | types: 50 | - type: OhemCrossEntropyLoss 51 | coef: [1] 52 | 53 | 54 | lr_scheduler: 55 | type: PolynomialDecay 56 | learning_rate: 0.01 57 | end_lr: 0.0 58 | power: 0.9 -------------------------------------------------------------------------------- /configs/unet_focalloss_adam_crackseg9k.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | 
min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: UNet 38 | num_classes: 2 39 | use_deconv: False 40 | pretrained: Null 41 | 42 | 43 | 44 | optimizer: 45 | type: Adam 46 | 47 | loss: 48 | types: 49 | - type: FocalLoss 50 | coef: [1] 51 | 52 | 53 | 54 | lr_scheduler: 55 | type: PolynomialDecay 56 | learning_rate: 0.01 57 | end_lr: 0.0 58 | power: 0.9 59 | warmup_iters: 2000 60 | warmup_start_lr: 1.0e-5 61 | -------------------------------------------------------------------------------- /fig/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CHDyshli/HrSegNet4CrackSegmentation/1e4dd172e250de5cb951414c317059b3cd89c702/fig/fig1.png -------------------------------------------------------------------------------- /fig/fig5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CHDyshli/HrSegNet4CrackSegmentation/1e4dd172e250de5cb951414c317059b3cd89c702/fig/fig5.png -------------------------------------------------------------------------------- /fig/fig8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CHDyshli/HrSegNet4CrackSegmentation/1e4dd172e250de5cb951414c317059b3cd89c702/fig/fig8.png -------------------------------------------------------------------------------- /models/bisenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | import paddle.nn.functional as F 6 | 7 | from paddleseg import utils 8 | from paddleseg.cvlibs import manager, param_init 9 | from paddleseg.models import layers 10 | 11 | 12 | # @manager.MODELS.add_component 13 | class BiSeNetV2(nn.Layer): 14 | """ 15 | The BiSeNet V2 implementation based on PaddlePaddle. 16 | 17 | The original article refers to 18 | Yu, Changqian, et al. "BiSeNet V2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation" 19 | (https://arxiv.org/abs/2004.02147) 20 | 21 | Args: 22 | num_classes (int): The unique number of target classes. 23 | lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25. 24 | in_channels (int, optional): The channels of input image. Default: 3. 25 | pretrained (str, optional): The path or url of pretrained model. Default: None. 
26 | """ 27 | 28 | def __init__(self, 29 | num_classes, 30 | lambd=0.25, 31 | align_corners=False, 32 | in_channels=3, 33 | pretrained=None): 34 | super().__init__() 35 | 36 | C1, C2, C3 = 64, 64, 128 37 | db_channels = (C1, C2, C3) 38 | C1, C3, C4, C5 = int(C1 * lambd), int(C3 * lambd), 64, 128 39 | sb_channels = (C1, C3, C4, C5) 40 | mid_channels = 128 41 | 42 | self.db = DetailBranch(in_channels, db_channels) 43 | self.sb = SemanticBranch(in_channels, sb_channels) 44 | 45 | self.bga = BGA(mid_channels, align_corners) 46 | self.aux_head1 = SegHead(C1, C1, num_classes) 47 | self.aux_head2 = SegHead(C3, C3, num_classes) 48 | self.aux_head3 = SegHead(C4, C4, num_classes) 49 | self.aux_head4 = SegHead(C5, C5, num_classes) 50 | self.head = SegHead(mid_channels, mid_channels, num_classes) 51 | 52 | self.align_corners = align_corners 53 | self.pretrained = pretrained 54 | self.init_weight() 55 | 56 | def forward(self, x): 57 | dfm = self.db(x) 58 | feat1, feat2, feat3, feat4, sfm = self.sb(x) 59 | logit = self.head(self.bga(dfm, sfm)) 60 | 61 | if not self.training: 62 | logit_list = [logit] 63 | else: 64 | logit1 = self.aux_head1(feat1) 65 | logit2 = self.aux_head2(feat2) 66 | logit3 = self.aux_head3(feat3) 67 | logit4 = self.aux_head4(feat4) 68 | logit_list = [logit, logit1, logit2, logit3, logit4] 69 | 70 | logit_list = [ 71 | F.interpolate( 72 | logit, 73 | paddle.shape(x)[2:], 74 | mode='bilinear', 75 | align_corners=self.align_corners) for logit in logit_list 76 | ] 77 | 78 | return logit_list 79 | 80 | def init_weight(self): 81 | if self.pretrained is not None: 82 | utils.load_entire_model(self, self.pretrained) 83 | else: 84 | for sublayer in self.sublayers(): 85 | if isinstance(sublayer, nn.Conv2D): 86 | param_init.kaiming_normal_init(sublayer.weight) 87 | elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)): 88 | param_init.constant_init(sublayer.weight, value=1.0) 89 | param_init.constant_init(sublayer.bias, value=0.0) 90 | 91 | 92 | class StemBlock(nn.Layer): 93 | def __init__(self, in_dim, out_dim): 94 | super(StemBlock, self).__init__() 95 | 96 | self.conv = layers.ConvBNReLU(in_dim, out_dim, 3, stride=2) 97 | 98 | self.left = nn.Sequential( 99 | layers.ConvBNReLU(out_dim, out_dim // 2, 1), 100 | layers.ConvBNReLU( 101 | out_dim // 2, out_dim, 3, stride=2)) 102 | 103 | self.right = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) 104 | 105 | self.fuse = layers.ConvBNReLU(out_dim * 2, out_dim, 3) 106 | 107 | def forward(self, x): 108 | x = self.conv(x) 109 | left = self.left(x) 110 | right = self.right(x) 111 | concat = paddle.concat([left, right], axis=1) 112 | return self.fuse(concat) 113 | 114 | 115 | class ContextEmbeddingBlock(nn.Layer): 116 | def __init__(self, in_dim, out_dim): 117 | super(ContextEmbeddingBlock, self).__init__() 118 | 119 | self.gap = nn.AdaptiveAvgPool2D(1) 120 | self.bn = layers.SyncBatchNorm(in_dim) 121 | 122 | self.conv_1x1 = layers.ConvBNReLU(in_dim, out_dim, 1) 123 | self.add = layers.Add() 124 | self.conv_3x3 = nn.Conv2D(out_dim, out_dim, 3, 1, 1) 125 | 126 | def forward(self, x): 127 | gap = self.gap(x) 128 | bn = self.bn(gap) 129 | conv1 = self.add(self.conv_1x1(bn), x) 130 | return self.conv_3x3(conv1) 131 | 132 | 133 | class GatherAndExpansionLayer1(nn.Layer): 134 | """Gather And Expansion Layer with stride 1""" 135 | 136 | def __init__(self, in_dim, out_dim, expand): 137 | super().__init__() 138 | 139 | expand_dim = expand * in_dim 140 | 141 | self.conv = nn.Sequential( 142 | layers.ConvBNReLU(in_dim, in_dim, 3), 143 | 
layers.DepthwiseConvBN(in_dim, expand_dim, 3), 144 | layers.ConvBN(expand_dim, out_dim, 1)) 145 | self.relu = layers.Activation("relu") 146 | 147 | def forward(self, x): 148 | return self.relu(self.conv(x) + x) 149 | 150 | 151 | class GatherAndExpansionLayer2(nn.Layer): 152 | """Gather And Expansion Layer with stride 2""" 153 | 154 | def __init__(self, in_dim, out_dim, expand): 155 | super().__init__() 156 | 157 | expand_dim = expand * in_dim 158 | 159 | self.branch_1 = nn.Sequential( 160 | layers.ConvBNReLU(in_dim, in_dim, 3), 161 | layers.DepthwiseConvBN( 162 | in_dim, expand_dim, 3, stride=2), 163 | layers.DepthwiseConvBN(expand_dim, expand_dim, 3), 164 | layers.ConvBN(expand_dim, out_dim, 1)) 165 | 166 | self.branch_2 = nn.Sequential( 167 | layers.DepthwiseConvBN( 168 | in_dim, in_dim, 3, stride=2), 169 | layers.ConvBN(in_dim, out_dim, 1)) 170 | 171 | self.relu = layers.Activation("relu") 172 | 173 | def forward(self, x): 174 | return self.relu(self.branch_1(x) + self.branch_2(x)) 175 | 176 | 177 | class DetailBranch(nn.Layer): 178 | """The detail branch of BiSeNet, which has wide channels but shallow layers.""" 179 | 180 | def __init__(self, in_channels, feature_channels): 181 | super().__init__() 182 | 183 | C1, C2, C3 = feature_channels 184 | 185 | self.convs = nn.Sequential( 186 | # stage 1 187 | layers.ConvBNReLU( 188 | in_channels, C1, 3, stride=2), 189 | layers.ConvBNReLU(C1, C1, 3), 190 | # stage 2 191 | layers.ConvBNReLU( 192 | C1, C2, 3, stride=2), 193 | layers.ConvBNReLU(C2, C2, 3), 194 | layers.ConvBNReLU(C2, C2, 3), 195 | # stage 3 196 | layers.ConvBNReLU( 197 | C2, C3, 3, stride=2), 198 | layers.ConvBNReLU(C3, C3, 3), 199 | layers.ConvBNReLU(C3, C3, 3), ) 200 | 201 | def forward(self, x): 202 | return self.convs(x) 203 | 204 | 205 | class SemanticBranch(nn.Layer): 206 | """The semantic branch of BiSeNet, which has narrow channels but deep layers.""" 207 | 208 | def __init__(self, in_channels, feature_channels): 209 | super().__init__() 210 | C1, C3, C4, C5 = feature_channels 211 | 212 | self.stem = StemBlock(in_channels, C1) 213 | 214 | self.stage3 = nn.Sequential( 215 | GatherAndExpansionLayer2(C1, C3, 6), 216 | GatherAndExpansionLayer1(C3, C3, 6)) 217 | 218 | self.stage4 = nn.Sequential( 219 | GatherAndExpansionLayer2(C3, C4, 6), 220 | GatherAndExpansionLayer1(C4, C4, 6)) 221 | 222 | self.stage5_4 = nn.Sequential( 223 | GatherAndExpansionLayer2(C4, C5, 6), 224 | GatherAndExpansionLayer1(C5, C5, 6), 225 | GatherAndExpansionLayer1(C5, C5, 6), 226 | GatherAndExpansionLayer1(C5, C5, 6)) 227 | 228 | self.ce = ContextEmbeddingBlock(C5, C5) 229 | 230 | def forward(self, x): 231 | stage2 = self.stem(x) 232 | stage3 = self.stage3(stage2) 233 | stage4 = self.stage4(stage3) 234 | stage5_4 = self.stage5_4(stage4) 235 | fm = self.ce(stage5_4) 236 | return stage2, stage3, stage4, stage5_4, fm 237 | 238 | 239 | class BGA(nn.Layer): 240 | """The Bilateral Guided Aggregation Layer, used to fuse the semantic features and spatial features.""" 241 | 242 | def __init__(self, out_dim, align_corners): 243 | super().__init__() 244 | 245 | self.align_corners = align_corners 246 | 247 | self.db_branch_keep = nn.Sequential( 248 | layers.DepthwiseConvBN(out_dim, out_dim, 3), 249 | nn.Conv2D(out_dim, out_dim, 1)) 250 | 251 | self.db_branch_down = nn.Sequential( 252 | layers.ConvBN( 253 | out_dim, out_dim, 3, stride=2), 254 | nn.AvgPool2D( 255 | kernel_size=3, stride=2, padding=1)) 256 | 257 | self.sb_branch_keep = nn.Sequential( 258 | layers.DepthwiseConvBN(out_dim, out_dim, 3), 259 | 
nn.Conv2D(out_dim, out_dim, 1), 260 | layers.Activation(act='sigmoid')) 261 | 262 | self.sb_branch_up = layers.ConvBN(out_dim, out_dim, 3) 263 | 264 | self.conv = layers.ConvBN(out_dim, out_dim, 3) 265 | 266 | def forward(self, dfm, sfm): 267 | db_feat_keep = self.db_branch_keep(dfm) 268 | db_feat_down = self.db_branch_down(dfm) 269 | sb_feat_keep = self.sb_branch_keep(sfm) 270 | 271 | sb_feat_up = self.sb_branch_up(sfm) 272 | sb_feat_up = F.interpolate( 273 | sb_feat_up, 274 | paddle.shape(db_feat_keep)[2:], 275 | mode='bilinear', 276 | align_corners=self.align_corners) 277 | 278 | sb_feat_up = F.sigmoid(sb_feat_up) 279 | db_feat = db_feat_keep * sb_feat_up 280 | 281 | sb_feat = db_feat_down * sb_feat_keep 282 | sb_feat = F.interpolate( 283 | sb_feat, 284 | paddle.shape(db_feat)[2:], 285 | mode='bilinear', 286 | align_corners=self.align_corners) 287 | 288 | return self.conv(db_feat + sb_feat) 289 | 290 | 291 | class SegHead(nn.Layer): 292 | def __init__(self, in_dim, mid_dim, num_classes): 293 | super().__init__() 294 | 295 | self.conv_3x3 = nn.Sequential( 296 | layers.ConvBNReLU(in_dim, mid_dim, 3), nn.Dropout(0.1)) 297 | 298 | self.conv_1x1 = nn.Conv2D(mid_dim, num_classes, 1, 1) 299 | 300 | def forward(self, x): 301 | conv1 = self.conv_3x3(x) 302 | conv2 = self.conv_1x1(conv1) 303 | return conv2 304 | -------------------------------------------------------------------------------- /models/ddrnet.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from paddleseg.cvlibs import manager, param_init 6 | from paddleseg.models import layers 7 | from paddleseg.utils import utils 8 | 9 | 10 | class DualResNet(nn.Layer): 11 | """ 12 | The DDRNet implementation based on PaddlePaddle. 13 | 14 | The original article refers to 15 | Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" 16 | (https://arxiv.org/abs/2101.06085) 17 | 18 | Args: 19 | num_classes (int): The unique number of target classes. 20 | in_channels (int, optional): Number of input channels. Default: 3. 21 | block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2]. 22 | planes (int): Base channels in network. Default: 64. 23 | spp_planes (int): Branch channels for DAPPM. Default: 128. 24 | head_planes (int): Mid channels of segmentation head. Default: 128. 25 | enable_auxiliary_loss (bool): Whether use auxiliary head for stage3. Default: False. 26 | pretrained (str, optional): The path or url of pretrained model. Default: None. 
27 | """ 28 | 29 | def __init__(self, 30 | num_classes, 31 | in_channels=3, 32 | block_layers=[2, 2, 2, 2], 33 | planes=64, 34 | spp_planes=128, 35 | head_planes=128, 36 | enable_auxiliary_loss=False, 37 | pretrained=None): 38 | super().__init__() 39 | highres_planes = planes * 2 40 | self.enable_auxiliary_loss = enable_auxiliary_loss 41 | self.conv1 = nn.Sequential( 42 | layers.ConvBNReLU( 43 | in_channels, planes, kernel_size=3, stride=2, padding=1), 44 | layers.ConvBNReLU( 45 | planes, planes, kernel_size=3, stride=2, padding=1), ) 46 | self.relu = nn.ReLU() 47 | self.layer1 = self._make_layers(BasicBlock, planes, planes, 48 | block_layers[0]) 49 | self.layer2 = self._make_layers( 50 | BasicBlock, planes, planes * 2, block_layers[1], stride=2) 51 | self.layer3 = self._make_layers( 52 | BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2) 53 | self.layer4 = self._make_layers( 54 | BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2) 55 | 56 | self.compression3 = layers.ConvBN( 57 | planes * 4, highres_planes, kernel_size=1, bias_attr=False) 58 | 59 | self.compression4 = layers.ConvBN( 60 | planes * 8, highres_planes, kernel_size=1, bias_attr=False) 61 | 62 | self.down3 = layers.ConvBN( 63 | highres_planes, 64 | planes * 4, 65 | kernel_size=3, 66 | stride=2, 67 | bias_attr=False) 68 | 69 | self.down4 = nn.Sequential( 70 | layers.ConvBNReLU( 71 | highres_planes, 72 | planes * 4, 73 | kernel_size=3, 74 | stride=2, 75 | padding=1, 76 | bias_attr=False), 77 | layers.ConvBN( 78 | planes * 4, 79 | planes * 8, 80 | kernel_size=3, 81 | stride=2, 82 | padding=1, 83 | bias_attr=False)) 84 | 85 | self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes, 86 | 2) 87 | self.layer4_ = self._make_layers(BasicBlock, highres_planes, 88 | highres_planes, 2) 89 | self.layer5_ = self._make_layers(Bottleneck, highres_planes, 90 | highres_planes, 1) 91 | self.layer5 = self._make_layers( 92 | Bottleneck, planes * 8, planes * 8, 1, stride=2) 93 | 94 | self.spp = DAPPM(planes * 16, spp_planes, planes * 4) 95 | if self.enable_auxiliary_loss: 96 | self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes) 97 | self.head = DDRNetHead(planes * 4, head_planes, num_classes) 98 | 99 | self.pretrained = pretrained 100 | self.init_weight() 101 | 102 | def init_weight(self): 103 | if self.pretrained is not None: 104 | utils.load_entire_model(self, self.pretrained) 105 | else: 106 | for m in self.sublayers(): 107 | if isinstance(m, nn.Conv2D): 108 | param_init.kaiming_normal_init(m.weight) 109 | elif isinstance(m, nn.BatchNorm2D): 110 | param_init.constant_init(m.weight, value=1) 111 | param_init.constant_init(m.bias, value=0) 112 | 113 | def _make_layers(self, block, inplanes, planes, blocks, stride=1): 114 | downsample = None 115 | if stride != 1 or inplanes != planes * block.expansion: 116 | downsample = nn.Sequential( 117 | nn.Conv2D( 118 | inplanes, 119 | planes * block.expansion, 120 | kernel_size=1, 121 | stride=stride, 122 | bias_attr=False), 123 | nn.BatchNorm2D(planes * block.expansion), ) 124 | layers = [] 125 | layers.append(block(inplanes, planes, stride, downsample)) 126 | inplanes = planes * block.expansion 127 | for i in range(1, blocks): 128 | if i == (blocks - 1): 129 | layers.append(block(inplanes, planes, stride=1, no_relu=True)) 130 | else: 131 | layers.append(block(inplanes, planes, stride=1, no_relu=False)) 132 | return nn.Sequential(*layers) 133 | 134 | def forward(self, x): 135 | n, c, h, w = paddle.shape(x) 136 | width_output = w // 8 137 | height_output 
= h // 8 138 | 139 | x = self.conv1(x) 140 | stage1_out = self.layer1(x) 141 | stage2_out = self.layer2(self.relu(stage1_out)) 142 | stage3_out = self.layer3(self.relu(stage2_out)) 143 | stage3_out_dual = self.layer3_(self.relu(stage2_out)) 144 | x = stage3_out + self.down3(self.relu(stage3_out_dual)) 145 | stage3_merge = stage3_out_dual + F.interpolate( 146 | self.compression3(self.relu(stage3_out)), 147 | size=[height_output, width_output], 148 | mode='bilinear') 149 | 150 | stage4_out = self.layer4(self.relu(x)) 151 | stage4_out_dual = self.layer4_(self.relu(stage3_merge)) 152 | 153 | x = stage4_out + self.down4(self.relu(stage4_out_dual)) 154 | stage4_merge = stage4_out_dual + F.interpolate( 155 | self.compression4(self.relu(stage4_out)), 156 | size=[height_output, width_output], 157 | mode='bilinear') 158 | 159 | stage5_out_dual = self.layer5_(self.relu(stage4_merge)) 160 | x = F.interpolate( 161 | self.spp(self.layer5(self.relu(x))), 162 | size=[height_output, width_output], 163 | mode='bilinear') 164 | 165 | output = self.head(x + stage5_out_dual) 166 | logit_list = [] 167 | logit_list.append(output) 168 | 169 | if self.enable_auxiliary_loss: 170 | aux_out = self.aux_head(stage3_merge) 171 | logit_list.append(aux_out) 172 | return [ 173 | F.interpolate( 174 | logit, [h, w], mode='bilinear') for logit in logit_list 175 | ] 176 | 177 | 178 | class BasicBlock(nn.Layer): 179 | expansion = 1 180 | 181 | def __init__(self, 182 | inplanes, 183 | planes, 184 | stride=1, 185 | downsample=None, 186 | no_relu=False): 187 | super().__init__() 188 | self.conv_bn_relu = layers.ConvBNReLU( 189 | inplanes, 190 | planes, 191 | kernel_size=3, 192 | stride=stride, 193 | padding=1, 194 | bias_attr=False) 195 | self.relu = nn.ReLU() 196 | self.conv_bn = layers.ConvBN( 197 | planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False) 198 | self.downsample = downsample 199 | self.stride = stride 200 | self.no_relu = no_relu 201 | 202 | def forward(self, x): 203 | residual = x 204 | out = self.conv_bn_relu(x) 205 | out = self.conv_bn(out) 206 | if self.downsample is not None: 207 | residual = self.downsample(x) 208 | out += residual 209 | if self.no_relu: 210 | return out 211 | else: 212 | return self.relu(out) 213 | 214 | 215 | class Bottleneck(nn.Layer): 216 | expansion = 2 217 | 218 | def __init__(self, 219 | inplanes, 220 | planes, 221 | stride=1, 222 | downsample=None, 223 | no_relu=True): 224 | super().__init__() 225 | self.conv_bn_relu1 = layers.ConvBNReLU( 226 | inplanes, planes, kernel_size=1, bias_attr=False) 227 | self.conv_bn_relu2 = layers.ConvBNReLU( 228 | planes, 229 | planes, 230 | kernel_size=3, 231 | stride=stride, 232 | padding=1, 233 | bias_attr=False) 234 | self.conv_bn = layers.ConvBN( 235 | planes, planes * self.expansion, kernel_size=1, bias_attr=False) 236 | self.relu = nn.ReLU() 237 | self.downsample = downsample 238 | self.stride = stride 239 | self.no_relu = no_relu 240 | 241 | def forward(self, x): 242 | residual = x 243 | out = self.conv_bn_relu1(x) 244 | out = self.conv_bn_relu2(out) 245 | out = self.conv_bn(out) 246 | if self.downsample is not None: 247 | residual = self.downsample(x) 248 | out += residual 249 | if self.no_relu: 250 | return out 251 | else: 252 | return self.relu(out) 253 | 254 | 255 | class DAPPM(nn.Layer): 256 | def __init__(self, inplanes, branch_planes, outplanes): 257 | super().__init__() 258 | self.scale1 = nn.Sequential( 259 | nn.AvgPool2D( 260 | kernel_size=5, stride=2, padding=2), 261 | layers.SyncBatchNorm(inplanes), 262 | nn.ReLU(), 263 | 
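# The five scale branches (scale0-scale4) share a pre-activation layout: pooling,
# then BN + ReLU, then a 1x1 conv to branch_planes. scale1/2/3 average-pool with
# strides 2/4/8, scale4 pools globally, and scale0 skips pooling, so the branches
# aggregate progressively larger context at matching channel width.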
nn.Conv2D( 264 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 265 | self.scale2 = nn.Sequential( 266 | nn.AvgPool2D( 267 | kernel_size=9, stride=4, padding=4), 268 | layers.SyncBatchNorm(inplanes), 269 | nn.ReLU(), 270 | nn.Conv2D( 271 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 272 | self.scale3 = nn.Sequential( 273 | nn.AvgPool2D( 274 | kernel_size=17, stride=8, padding=8), 275 | layers.SyncBatchNorm(inplanes), 276 | nn.ReLU(), 277 | nn.Conv2D( 278 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 279 | self.scale4 = nn.Sequential( 280 | nn.AdaptiveAvgPool2D((1, 1)), 281 | layers.SyncBatchNorm(inplanes), 282 | nn.ReLU(), 283 | nn.Conv2D( 284 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 285 | self.scale0 = nn.Sequential( 286 | layers.SyncBatchNorm(inplanes), 287 | nn.ReLU(), 288 | nn.Conv2D( 289 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 290 | self.process1 = nn.Sequential( 291 | layers.SyncBatchNorm(branch_planes), 292 | nn.ReLU(), 293 | nn.Conv2D( 294 | branch_planes, 295 | branch_planes, 296 | kernel_size=3, 297 | padding=1, 298 | bias_attr=False), ) 299 | self.process2 = nn.Sequential( 300 | layers.SyncBatchNorm(branch_planes), 301 | nn.ReLU(), 302 | nn.Conv2D( 303 | branch_planes, 304 | branch_planes, 305 | kernel_size=3, 306 | padding=1, 307 | bias_attr=False), ) 308 | self.process3 = nn.Sequential( 309 | layers.SyncBatchNorm(branch_planes), 310 | nn.ReLU(), 311 | nn.Conv2D( 312 | branch_planes, 313 | branch_planes, 314 | kernel_size=3, 315 | padding=1, 316 | bias_attr=False), ) 317 | self.process4 = nn.Sequential( 318 | layers.SyncBatchNorm(branch_planes), 319 | nn.ReLU(), 320 | nn.Conv2D( 321 | branch_planes, 322 | branch_planes, 323 | kernel_size=3, 324 | padding=1, 325 | bias_attr=False), ) 326 | self.compression = nn.Sequential( 327 | layers.SyncBatchNorm(branch_planes * 5), 328 | nn.ReLU(), 329 | nn.Conv2D( 330 | branch_planes * 5, outplanes, kernel_size=1, bias_attr=False)) 331 | self.shortcut = nn.Sequential( 332 | layers.SyncBatchNorm(inplanes), 333 | nn.ReLU(), 334 | nn.Conv2D( 335 | inplanes, outplanes, kernel_size=1, bias_attr=False)) 336 | 337 | def forward(self, x): 338 | n, c, h, w = paddle.shape(x) 339 | x0 = self.scale0(x) 340 | x1 = self.process1( 341 | F.interpolate( 342 | self.scale1(x), size=[h, w], mode='bilinear') + x0) 343 | x2 = self.process2( 344 | F.interpolate( 345 | self.scale2(x), size=[h, w], mode='bilinear') + x1) 346 | x3 = self.process3( 347 | F.interpolate( 348 | self.scale3(x), size=[h, w], mode='bilinear') + x2) 349 | x4 = self.process4( 350 | F.interpolate( 351 | self.scale4(x), size=[h, w], mode='bilinear') + x3) 352 | 353 | out = self.compression(paddle.concat([x0, x1, x2, x3, x4], 354 | 1)) + self.shortcut(x) 355 | return out 356 | 357 | 358 | class DDRNetHead(nn.Layer): 359 | def __init__(self, inplanes, interplanes, outplanes, scale_factor=None): 360 | super().__init__() 361 | self.bn1 = nn.BatchNorm2D(inplanes) 362 | self.relu = nn.ReLU() 363 | self.conv_bn_relu = layers.ConvBNReLU( 364 | inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False) 365 | self.conv = nn.Conv2D( 366 | interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True) 367 | 368 | self.scale_factor = scale_factor 369 | 370 | def forward(self, x): 371 | x = self.bn1(x) 372 | x = self.relu(x) 373 | x = self.conv_bn_relu(x) 374 | out = self.conv(x) 375 | 376 | if self.scale_factor is not None: 377 | out = F.interpolate( 378 | out, scale_factor=self.scale_factor, mode='bilinear') 
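# scale_factor is None by default; DualResNet constructs its heads without it
# and instead resizes the logits to the input size in its own forward pass.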
379 | return out
380 | 
381 | 
382 | @manager.MODELS.add_component
383 | def DDRNet_23(**kwargs):
384 | return DualResNet(
385 | block_layers=[2, 2, 2, 2],
386 | planes=64,
387 | spp_planes=128,
388 | head_planes=128,
389 | **kwargs)
390 | 
-------------------------------------------------------------------------------- /models/deeplabv3p.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | 
5 | from paddleseg.cvlibs import manager
6 | from paddleseg.models import layers
7 | from paddleseg.utils import utils
8 | 
9 | __all__ = ['DeepLabV3P', 'DeepLabV3']
10 | 
11 | 
12 | @manager.MODELS.add_component
13 | class DeepLabV3P(nn.Layer):
14 | """
15 | The DeepLabV3Plus implementation based on PaddlePaddle.
16 | 
17 | The original article refers to
18 | Liang-Chieh Chen, et al. "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
19 | (https://arxiv.org/abs/1802.02611)
20 | 
21 | Args:
22 | num_classes (int): The unique number of target classes.
23 | backbone (paddle.nn.Layer): Backbone network; currently supports Resnet50_vd/Resnet101_vd/Xception65.
24 | backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone.
25 | Default: (0, 3).
26 | aspp_ratios (tuple, optional): The dilation rates used in the ASPP module.
27 | If output_stride=16, aspp_ratios should be set as (1, 6, 12, 18).
28 | If output_stride=8, aspp_ratios is (1, 12, 24, 36).
29 | Default: (1, 6, 12, 18).
30 | aspp_out_channels (int, optional): The output channels of ASPP module. Default: 256.
31 | align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
32 | e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
33 | pretrained (str, optional): The path or url of pretrained model. Default: None.
34 | data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW".
35 | """
36 | 
37 | def __init__(self,
38 | num_classes,
39 | backbone,
40 | backbone_indices=(0, 3),
41 | aspp_ratios=(1, 6, 12, 18),
42 | aspp_out_channels=256,
43 | align_corners=False,
44 | pretrained=None,
45 | data_format="NCHW"):
46 | super().__init__()
47 | 
48 | self.backbone = backbone
49 | backbone_channels = [
50 | backbone.feat_channels[i] for i in backbone_indices
51 | ]
52 | 
53 | self.head = DeepLabV3PHead(
54 | num_classes,
55 | backbone_indices,
56 | backbone_channels,
57 | aspp_ratios,
58 | aspp_out_channels,
59 | align_corners,
60 | data_format=data_format)
61 | 
62 | self.align_corners = align_corners
63 | self.pretrained = pretrained
64 | self.data_format = data_format
65 | self.init_weight()
66 | 
67 | def forward(self, x):
68 | feat_list = self.backbone(x)
69 | logit_list = self.head(feat_list)
70 | if self.data_format == 'NCHW':
71 | ori_shape = paddle.shape(x)[2:]
72 | else:
73 | ori_shape = paddle.shape(x)[1:3]
74 | return [
75 | F.interpolate(
76 | logit,
77 | ori_shape,
78 | mode='bilinear',
79 | align_corners=self.align_corners,
80 | data_format=self.data_format) for logit in logit_list
81 | ]
82 | 
83 | def init_weight(self):
84 | if self.pretrained is not None:
85 | utils.load_entire_model(self, self.pretrained)
86 | 
87 | 
88 | class DeepLabV3PHead(nn.Layer):
89 | """
90 | The DeepLabV3PHead implementation based on PaddlePaddle.
91 | 
92 | Args:
93 | num_classes (int): The unique number of target classes.
94 | backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone.
95 | The first index will be taken as a low-level feature in the Decoder component;
96 | the second one will be taken as input of the ASPP component.
97 | Usually a backbone consists of four downsampling stages and returns an output of
98 | each stage. If we set it as (0, 3), it means taking the feature map of the first
99 | stage in the backbone as the low-level feature used in the Decoder, and the feature map of the fourth
100 | stage as input of ASPP.
101 | backbone_channels (tuple): The same length as "backbone_indices". It indicates the channels of the corresponding indices.
102 | aspp_ratios (tuple): The dilation rates used in the ASPP module.
103 | aspp_out_channels (int): The output channels of the ASPP module.
104 | align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of the feature
105 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
106 | data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW".
107 | """
108 | 
109 | def __init__(self,
110 | num_classes,
111 | backbone_indices,
112 | backbone_channels,
113 | aspp_ratios,
114 | aspp_out_channels,
115 | align_corners,
116 | data_format='NCHW'):
117 | super().__init__()
118 | 
119 | self.aspp = layers.ASPPModule(
120 | aspp_ratios,
121 | backbone_channels[1],
122 | aspp_out_channels,
123 | align_corners,
124 | use_sep_conv=True,
125 | image_pooling=True,
126 | data_format=data_format)
127 | self.decoder = Decoder(
128 | num_classes,
129 | backbone_channels[0],
130 | align_corners,
131 | data_format=data_format)
132 | self.backbone_indices = backbone_indices
133 | 
134 | def forward(self, feat_list):
135 | logit_list = []
136 | low_level_feat = feat_list[self.backbone_indices[0]]
137 | x = feat_list[self.backbone_indices[1]]
138 | x = self.aspp(x)
139 | logit = self.decoder(x, low_level_feat)
140 | logit_list.append(logit)
141 | 
142 | return logit_list
143 | 
144 | 
145 | @manager.MODELS.add_component
146 | class DeepLabV3(nn.Layer):
147 | """
148 | The DeepLabV3 implementation based on PaddlePaddle.
149 | 
150 | The original article refers to
151 | Liang-Chieh Chen, et al. "Rethinking Atrous Convolution for Semantic Image Segmentation"
152 | (https://arxiv.org/pdf/1706.05587.pdf).
153 | 
154 | Args:
155 | Please refer to DeepLabV3P above.
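Note that, unlike DeepLabV3P, DeepLabV3 takes a single backbone feature map
(backbone_indices defaults to (3,)) and applies ASPP followed by a 1x1
classifier directly, without the decoder or low-level feature fusion.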
156 | """ 157 | 158 | def __init__(self, 159 | num_classes, 160 | backbone, 161 | backbone_indices=(3, ), 162 | aspp_ratios=(1, 6, 12, 18), 163 | aspp_out_channels=256, 164 | align_corners=False, 165 | pretrained=None): 166 | super().__init__() 167 | 168 | self.backbone = backbone 169 | backbone_channels = [ 170 | backbone.feat_channels[i] for i in backbone_indices 171 | ] 172 | 173 | self.head = DeepLabV3Head(num_classes, backbone_indices, 174 | backbone_channels, aspp_ratios, 175 | aspp_out_channels, align_corners) 176 | self.align_corners = align_corners 177 | self.pretrained = pretrained 178 | self.init_weight() 179 | 180 | def forward(self, x): 181 | feat_list = self.backbone(x) 182 | logit_list = self.head(feat_list) 183 | return [ 184 | F.interpolate( 185 | logit, 186 | paddle.shape(x)[2:], 187 | mode='bilinear', 188 | align_corners=self.align_corners) for logit in logit_list 189 | ] 190 | 191 | def init_weight(self): 192 | if self.pretrained is not None: 193 | utils.load_entire_model(self, self.pretrained) 194 | 195 | 196 | class DeepLabV3Head(nn.Layer): 197 | """ 198 | The DeepLabV3Head implementation based on PaddlePaddle. 199 | 200 | Args: 201 | Please Refer to DeepLabV3PHead above. 202 | """ 203 | 204 | def __init__(self, num_classes, backbone_indices, backbone_channels, 205 | aspp_ratios, aspp_out_channels, align_corners): 206 | super().__init__() 207 | 208 | self.aspp = layers.ASPPModule( 209 | aspp_ratios, 210 | backbone_channels[0], 211 | aspp_out_channels, 212 | align_corners, 213 | use_sep_conv=False, 214 | image_pooling=True) 215 | 216 | self.cls = nn.Conv2D( 217 | in_channels=aspp_out_channels, 218 | out_channels=num_classes, 219 | kernel_size=1) 220 | 221 | self.backbone_indices = backbone_indices 222 | 223 | def forward(self, feat_list): 224 | logit_list = [] 225 | x = feat_list[self.backbone_indices[0]] 226 | x = self.aspp(x) 227 | logit = self.cls(x) 228 | logit_list.append(logit) 229 | 230 | return logit_list 231 | 232 | 233 | class Decoder(nn.Layer): 234 | """ 235 | Decoder module of DeepLabV3P model 236 | 237 | Args: 238 | num_classes (int): The number of classes. 239 | in_channels (int): The number of input channels in decoder module. 
240 | """ 241 | 242 | def __init__(self, 243 | num_classes, 244 | in_channels, 245 | align_corners, 246 | data_format='NCHW'): 247 | super(Decoder, self).__init__() 248 | 249 | self.data_format = data_format 250 | self.conv_bn_relu1 = layers.ConvBNReLU( 251 | in_channels=in_channels, 252 | out_channels=48, 253 | kernel_size=1, 254 | data_format=data_format) 255 | 256 | self.conv_bn_relu2 = layers.SeparableConvBNReLU( 257 | in_channels=304, 258 | out_channels=256, 259 | kernel_size=3, 260 | padding=1, 261 | data_format=data_format) 262 | self.conv_bn_relu3 = layers.SeparableConvBNReLU( 263 | in_channels=256, 264 | out_channels=256, 265 | kernel_size=3, 266 | padding=1, 267 | data_format=data_format) 268 | self.conv = nn.Conv2D( 269 | in_channels=256, 270 | out_channels=num_classes, 271 | kernel_size=1, 272 | data_format=data_format) 273 | 274 | self.align_corners = align_corners 275 | 276 | def forward(self, x, low_level_feat): 277 | low_level_feat = self.conv_bn_relu1(low_level_feat) 278 | if self.data_format == 'NCHW': 279 | low_level_shape = paddle.shape(low_level_feat)[-2:] 280 | axis = 1 281 | else: 282 | low_level_shape = paddle.shape(low_level_feat)[1:3] 283 | axis = -1 284 | x = F.interpolate( 285 | x, 286 | low_level_shape, 287 | mode='bilinear', 288 | align_corners=self.align_corners, 289 | data_format=self.data_format) 290 | x = paddle.concat([x, low_level_feat], axis=axis) 291 | x = self.conv_bn_relu2(x) 292 | x = self.conv_bn_relu3(x) 293 | x = self.conv(x) 294 | return x 295 | -------------------------------------------------------------------------------- /models/hrnet_w18_s.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | from paddleseg.models.backbones.hrnet import HRNet_W18 11 | 12 | 13 | 14 | class SegHead(nn.Layer): 15 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 16 | super(SegHead, self).__init__() 17 | self.bn1 = nn.BatchNorm2D(inplanes) 18 | self.relu = nn.ReLU() 19 | if aux_head: 20 | self.con_bn_relu = nn.Sequential( 21 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 22 | nn.BatchNorm2D(interplanes), 23 | nn.ReLU(), 24 | ) 25 | else: 26 | self.con_bn_relu = nn.Sequential( 27 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 28 | nn.BatchNorm2D(interplanes), 29 | nn.ReLU(), 30 | ) 31 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 32 | 33 | 34 | def forward(self, x): 35 | x = self.bn1(x) 36 | x = self.relu(x) 37 | x = self.con_bn_relu(x) 38 | out = self.conv(x) 39 | return out 40 | 41 | @manager.MODELS.add_component 42 | class HRNet_W18_S(nn.Layer): 43 | def __init__(self, 44 | num_classes=2, 45 | ): 46 | super().__init__() 47 | self.hrnet = HRNet_W18() 48 | self.head = SegHead(inplanes=270, interplanes=64, outplanes=num_classes) 49 | 50 | def forward(self, x): 51 | logits = [] 52 | h, w = paddle.shape(x)[2:] 53 | x = self.hrnet(x) 54 | x = x[0] 55 | x = self.head(x) 56 | logits.append(x) 57 | logits = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logits] 58 | return logits 59 | 60 | 61 | 62 | def init_weights(self): 63 | 
for m in self.sublayers():
64 | if isinstance(m, nn.Conv2D):
65 | param_init.kaiming_normal_init(m.weight)
66 | elif isinstance(m, nn.BatchNorm2D):
67 | param_init.constant_init(m.weight, value=1)
68 | param_init.constant_init(m.bias, value=0)
69 | 
70 | 
71 | 
72 | 
73 | # if __name__ == '__main__':
74 | # model = HRNet_W18_S()
75 | # input = paddle.rand([1, 3, 400, 400])
76 | # output = model(input)
77 | # print(len(output))
78 | # for o in output:
79 | # print(o.shape)
-------------------------------------------------------------------------------- /models/hrsegb64.py: --------------------------------------------------------------------------------
1 | import math
2 | 
3 | import paddle
4 | import paddle.nn as nn
5 | 
6 | from paddleseg.utils import utils
7 | from paddleseg.cvlibs import manager, param_init
8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm
9 | import paddle.nn.functional as F
10 | 
11 | 
12 | 
13 | 
14 | # features
15 | # 1. The size of the high-resolution path remains constant throughout the process
16 | # 2. In order to reduce and flexibly control the computational cost, the channel count of
17 | # the high-resolution path remains unchanged
18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation
19 | # heads used for auxiliary loss during training
20 | # 4. The seg head upsamples in two steps, instead of restoring the
21 | # original resolution all at once
22 | 
23 | # If you need to use this model with paddleseg, you need to add it to the model library
24 | # using manager.MODELS.add_component()
25 | 
26 | """
27 | Seg heads removed; only the backbone is retained
28 | """
29 | @manager.BACKBONES.add_component
30 | class HrSegB64(nn.Layer):
31 | """
32 | The HrSegNet backbone implementation based on PaddlePaddle.
33 | 
34 | Args:
35 | num_classes (int): The unique number of target classes.
36 | 
37 | in_channels (int, optional): The channels of input image. Default: 3.
38 | 
39 | base (int, optional): The base channel number of the model. Default: 64.
40 | """
41 | def __init__(self,
42 | in_channels=3, # input channel
43 | base=64, # base channel of the model
44 | num_classes=2 # number of classes
45 | ):
46 | super(HrSegB64, self).__init__()
47 | self.base = base
48 | self.num_classes = num_classes
49 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features.
50 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 51 | self.stage1 = nn.Sequential( 52 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 53 | nn.BatchNorm2D(base // 2), 54 | nn.ReLU(), 55 | ) 56 | self.stage2 = nn.Sequential( 57 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 58 | nn.BatchNorm2D(base), 59 | nn.ReLU(), 60 | ) 61 | 62 | self.seg1 = SegBlock(base=base, stage_index=1) 63 | self.seg2 = SegBlock(base=base, stage_index=2) 64 | self.seg3 = SegBlock(base=base, stage_index=3) 65 | 66 | # self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 67 | # self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 68 | # self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 69 | 70 | self.feat_channels = [base] 71 | self.init_weight() 72 | 73 | def forward(self, x): 74 | logit_list = [] 75 | h, w = paddle.shape(x)[2:] 76 | # aux_head only used in training 77 | stem1_out = self.stage1(x) 78 | stem2_out = self.stage2(stem1_out) 79 | hrseg1_out = self.seg1(stem2_out) 80 | hrseg2_out = self.seg2(hrseg1_out) 81 | hrseg3_out = self.seg3(hrseg2_out) 82 | logit_list.append(hrseg3_out) 83 | return logit_list 84 | 85 | 86 | 87 | def init_weight(self): 88 | for m in self.sublayers(): 89 | if isinstance(m, nn.Conv2D): 90 | param_init.kaiming_normal_init(m.weight) 91 | elif isinstance(m, nn.BatchNorm2D): 92 | param_init.constant_init(m.weight, value=1) 93 | param_init.constant_init(m.bias, value=0) 94 | 95 | 96 | 97 | class SegBlock(nn.Layer): 98 | def __init__(self, 99 | base=32, 100 | stage_index=1):# stage_index=1,2,3. 101 | super(SegBlock, self).__init__() 102 | 103 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 104 | self.h_conv1 = nn.Sequential( 105 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 106 | nn.BatchNorm2D(base), 107 | nn.ReLU() 108 | ) 109 | self.h_conv2 = nn.Sequential( 110 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 111 | nn.BatchNorm2D(base), 112 | nn.ReLU() 113 | ) 114 | self.h_conv3 = nn.Sequential( 115 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 116 | nn.BatchNorm2D(base), 117 | nn.ReLU() 118 | ) 119 | 120 | # sematic guidance path/low-resolution path 121 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 122 | self.l_conv1 = nn.Sequential( 123 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 124 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 125 | nn.ReLU() 126 | ) 127 | elif stage_index==2: #second stage 128 | self.l_conv1 = nn.Sequential( 129 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 130 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 131 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 132 | nn.ReLU() 133 | ) 134 | elif stage_index==3: 135 | self.l_conv1 = nn.Sequential( 136 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU(), 140 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), 
out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 141 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 142 | nn.ReLU() 143 | ) 144 | else: 145 | raise ValueError("stage_index must be 1, 2 or 3") 146 | self.l_conv2 = nn.Sequential( 147 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 148 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 149 | nn.ReLU() 150 | ) 151 | self.l_conv3 = nn.Sequential( 152 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 153 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 154 | nn.ReLU() 155 | ) 156 | 157 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 158 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 159 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 160 | 161 | 162 | 163 | def forward(self, x): 164 | # out = [] 165 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 166 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 167 | size = x.shape[2:] 168 | out_h1 = self.h_conv1(x) # high resolution path 169 | out_l1 = self.l_conv1(x) # low resolution path 170 | # print(out_l1.shape) 171 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 172 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 173 | 174 | out_h2 = self.h_conv2(out_hl1) 175 | out_l2 = self.l_conv2(out_l1) 176 | # print(out_l2.shape) 177 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 178 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 179 | 180 | out_h3 = self.h_conv3(out_hl2) 181 | out_l3 = self.l_conv3(out_l2) 182 | # print(out_l3.shape) 183 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 184 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 185 | return out_hl3 186 | 187 | # seg head 188 | class SegHead(nn.Layer): 189 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 190 | super(SegHead, self).__init__() 191 | self.bn1 = nn.BatchNorm2D(inplanes) 192 | self.relu = nn.ReLU() 193 | if aux_head: 194 | self.con_bn_relu = nn.Sequential( 195 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 196 | nn.BatchNorm2D(interplanes), 197 | nn.ReLU(), 198 | ) 199 | else: 200 | self.con_bn_relu = nn.Sequential( 201 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 202 | nn.BatchNorm2D(interplanes), 203 | nn.ReLU(), 204 | ) 205 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 206 | 207 | 208 | def forward(self, x): 209 | x = self.bn1(x) 210 | x = self.relu(x) 211 | x = self.con_bn_relu(x) 212 | out = self.conv(x) 213 | return out 214 | 215 | 216 | 217 | # if __name__ == "__main__": 218 | # model = HrSegNet() 219 | # x = paddle.randn([1, 3, 400, 400]) 220 | # out = model(x) 221 | # print(out[0].shape) 222 | 223 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 224 | 225 | -------------------------------------------------------------------------------- /models/hrsegnet_b16.py: 
-------------------------------------------------------------------------------- 1 | import math
2 | 
3 | import paddle
4 | import paddle.nn as nn
5 | 
6 | from paddleseg.utils import utils
7 | from paddleseg.cvlibs import manager, param_init
8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm
9 | import paddle.nn.functional as F
10 | 
11 | 
12 | """
13 | This file is identical to the default HrSegNet,
14 | except for the value of the parameter base
15 | """
16 | 
17 | # features
18 | # 1. The size of the high-resolution path remains constant throughout the process
19 | # 2. In order to reduce and flexibly control the computational cost, the channel count of
20 | # the high-resolution path remains unchanged
21 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation
22 | # heads used for auxiliary loss during training
23 | # 4. The seg head upsamples in two steps, instead of restoring the
24 | # original resolution all at once
25 | 
26 | # If you need to use this model with paddleseg, you need to add it to the model library
27 | # using manager.MODELS.add_component()
28 | @manager.MODELS.add_component
29 | class HrSegNetB16(nn.Layer):
30 | """
31 | The HrSegNet implementation based on PaddlePaddle.
32 | 
33 | Args:
34 | num_classes (int): The unique number of target classes.
35 | 
36 | in_channels (int, optional): The channels of input image. Default: 3.
37 | 
38 | base (int, optional): The base channel number of the model. Default: 16.
39 | """
40 | def __init__(self,
41 | in_channels=3, # input channel
42 | base=16, # base channel of the model
43 | num_classes=2, # number of classes
44 | pretrained=None # pretrained model
45 | ):
46 | super(HrSegNetB16, self).__init__()
47 | self.base = base
48 | self.num_classes = num_classes
49 | self.pretrained = pretrained
50 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features.
51 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 52 | self.stage1 = nn.Sequential( 53 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 54 | nn.BatchNorm2D(base // 2), 55 | nn.ReLU(), 56 | ) 57 | self.stage2 = nn.Sequential( 58 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 59 | nn.BatchNorm2D(base), 60 | nn.ReLU(), 61 | ) 62 | 63 | self.seg1 = SegBlock(base=base, stage_index=1) 64 | self.seg2 = SegBlock(base=base, stage_index=2) 65 | self.seg3 = SegBlock(base=base, stage_index=3) 66 | 67 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 68 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 69 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 70 | 71 | self.init_weight() 72 | 73 | def forward(self, x): 74 | logit_list = [] 75 | h, w = paddle.shape(x)[2:] 76 | # aux_head only used in training 77 | if self.training: 78 | stem1_out = self.stage1(x) 79 | stem2_out = self.stage2(stem1_out) 80 | hrseg1_out = self.seg1(stem2_out) 81 | hrseg2_out = self.seg2(hrseg1_out) 82 | hrseg3_out = self.seg3(hrseg2_out) 83 | last_out = self.head(hrseg3_out) 84 | seghead1_out = self.aux_head1(hrseg1_out) 85 | seghead2_out = self.aux_head2(hrseg2_out) 86 | logit_list = [last_out, seghead1_out, seghead2_out] 87 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 88 | return logit_list 89 | else: 90 | stem1_out = self.stage1(x) 91 | stem2_out = self.stage2(stem1_out) 92 | hrseg1_out = self.seg1(stem2_out) 93 | hrseg2_out = self.seg2(hrseg1_out) 94 | hrseg3_out = self.seg3(hrseg2_out) 95 | last_out = self.head(hrseg3_out) 96 | logit_list = [last_out] 97 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 98 | return logit_list 99 | 100 | 101 | def init_weight(self): 102 | if self.pretrained is not None: 103 | utils.load_entire_model(self, self.pretrained) 104 | else: 105 | for m in self.sublayers(): 106 | if isinstance(m, nn.Conv2D): 107 | param_init.kaiming_normal_init(m.weight) 108 | elif isinstance(m, nn.BatchNorm2D): 109 | param_init.constant_init(m.weight, value=1) 110 | param_init.constant_init(m.bias, value=0) 111 | 112 | 113 | 114 | class SegBlock(nn.Layer): 115 | def __init__(self, 116 | base=32, 117 | stage_index=1):# stage_index=1,2,3. 
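# Each SegBlock runs two parallel paths that are fused three times:
# - the high-resolution path (h_conv1..3) keeps 1/4 resolution and `base` channels
# - the semantic-guidance path (l_conv1..3) divides the resolution by 2**stage_index
#   and multiplies the channels by 2**stage_index
# After every step the low-res feature is bilinearly upsampled, projected back to
# `base` channels by a 1x1 conv (l2h_convN), and added to the high-res feature.
# A rough shape sketch with hypothetical sizes (base=16, stage_index=2, and a
# [N, 16, 100, 100] input to the block, e.g. a 400x400 image after the stem):
#   high-res path: [N, 16, 100, 100] at every step
#   l_conv1 output: [N, 64, 25, 25] (avg-pool /2, then stride-2 conv; channels x4)
#   l2h_conv1(upsampled l_conv1): [N, 16, 100, 100], added to the h_conv1 output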
118 | super(SegBlock, self).__init__() 119 | 120 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 121 | self.h_conv1 = nn.Sequential( 122 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 123 | nn.BatchNorm2D(base), 124 | nn.ReLU() 125 | ) 126 | self.h_conv2 = nn.Sequential( 127 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 128 | nn.BatchNorm2D(base), 129 | nn.ReLU() 130 | ) 131 | self.h_conv3 = nn.Sequential( 132 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 133 | nn.BatchNorm2D(base), 134 | nn.ReLU() 135 | ) 136 | 137 | # sematic guidance path/low-resolution path 138 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 139 | self.l_conv1 = nn.Sequential( 140 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 141 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 142 | nn.ReLU() 143 | ) 144 | elif stage_index==2: #second stage 145 | self.l_conv1 = nn.Sequential( 146 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 147 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 148 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 149 | nn.ReLU() 150 | ) 151 | elif stage_index==3: 152 | self.l_conv1 = nn.Sequential( 153 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 154 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU(), 157 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 158 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 159 | nn.ReLU() 160 | ) 161 | else: 162 | raise ValueError("stage_index must be 1, 2 or 3") 163 | self.l_conv2 = nn.Sequential( 164 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 165 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 166 | nn.ReLU() 167 | ) 168 | self.l_conv3 = nn.Sequential( 169 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 170 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 171 | nn.ReLU() 172 | ) 173 | 174 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 175 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 176 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 177 | 178 | 179 | 180 | def forward(self, x): 181 | # out = [] 182 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 183 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 184 | size = x.shape[2:] 185 | out_h1 = self.h_conv1(x) # high resolution path 186 | out_l1 = self.l_conv1(x) # low resolution path 187 | # print(out_l1.shape) 188 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 189 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 190 | 191 | out_h2 = self.h_conv2(out_hl1) 192 | out_l2 = self.l_conv2(out_l1) 
193 | # print(out_l2.shape) 194 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 195 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 196 | 197 | out_h3 = self.h_conv3(out_hl2) 198 | out_l3 = self.l_conv3(out_l2) 199 | # print(out_l3.shape) 200 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 201 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 202 | return out_hl3 203 | 204 | # seg head 205 | class SegHead(nn.Layer): 206 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 207 | super(SegHead, self).__init__() 208 | self.bn1 = nn.BatchNorm2D(inplanes) 209 | self.relu = nn.ReLU() 210 | if aux_head: 211 | self.con_bn_relu = nn.Sequential( 212 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 213 | nn.BatchNorm2D(interplanes), 214 | nn.ReLU(), 215 | ) 216 | else: 217 | self.con_bn_relu = nn.Sequential( 218 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 219 | nn.BatchNorm2D(interplanes), 220 | nn.ReLU(), 221 | ) 222 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 223 | 224 | 225 | def forward(self, x): 226 | x = self.bn1(x) 227 | x = self.relu(x) 228 | x = self.con_bn_relu(x) 229 | out = self.conv(x) 230 | return out 231 | 232 | 233 | 234 | # if __name__ == "__main__": 235 | # model = HrSegNetB16() 236 | # x = paddle.randn([1, 3, 400, 400]) 237 | # out = model(x) 238 | # print(out[0].shape) 239 | 240 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 241 | 242 | -------------------------------------------------------------------------------- /models/hrsegnet_b16_d4.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | """ 13 | This file is identical to the default HrSegNet, 14 | except for the modification of the parameter base 15 | """ 16 | 17 | # features 18 | # 1. The size of the high-resolution path remains constant throughout the process 19 | # 2. In order to reduce and flexibly control the computational cost, the channel of 20 | # the high-resolution path remains unchanged 21 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 22 | # heads used for auxiliary loss during training 23 | # 4. The seg head is performed in two steps, instead of restoring to the 24 | # original resolution all at once 25 | 26 | # If you need to use this model with paddleseg, you need to add it to the model library 27 | # using manager.MODELS.add_component() 28 | @manager.MODELS.add_component 29 | class HrSegNetB16D4(nn.Layer): 30 | """ 31 | The HrSegNet implementation based on PaddlePaddle.s 32 | 33 | Args: 34 | num_classes (int): The unique number of target classes. 35 | 36 | in_channels (int, optional): The channels of input image. Default: 3. 37 | 38 | base (int, optional): The base channel number of the model. Default: 16. 
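Note: besides setting base=16, this D4 variant also deepens every SegBlock to
four high-/low-resolution conv pairs with a fourth low-to-high fusion step.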
39 | """ 40 | def __init__(self, 41 | in_channels=3, # input channel 42 | base=16, # base channel of the model, 43 | num_classes=2 # number of classes 44 | ): 45 | super(HrSegNetB16D4, self).__init__() 46 | self.base = base 47 | self.num_classed = num_classes 48 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 49 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 50 | self.stage1 = nn.Sequential( 51 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 52 | nn.BatchNorm2D(base // 2), 53 | nn.ReLU(), 54 | ) 55 | self.stage2 = nn.Sequential( 56 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 57 | nn.BatchNorm2D(base), 58 | nn.ReLU(), 59 | ) 60 | 61 | self.seg1 = SegBlock(base=base, stage_index=1) 62 | self.seg2 = SegBlock(base=base, stage_index=2) 63 | self.seg3 = SegBlock(base=base, stage_index=3) 64 | 65 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 67 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 68 | 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | logit_list = [] 73 | h, w = paddle.shape(x)[2:] 74 | # aux_head only used in training 75 | if self.training: 76 | stem1_out = self.stage1(x) 77 | stem2_out = self.stage2(stem1_out) 78 | hrseg1_out = self.seg1(stem2_out) 79 | hrseg2_out = self.seg2(hrseg1_out) 80 | hrseg3_out = self.seg3(hrseg2_out) 81 | last_out = self.head(hrseg3_out) 82 | seghead1_out = self.aux_head1(hrseg1_out) 83 | seghead2_out = self.aux_head2(hrseg2_out) 84 | logit_list = [last_out, seghead1_out, seghead2_out] 85 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 86 | return logit_list 87 | else: 88 | stem1_out = self.stage1(x) 89 | stem2_out = self.stage2(stem1_out) 90 | hrseg1_out = self.seg1(stem2_out) 91 | hrseg2_out = self.seg2(hrseg1_out) 92 | hrseg3_out = self.seg3(hrseg2_out) 93 | last_out = self.head(hrseg3_out) 94 | logit_list = [last_out] 95 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 96 | return logit_list 97 | 98 | 99 | def init_weight(self): 100 | for m in self.sublayers(): 101 | if isinstance(m, nn.Conv2D): 102 | param_init.kaiming_normal_init(m.weight) 103 | elif isinstance(m, nn.BatchNorm2D): 104 | param_init.constant_init(m.weight, value=1) 105 | param_init.constant_init(m.bias, value=0) 106 | 107 | 108 | 109 | class SegBlock(nn.Layer): 110 | def __init__(self, 111 | base=32, 112 | stage_index=1):# stage_index=1,2,3. 
113 | super(SegBlock, self).__init__() 114 | 115 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 116 | self.h_conv1 = nn.Sequential( 117 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 118 | nn.BatchNorm2D(base), 119 | nn.ReLU() 120 | ) 121 | self.h_conv2 = nn.Sequential( 122 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 123 | nn.BatchNorm2D(base), 124 | nn.ReLU() 125 | ) 126 | self.h_conv3 = nn.Sequential( 127 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 128 | nn.BatchNorm2D(base), 129 | nn.ReLU() 130 | ) 131 | 132 | self.h_conv4 = nn.Sequential( 133 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 134 | nn.BatchNorm2D(base), 135 | nn.ReLU() 136 | ) 137 | 138 | # sematic guidance path/low-resolution path 139 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 140 | self.l_conv1 = nn.Sequential( 141 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 142 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 143 | nn.ReLU() 144 | ) 145 | elif stage_index==2: #second stage 146 | self.l_conv1 = nn.Sequential( 147 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 148 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 149 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 150 | nn.ReLU() 151 | ) 152 | elif stage_index==3: 153 | self.l_conv1 = nn.Sequential( 154 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 155 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 156 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 157 | nn.ReLU(), 158 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 159 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 160 | nn.ReLU() 161 | ) 162 | else: 163 | raise ValueError("stage_index must be 1, 2 or 3") 164 | self.l_conv2 = nn.Sequential( 165 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 166 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 167 | nn.ReLU() 168 | ) 169 | self.l_conv3 = nn.Sequential( 170 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 171 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 172 | nn.ReLU() 173 | ) 174 | 175 | self.l_conv4 = nn.Sequential( 176 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 177 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 178 | nn.ReLU() 179 | ) 180 | 181 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 182 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 183 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 184 | self.l2h_conv4 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 
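# The h_conv4/l_conv4 pair and the l2h_conv4 projection above are what
# distinguish this depth-4 SegBlock from the default depth-3 version
# (the extra fusion step happens in forward below).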
185 | 186 | 187 | 188 | def forward(self, x): 189 | # out = [] 190 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 191 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 192 | size = x.shape[2:] 193 | out_h1 = self.h_conv1(x) # high resolution path 194 | out_l1 = self.l_conv1(x) # low resolution path 195 | # print(out_l1.shape) 196 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 197 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 198 | 199 | out_h2 = self.h_conv2(out_hl1) 200 | out_l2 = self.l_conv2(out_l1) 201 | # print(out_l2.shape) 202 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 203 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 204 | 205 | out_h3 = self.h_conv3(out_hl2) 206 | out_l3 = self.l_conv3(out_l2) 207 | # print(out_l3.shape) 208 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 209 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 210 | 211 | out_h4 = self.h_conv4(out_hl3) 212 | out_l4 = self.l_conv4(out_l3) 213 | # print(out_l4.shape) 214 | out_l4_i = F.interpolate(out_l4, size=size, mode='bilinear', align_corners=True) 215 | out_hl4 = self.l2h_conv4(out_l4_i) + out_h4 216 | return out_hl4 217 | 218 | # seg head 219 | class SegHead(nn.Layer): 220 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 221 | super(SegHead, self).__init__() 222 | self.bn1 = nn.BatchNorm2D(inplanes) 223 | self.relu = nn.ReLU() 224 | if aux_head: 225 | self.con_bn_relu = nn.Sequential( 226 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 227 | nn.BatchNorm2D(interplanes), 228 | nn.ReLU(), 229 | ) 230 | else: 231 | self.con_bn_relu = nn.Sequential( 232 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 233 | nn.BatchNorm2D(interplanes), 234 | nn.ReLU(), 235 | ) 236 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 237 | 238 | 239 | def forward(self, x): 240 | x = self.bn1(x) 241 | x = self.relu(x) 242 | x = self.con_bn_relu(x) 243 | out = self.conv(x) 244 | return out 245 | 246 | 247 | 248 | # if __name__ == "__main__": 249 | # model = HrSegNetB16D4() 250 | # x = paddle.randn([1, 3, 400, 400]) 251 | # out = model(x) 252 | # print(out[0].shape) 253 | 254 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 255 | 256 | -------------------------------------------------------------------------------- /models/hrsegnet_b16_d5.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | """ 13 | This file is identical to the default HrSegNet, 14 | except for the modification of the parameter base 15 | """ 16 | 17 | # features 18 | # 1. The size of the high-resolution path remains constant throughout the process 19 | # 2. In order to reduce and flexibly control the computational cost, the channel of 20 | # the high-resolution path remains unchanged 21 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 22 | # heads used for auxiliary loss during training 23 | # 4. 
The seg head is performed in two steps, instead of restoring to the 24 | # original resolution all at once 25 | 26 | # If you need to use this model with paddleseg, you need to add it to the model library 27 | # using manager.MODELS.add_component() 28 | @manager.MODELS.add_component 29 | class HrSegNetB16D5(nn.Layer): 30 | """ 31 | The HrSegNet implementation based on PaddlePaddle.s 32 | 33 | Args: 34 | num_classes (int): The unique number of target classes. 35 | 36 | in_channels (int, optional): The channels of input image. Default: 3. 37 | 38 | base (int, optional): The base channel number of the model. Default: 16. 39 | """ 40 | def __init__(self, 41 | in_channels=3, # input channel 42 | base=16, # base channel of the model, 43 | num_classes=2 # number of classes 44 | ): 45 | super(HrSegNetB16D5, self).__init__() 46 | self.base = base 47 | self.num_classed = num_classes 48 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 49 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 50 | self.stage1 = nn.Sequential( 51 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 52 | nn.BatchNorm2D(base // 2), 53 | nn.ReLU(), 54 | ) 55 | self.stage2 = nn.Sequential( 56 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 57 | nn.BatchNorm2D(base), 58 | nn.ReLU(), 59 | ) 60 | 61 | self.seg1 = SegBlock(base=base, stage_index=1) 62 | self.seg2 = SegBlock(base=base, stage_index=2) 63 | self.seg3 = SegBlock(base=base, stage_index=3) 64 | 65 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 67 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 68 | 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | logit_list = [] 73 | h, w = paddle.shape(x)[2:] 74 | # aux_head only used in training 75 | if self.training: 76 | stem1_out = self.stage1(x) 77 | stem2_out = self.stage2(stem1_out) 78 | hrseg1_out = self.seg1(stem2_out) 79 | hrseg2_out = self.seg2(hrseg1_out) 80 | hrseg3_out = self.seg3(hrseg2_out) 81 | last_out = self.head(hrseg3_out) 82 | seghead1_out = self.aux_head1(hrseg1_out) 83 | seghead2_out = self.aux_head2(hrseg2_out) 84 | logit_list = [last_out, seghead1_out, seghead2_out] 85 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 86 | return logit_list 87 | else: 88 | stem1_out = self.stage1(x) 89 | stem2_out = self.stage2(stem1_out) 90 | hrseg1_out = self.seg1(stem2_out) 91 | hrseg2_out = self.seg2(hrseg1_out) 92 | hrseg3_out = self.seg3(hrseg2_out) 93 | last_out = self.head(hrseg3_out) 94 | logit_list = [last_out] 95 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 96 | return logit_list 97 | 98 | 99 | def init_weight(self): 100 | for m in self.sublayers(): 101 | if isinstance(m, nn.Conv2D): 102 | param_init.kaiming_normal_init(m.weight) 103 | elif isinstance(m, nn.BatchNorm2D): 104 | param_init.constant_init(m.weight, value=1) 105 | param_init.constant_init(m.bias, value=0) 106 | 107 | 108 | 109 | class SegBlock(nn.Layer): 110 | def __init__(self, 111 | base=32, 112 | stage_index=1):# stage_index=1,2,3. 
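# Depth-5 variant of SegBlock: five high-/low-resolution conv pairs and five
# low-to-high fusion steps (h_conv5, l_conv5, and l2h_conv5 below), rather
# than the default three.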
113 | super(SegBlock, self).__init__() 114 | 115 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 116 | self.h_conv1 = nn.Sequential( 117 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 118 | nn.BatchNorm2D(base), 119 | nn.ReLU() 120 | ) 121 | self.h_conv2 = nn.Sequential( 122 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 123 | nn.BatchNorm2D(base), 124 | nn.ReLU() 125 | ) 126 | self.h_conv3 = nn.Sequential( 127 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 128 | nn.BatchNorm2D(base), 129 | nn.ReLU() 130 | ) 131 | 132 | self.h_conv4 = nn.Sequential( 133 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 134 | nn.BatchNorm2D(base), 135 | nn.ReLU() 136 | ) 137 | 138 | self.h_conv5 = nn.Sequential( 139 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 140 | nn.BatchNorm2D(base), 141 | nn.ReLU() 142 | ) 143 | 144 | # sematic guidance path/low-resolution path 145 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 146 | self.l_conv1 = nn.Sequential( 147 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 148 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 149 | nn.ReLU() 150 | ) 151 | elif stage_index==2: #second stage 152 | self.l_conv1 = nn.Sequential( 153 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 154 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | elif stage_index==3: 159 | self.l_conv1 = nn.Sequential( 160 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 161 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU(), 164 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 165 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 166 | nn.ReLU() 167 | ) 168 | else: 169 | raise ValueError("stage_index must be 1, 2 or 3") 170 | self.l_conv2 = nn.Sequential( 171 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 172 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 173 | nn.ReLU() 174 | ) 175 | self.l_conv3 = nn.Sequential( 176 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 177 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 178 | nn.ReLU() 179 | ) 180 | 181 | self.l_conv4 = nn.Sequential( 182 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 183 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 184 | nn.ReLU() 185 | ) 186 | 187 | self.l_conv5 = nn.Sequential( 188 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 189 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 190 | nn.ReLU() 191 | ) 192 | 193 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), 
out_channels=base, kernel_size=1, stride=1, padding=0) 194 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 195 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 196 | self.l2h_conv4 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 197 | self.l2h_conv5 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 198 | 199 | 200 | 201 | def forward(self, x): 202 | # out = [] 203 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 204 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 205 | size = x.shape[2:] 206 | out_h1 = self.h_conv1(x) # high resolution path 207 | out_l1 = self.l_conv1(x) # low resolution path 208 | # print(out_l1.shape) 209 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 210 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 211 | 212 | out_h2 = self.h_conv2(out_hl1) 213 | out_l2 = self.l_conv2(out_l1) 214 | # print(out_l2.shape) 215 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 216 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 217 | 218 | out_h3 = self.h_conv3(out_hl2) 219 | out_l3 = self.l_conv3(out_l2) 220 | # print(out_l3.shape) 221 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 222 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 223 | 224 | out_h4 = self.h_conv4(out_hl3) 225 | out_l4 = self.l_conv4(out_l3) 226 | # print(out_l4.shape) 227 | out_l4_i = F.interpolate(out_l4, size=size, mode='bilinear', align_corners=True) 228 | out_hl4 = self.l2h_conv4(out_l4_i) + out_h4 229 | 230 | out_h5 = self.h_conv5(out_hl4) 231 | out_l5 = self.l_conv5(out_l4) 232 | # print(out_l5.shape) 233 | out_l5_i = F.interpolate(out_l5, size=size, mode='bilinear', align_corners=True) 234 | out_hl5 = self.l2h_conv5(out_l5_i) + out_h5 235 | return out_hl5 236 | 237 | # seg head 238 | class SegHead(nn.Layer): 239 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 240 | super(SegHead, self).__init__() 241 | self.bn1 = nn.BatchNorm2D(inplanes) 242 | self.relu = nn.ReLU() 243 | if aux_head: 244 | self.con_bn_relu = nn.Sequential( 245 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 246 | nn.BatchNorm2D(interplanes), 247 | nn.ReLU(), 248 | ) 249 | else: 250 | self.con_bn_relu = nn.Sequential( 251 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 252 | nn.BatchNorm2D(interplanes), 253 | nn.ReLU(), 254 | ) 255 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 256 | 257 | 258 | def forward(self, x): 259 | x = self.bn1(x) 260 | x = self.relu(x) 261 | x = self.con_bn_relu(x) 262 | out = self.conv(x) 263 | return out 264 | 265 | 266 | 267 | # if __name__ == "__main__": 268 | # model = HrSegNetB16D5() 269 | # x = paddle.randn([1, 3, 400, 400]) 270 | # out = model(x) 271 | # print(out[0].shape) 272 | 273 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 274 | 275 | -------------------------------------------------------------------------------- /models/hrsegnet_b32.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import 
paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | 13 | 14 | # features 15 | # 1. The size of the high-resolution path remains constant throughout the process 16 | # 2. In order to reduce and flexibly control the computational cost, the channel of 17 | # the high-resolution path remains unchanged 18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 19 | # heads used for auxiliary loss during training 20 | # 4. The seg head is performed in two steps, instead of restoring to the 21 | # original resolution all at once 22 | 23 | # If you need to use this model with paddleseg, you need to add it to the model library 24 | # using manager.MODELS.add_component() 25 | @manager.MODELS.add_component 26 | class HrSegNetB32(nn.Layer): 27 | """ 28 | The HrSegNet implementation based on PaddlePaddle. 29 | 30 | Args: 31 | num_classes (int): The unique number of target classes. 32 | 33 | in_channels (int, optional): The channels of input image. Default: 3. 34 | 35 | base (int, optional): The base channel number of the model. Default: 32. 36 | """ 37 | def __init__(self, 38 | in_channels=3, # input channel 39 | base=32, # base channel of the model, 40 | num_classes=2, # number of classes 41 | pretrained=None 42 | ): 43 | super(HrSegNetB32, self).__init__() 44 | self.base = base 45 | self.num_classes = num_classes 46 | self.pretrained = pretrained 47 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 48 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 49 | self.stage1 = nn.Sequential( 50 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 51 | nn.BatchNorm2D(base // 2), 52 | nn.ReLU(), 53 | ) 54 | self.stage2 = nn.Sequential( 55 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 56 | nn.BatchNorm2D(base), 57 | nn.ReLU(), 58 | ) 59 | 60 | self.seg1 = SegBlock(base=base, stage_index=1) 61 | self.seg2 = SegBlock(base=base, stage_index=2) 62 | self.seg3 = SegBlock(base=base, stage_index=3) 63 | 64 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 65 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 67 | 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | logit_list = [] 72 | h, w = paddle.shape(x)[2:] 73 | # aux_head only used in training 74 | if self.training: 75 | stem1_out = self.stage1(x) 76 | stem2_out = self.stage2(stem1_out) 77 | hrseg1_out = self.seg1(stem2_out) 78 | hrseg2_out = self.seg2(hrseg1_out) 79 | hrseg3_out = self.seg3(hrseg2_out) 80 | last_out = self.head(hrseg3_out) 81 | seghead1_out = self.aux_head1(hrseg1_out) 82 | seghead2_out = self.aux_head2(hrseg2_out) 83 | logit_list = [last_out, seghead1_out, seghead2_out] 84 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 85 | return logit_list 86 | else: 87 | stem1_out = self.stage1(x) 88 | stem2_out = self.stage2(stem1_out) 89 | hrseg1_out = self.seg1(stem2_out) 90 | hrseg2_out = self.seg2(hrseg1_out) 91 | hrseg3_out = self.seg3(hrseg2_out) 92 | last_out = self.head(hrseg3_out) 93 | 
logit_list = [last_out] 94 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 95 | return logit_list 96 | 97 | 98 | def init_weight(self): 99 | if self.pretrained is not None: 100 | utils.load_entire_model(self, self.pretrained) 101 | else: 102 | for m in self.sublayers(): 103 | if isinstance(m, nn.Conv2D): 104 | param_init.kaiming_normal_init(m.weight) 105 | elif isinstance(m, nn.BatchNorm2D): 106 | param_init.constant_init(m.weight, value=1) 107 | param_init.constant_init(m.bias, value=0) 108 | 109 | 110 | 111 | class SegBlock(nn.Layer): 112 | def __init__(self, 113 | base=32, 114 | stage_index=1):# stage_index=1,2,3. 115 | super(SegBlock, self).__init__() 116 | 117 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 118 | self.h_conv1 = nn.Sequential( 119 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 120 | nn.BatchNorm2D(base), 121 | nn.ReLU() 122 | ) 123 | self.h_conv2 = nn.Sequential( 124 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 125 | nn.BatchNorm2D(base), 126 | nn.ReLU() 127 | ) 128 | self.h_conv3 = nn.Sequential( 129 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 130 | nn.BatchNorm2D(base), 131 | nn.ReLU() 132 | ) 133 | 134 | # semantic guidance path/low-resolution path 135 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 136 | self.l_conv1 = nn.Sequential( 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU() 140 | ) 141 | elif stage_index==2: #second stage 142 | self.l_conv1 = nn.Sequential( 143 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 144 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 145 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 146 | nn.ReLU() 147 | ) 148 | elif stage_index==3: 149 | self.l_conv1 = nn.Sequential( 150 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 151 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 152 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 153 | nn.ReLU(), 154 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | else: 159 | raise ValueError("stage_index must be 1, 2 or 3") 160 | self.l_conv2 = nn.Sequential( 161 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU() 164 | ) 165 | self.l_conv3 = nn.Sequential( 166 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 167 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 168 | nn.ReLU() 169 | ) 170 | 171 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 172 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 173 | self.l2h_conv3 = 
nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 174 | 175 | 176 | 177 | def forward(self, x): 178 | # out = [] 179 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 180 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 181 | size = x.shape[2:] 182 | out_h1 = self.h_conv1(x) # high resolution path 183 | out_l1 = self.l_conv1(x) # low resolution path 184 | # print(out_l1.shape) 185 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 186 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 187 | 188 | out_h2 = self.h_conv2(out_hl1) 189 | out_l2 = self.l_conv2(out_l1) 190 | # print(out_l2.shape) 191 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 192 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 193 | 194 | out_h3 = self.h_conv3(out_hl2) 195 | out_l3 = self.l_conv3(out_l2) 196 | # print(out_l3.shape) 197 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 198 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 199 | return out_hl3 200 | 201 | # seg head 202 | class SegHead(nn.Layer): 203 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 204 | super(SegHead, self).__init__() 205 | self.bn1 = nn.BatchNorm2D(inplanes) 206 | self.relu = nn.ReLU() 207 | if aux_head: 208 | self.con_bn_relu = nn.Sequential( 209 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 210 | nn.BatchNorm2D(interplanes), 211 | nn.ReLU(), 212 | ) 213 | else: 214 | self.con_bn_relu = nn.Sequential( 215 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 216 | nn.BatchNorm2D(interplanes), 217 | nn.ReLU(), 218 | ) 219 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 220 | 221 | 222 | def forward(self, x): 223 | x = self.bn1(x) 224 | x = self.relu(x) 225 | x = self.con_bn_relu(x) 226 | out = self.conv(x) 227 | return out 228 | 229 | 230 | 231 | # if __name__ == "__main__": 232 | # model = HrSegNetB32() 233 | # x = paddle.randn([1, 3, 400, 400]) 234 | # out = model(x) 235 | # print(out[0].shape) 236 | 237 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 238 | 239 | -------------------------------------------------------------------------------- /models/hrsegnet_b48.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | 13 | 14 | # features 15 | # 1. The size of the high-resolution path remains constant throughout the process 16 | # 2. In order to reduce and flexibly control the computational cost, the channel of 17 | # the high-resolution path remains unchanged 18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 19 | # heads used for auxiliary loss during training 20 | # 4. 
The seg head is performed in two steps, instead of restoring to the 21 | # original resolution all at once 22 | 23 | # If you need to use this model with paddleseg, you need to add it to the model library 24 | # using manager.MODELS.add_component() 25 | @manager.MODELS.add_component 26 | class HrSegNetB48(nn.Layer): 27 | """ 28 | The HrSegNet implementation based on PaddlePaddle. 29 | 30 | Args: 31 | num_classes (int): The unique number of target classes. 32 | 33 | in_channels (int, optional): The channels of input image. Default: 3. 34 | 35 | base (int, optional): The base channel number of the model. Default: 48. 36 | """ 37 | def __init__(self, 38 | in_channels=3, # input channel 39 | base=48, # base channel of the model, 40 | num_classes=2, # number of classes 41 | pretrained = None # pretrained model 42 | ): 43 | super(HrSegNetB48, self).__init__() 44 | self.base = base 45 | self.num_classes = num_classes 46 | self.pretrained = pretrained 47 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 48 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 49 | self.stage1 = nn.Sequential( 50 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 51 | nn.BatchNorm2D(base // 2), 52 | nn.ReLU(), 53 | ) 54 | self.stage2 = nn.Sequential( 55 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 56 | nn.BatchNorm2D(base), 57 | nn.ReLU(), 58 | ) 59 | 60 | self.seg1 = SegBlock(base=base, stage_index=1) 61 | self.seg2 = SegBlock(base=base, stage_index=2) 62 | self.seg3 = SegBlock(base=base, stage_index=3) 63 | 64 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 65 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 67 | 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | logit_list = [] 72 | h, w = paddle.shape(x)[2:] 73 | # aux_head only used in training 74 | if self.training: 75 | stem1_out = self.stage1(x) 76 | stem2_out = self.stage2(stem1_out) 77 | hrseg1_out = self.seg1(stem2_out) 78 | hrseg2_out = self.seg2(hrseg1_out) 79 | hrseg3_out = self.seg3(hrseg2_out) 80 | last_out = self.head(hrseg3_out) 81 | seghead1_out = self.aux_head1(hrseg1_out) 82 | seghead2_out = self.aux_head2(hrseg2_out) 83 | logit_list = [last_out, seghead1_out, seghead2_out] 84 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 85 | return logit_list 86 | else: 87 | stem1_out = self.stage1(x) 88 | stem2_out = self.stage2(stem1_out) 89 | hrseg1_out = self.seg1(stem2_out) 90 | hrseg2_out = self.seg2(hrseg1_out) 91 | hrseg3_out = self.seg3(hrseg2_out) 92 | last_out = self.head(hrseg3_out) 93 | logit_list = [last_out] 94 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 95 | return logit_list 96 | 97 | 98 | def init_weight(self): 99 | if self.pretrained is not None: 100 | utils.load_entire_model(self, self.pretrained) 101 | else: 102 | for m in self.sublayers(): 103 | if isinstance(m, nn.Conv2D): 104 | param_init.kaiming_normal_init(m.weight) 105 | elif isinstance(m, nn.BatchNorm2D): 106 | param_init.constant_init(m.weight, value=1) 107 | param_init.constant_init(m.bias, value=0) 108 | 109 | 110 | 111 | class SegBlock(nn.Layer): 112 | def 
__init__(self, 113 | base=32, 114 | stage_index=1):# stage_index=1,2,3. 115 | super(SegBlock, self).__init__() 116 | 117 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 118 | self.h_conv1 = nn.Sequential( 119 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 120 | nn.BatchNorm2D(base), 121 | nn.ReLU() 122 | ) 123 | self.h_conv2 = nn.Sequential( 124 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 125 | nn.BatchNorm2D(base), 126 | nn.ReLU() 127 | ) 128 | self.h_conv3 = nn.Sequential( 129 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 130 | nn.BatchNorm2D(base), 131 | nn.ReLU() 132 | ) 133 | 134 | # semantic guidance path/low-resolution path 135 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 136 | self.l_conv1 = nn.Sequential( 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU() 140 | ) 141 | elif stage_index==2: #second stage 142 | self.l_conv1 = nn.Sequential( 143 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 144 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 145 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 146 | nn.ReLU() 147 | ) 148 | elif stage_index==3: 149 | self.l_conv1 = nn.Sequential( 150 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 151 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 152 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 153 | nn.ReLU(), 154 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | else: 159 | raise ValueError("stage_index must be 1, 2 or 3") 160 | self.l_conv2 = nn.Sequential( 161 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU() 164 | ) 165 | self.l_conv3 = nn.Sequential( 166 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 167 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 168 | nn.ReLU() 169 | ) 170 | 171 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 172 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 173 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 174 | 175 | 176 | 177 | def forward(self, x): 178 | # out = [] 179 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 180 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 181 | size = x.shape[2:] 182 | out_h1 = self.h_conv1(x) # high resolution path 183 | out_l1 = self.l_conv1(x) # low resolution path 184 | # print(out_l1.shape) 185 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 186 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 187 | 
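# Each fusion step in this forward pass repeats the pattern above: the low-resolution
# (semantic guidance) feature is bilinearly upsampled to the high-resolution size,
# projected from base*2**stage_index back to base channels by the matching 1x1
# l2h conv, and added element-wise to the high-resolution feature.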
188 | out_h2 = self.h_conv2(out_hl1) 189 | out_l2 = self.l_conv2(out_l1) 190 | # print(out_l2.shape) 191 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 192 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 193 | 194 | out_h3 = self.h_conv3(out_hl2) 195 | out_l3 = self.l_conv3(out_l2) 196 | # print(out_l3.shape) 197 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 198 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 199 | return out_hl3 200 | 201 | # seg head 202 | class SegHead(nn.Layer): 203 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 204 | super(SegHead, self).__init__() 205 | self.bn1 = nn.BatchNorm2D(inplanes) 206 | self.relu = nn.ReLU() 207 | if aux_head: 208 | self.con_bn_relu = nn.Sequential( 209 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 210 | nn.BatchNorm2D(interplanes), 211 | nn.ReLU(), 212 | ) 213 | else: 214 | self.con_bn_relu = nn.Sequential( 215 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 216 | nn.BatchNorm2D(interplanes), 217 | nn.ReLU(), 218 | ) 219 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 220 | 221 | 222 | def forward(self, x): 223 | x = self.bn1(x) 224 | x = self.relu(x) 225 | x = self.con_bn_relu(x) 226 | out = self.conv(x) 227 | return out 228 | 229 | 230 | 231 | # if __name__ == "__main__": 232 | # model = HrSegNetB48() 233 | # x = paddle.randn([1, 3, 400, 400]) 234 | # out = model(x) 235 | # print(out[0].shape) 236 | 237 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 238 | 239 | -------------------------------------------------------------------------------- /models/hrsegnet_b64.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | 13 | 14 | # features 15 | # 1. The size of the high-resolution path remains constant throughout the process 16 | # 2. In order to reduce and flexibly control the computational cost, the channel of 17 | # the high-resolution path remains unchanged 18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 19 | # heads used for auxiliary loss during training 20 | # 4. The seg head is performed in two steps, instead of restoring to the 21 | # original resolution all at once 22 | 23 | # If you need to use this model with paddleseg, you need to add it to the model library 24 | # using manager.MODELS.add_component() 25 | @manager.MODELS.add_component 26 | class HrSegNetB64(nn.Layer): 27 | """ 28 | The HrSegNet implementation based on PaddlePaddle. 29 | 30 | Args: 31 | num_classes (int): The unique number of target classes. 32 | 33 | in_channels (int, optional): The channels of input image. Default: 3. 34 | 35 | base (int, optional): The base channel number of the model. Default: 64. 
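Examples:
    A minimal usage sketch, added for illustration (it assumes PaddlePaddle is installed; num_classes and the 400x400 input are illustrative):

        import paddle
        model = HrSegNetB64(num_classes=2)
        model.eval()  # the inference branch returns a single logit map
        x = paddle.randn([1, 3, 400, 400])
        out = model(x)[0]  # [1, 2, 400, 400]: logits upsampled back to the input size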
36 | """ 37 | def __init__(self, 38 | in_channels=3, # input channel 39 | base=64, # base channel of the model, 40 | num_classes=2, # number of classes 41 | pretrained=None 42 | ): 43 | super(HrSegNetB64, self).__init__() 44 | self.base = base 45 | self.num_classed = num_classes 46 | self.pretrained = pretrained 47 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 48 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 49 | self.stage1 = nn.Sequential( 50 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 51 | nn.BatchNorm2D(base // 2), 52 | nn.ReLU(), 53 | ) 54 | self.stage2 = nn.Sequential( 55 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 56 | nn.BatchNorm2D(base), 57 | nn.ReLU(), 58 | ) 59 | 60 | self.seg1 = SegBlock(base=base, stage_index=1) 61 | self.seg2 = SegBlock(base=base, stage_index=2) 62 | self.seg3 = SegBlock(base=base, stage_index=3) 63 | 64 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 65 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 67 | 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | logit_list = [] 72 | h, w = paddle.shape(x)[2:] 73 | # aux_head only used in training 74 | if self.training: 75 | stem1_out = self.stage1(x) 76 | stem2_out = self.stage2(stem1_out) 77 | hrseg1_out = self.seg1(stem2_out) 78 | hrseg2_out = self.seg2(hrseg1_out) 79 | hrseg3_out = self.seg3(hrseg2_out) 80 | last_out = self.head(hrseg3_out) 81 | seghead1_out = self.aux_head1(hrseg1_out) 82 | seghead2_out = self.aux_head2(hrseg2_out) 83 | logit_list = [last_out, seghead1_out, seghead2_out] 84 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 85 | return logit_list 86 | else: 87 | stem1_out = self.stage1(x) 88 | stem2_out = self.stage2(stem1_out) 89 | hrseg1_out = self.seg1(stem2_out) 90 | hrseg2_out = self.seg2(hrseg1_out) 91 | hrseg3_out = self.seg3(hrseg2_out) 92 | last_out = self.head(hrseg3_out) 93 | logit_list = [last_out] 94 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 95 | return logit_list 96 | 97 | 98 | def init_weight(self): 99 | if self.pretrained is not None: 100 | utils.load_entire_model(self, self.pretrained) 101 | else: 102 | for m in self.sublayers(): 103 | if isinstance(m, nn.Conv2D): 104 | param_init.kaiming_normal_init(m.weight) 105 | elif isinstance(m, nn.BatchNorm2D): 106 | param_init.constant_init(m.weight, value=1) 107 | param_init.constant_init(m.bias, value=0) 108 | 109 | 110 | 111 | class SegBlock(nn.Layer): 112 | def __init__(self, 113 | base=32, 114 | stage_index=1):# stage_index=1,2,3. 
115 | super(SegBlock, self).__init__() 116 | 117 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 118 | self.h_conv1 = nn.Sequential( 119 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 120 | nn.BatchNorm2D(base), 121 | nn.ReLU() 122 | ) 123 | self.h_conv2 = nn.Sequential( 124 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 125 | nn.BatchNorm2D(base), 126 | nn.ReLU() 127 | ) 128 | self.h_conv3 = nn.Sequential( 129 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 130 | nn.BatchNorm2D(base), 131 | nn.ReLU() 132 | ) 133 | 134 | # semantic guidance path/low-resolution path 135 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 136 | self.l_conv1 = nn.Sequential( 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU() 140 | ) 141 | elif stage_index==2: #second stage 142 | self.l_conv1 = nn.Sequential( 143 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 144 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 145 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 146 | nn.ReLU() 147 | ) 148 | elif stage_index==3: 149 | self.l_conv1 = nn.Sequential( 150 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 151 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 152 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 153 | nn.ReLU(), 154 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | else: 159 | raise ValueError("stage_index must be 1, 2 or 3") 160 | self.l_conv2 = nn.Sequential( 161 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU() 164 | ) 165 | self.l_conv3 = nn.Sequential( 166 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 167 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 168 | nn.ReLU() 169 | ) 170 | 171 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 172 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 173 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 174 | 175 | 176 | 177 | def forward(self, x): 178 | # out = [] 179 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 180 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 181 | size = x.shape[2:] 182 | out_h1 = self.h_conv1(x) # high resolution path 183 | out_l1 = self.l_conv1(x) # low resolution path 184 | # print(out_l1.shape) 185 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 186 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 187 | 188 | out_h2 = self.h_conv2(out_hl1) 189 | out_l2 = self.l_conv2(out_l1) 
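# Shape note: given block input of shape (N, base, H, W), out_h2 stays
# (N, base, H, W), while out_l2 keeps the reduced low-path size
# (N, base*2**stage_index, H/2**stage_index, W/2**stage_index).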
190 | # print(out_l2.shape) 191 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 192 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 193 | 194 | out_h3 = self.h_conv3(out_hl2) 195 | out_l3 = self.l_conv3(out_l2) 196 | # print(out_l3.shape) 197 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 198 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 199 | return out_hl3 200 | 201 | # seg head 202 | class SegHead(nn.Layer): 203 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 204 | super(SegHead, self).__init__() 205 | self.bn1 = nn.BatchNorm2D(inplanes) 206 | self.relu = nn.ReLU() 207 | if aux_head: 208 | self.con_bn_relu = nn.Sequential( 209 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 210 | nn.BatchNorm2D(interplanes), 211 | nn.ReLU(), 212 | ) 213 | else: 214 | self.con_bn_relu = nn.Sequential( 215 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 216 | nn.BatchNorm2D(interplanes), 217 | nn.ReLU(), 218 | ) 219 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 220 | 221 | 222 | def forward(self, x): 223 | x = self.bn1(x) 224 | x = self.relu(x) 225 | x = self.con_bn_relu(x) 226 | out = self.conv(x) 227 | return out 228 | 229 | 230 | 231 | # if __name__ == "__main__": 232 | # model = HrSegNetB64() 233 | # x = paddle.randn([1, 3, 400, 400]) 234 | # out = model(x) 235 | # print(out[0].shape) 236 | 237 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 238 | 239 | -------------------------------------------------------------------------------- /models/ocrnet.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from paddleseg import utils 6 | from paddleseg.cvlibs import manager, param_init 7 | from paddleseg.models import layers 8 | 9 | 10 | @manager.MODELS.add_component 11 | class OCRNet(nn.Layer): 12 | """ 13 | The OCRNet implementation based on PaddlePaddle. 14 | The original article refers to 15 | Yuan, Yuhui, et al. "Object-Contextual Representations for Semantic Segmentation" 16 | (https://arxiv.org/pdf/1909.11065.pdf) 17 | 18 | Args: 19 | num_classes (int): The unique number of target classes. 20 | backbone (Paddle.nn.Layer): Backbone network. 21 | backbone_indices (tuple): A tuple indicates the indices of output of backbone. 22 | It can be either one or two values, if two values, the first index will be taken as 23 | a deep-supervision feature in auxiliary layer; the second one will be taken as 24 | input of pixel representation. If one value, it is taken by both above. 25 | ocr_mid_channels (int, optional): The number of middle channels in OCRHead. Default: 512. 26 | ocr_key_channels (int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. 27 | align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature 28 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 29 | pretrained (str, optional): The path or url of pretrained model. Default: None. 
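Examples:
    A minimal construction sketch, added for illustration. It assumes PaddleSeg's
    HRNet_W18 backbone (the pairing suggested by this repository's
    ocrnet_hrnetw18.yml config); passing a single backbone index reuses the same
    feature for both the auxiliary branch and the pixel representation:

        from paddleseg.models.backbones import HRNet_W18
        model = OCRNet(num_classes=2, backbone=HRNet_W18(), backbone_indices=(0,))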
30 | """ 31 | 32 | def __init__(self, 33 | num_classes, 34 | backbone, 35 | backbone_indices, 36 | ocr_mid_channels=512, 37 | ocr_key_channels=256, 38 | align_corners=False, 39 | pretrained=None): 40 | super().__init__() 41 | 42 | self.backbone = backbone 43 | self.backbone_indices = backbone_indices 44 | in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] 45 | 46 | self.head = OCRHead( 47 | num_classes=num_classes, 48 | in_channels=in_channels, 49 | ocr_mid_channels=ocr_mid_channels, 50 | ocr_key_channels=ocr_key_channels) 51 | 52 | self.align_corners = align_corners 53 | self.pretrained = pretrained 54 | self.init_weight() 55 | 56 | def forward(self, x): 57 | feats = self.backbone(x) 58 | feats = [feats[i] for i in self.backbone_indices] 59 | logit_list = self.head(feats) 60 | if not self.training: 61 | logit_list = [logit_list[0]] 62 | 63 | logit_list = [ 64 | F.interpolate( 65 | logit, 66 | paddle.shape(x)[2:], 67 | mode='bilinear', 68 | align_corners=self.align_corners) for logit in logit_list 69 | ] 70 | return logit_list 71 | 72 | def init_weight(self): 73 | if self.pretrained is not None: 74 | utils.load_entire_model(self, self.pretrained) 75 | 76 | 77 | class OCRHead(nn.Layer): 78 | """ 79 | The Object contextual representation head. 80 | 81 | Args: 82 | num_classes(int): The unique number of target classes. 83 | in_channels(tuple): The number of input channels. 84 | ocr_mid_channels(int, optional): The number of middle channels in OCRHead. Default: 512. 85 | ocr_key_channels(int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. 86 | """ 87 | 88 | def __init__(self, 89 | num_classes, 90 | in_channels, 91 | ocr_mid_channels=512, 92 | ocr_key_channels=256): 93 | super().__init__() 94 | 95 | self.num_classes = num_classes 96 | self.spatial_gather = SpatialGatherBlock(ocr_mid_channels, num_classes) 97 | self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels, 98 | ocr_mid_channels) 99 | 100 | self.indices = [-2, -1] if len(in_channels) > 1 else [-1, -1] 101 | 102 | self.conv3x3_ocr = layers.ConvBNReLU( 103 | in_channels[self.indices[1]], ocr_mid_channels, 3, padding=1) 104 | self.cls_head = nn.Conv2D(ocr_mid_channels, self.num_classes, 1) 105 | self.aux_head = nn.Sequential( 106 | layers.ConvBNReLU(in_channels[self.indices[0]], 107 | in_channels[self.indices[0]], 1), 108 | nn.Conv2D(in_channels[self.indices[0]], self.num_classes, 1)) 109 | 110 | self.init_weight() 111 | 112 | def forward(self, feat_list): 113 | feat_shallow, feat_deep = feat_list[self.indices[0]], feat_list[ 114 | self.indices[1]] 115 | 116 | soft_regions = self.aux_head(feat_shallow) 117 | pixels = self.conv3x3_ocr(feat_deep) 118 | 119 | object_regions = self.spatial_gather(pixels, soft_regions) 120 | ocr = self.spatial_ocr(pixels, object_regions) 121 | 122 | logit = self.cls_head(ocr) 123 | return [logit, soft_regions] 124 | 125 | def init_weight(self): 126 | """Initialize the parameters of model parts.""" 127 | for sublayer in self.sublayers(): 128 | if isinstance(sublayer, nn.Conv2D): 129 | param_init.normal_init(sublayer.weight, std=0.001) 130 | elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)): 131 | param_init.constant_init(sublayer.weight, value=1.0) 132 | param_init.constant_init(sublayer.bias, value=0.0) 133 | 134 | 135 | class SpatialGatherBlock(nn.Layer): 136 | """Aggregation layer to compute the pixel-region representation.""" 137 | 138 | def __init__(self, pixels_channels, regions_channels): 139 | super().__init__() 140 | 
self.pixels_channels = pixels_channels 141 | self.regions_channels = regions_channels 142 | 143 | def forward(self, pixels, regions): 144 | # pixels: from (n, c, h, w) to (n, h*w, c) 145 | pixels = paddle.reshape(pixels, (0, self.pixels_channels, -1)) 146 | pixels = paddle.transpose(pixels, (0, 2, 1)) 147 | 148 | # regions: from (n, k, h, w) to (n, k, h*w) 149 | regions = paddle.reshape(regions, (0, self.regions_channels, -1)) 150 | regions = F.softmax(regions, axis=2) 151 | 152 | # feats: from (n, k, c) to (n, c, k, 1) 153 | feats = paddle.bmm(regions, pixels) 154 | feats = paddle.transpose(feats, (0, 2, 1)) 155 | feats = paddle.unsqueeze(feats, axis=-1) 156 | 157 | return feats 158 | 159 | 160 | class SpatialOCRModule(nn.Layer): 161 | """Aggregate the global object representation to update the representation for each pixel.""" 162 | 163 | def __init__(self, 164 | in_channels, 165 | key_channels, 166 | out_channels, 167 | dropout_rate=0.1): 168 | super().__init__() 169 | 170 | self.attention_block = ObjectAttentionBlock(in_channels, key_channels) 171 | self.conv1x1 = nn.Sequential( 172 | layers.ConvBNReLU(2 * in_channels, out_channels, 1), 173 | nn.Dropout2D(dropout_rate)) 174 | 175 | def forward(self, pixels, regions): 176 | context = self.attention_block(pixels, regions) 177 | feats = paddle.concat([context, pixels], axis=1) 178 | feats = self.conv1x1(feats) 179 | 180 | return feats 181 | 182 | 183 | class ObjectAttentionBlock(nn.Layer): 184 | """A self-attention module.""" 185 | 186 | def __init__(self, in_channels, key_channels): 187 | super().__init__() 188 | 189 | self.in_channels = in_channels 190 | self.key_channels = key_channels 191 | 192 | self.f_pixel = nn.Sequential( 193 | layers.ConvBNReLU(in_channels, key_channels, 1), 194 | layers.ConvBNReLU(key_channels, key_channels, 1)) 195 | 196 | self.f_object = nn.Sequential( 197 | layers.ConvBNReLU(in_channels, key_channels, 1), 198 | layers.ConvBNReLU(key_channels, key_channels, 1)) 199 | 200 | self.f_down = layers.ConvBNReLU(in_channels, key_channels, 1) 201 | 202 | self.f_up = layers.ConvBNReLU(key_channels, in_channels, 1) 203 | 204 | def forward(self, x, proxy): 205 | x_shape = paddle.shape(x) 206 | # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) 207 | query = self.f_pixel(x) 208 | query = paddle.reshape(query, (0, self.key_channels, -1)) 209 | query = paddle.transpose(query, (0, 2, 1)) 210 | 211 | # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) 212 | key = self.f_object(proxy) 213 | key = paddle.reshape(key, (0, self.key_channels, -1)) 214 | 215 | # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) 216 | value = self.f_down(proxy) 217 | value = paddle.reshape(value, (0, self.key_channels, -1)) 218 | value = paddle.transpose(value, (0, 2, 1)) 219 | 220 | # sim_map (n, h1*w1, h2*w2) 221 | sim_map = paddle.bmm(query, key) 222 | sim_map = (self.key_channels**-.5) * sim_map 223 | sim_map = F.softmax(sim_map, axis=-1) 224 | 225 | # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) 226 | context = paddle.bmm(sim_map, value) 227 | context = paddle.transpose(context, (0, 2, 1)) 228 | context = paddle.reshape(context, 229 | (0, self.key_channels, x_shape[2], x_shape[3])) 230 | context = self.f_up(context) 231 | 232 | return context 233 | -------------------------------------------------------------------------------- /models/pspnet.py: -------------------------------------------------------------------------------- 1 | import paddle.nn as nn 2 | import paddle.nn.functional as F 3 | 4 | import 
paddle 5 | from paddleseg.cvlibs import manager 6 | from paddleseg.models import layers 7 | from paddleseg.utils import utils 8 | 9 | 10 | # @manager.MODELS.add_component 11 | class PSPNet(nn.Layer): 12 | """ 13 | The PSPNet implementation based on PaddlePaddle. 14 | 15 | The original article refers to 16 | Zhao, Hengshuang, et al. "Pyramid scene parsing network" 17 | (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf). 18 | 19 | Args: 20 | num_classes (int): The unique number of target classes. 21 | backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101. 22 | backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. 23 | pp_out_channels (int, optional): The output channels after Pyramid Pooling Module. Default: 1024. 24 | bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1,2,3,6). 25 | enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. 26 | align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, 27 | e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 28 | pretrained (str, optional): The path or url of pretrained model. Default: None. 29 | """ 30 | 31 | def __init__(self, 32 | num_classes, 33 | backbone, 34 | backbone_indices=(2, 3), 35 | pp_out_channels=1024, 36 | bin_sizes=(1, 2, 3, 6), 37 | enable_auxiliary_loss=True, 38 | align_corners=False, 39 | pretrained=None): 40 | super().__init__() 41 | 42 | self.backbone = backbone 43 | backbone_channels = [ 44 | backbone.feat_channels[i] for i in backbone_indices 45 | ] 46 | 47 | self.head = PSPNetHead(num_classes, backbone_indices, backbone_channels, 48 | pp_out_channels, bin_sizes, 49 | enable_auxiliary_loss, align_corners) 50 | self.align_corners = align_corners 51 | self.pretrained = pretrained 52 | self.init_weight() 53 | 54 | def forward(self, x): 55 | feat_list = self.backbone(x) 56 | logit_list = self.head(feat_list) 57 | return [ 58 | F.interpolate( 59 | logit, 60 | paddle.shape(x)[2:], 61 | mode='bilinear', 62 | align_corners=self.align_corners) for logit in logit_list 63 | ] 64 | 65 | def init_weight(self): 66 | if self.pretrained is not None: 67 | utils.load_entire_model(self, self.pretrained) 68 | 69 | 70 | class PSPNetHead(nn.Layer): 71 | """ 72 | The PSPNetHead implementation. 73 | 74 | Args: 75 | num_classes (int): The unique number of target classes. 76 | backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. 77 | The first index will be taken as a deep-supervision feature in auxiliary layer; 78 | the second one will be taken as input of Pyramid Pooling Module (PPModule). 79 | Usually backbone consists of four downsampling stage, and return an output of 80 | each stage. If we set it as (2, 3) in ResNet, that means taking feature map of the third 81 | stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule. 82 | backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. 83 | pp_out_channels (int): The output channels after Pyramid Pooling Module. 84 | bin_sizes (tuple): The out size of pooled feature maps. 85 | enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. 86 | align_corners (bool): An argument of F.interpolate. 
It should be set to False when the output size of feature 87 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. 88 | """ 89 | 90 | def __init__(self, num_classes, backbone_indices, backbone_channels, 91 | pp_out_channels, bin_sizes, enable_auxiliary_loss, 92 | align_corners): 93 | 94 | super().__init__() 95 | 96 | self.backbone_indices = backbone_indices 97 | 98 | self.psp_module = layers.PPModule( 99 | in_channels=backbone_channels[1], 100 | out_channels=pp_out_channels, 101 | bin_sizes=bin_sizes, 102 | dim_reduction=True, 103 | align_corners=align_corners) 104 | 105 | self.dropout = nn.Dropout(p=0.1) # dropout_prob 106 | 107 | self.conv = nn.Conv2D( 108 | in_channels=pp_out_channels, 109 | out_channels=num_classes, 110 | kernel_size=1) 111 | 112 | if enable_auxiliary_loss: 113 | self.auxlayer = layers.AuxLayer( 114 | in_channels=backbone_channels[0], 115 | inter_channels=backbone_channels[0] // 4, 116 | out_channels=num_classes) 117 | 118 | self.enable_auxiliary_loss = enable_auxiliary_loss 119 | 120 | def forward(self, feat_list): 121 | logit_list = [] 122 | x = feat_list[self.backbone_indices[1]] 123 | x = self.psp_module(x) 124 | x = self.dropout(x) 125 | logit = self.conv(x) 126 | logit_list.append(logit) 127 | 128 | if self.enable_auxiliary_loss: 129 | auxiliary_feat = feat_list[self.backbone_indices[0]] 130 | auxiliary_logit = self.auxlayer(auxiliary_feat) 131 | logit_list.append(auxiliary_logit) 132 | 133 | return logit_list 134 | -------------------------------------------------------------------------------- /models/rucnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import paddle 16 | import paddle.nn as nn 17 | import paddle.nn.functional as F 18 | 19 | from paddleseg import utils 20 | from paddleseg.cvlibs import manager 21 | from paddleseg.models import layers 22 | 23 | __all__ = ['RUCNet'] 24 | 25 | 26 | 27 | 28 | 29 | @manager.MODELS.add_component 30 | class RUCNet(nn.Layer): 31 | """ 32 | 33 | The original article refers to 34 | https://www.mdpi.com/1424-8220/23/1/53 35 | 36 | Args: 37 | num_classes (int): The unique number of target classes. 38 | align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature 39 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 40 | use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling. 41 | If False, use resize_bilinear. Default: False. 42 | in_channels (int, optional): The channels of input image. Default: 3. 43 | pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None. 
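Examples:
    A minimal usage sketch, added for illustration (num_classes and the input
    size are illustrative, assuming a binary crack/background task):

        import paddle
        model = RUCNet(num_classes=2)
        x = paddle.randn([1, 3, 400, 400])
        logit = model(x)[0]  # [1, 2, 400, 400]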
44 | """ 45 | 46 | def __init__(self, 47 | num_classes, 48 | align_corners=False, 49 | use_deconv=False, 50 | in_channels=3, 51 | pretrained=None): 52 | super().__init__() 53 | 54 | # self.encode = Encoder(in_channels) 55 | self.encode = New_Encoder(in_channels) 56 | self.decode = Decoder(align_corners, use_deconv=use_deconv) 57 | self.cls = self.conv = nn.Conv2D( 58 | in_channels=64, 59 | out_channels=num_classes, 60 | kernel_size=3, 61 | stride=1, 62 | padding=1) 63 | 64 | self.pretrained = pretrained 65 | self.init_weight() 66 | 67 | def forward(self, x): 68 | logit_list = [] 69 | x, short_cuts = self.encode(x) 70 | x = self.decode(x, short_cuts) 71 | logit = self.cls(x) 72 | logit_list.append(logit) 73 | return logit_list 74 | 75 | def init_weight(self): 76 | if self.pretrained is not None: 77 | utils.load_entire_model(self, self.pretrained) 78 | 79 | 80 | class ResidualDownsampleBlock(nn.Layer): 81 | def __init__(self, in_channels, out_channels): 82 | super().__init__() 83 | 84 | self.conv1 = layers.ConvBNReLU(in_channels, out_channels, 3, stride=2, padding=1) 85 | self.conv2=layers.ConvBNReLU(out_channels, out_channels, kernel_size=3, stride=1, padding="same") 86 | self.skip = layers.ConvBNReLU(in_channels, out_channels, 1, stride=2, padding=0) 87 | 88 | self.conv3 = layers.ConvBNReLU(out_channels, out_channels, 3, stride=1, padding=1) 89 | self.conv4=layers.ConvBNReLU(out_channels, out_channels, 3, stride=1, padding=1) 90 | 91 | def forward(self, x): 92 | x1 = self.conv1(x) 93 | x1 = self.conv2(x1) 94 | xk = self.skip(x) 95 | x1 = x1 + xk 96 | 97 | x2 = self.conv3(x1) 98 | x2 = self.conv4(x2) 99 | x2 = x2 + x1 100 | return x2 101 | 102 | class SCSE(nn.Layer): 103 | def __init__(self, in_channel): 104 | super().__init__() 105 | 106 | self.spatial_attention=SpatialAttention(in_channel) 107 | self.channel_attention=ChannelAttention(in_channel) 108 | 109 | def forward(self, x): 110 | return self.spatial_attention(x) + self.channel_attention(x) 111 | 112 | 113 | class SpatialAttention(nn.Layer): 114 | def __init__(self, in_channel): 115 | super().__init__() 116 | self.spatial_conv=nn.Conv2D(in_channel, out_channels=1, kernel_size=1, stride=1, padding=0) 117 | 118 | def forward(self, x): 119 | return x * F.sigmoid(self.spatial_conv(x)) 120 | 121 | class ChannelAttention(nn.Layer): 122 | def __init__(self, in_channel): 123 | super().__init__() 124 | 125 | self.gap=nn.AdaptiveAvgPool2D(1) 126 | self.linear1=nn.Linear(in_channel, in_channel//2) 127 | self.linear2=nn.Linear(in_channel//2, in_channel) 128 | 129 | def forward(self, x): 130 | t=self.gap(x).squeeze(axis=[2,3]) 131 | t=self.linear1(t) 132 | t=self.linear2(t) 133 | return x * F.sigmoid(t.unsqueeze(axis=[2,3])) 134 | 135 | class New_Encoder(nn.Layer): 136 | def __init__(self, in_channels=3): 137 | super().__init__() 138 | 139 | self.double_conv = nn.Sequential( 140 | layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3)) 141 | 142 | down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]] 143 | 144 | self.down_sample_list = nn.LayerList([ 145 | self.down_sampling(channel[0], channel[1]) 146 | for channel in down_channels 147 | ]) 148 | 149 | self.scse_list=nn.LayerList( 150 | [SCSE(128), 151 | SCSE(256), 152 | SCSE(512), 153 | SCSE(512)] 154 | ) 155 | 156 | def down_sampling(self, in_channels, out_channels): 157 | rdb=ResidualDownsampleBlock(in_channels, out_channels) 158 | return rdb 159 | 160 | 161 | def forward(self, x): 162 | short_cuts = [] 163 | x = self.double_conv(x) 164 | for i, down_sample in 
enumerate(self.down_sample_list): 165 | short_cuts.append(x) 166 | x = down_sample(x) 167 | # print(x.shape) 168 | x=self.scse_list[i](x) 169 | 170 | return x, short_cuts 171 | 172 | 173 | class Encoder(nn.Layer): 174 | def __init__(self, in_channels=3): 175 | super().__init__() 176 | 177 | self.double_conv = nn.Sequential( 178 | layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3)) 179 | down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]] 180 | self.down_sample_list = nn.LayerList([ 181 | self.down_sampling(channel[0], channel[1]) 182 | for channel in down_channels 183 | ]) 184 | 185 | def down_sampling(self, in_channels, out_channels): 186 | modules = [] 187 | modules.append(nn.MaxPool2D(kernel_size=2, stride=2)) 188 | modules.append(layers.ConvBNReLU(in_channels, out_channels, 3)) 189 | modules.append(layers.ConvBNReLU(out_channels, out_channels, 3)) 190 | return nn.Sequential(*modules) 191 | 192 | def forward(self, x): 193 | short_cuts = [] 194 | x = self.double_conv(x) 195 | for down_sample in self.down_sample_list: 196 | short_cuts.append(x) 197 | x = down_sample(x) 198 | return x, short_cuts 199 | 200 | 201 | class Decoder(nn.Layer): 202 | def __init__(self, align_corners, use_deconv=False): 203 | super().__init__() 204 | 205 | up_channels = [[512, 256], [256, 128], [128, 64], [64, 64]] 206 | self.up_sample_list = nn.LayerList([ 207 | UpSampling(channel[0], channel[1], align_corners, use_deconv) 208 | for channel in up_channels 209 | ]) 210 | 211 | def forward(self, x, short_cuts): 212 | for i in range(len(short_cuts)): 213 | x = self.up_sample_list[i](x, short_cuts[-(i + 1)]) 214 | return x 215 | 216 | 217 | class UpSampling(nn.Layer): 218 | def __init__(self, 219 | in_channels, 220 | out_channels, 221 | align_corners, 222 | use_deconv=False): 223 | super().__init__() 224 | 225 | self.align_corners = align_corners 226 | 227 | self.use_deconv = use_deconv 228 | if self.use_deconv: 229 | self.deconv = nn.Conv2DTranspose( 230 | in_channels, 231 | out_channels // 2, 232 | kernel_size=2, 233 | stride=2, 234 | padding=0) 235 | in_channels = in_channels + out_channels // 2 236 | else: 237 | in_channels *= 2 238 | 239 | self.double_conv = nn.Sequential( 240 | layers.ConvBNReLU(in_channels, out_channels, 3), 241 | layers.ConvBNReLU(out_channels, out_channels, 3), 242 | SCSE(out_channels)) # scse 243 | 244 | def forward(self, x, short_cut): 245 | if self.use_deconv: 246 | x = self.deconv(x) 247 | else: 248 | x = F.interpolate( 249 | x, 250 | paddle.shape(short_cut)[2:], 251 | mode='bilinear', 252 | align_corners=self.align_corners) 253 | x = paddle.concat([x, short_cut], axis=1) 254 | x = self.double_conv(x) 255 | return x 256 | -------------------------------------------------------------------------------- /models/stdcseg.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | 6 | from paddleseg.models import layers 7 | from paddleseg.cvlibs import manager 8 | from paddleseg.utils import utils 9 | 10 | 11 | # @manager.MODELS.add_component 12 | class STDCSeg(nn.Layer): 13 | """ 14 | The STDCSeg implementation based on PaddlePaddle. 15 | 16 | The original article refers to 17 | Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation." 18 | (https://arxiv.org/abs/2104.13188) 19 | 20 | Args: 21 | num_classes(int): The unique number of target classes. 
22 | backbone(nn.Layer): Backbone network, STDCNet1446/STDCNet813. STDCNet1446 corresponds to STDC2 and STDCNet813 to STDC1 in the paper. 23 | use_boundary_8(bool, optional): Whether to use the detail loss on the 1/8 feature. According to the paper, it should be True for the best metric. Default: True. 24 | If you want to use use_boundary_2/use_boundary_4/use_boundary_16 as well, append the corresponding number of loss functions for DetailAggregateLoss; it should work properly. 25 | use_conv_last(bool, optional): Determines ContextPath's inplanes variable according to whether the backbone's last conv is used. Default: False. 26 | pretrained (str, optional): The path or url of pretrained model. Default: None. 27 | """ 28 | 29 | def __init__(self, 30 | num_classes, 31 | backbone, 32 | use_boundary_2=False, 33 | use_boundary_4=False, 34 | use_boundary_8=True, 35 | use_boundary_16=False, 36 | use_conv_last=False, 37 | pretrained=None): 38 | super(STDCSeg, self).__init__() 39 | 40 | self.use_boundary_2 = use_boundary_2 41 | self.use_boundary_4 = use_boundary_4 42 | self.use_boundary_8 = use_boundary_8 43 | self.use_boundary_16 = use_boundary_16 44 | self.cp = ContextPath(backbone, use_conv_last=use_conv_last) 45 | self.ffm = FeatureFusionModule(384, 256) 46 | self.conv_out = SegHead(256, 256, num_classes) 47 | self.conv_out8 = SegHead(128, 64, num_classes) 48 | self.conv_out16 = SegHead(128, 64, num_classes) 49 | self.conv_out_sp16 = SegHead(512, 64, 1) 50 | self.conv_out_sp8 = SegHead(256, 64, 1) 51 | self.conv_out_sp4 = SegHead(64, 64, 1) 52 | self.conv_out_sp2 = SegHead(32, 64, 1) 53 | self.pretrained = pretrained 54 | self.init_weight() 55 | 56 | def forward(self, x): 57 | x_hw = paddle.shape(x)[2:] 58 | feat_res2, feat_res4, feat_res8, _, feat_cp8, feat_cp16 = self.cp(x) 59 | 60 | logit_list = [] 61 | if self.training: 62 | feat_fuse = self.ffm(feat_res8, feat_cp8) 63 | feat_out = self.conv_out(feat_fuse) 64 | feat_out8 = self.conv_out8(feat_cp8) 65 | feat_out16 = self.conv_out16(feat_cp16) 66 | 67 | logit_list = [feat_out, feat_out8, feat_out16] 68 | logit_list = [ 69 | F.interpolate( 70 | x, x_hw, mode='bilinear', align_corners=True) 71 | for x in logit_list 72 | ] 73 | 74 | if self.use_boundary_2: 75 | feat_out_sp2 = self.conv_out_sp2(feat_res2) 76 | logit_list.append(feat_out_sp2) 77 | if self.use_boundary_4: 78 | feat_out_sp4 = self.conv_out_sp4(feat_res4) 79 | logit_list.append(feat_out_sp4) 80 | if self.use_boundary_8: 81 | feat_out_sp8 = self.conv_out_sp8(feat_res8) 82 | logit_list.append(feat_out_sp8) 83 | else: 84 | feat_fuse = self.ffm(feat_res8, feat_cp8) 85 | feat_out = self.conv_out(feat_fuse) 86 | feat_out = F.interpolate( 87 | feat_out, x_hw, mode='bilinear', align_corners=True) 88 | logit_list = [feat_out] 89 | 90 | return logit_list 91 | 92 | def init_weight(self): 93 | if self.pretrained is not None: 94 | utils.load_entire_model(self, self.pretrained) 95 | 96 | 97 | class SegHead(nn.Layer): 98 | def __init__(self, in_chan, mid_chan, n_classes): 99 | super(SegHead, self).__init__() 100 | self.conv = layers.ConvBNReLU( 101 | in_chan, mid_chan, kernel_size=3, stride=1, padding=1) 102 | self.conv_out = nn.Conv2D( 103 | mid_chan, n_classes, kernel_size=1, bias_attr=None) 104 | 105 | def forward(self, x): 106 | x = self.conv(x) 107 | x = self.conv_out(x) 108 | return x 109 | 110 | 111 | class AttentionRefinementModule(nn.Layer): 112 | def __init__(self, in_chan, out_chan): 113 | super(AttentionRefinementModule, self).__init__() 114 | self.conv = layers.ConvBNReLU( 115 | in_chan, out_chan, kernel_size=3, stride=1, padding=1) 116 | self.conv_atten = nn.Conv2D( 117 
| out_chan, out_chan, kernel_size=1, bias_attr=None) 118 | self.bn_atten = nn.BatchNorm2D(out_chan) 119 | self.sigmoid_atten = nn.Sigmoid() 120 | 121 | def forward(self, x): 122 | feat = self.conv(x) 123 | atten = F.adaptive_avg_pool2d(feat, 1) 124 | atten = self.conv_atten(atten) 125 | atten = self.bn_atten(atten) 126 | atten = self.sigmoid_atten(atten) 127 | out = paddle.multiply(feat, atten) 128 | return out 129 | 130 | 131 | class ContextPath(nn.Layer): 132 | def __init__(self, backbone, use_conv_last=False): 133 | super(ContextPath, self).__init__() 134 | self.backbone = backbone 135 | self.arm16 = AttentionRefinementModule(512, 128) 136 | inplanes = 1024 137 | if use_conv_last: 138 | inplanes = 1024 139 | self.arm32 = AttentionRefinementModule(inplanes, 128) 140 | self.conv_head32 = layers.ConvBNReLU( 141 | 128, 128, kernel_size=3, stride=1, padding=1) 142 | self.conv_head16 = layers.ConvBNReLU( 143 | 128, 128, kernel_size=3, stride=1, padding=1) 144 | self.conv_avg = layers.ConvBNReLU( 145 | inplanes, 128, kernel_size=1, stride=1, padding=0) 146 | 147 | def forward(self, x): 148 | feat2, feat4, feat8, feat16, feat32 = self.backbone(x) 149 | 150 | feat8_hw = paddle.shape(feat8)[2:] 151 | feat16_hw = paddle.shape(feat16)[2:] 152 | feat32_hw = paddle.shape(feat32)[2:] 153 | 154 | avg = F.adaptive_avg_pool2d(feat32, 1) 155 | avg = self.conv_avg(avg) 156 | avg_up = F.interpolate(avg, feat32_hw, mode='nearest') 157 | 158 | feat32_arm = self.arm32(feat32) 159 | feat32_sum = feat32_arm + avg_up 160 | feat32_up = F.interpolate(feat32_sum, feat16_hw, mode='nearest') 161 | feat32_up = self.conv_head32(feat32_up) 162 | 163 | feat16_arm = self.arm16(feat16) 164 | feat16_sum = feat16_arm + feat32_up 165 | feat16_up = F.interpolate(feat16_sum, feat8_hw, mode='nearest') 166 | feat16_up = self.conv_head16(feat16_up) 167 | 168 | return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16 169 | 170 | 171 | class FeatureFusionModule(nn.Layer): 172 | def __init__(self, in_chan, out_chan): 173 | super(FeatureFusionModule, self).__init__() 174 | self.convblk = layers.ConvBNReLU( 175 | in_chan, out_chan, kernel_size=1, stride=1, padding=0) 176 | self.conv1 = nn.Conv2D( 177 | out_chan, 178 | out_chan // 4, 179 | kernel_size=1, 180 | stride=1, 181 | padding=0, 182 | bias_attr=None) 183 | self.conv2 = nn.Conv2D( 184 | out_chan // 4, 185 | out_chan, 186 | kernel_size=1, 187 | stride=1, 188 | padding=0, 189 | bias_attr=None) 190 | self.relu = nn.ReLU() 191 | self.sigmoid = nn.Sigmoid() 192 | 193 | def forward(self, fsp, fcp): 194 | fcat = paddle.concat([fsp, fcp], axis=1) 195 | feat = self.convblk(fcat) 196 | atten = F.adaptive_avg_pool2d(feat, 1) 197 | atten = self.conv1(atten) 198 | atten = self.relu(atten) 199 | atten = self.conv2(atten) 200 | atten = self.sigmoid(atten) 201 | feat_atten = paddle.multiply(feat, atten) 202 | feat_out = feat_atten + feat 203 | return feat_out 204 | -------------------------------------------------------------------------------- /models/u2cracknet.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from paddleseg.cvlibs import manager 6 | from paddleseg.models import layers 7 | from paddleseg.utils import utils 8 | 9 | import math 10 | 11 | 12 | __all__ = ['U2CrackNet'] 13 | 14 | 15 | 16 | class EfficientChannelAttention(nn.Layer): 17 | def __init__(self, gamma=2, b=1, in_channels=128): 18 | super().__init__() 19 | t = int(abs((math.log(in_channels, 
--------------------------------------------------------------------------------
/models/u2cracknet.py:
--------------------------------------------------------------------------------
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils

import math


__all__ = ['U2CrackNet']


class EfficientChannelAttention(nn.Layer):
    def __init__(self, gamma=2, b=1, in_channels=128):
        super().__init__()
        # Adaptive kernel size from ECA-Net: k is the nearest odd integer
        # to |log2(in_channels) / gamma + b / gamma|.
        t = int(abs((math.log(in_channels, 2) + b) / gamma))
        k = t if t % 2 else t + 1
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.conv = nn.Conv1D(
            1, 1, kernel_size=k, padding=int(k / 2), bias_attr=False)

    def forward(self, x):
        y = self.avg_pool(x)
        y = self.conv(y.squeeze(-1).transpose([0, 2, 1]))
        y = y.transpose([0, 2, 1]).unsqueeze(-1)
        # Hard gate in [0, 1]; this implementation uses clip where the
        # original ECA-Net uses a sigmoid.
        y = paddle.clip(y, min=0., max=1.)
        return x * y
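
# Worked example of the kernel-size rule above, plus a shape check. With
# gamma=2, b=1: in_channels=128 gives t = int(|(7 + 1) / 2|) = 4, which is
# even, so k = 5; in_channels=2 (a two-class logit map, as used by the side
# outputs below) gives t = 1 and k = 1. The input size is arbitrary.
if __name__ == '__main__':
    eca = EfficientChannelAttention(in_channels=128)
    x = paddle.randn([1, 128, 32, 32])
    print(eca(x).shape)  # -> [1, 128, 32, 32], channels re-weighted in place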


@manager.MODELS.add_component
class U2CrackNet(nn.Layer):
    """
    A crack-segmentation variant of U^2-Net in which the deepest RSU-4F
    stage is replaced by an ASPP module and each side output is re-weighted
    by efficient channel attention before fusion.

    The original article refers to
    Qin, Xuebin, et al. "U2-Net: Going deeper with nested U-structure for
    salient object detection." Pattern Recognition 106 (2020): 107404.
    """

    def __init__(self, num_classes, in_channels=3, pretrained=None):
        super(U2CrackNet, self).__init__()

        self.stage1 = RSU7(in_channels, 16, 64)
        self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 16, 64)
        self.pool23 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(64, 16, 64)
        self.pool34 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(64, 16, 64)
        self.pool45 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(64, 16, 64)
        self.pool56 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        # ASPP in place of the second RSU4F used by the original U^2-Net.
        self.stage6 = layers.ASPPModule(
            aspp_ratios=[1, 6, 12, 18],
            in_channels=64,
            out_channels=64,
            align_corners=True)

        # decoder
        self.stage5d = RSU4F(128, 16, 64)
        self.stage4d = RSU4(128, 16, 64)
        self.stage3d = RSU5(128, 16, 64)
        self.stage2d = RSU6(128, 16, 64)
        self.stage1d = RSU7(128, 16, 64)

        self.side1 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side2 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side3 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side4 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side5 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side6 = nn.Conv2D(64, num_classes, 3, padding=1)

        self.efa1 = EfficientChannelAttention(in_channels=num_classes)
        self.efa2 = EfficientChannelAttention(in_channels=num_classes)
        self.efa3 = EfficientChannelAttention(in_channels=num_classes)
        self.efa4 = EfficientChannelAttention(in_channels=num_classes)
        self.efa5 = EfficientChannelAttention(in_channels=num_classes)
        self.efa6 = EfficientChannelAttention(in_channels=num_classes)

        self.outconv = nn.Conv2D(6 * num_classes, num_classes, 1)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        hx = x

        # stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        # stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        # stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        # stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        # stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        # stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6, hx5)

        # decoder
        hx5d = self.stage5d(paddle.concat((hx6up, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.stage4d(paddle.concat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.stage3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.stage2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.stage1d(paddle.concat((hx2dup, hx1), 1))

        # side outputs
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2, d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3, d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4, d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5, d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6, d1)

        d1 = self.efa1(d1)
        d2 = self.efa2(d2)
        d3 = self.efa3(d3)
        d4 = self.efa4(d4)
        d5 = self.efa5(d5)
        d6 = self.efa6(d6)

        d0 = self.outconv(paddle.concat((d1, d2, d3, d4, d5, d6), 1))

        # Only the fused map is returned; the per-stage side outputs
        # d1..d6 could also be returned for deep supervision.
        return [d0]

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class REBNCONV(nn.Layer):
    def __init__(self, in_ch=3, out_ch=3, dirate=1):
        super(REBNCONV, self).__init__()

        self.conv_s1 = nn.Conv2D(
            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate)
        self.bn_s1 = nn.BatchNorm2D(out_ch)
        self.relu_s1 = nn.ReLU()

    def forward(self, x):
        hx = x
        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
        return xout


## upsample tensor 'src' to the same spatial size as tensor 'tar'
def _upsample_like(src, tar):
    # F.interpolate replaces the deprecated F.upsample alias.
    src = F.interpolate(src, size=paddle.shape(tar)[2:], mode='bilinear')
    return src
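
# Why _upsample_like is needed: the ceil_mode=True pools can produce
# odd-sized maps (e.g. 25 -> 13), so a decoder feature cannot simply be
# doubled; it is resized to the exact size of its skip connection instead.
# The 25x25 input is an arbitrary odd size chosen to show the effect.
if __name__ == '__main__':
    x = paddle.randn([1, 8, 25, 25])
    pooled = nn.MaxPool2D(2, stride=2, ceil_mode=True)(x)  # -> [1, 8, 13, 13]
    print(_upsample_like(pooled, x).shape)  # -> [1, 8, 25, 25]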


### RSU-7 ###
class RSU7(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU7, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)
        hx = self.pool5(hx5)

        hx6 = self.rebnconv6(hx)

        hx7 = self.rebnconv7(hx6)

        hx6d = self.rebnconv6d(paddle.concat((hx7, hx6), 1))
        hx6dup = _upsample_like(hx6d, hx5)

        hx5d = self.rebnconv5d(paddle.concat((hx6dup, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(paddle.concat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin


### RSU-6 ###
class RSU6(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU6, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)

        hx6 = self.rebnconv6(hx5)

        hx5d = self.rebnconv5d(paddle.concat((hx6, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(paddle.concat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin
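
# Each RSU block is shape-preserving: rebnconvin lifts in_ch -> out_ch, the
# nested U computes a refinement at mid_ch width, and the block returns
# hx1d + hxin as a residual at out_ch channels and the input resolution.
# The 96x96 input divides evenly through all five pools of RSU7, but any
# size works thanks to _upsample_like; the sizes here are illustrative.
if __name__ == '__main__':
    rsu = RSU7(in_ch=3, mid_ch=16, out_ch=64)
    x = paddle.randn([1, 3, 96, 96])
    print(rsu(x).shape)  # -> [1, 64, 96, 96]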


### RSU-5 ###
class RSU5(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU5, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)

        hx5 = self.rebnconv5(hx4)

        hx4d = self.rebnconv4d(paddle.concat((hx5, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin


### RSU-4 ###
class RSU4(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)

        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin


### RSU-4F ###
class RSU4F(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4F, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(hx1)
        hx3 = self.rebnconv3(hx2)

        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4, hx3), 1))
        hx2d = self.rebnconv2d(paddle.concat((hx3d, hx2), 1))
        hx1d = self.rebnconv1d(paddle.concat((hx2d, hx1), 1))

        return hx1d + hxin
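
# End-to-end smoke test for the registered model. forward() returns a
# single-element list holding the fused logit map d0 at the input
# resolution; num_classes=2 and the 256x256 input are arbitrary choices
# for the sketch (a size divisible by 32 avoids odd intermediate maps).
if __name__ == '__main__':
    model = U2CrackNet(num_classes=2)
    logits = model(paddle.randn([1, 3, 256, 256]))
    print(len(logits), logits[0].shape)  # -> 1 [1, 2, 256, 256]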
--------------------------------------------------------------------------------
/models/unet.py:
--------------------------------------------------------------------------------
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers


# @manager.MODELS.add_component
class UNet(nn.Layer):
    """
    The UNet implementation based on PaddlePaddle.

    The original article refers to
    Olaf Ronneberger, et al. "U-Net: Convolutional Networks for Biomedical
    Image Segmentation" (https://arxiv.org/abs/1505.04597).

    Args:
        num_classes (int): The unique number of target classes.
        align_corners (bool): An argument of F.interpolate. It should be set
            to False when the output size of the feature is even, e.g.
            1024x512; otherwise it is True, e.g. 769x769. Default: False.
        use_deconv (bool, optional): Whether to use deconvolution for
            upsampling. If False, bilinear resizing is used. Default: False.
        in_channels (int, optional): The channels of the input image.
            Default: 3.
        pretrained (str, optional): The path or url of the pretrained model
            for fine-tuning. Default: None.
    """

    def __init__(self,
                 num_classes,
                 align_corners=False,
                 use_deconv=False,
                 in_channels=3,
                 pretrained=None):
        super().__init__()

        self.encode = Encoder(in_channels)
        self.decode = Decoder(align_corners, use_deconv=use_deconv)
        # `conv` is kept as a duplicate attribute of `cls`, as in the
        # upstream PaddleSeg implementation.
        self.cls = self.conv = nn.Conv2D(
            in_channels=64,
            out_channels=num_classes,
            kernel_size=3,
            stride=1,
            padding=1)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        logit_list = []
        x, short_cuts = self.encode(x)
        x = self.decode(x, short_cuts)
        logit = self.cls(x)
        logit_list.append(logit)
        return logit_list

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class Encoder(nn.Layer):
    def __init__(self, in_channels=3):
        super().__init__()

        self.double_conv = nn.Sequential(
            layers.ConvBNReLU(in_channels, 64, 3),
            layers.ConvBNReLU(64, 64, 3))
        down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
        self.down_sample_list = nn.LayerList([
            self.down_sampling(channel[0], channel[1])
            for channel in down_channels
        ])

    def down_sampling(self, in_channels, out_channels):
        modules = []
        modules.append(nn.MaxPool2D(kernel_size=2, stride=2))
        modules.append(layers.ConvBNReLU(in_channels, out_channels, 3))
        modules.append(layers.ConvBNReLU(out_channels, out_channels, 3))
        return nn.Sequential(*modules)

    def forward(self, x):
        short_cuts = []
        x = self.double_conv(x)
        for down_sample in self.down_sample_list:
            short_cuts.append(x)
            x = down_sample(x)
        return x, short_cuts
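
# Shape sketch for the encoder: short_cuts stores the pre-pooling features
# (64, 128, 256, 512 channels at full, 1/2, 1/4 and 1/8 resolution) that
# the decoder later consumes in reverse order. The 64x64 input is an
# arbitrary illustrative size.
if __name__ == '__main__':
    encoder = Encoder(in_channels=3)
    bottom, short_cuts = encoder(paddle.randn([1, 3, 64, 64]))
    print(bottom.shape)  # -> [1, 512, 4, 4]
    print([tuple(s.shape) for s in short_cuts])
    # -> [(1, 64, 64, 64), (1, 128, 32, 32), (1, 256, 16, 16), (1, 512, 8, 8)]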


class Decoder(nn.Layer):
    def __init__(self, align_corners, use_deconv=False):
        super().__init__()

        up_channels = [[512, 256], [256, 128], [128, 64], [64, 64]]
        self.up_sample_list = nn.LayerList([
            UpSampling(channel[0], channel[1], align_corners, use_deconv)
            for channel in up_channels
        ])

    def forward(self, x, short_cuts):
        for i in range(len(short_cuts)):
            x = self.up_sample_list[i](x, short_cuts[-(i + 1)])
        return x


class UpSampling(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 align_corners,
                 use_deconv=False):
        super().__init__()

        self.align_corners = align_corners

        self.use_deconv = use_deconv
        if self.use_deconv:
            self.deconv = nn.Conv2DTranspose(
                in_channels,
                out_channels // 2,
                kernel_size=2,
                stride=2,
                padding=0)
            in_channels = in_channels + out_channels // 2
        else:
            in_channels *= 2

        self.double_conv = nn.Sequential(
            layers.ConvBNReLU(in_channels, out_channels, 3),
            layers.ConvBNReLU(out_channels, out_channels, 3))

    def forward(self, x, short_cut):
        if self.use_deconv:
            x = self.deconv(x)
        else:
            x = F.interpolate(
                x,
                paddle.shape(short_cut)[2:],
                mode='bilinear',
                align_corners=self.align_corners)
        x = paddle.concat([x, short_cut], axis=1)
        x = self.double_conv(x)
        return x
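
# Full-model smoke test: UNet returns a one-element list of logits at the
# input resolution. num_classes=2 and the 64x64 input are illustrative;
# set use_deconv=True to upsample with transposed convolutions instead of
# bilinear resizing.
if __name__ == '__main__':
    net = UNet(num_classes=2)
    out = net(paddle.randn([1, 3, 64, 64]))
    print(len(out), out[0].shape)  # -> 1 [1, 2, 64, 64]
--------------------------------------------------------------------------------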