├── LICENSE ├── README.md ├── configs ├── bisenetv2.yml ├── bisenetv2_b32.yml ├── ddrnet.yml ├── deeplabv3p.yml ├── hrnet_w18_s.yml ├── hrsegnetb16.yml ├── hrsegnetb32.yml ├── hrsegnetb48.yml ├── hrsegnetb64_bs16.yml ├── ocrnet_hrnetw18.yml ├── ocrnet_hrsegb64_bs16.yml ├── pspnet.yml ├── rucnet_crackseg9k.yml ├── stdcseg.yml ├── u2cracknet_crackseg9k.yml ├── unet.yml └── unet_focalloss_adam_crackseg9k.yml ├── fig ├── fig1.png ├── fig5.png └── fig8.png └── models ├── bisenetv2.py ├── ddrnet.py ├── deeplabv3p.py ├── hrnet_w18_s.py ├── hrsegb64.py ├── hrsegnet_b16.py ├── hrsegnet_b16_d4.py ├── hrsegnet_b16_d5.py ├── hrsegnet_b32.py ├── hrsegnet_b48.py ├── hrsegnet_b64.py ├── ocrnet.py ├── pspnet.py ├── rucnet.py ├── stdcseg.py ├── u2cracknet.py └── unet.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HrSegNet4CrackSegmentation 2 | Real-time High-Resolution Neural Network with Semantic Guidance for Crack Segmentation 3 | 4 | # Abstract 5 | The current trend in crack detection methods is leaning towards the use of machine learning or deep learning. 
This is because deep learning-based methods can autonomously extract features from images, thereby avoiding the low stability caused by manually designed operators. However, there are still some problems with current deep learning-based crack segmentation algorithms. Firstly, the vast majority of research is based on the modification and improvement of commonly used scene segmentation algorithms, with none specifically designed for the crack segmentation task. Secondly, crack detection is increasingly reliant on edge devices, such as drones and vehicle-mounted cameras, so the model must be lightweight to achieve real-time segmentation efficiency; however, there is currently limited research in this area. We propose a high-resolution neural network with semantic guidance for real-time crack segmentation, named HrSegNet. 6 | 7 | # Update 8 | 2023-08-20 9 | 10 | A comparison of the following three models has been added. We implemented these three models; their files are in `models`. The trained models and logs are in [UNet_focal](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EcxRe1WntKdOtK-_O1pHIvEBwm-b9ohinTU-03JZ_Y4UMw?e=F7pqOM), [U2CrackNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdSbeqvE-KtAtHjZj2-iS30BetzzRl9f2ockiASAgyea8A?e=3TwbYB), and [RUCNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EUbnDwGhVOZNuRrhe1Ksf0IBn7xrCjgaMVp5S5ehRJIkFA?e=3FuMHM). 11 | * Liu, Zhenqing, et al. "Computer vision-based concrete crack detection using U-net fully convolutional networks." Automation in Construction 104 (2019): 129-139. 12 | * Shi, Pengfei, et al. "U2CrackNet: a deeper architecture with two-level nested U-structure for pavement crack detection." Structural Health Monitoring 22.4 (2023): 2910-2921. 13 | * Yu, Gui, et al. "RUC-Net: A Residual-Unet-Based Convolutional Neural Network for Pixel-Level Pavement Crack Segmentation." Sensors 23.1 (2022): 53. 14 | 15 | 16 | 17 | 2023-08-12 18 | 19 | Update [Concrete3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdzjOhykuQxDjRgs6k-5PU0BtJntPGtTo445f4lBv5HV4Q?e=MCOv5W). In the original Concrete3k, some of the images and labels did not match; we have corrected and re-uploaded them. The corresponding cross-dataset results will also be updated. 20 | 21 | 2023-07-17 22 | * Add new datasets: [Asphalt3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EVj4M3fxfcFEuUToiO1QODEBtUuSPXE5FQONgNYti7PDFQ?e=IwZgXT), [Concrete3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdzjOhykuQxDjRgs6k-5PU0BtJntPGtTo445f4lBv5HV4Q?e=MCOv5W). Asphalt3k is sourced from [Yang](https://www.mdpi.com/2076-3417/12/19/10089) and Concrete3k from [Wang](https://www.sciencedirect.com/science/article/pii/S0926580522001480). 23 | * Add weight files pre-trained on CrackSeg9k, along with their corresponding training logs. 24 | 25 | 2023-07-02 26 | 27 | We are conducting more comparative experiments using a new pavement dataset that is being manually annotated at the expert level. The results and data will be published soon. We will release the trained model parameters so that you can quickly test them. 
28 | ### Model Architecture 29 | ![Alt text](./fig/fig1.png) 30 | ### [Seg-Grad-CAM](https://arxiv.org/abs/2002.11434) 31 | ![Alt text](./fig/fig5.png) 32 | ### Comparisons with state-of-the-art 33 | ![Alt text](./fig/fig8.png) 34 | 35 | 36 | # Data 37 | * [CrackSeg9k](https://github.com/Dhananjay42/crackseg9k) 38 | * [Asphalt3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EVj4M3fxfcFEuUToiO1QODEBtUuSPXE5FQONgNYti7PDFQ?e=IwZgXT) 39 | * [Concrete3k](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdzjOhykuQxDjRgs6k-5PU0BtJntPGtTo445f4lBv5HV4Q?e=MCOv5W) 40 | 41 | We train the model on a comprehensive dataset (CrackSeg9k) and subsequently transfer it to two specific downstream scenarios: asphalt (Asphalt3k) and concrete (Concrete3k). 42 | # Installation 43 | The code requires python>=3.8, paddle=2.4.1, paddleseg=2.7.0, and OpenCV=4.7.0. You can follow the instructions for [paddle](https://github.com/PaddlePaddle/Paddle) and [paddleseg](https://github.com/PaddlePaddle/PaddleSeg) to install all the dependencies. To reproduce the results, you must install paddle with CUDA support. 44 | 45 | # How to use 46 | Once paddle and paddleseg are installed, you can use our published models very easily. 47 | 48 | We start by describing the contents of each directory. The directory `models` defines the high-resolution crack segmentation models we designed; the three model files are almost identical except for the parameter `base`. The files of the models we compare against are also included. The directory `configs` contains the configuration files for all models, i.e., the full training and testing parameters. 49 | 50 | The easiest way to use our models is through [paddleseg](https://github.com/PaddlePaddle/PaddleSeg). Put the files of the desired models into paddleseg's models directory and register each model with the `@manager.MODELS.add_component` decorator. To train a model, use the configuration files we provide in `configs`; minimal sketches of registration and engine inference follow the TensorRT notes below. 51 | 52 | All data are publicly available. 53 | 54 | 55 | # Trained models 56 | **On CrackSeg9k** 57 | * [HrSegNet-B16](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EZWMNQXFtTpPl-SnUyoKpS0B2EDCDZIn2SX00C0AI_U-Jg?e=o0gqxN) 58 | * [HrSegNet-B32](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EVaZjUC9tVNMoMkbNOdmemEBh6xPEBUzo2-0ddjGl3bfRQ?e=MWs6Z9) 59 | * [HrSegNet-B48](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EdoG_do5oFdPmP6NDqWh8AEBh1CfTl6SxD6DX_smxl9WFA?e=WAr0Fi) 60 | * [HrSegNet-B64(bs=16)](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/ETzpUJ9FkN1CoTOO1PB1-68BNYNdqtB0gowlkjzuNJCtQw?e=rCkTGO) 61 | * [HRNet-W18(bs=16)](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EQcoB7KEbMZHidBi2JchS78BoeI35zALH0m6w3727u7HGA?e=nNDb39) 62 | 63 | 64 | # Model (TensorRT engine) 65 | We release all our models as TensorRT engines, including the SOTA models used for comparison in all experiments. Note that all inputs to the TensorRT engines are **1 × 3 × 400 × 400**. We use TensorRT 8.6.1. 
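As a complement to the *How to use* notes above, the snippet below sketches registering one of our models with PaddleSeg. Treat it as a minimal sketch rather than a definitive recipe: it assumes `models/hrsegnet_b48.py` from this repository has been copied somewhere importable by PaddleSeg, and the exact training CLI may differ between PaddleSeg versions.

```python
# Minimal sketch: register HrSegNet with PaddleSeg so that configs can
# resolve "type: HrSegNetB48". Calling add_component() directly is
# equivalent to decorating the class definition with
# @manager.MODELS.add_component.
from paddleseg.cvlibs import manager

from models.hrsegnet_b48 import HrSegNetB48  # class name as used in configs/hrsegnetb48.yml

manager.MODELS.add_component(HrSegNetB48)

# Training then uses the provided config via PaddleSeg's CLI, e.g. (version-dependent):
#   python train.py --config configs/hrsegnetb48.yml --do_eval --save_dir output
```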
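Before the engine list, a hedged inference sketch. The engine file name, the output tensor shape (assumed to be 1 × 2 × 400 × 400 two-class logits at input resolution), and the binding order are assumptions; query each engine for its actual I/O layout. It uses the TensorRT 8.x Python bindings plus `pycuda`.

```python
# Hedged sketch: run a released TensorRT engine on a single 1x3x400x400 input.
import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
with open("hrsegnet_b48.engine", "rb") as f:  # hypothetical engine file name
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

inp = np.zeros((1, 3, 400, 400), dtype=np.float32)  # a normalized image goes here
out = np.empty((1, 2, 400, 400), dtype=np.float32)  # assumed two-class logits shape

# Copy input to device, run, and copy the logits back.
d_inp, d_out = cuda.mem_alloc(inp.nbytes), cuda.mem_alloc(out.nbytes)
cuda.memcpy_htod(d_inp, inp)
# Classic bindings API (deprecated but still available in TensorRT 8.6);
# assumes binding 0 is the input and binding 1 is the output.
context.execute_v2(bindings=[int(d_inp), int(d_out)])
cuda.memcpy_dtoh(out, d_out)

mask = out.argmax(axis=1)  # per-pixel prediction: 0 = background, 1 = crack
```

The available engines are listed below.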
66 | | Model | 67 | | --- | 68 | | [U-Net](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EYoEi_aQczxOswVyAi8FQBgBYSYXalI8oZKRszWHgbzZwg?e=XuFGzf) | 69 | | [DDRNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EX-QSVExyFVLvasiouuvEwEBe4HPdK3N8HxklK5CAn07DQ?e=DfdBZz) | 70 | | [DeeplabV3+](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/ETkJ1rMqaqBGrfWNg5KCF0EBIxCfYlFk3t0IRD2Uk2cQcA?e=ISPLG0) | 71 | | [OCRNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/Ed0l6UAckEFGodrNz1W7aHgBOmoVN6-yZfNIKMTJOp4Fug?e=7u8ZOD) | 72 | | [STDCSeg](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EV1Rra3XuP5GqImDWMeYdbEBSt64lrmWnAQETKJe0NTO5Q?e=LN0VxD) | 73 | | [BiSeNetV2](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EfovCQdm_5FJoaySbnd2SBsB2becRV7KTQa7A9_oL7lkHA?e=TI8gZJ) | 74 | | [PSPNet](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/ERTJdaWfJ-9Ess81IwvnBE4Ba0pVnGgyqyZoHFC5hEe1pQ?e=ZzB5Xa) | 75 | | [HrSegNet-B16](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EYq7OVwYeRtJm0PtXmytSmoB-Ywu8PsC-9eS95V0M7GSpQ?e=1GgLOt) | 76 | | [HrSegNet-B32](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EURuJVQAW25GnJBvdwW76pgBZdZqyWwT_vifP7Ta98O8_w?e=kKZVLb) | 77 | | [HrSegNet-B48](https://chdeducn-my.sharepoint.com/:u:/g/personal/2018024008_chd_edu_cn/EcUUFXq9dbJHmAz1roiZCMUB3zeM49ILOwzFzHe0iAYS8w?e=SAGci7) | 78 | 79 | 80 | # Citation 81 | If you find this project helpful for your research, please consider citing the following BibTeX entry. 82 | ```bibtex 83 | @article{li2023real, 84 | title={Real-time high-resolution neural network with semantic guidance for crack segmentation}, 85 | author={Li, Yongshang and Ma, Ronggui and Liu, Han and Cheng, Gaoli}, 86 | journal={Automation in Construction}, 87 | volume={156}, 88 | pages={105112}, 89 | year={2023}, 90 | publisher={Elsevier} 91 | } 92 | 93 | 94 | ``` 95 | 96 | # Star History 97 | 98 | [![Star History Chart](https://api.star-history.com/svg?repos=CHDyshli/HrSegNet4CrackSegmentation&type=Date)](https://star-history.com/#CHDyshli/HrSegNet4CrackSegmentation&Date) 99 | 100 | -------------------------------------------------------------------------------- /configs/bisenetv2.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | 5 | train_dataset: 6 | type: Dataset 7 | dataset_root: data/crackseg9k 8 | train_path: data/crackseg9k/train.txt 9 | num_classes: 2 10 | mode: train 11 | transforms: 12 | - type: ResizeStepScaling 13 | min_scale_factor: 0.5 14 | max_scale_factor: 2.0 15 | scale_step_size: 0.25 16 | - type: RandomPaddingCrop 17 | crop_size: [400, 400] 18 | - type: RandomHorizontalFlip 19 | - type: RandomDistort 20 | brightness_range: 0.5 21 | contrast_range: 0.5 22 | saturation_range: 0.5 23 | - type: Normalize 24 | 25 | val_dataset: 26 | type: Dataset 27 | dataset_root: data/crackseg9k 28 | val_path: data/crackseg9k/val.txt 29 | num_classes: 2 30 | mode: val 31 | transforms: 32 | - type: Normalize 33 | 34 | 35 | model: 36 | type: BiSeNetV2 37 | num_classes: 2 38 | 39 | optimizer: 40 | type: sgd 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: CrossEntropyLoss 47 | - type: CrossEntropyLoss 48 | - type: CrossEntropyLoss 49 | - type: CrossEntropyLoss 50 | - type: CrossEntropyLoss 51 | coef: [1, 1, 1, 1, 1] 52 | 53 | lr_scheduler: 54 | type: 
PolynomialDecay 55 | learning_rate: 0.01 56 | end_lr: 0.0 57 | power: 0.9 -------------------------------------------------------------------------------- /configs/bisenetv2_b32.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | iters: 100000 3 | 4 | 5 | train_dataset: 6 | type: Dataset 7 | dataset_root: data/crackseg9k 8 | train_path: data/crackseg9k/train.txt 9 | num_classes: 2 10 | mode: train 11 | transforms: 12 | - type: ResizeStepScaling 13 | min_scale_factor: 0.5 14 | max_scale_factor: 2.0 15 | scale_step_size: 0.25 16 | - type: RandomPaddingCrop 17 | crop_size: [400, 400] 18 | - type: RandomHorizontalFlip 19 | - type: RandomDistort 20 | brightness_range: 0.5 21 | contrast_range: 0.5 22 | saturation_range: 0.5 23 | - type: Normalize 24 | 25 | val_dataset: 26 | type: Dataset 27 | dataset_root: data/crackseg9k 28 | val_path: data/crackseg9k/val.txt 29 | num_classes: 2 30 | mode: val 31 | transforms: 32 | - type: Normalize 33 | 34 | 35 | model: 36 | type: BiSeNetV2 37 | num_classes: 2 38 | 39 | optimizer: 40 | type: sgd 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: CrossEntropyLoss 47 | - type: CrossEntropyLoss 48 | - type: CrossEntropyLoss 49 | - type: CrossEntropyLoss 50 | - type: CrossEntropyLoss 51 | coef: [1, 1, 1, 1, 1] 52 | 53 | lr_scheduler: 54 | type: PolynomialDecay 55 | learning_rate: 0.01 56 | end_lr: 0.0 57 | power: 0.9 -------------------------------------------------------------------------------- /configs/ddrnet.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | 37 | model: 38 | type: DDRNet_23 39 | enable_auxiliary_loss: False 40 | 41 | 42 | loss: 43 | types: 44 | - type: OhemCrossEntropyLoss 45 | coef: [1] 46 | 47 | 48 | optimizer: 49 | type: sgd 50 | weight_decay: 0.0005 51 | 52 | 53 | lr_scheduler: 54 | type: PolynomialDecay 55 | learning_rate: 0.01 56 | end_lr: 0.0 57 | power: 0.9 -------------------------------------------------------------------------------- /configs/deeplabv3p.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 
29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | 37 | model: 38 | type: DeepLabV3P 39 | backbone: 40 | type: ResNet18_vd 41 | output_stride: 8 42 | multi_grid: [1, 2, 4] 43 | num_classes: 2 44 | backbone_indices: [0, 3] 45 | aspp_ratios: [1, 12, 24, 36] 46 | aspp_out_channels: 256 47 | align_corners: False 48 | pretrained: null 49 | 50 | 51 | loss: 52 | types: 53 | - type: CrossEntropyLoss 54 | coef: [1] 55 | 56 | 57 | optimizer: 58 | type: sgd 59 | weight_decay: 0.0005 60 | 61 | 62 | lr_scheduler: 63 | type: PolynomialDecay 64 | learning_rate: 0.01 65 | end_lr: 0.0 66 | power: 0.9 -------------------------------------------------------------------------------- /configs/hrnet_w18_s.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HRNet_W18_S 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | coef: [1] 48 | 49 | 50 | lr_scheduler: 51 | type: PolynomialDecay 52 | learning_rate: 0.01 53 | end_lr: 0.0 54 | power: 0.9 55 | warmup_iters: 2000 56 | warmup_start_lr: 1.0e-5 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /configs/hrsegnetb16.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HrSegNetB16 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/hrsegnetb32.yml: -------------------------------------------------------------------------------- 1 | 
batch_size: 32 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HrSegNetB32 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/hrsegnetb48.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 35 | type: HrSegNetB48 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/hrsegnetb64_bs16.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | model: 
35 | type: HrSegNetB64 36 | 37 | 38 | optimizer: 39 | type: SGD 40 | momentum: 0.9 41 | weight_decay: 0.0005 42 | 43 | 44 | loss: 45 | types: 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: OhemCrossEntropyLoss 49 | coef: [1, 0.5, 0.5] 50 | 51 | 52 | lr_scheduler: 53 | type: PolynomialDecay 54 | learning_rate: 0.01 55 | end_lr: 0.0 56 | power: 0.9 57 | warmup_iters: 2000 58 | warmup_start_lr: 1.0e-5 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /configs/ocrnet_hrnetw18.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | optimizer: 34 | type: sgd 35 | 36 | 37 | lr_scheduler: 38 | type: PolynomialDecay 39 | learning_rate: 0.01 40 | power: 0.9 41 | 42 | loss: 43 | types: 44 | - type: MixedLoss 45 | losses: 46 | - type: CrossEntropyLoss 47 | - type: LovaszSoftmaxLoss 48 | coef: [0.8, 0.2] 49 | - type: MixedLoss 50 | losses: 51 | - type: CrossEntropyLoss 52 | - type: LovaszSoftmaxLoss 53 | coef: [0.8, 0.2] 54 | coef: [1, 0.4] 55 | 56 | 57 | model: 58 | type: OCRNet 59 | backbone: 60 | type: HRNet_W18 61 | backbone_indices: [0] -------------------------------------------------------------------------------- /configs/ocrnet_hrsegb64_bs16.yml: -------------------------------------------------------------------------------- 1 | batch_size: 16 2 | iters: 100000 3 | 4 | train_dataset: 5 | type: Dataset 6 | dataset_root: data/crackseg9k 7 | train_path: data/crackseg9k/train.txt 8 | num_classes: 2 9 | mode: train 10 | transforms: 11 | - type: ResizeStepScaling 12 | min_scale_factor: 0.5 13 | max_scale_factor: 2.0 14 | scale_step_size: 0.25 15 | - type: RandomPaddingCrop 16 | crop_size: [400, 400] 17 | - type: RandomHorizontalFlip 18 | - type: RandomDistort 19 | brightness_range: 0.5 20 | contrast_range: 0.5 21 | saturation_range: 0.5 22 | - type: Normalize 23 | 24 | val_dataset: 25 | type: Dataset 26 | dataset_root: data/crackseg9k 27 | val_path: data/crackseg9k/val.txt 28 | num_classes: 2 29 | mode: val 30 | transforms: 31 | - type: Normalize 32 | 33 | 34 | 35 | optimizer: 36 | type: SGD 37 | momentum: 0.9 38 | weight_decay: 0.0005 39 | 40 | lr_scheduler: 41 | type: PolynomialDecay 42 | learning_rate: 0.01 43 | end_lr: 0.0 44 | power: 0.9 45 | warmup_iters: 2000 46 | warmup_start_lr: 1.0e-5 47 | 48 | 49 | 50 | loss: 51 | types: 52 | - type: MixedLoss 53 | losses: 54 | - type: CrossEntropyLoss 55 | - type: LovaszSoftmaxLoss 56 | coef: [0.8, 0.2] 57 | - type: MixedLoss 58 | losses: 59 | - type: CrossEntropyLoss 60 | - type: LovaszSoftmaxLoss 61 | coef: [0.8, 0.2] 62 | coef: [1, 0.4] 63 | 64 | 65 | model: 66 | type: OCRNet 67 | backbone: 68 | type: HrSegB64 69 | backbone_indices: [0] 
-------------------------------------------------------------------------------- /configs/pspnet.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | model: 37 | type: PSPNet 38 | num_classes: 2 39 | backbone: 40 | type: ResNet18_vd 41 | output_stride: 8 42 | enable_auxiliary_loss: True 43 | align_corners: False 44 | pretrained: null 45 | 46 | 47 | optimizer: 48 | type: sgd 49 | weight_decay: 0.0005 50 | 51 | loss: 52 | types: 53 | - type: CrossEntropyLoss 54 | coef: [1, 0.4] 55 | 56 | 57 | lr_scheduler: 58 | type: PolynomialDecay 59 | learning_rate: 0.01 60 | power: 0.9 61 | end_lr: 1.0e-5 -------------------------------------------------------------------------------- /configs/rucnet_crackseg9k.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: RUCNet 38 | num_classes: 2 39 | use_deconv: False 40 | pretrained: Null 41 | 42 | 43 | 44 | optimizer: 45 | type: adam 46 | 47 | 48 | loss: 49 | types: 50 | - type: FocalLoss 51 | coef: [1] 52 | 53 | 54 | lr_scheduler: 55 | type: PolynomialDecay 56 | learning_rate: 0.01 57 | end_lr: 0.0 58 | power: 0.9 59 | warmup_iters: 2000 60 | warmup_start_lr: 1.0e-5 61 | -------------------------------------------------------------------------------- /configs/stdcseg.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 
29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 32 34 | iters: 100000 35 | 36 | 37 | model: 38 | type: STDCSeg 39 | backbone: 40 | type: STDC1 41 | pretrained: null 42 | 43 | loss: 44 | types: 45 | - type: OhemCrossEntropyLoss 46 | - type: OhemCrossEntropyLoss 47 | - type: OhemCrossEntropyLoss 48 | - type: DetailAggregateLoss 49 | coef: [1, 1, 1, 1] 50 | 51 | 52 | optimizer: 53 | type: sgd 54 | weight_decay: 0.0005 55 | 56 | 57 | lr_scheduler: 58 | type: PolynomialDecay 59 | learning_rate: 0.01 60 | end_lr: 0.0 61 | power: 0.9 -------------------------------------------------------------------------------- /configs/u2cracknet_crackseg9k.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: U2CrackNet 38 | num_classes: 2 39 | 40 | 41 | 42 | optimizer: 43 | type: adam 44 | 45 | loss: 46 | types: 47 | - type: CrossEntropyLoss 48 | coef: [1] 49 | 50 | 51 | lr_scheduler: 52 | type: PolynomialDecay 53 | learning_rate: 0.01 54 | end_lr: 0.0 55 | power: 0.9 56 | warmup_iters: 2000 57 | warmup_start_lr: 1.0e-5 58 | -------------------------------------------------------------------------------- /configs/unet.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: UNet 38 | num_classes: 2 39 | use_deconv: False 40 | pretrained: Null 41 | 42 | 43 | 44 | optimizer: 45 | type: sgd 46 | weight_decay: 0.0005 47 | 48 | loss: 49 | types: 50 | - type: OhemCrossEntropyLoss 51 | coef: [1] 52 | 53 | 54 | lr_scheduler: 55 | type: PolynomialDecay 56 | learning_rate: 0.01 57 | end_lr: 0.0 58 | power: 0.9 -------------------------------------------------------------------------------- /configs/unet_focalloss_adam_crackseg9k.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | train_dataset: 4 | type: Dataset 5 | dataset_root: data/crackseg9k 6 | train_path: data/crackseg9k/train.txt 7 | num_classes: 2 8 | mode: train 9 | transforms: 10 | - type: ResizeStepScaling 11 | 
min_scale_factor: 0.5 12 | max_scale_factor: 2.0 13 | scale_step_size: 0.25 14 | - type: RandomPaddingCrop 15 | crop_size: [400, 400] 16 | - type: RandomHorizontalFlip 17 | - type: RandomDistort 18 | brightness_range: 0.5 19 | contrast_range: 0.5 20 | saturation_range: 0.5 21 | - type: Normalize 22 | 23 | val_dataset: 24 | type: Dataset 25 | dataset_root: data/crackseg9k 26 | val_path: data/crackseg9k/val.txt 27 | num_classes: 2 28 | mode: val 29 | transforms: 30 | - type: Normalize 31 | 32 | 33 | batch_size: 16 34 | iters: 100000 35 | 36 | model: 37 | type: UNet 38 | num_classes: 2 39 | use_deconv: False 40 | pretrained: Null 41 | 42 | 43 | 44 | optimizer: 45 | type: Adam 46 | 47 | loss: 48 | types: 49 | - type: FocalLoss 50 | coef: [1] 51 | 52 | 53 | 54 | lr_scheduler: 55 | type: PolynomialDecay 56 | learning_rate: 0.01 57 | end_lr: 0.0 58 | power: 0.9 59 | warmup_iters: 2000 60 | warmup_start_lr: 1.0e-5 61 | -------------------------------------------------------------------------------- /fig/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CHDyshli/HrSegNet4CrackSegmentation/1e4dd172e250de5cb951414c317059b3cd89c702/fig/fig1.png -------------------------------------------------------------------------------- /fig/fig5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CHDyshli/HrSegNet4CrackSegmentation/1e4dd172e250de5cb951414c317059b3cd89c702/fig/fig5.png -------------------------------------------------------------------------------- /fig/fig8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CHDyshli/HrSegNet4CrackSegmentation/1e4dd172e250de5cb951414c317059b3cd89c702/fig/fig8.png -------------------------------------------------------------------------------- /models/bisenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | import paddle.nn.functional as F 6 | 7 | from paddleseg import utils 8 | from paddleseg.cvlibs import manager, param_init 9 | from paddleseg.models import layers 10 | 11 | 12 | # @manager.MODELS.add_component 13 | class BiSeNetV2(nn.Layer): 14 | """ 15 | The BiSeNet V2 implementation based on PaddlePaddle. 16 | 17 | The original article refers to 18 | Yu, Changqian, et al. "BiSeNet V2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation" 19 | (https://arxiv.org/abs/2004.02147) 20 | 21 | Args: 22 | num_classes (int): The unique number of target classes. 23 | lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25. 24 | in_channels (int, optional): The channels of input image. Default: 3. 25 | pretrained (str, optional): The path or url of pretrained model. Default: None. 
26 | """ 27 | 28 | def __init__(self, 29 | num_classes, 30 | lambd=0.25, 31 | align_corners=False, 32 | in_channels=3, 33 | pretrained=None): 34 | super().__init__() 35 | 36 | C1, C2, C3 = 64, 64, 128 37 | db_channels = (C1, C2, C3) 38 | C1, C3, C4, C5 = int(C1 * lambd), int(C3 * lambd), 64, 128 39 | sb_channels = (C1, C3, C4, C5) 40 | mid_channels = 128 41 | 42 | self.db = DetailBranch(in_channels, db_channels) 43 | self.sb = SemanticBranch(in_channels, sb_channels) 44 | 45 | self.bga = BGA(mid_channels, align_corners) 46 | self.aux_head1 = SegHead(C1, C1, num_classes) 47 | self.aux_head2 = SegHead(C3, C3, num_classes) 48 | self.aux_head3 = SegHead(C4, C4, num_classes) 49 | self.aux_head4 = SegHead(C5, C5, num_classes) 50 | self.head = SegHead(mid_channels, mid_channels, num_classes) 51 | 52 | self.align_corners = align_corners 53 | self.pretrained = pretrained 54 | self.init_weight() 55 | 56 | def forward(self, x): 57 | dfm = self.db(x) 58 | feat1, feat2, feat3, feat4, sfm = self.sb(x) 59 | logit = self.head(self.bga(dfm, sfm)) 60 | 61 | if not self.training: 62 | logit_list = [logit] 63 | else: 64 | logit1 = self.aux_head1(feat1) 65 | logit2 = self.aux_head2(feat2) 66 | logit3 = self.aux_head3(feat3) 67 | logit4 = self.aux_head4(feat4) 68 | logit_list = [logit, logit1, logit2, logit3, logit4] 69 | 70 | logit_list = [ 71 | F.interpolate( 72 | logit, 73 | paddle.shape(x)[2:], 74 | mode='bilinear', 75 | align_corners=self.align_corners) for logit in logit_list 76 | ] 77 | 78 | return logit_list 79 | 80 | def init_weight(self): 81 | if self.pretrained is not None: 82 | utils.load_entire_model(self, self.pretrained) 83 | else: 84 | for sublayer in self.sublayers(): 85 | if isinstance(sublayer, nn.Conv2D): 86 | param_init.kaiming_normal_init(sublayer.weight) 87 | elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)): 88 | param_init.constant_init(sublayer.weight, value=1.0) 89 | param_init.constant_init(sublayer.bias, value=0.0) 90 | 91 | 92 | class StemBlock(nn.Layer): 93 | def __init__(self, in_dim, out_dim): 94 | super(StemBlock, self).__init__() 95 | 96 | self.conv = layers.ConvBNReLU(in_dim, out_dim, 3, stride=2) 97 | 98 | self.left = nn.Sequential( 99 | layers.ConvBNReLU(out_dim, out_dim // 2, 1), 100 | layers.ConvBNReLU( 101 | out_dim // 2, out_dim, 3, stride=2)) 102 | 103 | self.right = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) 104 | 105 | self.fuse = layers.ConvBNReLU(out_dim * 2, out_dim, 3) 106 | 107 | def forward(self, x): 108 | x = self.conv(x) 109 | left = self.left(x) 110 | right = self.right(x) 111 | concat = paddle.concat([left, right], axis=1) 112 | return self.fuse(concat) 113 | 114 | 115 | class ContextEmbeddingBlock(nn.Layer): 116 | def __init__(self, in_dim, out_dim): 117 | super(ContextEmbeddingBlock, self).__init__() 118 | 119 | self.gap = nn.AdaptiveAvgPool2D(1) 120 | self.bn = layers.SyncBatchNorm(in_dim) 121 | 122 | self.conv_1x1 = layers.ConvBNReLU(in_dim, out_dim, 1) 123 | self.add = layers.Add() 124 | self.conv_3x3 = nn.Conv2D(out_dim, out_dim, 3, 1, 1) 125 | 126 | def forward(self, x): 127 | gap = self.gap(x) 128 | bn = self.bn(gap) 129 | conv1 = self.add(self.conv_1x1(bn), x) 130 | return self.conv_3x3(conv1) 131 | 132 | 133 | class GatherAndExpansionLayer1(nn.Layer): 134 | """Gather And Expansion Layer with stride 1""" 135 | 136 | def __init__(self, in_dim, out_dim, expand): 137 | super().__init__() 138 | 139 | expand_dim = expand * in_dim 140 | 141 | self.conv = nn.Sequential( 142 | layers.ConvBNReLU(in_dim, in_dim, 3), 143 | 
layers.DepthwiseConvBN(in_dim, expand_dim, 3), 144 | layers.ConvBN(expand_dim, out_dim, 1)) 145 | self.relu = layers.Activation("relu") 146 | 147 | def forward(self, x): 148 | return self.relu(self.conv(x) + x) 149 | 150 | 151 | class GatherAndExpansionLayer2(nn.Layer): 152 | """Gather And Expansion Layer with stride 2""" 153 | 154 | def __init__(self, in_dim, out_dim, expand): 155 | super().__init__() 156 | 157 | expand_dim = expand * in_dim 158 | 159 | self.branch_1 = nn.Sequential( 160 | layers.ConvBNReLU(in_dim, in_dim, 3), 161 | layers.DepthwiseConvBN( 162 | in_dim, expand_dim, 3, stride=2), 163 | layers.DepthwiseConvBN(expand_dim, expand_dim, 3), 164 | layers.ConvBN(expand_dim, out_dim, 1)) 165 | 166 | self.branch_2 = nn.Sequential( 167 | layers.DepthwiseConvBN( 168 | in_dim, in_dim, 3, stride=2), 169 | layers.ConvBN(in_dim, out_dim, 1)) 170 | 171 | self.relu = layers.Activation("relu") 172 | 173 | def forward(self, x): 174 | return self.relu(self.branch_1(x) + self.branch_2(x)) 175 | 176 | 177 | class DetailBranch(nn.Layer): 178 | """The detail branch of BiSeNet, which has wide channels but shallow layers.""" 179 | 180 | def __init__(self, in_channels, feature_channels): 181 | super().__init__() 182 | 183 | C1, C2, C3 = feature_channels 184 | 185 | self.convs = nn.Sequential( 186 | # stage 1 187 | layers.ConvBNReLU( 188 | in_channels, C1, 3, stride=2), 189 | layers.ConvBNReLU(C1, C1, 3), 190 | # stage 2 191 | layers.ConvBNReLU( 192 | C1, C2, 3, stride=2), 193 | layers.ConvBNReLU(C2, C2, 3), 194 | layers.ConvBNReLU(C2, C2, 3), 195 | # stage 3 196 | layers.ConvBNReLU( 197 | C2, C3, 3, stride=2), 198 | layers.ConvBNReLU(C3, C3, 3), 199 | layers.ConvBNReLU(C3, C3, 3), ) 200 | 201 | def forward(self, x): 202 | return self.convs(x) 203 | 204 | 205 | class SemanticBranch(nn.Layer): 206 | """The semantic branch of BiSeNet, which has narrow channels but deep layers.""" 207 | 208 | def __init__(self, in_channels, feature_channels): 209 | super().__init__() 210 | C1, C3, C4, C5 = feature_channels 211 | 212 | self.stem = StemBlock(in_channels, C1) 213 | 214 | self.stage3 = nn.Sequential( 215 | GatherAndExpansionLayer2(C1, C3, 6), 216 | GatherAndExpansionLayer1(C3, C3, 6)) 217 | 218 | self.stage4 = nn.Sequential( 219 | GatherAndExpansionLayer2(C3, C4, 6), 220 | GatherAndExpansionLayer1(C4, C4, 6)) 221 | 222 | self.stage5_4 = nn.Sequential( 223 | GatherAndExpansionLayer2(C4, C5, 6), 224 | GatherAndExpansionLayer1(C5, C5, 6), 225 | GatherAndExpansionLayer1(C5, C5, 6), 226 | GatherAndExpansionLayer1(C5, C5, 6)) 227 | 228 | self.ce = ContextEmbeddingBlock(C5, C5) 229 | 230 | def forward(self, x): 231 | stage2 = self.stem(x) 232 | stage3 = self.stage3(stage2) 233 | stage4 = self.stage4(stage3) 234 | stage5_4 = self.stage5_4(stage4) 235 | fm = self.ce(stage5_4) 236 | return stage2, stage3, stage4, stage5_4, fm 237 | 238 | 239 | class BGA(nn.Layer): 240 | """The Bilateral Guided Aggregation Layer, used to fuse the semantic features and spatial features.""" 241 | 242 | def __init__(self, out_dim, align_corners): 243 | super().__init__() 244 | 245 | self.align_corners = align_corners 246 | 247 | self.db_branch_keep = nn.Sequential( 248 | layers.DepthwiseConvBN(out_dim, out_dim, 3), 249 | nn.Conv2D(out_dim, out_dim, 1)) 250 | 251 | self.db_branch_down = nn.Sequential( 252 | layers.ConvBN( 253 | out_dim, out_dim, 3, stride=2), 254 | nn.AvgPool2D( 255 | kernel_size=3, stride=2, padding=1)) 256 | 257 | self.sb_branch_keep = nn.Sequential( 258 | layers.DepthwiseConvBN(out_dim, out_dim, 3), 259 | 
nn.Conv2D(out_dim, out_dim, 1), 260 | layers.Activation(act='sigmoid')) 261 | 262 | self.sb_branch_up = layers.ConvBN(out_dim, out_dim, 3) 263 | 264 | self.conv = layers.ConvBN(out_dim, out_dim, 3) 265 | 266 | def forward(self, dfm, sfm): 267 | db_feat_keep = self.db_branch_keep(dfm) 268 | db_feat_down = self.db_branch_down(dfm) 269 | sb_feat_keep = self.sb_branch_keep(sfm) 270 | 271 | sb_feat_up = self.sb_branch_up(sfm) 272 | sb_feat_up = F.interpolate( 273 | sb_feat_up, 274 | paddle.shape(db_feat_keep)[2:], 275 | mode='bilinear', 276 | align_corners=self.align_corners) 277 | 278 | sb_feat_up = F.sigmoid(sb_feat_up) 279 | db_feat = db_feat_keep * sb_feat_up 280 | 281 | sb_feat = db_feat_down * sb_feat_keep 282 | sb_feat = F.interpolate( 283 | sb_feat, 284 | paddle.shape(db_feat)[2:], 285 | mode='bilinear', 286 | align_corners=self.align_corners) 287 | 288 | return self.conv(db_feat + sb_feat) 289 | 290 | 291 | class SegHead(nn.Layer): 292 | def __init__(self, in_dim, mid_dim, num_classes): 293 | super().__init__() 294 | 295 | self.conv_3x3 = nn.Sequential( 296 | layers.ConvBNReLU(in_dim, mid_dim, 3), nn.Dropout(0.1)) 297 | 298 | self.conv_1x1 = nn.Conv2D(mid_dim, num_classes, 1, 1) 299 | 300 | def forward(self, x): 301 | conv1 = self.conv_3x3(x) 302 | conv2 = self.conv_1x1(conv1) 303 | return conv2 304 | -------------------------------------------------------------------------------- /models/ddrnet.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from paddleseg.cvlibs import manager, param_init 6 | from paddleseg.models import layers 7 | from paddleseg.utils import utils 8 | 9 | 10 | class DualResNet(nn.Layer): 11 | """ 12 | The DDRNet implementation based on PaddlePaddle. 13 | 14 | The original article refers to 15 | Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" 16 | (https://arxiv.org/abs/2101.06085) 17 | 18 | Args: 19 | num_classes (int): The unique number of target classes. 20 | in_channels (int, optional): Number of input channels. Default: 3. 21 | block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2]. 22 | planes (int): Base channels in network. Default: 64. 23 | spp_planes (int): Branch channels for DAPPM. Default: 128. 24 | head_planes (int): Mid channels of segmentation head. Default: 128. 25 | enable_auxiliary_loss (bool): Whether use auxiliary head for stage3. Default: False. 26 | pretrained (str, optional): The path or url of pretrained model. Default: None. 
27 | """ 28 | 29 | def __init__(self, 30 | num_classes, 31 | in_channels=3, 32 | block_layers=[2, 2, 2, 2], 33 | planes=64, 34 | spp_planes=128, 35 | head_planes=128, 36 | enable_auxiliary_loss=False, 37 | pretrained=None): 38 | super().__init__() 39 | highres_planes = planes * 2 40 | self.enable_auxiliary_loss = enable_auxiliary_loss 41 | self.conv1 = nn.Sequential( 42 | layers.ConvBNReLU( 43 | in_channels, planes, kernel_size=3, stride=2, padding=1), 44 | layers.ConvBNReLU( 45 | planes, planes, kernel_size=3, stride=2, padding=1), ) 46 | self.relu = nn.ReLU() 47 | self.layer1 = self._make_layers(BasicBlock, planes, planes, 48 | block_layers[0]) 49 | self.layer2 = self._make_layers( 50 | BasicBlock, planes, planes * 2, block_layers[1], stride=2) 51 | self.layer3 = self._make_layers( 52 | BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2) 53 | self.layer4 = self._make_layers( 54 | BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2) 55 | 56 | self.compression3 = layers.ConvBN( 57 | planes * 4, highres_planes, kernel_size=1, bias_attr=False) 58 | 59 | self.compression4 = layers.ConvBN( 60 | planes * 8, highres_planes, kernel_size=1, bias_attr=False) 61 | 62 | self.down3 = layers.ConvBN( 63 | highres_planes, 64 | planes * 4, 65 | kernel_size=3, 66 | stride=2, 67 | bias_attr=False) 68 | 69 | self.down4 = nn.Sequential( 70 | layers.ConvBNReLU( 71 | highres_planes, 72 | planes * 4, 73 | kernel_size=3, 74 | stride=2, 75 | padding=1, 76 | bias_attr=False), 77 | layers.ConvBN( 78 | planes * 4, 79 | planes * 8, 80 | kernel_size=3, 81 | stride=2, 82 | padding=1, 83 | bias_attr=False)) 84 | 85 | self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes, 86 | 2) 87 | self.layer4_ = self._make_layers(BasicBlock, highres_planes, 88 | highres_planes, 2) 89 | self.layer5_ = self._make_layers(Bottleneck, highres_planes, 90 | highres_planes, 1) 91 | self.layer5 = self._make_layers( 92 | Bottleneck, planes * 8, planes * 8, 1, stride=2) 93 | 94 | self.spp = DAPPM(planes * 16, spp_planes, planes * 4) 95 | if self.enable_auxiliary_loss: 96 | self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes) 97 | self.head = DDRNetHead(planes * 4, head_planes, num_classes) 98 | 99 | self.pretrained = pretrained 100 | self.init_weight() 101 | 102 | def init_weight(self): 103 | if self.pretrained is not None: 104 | utils.load_entire_model(self, self.pretrained) 105 | else: 106 | for m in self.sublayers(): 107 | if isinstance(m, nn.Conv2D): 108 | param_init.kaiming_normal_init(m.weight) 109 | elif isinstance(m, nn.BatchNorm2D): 110 | param_init.constant_init(m.weight, value=1) 111 | param_init.constant_init(m.bias, value=0) 112 | 113 | def _make_layers(self, block, inplanes, planes, blocks, stride=1): 114 | downsample = None 115 | if stride != 1 or inplanes != planes * block.expansion: 116 | downsample = nn.Sequential( 117 | nn.Conv2D( 118 | inplanes, 119 | planes * block.expansion, 120 | kernel_size=1, 121 | stride=stride, 122 | bias_attr=False), 123 | nn.BatchNorm2D(planes * block.expansion), ) 124 | layers = [] 125 | layers.append(block(inplanes, planes, stride, downsample)) 126 | inplanes = planes * block.expansion 127 | for i in range(1, blocks): 128 | if i == (blocks - 1): 129 | layers.append(block(inplanes, planes, stride=1, no_relu=True)) 130 | else: 131 | layers.append(block(inplanes, planes, stride=1, no_relu=False)) 132 | return nn.Sequential(*layers) 133 | 134 | def forward(self, x): 135 | n, c, h, w = paddle.shape(x) 136 | width_output = w // 8 137 | height_output 
= h // 8 138 | 139 | x = self.conv1(x) 140 | stage1_out = self.layer1(x) 141 | stage2_out = self.layer2(self.relu(stage1_out)) 142 | stage3_out = self.layer3(self.relu(stage2_out)) 143 | stage3_out_dual = self.layer3_(self.relu(stage2_out)) 144 | x = stage3_out + self.down3(self.relu(stage3_out_dual)) 145 | stage3_merge = stage3_out_dual + F.interpolate( 146 | self.compression3(self.relu(stage3_out)), 147 | size=[height_output, width_output], 148 | mode='bilinear') 149 | 150 | stage4_out = self.layer4(self.relu(x)) 151 | stage4_out_dual = self.layer4_(self.relu(stage3_merge)) 152 | 153 | x = stage4_out + self.down4(self.relu(stage4_out_dual)) 154 | stage4_merge = stage4_out_dual + F.interpolate( 155 | self.compression4(self.relu(stage4_out)), 156 | size=[height_output, width_output], 157 | mode='bilinear') 158 | 159 | stage5_out_dual = self.layer5_(self.relu(stage4_merge)) 160 | x = F.interpolate( 161 | self.spp(self.layer5(self.relu(x))), 162 | size=[height_output, width_output], 163 | mode='bilinear') 164 | 165 | output = self.head(x + stage5_out_dual) 166 | logit_list = [] 167 | logit_list.append(output) 168 | 169 | if self.enable_auxiliary_loss: 170 | aux_out = self.aux_head(stage3_merge) 171 | logit_list.append(aux_out) 172 | return [ 173 | F.interpolate( 174 | logit, [h, w], mode='bilinear') for logit in logit_list 175 | ] 176 | 177 | 178 | class BasicBlock(nn.Layer): 179 | expansion = 1 180 | 181 | def __init__(self, 182 | inplanes, 183 | planes, 184 | stride=1, 185 | downsample=None, 186 | no_relu=False): 187 | super().__init__() 188 | self.conv_bn_relu = layers.ConvBNReLU( 189 | inplanes, 190 | planes, 191 | kernel_size=3, 192 | stride=stride, 193 | padding=1, 194 | bias_attr=False) 195 | self.relu = nn.ReLU() 196 | self.conv_bn = layers.ConvBN( 197 | planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False) 198 | self.downsample = downsample 199 | self.stride = stride 200 | self.no_relu = no_relu 201 | 202 | def forward(self, x): 203 | residual = x 204 | out = self.conv_bn_relu(x) 205 | out = self.conv_bn(out) 206 | if self.downsample is not None: 207 | residual = self.downsample(x) 208 | out += residual 209 | if self.no_relu: 210 | return out 211 | else: 212 | return self.relu(out) 213 | 214 | 215 | class Bottleneck(nn.Layer): 216 | expansion = 2 217 | 218 | def __init__(self, 219 | inplanes, 220 | planes, 221 | stride=1, 222 | downsample=None, 223 | no_relu=True): 224 | super().__init__() 225 | self.conv_bn_relu1 = layers.ConvBNReLU( 226 | inplanes, planes, kernel_size=1, bias_attr=False) 227 | self.conv_bn_relu2 = layers.ConvBNReLU( 228 | planes, 229 | planes, 230 | kernel_size=3, 231 | stride=stride, 232 | padding=1, 233 | bias_attr=False) 234 | self.conv_bn = layers.ConvBN( 235 | planes, planes * self.expansion, kernel_size=1, bias_attr=False) 236 | self.relu = nn.ReLU() 237 | self.downsample = downsample 238 | self.stride = stride 239 | self.no_relu = no_relu 240 | 241 | def forward(self, x): 242 | residual = x 243 | out = self.conv_bn_relu1(x) 244 | out = self.conv_bn_relu2(out) 245 | out = self.conv_bn(out) 246 | if self.downsample is not None: 247 | residual = self.downsample(x) 248 | out += residual 249 | if self.no_relu: 250 | return out 251 | else: 252 | return self.relu(out) 253 | 254 | 255 | class DAPPM(nn.Layer): 256 | def __init__(self, inplanes, branch_planes, outplanes): 257 | super().__init__() 258 | self.scale1 = nn.Sequential( 259 | nn.AvgPool2D( 260 | kernel_size=5, stride=2, padding=2), 261 | layers.SyncBatchNorm(inplanes), 262 | nn.ReLU(), 263 | 
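# The five scale branches (scale0-scale4) share a pre-activation layout: pooling,
# then BN + ReLU, then a 1x1 conv to branch_planes. scale1/2/3 average-pool with
# strides 2/4/8, scale4 pools globally, and scale0 skips pooling, so the branches
# aggregate progressively larger context at matching channel width.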
nn.Conv2D( 264 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 265 | self.scale2 = nn.Sequential( 266 | nn.AvgPool2D( 267 | kernel_size=9, stride=4, padding=4), 268 | layers.SyncBatchNorm(inplanes), 269 | nn.ReLU(), 270 | nn.Conv2D( 271 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 272 | self.scale3 = nn.Sequential( 273 | nn.AvgPool2D( 274 | kernel_size=17, stride=8, padding=8), 275 | layers.SyncBatchNorm(inplanes), 276 | nn.ReLU(), 277 | nn.Conv2D( 278 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 279 | self.scale4 = nn.Sequential( 280 | nn.AdaptiveAvgPool2D((1, 1)), 281 | layers.SyncBatchNorm(inplanes), 282 | nn.ReLU(), 283 | nn.Conv2D( 284 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 285 | self.scale0 = nn.Sequential( 286 | layers.SyncBatchNorm(inplanes), 287 | nn.ReLU(), 288 | nn.Conv2D( 289 | inplanes, branch_planes, kernel_size=1, bias_attr=False), ) 290 | self.process1 = nn.Sequential( 291 | layers.SyncBatchNorm(branch_planes), 292 | nn.ReLU(), 293 | nn.Conv2D( 294 | branch_planes, 295 | branch_planes, 296 | kernel_size=3, 297 | padding=1, 298 | bias_attr=False), ) 299 | self.process2 = nn.Sequential( 300 | layers.SyncBatchNorm(branch_planes), 301 | nn.ReLU(), 302 | nn.Conv2D( 303 | branch_planes, 304 | branch_planes, 305 | kernel_size=3, 306 | padding=1, 307 | bias_attr=False), ) 308 | self.process3 = nn.Sequential( 309 | layers.SyncBatchNorm(branch_planes), 310 | nn.ReLU(), 311 | nn.Conv2D( 312 | branch_planes, 313 | branch_planes, 314 | kernel_size=3, 315 | padding=1, 316 | bias_attr=False), ) 317 | self.process4 = nn.Sequential( 318 | layers.SyncBatchNorm(branch_planes), 319 | nn.ReLU(), 320 | nn.Conv2D( 321 | branch_planes, 322 | branch_planes, 323 | kernel_size=3, 324 | padding=1, 325 | bias_attr=False), ) 326 | self.compression = nn.Sequential( 327 | layers.SyncBatchNorm(branch_planes * 5), 328 | nn.ReLU(), 329 | nn.Conv2D( 330 | branch_planes * 5, outplanes, kernel_size=1, bias_attr=False)) 331 | self.shortcut = nn.Sequential( 332 | layers.SyncBatchNorm(inplanes), 333 | nn.ReLU(), 334 | nn.Conv2D( 335 | inplanes, outplanes, kernel_size=1, bias_attr=False)) 336 | 337 | def forward(self, x): 338 | n, c, h, w = paddle.shape(x) 339 | x0 = self.scale0(x) 340 | x1 = self.process1( 341 | F.interpolate( 342 | self.scale1(x), size=[h, w], mode='bilinear') + x0) 343 | x2 = self.process2( 344 | F.interpolate( 345 | self.scale2(x), size=[h, w], mode='bilinear') + x1) 346 | x3 = self.process3( 347 | F.interpolate( 348 | self.scale3(x), size=[h, w], mode='bilinear') + x2) 349 | x4 = self.process4( 350 | F.interpolate( 351 | self.scale4(x), size=[h, w], mode='bilinear') + x3) 352 | 353 | out = self.compression(paddle.concat([x0, x1, x2, x3, x4], 354 | 1)) + self.shortcut(x) 355 | return out 356 | 357 | 358 | class DDRNetHead(nn.Layer): 359 | def __init__(self, inplanes, interplanes, outplanes, scale_factor=None): 360 | super().__init__() 361 | self.bn1 = nn.BatchNorm2D(inplanes) 362 | self.relu = nn.ReLU() 363 | self.conv_bn_relu = layers.ConvBNReLU( 364 | inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False) 365 | self.conv = nn.Conv2D( 366 | interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True) 367 | 368 | self.scale_factor = scale_factor 369 | 370 | def forward(self, x): 371 | x = self.bn1(x) 372 | x = self.relu(x) 373 | x = self.conv_bn_relu(x) 374 | out = self.conv(x) 375 | 376 | if self.scale_factor is not None: 377 | out = F.interpolate( 378 | out, scale_factor=self.scale_factor, mode='bilinear') 
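# scale_factor is None by default; DualResNet constructs its heads without it
# and instead resizes the logits to the input size in its own forward pass.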
379 | return out
380 | 
381 | 
382 | @manager.MODELS.add_component
383 | def DDRNet_23(**kwargs):
384 | return DualResNet(
385 | block_layers=[2, 2, 2, 2],
386 | planes=64,
387 | spp_planes=128,
388 | head_planes=128,
389 | **kwargs)
390 | 
-------------------------------------------------------------------------------- /models/deeplabv3p.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | 
5 | from paddleseg.cvlibs import manager
6 | from paddleseg.models import layers
7 | from paddleseg.utils import utils
8 | 
9 | __all__ = ['DeepLabV3P', 'DeepLabV3']
10 | 
11 | 
12 | @manager.MODELS.add_component
13 | class DeepLabV3P(nn.Layer):
14 | """
15 | The DeepLabV3Plus implementation based on PaddlePaddle.
16 | 
17 | The original article refers to
18 | Liang-Chieh Chen, et al. "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
19 | (https://arxiv.org/abs/1802.02611)
20 | 
21 | Args:
22 | num_classes (int): The unique number of target classes.
23 | backbone (paddle.nn.Layer): Backbone network; currently supports Resnet50_vd/Resnet101_vd/Xception65.
24 | backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone.
25 | Default: (0, 3).
26 | aspp_ratios (tuple, optional): The dilation rates used in the ASPP module.
27 | If output_stride=16, aspp_ratios should be set as (1, 6, 12, 18).
28 | If output_stride=8, aspp_ratios is (1, 12, 24, 36).
29 | Default: (1, 6, 12, 18).
30 | aspp_out_channels (int, optional): The output channels of ASPP module. Default: 256.
31 | align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
32 | e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
33 | pretrained (str, optional): The path or url of pretrained model. Default: None.
34 | data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW".
35 | """
36 | 
37 | def __init__(self,
38 | num_classes,
39 | backbone,
40 | backbone_indices=(0, 3),
41 | aspp_ratios=(1, 6, 12, 18),
42 | aspp_out_channels=256,
43 | align_corners=False,
44 | pretrained=None,
45 | data_format="NCHW"):
46 | super().__init__()
47 | 
48 | self.backbone = backbone
49 | backbone_channels = [
50 | backbone.feat_channels[i] for i in backbone_indices
51 | ]
52 | 
53 | self.head = DeepLabV3PHead(
54 | num_classes,
55 | backbone_indices,
56 | backbone_channels,
57 | aspp_ratios,
58 | aspp_out_channels,
59 | align_corners,
60 | data_format=data_format)
61 | 
62 | self.align_corners = align_corners
63 | self.pretrained = pretrained
64 | self.data_format = data_format
65 | self.init_weight()
66 | 
67 | def forward(self, x):
68 | feat_list = self.backbone(x)
69 | logit_list = self.head(feat_list)
70 | if self.data_format == 'NCHW':
71 | ori_shape = paddle.shape(x)[2:]
72 | else:
73 | ori_shape = paddle.shape(x)[1:3]
74 | return [
75 | F.interpolate(
76 | logit,
77 | ori_shape,
78 | mode='bilinear',
79 | align_corners=self.align_corners,
80 | data_format=self.data_format) for logit in logit_list
81 | ]
82 | 
83 | def init_weight(self):
84 | if self.pretrained is not None:
85 | utils.load_entire_model(self, self.pretrained)
86 | 
87 | 
88 | class DeepLabV3PHead(nn.Layer):
89 | """
90 | The DeepLabV3PHead implementation based on PaddlePaddle.
91 | 
92 | Args:
93 | num_classes (int): The unique number of target classes.
94 | backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone.
95 | The first index will be taken as a low-level feature in the Decoder component;
96 | the second one will be taken as input of the ASPP component.
97 | Usually a backbone consists of four downsampling stages and returns an output of
98 | each stage. If we set it as (0, 3), it means taking the feature map of the first
99 | stage in the backbone as the low-level feature used in the Decoder, and the feature map of the fourth
100 | stage as input of ASPP.
101 | backbone_channels (tuple): The same length as "backbone_indices". It indicates the channels of the corresponding indices.
102 | aspp_ratios (tuple): The dilation rates used in the ASPP module.
103 | aspp_out_channels (int): The output channels of the ASPP module.
104 | align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of the feature
105 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
106 | data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW".
107 | """
108 | 
109 | def __init__(self,
110 | num_classes,
111 | backbone_indices,
112 | backbone_channels,
113 | aspp_ratios,
114 | aspp_out_channels,
115 | align_corners,
116 | data_format='NCHW'):
117 | super().__init__()
118 | 
119 | self.aspp = layers.ASPPModule(
120 | aspp_ratios,
121 | backbone_channels[1],
122 | aspp_out_channels,
123 | align_corners,
124 | use_sep_conv=True,
125 | image_pooling=True,
126 | data_format=data_format)
127 | self.decoder = Decoder(
128 | num_classes,
129 | backbone_channels[0],
130 | align_corners,
131 | data_format=data_format)
132 | self.backbone_indices = backbone_indices
133 | 
134 | def forward(self, feat_list):
135 | logit_list = []
136 | low_level_feat = feat_list[self.backbone_indices[0]]
137 | x = feat_list[self.backbone_indices[1]]
138 | x = self.aspp(x)
139 | logit = self.decoder(x, low_level_feat)
140 | logit_list.append(logit)
141 | 
142 | return logit_list
143 | 
144 | 
145 | @manager.MODELS.add_component
146 | class DeepLabV3(nn.Layer):
147 | """
148 | The DeepLabV3 implementation based on PaddlePaddle.
149 | 
150 | The original article refers to
151 | Liang-Chieh Chen, et al. "Rethinking Atrous Convolution for Semantic Image Segmentation"
152 | (https://arxiv.org/pdf/1706.05587.pdf).
153 | 
154 | Args:
155 | Please refer to DeepLabV3P above.
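Note that, unlike DeepLabV3P, DeepLabV3 takes a single backbone feature map
(backbone_indices defaults to (3,)) and applies ASPP followed by a 1x1
classifier directly, without the decoder or low-level feature fusion.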
156 | """ 157 | 158 | def __init__(self, 159 | num_classes, 160 | backbone, 161 | backbone_indices=(3, ), 162 | aspp_ratios=(1, 6, 12, 18), 163 | aspp_out_channels=256, 164 | align_corners=False, 165 | pretrained=None): 166 | super().__init__() 167 | 168 | self.backbone = backbone 169 | backbone_channels = [ 170 | backbone.feat_channels[i] for i in backbone_indices 171 | ] 172 | 173 | self.head = DeepLabV3Head(num_classes, backbone_indices, 174 | backbone_channels, aspp_ratios, 175 | aspp_out_channels, align_corners) 176 | self.align_corners = align_corners 177 | self.pretrained = pretrained 178 | self.init_weight() 179 | 180 | def forward(self, x): 181 | feat_list = self.backbone(x) 182 | logit_list = self.head(feat_list) 183 | return [ 184 | F.interpolate( 185 | logit, 186 | paddle.shape(x)[2:], 187 | mode='bilinear', 188 | align_corners=self.align_corners) for logit in logit_list 189 | ] 190 | 191 | def init_weight(self): 192 | if self.pretrained is not None: 193 | utils.load_entire_model(self, self.pretrained) 194 | 195 | 196 | class DeepLabV3Head(nn.Layer): 197 | """ 198 | The DeepLabV3Head implementation based on PaddlePaddle. 199 | 200 | Args: 201 | Please Refer to DeepLabV3PHead above. 202 | """ 203 | 204 | def __init__(self, num_classes, backbone_indices, backbone_channels, 205 | aspp_ratios, aspp_out_channels, align_corners): 206 | super().__init__() 207 | 208 | self.aspp = layers.ASPPModule( 209 | aspp_ratios, 210 | backbone_channels[0], 211 | aspp_out_channels, 212 | align_corners, 213 | use_sep_conv=False, 214 | image_pooling=True) 215 | 216 | self.cls = nn.Conv2D( 217 | in_channels=aspp_out_channels, 218 | out_channels=num_classes, 219 | kernel_size=1) 220 | 221 | self.backbone_indices = backbone_indices 222 | 223 | def forward(self, feat_list): 224 | logit_list = [] 225 | x = feat_list[self.backbone_indices[0]] 226 | x = self.aspp(x) 227 | logit = self.cls(x) 228 | logit_list.append(logit) 229 | 230 | return logit_list 231 | 232 | 233 | class Decoder(nn.Layer): 234 | """ 235 | Decoder module of DeepLabV3P model 236 | 237 | Args: 238 | num_classes (int): The number of classes. 239 | in_channels (int): The number of input channels in decoder module. 
240 | """ 241 | 242 | def __init__(self, 243 | num_classes, 244 | in_channels, 245 | align_corners, 246 | data_format='NCHW'): 247 | super(Decoder, self).__init__() 248 | 249 | self.data_format = data_format 250 | self.conv_bn_relu1 = layers.ConvBNReLU( 251 | in_channels=in_channels, 252 | out_channels=48, 253 | kernel_size=1, 254 | data_format=data_format) 255 | 256 | self.conv_bn_relu2 = layers.SeparableConvBNReLU( 257 | in_channels=304, 258 | out_channels=256, 259 | kernel_size=3, 260 | padding=1, 261 | data_format=data_format) 262 | self.conv_bn_relu3 = layers.SeparableConvBNReLU( 263 | in_channels=256, 264 | out_channels=256, 265 | kernel_size=3, 266 | padding=1, 267 | data_format=data_format) 268 | self.conv = nn.Conv2D( 269 | in_channels=256, 270 | out_channels=num_classes, 271 | kernel_size=1, 272 | data_format=data_format) 273 | 274 | self.align_corners = align_corners 275 | 276 | def forward(self, x, low_level_feat): 277 | low_level_feat = self.conv_bn_relu1(low_level_feat) 278 | if self.data_format == 'NCHW': 279 | low_level_shape = paddle.shape(low_level_feat)[-2:] 280 | axis = 1 281 | else: 282 | low_level_shape = paddle.shape(low_level_feat)[1:3] 283 | axis = -1 284 | x = F.interpolate( 285 | x, 286 | low_level_shape, 287 | mode='bilinear', 288 | align_corners=self.align_corners, 289 | data_format=self.data_format) 290 | x = paddle.concat([x, low_level_feat], axis=axis) 291 | x = self.conv_bn_relu2(x) 292 | x = self.conv_bn_relu3(x) 293 | x = self.conv(x) 294 | return x 295 | -------------------------------------------------------------------------------- /models/hrnet_w18_s.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | from paddleseg.models.backbones.hrnet import HRNet_W18 11 | 12 | 13 | 14 | class SegHead(nn.Layer): 15 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 16 | super(SegHead, self).__init__() 17 | self.bn1 = nn.BatchNorm2D(inplanes) 18 | self.relu = nn.ReLU() 19 | if aux_head: 20 | self.con_bn_relu = nn.Sequential( 21 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 22 | nn.BatchNorm2D(interplanes), 23 | nn.ReLU(), 24 | ) 25 | else: 26 | self.con_bn_relu = nn.Sequential( 27 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 28 | nn.BatchNorm2D(interplanes), 29 | nn.ReLU(), 30 | ) 31 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 32 | 33 | 34 | def forward(self, x): 35 | x = self.bn1(x) 36 | x = self.relu(x) 37 | x = self.con_bn_relu(x) 38 | out = self.conv(x) 39 | return out 40 | 41 | @manager.MODELS.add_component 42 | class HRNet_W18_S(nn.Layer): 43 | def __init__(self, 44 | num_classes=2, 45 | ): 46 | super().__init__() 47 | self.hrnet = HRNet_W18() 48 | self.head = SegHead(inplanes=270, interplanes=64, outplanes=num_classes) 49 | 50 | def forward(self, x): 51 | logits = [] 52 | h, w = paddle.shape(x)[2:] 53 | x = self.hrnet(x) 54 | x = x[0] 55 | x = self.head(x) 56 | logits.append(x) 57 | logits = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logits] 58 | return logits 59 | 60 | 61 | 62 | def init_weights(self): 63 | 
for m in self.sublayers():
64 | if isinstance(m, nn.Conv2D):
65 | param_init.kaiming_normal_init(m.weight)
66 | elif isinstance(m, nn.BatchNorm2D):
67 | param_init.constant_init(m.weight, value=1)
68 | param_init.constant_init(m.bias, value=0)
69 | 
70 | 
71 | 
72 | 
73 | # if __name__ == '__main__':
74 | # model = HRNet_W18_S()
75 | # input = paddle.rand([1, 3, 400, 400])
76 | # output = model(input)
77 | # print(len(output))
78 | # for o in output:
79 | # print(o.shape)
-------------------------------------------------------------------------------- /models/hrsegb64.py: --------------------------------------------------------------------------------
1 | import math
2 | 
3 | import paddle
4 | import paddle.nn as nn
5 | 
6 | from paddleseg.utils import utils
7 | from paddleseg.cvlibs import manager, param_init
8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm
9 | import paddle.nn.functional as F
10 | 
11 | 
12 | 
13 | 
14 | # features
15 | # 1. The size of the high-resolution path remains constant throughout the process
16 | # 2. In order to reduce and flexibly control the computational cost, the channel count of
17 | # the high-resolution path remains unchanged
18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation
19 | # heads used for auxiliary loss during training
20 | # 4. The seg head upsamples in two steps, instead of restoring the
21 | # original resolution all at once
22 | 
23 | # If you need to use this model with paddleseg, you need to add it to the model library
24 | # using manager.MODELS.add_component()
25 | 
26 | """
27 | Seg heads removed; only the backbone is retained
28 | """
29 | @manager.BACKBONES.add_component
30 | class HrSegB64(nn.Layer):
31 | """
32 | The HrSegNet backbone implementation based on PaddlePaddle.
33 | 
34 | Args:
35 | num_classes (int): The unique number of target classes.
36 | 
37 | in_channels (int, optional): The channels of input image. Default: 3.
38 | 
39 | base (int, optional): The base channel number of the model. Default: 64.
40 | """
41 | def __init__(self,
42 | in_channels=3, # input channel
43 | base=64, # base channel of the model
44 | num_classes=2 # number of classes
45 | ):
46 | super(HrSegB64, self).__init__()
47 | self.base = base
48 | self.num_classes = num_classes
49 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features.
50 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 51 | self.stage1 = nn.Sequential( 52 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 53 | nn.BatchNorm2D(base // 2), 54 | nn.ReLU(), 55 | ) 56 | self.stage2 = nn.Sequential( 57 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 58 | nn.BatchNorm2D(base), 59 | nn.ReLU(), 60 | ) 61 | 62 | self.seg1 = SegBlock(base=base, stage_index=1) 63 | self.seg2 = SegBlock(base=base, stage_index=2) 64 | self.seg3 = SegBlock(base=base, stage_index=3) 65 | 66 | # self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 67 | # self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 68 | # self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 69 | 70 | self.feat_channels = [base] 71 | self.init_weight() 72 | 73 | def forward(self, x): 74 | logit_list = [] 75 | h, w = paddle.shape(x)[2:] 76 | # aux_head only used in training 77 | stem1_out = self.stage1(x) 78 | stem2_out = self.stage2(stem1_out) 79 | hrseg1_out = self.seg1(stem2_out) 80 | hrseg2_out = self.seg2(hrseg1_out) 81 | hrseg3_out = self.seg3(hrseg2_out) 82 | logit_list.append(hrseg3_out) 83 | return logit_list 84 | 85 | 86 | 87 | def init_weight(self): 88 | for m in self.sublayers(): 89 | if isinstance(m, nn.Conv2D): 90 | param_init.kaiming_normal_init(m.weight) 91 | elif isinstance(m, nn.BatchNorm2D): 92 | param_init.constant_init(m.weight, value=1) 93 | param_init.constant_init(m.bias, value=0) 94 | 95 | 96 | 97 | class SegBlock(nn.Layer): 98 | def __init__(self, 99 | base=32, 100 | stage_index=1):# stage_index=1,2,3. 101 | super(SegBlock, self).__init__() 102 | 103 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 104 | self.h_conv1 = nn.Sequential( 105 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 106 | nn.BatchNorm2D(base), 107 | nn.ReLU() 108 | ) 109 | self.h_conv2 = nn.Sequential( 110 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 111 | nn.BatchNorm2D(base), 112 | nn.ReLU() 113 | ) 114 | self.h_conv3 = nn.Sequential( 115 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 116 | nn.BatchNorm2D(base), 117 | nn.ReLU() 118 | ) 119 | 120 | # sematic guidance path/low-resolution path 121 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 122 | self.l_conv1 = nn.Sequential( 123 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 124 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 125 | nn.ReLU() 126 | ) 127 | elif stage_index==2: #second stage 128 | self.l_conv1 = nn.Sequential( 129 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 130 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 131 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 132 | nn.ReLU() 133 | ) 134 | elif stage_index==3: 135 | self.l_conv1 = nn.Sequential( 136 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU(), 140 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), 
out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 141 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 142 | nn.ReLU() 143 | ) 144 | else: 145 | raise ValueError("stage_index must be 1, 2 or 3") 146 | self.l_conv2 = nn.Sequential( 147 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 148 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 149 | nn.ReLU() 150 | ) 151 | self.l_conv3 = nn.Sequential( 152 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 153 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 154 | nn.ReLU() 155 | ) 156 | 157 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 158 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 159 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 160 | 161 | 162 | 163 | def forward(self, x): 164 | # out = [] 165 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 166 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 167 | size = x.shape[2:] 168 | out_h1 = self.h_conv1(x) # high resolution path 169 | out_l1 = self.l_conv1(x) # low resolution path 170 | # print(out_l1.shape) 171 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 172 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 173 | 174 | out_h2 = self.h_conv2(out_hl1) 175 | out_l2 = self.l_conv2(out_l1) 176 | # print(out_l2.shape) 177 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 178 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 179 | 180 | out_h3 = self.h_conv3(out_hl2) 181 | out_l3 = self.l_conv3(out_l2) 182 | # print(out_l3.shape) 183 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 184 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 185 | return out_hl3 186 | 187 | # seg head 188 | class SegHead(nn.Layer): 189 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 190 | super(SegHead, self).__init__() 191 | self.bn1 = nn.BatchNorm2D(inplanes) 192 | self.relu = nn.ReLU() 193 | if aux_head: 194 | self.con_bn_relu = nn.Sequential( 195 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 196 | nn.BatchNorm2D(interplanes), 197 | nn.ReLU(), 198 | ) 199 | else: 200 | self.con_bn_relu = nn.Sequential( 201 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 202 | nn.BatchNorm2D(interplanes), 203 | nn.ReLU(), 204 | ) 205 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 206 | 207 | 208 | def forward(self, x): 209 | x = self.bn1(x) 210 | x = self.relu(x) 211 | x = self.con_bn_relu(x) 212 | out = self.conv(x) 213 | return out 214 | 215 | 216 | 217 | # if __name__ == "__main__": 218 | # model = HrSegNet() 219 | # x = paddle.randn([1, 3, 400, 400]) 220 | # out = model(x) 221 | # print(out[0].shape) 222 | 223 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 224 | 225 | -------------------------------------------------------------------------------- /models/hrsegnet_b16.py: 
-------------------------------------------------------------------------------- 1 | import math
2 | 
3 | import paddle
4 | import paddle.nn as nn
5 | 
6 | from paddleseg.utils import utils
7 | from paddleseg.cvlibs import manager, param_init
8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm
9 | import paddle.nn.functional as F
10 | 
11 | 
12 | """
13 | This file is identical to the default HrSegNet,
14 | except for the value of the parameter base
15 | """
16 | 
17 | # features
18 | # 1. The size of the high-resolution path remains constant throughout the process
19 | # 2. In order to reduce and flexibly control the computational cost, the channel count of
20 | # the high-resolution path remains unchanged
21 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation
22 | # heads used for auxiliary loss during training
23 | # 4. The seg head upsamples in two steps, instead of restoring the
24 | # original resolution all at once
25 | 
26 | # If you need to use this model with paddleseg, you need to add it to the model library
27 | # using manager.MODELS.add_component()
28 | @manager.MODELS.add_component
29 | class HrSegNetB16(nn.Layer):
30 | """
31 | The HrSegNet implementation based on PaddlePaddle.
32 | 
33 | Args:
34 | num_classes (int): The unique number of target classes.
35 | 
36 | in_channels (int, optional): The channels of input image. Default: 3.
37 | 
38 | base (int, optional): The base channel number of the model. Default: 16.
39 | """
40 | def __init__(self,
41 | in_channels=3, # input channel
42 | base=16, # base channel of the model
43 | num_classes=2, # number of classes
44 | pretrained=None # pretrained model
45 | ):
46 | super(HrSegNetB16, self).__init__()
47 | self.base = base
48 | self.num_classes = num_classes
49 | self.pretrained = pretrained
50 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features.
51 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 52 | self.stage1 = nn.Sequential( 53 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 54 | nn.BatchNorm2D(base // 2), 55 | nn.ReLU(), 56 | ) 57 | self.stage2 = nn.Sequential( 58 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 59 | nn.BatchNorm2D(base), 60 | nn.ReLU(), 61 | ) 62 | 63 | self.seg1 = SegBlock(base=base, stage_index=1) 64 | self.seg2 = SegBlock(base=base, stage_index=2) 65 | self.seg3 = SegBlock(base=base, stage_index=3) 66 | 67 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 68 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 69 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 70 | 71 | self.init_weight() 72 | 73 | def forward(self, x): 74 | logit_list = [] 75 | h, w = paddle.shape(x)[2:] 76 | # aux_head only used in training 77 | if self.training: 78 | stem1_out = self.stage1(x) 79 | stem2_out = self.stage2(stem1_out) 80 | hrseg1_out = self.seg1(stem2_out) 81 | hrseg2_out = self.seg2(hrseg1_out) 82 | hrseg3_out = self.seg3(hrseg2_out) 83 | last_out = self.head(hrseg3_out) 84 | seghead1_out = self.aux_head1(hrseg1_out) 85 | seghead2_out = self.aux_head2(hrseg2_out) 86 | logit_list = [last_out, seghead1_out, seghead2_out] 87 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 88 | return logit_list 89 | else: 90 | stem1_out = self.stage1(x) 91 | stem2_out = self.stage2(stem1_out) 92 | hrseg1_out = self.seg1(stem2_out) 93 | hrseg2_out = self.seg2(hrseg1_out) 94 | hrseg3_out = self.seg3(hrseg2_out) 95 | last_out = self.head(hrseg3_out) 96 | logit_list = [last_out] 97 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 98 | return logit_list 99 | 100 | 101 | def init_weight(self): 102 | if self.pretrained is not None: 103 | utils.load_entire_model(self, self.pretrained) 104 | else: 105 | for m in self.sublayers(): 106 | if isinstance(m, nn.Conv2D): 107 | param_init.kaiming_normal_init(m.weight) 108 | elif isinstance(m, nn.BatchNorm2D): 109 | param_init.constant_init(m.weight, value=1) 110 | param_init.constant_init(m.bias, value=0) 111 | 112 | 113 | 114 | class SegBlock(nn.Layer): 115 | def __init__(self, 116 | base=32, 117 | stage_index=1):# stage_index=1,2,3. 
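# Each SegBlock runs two parallel paths that are fused three times:
# - the high-resolution path (h_conv1..3) keeps 1/4 resolution and `base` channels
# - the semantic-guidance path (l_conv1..3) divides the resolution by 2**stage_index
#   and multiplies the channels by 2**stage_index
# After every step the low-res feature is bilinearly upsampled, projected back to
# `base` channels by a 1x1 conv (l2h_convN), and added to the high-res feature.
# A rough shape sketch with hypothetical sizes (base=16, stage_index=2, and a
# [N, 16, 100, 100] input to the block, e.g. a 400x400 image after the stem):
#   high-res path: [N, 16, 100, 100] at every step
#   l_conv1 output: [N, 64, 25, 25] (avg-pool /2, then stride-2 conv; channels x4)
#   l2h_conv1(upsampled l_conv1): [N, 16, 100, 100], added to the h_conv1 output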
118 | super(SegBlock, self).__init__() 119 | 120 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 121 | self.h_conv1 = nn.Sequential( 122 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 123 | nn.BatchNorm2D(base), 124 | nn.ReLU() 125 | ) 126 | self.h_conv2 = nn.Sequential( 127 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 128 | nn.BatchNorm2D(base), 129 | nn.ReLU() 130 | ) 131 | self.h_conv3 = nn.Sequential( 132 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 133 | nn.BatchNorm2D(base), 134 | nn.ReLU() 135 | ) 136 | 137 | # sematic guidance path/low-resolution path 138 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 139 | self.l_conv1 = nn.Sequential( 140 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 141 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 142 | nn.ReLU() 143 | ) 144 | elif stage_index==2: #second stage 145 | self.l_conv1 = nn.Sequential( 146 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 147 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 148 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 149 | nn.ReLU() 150 | ) 151 | elif stage_index==3: 152 | self.l_conv1 = nn.Sequential( 153 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 154 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU(), 157 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 158 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 159 | nn.ReLU() 160 | ) 161 | else: 162 | raise ValueError("stage_index must be 1, 2 or 3") 163 | self.l_conv2 = nn.Sequential( 164 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 165 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 166 | nn.ReLU() 167 | ) 168 | self.l_conv3 = nn.Sequential( 169 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 170 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 171 | nn.ReLU() 172 | ) 173 | 174 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 175 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 176 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 177 | 178 | 179 | 180 | def forward(self, x): 181 | # out = [] 182 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 183 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 184 | size = x.shape[2:] 185 | out_h1 = self.h_conv1(x) # high resolution path 186 | out_l1 = self.l_conv1(x) # low resolution path 187 | # print(out_l1.shape) 188 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 189 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 190 | 191 | out_h2 = self.h_conv2(out_hl1) 192 | out_l2 = self.l_conv2(out_l1) 
193 | # print(out_l2.shape) 194 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 195 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 196 | 197 | out_h3 = self.h_conv3(out_hl2) 198 | out_l3 = self.l_conv3(out_l2) 199 | # print(out_l3.shape) 200 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 201 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 202 | return out_hl3 203 | 204 | # seg head 205 | class SegHead(nn.Layer): 206 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 207 | super(SegHead, self).__init__() 208 | self.bn1 = nn.BatchNorm2D(inplanes) 209 | self.relu = nn.ReLU() 210 | if aux_head: 211 | self.con_bn_relu = nn.Sequential( 212 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 213 | nn.BatchNorm2D(interplanes), 214 | nn.ReLU(), 215 | ) 216 | else: 217 | self.con_bn_relu = nn.Sequential( 218 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 219 | nn.BatchNorm2D(interplanes), 220 | nn.ReLU(), 221 | ) 222 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 223 | 224 | 225 | def forward(self, x): 226 | x = self.bn1(x) 227 | x = self.relu(x) 228 | x = self.con_bn_relu(x) 229 | out = self.conv(x) 230 | return out 231 | 232 | 233 | 234 | # if __name__ == "__main__": 235 | # model = HrSegNetB16() 236 | # x = paddle.randn([1, 3, 400, 400]) 237 | # out = model(x) 238 | # print(out[0].shape) 239 | 240 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 241 | 242 | -------------------------------------------------------------------------------- /models/hrsegnet_b16_d4.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | """ 13 | This file is identical to the default HrSegNet, 14 | except for the modification of the parameter base 15 | """ 16 | 17 | # features 18 | # 1. The size of the high-resolution path remains constant throughout the process 19 | # 2. In order to reduce and flexibly control the computational cost, the channel of 20 | # the high-resolution path remains unchanged 21 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 22 | # heads used for auxiliary loss during training 23 | # 4. The seg head is performed in two steps, instead of restoring to the 24 | # original resolution all at once 25 | 26 | # If you need to use this model with paddleseg, you need to add it to the model library 27 | # using manager.MODELS.add_component() 28 | @manager.MODELS.add_component 29 | class HrSegNetB16D4(nn.Layer): 30 | """ 31 | The HrSegNet implementation based on PaddlePaddle.s 32 | 33 | Args: 34 | num_classes (int): The unique number of target classes. 35 | 36 | in_channels (int, optional): The channels of input image. Default: 3. 37 | 38 | base (int, optional): The base channel number of the model. Default: 16. 
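Note: besides setting base=16, this D4 variant also deepens every SegBlock to
four high-/low-resolution conv pairs with a fourth low-to-high fusion step.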
39 | """ 40 | def __init__(self, 41 | in_channels=3, # input channel 42 | base=16, # base channel of the model, 43 | num_classes=2 # number of classes 44 | ): 45 | super(HrSegNetB16D4, self).__init__() 46 | self.base = base 47 | self.num_classed = num_classes 48 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 49 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 50 | self.stage1 = nn.Sequential( 51 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 52 | nn.BatchNorm2D(base // 2), 53 | nn.ReLU(), 54 | ) 55 | self.stage2 = nn.Sequential( 56 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 57 | nn.BatchNorm2D(base), 58 | nn.ReLU(), 59 | ) 60 | 61 | self.seg1 = SegBlock(base=base, stage_index=1) 62 | self.seg2 = SegBlock(base=base, stage_index=2) 63 | self.seg3 = SegBlock(base=base, stage_index=3) 64 | 65 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 67 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 68 | 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | logit_list = [] 73 | h, w = paddle.shape(x)[2:] 74 | # aux_head only used in training 75 | if self.training: 76 | stem1_out = self.stage1(x) 77 | stem2_out = self.stage2(stem1_out) 78 | hrseg1_out = self.seg1(stem2_out) 79 | hrseg2_out = self.seg2(hrseg1_out) 80 | hrseg3_out = self.seg3(hrseg2_out) 81 | last_out = self.head(hrseg3_out) 82 | seghead1_out = self.aux_head1(hrseg1_out) 83 | seghead2_out = self.aux_head2(hrseg2_out) 84 | logit_list = [last_out, seghead1_out, seghead2_out] 85 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 86 | return logit_list 87 | else: 88 | stem1_out = self.stage1(x) 89 | stem2_out = self.stage2(stem1_out) 90 | hrseg1_out = self.seg1(stem2_out) 91 | hrseg2_out = self.seg2(hrseg1_out) 92 | hrseg3_out = self.seg3(hrseg2_out) 93 | last_out = self.head(hrseg3_out) 94 | logit_list = [last_out] 95 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 96 | return logit_list 97 | 98 | 99 | def init_weight(self): 100 | for m in self.sublayers(): 101 | if isinstance(m, nn.Conv2D): 102 | param_init.kaiming_normal_init(m.weight) 103 | elif isinstance(m, nn.BatchNorm2D): 104 | param_init.constant_init(m.weight, value=1) 105 | param_init.constant_init(m.bias, value=0) 106 | 107 | 108 | 109 | class SegBlock(nn.Layer): 110 | def __init__(self, 111 | base=32, 112 | stage_index=1):# stage_index=1,2,3. 
113 | super(SegBlock, self).__init__() 114 | 115 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 116 | self.h_conv1 = nn.Sequential( 117 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 118 | nn.BatchNorm2D(base), 119 | nn.ReLU() 120 | ) 121 | self.h_conv2 = nn.Sequential( 122 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 123 | nn.BatchNorm2D(base), 124 | nn.ReLU() 125 | ) 126 | self.h_conv3 = nn.Sequential( 127 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 128 | nn.BatchNorm2D(base), 129 | nn.ReLU() 130 | ) 131 | 132 | self.h_conv4 = nn.Sequential( 133 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 134 | nn.BatchNorm2D(base), 135 | nn.ReLU() 136 | ) 137 | 138 | # sematic guidance path/low-resolution path 139 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 140 | self.l_conv1 = nn.Sequential( 141 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 142 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 143 | nn.ReLU() 144 | ) 145 | elif stage_index==2: #second stage 146 | self.l_conv1 = nn.Sequential( 147 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 148 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 149 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 150 | nn.ReLU() 151 | ) 152 | elif stage_index==3: 153 | self.l_conv1 = nn.Sequential( 154 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 155 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 156 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 157 | nn.ReLU(), 158 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 159 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 160 | nn.ReLU() 161 | ) 162 | else: 163 | raise ValueError("stage_index must be 1, 2 or 3") 164 | self.l_conv2 = nn.Sequential( 165 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 166 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 167 | nn.ReLU() 168 | ) 169 | self.l_conv3 = nn.Sequential( 170 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 171 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 172 | nn.ReLU() 173 | ) 174 | 175 | self.l_conv4 = nn.Sequential( 176 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 177 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 178 | nn.ReLU() 179 | ) 180 | 181 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 182 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 183 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 184 | self.l2h_conv4 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 
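# The h_conv4/l_conv4 pair and the l2h_conv4 projection above are what
# distinguish this depth-4 SegBlock from the default depth-3 version
# (the extra fusion step happens in forward below).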
185 | 186 | 187 | 188 | def forward(self, x): 189 | # out = [] 190 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 191 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 192 | size = x.shape[2:] 193 | out_h1 = self.h_conv1(x) # high resolution path 194 | out_l1 = self.l_conv1(x) # low resolution path 195 | # print(out_l1.shape) 196 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 197 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 198 | 199 | out_h2 = self.h_conv2(out_hl1) 200 | out_l2 = self.l_conv2(out_l1) 201 | # print(out_l2.shape) 202 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 203 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 204 | 205 | out_h3 = self.h_conv3(out_hl2) 206 | out_l3 = self.l_conv3(out_l2) 207 | # print(out_l3.shape) 208 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 209 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 210 | 211 | out_h4 = self.h_conv4(out_hl3) 212 | out_l4 = self.l_conv4(out_l3) 213 | # print(out_l4.shape) 214 | out_l4_i = F.interpolate(out_l4, size=size, mode='bilinear', align_corners=True) 215 | out_hl4 = self.l2h_conv4(out_l4_i) + out_h4 216 | return out_hl4 217 | 218 | # seg head 219 | class SegHead(nn.Layer): 220 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 221 | super(SegHead, self).__init__() 222 | self.bn1 = nn.BatchNorm2D(inplanes) 223 | self.relu = nn.ReLU() 224 | if aux_head: 225 | self.con_bn_relu = nn.Sequential( 226 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 227 | nn.BatchNorm2D(interplanes), 228 | nn.ReLU(), 229 | ) 230 | else: 231 | self.con_bn_relu = nn.Sequential( 232 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 233 | nn.BatchNorm2D(interplanes), 234 | nn.ReLU(), 235 | ) 236 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 237 | 238 | 239 | def forward(self, x): 240 | x = self.bn1(x) 241 | x = self.relu(x) 242 | x = self.con_bn_relu(x) 243 | out = self.conv(x) 244 | return out 245 | 246 | 247 | 248 | # if __name__ == "__main__": 249 | # model = HrSegNetB16D4() 250 | # x = paddle.randn([1, 3, 400, 400]) 251 | # out = model(x) 252 | # print(out[0].shape) 253 | 254 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 255 | 256 | -------------------------------------------------------------------------------- /models/hrsegnet_b16_d5.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | """ 13 | This file is identical to the default HrSegNet, 14 | except for the modification of the parameter base 15 | """ 16 | 17 | # features 18 | # 1. The size of the high-resolution path remains constant throughout the process 19 | # 2. In order to reduce and flexibly control the computational cost, the channel of 20 | # the high-resolution path remains unchanged 21 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 22 | # heads used for auxiliary loss during training 23 | # 4. 
The seg head is performed in two steps, instead of restoring to the 24 | # original resolution all at once 25 | 26 | # If you need to use this model with paddleseg, you need to add it to the model library 27 | # using manager.MODELS.add_component() 28 | @manager.MODELS.add_component 29 | class HrSegNetB16D5(nn.Layer): 30 | """ 31 | The HrSegNet implementation based on PaddlePaddle.s 32 | 33 | Args: 34 | num_classes (int): The unique number of target classes. 35 | 36 | in_channels (int, optional): The channels of input image. Default: 3. 37 | 38 | base (int, optional): The base channel number of the model. Default: 16. 39 | """ 40 | def __init__(self, 41 | in_channels=3, # input channel 42 | base=16, # base channel of the model, 43 | num_classes=2 # number of classes 44 | ): 45 | super(HrSegNetB16D5, self).__init__() 46 | self.base = base 47 | self.num_classed = num_classes 48 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 49 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 50 | self.stage1 = nn.Sequential( 51 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 52 | nn.BatchNorm2D(base // 2), 53 | nn.ReLU(), 54 | ) 55 | self.stage2 = nn.Sequential( 56 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 57 | nn.BatchNorm2D(base), 58 | nn.ReLU(), 59 | ) 60 | 61 | self.seg1 = SegBlock(base=base, stage_index=1) 62 | self.seg2 = SegBlock(base=base, stage_index=2) 63 | self.seg3 = SegBlock(base=base, stage_index=3) 64 | 65 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 67 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 68 | 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | logit_list = [] 73 | h, w = paddle.shape(x)[2:] 74 | # aux_head only used in training 75 | if self.training: 76 | stem1_out = self.stage1(x) 77 | stem2_out = self.stage2(stem1_out) 78 | hrseg1_out = self.seg1(stem2_out) 79 | hrseg2_out = self.seg2(hrseg1_out) 80 | hrseg3_out = self.seg3(hrseg2_out) 81 | last_out = self.head(hrseg3_out) 82 | seghead1_out = self.aux_head1(hrseg1_out) 83 | seghead2_out = self.aux_head2(hrseg2_out) 84 | logit_list = [last_out, seghead1_out, seghead2_out] 85 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 86 | return logit_list 87 | else: 88 | stem1_out = self.stage1(x) 89 | stem2_out = self.stage2(stem1_out) 90 | hrseg1_out = self.seg1(stem2_out) 91 | hrseg2_out = self.seg2(hrseg1_out) 92 | hrseg3_out = self.seg3(hrseg2_out) 93 | last_out = self.head(hrseg3_out) 94 | logit_list = [last_out] 95 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 96 | return logit_list 97 | 98 | 99 | def init_weight(self): 100 | for m in self.sublayers(): 101 | if isinstance(m, nn.Conv2D): 102 | param_init.kaiming_normal_init(m.weight) 103 | elif isinstance(m, nn.BatchNorm2D): 104 | param_init.constant_init(m.weight, value=1) 105 | param_init.constant_init(m.bias, value=0) 106 | 107 | 108 | 109 | class SegBlock(nn.Layer): 110 | def __init__(self, 111 | base=32, 112 | stage_index=1):# stage_index=1,2,3. 
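# Depth-5 variant of SegBlock: five high-/low-resolution conv pairs and five
# low-to-high fusion steps (h_conv5, l_conv5, and l2h_conv5 below), rather
# than the default three.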
113 | super(SegBlock, self).__init__() 114 | 115 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 116 | self.h_conv1 = nn.Sequential( 117 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 118 | nn.BatchNorm2D(base), 119 | nn.ReLU() 120 | ) 121 | self.h_conv2 = nn.Sequential( 122 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 123 | nn.BatchNorm2D(base), 124 | nn.ReLU() 125 | ) 126 | self.h_conv3 = nn.Sequential( 127 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 128 | nn.BatchNorm2D(base), 129 | nn.ReLU() 130 | ) 131 | 132 | self.h_conv4 = nn.Sequential( 133 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 134 | nn.BatchNorm2D(base), 135 | nn.ReLU() 136 | ) 137 | 138 | self.h_conv5 = nn.Sequential( 139 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 140 | nn.BatchNorm2D(base), 141 | nn.ReLU() 142 | ) 143 | 144 | # sematic guidance path/low-resolution path 145 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 146 | self.l_conv1 = nn.Sequential( 147 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 148 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 149 | nn.ReLU() 150 | ) 151 | elif stage_index==2: #second stage 152 | self.l_conv1 = nn.Sequential( 153 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 154 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | elif stage_index==3: 159 | self.l_conv1 = nn.Sequential( 160 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 161 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU(), 164 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 165 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 166 | nn.ReLU() 167 | ) 168 | else: 169 | raise ValueError("stage_index must be 1, 2 or 3") 170 | self.l_conv2 = nn.Sequential( 171 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 172 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 173 | nn.ReLU() 174 | ) 175 | self.l_conv3 = nn.Sequential( 176 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 177 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 178 | nn.ReLU() 179 | ) 180 | 181 | self.l_conv4 = nn.Sequential( 182 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 183 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 184 | nn.ReLU() 185 | ) 186 | 187 | self.l_conv5 = nn.Sequential( 188 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 189 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 190 | nn.ReLU() 191 | ) 192 | 193 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), 
out_channels=base, kernel_size=1, stride=1, padding=0) 194 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 195 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 196 | self.l2h_conv4 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 197 | self.l2h_conv5 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 198 | 199 | 200 | 201 | def forward(self, x): 202 | # out = [] 203 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 204 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 205 | size = x.shape[2:] 206 | out_h1 = self.h_conv1(x) # high resolution path 207 | out_l1 = self.l_conv1(x) # low resolution path 208 | # print(out_l1.shape) 209 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 210 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 211 | 212 | out_h2 = self.h_conv2(out_hl1) 213 | out_l2 = self.l_conv2(out_l1) 214 | # print(out_l2.shape) 215 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 216 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 217 | 218 | out_h3 = self.h_conv3(out_hl2) 219 | out_l3 = self.l_conv3(out_l2) 220 | # print(out_l3.shape) 221 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 222 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 223 | 224 | out_h4 = self.h_conv4(out_hl3) 225 | out_l4 = self.l_conv4(out_l3) 226 | # print(out_l4.shape) 227 | out_l4_i = F.interpolate(out_l4, size=size, mode='bilinear', align_corners=True) 228 | out_hl4 = self.l2h_conv4(out_l4_i) + out_h4 229 | 230 | out_h5 = self.h_conv5(out_hl4) 231 | out_l5 = self.l_conv5(out_l4) 232 | # print(out_l5.shape) 233 | out_l5_i = F.interpolate(out_l5, size=size, mode='bilinear', align_corners=True) 234 | out_hl5 = self.l2h_conv5(out_l5_i) + out_h5 235 | return out_hl5 236 | 237 | # seg head 238 | class SegHead(nn.Layer): 239 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 240 | super(SegHead, self).__init__() 241 | self.bn1 = nn.BatchNorm2D(inplanes) 242 | self.relu = nn.ReLU() 243 | if aux_head: 244 | self.con_bn_relu = nn.Sequential( 245 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 246 | nn.BatchNorm2D(interplanes), 247 | nn.ReLU(), 248 | ) 249 | else: 250 | self.con_bn_relu = nn.Sequential( 251 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 252 | nn.BatchNorm2D(interplanes), 253 | nn.ReLU(), 254 | ) 255 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 256 | 257 | 258 | def forward(self, x): 259 | x = self.bn1(x) 260 | x = self.relu(x) 261 | x = self.con_bn_relu(x) 262 | out = self.conv(x) 263 | return out 264 | 265 | 266 | 267 | # if __name__ == "__main__": 268 | # model = HrSegNetB16D5() 269 | # x = paddle.randn([1, 3, 400, 400]) 270 | # out = model(x) 271 | # print(out[0].shape) 272 | 273 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 274 | 275 | -------------------------------------------------------------------------------- /models/hrsegnet_b32.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import 
paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | 13 | 14 | # features 15 | # 1. The size of the high-resolution path remains constant throughout the process 16 | # 2. In order to reduce and flexibly control the computational cost, the channel of 17 | # the high-resolution path remains unchanged 18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 19 | # heads used for auxiliary loss during training 20 | # 4. The seg head is performed in two steps, instead of restoring to the 21 | # original resolution all at once 22 | 23 | # If you need to use this model with paddleseg, you need to add it to the model library 24 | # using manager.MODELS.add_component() 25 | @manager.MODELS.add_component 26 | class HrSegNetB32(nn.Layer): 27 | """ 28 | The HrSegNet implementation based on PaddlePaddle. 29 | 30 | Args: 31 | num_classes (int): The unique number of target classes. 32 | 33 | in_channels (int, optional): The channels of input image. Default: 3. 34 | 35 | base (int, optional): The base channel number of the model. Default: 32. 36 | """ 37 | def __init__(self, 38 | in_channels=3, # input channel 39 | base=32, # base channel of the model, 40 | num_classes=2, # number of classes 41 | pretrained=None 42 | ): 43 | super(HrSegNetB32, self).__init__() 44 | self.base = base 45 | self.num_classes = num_classes 46 | self.pretrained = pretrained 47 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 48 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 49 | self.stage1 = nn.Sequential( 50 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 51 | nn.BatchNorm2D(base // 2), 52 | nn.ReLU(), 53 | ) 54 | self.stage2 = nn.Sequential( 55 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 56 | nn.BatchNorm2D(base), 57 | nn.ReLU(), 58 | ) 59 | 60 | self.seg1 = SegBlock(base=base, stage_index=1) 61 | self.seg2 = SegBlock(base=base, stage_index=2) 62 | self.seg3 = SegBlock(base=base, stage_index=3) 63 | 64 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 65 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 67 | 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | logit_list = [] 72 | h, w = paddle.shape(x)[2:] 73 | # aux_head only used in training 74 | if self.training: 75 | stem1_out = self.stage1(x) 76 | stem2_out = self.stage2(stem1_out) 77 | hrseg1_out = self.seg1(stem2_out) 78 | hrseg2_out = self.seg2(hrseg1_out) 79 | hrseg3_out = self.seg3(hrseg2_out) 80 | last_out = self.head(hrseg3_out) 81 | seghead1_out = self.aux_head1(hrseg1_out) 82 | seghead2_out = self.aux_head2(hrseg2_out) 83 | logit_list = [last_out, seghead1_out, seghead2_out] 84 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 85 | return logit_list 86 | else: 87 | stem1_out = self.stage1(x) 88 | stem2_out = self.stage2(stem1_out) 89 | hrseg1_out = self.seg1(stem2_out) 90 | hrseg2_out = self.seg2(hrseg1_out) 91 | hrseg3_out = self.seg3(hrseg2_out) 92 | last_out = self.head(hrseg3_out) 93 | 
logit_list = [last_out] 94 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 95 | return logit_list 96 | 97 | 98 | def init_weight(self): 99 | if self.pretrained is not None: 100 | utils.load_entire_model(self, self.pretrained) 101 | else: 102 | for m in self.sublayers(): 103 | if isinstance(m, nn.Conv2D): 104 | param_init.kaiming_normal_init(m.weight) 105 | elif isinstance(m, nn.BatchNorm2D): 106 | param_init.constant_init(m.weight, value=1) 107 | param_init.constant_init(m.bias, value=0) 108 | 109 | 110 | 111 | class SegBlock(nn.Layer): 112 | def __init__(self, 113 | base=32, 114 | stage_index=1):# stage_index=1,2,3. 115 | super(SegBlock, self).__init__() 116 | 117 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 118 | self.h_conv1 = nn.Sequential( 119 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 120 | nn.BatchNorm2D(base), 121 | nn.ReLU() 122 | ) 123 | self.h_conv2 = nn.Sequential( 124 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 125 | nn.BatchNorm2D(base), 126 | nn.ReLU() 127 | ) 128 | self.h_conv3 = nn.Sequential( 129 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 130 | nn.BatchNorm2D(base), 131 | nn.ReLU() 132 | ) 133 | 134 | # semantic guidance path/low-resolution path 135 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 136 | self.l_conv1 = nn.Sequential( 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU() 140 | ) 141 | elif stage_index==2: #second stage 142 | self.l_conv1 = nn.Sequential( 143 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 144 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 145 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 146 | nn.ReLU() 147 | ) 148 | elif stage_index==3: 149 | self.l_conv1 = nn.Sequential( 150 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 151 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 152 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 153 | nn.ReLU(), 154 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | else: 159 | raise ValueError("stage_index must be 1, 2 or 3") 160 | self.l_conv2 = nn.Sequential( 161 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU() 164 | ) 165 | self.l_conv3 = nn.Sequential( 166 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 167 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 168 | nn.ReLU() 169 | ) 170 | 171 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 172 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 173 | self.l2h_conv3 = 
nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 174 | 175 | 176 | 177 | def forward(self, x): 178 | # out = [] 179 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 180 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 181 | size = x.shape[2:] 182 | out_h1 = self.h_conv1(x) # high resolution path 183 | out_l1 = self.l_conv1(x) # low resolution path 184 | # print(out_l1.shape) 185 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 186 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 187 | 188 | out_h2 = self.h_conv2(out_hl1) 189 | out_l2 = self.l_conv2(out_l1) 190 | # print(out_l2.shape) 191 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 192 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 193 | 194 | out_h3 = self.h_conv3(out_hl2) 195 | out_l3 = self.l_conv3(out_l2) 196 | # print(out_l3.shape) 197 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 198 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 199 | return out_hl3 200 | 201 | # seg head 202 | class SegHead(nn.Layer): 203 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 204 | super(SegHead, self).__init__() 205 | self.bn1 = nn.BatchNorm2D(inplanes) 206 | self.relu = nn.ReLU() 207 | if aux_head: 208 | self.con_bn_relu = nn.Sequential( 209 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 210 | nn.BatchNorm2D(interplanes), 211 | nn.ReLU(), 212 | ) 213 | else: 214 | self.con_bn_relu = nn.Sequential( 215 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 216 | nn.BatchNorm2D(interplanes), 217 | nn.ReLU(), 218 | ) 219 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 220 | 221 | 222 | def forward(self, x): 223 | x = self.bn1(x) 224 | x = self.relu(x) 225 | x = self.con_bn_relu(x) 226 | out = self.conv(x) 227 | return out 228 | 229 | 230 | 231 | # if __name__ == "__main__": 232 | # model = HrSegNetB32() 233 | # x = paddle.randn([1, 3, 400, 400]) 234 | # out = model(x) 235 | # print(out[0].shape) 236 | 237 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 238 | 239 | -------------------------------------------------------------------------------- /models/hrsegnet_b48.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | 13 | 14 | # features 15 | # 1. The size of the high-resolution path remains constant throughout the process 16 | # 2. In order to reduce and flexibly control the computational cost, the channel of 17 | # the high-resolution path remains unchanged 18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 19 | # heads used for auxiliary loss during training 20 | # 4. 
The seg head is performed in two steps, instead of restoring to the 21 | # original resolution all at once 22 | 23 | # If you need to use this model with paddleseg, you need to add it to the model library 24 | # using manager.MODELS.add_component() 25 | @manager.MODELS.add_component 26 | class HrSegNetB48(nn.Layer): 27 | """ 28 | The HrSegNet implementation based on PaddlePaddle. 29 | 30 | Args: 31 | num_classes (int): The unique number of target classes. 32 | 33 | in_channels (int, optional): The channels of input image. Default: 3. 34 | 35 | base (int, optional): The base channel number of the model. Default: 48. 36 | """ 37 | def __init__(self, 38 | in_channels=3, # input channel 39 | base=48, # base channel of the model, 40 | num_classes=2, # number of classes 41 | pretrained = None # pretrained model 42 | ): 43 | super(HrSegNetB48, self).__init__() 44 | self.base = base 45 | self.num_classes = num_classes 46 | self.pretrained = pretrained 47 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 48 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 49 | self.stage1 = nn.Sequential( 50 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 51 | nn.BatchNorm2D(base // 2), 52 | nn.ReLU(), 53 | ) 54 | self.stage2 = nn.Sequential( 55 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 56 | nn.BatchNorm2D(base), 57 | nn.ReLU(), 58 | ) 59 | 60 | self.seg1 = SegBlock(base=base, stage_index=1) 61 | self.seg2 = SegBlock(base=base, stage_index=2) 62 | self.seg3 = SegBlock(base=base, stage_index=3) 63 | 64 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 65 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 67 | 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | logit_list = [] 72 | h, w = paddle.shape(x)[2:] 73 | # aux_head only used in training 74 | if self.training: 75 | stem1_out = self.stage1(x) 76 | stem2_out = self.stage2(stem1_out) 77 | hrseg1_out = self.seg1(stem2_out) 78 | hrseg2_out = self.seg2(hrseg1_out) 79 | hrseg3_out = self.seg3(hrseg2_out) 80 | last_out = self.head(hrseg3_out) 81 | seghead1_out = self.aux_head1(hrseg1_out) 82 | seghead2_out = self.aux_head2(hrseg2_out) 83 | logit_list = [last_out, seghead1_out, seghead2_out] 84 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 85 | return logit_list 86 | else: 87 | stem1_out = self.stage1(x) 88 | stem2_out = self.stage2(stem1_out) 89 | hrseg1_out = self.seg1(stem2_out) 90 | hrseg2_out = self.seg2(hrseg1_out) 91 | hrseg3_out = self.seg3(hrseg2_out) 92 | last_out = self.head(hrseg3_out) 93 | logit_list = [last_out] 94 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 95 | return logit_list 96 | 97 | 98 | def init_weight(self): 99 | if self.pretrained is not None: 100 | utils.load_entire_model(self, self.pretrained) 101 | else: 102 | for m in self.sublayers(): 103 | if isinstance(m, nn.Conv2D): 104 | param_init.kaiming_normal_init(m.weight) 105 | elif isinstance(m, nn.BatchNorm2D): 106 | param_init.constant_init(m.weight, value=1) 107 | param_init.constant_init(m.bias, value=0) 108 | 109 | 110 | 111 | class SegBlock(nn.Layer): 112 | def 
__init__(self, 113 | base=32, 114 | stage_index=1):# stage_index=1,2,3. 115 | super(SegBlock, self).__init__() 116 | 117 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 118 | self.h_conv1 = nn.Sequential( 119 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 120 | nn.BatchNorm2D(base), 121 | nn.ReLU() 122 | ) 123 | self.h_conv2 = nn.Sequential( 124 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 125 | nn.BatchNorm2D(base), 126 | nn.ReLU() 127 | ) 128 | self.h_conv3 = nn.Sequential( 129 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 130 | nn.BatchNorm2D(base), 131 | nn.ReLU() 132 | ) 133 | 134 | # semantic guidance path/low-resolution path 135 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 136 | self.l_conv1 = nn.Sequential( 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU() 140 | ) 141 | elif stage_index==2: #second stage 142 | self.l_conv1 = nn.Sequential( 143 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 144 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 145 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 146 | nn.ReLU() 147 | ) 148 | elif stage_index==3: 149 | self.l_conv1 = nn.Sequential( 150 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 151 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 152 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 153 | nn.ReLU(), 154 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | else: 159 | raise ValueError("stage_index must be 1, 2 or 3") 160 | self.l_conv2 = nn.Sequential( 161 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU() 164 | ) 165 | self.l_conv3 = nn.Sequential( 166 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 167 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 168 | nn.ReLU() 169 | ) 170 | 171 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 172 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 173 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 174 | 175 | 176 | 177 | def forward(self, x): 178 | # out = [] 179 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 180 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 181 | size = x.shape[2:] 182 | out_h1 = self.h_conv1(x) # high resolution path 183 | out_l1 = self.l_conv1(x) # low resolution path 184 | # print(out_l1.shape) 185 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 186 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 187 | 
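# Each fusion step in this forward pass repeats the pattern above: the low-resolution
# (semantic guidance) feature is bilinearly upsampled to the high-resolution size,
# projected from base*2**stage_index back to base channels by the matching 1x1
# l2h conv, and added element-wise to the high-resolution feature.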
188 | out_h2 = self.h_conv2(out_hl1) 189 | out_l2 = self.l_conv2(out_l1) 190 | # print(out_l2.shape) 191 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 192 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 193 | 194 | out_h3 = self.h_conv3(out_hl2) 195 | out_l3 = self.l_conv3(out_l2) 196 | # print(out_l3.shape) 197 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 198 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 199 | return out_hl3 200 | 201 | # seg head 202 | class SegHead(nn.Layer): 203 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 204 | super(SegHead, self).__init__() 205 | self.bn1 = nn.BatchNorm2D(inplanes) 206 | self.relu = nn.ReLU() 207 | if aux_head: 208 | self.con_bn_relu = nn.Sequential( 209 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 210 | nn.BatchNorm2D(interplanes), 211 | nn.ReLU(), 212 | ) 213 | else: 214 | self.con_bn_relu = nn.Sequential( 215 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 216 | nn.BatchNorm2D(interplanes), 217 | nn.ReLU(), 218 | ) 219 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 220 | 221 | 222 | def forward(self, x): 223 | x = self.bn1(x) 224 | x = self.relu(x) 225 | x = self.con_bn_relu(x) 226 | out = self.conv(x) 227 | return out 228 | 229 | 230 | 231 | # if __name__ == "__main__": 232 | # model = HrSegNetB48() 233 | # x = paddle.randn([1, 3, 400, 400]) 234 | # out = model(x) 235 | # print(out[0].shape) 236 | 237 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 238 | 239 | -------------------------------------------------------------------------------- /models/hrsegnet_b64.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from paddleseg.utils import utils 7 | from paddleseg.cvlibs import manager, param_init 8 | from paddleseg.models.layers.layer_libs import SyncBatchNorm 9 | import paddle.nn.functional as F 10 | 11 | 12 | 13 | 14 | # features 15 | # 1. The size of the high-resolution path remains constant throughout the process 16 | # 2. In order to reduce and flexibly control the computational cost, the channel of 17 | # the high-resolution path remains unchanged 18 | # 3. We use multiple segmentation heads, two of which are auxiliary segmentation 19 | # heads used for auxiliary loss during training 20 | # 4. The seg head is performed in two steps, instead of restoring to the 21 | # original resolution all at once 22 | 23 | # If you need to use this model with paddleseg, you need to add it to the model library 24 | # using manager.MODELS.add_component() 25 | @manager.MODELS.add_component 26 | class HrSegNetB64(nn.Layer): 27 | """ 28 | The HrSegNet implementation based on PaddlePaddle. 29 | 30 | Args: 31 | num_classes (int): The unique number of target classes. 32 | 33 | in_channels (int, optional): The channels of input image. Default: 3. 34 | 35 | base (int, optional): The base channel number of the model. Default: 64. 
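Examples:
    A minimal usage sketch, added for illustration (it assumes PaddlePaddle is installed; num_classes and the 400x400 input are illustrative):

        import paddle
        model = HrSegNetB64(num_classes=2)
        model.eval()  # the inference branch returns a single logit map
        x = paddle.randn([1, 3, 400, 400])
        out = model(x)[0]  # [1, 2, 400, 400]: logits upsampled back to the input size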
36 | """ 37 | def __init__(self, 38 | in_channels=3, # input channel 39 | base=64, # base channel of the model, 40 | num_classes=2, # number of classes 41 | pretrained=None 42 | ): 43 | super(HrSegNetB64, self).__init__() 44 | self.base = base 45 | self.num_classed = num_classes 46 | self.pretrained = pretrained 47 | # Stage 1 and 2 constitute the stem of the model, which is mainly used to extract low-level features. 48 | # Meanwhile, stage1 and 2 reduce the input image to 1/2 and 1/4 of the original size respectively 49 | self.stage1 = nn.Sequential( 50 | nn.Conv2D(in_channels=in_channels, out_channels=base // 2, kernel_size=3, stride=2, padding=1), 51 | nn.BatchNorm2D(base // 2), 52 | nn.ReLU(), 53 | ) 54 | self.stage2 = nn.Sequential( 55 | nn.Conv2D(in_channels=base // 2, out_channels=base, kernel_size=3, stride=2, padding=1), 56 | nn.BatchNorm2D(base), 57 | nn.ReLU(), 58 | ) 59 | 60 | self.seg1 = SegBlock(base=base, stage_index=1) 61 | self.seg2 = SegBlock(base=base, stage_index=2) 62 | self.seg3 = SegBlock(base=base, stage_index=3) 63 | 64 | self.aux_head1 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 65 | self.aux_head2 = SegHead(inplanes=base, interplanes=base, outplanes=num_classes, aux_head=True) 66 | self.head = SegHead(inplanes=base, interplanes=base, outplanes=num_classes) 67 | 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | logit_list = [] 72 | h, w = paddle.shape(x)[2:] 73 | # aux_head only used in training 74 | if self.training: 75 | stem1_out = self.stage1(x) 76 | stem2_out = self.stage2(stem1_out) 77 | hrseg1_out = self.seg1(stem2_out) 78 | hrseg2_out = self.seg2(hrseg1_out) 79 | hrseg3_out = self.seg3(hrseg2_out) 80 | last_out = self.head(hrseg3_out) 81 | seghead1_out = self.aux_head1(hrseg1_out) 82 | seghead2_out = self.aux_head2(hrseg2_out) 83 | logit_list = [last_out, seghead1_out, seghead2_out] 84 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 85 | return logit_list 86 | else: 87 | stem1_out = self.stage1(x) 88 | stem2_out = self.stage2(stem1_out) 89 | hrseg1_out = self.seg1(stem2_out) 90 | hrseg2_out = self.seg2(hrseg1_out) 91 | hrseg3_out = self.seg3(hrseg2_out) 92 | last_out = self.head(hrseg3_out) 93 | logit_list = [last_out] 94 | logit_list = [F.interpolate(logit, size=(h, w), mode='bilinear', align_corners=True) for logit in logit_list] 95 | return logit_list 96 | 97 | 98 | def init_weight(self): 99 | if self.pretrained is not None: 100 | utils.load_entire_model(self, self.pretrained) 101 | else: 102 | for m in self.sublayers(): 103 | if isinstance(m, nn.Conv2D): 104 | param_init.kaiming_normal_init(m.weight) 105 | elif isinstance(m, nn.BatchNorm2D): 106 | param_init.constant_init(m.weight, value=1) 107 | param_init.constant_init(m.bias, value=0) 108 | 109 | 110 | 111 | class SegBlock(nn.Layer): 112 | def __init__(self, 113 | base=32, 114 | stage_index=1):# stage_index=1,2,3. 
115 | super(SegBlock, self).__init__() 116 | 117 | # Convolutional layer for high-resolution paths with constant spatial resolution and constant channel 118 | self.h_conv1 = nn.Sequential( 119 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 120 | nn.BatchNorm2D(base), 121 | nn.ReLU() 122 | ) 123 | self.h_conv2 = nn.Sequential( 124 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 125 | nn.BatchNorm2D(base), 126 | nn.ReLU() 127 | ) 128 | self.h_conv3 = nn.Sequential( 129 | nn.Conv2D(in_channels=base, out_channels=base, kernel_size=3, stride=1, padding=1), 130 | nn.BatchNorm2D(base), 131 | nn.ReLU() 132 | ) 133 | 134 | # semantic guidance path/low-resolution path 135 | if stage_index==1: #first stage, stride=2, spatial resolution/2, channel*2 136 | self.l_conv1 = nn.Sequential( 137 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 138 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 139 | nn.ReLU() 140 | ) 141 | elif stage_index==2: #second stage 142 | self.l_conv1 = nn.Sequential( 143 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 144 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 145 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 146 | nn.ReLU() 147 | ) 148 | elif stage_index==3: 149 | self.l_conv1 = nn.Sequential( 150 | nn.AvgPool2D(kernel_size=3, stride=2, padding=1), 151 | nn.Conv2D(in_channels=base, out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 152 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 153 | nn.ReLU(), 154 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=2, padding=1), 155 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 156 | nn.ReLU() 157 | ) 158 | else: 159 | raise ValueError("stage_index must be 1, 2 or 3") 160 | self.l_conv2 = nn.Sequential( 161 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 163 | nn.ReLU() 164 | ) 165 | self.l_conv3 = nn.Sequential( 166 | nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base*int(math.pow(2, stage_index)), kernel_size=3, stride=1, padding=1), 167 | nn.BatchNorm2D(base*int(math.pow(2, stage_index))), 168 | nn.ReLU() 169 | ) 170 | 171 | self.l2h_conv1 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 172 | self.l2h_conv2 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 173 | self.l2h_conv3 = nn.Conv2D(in_channels=base*int(math.pow(2, stage_index)), out_channels=base, kernel_size=1, stride=1, padding=0) 174 | 175 | 176 | 177 | def forward(self, x): 178 | # out = [] 179 | # out.append(self.h_conv3(self.h_conv2(self.h_conv1(x)))) 180 | # out.append(self.l_conv3(self.l_conv2(self.l_conv1(x)))) 181 | size = x.shape[2:] 182 | out_h1 = self.h_conv1(x) # high resolution path 183 | out_l1 = self.l_conv1(x) # low resolution path 184 | # print(out_l1.shape) 185 | out_l1_i = F.interpolate(out_l1, size=size, mode='bilinear', align_corners=True) # upsample 186 | out_hl1 = self.l2h_conv1(out_l1_i) + out_h1 # low to high 187 | 188 | out_h2 = self.h_conv2(out_hl1) 189 | out_l2 = self.l_conv2(out_l1) 
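# Shape note: given block input of shape (N, base, H, W), out_h2 stays
# (N, base, H, W), while out_l2 keeps the reduced low-path size
# (N, base*2**stage_index, H/2**stage_index, W/2**stage_index).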
190 | # print(out_l2.shape) 191 | out_l2_i = F.interpolate(out_l2, size=size, mode='bilinear', align_corners=True) 192 | out_hl2 = self.l2h_conv2(out_l2_i) + out_h2 193 | 194 | out_h3 = self.h_conv3(out_hl2) 195 | out_l3 = self.l_conv3(out_l2) 196 | # print(out_l3.shape) 197 | out_l3_i = F.interpolate(out_l3, size=size, mode='bilinear', align_corners=True) 198 | out_hl3 = self.l2h_conv3(out_l3_i) + out_h3 199 | return out_hl3 200 | 201 | # seg head 202 | class SegHead(nn.Layer): 203 | def __init__(self, inplanes, interplanes, outplanes, aux_head=False): 204 | super(SegHead, self).__init__() 205 | self.bn1 = nn.BatchNorm2D(inplanes) 206 | self.relu = nn.ReLU() 207 | if aux_head: 208 | self.con_bn_relu = nn.Sequential( 209 | nn.Conv2D(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=1, padding=1), 210 | nn.BatchNorm2D(interplanes), 211 | nn.ReLU(), 212 | ) 213 | else: 214 | self.con_bn_relu = nn.Sequential( 215 | nn.Conv2DTranspose(in_channels=inplanes, out_channels=interplanes, kernel_size=3, stride=2, padding=1, output_padding=1), 216 | nn.BatchNorm2D(interplanes), 217 | nn.ReLU(), 218 | ) 219 | self.conv = nn.Conv2D(in_channels=interplanes, out_channels=outplanes, kernel_size=1, stride=1, padding=0) 220 | 221 | 222 | def forward(self, x): 223 | x = self.bn1(x) 224 | x = self.relu(x) 225 | x = self.con_bn_relu(x) 226 | out = self.conv(x) 227 | return out 228 | 229 | 230 | 231 | # if __name__ == "__main__": 232 | # model = HrSegNetB64() 233 | # x = paddle.randn([1, 3, 400, 400]) 234 | # out = model(x) 235 | # print(out[0].shape) 236 | 237 | # paddle.flops(model, input_size=(1, 3, 400, 400)) 238 | 239 | -------------------------------------------------------------------------------- /models/ocrnet.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from paddleseg import utils 6 | from paddleseg.cvlibs import manager, param_init 7 | from paddleseg.models import layers 8 | 9 | 10 | @manager.MODELS.add_component 11 | class OCRNet(nn.Layer): 12 | """ 13 | The OCRNet implementation based on PaddlePaddle. 14 | The original article refers to 15 | Yuan, Yuhui, et al. "Object-Contextual Representations for Semantic Segmentation" 16 | (https://arxiv.org/pdf/1909.11065.pdf) 17 | 18 | Args: 19 | num_classes (int): The unique number of target classes. 20 | backbone (Paddle.nn.Layer): Backbone network. 21 | backbone_indices (tuple): A tuple indicates the indices of output of backbone. 22 | It can be either one or two values, if two values, the first index will be taken as 23 | a deep-supervision feature in auxiliary layer; the second one will be taken as 24 | input of pixel representation. If one value, it is taken by both above. 25 | ocr_mid_channels (int, optional): The number of middle channels in OCRHead. Default: 512. 26 | ocr_key_channels (int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. 27 | align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature 28 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 29 | pretrained (str, optional): The path or url of pretrained model. Default: None. 
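Examples:
    A minimal construction sketch, added for illustration. It assumes PaddleSeg's
    HRNet_W18 backbone (the pairing suggested by this repository's
    ocrnet_hrnetw18.yml config); passing a single backbone index reuses the same
    feature for both the auxiliary branch and the pixel representation:

        from paddleseg.models.backbones import HRNet_W18
        model = OCRNet(num_classes=2, backbone=HRNet_W18(), backbone_indices=(0,))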
30 | """ 31 | 32 | def __init__(self, 33 | num_classes, 34 | backbone, 35 | backbone_indices, 36 | ocr_mid_channels=512, 37 | ocr_key_channels=256, 38 | align_corners=False, 39 | pretrained=None): 40 | super().__init__() 41 | 42 | self.backbone = backbone 43 | self.backbone_indices = backbone_indices 44 | in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] 45 | 46 | self.head = OCRHead( 47 | num_classes=num_classes, 48 | in_channels=in_channels, 49 | ocr_mid_channels=ocr_mid_channels, 50 | ocr_key_channels=ocr_key_channels) 51 | 52 | self.align_corners = align_corners 53 | self.pretrained = pretrained 54 | self.init_weight() 55 | 56 | def forward(self, x): 57 | feats = self.backbone(x) 58 | feats = [feats[i] for i in self.backbone_indices] 59 | logit_list = self.head(feats) 60 | if not self.training: 61 | logit_list = [logit_list[0]] 62 | 63 | logit_list = [ 64 | F.interpolate( 65 | logit, 66 | paddle.shape(x)[2:], 67 | mode='bilinear', 68 | align_corners=self.align_corners) for logit in logit_list 69 | ] 70 | return logit_list 71 | 72 | def init_weight(self): 73 | if self.pretrained is not None: 74 | utils.load_entire_model(self, self.pretrained) 75 | 76 | 77 | class OCRHead(nn.Layer): 78 | """ 79 | The Object contextual representation head. 80 | 81 | Args: 82 | num_classes(int): The unique number of target classes. 83 | in_channels(tuple): The number of input channels. 84 | ocr_mid_channels(int, optional): The number of middle channels in OCRHead. Default: 512. 85 | ocr_key_channels(int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. 86 | """ 87 | 88 | def __init__(self, 89 | num_classes, 90 | in_channels, 91 | ocr_mid_channels=512, 92 | ocr_key_channels=256): 93 | super().__init__() 94 | 95 | self.num_classes = num_classes 96 | self.spatial_gather = SpatialGatherBlock(ocr_mid_channels, num_classes) 97 | self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels, 98 | ocr_mid_channels) 99 | 100 | self.indices = [-2, -1] if len(in_channels) > 1 else [-1, -1] 101 | 102 | self.conv3x3_ocr = layers.ConvBNReLU( 103 | in_channels[self.indices[1]], ocr_mid_channels, 3, padding=1) 104 | self.cls_head = nn.Conv2D(ocr_mid_channels, self.num_classes, 1) 105 | self.aux_head = nn.Sequential( 106 | layers.ConvBNReLU(in_channels[self.indices[0]], 107 | in_channels[self.indices[0]], 1), 108 | nn.Conv2D(in_channels[self.indices[0]], self.num_classes, 1)) 109 | 110 | self.init_weight() 111 | 112 | def forward(self, feat_list): 113 | feat_shallow, feat_deep = feat_list[self.indices[0]], feat_list[ 114 | self.indices[1]] 115 | 116 | soft_regions = self.aux_head(feat_shallow) 117 | pixels = self.conv3x3_ocr(feat_deep) 118 | 119 | object_regions = self.spatial_gather(pixels, soft_regions) 120 | ocr = self.spatial_ocr(pixels, object_regions) 121 | 122 | logit = self.cls_head(ocr) 123 | return [logit, soft_regions] 124 | 125 | def init_weight(self): 126 | """Initialize the parameters of model parts.""" 127 | for sublayer in self.sublayers(): 128 | if isinstance(sublayer, nn.Conv2D): 129 | param_init.normal_init(sublayer.weight, std=0.001) 130 | elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)): 131 | param_init.constant_init(sublayer.weight, value=1.0) 132 | param_init.constant_init(sublayer.bias, value=0.0) 133 | 134 | 135 | class SpatialGatherBlock(nn.Layer): 136 | """Aggregation layer to compute the pixel-region representation.""" 137 | 138 | def __init__(self, pixels_channels, regions_channels): 139 | super().__init__() 140 | 
self.pixels_channels = pixels_channels 141 | self.regions_channels = regions_channels 142 | 143 | def forward(self, pixels, regions): 144 | # pixels: from (n, c, h, w) to (n, h*w, c) 145 | pixels = paddle.reshape(pixels, (0, self.pixels_channels, -1)) 146 | pixels = paddle.transpose(pixels, (0, 2, 1)) 147 | 148 | # regions: from (n, k, h, w) to (n, k, h*w) 149 | regions = paddle.reshape(regions, (0, self.regions_channels, -1)) 150 | regions = F.softmax(regions, axis=2) 151 | 152 | # feats: from (n, k, c) to (n, c, k, 1) 153 | feats = paddle.bmm(regions, pixels) 154 | feats = paddle.transpose(feats, (0, 2, 1)) 155 | feats = paddle.unsqueeze(feats, axis=-1) 156 | 157 | return feats 158 | 159 | 160 | class SpatialOCRModule(nn.Layer): 161 | """Aggregate the global object representation to update the representation for each pixel.""" 162 | 163 | def __init__(self, 164 | in_channels, 165 | key_channels, 166 | out_channels, 167 | dropout_rate=0.1): 168 | super().__init__() 169 | 170 | self.attention_block = ObjectAttentionBlock(in_channels, key_channels) 171 | self.conv1x1 = nn.Sequential( 172 | layers.ConvBNReLU(2 * in_channels, out_channels, 1), 173 | nn.Dropout2D(dropout_rate)) 174 | 175 | def forward(self, pixels, regions): 176 | context = self.attention_block(pixels, regions) 177 | feats = paddle.concat([context, pixels], axis=1) 178 | feats = self.conv1x1(feats) 179 | 180 | return feats 181 | 182 | 183 | class ObjectAttentionBlock(nn.Layer): 184 | """A self-attention module.""" 185 | 186 | def __init__(self, in_channels, key_channels): 187 | super().__init__() 188 | 189 | self.in_channels = in_channels 190 | self.key_channels = key_channels 191 | 192 | self.f_pixel = nn.Sequential( 193 | layers.ConvBNReLU(in_channels, key_channels, 1), 194 | layers.ConvBNReLU(key_channels, key_channels, 1)) 195 | 196 | self.f_object = nn.Sequential( 197 | layers.ConvBNReLU(in_channels, key_channels, 1), 198 | layers.ConvBNReLU(key_channels, key_channels, 1)) 199 | 200 | self.f_down = layers.ConvBNReLU(in_channels, key_channels, 1) 201 | 202 | self.f_up = layers.ConvBNReLU(key_channels, in_channels, 1) 203 | 204 | def forward(self, x, proxy): 205 | x_shape = paddle.shape(x) 206 | # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) 207 | query = self.f_pixel(x) 208 | query = paddle.reshape(query, (0, self.key_channels, -1)) 209 | query = paddle.transpose(query, (0, 2, 1)) 210 | 211 | # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) 212 | key = self.f_object(proxy) 213 | key = paddle.reshape(key, (0, self.key_channels, -1)) 214 | 215 | # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) 216 | value = self.f_down(proxy) 217 | value = paddle.reshape(value, (0, self.key_channels, -1)) 218 | value = paddle.transpose(value, (0, 2, 1)) 219 | 220 | # sim_map (n, h1*w1, h2*w2) 221 | sim_map = paddle.bmm(query, key) 222 | sim_map = (self.key_channels**-.5) * sim_map 223 | sim_map = F.softmax(sim_map, axis=-1) 224 | 225 | # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) 226 | context = paddle.bmm(sim_map, value) 227 | context = paddle.transpose(context, (0, 2, 1)) 228 | context = paddle.reshape(context, 229 | (0, self.key_channels, x_shape[2], x_shape[3])) 230 | context = self.f_up(context) 231 | 232 | return context 233 | -------------------------------------------------------------------------------- /models/pspnet.py: -------------------------------------------------------------------------------- 1 | import paddle.nn as nn 2 | import paddle.nn.functional as F 3 | 4 | import 
paddle 5 | from paddleseg.cvlibs import manager 6 | from paddleseg.models import layers 7 | from paddleseg.utils import utils 8 | 9 | 10 | # @manager.MODELS.add_component 11 | class PSPNet(nn.Layer): 12 | """ 13 | The PSPNet implementation based on PaddlePaddle. 14 | 15 | The original article refers to 16 | Zhao, Hengshuang, et al. "Pyramid scene parsing network" 17 | (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf). 18 | 19 | Args: 20 | num_classes (int): The unique number of target classes. 21 | backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101. 22 | backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. 23 | pp_out_channels (int, optional): The output channels after Pyramid Pooling Module. Default: 1024. 24 | bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1,2,3,6). 25 | enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. 26 | align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, 27 | e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 28 | pretrained (str, optional): The path or url of pretrained model. Default: None. 29 | """ 30 | 31 | def __init__(self, 32 | num_classes, 33 | backbone, 34 | backbone_indices=(2, 3), 35 | pp_out_channels=1024, 36 | bin_sizes=(1, 2, 3, 6), 37 | enable_auxiliary_loss=True, 38 | align_corners=False, 39 | pretrained=None): 40 | super().__init__() 41 | 42 | self.backbone = backbone 43 | backbone_channels = [ 44 | backbone.feat_channels[i] for i in backbone_indices 45 | ] 46 | 47 | self.head = PSPNetHead(num_classes, backbone_indices, backbone_channels, 48 | pp_out_channels, bin_sizes, 49 | enable_auxiliary_loss, align_corners) 50 | self.align_corners = align_corners 51 | self.pretrained = pretrained 52 | self.init_weight() 53 | 54 | def forward(self, x): 55 | feat_list = self.backbone(x) 56 | logit_list = self.head(feat_list) 57 | return [ 58 | F.interpolate( 59 | logit, 60 | paddle.shape(x)[2:], 61 | mode='bilinear', 62 | align_corners=self.align_corners) for logit in logit_list 63 | ] 64 | 65 | def init_weight(self): 66 | if self.pretrained is not None: 67 | utils.load_entire_model(self, self.pretrained) 68 | 69 | 70 | class PSPNetHead(nn.Layer): 71 | """ 72 | The PSPNetHead implementation. 73 | 74 | Args: 75 | num_classes (int): The unique number of target classes. 76 | backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. 77 | The first index will be taken as a deep-supervision feature in auxiliary layer; 78 | the second one will be taken as input of Pyramid Pooling Module (PPModule). 79 | Usually backbone consists of four downsampling stage, and return an output of 80 | each stage. If we set it as (2, 3) in ResNet, that means taking feature map of the third 81 | stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule. 82 | backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. 83 | pp_out_channels (int): The output channels after Pyramid Pooling Module. 84 | bin_sizes (tuple): The out size of pooled feature maps. 85 | enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. 86 | align_corners (bool): An argument of F.interpolate. 
It should be set to False when the output size of feature 87 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. 88 | """ 89 | 90 | def __init__(self, num_classes, backbone_indices, backbone_channels, 91 | pp_out_channels, bin_sizes, enable_auxiliary_loss, 92 | align_corners): 93 | 94 | super().__init__() 95 | 96 | self.backbone_indices = backbone_indices 97 | 98 | self.psp_module = layers.PPModule( 99 | in_channels=backbone_channels[1], 100 | out_channels=pp_out_channels, 101 | bin_sizes=bin_sizes, 102 | dim_reduction=True, 103 | align_corners=align_corners) 104 | 105 | self.dropout = nn.Dropout(p=0.1) # dropout_prob 106 | 107 | self.conv = nn.Conv2D( 108 | in_channels=pp_out_channels, 109 | out_channels=num_classes, 110 | kernel_size=1) 111 | 112 | if enable_auxiliary_loss: 113 | self.auxlayer = layers.AuxLayer( 114 | in_channels=backbone_channels[0], 115 | inter_channels=backbone_channels[0] // 4, 116 | out_channels=num_classes) 117 | 118 | self.enable_auxiliary_loss = enable_auxiliary_loss 119 | 120 | def forward(self, feat_list): 121 | logit_list = [] 122 | x = feat_list[self.backbone_indices[1]] 123 | x = self.psp_module(x) 124 | x = self.dropout(x) 125 | logit = self.conv(x) 126 | logit_list.append(logit) 127 | 128 | if self.enable_auxiliary_loss: 129 | auxiliary_feat = feat_list[self.backbone_indices[0]] 130 | auxiliary_logit = self.auxlayer(auxiliary_feat) 131 | logit_list.append(auxiliary_logit) 132 | 133 | return logit_list 134 | -------------------------------------------------------------------------------- /models/rucnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import paddle 16 | import paddle.nn as nn 17 | import paddle.nn.functional as F 18 | 19 | from paddleseg import utils 20 | from paddleseg.cvlibs import manager 21 | from paddleseg.models import layers 22 | 23 | __all__ = ['RUCNet'] 24 | 25 | 26 | 27 | 28 | 29 | @manager.MODELS.add_component 30 | class RUCNet(nn.Layer): 31 | """ 32 | 33 | The original article refers to 34 | https://www.mdpi.com/1424-8220/23/1/53 35 | 36 | Args: 37 | num_classes (int): The unique number of target classes. 38 | align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature 39 | is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 40 | use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling. 41 | If False, use resize_bilinear. Default: False. 42 | in_channels (int, optional): The channels of input image. Default: 3. 43 | pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None. 
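Examples:
    A minimal usage sketch, added for illustration (num_classes and the input
    size are illustrative, assuming a binary crack/background task):

        import paddle
        model = RUCNet(num_classes=2)
        x = paddle.randn([1, 3, 400, 400])
        logit = model(x)[0]  # [1, 2, 400, 400]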
44 | """ 45 | 46 | def __init__(self, 47 | num_classes, 48 | align_corners=False, 49 | use_deconv=False, 50 | in_channels=3, 51 | pretrained=None): 52 | super().__init__() 53 | 54 | # self.encode = Encoder(in_channels) 55 | self.encode = New_Encoder(in_channels) 56 | self.decode = Decoder(align_corners, use_deconv=use_deconv) 57 | self.cls = self.conv = nn.Conv2D( 58 | in_channels=64, 59 | out_channels=num_classes, 60 | kernel_size=3, 61 | stride=1, 62 | padding=1) 63 | 64 | self.pretrained = pretrained 65 | self.init_weight() 66 | 67 | def forward(self, x): 68 | logit_list = [] 69 | x, short_cuts = self.encode(x) 70 | x = self.decode(x, short_cuts) 71 | logit = self.cls(x) 72 | logit_list.append(logit) 73 | return logit_list 74 | 75 | def init_weight(self): 76 | if self.pretrained is not None: 77 | utils.load_entire_model(self, self.pretrained) 78 | 79 | 80 | class ResidualDownsampleBlock(nn.Layer): 81 | def __init__(self, in_channels, out_channels): 82 | super().__init__() 83 | 84 | self.conv1 = layers.ConvBNReLU(in_channels, out_channels, 3, stride=2, padding=1) 85 | self.conv2=layers.ConvBNReLU(out_channels, out_channels, kernel_size=3, stride=1, padding="same") 86 | self.skip = layers.ConvBNReLU(in_channels, out_channels, 1, stride=2, padding=0) 87 | 88 | self.conv3 = layers.ConvBNReLU(out_channels, out_channels, 3, stride=1, padding=1) 89 | self.conv4=layers.ConvBNReLU(out_channels, out_channels, 3, stride=1, padding=1) 90 | 91 | def forward(self, x): 92 | x1 = self.conv1(x) 93 | x1 = self.conv2(x1) 94 | xk = self.skip(x) 95 | x1 = x1 + xk 96 | 97 | x2 = self.conv3(x1) 98 | x2 = self.conv4(x2) 99 | x2 = x2 + x1 100 | return x2 101 | 102 | class SCSE(nn.Layer): 103 | def __init__(self, in_channel): 104 | super().__init__() 105 | 106 | self.spatial_attention=SpatialAttention(in_channel) 107 | self.channel_attention=ChannelAttention(in_channel) 108 | 109 | def forward(self, x): 110 | return self.spatial_attention(x) + self.channel_attention(x) 111 | 112 | 113 | class SpatialAttention(nn.Layer): 114 | def __init__(self, in_channel): 115 | super().__init__() 116 | self.spatial_conv=nn.Conv2D(in_channel, out_channels=1, kernel_size=1, stride=1, padding=0) 117 | 118 | def forward(self, x): 119 | return x * F.sigmoid(self.spatial_conv(x)) 120 | 121 | class ChannelAttention(nn.Layer): 122 | def __init__(self, in_channel): 123 | super().__init__() 124 | 125 | self.gap=nn.AdaptiveAvgPool2D(1) 126 | self.linear1=nn.Linear(in_channel, in_channel//2) 127 | self.linear2=nn.Linear(in_channel//2, in_channel) 128 | 129 | def forward(self, x): 130 | t=self.gap(x).squeeze(axis=[2,3]) 131 | t=self.linear1(t) 132 | t=self.linear2(t) 133 | return x * F.sigmoid(t.unsqueeze(axis=[2,3])) 134 | 135 | class New_Encoder(nn.Layer): 136 | def __init__(self, in_channels=3): 137 | super().__init__() 138 | 139 | self.double_conv = nn.Sequential( 140 | layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3)) 141 | 142 | down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]] 143 | 144 | self.down_sample_list = nn.LayerList([ 145 | self.down_sampling(channel[0], channel[1]) 146 | for channel in down_channels 147 | ]) 148 | 149 | self.scse_list=nn.LayerList( 150 | [SCSE(128), 151 | SCSE(256), 152 | SCSE(512), 153 | SCSE(512)] 154 | ) 155 | 156 | def down_sampling(self, in_channels, out_channels): 157 | rdb=ResidualDownsampleBlock(in_channels, out_channels) 158 | return rdb 159 | 160 | 161 | def forward(self, x): 162 | short_cuts = [] 163 | x = self.double_conv(x) 164 | for i, down_sample in 
enumerate(self.down_sample_list): 165 | short_cuts.append(x) 166 | x = down_sample(x) 167 | # print(x.shape) 168 | x=self.scse_list[i](x) 169 | 170 | return x, short_cuts 171 | 172 | 173 | class Encoder(nn.Layer): 174 | def __init__(self, in_channels=3): 175 | super().__init__() 176 | 177 | self.double_conv = nn.Sequential( 178 | layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3)) 179 | down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]] 180 | self.down_sample_list = nn.LayerList([ 181 | self.down_sampling(channel[0], channel[1]) 182 | for channel in down_channels 183 | ]) 184 | 185 | def down_sampling(self, in_channels, out_channels): 186 | modules = [] 187 | modules.append(nn.MaxPool2D(kernel_size=2, stride=2)) 188 | modules.append(layers.ConvBNReLU(in_channels, out_channels, 3)) 189 | modules.append(layers.ConvBNReLU(out_channels, out_channels, 3)) 190 | return nn.Sequential(*modules) 191 | 192 | def forward(self, x): 193 | short_cuts = [] 194 | x = self.double_conv(x) 195 | for down_sample in self.down_sample_list: 196 | short_cuts.append(x) 197 | x = down_sample(x) 198 | return x, short_cuts 199 | 200 | 201 | class Decoder(nn.Layer): 202 | def __init__(self, align_corners, use_deconv=False): 203 | super().__init__() 204 | 205 | up_channels = [[512, 256], [256, 128], [128, 64], [64, 64]] 206 | self.up_sample_list = nn.LayerList([ 207 | UpSampling(channel[0], channel[1], align_corners, use_deconv) 208 | for channel in up_channels 209 | ]) 210 | 211 | def forward(self, x, short_cuts): 212 | for i in range(len(short_cuts)): 213 | x = self.up_sample_list[i](x, short_cuts[-(i + 1)]) 214 | return x 215 | 216 | 217 | class UpSampling(nn.Layer): 218 | def __init__(self, 219 | in_channels, 220 | out_channels, 221 | align_corners, 222 | use_deconv=False): 223 | super().__init__() 224 | 225 | self.align_corners = align_corners 226 | 227 | self.use_deconv = use_deconv 228 | if self.use_deconv: 229 | self.deconv = nn.Conv2DTranspose( 230 | in_channels, 231 | out_channels // 2, 232 | kernel_size=2, 233 | stride=2, 234 | padding=0) 235 | in_channels = in_channels + out_channels // 2 236 | else: 237 | in_channels *= 2 238 | 239 | self.double_conv = nn.Sequential( 240 | layers.ConvBNReLU(in_channels, out_channels, 3), 241 | layers.ConvBNReLU(out_channels, out_channels, 3), 242 | SCSE(out_channels)) # scse 243 | 244 | def forward(self, x, short_cut): 245 | if self.use_deconv: 246 | x = self.deconv(x) 247 | else: 248 | x = F.interpolate( 249 | x, 250 | paddle.shape(short_cut)[2:], 251 | mode='bilinear', 252 | align_corners=self.align_corners) 253 | x = paddle.concat([x, short_cut], axis=1) 254 | x = self.double_conv(x) 255 | return x 256 | -------------------------------------------------------------------------------- /models/stdcseg.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | 6 | from paddleseg.models import layers 7 | from paddleseg.cvlibs import manager 8 | from paddleseg.utils import utils 9 | 10 | 11 | # @manager.MODELS.add_component 12 | class STDCSeg(nn.Layer): 13 | """ 14 | The STDCSeg implementation based on PaddlePaddle. 15 | 16 | The original article refers to 17 | Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation." 18 | (https://arxiv.org/abs/2104.13188) 19 | 20 | Args: 21 | num_classes(int): The unique number of target classes. 
22 | backbone(nn.Layer): Backbone network, STDCNet1446/STDCNet813. STDCNet1446 corresponds to STDC2 and STDCNet813 to STDC1 in the paper. 23 | use_boundary_8(bool, optional): Whether to use the detail loss on the 1/8 feature. According to the paper, it should be True for the best metric. Default: True. 24 | If you want to use use_boundary_2/use_boundary_4/use_boundary_16 as well, append the corresponding number of loss functions for DetailAggregateLoss; it should work properly. 25 | use_conv_last(bool, optional): Determines ContextPath's inplanes variable according to whether the backbone's last conv is used. Default: False. 26 | pretrained (str, optional): The path or url of pretrained model. Default: None. 27 | """ 28 | 29 | def __init__(self, 30 | num_classes, 31 | backbone, 32 | use_boundary_2=False, 33 | use_boundary_4=False, 34 | use_boundary_8=True, 35 | use_boundary_16=False, 36 | use_conv_last=False, 37 | pretrained=None): 38 | super(STDCSeg, self).__init__() 39 | 40 | self.use_boundary_2 = use_boundary_2 41 | self.use_boundary_4 = use_boundary_4 42 | self.use_boundary_8 = use_boundary_8 43 | self.use_boundary_16 = use_boundary_16 44 | self.cp = ContextPath(backbone, use_conv_last=use_conv_last) 45 | self.ffm = FeatureFusionModule(384, 256) 46 | self.conv_out = SegHead(256, 256, num_classes) 47 | self.conv_out8 = SegHead(128, 64, num_classes) 48 | self.conv_out16 = SegHead(128, 64, num_classes) 49 | self.conv_out_sp16 = SegHead(512, 64, 1) 50 | self.conv_out_sp8 = SegHead(256, 64, 1) 51 | self.conv_out_sp4 = SegHead(64, 64, 1) 52 | self.conv_out_sp2 = SegHead(32, 64, 1) 53 | self.pretrained = pretrained 54 | self.init_weight() 55 | 56 | def forward(self, x): 57 | x_hw = paddle.shape(x)[2:] 58 | feat_res2, feat_res4, feat_res8, _, feat_cp8, feat_cp16 = self.cp(x) 59 | 60 | logit_list = [] 61 | if self.training: 62 | feat_fuse = self.ffm(feat_res8, feat_cp8) 63 | feat_out = self.conv_out(feat_fuse) 64 | feat_out8 = self.conv_out8(feat_cp8) 65 | feat_out16 = self.conv_out16(feat_cp16) 66 | 67 | logit_list = [feat_out, feat_out8, feat_out16] 68 | logit_list = [ 69 | F.interpolate( 70 | x, x_hw, mode='bilinear', align_corners=True) 71 | for x in logit_list 72 | ] 73 | 74 | if self.use_boundary_2: 75 | feat_out_sp2 = self.conv_out_sp2(feat_res2) 76 | logit_list.append(feat_out_sp2) 77 | if self.use_boundary_4: 78 | feat_out_sp4 = self.conv_out_sp4(feat_res4) 79 | logit_list.append(feat_out_sp4) 80 | if self.use_boundary_8: 81 | feat_out_sp8 = self.conv_out_sp8(feat_res8) 82 | logit_list.append(feat_out_sp8) 83 | else: 84 | feat_fuse = self.ffm(feat_res8, feat_cp8) 85 | feat_out = self.conv_out(feat_fuse) 86 | feat_out = F.interpolate( 87 | feat_out, x_hw, mode='bilinear', align_corners=True) 88 | logit_list = [feat_out] 89 | 90 | return logit_list 91 | 92 | def init_weight(self): 93 | if self.pretrained is not None: 94 | utils.load_entire_model(self, self.pretrained) 95 | 96 | 97 | class SegHead(nn.Layer): 98 | def __init__(self, in_chan, mid_chan, n_classes): 99 | super(SegHead, self).__init__() 100 | self.conv = layers.ConvBNReLU( 101 | in_chan, mid_chan, kernel_size=3, stride=1, padding=1) 102 | self.conv_out = nn.Conv2D( 103 | mid_chan, n_classes, kernel_size=1, bias_attr=None) 104 | 105 | def forward(self, x): 106 | x = self.conv(x) 107 | x = self.conv_out(x) 108 | return x 109 | 110 | 111 | class AttentionRefinementModule(nn.Layer): 112 | def __init__(self, in_chan, out_chan): 113 | super(AttentionRefinementModule, self).__init__() 114 | self.conv = layers.ConvBNReLU( 115 | in_chan, out_chan, kernel_size=3, stride=1, padding=1) 116 | self.conv_atten = nn.Conv2D( 117 
| out_chan, out_chan, kernel_size=1, bias_attr=None) 118 | self.bn_atten = nn.BatchNorm2D(out_chan) 119 | self.sigmoid_atten = nn.Sigmoid() 120 | 121 | def forward(self, x): 122 | feat = self.conv(x) 123 | atten = F.adaptive_avg_pool2d(feat, 1) 124 | atten = self.conv_atten(atten) 125 | atten = self.bn_atten(atten) 126 | atten = self.sigmoid_atten(atten) 127 | out = paddle.multiply(feat, atten) 128 | return out 129 | 130 | 131 | class ContextPath(nn.Layer): 132 | def __init__(self, backbone, use_conv_last=False): 133 | super(ContextPath, self).__init__() 134 | self.backbone = backbone 135 | self.arm16 = AttentionRefinementModule(512, 128) 136 | inplanes = 1024 137 | if use_conv_last: 138 | inplanes = 1024 139 | self.arm32 = AttentionRefinementModule(inplanes, 128) 140 | self.conv_head32 = layers.ConvBNReLU( 141 | 128, 128, kernel_size=3, stride=1, padding=1) 142 | self.conv_head16 = layers.ConvBNReLU( 143 | 128, 128, kernel_size=3, stride=1, padding=1) 144 | self.conv_avg = layers.ConvBNReLU( 145 | inplanes, 128, kernel_size=1, stride=1, padding=0) 146 | 147 | def forward(self, x): 148 | feat2, feat4, feat8, feat16, feat32 = self.backbone(x) 149 | 150 | feat8_hw = paddle.shape(feat8)[2:] 151 | feat16_hw = paddle.shape(feat16)[2:] 152 | feat32_hw = paddle.shape(feat32)[2:] 153 | 154 | avg = F.adaptive_avg_pool2d(feat32, 1) 155 | avg = self.conv_avg(avg) 156 | avg_up = F.interpolate(avg, feat32_hw, mode='nearest') 157 | 158 | feat32_arm = self.arm32(feat32) 159 | feat32_sum = feat32_arm + avg_up 160 | feat32_up = F.interpolate(feat32_sum, feat16_hw, mode='nearest') 161 | feat32_up = self.conv_head32(feat32_up) 162 | 163 | feat16_arm = self.arm16(feat16) 164 | feat16_sum = feat16_arm + feat32_up 165 | feat16_up = F.interpolate(feat16_sum, feat8_hw, mode='nearest') 166 | feat16_up = self.conv_head16(feat16_up) 167 | 168 | return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16 169 | 170 | 171 | class FeatureFusionModule(nn.Layer): 172 | def __init__(self, in_chan, out_chan): 173 | super(FeatureFusionModule, self).__init__() 174 | self.convblk = layers.ConvBNReLU( 175 | in_chan, out_chan, kernel_size=1, stride=1, padding=0) 176 | self.conv1 = nn.Conv2D( 177 | out_chan, 178 | out_chan // 4, 179 | kernel_size=1, 180 | stride=1, 181 | padding=0, 182 | bias_attr=None) 183 | self.conv2 = nn.Conv2D( 184 | out_chan // 4, 185 | out_chan, 186 | kernel_size=1, 187 | stride=1, 188 | padding=0, 189 | bias_attr=None) 190 | self.relu = nn.ReLU() 191 | self.sigmoid = nn.Sigmoid() 192 | 193 | def forward(self, fsp, fcp): 194 | fcat = paddle.concat([fsp, fcp], axis=1) 195 | feat = self.convblk(fcat) 196 | atten = F.adaptive_avg_pool2d(feat, 1) 197 | atten = self.conv1(atten) 198 | atten = self.relu(atten) 199 | atten = self.conv2(atten) 200 | atten = self.sigmoid(atten) 201 | feat_atten = paddle.multiply(feat, atten) 202 | feat_out = feat_atten + feat 203 | return feat_out 204 | -------------------------------------------------------------------------------- /models/u2cracknet.py: -------------------------------------------------------------------------------- 1 | import paddle 2 | import paddle.nn as nn 3 | import paddle.nn.functional as F 4 | 5 | from paddleseg.cvlibs import manager 6 | from paddleseg.models import layers 7 | from paddleseg.utils import utils 8 | 9 | import math 10 | 11 | 12 | __all__ = ['U2CrackNet'] 13 | 14 | 15 | 16 | class EfficientChannelAttention(nn.Layer): 17 | def __init__(self, gamma=2, b=1, in_channels=128): 18 | super().__init__() 19 | t = int(abs((math.log(in_channels, 
--------------------------------------------------------------------------------
/models/u2cracknet.py:
--------------------------------------------------------------------------------
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils

import math


__all__ = ['U2CrackNet']


class EfficientChannelAttention(nn.Layer):
    def __init__(self, gamma=2, b=1, in_channels=128):
        super().__init__()
        # Adaptive kernel size from ECA-Net: k is the nearest odd integer
        # to |log2(in_channels) / gamma + b / gamma|.
        t = int(abs((math.log(in_channels, 2) + b) / gamma))
        k = t if t % 2 else t + 1
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.conv = nn.Conv1D(
            1, 1, kernel_size=k, padding=int(k / 2), bias_attr=False)

    def forward(self, x):
        y = self.avg_pool(x)
        y = self.conv(y.squeeze(-1).transpose([0, 2, 1]))
        y = y.transpose([0, 2, 1]).unsqueeze(-1)
        # Hard gate in [0, 1]; this implementation uses clip where the
        # original ECA-Net uses a sigmoid.
        y = paddle.clip(y, min=0., max=1.)
        return x * y
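
# Worked example of the kernel-size rule above, plus a shape check. With
# gamma=2, b=1: in_channels=128 gives t = int(|(7 + 1) / 2|) = 4, which is
# even, so k = 5; in_channels=2 (a two-class logit map, as used by the side
# outputs below) gives t = 1 and k = 1. The input size is arbitrary.
if __name__ == '__main__':
    eca = EfficientChannelAttention(in_channels=128)
    x = paddle.randn([1, 128, 32, 32])
    print(eca(x).shape)  # -> [1, 128, 32, 32], channels re-weighted in place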


@manager.MODELS.add_component
class U2CrackNet(nn.Layer):
    """
    A crack-segmentation variant of U^2-Net in which the deepest RSU-4F
    stage is replaced by an ASPP module and each side output is re-weighted
    by efficient channel attention before fusion.

    The original article refers to
    Qin, Xuebin, et al. "U2-Net: Going deeper with nested U-structure for
    salient object detection." Pattern Recognition 106 (2020): 107404.
    """

    def __init__(self, num_classes, in_channels=3, pretrained=None):
        super(U2CrackNet, self).__init__()

        self.stage1 = RSU7(in_channels, 16, 64)
        self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 16, 64)
        self.pool23 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(64, 16, 64)
        self.pool34 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(64, 16, 64)
        self.pool45 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(64, 16, 64)
        self.pool56 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        # ASPP in place of the second RSU4F used by the original U^2-Net.
        self.stage6 = layers.ASPPModule(
            aspp_ratios=[1, 6, 12, 18],
            in_channels=64,
            out_channels=64,
            align_corners=True)

        # decoder
        self.stage5d = RSU4F(128, 16, 64)
        self.stage4d = RSU4(128, 16, 64)
        self.stage3d = RSU5(128, 16, 64)
        self.stage2d = RSU6(128, 16, 64)
        self.stage1d = RSU7(128, 16, 64)

        self.side1 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side2 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side3 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side4 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side5 = nn.Conv2D(64, num_classes, 3, padding=1)
        self.side6 = nn.Conv2D(64, num_classes, 3, padding=1)

        self.efa1 = EfficientChannelAttention(in_channels=num_classes)
        self.efa2 = EfficientChannelAttention(in_channels=num_classes)
        self.efa3 = EfficientChannelAttention(in_channels=num_classes)
        self.efa4 = EfficientChannelAttention(in_channels=num_classes)
        self.efa5 = EfficientChannelAttention(in_channels=num_classes)
        self.efa6 = EfficientChannelAttention(in_channels=num_classes)

        self.outconv = nn.Conv2D(6 * num_classes, num_classes, 1)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        hx = x

        # stage 1
        hx1 = self.stage1(hx)
        hx = self.pool12(hx1)

        # stage 2
        hx2 = self.stage2(hx)
        hx = self.pool23(hx2)

        # stage 3
        hx3 = self.stage3(hx)
        hx = self.pool34(hx3)

        # stage 4
        hx4 = self.stage4(hx)
        hx = self.pool45(hx4)

        # stage 5
        hx5 = self.stage5(hx)
        hx = self.pool56(hx5)

        # stage 6
        hx6 = self.stage6(hx)
        hx6up = _upsample_like(hx6, hx5)

        # decoder
        hx5d = self.stage5d(paddle.concat((hx6up, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.stage4d(paddle.concat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.stage3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.stage2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.stage1d(paddle.concat((hx2dup, hx1), 1))

        # side outputs
        d1 = self.side1(hx1d)

        d2 = self.side2(hx2d)
        d2 = _upsample_like(d2, d1)

        d3 = self.side3(hx3d)
        d3 = _upsample_like(d3, d1)

        d4 = self.side4(hx4d)
        d4 = _upsample_like(d4, d1)

        d5 = self.side5(hx5d)
        d5 = _upsample_like(d5, d1)

        d6 = self.side6(hx6)
        d6 = _upsample_like(d6, d1)

        d1 = self.efa1(d1)
        d2 = self.efa2(d2)
        d3 = self.efa3(d3)
        d4 = self.efa4(d4)
        d5 = self.efa5(d5)
        d6 = self.efa6(d6)

        d0 = self.outconv(paddle.concat((d1, d2, d3, d4, d5, d6), 1))

        # Only the fused map is returned; the per-stage side outputs
        # d1..d6 could also be returned for deep supervision.
        return [d0]

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class REBNCONV(nn.Layer):
    def __init__(self, in_ch=3, out_ch=3, dirate=1):
        super(REBNCONV, self).__init__()

        self.conv_s1 = nn.Conv2D(
            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate)
        self.bn_s1 = nn.BatchNorm2D(out_ch)
        self.relu_s1 = nn.ReLU()

    def forward(self, x):
        hx = x
        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
        return xout


## upsample tensor 'src' to the same spatial size as tensor 'tar'
def _upsample_like(src, tar):
    # F.interpolate replaces the deprecated F.upsample alias.
    src = F.interpolate(src, size=paddle.shape(tar)[2:], mode='bilinear')
    return src
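
# Why _upsample_like is needed: the ceil_mode=True pools can produce
# odd-sized maps (e.g. 25 -> 13), so a decoder feature cannot simply be
# doubled; it is resized to the exact size of its skip connection instead.
# The 25x25 input is an arbitrary odd size chosen to show the effect.
if __name__ == '__main__':
    x = paddle.randn([1, 8, 25, 25])
    pooled = nn.MaxPool2D(2, stride=2, ceil_mode=True)(x)  # -> [1, 8, 13, 13]
    print(_upsample_like(pooled, x).shape)  # -> [1, 8, 25, 25]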


### RSU-7 ###
class RSU7(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU7, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)
        hx = self.pool5(hx5)

        hx6 = self.rebnconv6(hx)

        hx7 = self.rebnconv7(hx6)

        hx6d = self.rebnconv6d(paddle.concat((hx7, hx6), 1))
        hx6dup = _upsample_like(hx6d, hx5)

        hx5d = self.rebnconv5d(paddle.concat((hx6dup, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(paddle.concat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin


### RSU-6 ###
class RSU6(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU6, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)
        hx = self.pool4(hx4)

        hx5 = self.rebnconv5(hx)

        hx6 = self.rebnconv6(hx5)

        hx5d = self.rebnconv5d(paddle.concat((hx6, hx5), 1))
        hx5dup = _upsample_like(hx5d, hx4)

        hx4d = self.rebnconv4d(paddle.concat((hx5dup, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin
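
# Each RSU block is shape-preserving: rebnconvin lifts in_ch -> out_ch, the
# nested U computes a refinement at mid_ch width, and the block returns
# hx1d + hxin as a residual at out_ch channels and the input resolution.
# The 96x96 input divides evenly through all five pools of RSU7, but any
# size works thanks to _upsample_like; the sizes here are illustrative.
if __name__ == '__main__':
    rsu = RSU7(in_ch=3, mid_ch=16, out_ch=64)
    x = paddle.randn([1, 3, 96, 96])
    print(rsu(x).shape)  # -> [1, 64, 96, 96]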


### RSU-5 ###
class RSU5(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU5, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)
        hx = self.pool3(hx3)

        hx4 = self.rebnconv4(hx)

        hx5 = self.rebnconv5(hx4)

        hx4d = self.rebnconv4d(paddle.concat((hx5, hx4), 1))
        hx4dup = _upsample_like(hx4d, hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4dup, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin


### RSU-4 ###
class RSU4(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx = self.pool1(hx1)

        hx2 = self.rebnconv2(hx)
        hx = self.pool2(hx2)

        hx3 = self.rebnconv3(hx)

        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4, hx3), 1))
        hx3dup = _upsample_like(hx3d, hx2)

        hx2d = self.rebnconv2d(paddle.concat((hx3dup, hx2), 1))
        hx2dup = _upsample_like(hx2d, hx1)

        hx1d = self.rebnconv1d(paddle.concat((hx2dup, hx1), 1))

        return hx1d + hxin


### RSU-4F ###
class RSU4F(nn.Layer):
    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4F, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hx = x
        hxin = self.rebnconvin(hx)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(hx1)
        hx3 = self.rebnconv3(hx2)

        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(paddle.concat((hx4, hx3), 1))
        hx2d = self.rebnconv2d(paddle.concat((hx3d, hx2), 1))
        hx1d = self.rebnconv1d(paddle.concat((hx2d, hx1), 1))

        return hx1d + hxin
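
# End-to-end smoke test for the registered model. forward() returns a
# single-element list holding the fused logit map d0 at the input
# resolution; num_classes=2 and the 256x256 input are arbitrary choices
# for the sketch (a size divisible by 32 avoids odd intermediate maps).
if __name__ == '__main__':
    model = U2CrackNet(num_classes=2)
    logits = model(paddle.randn([1, 3, 256, 256]))
    print(len(logits), logits[0].shape)  # -> 1 [1, 2, 256, 256]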
--------------------------------------------------------------------------------
/models/unet.py:
--------------------------------------------------------------------------------
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg import utils
from paddleseg.cvlibs import manager
from paddleseg.models import layers


# @manager.MODELS.add_component
class UNet(nn.Layer):
    """
    The UNet implementation based on PaddlePaddle.

    The original article refers to
    Olaf Ronneberger, et al. "U-Net: Convolutional Networks for Biomedical
    Image Segmentation" (https://arxiv.org/abs/1505.04597).

    Args:
        num_classes (int): The unique number of target classes.
        align_corners (bool): An argument of F.interpolate. It should be set
            to False when the output size of the feature is even, e.g.
            1024x512; otherwise it is True, e.g. 769x769. Default: False.
        use_deconv (bool, optional): Whether to use deconvolution for
            upsampling. If False, bilinear resizing is used. Default: False.
        in_channels (int, optional): The channels of the input image.
            Default: 3.
        pretrained (str, optional): The path or url of the pretrained model
            for fine-tuning. Default: None.
    """

    def __init__(self,
                 num_classes,
                 align_corners=False,
                 use_deconv=False,
                 in_channels=3,
                 pretrained=None):
        super().__init__()

        self.encode = Encoder(in_channels)
        self.decode = Decoder(align_corners, use_deconv=use_deconv)
        # `conv` is kept as a duplicate attribute of `cls`, as in the
        # upstream PaddleSeg implementation.
        self.cls = self.conv = nn.Conv2D(
            in_channels=64,
            out_channels=num_classes,
            kernel_size=3,
            stride=1,
            padding=1)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        logit_list = []
        x, short_cuts = self.encode(x)
        x = self.decode(x, short_cuts)
        logit = self.cls(x)
        logit_list.append(logit)
        return logit_list

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class Encoder(nn.Layer):
    def __init__(self, in_channels=3):
        super().__init__()

        self.double_conv = nn.Sequential(
            layers.ConvBNReLU(in_channels, 64, 3),
            layers.ConvBNReLU(64, 64, 3))
        down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
        self.down_sample_list = nn.LayerList([
            self.down_sampling(channel[0], channel[1])
            for channel in down_channels
        ])

    def down_sampling(self, in_channels, out_channels):
        modules = []
        modules.append(nn.MaxPool2D(kernel_size=2, stride=2))
        modules.append(layers.ConvBNReLU(in_channels, out_channels, 3))
        modules.append(layers.ConvBNReLU(out_channels, out_channels, 3))
        return nn.Sequential(*modules)

    def forward(self, x):
        short_cuts = []
        x = self.double_conv(x)
        for down_sample in self.down_sample_list:
            short_cuts.append(x)
            x = down_sample(x)
        return x, short_cuts
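
# Shape sketch for the encoder: short_cuts stores the pre-pooling features
# (64, 128, 256, 512 channels at full, 1/2, 1/4 and 1/8 resolution) that
# the decoder later consumes in reverse order. The 64x64 input is an
# arbitrary illustrative size.
if __name__ == '__main__':
    encoder = Encoder(in_channels=3)
    bottom, short_cuts = encoder(paddle.randn([1, 3, 64, 64]))
    print(bottom.shape)  # -> [1, 512, 4, 4]
    print([tuple(s.shape) for s in short_cuts])
    # -> [(1, 64, 64, 64), (1, 128, 32, 32), (1, 256, 16, 16), (1, 512, 8, 8)]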


class Decoder(nn.Layer):
    def __init__(self, align_corners, use_deconv=False):
        super().__init__()

        up_channels = [[512, 256], [256, 128], [128, 64], [64, 64]]
        self.up_sample_list = nn.LayerList([
            UpSampling(channel[0], channel[1], align_corners, use_deconv)
            for channel in up_channels
        ])

    def forward(self, x, short_cuts):
        for i in range(len(short_cuts)):
            x = self.up_sample_list[i](x, short_cuts[-(i + 1)])
        return x


class UpSampling(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 align_corners,
                 use_deconv=False):
        super().__init__()

        self.align_corners = align_corners

        self.use_deconv = use_deconv
        if self.use_deconv:
            self.deconv = nn.Conv2DTranspose(
                in_channels,
                out_channels // 2,
                kernel_size=2,
                stride=2,
                padding=0)
            in_channels = in_channels + out_channels // 2
        else:
            in_channels *= 2

        self.double_conv = nn.Sequential(
            layers.ConvBNReLU(in_channels, out_channels, 3),
            layers.ConvBNReLU(out_channels, out_channels, 3))

    def forward(self, x, short_cut):
        if self.use_deconv:
            x = self.deconv(x)
        else:
            x = F.interpolate(
                x,
                paddle.shape(short_cut)[2:],
                mode='bilinear',
                align_corners=self.align_corners)
        x = paddle.concat([x, short_cut], axis=1)
        x = self.double_conv(x)
        return x
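
# Full-model smoke test: UNet returns a one-element list of logits at the
# input resolution. num_classes=2 and the 64x64 input are illustrative;
# set use_deconv=True to upsample with transposed convolutions instead of
# bilinear resizing.
if __name__ == '__main__':
    net = UNet(num_classes=2)
    out = net(paddle.randn([1, 3, 64, 64]))
    print(len(out), out[0].shape)  # -> 1 [1, 2, 64, 64]
--------------------------------------------------------------------------------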