├── .gitignore ├── LICENSE ├── data ├── camvid │ ├── camvid_test_list.txt │ ├── camvid_train_list.txt │ ├── camvid_trainval_list.txt │ └── camvid_val_list.txt ├── cityscapes │ ├── test.txt │ ├── train++.txt │ ├── train+.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt └── fig │ ├── frankfurt_000000_002196_gtFine_color.png │ ├── frankfurt_000000_002196_leftImg8bit.png │ └── frankfurt_000000_002196_leftImg8bit_pred.png ├── exp ├── train_dfanet.sh ├── train_dfsegv1.sh ├── train_dfsegv2.sh ├── train_icnet.sh └── train_pspnet.sh ├── libs ├── __init__.py ├── core │ ├── __init__.py │ ├── loss.py │ └── operators.py ├── datasets │ ├── __init__.py │ ├── camvid.py │ ├── cityscapes.py │ └── mapillary.py ├── models │ ├── BiSegNet.py │ ├── DFANet.py │ ├── DFSegNet.py │ ├── ESPNet.py │ ├── FastSCNN.py │ ├── ICNet.py │ ├── MSFNet.py │ ├── PSPNet.py │ ├── SwiftNet.py │ ├── __init__.py │ └── backbone │ │ ├── __init__.py │ │ ├── dfnet.py │ │ ├── resnet.py │ │ └── xception.py └── utils │ ├── __init__.py │ ├── image_utils.py │ ├── logger.py │ └── tools.py ├── prediction_test_different_size.py ├── readme.md ├── requirement.txt ├── train_distribute.py └── val.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.jpg 8 | 9 | # compilation and distribution 10 | __pycache__ 11 | _ext 12 | *.pyc 13 | *.so 14 | detectron2.egg-info/ 15 | build/ 16 | dist/ 17 | 18 | # pytorch/python/numpy formats 19 | *.pth 20 | *.pkl 21 | *.npy 22 | 23 | # ipython/jupyter notebooks 24 | *.ipynb 25 | **/.ipynb_checkpoints/ 26 | 27 | # Editor temporaries 28 | *.swn 29 | *.swo 30 | *.swp 31 | *~ 32 | 33 | # Pycharm editor settings 34 | .idea 35 | 36 | # project dirs 37 | /datasets 38 | /models 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /data/camvid/camvid_test_list.txt: -------------------------------------------------------------------------------- 1 | test/0001TP_008550.png testannot/0001TP_008550.png 2 | test/0001TP_008580.png testannot/0001TP_008580.png 3 | test/0001TP_008610.png testannot/0001TP_008610.png 4 | test/0001TP_008640.png testannot/0001TP_008640.png 5 | test/0001TP_008670.png testannot/0001TP_008670.png 6 | test/0001TP_008700.png testannot/0001TP_008700.png 7 | test/0001TP_008730.png testannot/0001TP_008730.png 8 | test/0001TP_008760.png testannot/0001TP_008760.png 9 | test/0001TP_008790.png testannot/0001TP_008790.png 10 | test/0001TP_008820.png testannot/0001TP_008820.png 11 | test/0001TP_008850.png testannot/0001TP_008850.png 12 | test/0001TP_008880.png testannot/0001TP_008880.png 13 | test/0001TP_008910.png testannot/0001TP_008910.png 14 | test/0001TP_008940.png testannot/0001TP_008940.png 15 | test/0001TP_008970.png testannot/0001TP_008970.png 16 | test/0001TP_009000.png testannot/0001TP_009000.png 17 | test/0001TP_009030.png testannot/0001TP_009030.png 18 | test/0001TP_009060.png testannot/0001TP_009060.png 19 | test/0001TP_009090.png testannot/0001TP_009090.png 20 | test/0001TP_009120.png testannot/0001TP_009120.png 21 | test/0001TP_009150.png testannot/0001TP_009150.png 22 | test/0001TP_009180.png testannot/0001TP_009180.png 23 | test/0001TP_009210.png testannot/0001TP_009210.png 24 | test/0001TP_009240.png testannot/0001TP_009240.png 25 | test/0001TP_009270.png testannot/0001TP_009270.png 26 | test/0001TP_009300.png testannot/0001TP_009300.png 27 | test/0001TP_009330.png testannot/0001TP_009330.png 28 | test/0001TP_009360.png testannot/0001TP_009360.png 29 | test/0001TP_009390.png testannot/0001TP_009390.png 30 | test/0001TP_009420.png testannot/0001TP_009420.png 31 | test/0001TP_009450.png testannot/0001TP_009450.png 32 | test/0001TP_009480.png testannot/0001TP_009480.png 33 | test/0001TP_009510.png testannot/0001TP_009510.png 34 | test/0001TP_009540.png testannot/0001TP_009540.png 35 | test/0001TP_009570.png testannot/0001TP_009570.png 36 | test/0001TP_009600.png testannot/0001TP_009600.png 37 | test/0001TP_009630.png testannot/0001TP_009630.png 38 | test/0001TP_009660.png testannot/0001TP_009660.png 39 | test/0001TP_009690.png testannot/0001TP_009690.png 40 | test/0001TP_009720.png testannot/0001TP_009720.png 41 | test/0001TP_009750.png testannot/0001TP_009750.png 42 | test/0001TP_009780.png testannot/0001TP_009780.png 43 | test/0001TP_009810.png testannot/0001TP_009810.png 44 | test/0001TP_009840.png testannot/0001TP_009840.png 45 | test/0001TP_009870.png testannot/0001TP_009870.png 46 | test/0001TP_009900.png testannot/0001TP_009900.png 47 | test/0001TP_009930.png testannot/0001TP_009930.png 48 | test/0001TP_009960.png testannot/0001TP_009960.png 49 | test/0001TP_009990.png testannot/0001TP_009990.png 50 | test/0001TP_010020.png testannot/0001TP_010020.png 51 | test/0001TP_010050.png testannot/0001TP_010050.png 52 | test/0001TP_010080.png testannot/0001TP_010080.png 53 | test/0001TP_010110.png testannot/0001TP_010110.png 54 | test/0001TP_010140.png testannot/0001TP_010140.png 55 | test/0001TP_010170.png testannot/0001TP_010170.png 56 | test/0001TP_010200.png testannot/0001TP_010200.png 57 | test/0001TP_010230.png testannot/0001TP_010230.png 58 | test/0001TP_010260.png testannot/0001TP_010260.png 59 | test/0001TP_010290.png testannot/0001TP_010290.png 60 | test/0001TP_010320.png testannot/0001TP_010320.png 
61 | test/0001TP_010350.png testannot/0001TP_010350.png 62 | test/0001TP_010380.png testannot/0001TP_010380.png 63 | test/Seq05VD_f00000.png testannot/Seq05VD_f00000.png 64 | test/Seq05VD_f00030.png testannot/Seq05VD_f00030.png 65 | test/Seq05VD_f00060.png testannot/Seq05VD_f00060.png 66 | test/Seq05VD_f00090.png testannot/Seq05VD_f00090.png 67 | test/Seq05VD_f00120.png testannot/Seq05VD_f00120.png 68 | test/Seq05VD_f00150.png testannot/Seq05VD_f00150.png 69 | test/Seq05VD_f00180.png testannot/Seq05VD_f00180.png 70 | test/Seq05VD_f00210.png testannot/Seq05VD_f00210.png 71 | test/Seq05VD_f00240.png testannot/Seq05VD_f00240.png 72 | test/Seq05VD_f00270.png testannot/Seq05VD_f00270.png 73 | test/Seq05VD_f00300.png testannot/Seq05VD_f00300.png 74 | test/Seq05VD_f00330.png testannot/Seq05VD_f00330.png 75 | test/Seq05VD_f00360.png testannot/Seq05VD_f00360.png 76 | test/Seq05VD_f00390.png testannot/Seq05VD_f00390.png 77 | test/Seq05VD_f00420.png testannot/Seq05VD_f00420.png 78 | test/Seq05VD_f00450.png testannot/Seq05VD_f00450.png 79 | test/Seq05VD_f00480.png testannot/Seq05VD_f00480.png 80 | test/Seq05VD_f00510.png testannot/Seq05VD_f00510.png 81 | test/Seq05VD_f00540.png testannot/Seq05VD_f00540.png 82 | test/Seq05VD_f00570.png testannot/Seq05VD_f00570.png 83 | test/Seq05VD_f00600.png testannot/Seq05VD_f00600.png 84 | test/Seq05VD_f00630.png testannot/Seq05VD_f00630.png 85 | test/Seq05VD_f00660.png testannot/Seq05VD_f00660.png 86 | test/Seq05VD_f00690.png testannot/Seq05VD_f00690.png 87 | test/Seq05VD_f00720.png testannot/Seq05VD_f00720.png 88 | test/Seq05VD_f00750.png testannot/Seq05VD_f00750.png 89 | test/Seq05VD_f00780.png testannot/Seq05VD_f00780.png 90 | test/Seq05VD_f00810.png testannot/Seq05VD_f00810.png 91 | test/Seq05VD_f00840.png testannot/Seq05VD_f00840.png 92 | test/Seq05VD_f00870.png testannot/Seq05VD_f00870.png 93 | test/Seq05VD_f00900.png testannot/Seq05VD_f00900.png 94 | test/Seq05VD_f00930.png testannot/Seq05VD_f00930.png 95 | test/Seq05VD_f00960.png testannot/Seq05VD_f00960.png 96 | test/Seq05VD_f00990.png testannot/Seq05VD_f00990.png 97 | test/Seq05VD_f01020.png testannot/Seq05VD_f01020.png 98 | test/Seq05VD_f01050.png testannot/Seq05VD_f01050.png 99 | test/Seq05VD_f01080.png testannot/Seq05VD_f01080.png 100 | test/Seq05VD_f01110.png testannot/Seq05VD_f01110.png 101 | test/Seq05VD_f01140.png testannot/Seq05VD_f01140.png 102 | test/Seq05VD_f01170.png testannot/Seq05VD_f01170.png 103 | test/Seq05VD_f01200.png testannot/Seq05VD_f01200.png 104 | test/Seq05VD_f01230.png testannot/Seq05VD_f01230.png 105 | test/Seq05VD_f01260.png testannot/Seq05VD_f01260.png 106 | test/Seq05VD_f01290.png testannot/Seq05VD_f01290.png 107 | test/Seq05VD_f01320.png testannot/Seq05VD_f01320.png 108 | test/Seq05VD_f01350.png testannot/Seq05VD_f01350.png 109 | test/Seq05VD_f01380.png testannot/Seq05VD_f01380.png 110 | test/Seq05VD_f01410.png testannot/Seq05VD_f01410.png 111 | test/Seq05VD_f01440.png testannot/Seq05VD_f01440.png 112 | test/Seq05VD_f01470.png testannot/Seq05VD_f01470.png 113 | test/Seq05VD_f01500.png testannot/Seq05VD_f01500.png 114 | test/Seq05VD_f01530.png testannot/Seq05VD_f01530.png 115 | test/Seq05VD_f01560.png testannot/Seq05VD_f01560.png 116 | test/Seq05VD_f01590.png testannot/Seq05VD_f01590.png 117 | test/Seq05VD_f01620.png testannot/Seq05VD_f01620.png 118 | test/Seq05VD_f01650.png testannot/Seq05VD_f01650.png 119 | test/Seq05VD_f01680.png testannot/Seq05VD_f01680.png 120 | test/Seq05VD_f01710.png testannot/Seq05VD_f01710.png 121 | test/Seq05VD_f01740.png 
testannot/Seq05VD_f01740.png 122 | test/Seq05VD_f01770.png testannot/Seq05VD_f01770.png 123 | test/Seq05VD_f01800.png testannot/Seq05VD_f01800.png 124 | test/Seq05VD_f01830.png testannot/Seq05VD_f01830.png 125 | test/Seq05VD_f01860.png testannot/Seq05VD_f01860.png 126 | test/Seq05VD_f01890.png testannot/Seq05VD_f01890.png 127 | test/Seq05VD_f01920.png testannot/Seq05VD_f01920.png 128 | test/Seq05VD_f01950.png testannot/Seq05VD_f01950.png 129 | test/Seq05VD_f01980.png testannot/Seq05VD_f01980.png 130 | test/Seq05VD_f02010.png testannot/Seq05VD_f02010.png 131 | test/Seq05VD_f02040.png testannot/Seq05VD_f02040.png 132 | test/Seq05VD_f02070.png testannot/Seq05VD_f02070.png 133 | test/Seq05VD_f02100.png testannot/Seq05VD_f02100.png 134 | test/Seq05VD_f02130.png testannot/Seq05VD_f02130.png 135 | test/Seq05VD_f02160.png testannot/Seq05VD_f02160.png 136 | test/Seq05VD_f02190.png testannot/Seq05VD_f02190.png 137 | test/Seq05VD_f02220.png testannot/Seq05VD_f02220.png 138 | test/Seq05VD_f02250.png testannot/Seq05VD_f02250.png 139 | test/Seq05VD_f02280.png testannot/Seq05VD_f02280.png 140 | test/Seq05VD_f02310.png testannot/Seq05VD_f02310.png 141 | test/Seq05VD_f02340.png testannot/Seq05VD_f02340.png 142 | test/Seq05VD_f02370.png testannot/Seq05VD_f02370.png 143 | test/Seq05VD_f02400.png testannot/Seq05VD_f02400.png 144 | test/Seq05VD_f02430.png testannot/Seq05VD_f02430.png 145 | test/Seq05VD_f02460.png testannot/Seq05VD_f02460.png 146 | test/Seq05VD_f02490.png testannot/Seq05VD_f02490.png 147 | test/Seq05VD_f02520.png testannot/Seq05VD_f02520.png 148 | test/Seq05VD_f02550.png testannot/Seq05VD_f02550.png 149 | test/Seq05VD_f02580.png testannot/Seq05VD_f02580.png 150 | test/Seq05VD_f02610.png testannot/Seq05VD_f02610.png 151 | test/Seq05VD_f02640.png testannot/Seq05VD_f02640.png 152 | test/Seq05VD_f02670.png testannot/Seq05VD_f02670.png 153 | test/Seq05VD_f02700.png testannot/Seq05VD_f02700.png 154 | test/Seq05VD_f02730.png testannot/Seq05VD_f02730.png 155 | test/Seq05VD_f02760.png testannot/Seq05VD_f02760.png 156 | test/Seq05VD_f02790.png testannot/Seq05VD_f02790.png 157 | test/Seq05VD_f02820.png testannot/Seq05VD_f02820.png 158 | test/Seq05VD_f02850.png testannot/Seq05VD_f02850.png 159 | test/Seq05VD_f02880.png testannot/Seq05VD_f02880.png 160 | test/Seq05VD_f02910.png testannot/Seq05VD_f02910.png 161 | test/Seq05VD_f02940.png testannot/Seq05VD_f02940.png 162 | test/Seq05VD_f02970.png testannot/Seq05VD_f02970.png 163 | test/Seq05VD_f03000.png testannot/Seq05VD_f03000.png 164 | test/Seq05VD_f03030.png testannot/Seq05VD_f03030.png 165 | test/Seq05VD_f03060.png testannot/Seq05VD_f03060.png 166 | test/Seq05VD_f03090.png testannot/Seq05VD_f03090.png 167 | test/Seq05VD_f03120.png testannot/Seq05VD_f03120.png 168 | test/Seq05VD_f03150.png testannot/Seq05VD_f03150.png 169 | test/Seq05VD_f03180.png testannot/Seq05VD_f03180.png 170 | test/Seq05VD_f03210.png testannot/Seq05VD_f03210.png 171 | test/Seq05VD_f03240.png testannot/Seq05VD_f03240.png 172 | test/Seq05VD_f03270.png testannot/Seq05VD_f03270.png 173 | test/Seq05VD_f03300.png testannot/Seq05VD_f03300.png 174 | test/Seq05VD_f03330.png testannot/Seq05VD_f03330.png 175 | test/Seq05VD_f03360.png testannot/Seq05VD_f03360.png 176 | test/Seq05VD_f03390.png testannot/Seq05VD_f03390.png 177 | test/Seq05VD_f03420.png testannot/Seq05VD_f03420.png 178 | test/Seq05VD_f03450.png testannot/Seq05VD_f03450.png 179 | test/Seq05VD_f03480.png testannot/Seq05VD_f03480.png 180 | test/Seq05VD_f03510.png testannot/Seq05VD_f03510.png 181 | test/Seq05VD_f03540.png 
testannot/Seq05VD_f03540.png 182 | test/Seq05VD_f03570.png testannot/Seq05VD_f03570.png 183 | test/Seq05VD_f03600.png testannot/Seq05VD_f03600.png 184 | test/Seq05VD_f03630.png testannot/Seq05VD_f03630.png 185 | test/Seq05VD_f03660.png testannot/Seq05VD_f03660.png 186 | test/Seq05VD_f03690.png testannot/Seq05VD_f03690.png 187 | test/Seq05VD_f03720.png testannot/Seq05VD_f03720.png 188 | test/Seq05VD_f03750.png testannot/Seq05VD_f03750.png 189 | test/Seq05VD_f03780.png testannot/Seq05VD_f03780.png 190 | test/Seq05VD_f03810.png testannot/Seq05VD_f03810.png 191 | test/Seq05VD_f03840.png testannot/Seq05VD_f03840.png 192 | test/Seq05VD_f03870.png testannot/Seq05VD_f03870.png 193 | test/Seq05VD_f03900.png testannot/Seq05VD_f03900.png 194 | test/Seq05VD_f03930.png testannot/Seq05VD_f03930.png 195 | test/Seq05VD_f03960.png testannot/Seq05VD_f03960.png 196 | test/Seq05VD_f03990.png testannot/Seq05VD_f03990.png 197 | test/Seq05VD_f04020.png testannot/Seq05VD_f04020.png 198 | test/Seq05VD_f04050.png testannot/Seq05VD_f04050.png 199 | test/Seq05VD_f04080.png testannot/Seq05VD_f04080.png 200 | test/Seq05VD_f04110.png testannot/Seq05VD_f04110.png 201 | test/Seq05VD_f04140.png testannot/Seq05VD_f04140.png 202 | test/Seq05VD_f04170.png testannot/Seq05VD_f04170.png 203 | test/Seq05VD_f04200.png testannot/Seq05VD_f04200.png 204 | test/Seq05VD_f04230.png testannot/Seq05VD_f04230.png 205 | test/Seq05VD_f04260.png testannot/Seq05VD_f04260.png 206 | test/Seq05VD_f04290.png testannot/Seq05VD_f04290.png 207 | test/Seq05VD_f04320.png testannot/Seq05VD_f04320.png 208 | test/Seq05VD_f04350.png testannot/Seq05VD_f04350.png 209 | test/Seq05VD_f04380.png testannot/Seq05VD_f04380.png 210 | test/Seq05VD_f04410.png testannot/Seq05VD_f04410.png 211 | test/Seq05VD_f04440.png testannot/Seq05VD_f04440.png 212 | test/Seq05VD_f04470.png testannot/Seq05VD_f04470.png 213 | test/Seq05VD_f04500.png testannot/Seq05VD_f04500.png 214 | test/Seq05VD_f04530.png testannot/Seq05VD_f04530.png 215 | test/Seq05VD_f04560.png testannot/Seq05VD_f04560.png 216 | test/Seq05VD_f04590.png testannot/Seq05VD_f04590.png 217 | test/Seq05VD_f04620.png testannot/Seq05VD_f04620.png 218 | test/Seq05VD_f04650.png testannot/Seq05VD_f04650.png 219 | test/Seq05VD_f04680.png testannot/Seq05VD_f04680.png 220 | test/Seq05VD_f04710.png testannot/Seq05VD_f04710.png 221 | test/Seq05VD_f04740.png testannot/Seq05VD_f04740.png 222 | test/Seq05VD_f04770.png testannot/Seq05VD_f04770.png 223 | test/Seq05VD_f04800.png testannot/Seq05VD_f04800.png 224 | test/Seq05VD_f04830.png testannot/Seq05VD_f04830.png 225 | test/Seq05VD_f04860.png testannot/Seq05VD_f04860.png 226 | test/Seq05VD_f04890.png testannot/Seq05VD_f04890.png 227 | test/Seq05VD_f04920.png testannot/Seq05VD_f04920.png 228 | test/Seq05VD_f04950.png testannot/Seq05VD_f04950.png 229 | test/Seq05VD_f04980.png testannot/Seq05VD_f04980.png 230 | test/Seq05VD_f05010.png testannot/Seq05VD_f05010.png 231 | test/Seq05VD_f05040.png testannot/Seq05VD_f05040.png 232 | test/Seq05VD_f05070.png testannot/Seq05VD_f05070.png 233 | test/Seq05VD_f05100.png testannot/Seq05VD_f05100.png 234 | -------------------------------------------------------------------------------- /data/camvid/camvid_val_list.txt: -------------------------------------------------------------------------------- 1 | val/0016E5_07959.png valannot/0016E5_07959.png 2 | val/0016E5_07961.png valannot/0016E5_07961.png 3 | val/0016E5_07963.png valannot/0016E5_07963.png 4 | val/0016E5_07965.png valannot/0016E5_07965.png 5 | val/0016E5_07967.png valannot/0016E5_07967.png 
6 | val/0016E5_07969.png valannot/0016E5_07969.png 7 | val/0016E5_07971.png valannot/0016E5_07971.png 8 | val/0016E5_07973.png valannot/0016E5_07973.png 9 | val/0016E5_07975.png valannot/0016E5_07975.png 10 | val/0016E5_07977.png valannot/0016E5_07977.png 11 | val/0016E5_07979.png valannot/0016E5_07979.png 12 | val/0016E5_07981.png valannot/0016E5_07981.png 13 | val/0016E5_07983.png valannot/0016E5_07983.png 14 | val/0016E5_07985.png valannot/0016E5_07985.png 15 | val/0016E5_07987.png valannot/0016E5_07987.png 16 | val/0016E5_07989.png valannot/0016E5_07989.png 17 | val/0016E5_07991.png valannot/0016E5_07991.png 18 | val/0016E5_07993.png valannot/0016E5_07993.png 19 | val/0016E5_07995.png valannot/0016E5_07995.png 20 | val/0016E5_07997.png valannot/0016E5_07997.png 21 | val/0016E5_07999.png valannot/0016E5_07999.png 22 | val/0016E5_08001.png valannot/0016E5_08001.png 23 | val/0016E5_08003.png valannot/0016E5_08003.png 24 | val/0016E5_08005.png valannot/0016E5_08005.png 25 | val/0016E5_08007.png valannot/0016E5_08007.png 26 | val/0016E5_08009.png valannot/0016E5_08009.png 27 | val/0016E5_08011.png valannot/0016E5_08011.png 28 | val/0016E5_08013.png valannot/0016E5_08013.png 29 | val/0016E5_08015.png valannot/0016E5_08015.png 30 | val/0016E5_08017.png valannot/0016E5_08017.png 31 | val/0016E5_08019.png valannot/0016E5_08019.png 32 | val/0016E5_08021.png valannot/0016E5_08021.png 33 | val/0016E5_08023.png valannot/0016E5_08023.png 34 | val/0016E5_08025.png valannot/0016E5_08025.png 35 | val/0016E5_08027.png valannot/0016E5_08027.png 36 | val/0016E5_08029.png valannot/0016E5_08029.png 37 | val/0016E5_08031.png valannot/0016E5_08031.png 38 | val/0016E5_08033.png valannot/0016E5_08033.png 39 | val/0016E5_08035.png valannot/0016E5_08035.png 40 | val/0016E5_08037.png valannot/0016E5_08037.png 41 | val/0016E5_08039.png valannot/0016E5_08039.png 42 | val/0016E5_08041.png valannot/0016E5_08041.png 43 | val/0016E5_08043.png valannot/0016E5_08043.png 44 | val/0016E5_08045.png valannot/0016E5_08045.png 45 | val/0016E5_08047.png valannot/0016E5_08047.png 46 | val/0016E5_08049.png valannot/0016E5_08049.png 47 | val/0016E5_08051.png valannot/0016E5_08051.png 48 | val/0016E5_08053.png valannot/0016E5_08053.png 49 | val/0016E5_08055.png valannot/0016E5_08055.png 50 | val/0016E5_08057.png valannot/0016E5_08057.png 51 | val/0016E5_08059.png valannot/0016E5_08059.png 52 | val/0016E5_08061.png valannot/0016E5_08061.png 53 | val/0016E5_08063.png valannot/0016E5_08063.png 54 | val/0016E5_08065.png valannot/0016E5_08065.png 55 | val/0016E5_08067.png valannot/0016E5_08067.png 56 | val/0016E5_08069.png valannot/0016E5_08069.png 57 | val/0016E5_08071.png valannot/0016E5_08071.png 58 | val/0016E5_08073.png valannot/0016E5_08073.png 59 | val/0016E5_08075.png valannot/0016E5_08075.png 60 | val/0016E5_08077.png valannot/0016E5_08077.png 61 | val/0016E5_08079.png valannot/0016E5_08079.png 62 | val/0016E5_08081.png valannot/0016E5_08081.png 63 | val/0016E5_08083.png valannot/0016E5_08083.png 64 | val/0016E5_08085.png valannot/0016E5_08085.png 65 | val/0016E5_08087.png valannot/0016E5_08087.png 66 | val/0016E5_08089.png valannot/0016E5_08089.png 67 | val/0016E5_08091.png valannot/0016E5_08091.png 68 | val/0016E5_08093.png valannot/0016E5_08093.png 69 | val/0016E5_08095.png valannot/0016E5_08095.png 70 | val/0016E5_08097.png valannot/0016E5_08097.png 71 | val/0016E5_08099.png valannot/0016E5_08099.png 72 | val/0016E5_08101.png valannot/0016E5_08101.png 73 | val/0016E5_08103.png valannot/0016E5_08103.png 74 | 
val/0016E5_08105.png valannot/0016E5_08105.png 75 | val/0016E5_08107.png valannot/0016E5_08107.png 76 | val/0016E5_08109.png valannot/0016E5_08109.png 77 | val/0016E5_08111.png valannot/0016E5_08111.png 78 | val/0016E5_08113.png valannot/0016E5_08113.png 79 | val/0016E5_08115.png valannot/0016E5_08115.png 80 | val/0016E5_08117.png valannot/0016E5_08117.png 81 | val/0016E5_08119.png valannot/0016E5_08119.png 82 | val/0016E5_08121.png valannot/0016E5_08121.png 83 | val/0016E5_08123.png valannot/0016E5_08123.png 84 | val/0016E5_08125.png valannot/0016E5_08125.png 85 | val/0016E5_08127.png valannot/0016E5_08127.png 86 | val/0016E5_08129.png valannot/0016E5_08129.png 87 | val/0016E5_08131.png valannot/0016E5_08131.png 88 | val/0016E5_08133.png valannot/0016E5_08133.png 89 | val/0016E5_08135.png valannot/0016E5_08135.png 90 | val/0016E5_08137.png valannot/0016E5_08137.png 91 | val/0016E5_08139.png valannot/0016E5_08139.png 92 | val/0016E5_08141.png valannot/0016E5_08141.png 93 | val/0016E5_08143.png valannot/0016E5_08143.png 94 | val/0016E5_08145.png valannot/0016E5_08145.png 95 | val/0016E5_08147.png valannot/0016E5_08147.png 96 | val/0016E5_08149.png valannot/0016E5_08149.png 97 | val/0016E5_08151.png valannot/0016E5_08151.png 98 | val/0016E5_08153.png valannot/0016E5_08153.png 99 | val/0016E5_08155.png valannot/0016E5_08155.png 100 | val/0016E5_08157.png valannot/0016E5_08157.png 101 | val/0016E5_08159.png valannot/0016E5_08159.png 102 | -------------------------------------------------------------------------------- /data/fig/frankfurt_000000_002196_gtFine_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/data/fig/frankfurt_000000_002196_gtFine_color.png -------------------------------------------------------------------------------- /data/fig/frankfurt_000000_002196_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/data/fig/frankfurt_000000_002196_leftImg8bit.png -------------------------------------------------------------------------------- /data/fig/frankfurt_000000_002196_leftImg8bit_pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/data/fig/frankfurt_000000_002196_leftImg8bit_pred.png -------------------------------------------------------------------------------- /exp/train_dfanet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch dfanet \ 9 | --restore_from "/nas/dataset/pretrained/xceptiona_imagenet.pth" \ 10 | --input_size 1024 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 60000 \ 14 | --save_dir "./save/dfanet" \ 15 | --rgb 1 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./save/dfanet.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch dfanet \ 25 | --rgb 1 \ 26 | 
--restore_from "./save/dfnetv1seg/dfanet_final.pth" \ 27 | --whole True \ 28 | --output_dir "./dfanet_out" -------------------------------------------------------------------------------- /exp/train_dfsegv1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch dfnetv1seg \ 9 | --restore_from "/nas/dataset/pretrained/df1_imagenet.pth" \ 10 | --input_size 832 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 50000 \ 14 | --save_dir "./save/dfnetv1seg" \ 15 | --rgb 1 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./save/dfnetv1seg.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch dfnetv1seg \ 25 | --rgb 1 \ 26 | --restore_from "./save/dfnetv1seg/dfnetv1seg_final.pth" \ 27 | --whole True \ 28 | --output_dir "./ICNet_vis" -------------------------------------------------------------------------------- /exp/train_dfsegv2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch dfnetv2seg \ 9 | --restore_from "/nas/dataset/pretrained/df2_imagenet.pth" \ 10 | --input_size 832 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 50000 \ 14 | --save_dir "./saveDFnetv2" \ 15 | --rgb 1 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./log/saveDFnetv2.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch dfnetv2seg \ 25 | --rgb 1 \ 26 | --restore_from "./saveICNet/dfnetv2seg_final.pth" \ 27 | --whole True \ 28 | --output_dir "./dfnetv2seg" -------------------------------------------------------------------------------- /exp/train_icnet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch icnet \ 9 | --restore_from "/nas/dataset/pretrained/resnet50-deep.pth" \ 10 | --input_size 832 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 50000 \ 14 | --save_dir "./saveICNet" \ 15 | --rgb 0 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./log/ICNet.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch ICNet \ 25 | --rgb 0 \ 26 | --restore_from "./saveICNet/icnet_final.pth" \ 27 | --whole True \ 28 | --output_dir "./ICNet_vis" 
-------------------------------------------------------------------------------- /exp/train_pspnet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/__init__.py -------------------------------------------------------------------------------- /libs/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/core/__init__.py -------------------------------------------------------------------------------- /libs/core/loss.py: -------------------------------------------------------------------------------- 1 | # CE-loss 2 | import torch.nn as nn 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | class OhemCrossEntropy2dTensor(nn.Module): 8 | def __init__(self, ignore_label, reduction='elementwise_mean', thresh=0.6, min_kept=256, 9 | down_ratio=1, use_weight=False): 10 | super(OhemCrossEntropy2dTensor, self).__init__() 11 | self.ignore_label = ignore_label 12 | self.thresh = float(thresh) 13 | self.min_kept = int(min_kept) 14 | self.down_ratio = down_ratio 15 | if use_weight: 16 | weight = torch.FloatTensor( 17 | [0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 18 | 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 19 | 1.0865, 1.1529, 1.0507]) 20 | self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction, 21 | weight=weight, 22 | ignore_index=ignore_label) 23 | else: 24 | self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction, 25 | ignore_index=ignore_label) 26 | 27 | def forward(self, pred, target): 28 | b, c, h, w = pred.size() 29 | target = target.view(-1) 30 | valid_mask = target.ne(self.ignore_label) 31 | target = target * valid_mask.long() 32 | num_valid = valid_mask.sum() 33 | 34 | prob = F.softmax(pred, dim=1) 35 | prob = (prob.transpose(0, 1)).reshape(c, -1) 36 | 37 | if self.min_kept > num_valid: 38 | print('Labels: {}'.format(num_valid)) 39 | elif num_valid > 0: 40 | prob = prob.masked_fill_(1 - valid_mask, 1) 41 | mask_prob = prob[ 42 | target, torch.arange(len(target), dtype=torch.long)] 43 | threshold = self.thresh 44 | if self.min_kept > 0: 45 | _, index = mask_prob.sort() 46 | threshold_index = index[min(len(index), self.min_kept) - 1] 47 | if mask_prob[threshold_index] > self.thresh: 48 | threshold = mask_prob[threshold_index] 49 | kept_mask = mask_prob.le(threshold) 50 | target = target * kept_mask.long() 51 | valid_mask = valid_mask * kept_mask 52 | 53 | target = target.masked_fill_(1 - valid_mask, self.ignore_label) 54 | target = target.view(b, h, w) 55 | 56 | return self.criterion(pred, target) 57 | 58 | 59 | class CriterionDSN(nn.CrossEntropyLoss): 60 | def __init__(self, ignore_index=255,reduce=True): 61 | super(CriterionDSN, self).__init__() 62 | 63 | self.ignore_index = ignore_index 64 | self.reduce = reduce 65 | def forward(self, preds, target): 66 | scale_pred = preds[0] 67 | loss1 = super(CriterionDSN, self).forward(scale_pred, target) 68 | scale_pred = preds[1] 69 | loss2 = super(CriterionDSN, self).forward(scale_pred, target) 70 | 71 | return loss1 + loss2 * 0.4 72 | 73 | 74 | class CriterionOhemDSN(nn.Module): 75 | ''' 76 | DSN : We need to consider 
two supervision for the models. 77 | ''' 78 | def __init__(self, ignore_index=255, thresh=0.7, min_kept=100000, reduce=True): 79 | super(CriterionOhemDSN, self).__init__() 80 | self.ignore_index = ignore_index 81 | self.criterion1 = OhemCrossEntropy2dTensor(ignore_index, thresh=thresh, min_kept=min_kept) 82 | self.criterion2 = torch.nn.CrossEntropyLoss(ignore_index=ignore_index, reduce=reduce) 83 | if not reduce: 84 | print("disabled the reduce.") 85 | 86 | def forward(self, preds, target): 87 | h, w = target.size(1), target.size(2) 88 | 89 | scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) 90 | loss1 = self.criterion1(scale_pred, target) 91 | 92 | scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) 93 | loss2 = self.criterion2(scale_pred, target) 94 | 95 | return loss1 + loss2 * 0.4 96 | 97 | 98 | 99 | class CriterionICNet(nn.Module): 100 | """ 101 | ICNet loss 102 | """ 103 | 104 | def __init__(self, ignore_index=255, thresh=0.7, min_kept=100000, reduce=True): 105 | super(CriterionICNet, self).__init__() 106 | self.ignore_index = ignore_index 107 | self.criterion1 = OhemCrossEntropy2dTensor(ignore_index, thresh=thresh, min_kept=min_kept) 108 | 109 | if not reduce: 110 | print("disabled the reduce.") 111 | 112 | def forward(self, preds, target): 113 | h, w = target.size(1), target.size(2) 114 | 115 | scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) 116 | loss1 = self.criterion1(scale_pred, target) 117 | 118 | scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) 119 | loss2 = self.criterion1(scale_pred, target) 120 | 121 | scale_pred = F.upsample(input=preds[2], size=(h, w), mode='bilinear', align_corners=True) 122 | loss3 = self.criterion1(scale_pred, target) 123 | 124 | scale_pred = F.upsample(input=preds[3], size=(h, w), mode='bilinear', align_corners=True) 125 | loss4 = self.criterion1(scale_pred, target) 126 | 127 | return loss1 + 0.4 * loss2 + 0.4 * loss3 + 0.4 * loss4 128 | 129 | 130 | class CriterionDFANet(nn.Module): 131 | """ 132 | ICNet loss 133 | """ 134 | 135 | def __init__(self, ignore_index=255, thresh=0.7, min_kept=100000, reduce=True): 136 | super(CriterionDFANet, self).__init__() 137 | self.ignore_index = ignore_index 138 | self.criterion1 = OhemCrossEntropy2dTensor(ignore_index, thresh=thresh, min_kept=min_kept) 139 | self.criterion2 = torch.nn.CrossEntropyLoss(ignore_index=ignore_index, reduce=reduce) 140 | 141 | if not reduce: 142 | print("disabled the reduce.") 143 | 144 | def forward(self, preds, target): 145 | h, w = target.size(1), target.size(2) 146 | 147 | scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) 148 | loss1 = self.criterion1(scale_pred, target) 149 | 150 | scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) 151 | loss2 = self.criterion1(scale_pred, target) 152 | 153 | scale_pred = F.upsample(input=preds[2], size=(h, w), mode='bilinear', align_corners=True) 154 | loss3 = self.criterion1(scale_pred, target) 155 | 156 | return loss1 + 0.4 * loss2 + 0.4 * loss3 157 | -------------------------------------------------------------------------------- /libs/core/operators.py: -------------------------------------------------------------------------------- 1 | # Common Segmentation Operator implemented by Pytorch 2 | # XiangtaiLi(lxtpku@pku.edu.cn) 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 
| from torch.nn import BatchNorm2d 8 | 9 | 10 | upsample = lambda x, size: F.interpolate(x, size, mode='bilinear', align_corners=True) 11 | 12 | 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | """3x3 convolution with padding""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class GlobalAvgPool2d(nn.Module): 19 | def __init__(self): 20 | """Global average pooling over the input's spatial dimensions""" 21 | super(GlobalAvgPool2d, self).__init__() 22 | 23 | def forward(self, inputs): 24 | in_size = inputs.size() 25 | inputs = inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 26 | inputs = inputs.view(in_size[0], in_size[1], 1, 1) 27 | 28 | return inputs 29 | 30 | 31 | class SELayer(nn.Module): 32 | def __init__(self, in_planes, out_planes, reduction=16): 33 | super(SELayer, self).__init__() 34 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 35 | self.fc = nn.Sequential( 36 | nn.Linear(in_planes, out_planes // reduction), 37 | nn.ReLU(inplace=True), 38 | nn.Linear(out_planes // reduction, out_planes), 39 | nn.Sigmoid() 40 | ) 41 | self.out_planes = out_planes 42 | 43 | def forward(self, x): 44 | b, c, _, _ = x.size() 45 | y = self.avg_pool(x).view(b, c) 46 | y = self.fc(y).view(b, self.out_planes, 1, 1) 47 | return y 48 | 49 | 50 | class ConvBnRelu(nn.Module): 51 | def __init__(self, in_planes, out_planes, ksize, stride=1, pad=0, dilation=1, 52 | groups=1, has_bn=True, norm_layer=nn.BatchNorm2d, bn_eps=1e-5, 53 | has_relu=True, inplace=True, has_bias=False): 54 | super(ConvBnRelu, self).__init__() 55 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=ksize, 56 | stride=stride, padding=pad, 57 | dilation=dilation, groups=groups, bias=has_bias) 58 | self.has_bn = has_bn 59 | if self.has_bn: 60 | self.bn = norm_layer(out_planes, eps=bn_eps) 61 | self.has_relu = has_relu 62 | if self.has_relu: 63 | self.relu = nn.ReLU(inplace=inplace) 64 | 65 | def forward(self, x): 66 | x = self.conv(x) 67 | if self.has_bn: 68 | x = self.bn(x) 69 | if self.has_relu: 70 | x = self.relu(x) 71 | 72 | return x 73 | 74 | def dsn(in_channels, nclass, norm_layer=nn.BatchNorm2d): 75 | return nn.Sequential( 76 | nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), 77 | norm_layer(in_channels), 78 | nn.ReLU(), 79 | nn.Dropout2d(0.1), 80 | nn.Conv2d(in_channels, nclass, kernel_size=1, stride=1, padding=0, bias=True) 81 | ) 82 | 83 | 84 | class SeparableConv2d(nn.Module): 85 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, bias=False, norm_layer=None): 86 | super(SeparableConv2d, self).__init__() 87 | self.kernel_size = kernel_size 88 | self.dilation = dilation 89 | 90 | self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, 0, dilation, groups=in_channels, 91 | bias=bias) 92 | self.bn = norm_layer(in_channels) 93 | self.pointwise = nn.Conv2d(in_channels, out_channels, 1, bias=bias) 94 | 95 | def forward(self, x): 96 | x = self.fix_padding(x, self.kernel_size, self.dilation) 97 | x = self.conv1(x) 98 | x = self.bn(x) 99 | x = self.pointwise(x) 100 | 101 | return x 102 | 103 | def fix_padding(self, x, kernel_size, dilation): 104 | kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1) 105 | pad_total = kernel_size_effective - 1 106 | pad_beg = pad_total // 2 107 | pad_end = pad_total - pad_beg 108 | padded_inputs = F.pad(x, (pad_beg, pad_end, pad_beg, pad_end)) 109 | return padded_inputs 110 | 111 | 112 | class ASPPModule(nn.Module): 113 | """ 114 | Reference: 
115 | Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."* 116 | """ 117 | 118 | def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36), norm_layer=nn.BatchNorm2d): 119 | super(ASPPModule, self).__init__() 120 | 121 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 122 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 123 | bias=False), 124 | norm_layer(inner_features), 125 | nn.ReLU() 126 | ) 127 | self.conv2 = nn.Sequential( 128 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 129 | norm_layer(inner_features), nn.ReLU()) 130 | self.conv3 = nn.Sequential( 131 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 132 | norm_layer(inner_features), nn.ReLU()) 133 | self.conv4 = nn.Sequential( 134 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 135 | norm_layer(inner_features), nn.ReLU()) 136 | self.conv5 = nn.Sequential( 137 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 138 | norm_layer(inner_features), nn.ReLU()) 139 | 140 | self.bottleneck = nn.Sequential( 141 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 142 | norm_layer(out_features), 143 | nn.ReLU(), 144 | nn.Dropout2d(0.1) 145 | ) 146 | 147 | def forward(self, x): 148 | _, _, h, w = x.size() 149 | 150 | feat1 = F.upsample(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 151 | 152 | feat2 = self.conv2(x) 153 | feat3 = self.conv3(x) 154 | feat4 = self.conv4(x) 155 | feat5 = self.conv5(x) 156 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 157 | 158 | bottle = self.bottleneck(out) 159 | return bottle 160 | 161 | 162 | class A2Block(nn.Module): 163 | """ 164 | Implementation of A2Block(NIPS 2018) 165 | """ 166 | def __init__(self, inplane, plane): 167 | super(A2Block, self).__init__() 168 | self.down = nn.Conv2d(inplane, plane, 1) 169 | self.up = nn.Conv2d(plane, inplane, 1) 170 | self.gather_down = nn.Conv2d(inplane, plane, 1) 171 | self.distribue_down = nn.Conv2d(inplane, plane, 1) 172 | self.softmax = nn.Softmax(dim=-1) 173 | 174 | def forward(self, x): 175 | res = x 176 | A = self.down(res) 177 | B = self.gather_down(res) 178 | b, c, h, w = A.size() 179 | A = A.view(b, c, -1) # (b, c, h*w) 180 | B = B.view(b, c, -1) # (b, c, h*w) 181 | B = self.softmax(B) 182 | B = B.permute(0, 2, 1) # (b, h*w, c) 183 | 184 | G = torch.bmm(A, B) # (b,c,c) 185 | 186 | C = self.distribue_down(res) 187 | C = C.view(b, c, -1) # (b, c, h*w) 188 | C = self.softmax(C) 189 | C = C.permute(0, 2, 1) # (b, h*w, c) 190 | 191 | atten = torch.bmm(C, G) # (b, h*w, c) 192 | atten = atten.permute(0, 2, 1).view(b, c, h, -1) 193 | atten = self.up(atten) 194 | 195 | out = res + atten 196 | return out 197 | 198 | 199 | class PSPModule(nn.Module): 200 | """ 201 | Reference: 202 | Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* 203 | """ 204 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6), norm_layer=BatchNorm2d): 205 | super(PSPModule, self).__init__() 206 | self.stages = [] 207 | self.stages = nn.ModuleList([self._make_stage(features, out_features, size, norm_layer) for size in sizes]) 208 | self.bottleneck = nn.Sequential( 209 | nn.Conv2d(features+len(sizes)*out_features, out_features, kernel_size=1, padding=1, dilation=1, bias=False), 210 | norm_layer(out_features), 211 | nn.ReLU(), 212 | nn.Dropout2d(0.1) 213 | ) 214 | 215 | def _make_stage(self, features, out_features, size, norm_layer): 216 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 217 | conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) 218 | bn = norm_layer(out_features) 219 | return nn.Sequential(prior, conv, bn) 220 | 221 | def forward(self, feats): 222 | h, w = feats.size(2), feats.size(3) 223 | priors = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] 224 | bottle = self.bottleneck(torch.cat(priors, 1)) 225 | return bottle 226 | 227 | 228 | 229 | 230 | # For BiSeNet 231 | class AttentionRefinement(nn.Module): 232 | def __init__(self, in_planes, out_planes, 233 | norm_layer=nn.BatchNorm2d): 234 | super(AttentionRefinement, self).__init__() 235 | self.conv_3x3 = ConvBnRelu(in_planes, out_planes, 3, 1, 1, 236 | has_bn=True, norm_layer=norm_layer, 237 | has_relu=True, has_bias=False) 238 | self.channel_attention = nn.Sequential( 239 | nn.AdaptiveAvgPool2d(1), 240 | ConvBnRelu(out_planes, out_planes, 1, 1, 0, 241 | has_bn=True, norm_layer=norm_layer, 242 | has_relu=False, has_bias=False), 243 | nn.Sigmoid() 244 | ) 245 | 246 | def forward(self, x): 247 | fm = self.conv_3x3(x) 248 | fm_se = self.channel_attention(fm) 249 | fm = fm * fm_se 250 | 251 | return fm 252 | 253 | # For BiSeNet 254 | class FeatureFusion(nn.Module): 255 | def __init__(self, in_planes, out_planes, 256 | reduction=1, norm_layer=nn.BatchNorm2d): 257 | super(FeatureFusion, self).__init__() 258 | self.conv_1x1 = ConvBnRelu(in_planes, out_planes, 1, 1, 0, 259 | has_bn=True, norm_layer=norm_layer, 260 | has_relu=True, has_bias=False) 261 | self.channel_attention = nn.Sequential( 262 | nn.AdaptiveAvgPool2d(1), 263 | ConvBnRelu(out_planes, out_planes // reduction, 1, 1, 0, 264 | has_bn=False, norm_layer=norm_layer, 265 | has_relu=True, has_bias=False), 266 | ConvBnRelu(out_planes // reduction, out_planes, 1, 1, 0, 267 | has_bn=False, norm_layer=norm_layer, 268 | has_relu=False, has_bias=False), 269 | nn.Sigmoid() 270 | ) 271 | 272 | def forward(self, x1, x2): 273 | fm = torch.cat([x1, x2], dim=1) 274 | fm = self.conv_1x1(fm) 275 | fm_se = self.channel_attention(fm) 276 | output = fm + fm * fm_se 277 | return output 278 | 279 | 280 | -------------------------------------------------------------------------------- /libs/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/datasets/__init__.py -------------------------------------------------------------------------------- /libs/datasets/camvid.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | 4 | import os.path as osp 5 | import numpy as np 6 | import random 7 | import cv2 8 | from torch.utils import data 9 | 10 | 11 | """ 12 | CamVid is a road scene 
understanding dataset with 367 training images and 233 testing images of day and dusk scenes. 13 | The challenge is to segment 11 classes such as road, building, cars, pedestrians, signs, poles, side-walk etc. We 14 | resize images to 360x480 pixels for training and testing. 15 | """ 16 | 17 | CAMVID_CLASSES = ['Sky', 18 | 'Building', 19 | 'Column-Pole', 20 | 'Road', 21 | 'Sidewalk', 22 | 'Tree', 23 | 'Sign-Symbol', 24 | 'Fence', 25 | 'Car', 26 | 'Pedestrain', 27 | 'Bicyclist', 28 | 'Void'] 29 | 30 | CAMVID_CLASS_COLORS = [ 31 | (128, 128, 128), 32 | (128, 0, 0), 33 | (192, 192, 128), 34 | (128, 64, 128), 35 | (0, 0, 192), 36 | (128, 128, 0), 37 | (192, 128, 128), 38 | (64, 64, 128), 39 | (64, 0, 128), 40 | (64, 64, 0), 41 | (0, 128, 192), 42 | (0, 0, 0), 43 | ] 44 | 45 | 46 | class CamVidDataSet(data.Dataset): 47 | """ 48 | CamVidDataSet is employed to load train set 49 | Args: 50 | root: the CamVid dataset path, 51 | list_path: camvid_train_list.txt, include partial path 52 | 53 | """ 54 | def __init__(self, root=None, list_path='./dataset/list/CamVid/camvid_train_list.txt', 55 | max_iters=None, crop_size=(360, 360), 56 | mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255, vars=(1,1,1), RGB=False): 57 | self.root = root 58 | self.list_path = list_path 59 | self.crop_h, self.crop_w = crop_size 60 | self.scale = scale 61 | self.ignore_label = ignore_label 62 | self.mean = mean 63 | self.vars = vars 64 | self.is_mirror = mirror 65 | self.rgb = RGB 66 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 67 | if not max_iters == None: 68 | self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 69 | self.files = [] 70 | 71 | for name in self.img_ids: 72 | img_file = osp.join(self.root, name.split()[0]) 73 | label_file = osp.join(self.root, name.split()[1]) 74 | self.files.append({ 75 | "img": img_file, 76 | "label": label_file, 77 | "name": name 78 | }) 79 | 80 | print("length of train set: ", len(self.files)) 81 | 82 | def __len__(self): 83 | return len(self.files) 84 | 85 | def __getitem__(self, index): 86 | datafiles = self.files[index] 87 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 88 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 89 | label[label==11] = self.ignore_label 90 | size = image.shape 91 | name = datafiles["name"] 92 | if self.scale: 93 | f_scale = 0.5 + random.randint(0, 15) / 10.0 # random resize between 0.5 and 2 94 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR) 95 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST) 96 | 97 | image = np.asarray(image, np.float32) 98 | 99 | if self.rgb: 100 | image = image[:,:, ::-1] ## BGR -> RGB 101 | image /= 255 ## using pytorch pretrained models 102 | 103 | image -= self.mean 104 | image /= self.vars 105 | 106 | img_h, img_w = label.shape 107 | pad_h = max(self.crop_h - img_h, 0) 108 | pad_w = max(self.crop_w - img_w, 0) 109 | if pad_h > 0 or pad_w > 0: 110 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 111 | pad_w, cv2.BORDER_CONSTANT, 112 | value=(0.0, 0.0, 0.0)) 113 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 114 | pad_w, cv2.BORDER_CONSTANT, 115 | value=(self.ignore_label,)) 116 | else: 117 | img_pad, label_pad = image, label 118 | 119 | img_h, img_w = label_pad.shape 120 | h_off = random.randint(0, img_h - self.crop_h) 121 | w_off = random.randint(0, img_w - self.crop_w) 122 | 123 | image = np.asarray(img_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], 
np.float32) 124 | label = np.asarray(label_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32) 125 | 126 | image = image.transpose((2, 0, 1)) # NHWC -> NCHW 127 | 128 | if self.is_mirror: 129 | flip = np.random.choice(2) * 2 - 1 130 | image = image[:, :, ::flip] 131 | label = label[:, ::flip] 132 | 133 | return image.copy(), label.copy(), np.array(size), name 134 | 135 | 136 | class CamVidTestDataSet(data.Dataset): 137 | """ 138 | CamVidValDataSet is employed to load val set 139 | Args: 140 | root: the CamVid dataset path, 141 | list_path: camvid_val_list.txt, include partial path 142 | 143 | """ 144 | 145 | def __init__(self, root='/home/DataSet/CamVid', list_path='./dataset/list/CamVid/camvid_val_list.txt', 146 | f_scale=1, mean=(128, 128, 128), ignore_label=255, vars=(1,1,1), RGB=False): 147 | self.root = root 148 | self.list_path = list_path 149 | self.ignore_label = ignore_label 150 | self.mean = mean 151 | self.vars = vars 152 | self.rgb = RGB 153 | self.f_scale = f_scale 154 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 155 | self.files = [] 156 | for name in self.img_ids: 157 | img_file = osp.join(self.root, name.split()[0]) 158 | label_file = osp.join(self.root, name.split()[1]) 159 | image_name = name.strip().split()[0].strip().split('/', 1)[1].split('.')[0] 160 | self.files.append({ 161 | "img": img_file, 162 | "label": label_file, 163 | "name": image_name 164 | }) 165 | 166 | print("length of Test Set: ", len(self.files)) 167 | 168 | def __len__(self): 169 | return len(self.files) 170 | 171 | def __getitem__(self, index): 172 | datafiles = self.files[index] 173 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 174 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 175 | size = image.shape 176 | name = datafiles["name"] 177 | if self.f_scale != 1: 178 | image = cv2.resize(image, None, fx=self.f_scale, fy=self.f_scale, interpolation=cv2.INTER_LINEAR) 179 | label = cv2.resize(label, None, fx=self.f_scale, fy=self.f_scale, interpolation = cv2.INTER_NEAREST) 180 | 181 | label[label == 11] = self.ignore_label 182 | 183 | image = np.asarray(image, np.float32) 184 | 185 | if self.rgb: 186 | image = image[:, :, ::-1] ## BGR -> RGB 187 | image /= 255 ## using pytorch pretrained models 188 | 189 | image -= self.mean 190 | image /= self.vars 191 | 192 | image = image.transpose((2, 0, 1)) # HWC -> CHW 193 | 194 | # print('image.shape:',image.shape) 195 | return image.copy(), label.copy(), np.array(size), name 196 | -------------------------------------------------------------------------------- /libs/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | 4 | 5 | import os.path as osp 6 | import numpy as np 7 | import random 8 | import cv2 9 | 10 | from torch.utils import data 11 | 12 | 13 | class Cityscapes(data.Dataset): 14 | def __init__(self, root, list_path="./list/cityscapes/train.txt", max_iters=None, crop_size=(321, 321), 15 | mean=(128, 128, 128), vars=(1,1,1), scale=True, mirror=True, ignore_label=255, RGB=False): 16 | self.root = root 17 | self.list_path = list_path 18 | self.crop_h, self.crop_w = crop_size 19 | self.scale = scale 20 | self.ignore_label = ignore_label 21 | self.mean = mean 22 | self.vars = vars 23 | self.is_mirror = mirror 24 | self.rgb = RGB 25 | self.img_ids = [i_id.strip().split() for i_id in open(list_path)] 26 | if not max_iters==None: 27 | self.img_ids = self.img_ids * 
int(np.ceil(float(max_iters) / len(self.img_ids))) 28 | self.files = [] 29 | for item in self.img_ids: 30 | image_path, label_path = item 31 | name = osp.splitext(osp.basename(label_path))[0] 32 | img_file = osp.join(self.root, image_path) 33 | label_file = osp.join(self.root, label_path) 34 | self.files.append({ 35 | "img": img_file, 36 | "label": label_file, 37 | "name": name 38 | }) 39 | self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, 40 | 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, 41 | 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, 42 | 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, 43 | 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, 44 | 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} 45 | print('{} images are loaded!'.format(len(self.files))) 46 | 47 | def __len__(self): 48 | return len(self.files) 49 | 50 | def generate_scale_label(self, image, label): 51 | f_scale = 0.7 + random.randint(0, 14) / 10.0 52 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) 53 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) 54 | return image, label 55 | 56 | def id2trainId(self, label, reverse=False): 57 | label_copy = label.copy() 58 | if reverse: 59 | for v, k in self.id_to_trainid.items(): 60 | label_copy[label == k] = v 61 | else: 62 | for k, v in self.id_to_trainid.items(): 63 | label_copy[label == k] = v 64 | return label_copy 65 | 66 | def __getitem__(self, index): 67 | datafiles = self.files[index] 68 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 69 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 70 | label = self.id2trainId(label) 71 | 72 | if self.scale: 73 | image, label = self.generate_scale_label(image, label) 74 | image = np.asarray(image, np.float32) 75 | 76 | if self.rgb: 77 | image = image[:,:, ::-1] ## BGR -> RGB 78 | image /= 255 ## using pytorch pretrained models 79 | 80 | image -= self.mean 81 | image /= self.vars 82 | 83 | img_h, img_w = label.shape 84 | pad_h = max(self.crop_h - img_h, 0) 85 | pad_w = max(self.crop_w - img_w, 0) 86 | if pad_h > 0 or pad_w > 0: 87 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 88 | pad_w, cv2.BORDER_CONSTANT, 89 | value=(0.0, 0.0, 0.0)) 90 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 91 | pad_w, cv2.BORDER_CONSTANT, 92 | value=(self.ignore_label,)) 93 | else: 94 | img_pad, label_pad = image, label 95 | 96 | img_h, img_w = label_pad.shape 97 | h_off = random.randint(0, img_h - self.crop_h) 98 | w_off = random.randint(0, img_w - self.crop_w) 99 | 100 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 101 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 102 | 103 | image = image.transpose((2, 0, 1)) 104 | if self.is_mirror: 105 | flip = np.random.choice(2) * 2 - 1 106 | image = image[:, :, ::flip] 107 | label = label[:, ::flip] 108 | 109 | return image.copy(), label.copy() -------------------------------------------------------------------------------- /libs/datasets/mapillary.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | 4 | import os 5 | import numpy as np 6 | import random 7 | import cv2 8 | from torch.utils import data 9 | 10 | 11 | class MapDataSet(data.Dataset): 12 | def 
__init__(self, root, split="train", max_iters=80000, crop_size=(321, 321), mean=(128, 128, 128), vars=(1, 1, 1), scale=True, 13 | mirror=True, ignore_label=255, RGB=False): 14 | self.root = root 15 | self.crop_h, self.crop_w = crop_size 16 | self.scale = scale 17 | self.ignore_label = ignore_label 18 | self.mean = mean 19 | self.vars = vars 20 | self.is_mirror = mirror 21 | self.rgb = RGB 22 | self.img_list, self.label_list = self._make_dataset(root, split) 23 | assert len(self.label_list) == len(self.img_list) 24 | print("Found dataset {} images".format(len(self.img_list))) 25 | if not max_iters == None: 26 | self.img_total = self.img_list * int(np.ceil(float(max_iters) / len(self.img_list))) 27 | self.label_total = self.label_list * int(np.ceil(float(max_iters) / len(self.label_list))) 28 | self.pair_list = [] 29 | for i, img in enumerate(self.img_total): 30 | self.pair_list.append({ 31 | "image": img, 32 | "label": self.label_total[i] 33 | }) 34 | print('Total {} images are loaded!'.format(len(self.pair_list))) 35 | 36 | self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, 37 | 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, 38 | 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, 39 | 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, 40 | 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, 41 | 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} 42 | 43 | def __len__(self): 44 | return len(self.pair_list) 45 | 46 | def generate_scale_label(self, image, label): 47 | f_scale = 0.7 + random.randint(0, 14) / 10.0 48 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR) 49 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST) 50 | return image, label 51 | 52 | def id2trainId(self, label, reverse=False): 53 | label_copy = label.copy() 54 | if reverse: 55 | for v, k in self.id_to_trainid.items(): 56 | label_copy[label == k] = v 57 | else: 58 | for k, v in self.id_to_trainid.items(): 59 | label_copy[label == k] = v 60 | return label_copy 61 | 62 | def _make_dataset(self, root, split="train"): 63 | image_list = [] 64 | label_list = [] 65 | if split == "train": 66 | floder = os.path.join(root, "training") 67 | image_floder = os.path.join(floder, "images") 68 | label_floder = os.path.join(floder, "seg19_lbl") 69 | for sub_file in os.listdir(image_floder): 70 | image_list.append(os.path.join(image_floder, sub_file)) 71 | for sub_file in os.listdir(label_floder): 72 | label_list.append(os.path.join(label_floder, sub_file)) 73 | if split == "trainval": 74 | train_floder = os.path.join(root, "training") 75 | val_floder = os.path.join(root, "validation") 76 | 77 | image_floder = os.path.join(train_floder, "images") 78 | label_floder = os.path.join(train_floder, "seg19_lbl") 79 | for sub_file in os.listdir(image_floder): 80 | image_list.append(os.path.join(image_floder, sub_file)) 81 | for sub_file in os.listdir(label_floder): 82 | label_list.append(os.path.join(label_floder, sub_file)) 83 | 84 | image_floder = os.path.join(val_floder, "images") 85 | label_floder = os.path.join(val_floder, "seg19_lbl") 86 | for sub_file in os.listdir(image_floder): 87 | image_list.append(os.path.join(image_floder, sub_file)) 88 | for sub_file in os.listdir(label_floder): 89 | label_list.append(os.path.join(label_floder, sub_file)) 90 | 91 | image_list.sort() 92 | label_list.sort() 93 | return image_list, label_list 
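# Note: the image/label pairing built above relies on os.listdir() returning
# matching file names for images/ and seg19_lbl/ once both lists are sorted;
# os.listdir() itself gives no ordering guarantee, which is why both lists are
# sorted before being returned and why the two folders must contain files with
# identical base names.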
94 | 95 | def __getitem__(self, index): 96 | datafiles = self.pair_list[index] 97 | image = cv2.imread(datafiles["image"], cv2.IMREAD_COLOR) 98 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 99 | label = self.id2trainId(label) 100 | size = image.shape 101 | 102 | if self.scale: 103 | image, label = self.generate_scale_label(image, label) 104 | image = np.asarray(image, np.float32) 105 | 106 | if self.rgb: 107 | image = image[:, :, ::-1] ## BGR -> RGB 108 | image /= 255 ## using pytorch pretrained models 109 | 110 | image -= self.mean 111 | image /= self.vars 112 | 113 | img_h, img_w = label.shape 114 | pad_h = max(self.crop_h - img_h, 0) 115 | pad_w = max(self.crop_w - img_w, 0) 116 | if pad_h > 0 or pad_w > 0: 117 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 118 | pad_w, cv2.BORDER_CONSTANT, 119 | value=(0.0, 0.0, 0.0)) 120 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 121 | pad_w, cv2.BORDER_CONSTANT, 122 | value=(self.ignore_label,)) 123 | else: 124 | img_pad, label_pad = image, label 125 | 126 | img_h, img_w = label_pad.shape 127 | h_off = random.randint(0, img_h - self.crop_h) 128 | w_off = random.randint(0, img_w - self.crop_w) 129 | image = np.asarray(img_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32) 130 | label = np.asarray(label_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32) 131 | image = image.transpose((2, 0, 1)) 132 | if self.is_mirror: 133 | flip = np.random.choice(2) * 2 - 1 134 | image = image[:, :, ::flip] 135 | label = label[:, ::flip] 136 | 137 | return image.copy(), label.copy(), np.array(size) -------------------------------------------------------------------------------- /libs/models/BiSegNet.py: -------------------------------------------------------------------------------- 1 | # @Author: yuchangqian 2 | # Modified: XiangtaiLi 3 | # BiSeg uses deeply based backbone. 
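# Architecture overview: a shallow SpatialPath (three stride-2 convolutions,
# 1/8 resolution, 128 channels) runs in parallel with a ResNet-18 context path
# whose two deepest stages are reweighted by AttentionRefinement modules and a
# global-average context vector; the two paths are merged by FeatureFusion, and
# extra BiSeNetHeads are built for auxiliary supervision during training.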
4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from libs.models.backbone.resnet import resnet18 10 | from libs.core.operators import ConvBnRelu, FeatureFusion, AttentionRefinement 11 | 12 | 13 | class SpatialPath(nn.Module): 14 | def __init__(self, in_planes, out_planes, norm_layer=nn.BatchNorm2d): 15 | super(SpatialPath, self).__init__() 16 | inner_channel = 64 17 | self.conv_7x7 = ConvBnRelu(in_planes, inner_channel, 7, 2, 3, 18 | has_bn=True, norm_layer=norm_layer, 19 | has_relu=True, has_bias=False) 20 | self.conv_3x3_1 = ConvBnRelu(inner_channel, inner_channel, 3, 2, 1, 21 | has_bn=True, norm_layer=norm_layer, 22 | has_relu=True, has_bias=False) 23 | self.conv_3x3_2 = ConvBnRelu(inner_channel, inner_channel, 3, 2, 1, 24 | has_bn=True, norm_layer=norm_layer, 25 | has_relu=True, has_bias=False) 26 | self.conv_1x1 = ConvBnRelu(inner_channel, out_planes, 1, 1, 0, 27 | has_bn=True, norm_layer=norm_layer, 28 | has_relu=True, has_bias=False) 29 | 30 | def forward(self, x): 31 | x = self.conv_7x7(x) 32 | x = self.conv_3x3_1(x) 33 | x = self.conv_3x3_2(x) 34 | output = self.conv_1x1(x) 35 | 36 | return output 37 | 38 | 39 | class BiSeNetHead(nn.Module): 40 | def __init__(self, in_planes, out_planes, scale, 41 | is_aux=False, norm_layer=nn.BatchNorm2d): 42 | super(BiSeNetHead, self).__init__() 43 | if is_aux: 44 | self.conv_3x3 = ConvBnRelu(in_planes, 128, 3, 1, 1, 45 | has_bn=True, norm_layer=norm_layer, 46 | has_relu=True, has_bias=False) 47 | else: 48 | self.conv_3x3 = ConvBnRelu(in_planes, 64, 3, 1, 1, 49 | has_bn=True, norm_layer=norm_layer, 50 | has_relu=True, has_bias=False) 51 | # self.dropout = nn.Dropout(0.1) 52 | if is_aux: 53 | self.conv_1x1 = nn.Conv2d(128, out_planes, kernel_size=1, 54 | stride=1, padding=0) 55 | else: 56 | self.conv_1x1 = nn.Conv2d(64, out_planes, kernel_size=1, 57 | stride=1, padding=0) 58 | self.scale = scale 59 | 60 | def forward(self, x): 61 | fm = self.conv_3x3(x) 62 | # fm = self.dropout(fm) 63 | output = self.conv_1x1(fm) 64 | if self.scale > 1: 65 | output = F.interpolate(output, scale_factor=self.scale, 66 | mode='bilinear', 67 | align_corners=True) 68 | 69 | return output 70 | 71 | 72 | class BiSeNet(nn.Module): 73 | def __init__(self, out_planes, is_training=False, 74 | pretrained_model=None, 75 | norm_layer=nn.BatchNorm2d): 76 | super(BiSeNet, self).__init__() 77 | self.backbone = resnet18(pretrained_model, norm_layer=norm_layer, 78 | bn_eps=1e-5, 79 | bn_momentum=0.1, 80 | deep_stem=True, stem_width=64) 81 | 82 | self.business_layer = [] 83 | self.is_training = is_training 84 | 85 | self.spatial_path = SpatialPath(3, 128, norm_layer) 86 | 87 | conv_channel = 128 88 | self.global_context = nn.Sequential( 89 | nn.AdaptiveAvgPool2d(1), 90 | ConvBnRelu(512, conv_channel, 1, 1, 0, 91 | has_bn=True, 92 | has_relu=True, has_bias=False, norm_layer=norm_layer) 93 | ) 94 | 95 | # stage = [512, 256, 128, 64] 96 | arms = [AttentionRefinement(512, conv_channel, norm_layer), 97 | AttentionRefinement(256, conv_channel, norm_layer)] 98 | refines = [ConvBnRelu(conv_channel, conv_channel, 3, 1, 1, 99 | has_bn=True, norm_layer=norm_layer, 100 | has_relu=True, has_bias=False), 101 | ConvBnRelu(conv_channel, conv_channel, 3, 1, 1, 102 | has_bn=True, norm_layer=norm_layer, 103 | has_relu=True, has_bias=False)] 104 | 105 | if is_training: 106 | heads = [BiSeNetHead(conv_channel, out_planes, 2, 107 | True, norm_layer), 108 | BiSeNetHead(conv_channel, out_planes, 1, 109 | True, norm_layer), 110 | BiSeNetHead(conv_channel * 2, 
out_planes, 1, 111 | False, norm_layer)] 112 | else: 113 | heads = [None, None, 114 | BiSeNetHead(conv_channel * 2, out_planes, 1, 115 | False, norm_layer)] 116 | 117 | self.ffm = FeatureFusion(conv_channel * 2, conv_channel * 2, 118 | 1, norm_layer) 119 | 120 | self.arms = nn.ModuleList(arms) 121 | self.refines = nn.ModuleList(refines) 122 | self.heads = nn.ModuleList(heads) 123 | 124 | self.business_layer.append(self.spatial_path) 125 | self.business_layer.append(self.global_context) 126 | self.business_layer.append(self.arms) 127 | self.business_layer.append(self.refines) 128 | self.business_layer.append(self.heads) 129 | self.business_layer.append(self.ffm) 130 | 131 | 132 | def forward(self, data, label=None): 133 | spatial_out = self.spatial_path(data) 134 | 135 | context_blocks = self.backbone(data) 136 | context_blocks.reverse() 137 | 138 | global_context = self.global_context(context_blocks[0]) 139 | global_context = F.interpolate(global_context, 140 | size=context_blocks[0].size()[2:], 141 | mode='bilinear', align_corners=True) 142 | 143 | last_fm = global_context 144 | pred_out = [] 145 | 146 | for i, (fm, arm, refine) in enumerate(zip(context_blocks[:2], self.arms, 147 | self.refines)): 148 | fm = arm(fm) 149 | fm += last_fm 150 | last_fm = F.interpolate(fm, size=(context_blocks[i + 1].size()[2:]), 151 | mode='bilinear', align_corners=True) 152 | last_fm = refine(last_fm) 153 | pred_out.append(last_fm) 154 | context_out = last_fm 155 | 156 | concate_fm = self.ffm(spatial_out, context_out) 157 | pred_out.append(concate_fm) 158 | 159 | if self.is_training: 160 | return pred_out 161 | 162 | return F.log_softmax(self.heads[-1](pred_out[-1]), dim=1) 163 | 164 | 165 | if __name__ == '__main__': 166 | i = torch.Tensor(1,3,512,512).cuda() 167 | m = BiSeNet(19).cuda() 168 | m.eval() 169 | o = m(i) 170 | print(o.size()) -------------------------------------------------------------------------------- /libs/models/DFANet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | """ 4 | Implementation of DFANet: a little different from the origin paper, I add more dsn loss for training. 5 | DFANet uses modified Xception backbone pretrained on ImageNet. 
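The encoder is applied three times: in each later pass the enc2/enc3/enc4 features of the
previous pass are concatenated with the current ones, and the two dsn heads supervise the
stage-1 and stage-2 outputs in addition to the fused prediction.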
6 | """ 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from libs.models.backbone.xception import Enc, FCAttention, XceptionA 12 | from libs.core.operators import ConvBnRelu, dsn 13 | 14 | 15 | class DFANet(nn.Module): 16 | def __init__(self, nclass, **kwargs): 17 | super(DFANet, self).__init__() 18 | self.backbone = XceptionA() 19 | 20 | self.enc2_2 = Enc(240, 48, 4, **kwargs) 21 | self.enc3_2 = Enc(144, 96, 6, **kwargs) 22 | self.enc4_2 = Enc(288, 192, 4, **kwargs) 23 | self.fca_2 = FCAttention(192, **kwargs) 24 | 25 | self.enc2_3 = Enc(240, 48, 4, **kwargs) 26 | self.enc3_3 = Enc(144, 96, 6, **kwargs) 27 | self.enc3_4 = Enc(288, 192, 4, **kwargs) 28 | self.fca_3 = FCAttention(192, **kwargs) 29 | 30 | self.enc2_1_reduce = ConvBnRelu(48, 32, 1, **kwargs) 31 | self.enc2_2_reduce = ConvBnRelu(48, 32, 1, **kwargs) 32 | self.enc2_3_reduce = ConvBnRelu(48, 32, 1, **kwargs) 33 | self.conv_fusion = ConvBnRelu(32, 32, 1, **kwargs) 34 | 35 | self.fca_1_reduce = ConvBnRelu(192, 32, 1, **kwargs) 36 | self.fca_2_reduce = ConvBnRelu(192, 32, 1, **kwargs) 37 | self.fca_3_reduce = ConvBnRelu(192, 32, 1, **kwargs) 38 | self.conv_out = nn.Conv2d(32, nclass, 1) 39 | 40 | self.dsn1 = dsn(192, nclass) 41 | self.dsn2 = dsn(192, nclass) 42 | 43 | self.__setattr__('exclusive', ['enc2_2', 'enc3_2', 'enc4_2', 'fca_2', 'enc2_3', 'enc3_3', 'enc3_4', 'fca_3', 44 | 'enc2_1_reduce', 'enc2_2_reduce', 'enc2_3_reduce', 'conv_fusion', 'fca_1_reduce', 45 | 'fca_2_reduce', 'fca_3_reduce', 'conv_out']) 46 | 47 | def forward(self, x): 48 | # backbone 49 | stage1_conv1 = self.backbone.conv1(x) 50 | stage1_enc2 = self.backbone.enc2(stage1_conv1) 51 | stage1_enc3 = self.backbone.enc3(stage1_enc2) 52 | stage1_enc4 = self.backbone.enc4(stage1_enc3) 53 | stage1_fca = self.backbone.fca(stage1_enc4) 54 | stage1_out = F.interpolate(stage1_fca, scale_factor=4, mode='bilinear', align_corners=True) 55 | 56 | dsn1 = self.dsn1(stage1_out) 57 | # stage2 58 | stage2_enc2 = self.enc2_2(torch.cat([stage1_enc2, stage1_out], dim=1)) 59 | stage2_enc3 = self.enc3_2(torch.cat([stage1_enc3, stage2_enc2], dim=1)) 60 | stage2_enc4 = self.enc4_2(torch.cat([stage1_enc4, stage2_enc3], dim=1)) 61 | stage2_fca = self.fca_2(stage2_enc4) 62 | stage2_out = F.interpolate(stage2_fca, scale_factor=4, mode='bilinear', align_corners=True) 63 | 64 | dsn2 = self.dsn2(stage2_out) 65 | 66 | # stage3 67 | stage3_enc2 = self.enc2_3(torch.cat([stage2_enc2, stage2_out], dim=1)) 68 | stage3_enc3 = self.enc3_3(torch.cat([stage2_enc3, stage3_enc2], dim=1)) 69 | stage3_enc4 = self.enc3_4(torch.cat([stage2_enc4, stage3_enc3], dim=1)) 70 | stage3_fca = self.fca_3(stage3_enc4) 71 | 72 | 73 | stage1_enc2_decoder = self.enc2_1_reduce(stage1_enc2) 74 | stage2_enc2_docoder = F.interpolate(self.enc2_2_reduce(stage2_enc2), scale_factor=2, 75 | mode='bilinear', align_corners=True) 76 | stage3_enc2_decoder = F.interpolate(self.enc2_3_reduce(stage3_enc2), scale_factor=4, 77 | mode='bilinear', align_corners=True) 78 | fusion = stage1_enc2_decoder + stage2_enc2_docoder + stage3_enc2_decoder 79 | fusion = self.conv_fusion(fusion) 80 | 81 | stage1_fca_decoder = F.interpolate(self.fca_1_reduce(stage1_fca), scale_factor=4, 82 | mode='bilinear', align_corners=True) 83 | stage2_fca_decoder = F.interpolate(self.fca_2_reduce(stage2_fca), scale_factor=8, 84 | mode='bilinear', align_corners=True) 85 | stage3_fca_decoder = F.interpolate(self.fca_3_reduce(stage3_fca), scale_factor=16, 86 | mode='bilinear', align_corners=True) 87 | fusion = fusion + 
stage1_fca_decoder + stage2_fca_decoder + stage3_fca_decoder 88 | 89 | outputs = list() 90 | out = self.conv_out(fusion) 91 | outputs.append(out) 92 | outputs.append(dsn1) 93 | outputs.append(dsn2) 94 | return outputs 95 | 96 | def dfanet(num_classes=19, data_set="cityscapes"): 97 | return DFANet(num_classes) 98 | 99 | 100 | if __name__ == '__main__': 101 | i = torch.Tensor(1,3,512,512).cuda() 102 | m = DFANet(19).cuda() 103 | m.eval() 104 | o = m(i) 105 | print(o[0].size()) 106 | print("output length: ", len(o)) -------------------------------------------------------------------------------- /libs/models/DFSegNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | # Pytorch Implementation of DongFeng SegNet: 4 | # Partial Order Pruning: for Best Speed/Accuracy Trade-off in Neural Architecture Search. 5 | # The backbone is pretrained on ImageNet 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from libs.core.operators import PSPModule, conv3x3, dsn 12 | from libs.models.backbone.dfnet import dfnetv2, dfnetv1 13 | 14 | 15 | class FusionNode(nn.Module): 16 | def __init__(self, inplane): 17 | super(FusionNode, self).__init__() 18 | self.fusion = conv3x3(inplane*2, inplane) 19 | 20 | def forward(self, x): 21 | x_h, x_l = x 22 | size = x_l.size()[2:] 23 | x_h = F.upsample(x_h, size, mode="bilinear", align_corners=True) 24 | res = self.fusion(torch.cat([x_h,x_l],dim=1)) 25 | return res 26 | 27 | 28 | class DFSeg(nn.Module): 29 | def __init__(self, nclass, type="dfv1"): 30 | super(DFSeg, self).__init__() 31 | 32 | if type == "dfv1": 33 | self.backbone = dfnetv1() 34 | else: 35 | self.backbone = dfnetv2() 36 | 37 | self.cc5 = nn.Conv2d(128,128,1) 38 | self.cc4 = nn.Conv2d(256,128,1) 39 | self.cc3 = nn.Conv2d(128,128,1) 40 | 41 | self.ppm = PSPModule(512,128) 42 | 43 | self.fn4 = FusionNode(128) 44 | self.fn3 = FusionNode(128) 45 | 46 | self.fc = dsn(128, nclass) 47 | 48 | def forward(self, x): 49 | x3,x4,x5 = self.backbone(x) 50 | x5 = self.ppm(x5) 51 | x5 = self.cc5(x5) 52 | x4 = self.cc4(x4) 53 | f4 = self.fn4([x5, x4]) 54 | x3 = self.cc3(x3) 55 | out = self.fn3([f4, x3]) 56 | out = self.fc(out) 57 | 58 | return [out] 59 | 60 | 61 | def dfnetv1seg(num_classes=19, data_set="cityscapes"): 62 | return DFSeg(num_classes,type="dfv1") 63 | 64 | 65 | def dfnetv2seg(num_classes=19, data_set="cityscapes"): 66 | return DFSeg(num_classes,type="dfv2") 67 | 68 | 69 | if __name__ == '__main__': 70 | i = torch.Tensor(1,3,512,512).cuda() 71 | m = DFSeg(19,"dfv2").cuda() 72 | m.eval() 73 | o = m(i) 74 | print(o[0].size()) -------------------------------------------------------------------------------- /libs/models/ESPNet.py: -------------------------------------------------------------------------------- 1 | # Author: "Sachin Mehta" 2 | # ESPNet doesn't use pretrained backbone network while usually takes longer training time. 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class CBR(nn.Module): 8 | ''' 9 | This class defines the convolution layer with batch normalization and PReLU activation 10 | ''' 11 | 12 | def __init__(self, nIn, nOut, kSize, stride=1): 13 | ''' 14 | 15 | :param nIn: number of input channels 16 | :param nOut: number of output channels 17 | :param kSize: kernel size 18 | :param stride: stride rate for down-sampling. 
Default is 1 19 | ''' 20 | super().__init__() 21 | padding = int((kSize - 1) / 2) 22 | # self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False) 23 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False) 24 | # self.conv1 = nn.Conv2d(nOut, nOut, (1, kSize), stride=1, padding=(0, padding), bias=False) 25 | self.bn = nn.BatchNorm2d(nOut, eps=1e-03) 26 | self.act = nn.PReLU(nOut) 27 | 28 | def forward(self, input): 29 | ''' 30 | :param input: input feature map 31 | :return: transformed feature map 32 | ''' 33 | output = self.conv(input) 34 | # output = self.conv1(output) 35 | output = self.bn(output) 36 | output = self.act(output) 37 | return output 38 | 39 | 40 | class BR(nn.Module): 41 | ''' 42 | This class groups the batch normalization and PReLU activation 43 | ''' 44 | 45 | def __init__(self, nOut): 46 | ''' 47 | :param nOut: output feature maps 48 | ''' 49 | super().__init__() 50 | self.bn = nn.BatchNorm2d(nOut, eps=1e-03) 51 | self.act = nn.PReLU(nOut) 52 | 53 | def forward(self, input): 54 | ''' 55 | :param input: input feature map 56 | :return: normalized and thresholded feature map 57 | ''' 58 | output = self.bn(input) 59 | output = self.act(output) 60 | return output 61 | 62 | 63 | class CB(nn.Module): 64 | ''' 65 | This class groups the convolution and batch normalization 66 | ''' 67 | 68 | def __init__(self, nIn, nOut, kSize, stride=1): 69 | ''' 70 | :param nIn: number of input channels 71 | :param nOut: number of output channels 72 | :param kSize: kernel size 73 | :param stride: optinal stide for down-sampling 74 | ''' 75 | super().__init__() 76 | padding = int((kSize - 1) / 2) 77 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False) 78 | self.bn = nn.BatchNorm2d(nOut, eps=1e-03) 79 | 80 | def forward(self, input): 81 | ''' 82 | 83 | :param input: input feature map 84 | :return: transformed feature map 85 | ''' 86 | output = self.conv(input) 87 | output = self.bn(output) 88 | return output 89 | 90 | 91 | class C(nn.Module): 92 | ''' 93 | This class is for a convolutional layer. 94 | ''' 95 | 96 | def __init__(self, nIn, nOut, kSize, stride=1): 97 | ''' 98 | 99 | :param nIn: number of input channels 100 | :param nOut: number of output channels 101 | :param kSize: kernel size 102 | :param stride: optional stride rate for down-sampling 103 | ''' 104 | super().__init__() 105 | padding = int((kSize - 1) / 2) 106 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False) 107 | 108 | def forward(self, input): 109 | ''' 110 | :param input: input feature map 111 | :return: transformed feature map 112 | ''' 113 | output = self.conv(input) 114 | return output 115 | 116 | 117 | class CDilated(nn.Module): 118 | ''' 119 | This class defines the dilated convolution. 
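The padding is scaled by the dilation rate d, so for an odd kernel size and stride 1 the
spatial resolution of the input is preserved.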
120 | ''' 121 | 122 | def __init__(self, nIn, nOut, kSize, stride=1, d=1): 123 | ''' 124 | :param nIn: number of input channels 125 | :param nOut: number of output channels 126 | :param kSize: kernel size 127 | :param stride: optional stride rate for down-sampling 128 | :param d: optional dilation rate 129 | ''' 130 | super().__init__() 131 | padding = int((kSize - 1) / 2) * d 132 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False, 133 | dilation=d) 134 | 135 | def forward(self, input): 136 | ''' 137 | :param input: input feature map 138 | :return: transformed feature map 139 | ''' 140 | output = self.conv(input) 141 | return output 142 | 143 | 144 | class DownSamplerB(nn.Module): 145 | def __init__(self, nIn, nOut): 146 | super().__init__() 147 | n = int(nOut / 5) 148 | n1 = nOut - 4 * n 149 | self.c1 = C(nIn, n, 3, 2) 150 | self.d1 = CDilated(n, n1, 3, 1, 1) 151 | self.d2 = CDilated(n, n, 3, 1, 2) 152 | self.d4 = CDilated(n, n, 3, 1, 4) 153 | self.d8 = CDilated(n, n, 3, 1, 8) 154 | self.d16 = CDilated(n, n, 3, 1, 16) 155 | self.bn = nn.BatchNorm2d(nOut, eps=1e-3) 156 | self.act = nn.PReLU(nOut) 157 | 158 | def forward(self, input): 159 | output1 = self.c1(input) 160 | d1 = self.d1(output1) 161 | d2 = self.d2(output1) 162 | d4 = self.d4(output1) 163 | d8 = self.d8(output1) 164 | d16 = self.d16(output1) 165 | 166 | add1 = d2 167 | add2 = add1 + d4 168 | add3 = add2 + d8 169 | add4 = add3 + d16 170 | 171 | combine = torch.cat([d1, add1, add2, add3, add4], 1) 172 | # combine_in_out = input + combine 173 | output = self.bn(combine) 174 | output = self.act(output) 175 | return output 176 | 177 | 178 | class DilatedParllelResidualBlockB(nn.Module): 179 | ''' 180 | This class defines the ESP block, which is based on the following principle 181 | Reduce ---> Split ---> Transform --> Merge 182 | ''' 183 | 184 | def __init__(self, nIn, nOut, add=True): 185 | ''' 186 | :param nIn: number of input channels 187 | :param nOut: number of output channels 188 | :param add: if true, add a residual connection through identity operation. 
You can use projection too as 189 | in ResNet paper, but we avoid to use it if the dimensions are not the same because we do not want to 190 | increase the module complexity 191 | ''' 192 | super().__init__() 193 | n = int(nOut / 5) 194 | n1 = nOut - 4 * n 195 | self.c1 = C(nIn, n, 1, 1) 196 | self.d1 = CDilated(n, n1, 3, 1, 1) # dilation rate of 2^0 197 | self.d2 = CDilated(n, n, 3, 1, 2) # dilation rate of 2^1 198 | self.d4 = CDilated(n, n, 3, 1, 4) # dilation rate of 2^2 199 | self.d8 = CDilated(n, n, 3, 1, 8) # dilation rate of 2^3 200 | self.d16 = CDilated(n, n, 3, 1, 16) # dilation rate of 2^4 201 | self.bn = BR(nOut) 202 | self.add = add 203 | 204 | def forward(self, input): 205 | ''' 206 | :param input: input feature map 207 | :return: transformed feature map 208 | ''' 209 | # reduce 210 | output1 = self.c1(input) 211 | # split and transform 212 | d1 = self.d1(output1) 213 | d2 = self.d2(output1) 214 | d4 = self.d4(output1) 215 | d8 = self.d8(output1) 216 | d16 = self.d16(output1) 217 | 218 | # heirarchical fusion for de-gridding 219 | add1 = d2 220 | add2 = add1 + d4 221 | add3 = add2 + d8 222 | add4 = add3 + d16 223 | 224 | # merge 225 | combine = torch.cat([d1, add1, add2, add3, add4], 1) 226 | 227 | # if residual version 228 | if self.add: 229 | combine = input + combine 230 | output = self.bn(combine) 231 | return output 232 | 233 | 234 | class InputProjectionA(nn.Module): 235 | ''' 236 | This class projects the input image to the same spatial dimensions as the feature map. 237 | For example, if the input image is 512 x512 x3 and spatial dimensions of feature map size are 56x56xF, then 238 | this class will generate an output of 56x56x3 239 | ''' 240 | 241 | def __init__(self, samplingTimes): 242 | ''' 243 | :param samplingTimes: The rate at which you want to down-sample the image 244 | ''' 245 | super().__init__() 246 | self.pool = nn.ModuleList() 247 | for i in range(0, samplingTimes): 248 | # pyramid-based approach for down-sampling 249 | self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) 250 | 251 | def forward(self, input): 252 | ''' 253 | :param input: Input RGB Image 254 | :return: down-sampled image (pyramid-based approach) 255 | ''' 256 | for pool in self.pool: 257 | input = pool(input) 258 | return input 259 | 260 | 261 | class ESPNet_Encoder(nn.Module): 262 | ''' 263 | This class defines the ESPNet-C network in the paper 264 | ''' 265 | 266 | def __init__(self, classes=20, p=5, q=3): 267 | ''' 268 | :param classes: number of classes in the dataset. 
Default is 20 for the cityscapes 269 | :param p: depth multiplier 270 | :param q: depth multiplier 271 | ''' 272 | super().__init__() 273 | self.level1 = CBR(3, 16, 3, 2) 274 | self.sample1 = InputProjectionA(1) 275 | self.sample2 = InputProjectionA(2) 276 | 277 | self.b1 = BR(16 + 3) 278 | self.level2_0 = DownSamplerB(16 + 3, 64) 279 | 280 | self.level2 = nn.ModuleList() 281 | for i in range(0, p): 282 | self.level2.append(DilatedParllelResidualBlockB(64, 64)) 283 | self.b2 = BR(128 + 3) 284 | 285 | self.level3_0 = DownSamplerB(128 + 3, 128) 286 | self.level3 = nn.ModuleList() 287 | for i in range(0, q): 288 | self.level3.append(DilatedParllelResidualBlockB(128, 128)) 289 | self.b3 = BR(256) 290 | 291 | self.classifier = C(256, classes, 1, 1) 292 | 293 | def forward(self, input): 294 | ''' 295 | :param input: Receives the input RGB image 296 | :return: the transformed feature map with spatial dimensions 1/8th of the input image 297 | ''' 298 | output0 = self.level1(input) 299 | inp1 = self.sample1(input) 300 | inp2 = self.sample2(input) 301 | 302 | output0_cat = self.b1(torch.cat([output0, inp1], 1)) 303 | output1_0 = self.level2_0(output0_cat) # down-sampled 304 | 305 | for i, layer in enumerate(self.level2): 306 | if i == 0: 307 | output1 = layer(output1_0) 308 | else: 309 | output1 = layer(output1) 310 | 311 | output1_cat = self.b2(torch.cat([output1, output1_0, inp2], 1)) 312 | 313 | output2_0 = self.level3_0(output1_cat) # down-sampled 314 | for i, layer in enumerate(self.level3): 315 | if i == 0: 316 | output2 = layer(output2_0) 317 | else: 318 | output2 = layer(output2) 319 | 320 | output2_cat = self.b3(torch.cat([output2_0, output2], 1)) 321 | 322 | classifier = self.classifier(output2_cat) 323 | 324 | return classifier 325 | 326 | 327 | class ESPNet(nn.Module): 328 | ''' 329 | This class defines the ESPNet network 330 | ''' 331 | 332 | def __init__(self, classes=20, p=2, q=3, encoderFile=None): 333 | ''' 334 | :param classes: number of classes in the dataset. Default is 20 for the cityscapes 335 | :param p: depth multiplier 336 | :param q: depth multiplier 337 | :param encoderFile: pretrained encoder weights. Recall that we first trained the ESPNet-C and then attached the 338 | RUM-based light weight decoder. See paper for more details. 
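If encoderFile is None, the encoder is trained from scratch together with the decoder.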
339 | ''' 340 | super().__init__() 341 | self.encoder = ESPNet_Encoder(classes, p, q) 342 | if encoderFile != None: 343 | self.encoder.load_state_dict(torch.load(encoderFile)) 344 | print('Encoder loaded!') 345 | # load the encoder modules 346 | self.modules = [] 347 | for i, m in enumerate(self.encoder.children()): 348 | self.modules.append(m) 349 | 350 | # light-weight decoder 351 | self.level3_C = C(128 + 3, classes, 1, 1) 352 | self.br = nn.BatchNorm2d(classes, eps=1e-03) 353 | self.conv = CBR(19 + classes, classes, 3, 1) 354 | 355 | self.up_l3 = nn.Sequential( 356 | nn.ConvTranspose2d(classes, classes, 2, stride=2, padding=0, output_padding=0, bias=False)) 357 | self.combine_l2_l3 = nn.Sequential(BR(2 * classes), 358 | DilatedParllelResidualBlockB(2 * classes, classes, add=False)) 359 | 360 | self.up_l2 = nn.Sequential( 361 | nn.ConvTranspose2d(classes, classes, 2, stride=2, padding=0, output_padding=0, bias=False), BR(classes)) 362 | 363 | self.classifier = nn.ConvTranspose2d(classes, classes, 2, stride=2, padding=0, output_padding=0, bias=False) 364 | 365 | def forward(self, input): 366 | ''' 367 | :param input: RGB image 368 | :return: transformed feature map 369 | ''' 370 | output0 = self.modules[0](input) 371 | inp1 = self.modules[1](input) 372 | inp2 = self.modules[2](input) 373 | 374 | output0_cat = self.modules[3](torch.cat([output0, inp1], 1)) 375 | output1_0 = self.modules[4](output0_cat) # down-sampled 376 | 377 | for i, layer in enumerate(self.modules[5]): 378 | if i == 0: 379 | output1 = layer(output1_0) 380 | else: 381 | output1 = layer(output1) 382 | 383 | output1_cat = self.modules[6](torch.cat([output1, output1_0, inp2], 1)) 384 | 385 | output2_0 = self.modules[7](output1_cat) # down-sampled 386 | for i, layer in enumerate(self.modules[8]): 387 | if i == 0: 388 | output2 = layer(output2_0) 389 | else: 390 | output2 = layer(output2) 391 | 392 | output2_cat = self.modules[9](torch.cat([output2_0, output2], 1)) # concatenate for feature map width expansion 393 | 394 | output2_c = self.up_l3(self.br(self.modules[10](output2_cat))) # RUM 395 | 396 | output1_C = self.level3_C(output1_cat) # project to C-dimensional space 397 | comb_l2_l3 = self.up_l2(self.combine_l2_l3(torch.cat([output1_C, output2_c], 1))) # RUM 398 | 399 | concat_features = self.conv(torch.cat([comb_l2_l3, output0_cat], 1)) 400 | 401 | classifier = self.classifier(concat_features) 402 | 403 | out = [] 404 | out.append(classifier) 405 | return out 406 | 407 | 408 | if __name__ == '__main__': 409 | i = torch.Tensor(1,3,512,512).cuda() 410 | m = ESPNet(19).cuda() 411 | m.eval() 412 | o = m(i) 413 | print(o[0].size()) -------------------------------------------------------------------------------- /libs/models/FastSCNN.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | # FastSCNN doesn't use pretrained backbone network while usually takes longer training time. 
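# Pipeline: LearningToDownsample (one conv + two depthwise-separable convs, each
# stride 2 -> 1/8 resolution, 64 channels), GlobalFeatureExtractor (three groups of
# MobileNetV2-style LinearBottlenecks plus pyramid pooling at 1/32),
# FeatureFusionModule (upsamples the 1/32 branch by 4x and adds it to the 1/8 branch),
# and a depthwise-separable Classifer head; the logits stay at 1/8 resolution, and
# aux=True adds an auxiliary head on the 1/8 features.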
4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class FastSCNN(nn.Module): 10 | def __init__(self, num_classes, aux=False): 11 | super(FastSCNN, self).__init__() 12 | self.aux = aux 13 | self.learning_to_downsample = LearningToDownsample(32, 48, 64) 14 | self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3]) 15 | self.feature_fusion = FeatureFusionModule(64, 128, 128) 16 | self.classifier = Classifer(128, num_classes) 17 | if self.aux: 18 | self.auxlayer = nn.Sequential( 19 | nn.Conv2d(64, 64, 3, padding=1, bias=False), 20 | nn.BatchNorm2d(64), 21 | nn.ReLU(True), 22 | nn.Dropout(0.1), 23 | nn.Conv2d(64, num_classes, 1) 24 | ) 25 | 26 | def forward(self, x): 27 | higher_res_features = self.learning_to_downsample(x) 28 | x = self.global_feature_extractor(higher_res_features) 29 | x = self.feature_fusion(higher_res_features, x) 30 | x = self.classifier(x) 31 | outputs = [] 32 | outputs.append(x) 33 | if self.aux: 34 | auxout = self.auxlayer(higher_res_features) 35 | outputs.append(auxout) 36 | return tuple(outputs) 37 | 38 | 39 | class _ConvBNReLU(nn.Module): 40 | """Conv-BN-ReLU""" 41 | 42 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, **kwargs): 43 | super(_ConvBNReLU, self).__init__() 44 | self.conv = nn.Sequential( 45 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False), 46 | nn.BatchNorm2d(out_channels), 47 | nn.ReLU(True) 48 | ) 49 | 50 | def forward(self, x): 51 | return self.conv(x) 52 | 53 | 54 | class _DSConv(nn.Module): 55 | """Depthwise Separable Convolutions""" 56 | 57 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 58 | super(_DSConv, self).__init__() 59 | self.conv = nn.Sequential( 60 | nn.Conv2d(dw_channels, dw_channels, 3, stride, 1, groups=dw_channels, bias=False), 61 | nn.BatchNorm2d(dw_channels), 62 | nn.ReLU(True), 63 | nn.Conv2d(dw_channels, out_channels, 1, bias=False), 64 | nn.BatchNorm2d(out_channels), 65 | nn.ReLU(True) 66 | ) 67 | 68 | def forward(self, x): 69 | return self.conv(x) 70 | 71 | 72 | class _DWConv(nn.Module): 73 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 74 | super(_DWConv, self).__init__() 75 | self.conv = nn.Sequential( 76 | nn.Conv2d(dw_channels, out_channels, 3, stride, 1, groups=dw_channels, bias=False), 77 | nn.BatchNorm2d(out_channels), 78 | nn.ReLU(True) 79 | ) 80 | 81 | def forward(self, x): 82 | return self.conv(x) 83 | 84 | 85 | class LinearBottleneck(nn.Module): 86 | """LinearBottleneck used in MobileNetV2""" 87 | 88 | def __init__(self, in_channels, out_channels, t=6, stride=2, **kwargs): 89 | super(LinearBottleneck, self).__init__() 90 | self.use_shortcut = stride == 1 and in_channels == out_channels 91 | self.block = nn.Sequential( 92 | # pw 93 | _ConvBNReLU(in_channels, in_channels * t, 1), 94 | # dw 95 | _DWConv(in_channels * t, in_channels * t, stride), 96 | # pw-linear 97 | nn.Conv2d(in_channels * t, out_channels, 1, bias=False), 98 | nn.BatchNorm2d(out_channels) 99 | ) 100 | 101 | def forward(self, x): 102 | out = self.block(x) 103 | if self.use_shortcut: 104 | out = x + out 105 | return out 106 | 107 | 108 | class PyramidPooling(nn.Module): 109 | """Pyramid pooling module""" 110 | 111 | def __init__(self, in_channels, out_channels, **kwargs): 112 | super(PyramidPooling, self).__init__() 113 | inter_channels = int(in_channels / 4) 114 | self.conv1 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 115 | self.conv2 = 
_ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 116 | self.conv3 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 117 | self.conv4 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 118 | self.out = _ConvBNReLU(in_channels * 2, out_channels, 1) 119 | 120 | def pool(self, x, size): 121 | avgpool = nn.AdaptiveAvgPool2d(size) 122 | return avgpool(x) 123 | 124 | def upsample(self, x, size): 125 | return F.interpolate(x, size, mode='bilinear', align_corners=True) 126 | 127 | def forward(self, x): 128 | size = x.size()[2:] 129 | feat1 = self.upsample(self.conv1(self.pool(x, 1)), size) 130 | feat2 = self.upsample(self.conv2(self.pool(x, 2)), size) 131 | feat3 = self.upsample(self.conv3(self.pool(x, 3)), size) 132 | feat4 = self.upsample(self.conv4(self.pool(x, 6)), size) 133 | x = torch.cat([x, feat1, feat2, feat3, feat4], dim=1) 134 | x = self.out(x) 135 | return x 136 | 137 | 138 | class LearningToDownsample(nn.Module): 139 | """Learning to downsample module""" 140 | 141 | def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64, **kwargs): 142 | super(LearningToDownsample, self).__init__() 143 | self.conv = _ConvBNReLU(3, dw_channels1, 3, 2) 144 | self.dsconv1 = _DSConv(dw_channels1, dw_channels2, 2) 145 | self.dsconv2 = _DSConv(dw_channels2, out_channels, 2) 146 | 147 | def forward(self, x): 148 | x = self.conv(x) 149 | x = self.dsconv1(x) 150 | x = self.dsconv2(x) 151 | return x 152 | 153 | 154 | class GlobalFeatureExtractor(nn.Module): 155 | """Global feature extractor module""" 156 | 157 | def __init__(self, in_channels=64, block_channels=(64, 96, 128), 158 | out_channels=128, t=6, num_blocks=(3, 3, 3)): 159 | super(GlobalFeatureExtractor, self).__init__() 160 | self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], t, 2) 161 | self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], t, 2) 162 | self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], t, 1) 163 | self.ppm = PyramidPooling(block_channels[2], out_channels) 164 | 165 | def _make_layer(self, block, inplanes, planes, blocks, t=6, stride=1): 166 | layers = [] 167 | layers.append(block(inplanes, planes, t, stride)) 168 | for i in range(1, blocks): 169 | layers.append(block(planes, planes, t, 1)) 170 | return nn.Sequential(*layers) 171 | 172 | def forward(self, x): 173 | x = self.bottleneck1(x) 174 | x = self.bottleneck2(x) 175 | x = self.bottleneck3(x) 176 | x = self.ppm(x) 177 | return x 178 | 179 | 180 | class FeatureFusionModule(nn.Module): 181 | """Feature fusion module""" 182 | 183 | def __init__(self, highter_in_channels, lower_in_channels, out_channels, scale_factor=4, **kwargs): 184 | super(FeatureFusionModule, self).__init__() 185 | self.scale_factor = scale_factor 186 | self.dwconv = _DWConv(lower_in_channels, out_channels, 1) 187 | self.conv_lower_res = nn.Sequential( 188 | nn.Conv2d(out_channels, out_channels, 1), 189 | nn.BatchNorm2d(out_channels) 190 | ) 191 | self.conv_higher_res = nn.Sequential( 192 | nn.Conv2d(highter_in_channels, out_channels, 1), 193 | nn.BatchNorm2d(out_channels) 194 | ) 195 | self.relu = nn.ReLU(True) 196 | 197 | def forward(self, higher_res_feature, lower_res_feature): 198 | lower_res_feature = F.interpolate(lower_res_feature, scale_factor=4, mode='bilinear', align_corners=True) 199 | lower_res_feature = self.dwconv(lower_res_feature) 200 | lower_res_feature = self.conv_lower_res(lower_res_feature) 201 | 202 | 
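# high-resolution branch: 1x1 conv + BN projection; the two branches are summed and
# passed through ReLU (note the upsample above hard-codes scale_factor=4 instead of
# using self.scale_factor).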
higher_res_feature = self.conv_higher_res(higher_res_feature) 203 | out = higher_res_feature + lower_res_feature 204 | return self.relu(out) 205 | 206 | 207 | class Classifer(nn.Module): 208 | """Classifer""" 209 | def __init__(self, dw_channels, num_classes, stride=1, **kwargs): 210 | super(Classifer, self).__init__() 211 | self.dsconv1 = _DSConv(dw_channels, dw_channels, stride) 212 | self.dsconv2 = _DSConv(dw_channels, dw_channels, stride) 213 | self.conv = nn.Sequential( 214 | nn.Dropout(0.1), 215 | nn.Conv2d(dw_channels, num_classes, 1) 216 | ) 217 | 218 | def forward(self, x): 219 | x = self.dsconv1(x) 220 | x = self.dsconv2(x) 221 | x = self.conv(x) 222 | return x 223 | 224 | 225 | if __name__ == '__main__': 226 | i = torch.Tensor(1,3,512,512).cuda() 227 | m = FastSCNN(19).cuda() 228 | m.eval() 229 | o = m(i) 230 | print(o[0].size()) -------------------------------------------------------------------------------- /libs/models/ICNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from libs.core.operators import ConvBnRelu 8 | from libs.models.PSPNet import PSPHead_res50 9 | 10 | 11 | class CascadeFeatureFusion(nn.Module): 12 | """CFF Unit""" 13 | def __init__(self, low_channels, high_channels, out_channels, nclass, norm_layer=nn.BatchNorm2d): 14 | super(CascadeFeatureFusion, self).__init__() 15 | self.conv_low = nn.Sequential( 16 | nn.Conv2d(low_channels, out_channels, 3, padding=2, dilation=2, bias=False), 17 | norm_layer(out_channels) 18 | ) 19 | self.conv_high = nn.Sequential( 20 | nn.Conv2d(high_channels, out_channels, 1, bias=False), 21 | norm_layer(out_channels) 22 | ) 23 | self.conv_low_cls = nn.Conv2d(out_channels, nclass, 1, bias=False) 24 | 25 | def forward(self, x_low, x_high): 26 | x_low = F.interpolate(x_low, size=x_high.size()[2:], mode='bilinear', align_corners=True) 27 | x_low = self.conv_low(x_low) 28 | x_high = self.conv_high(x_high) 29 | x = x_low + x_high 30 | x = F.relu(x, inplace=True) 31 | x_low_cls = self.conv_low_cls(x_low) 32 | 33 | return x, x_low_cls 34 | 35 | 36 | class _ICHead(nn.Module): 37 | def __init__(self, nclass, norm_layer=nn.BatchNorm2d): 38 | super(_ICHead, self).__init__() 39 | self.cff_12 = CascadeFeatureFusion(128, 64, 128, nclass, norm_layer) 40 | self.cff_24 = CascadeFeatureFusion(256, 256, 128, nclass, norm_layer) 41 | 42 | self.conv_cls = nn.Conv2d(128, nclass, 1, bias=False) 43 | 44 | def forward(self, x_sub1, x_sub2, x_sub4): 45 | outputs = list() 46 | x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2) 47 | outputs.append(x_24_cls) 48 | x_cff_12, x_12_cls = self.cff_12(x_cff_24, x_sub1) 49 | outputs.append(x_12_cls) 50 | 51 | up_x2 = F.interpolate(x_cff_12, scale_factor=2, mode='bilinear', align_corners=True) 52 | up_x2 = self.conv_cls(up_x2) 53 | outputs.append(up_x2) 54 | up_x8 = F.interpolate(up_x2, scale_factor=4, mode='bilinear', align_corners=True) 55 | outputs.append(up_x8) 56 | # 1 -> 1/4 -> 1/8 -> 1/16 57 | outputs.reverse() 58 | return outputs 59 | 60 | 61 | class ICNet(nn.Module): 62 | def __init__(self, nclass): 63 | super(ICNet, self).__init__() 64 | self.conv_sub1 = nn.Sequential( 65 | ConvBnRelu(3, 32, 3, 2, 1), 66 | ConvBnRelu(32, 32, 3, 2, 1), 67 | ConvBnRelu(32, 64, 3, 2, 1) 68 | ) 69 | self.backbone = PSPHead_res50() 70 | self.head = _ICHead(nclass) 71 | 72 | self.conv_sub4 = ConvBnRelu(512, 256, 1) 73 | self.conv_sub2 = ConvBnRelu(512, 256, 
1) 74 | 75 | def forward(self, x): 76 | 77 | # sub 1 78 | x_sub1_out = self.conv_sub1(x) 79 | 80 | # sub 2 81 | x_sub2 = F.interpolate(x, scale_factor=0.5, mode='bilinear', align_corners=True) 82 | 83 | x = self.backbone.relu1(self.backbone.bn1(self.backbone.conv1(x_sub2))) 84 | x = self.backbone.relu2(self.backbone.bn2(self.backbone.conv2(x))) 85 | x = self.backbone.relu3(self.backbone.bn3(self.backbone.conv3(x))) 86 | x = self.backbone.maxpool(x) 87 | 88 | x = self.backbone.layer1(x) 89 | x_sub2_out = self.backbone.layer2(x) 90 | 91 | # sub 4 92 | x_sub4 = F.interpolate(x_sub2_out, scale_factor=0.5, mode='bilinear', align_corners=True) 93 | 94 | x = self.backbone.layer3(x_sub4) 95 | x = self.backbone.layer4(x) 96 | x_sub4_out = self.backbone.head(x) 97 | 98 | x_sub4_out = self.conv_sub4(x_sub4_out) 99 | x_sub2_out = self.conv_sub2(x_sub2_out) 100 | 101 | res = self.head(x_sub1_out, x_sub2_out, x_sub4_out) 102 | 103 | return res 104 | 105 | 106 | def icnet(num_classes=19, data_set="cityscape"): 107 | return ICNet(num_classes) 108 | 109 | 110 | 111 | if __name__ == '__main__': 112 | i = torch.Tensor(1,3,512,512).cuda() 113 | m = ICNet(19).cuda() 114 | m.eval() 115 | res= m(i) 116 | print("ICnet output length: ", len(res)) 117 | for i in res: 118 | print(i.size()) -------------------------------------------------------------------------------- /libs/models/MSFNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | # Pytorch Implementation Of MSFNet: Real-Time Semantic Segmentation via Multiply Spatial Fusion Network(face++) 4 | # I didn't include the boundaries information 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | 11 | class MSFNet(nn.Module): 12 | def __init__(self): 13 | super(MSFNet, self).__init__() 14 | 15 | 16 | def forward(self, x): 17 | pass 18 | 19 | 20 | 21 | if __name__ == '__main__': 22 | i = torch.Tensor(1, 3, 512, 512).cuda() 23 | m = MSFNet().cuda() 24 | m.eval() 25 | o = m(i) 26 | print(o[0].size()) 27 | print("output length: ", len(o)) -------------------------------------------------------------------------------- /libs/models/PSPNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import functional as F 3 | import torch 4 | affine_par = True 5 | 6 | from torch.nn import BatchNorm2d 7 | 8 | 9 | def conv3x3(in_planes, out_planes, stride=1): 10 | "3x3 convolution with padding" 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 12 | padding=1, bias=False) 13 | 14 | 15 | class Bottleneck(nn.Module): 16 | expansion = 4 17 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1): 18 | super(Bottleneck, self).__init__() 19 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 20 | self.bn1 = BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 22 | padding=dilation*multi_grid, dilation=dilation*multi_grid, bias=False) 23 | self.bn2 = BatchNorm2d(planes) 24 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 25 | self.bn3 = BatchNorm2d(planes * 4) 26 | self.relu = nn.ReLU(inplace=False) 27 | self.relu_inplace = nn.ReLU(inplace=True) 28 | self.downsample = downsample 29 | self.dilation = dilation 30 | self.stride = stride 31 | 32 | def forward(self, x): 33 | residual = x 34 | 35 | out = self.conv1(x) 36 | out = self.bn1(out) 37 | out = 
self.relu(out) 38 | 39 | out = self.conv2(out) 40 | out = self.bn2(out) 41 | out = self.relu(out) 42 | 43 | out = self.conv3(out) 44 | out = self.bn3(out) 45 | 46 | if self.downsample is not None: 47 | residual = self.downsample(x) 48 | 49 | out = out + residual 50 | out = self.relu_inplace(out) 51 | 52 | return out 53 | 54 | class PSPModule(nn.Module): 55 | """ 56 | Reference: 57 | Zhao, Hengshuang, et al. *"Pyramid scene parsing network."* 58 | """ 59 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): 60 | super(PSPModule, self).__init__() 61 | 62 | self.stages = [] 63 | self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) 64 | self.bottleneck = nn.Sequential( 65 | nn.Conv2d(features+len(sizes)*out_features, out_features, kernel_size=3, padding=1, dilation=1, bias=False), 66 | BatchNorm2d(out_features), 67 | nn.ReLU(), 68 | nn.Dropout2d(0.1) 69 | ) 70 | 71 | def _make_stage(self, features, out_features, size): 72 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 73 | conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) 74 | bn = BatchNorm2d(out_features) 75 | return nn.Sequential(prior, conv, bn) 76 | 77 | def forward(self, feats): 78 | h, w = feats.size(2), feats.size(3) 79 | priors = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] 80 | bottle = self.bottleneck(torch.cat(priors, 1)) 81 | return bottle 82 | 83 | 84 | class ResNet(nn.Module): 85 | def __init__(self, block, layers, num_classes): 86 | self.inplanes = 128 87 | super(ResNet, self).__init__() 88 | self.conv1 = conv3x3(3, 64, stride=2) 89 | self.bn1 = BatchNorm2d(64) 90 | self.relu1 = nn.ReLU(inplace=False) 91 | self.conv2 = conv3x3(64, 64) 92 | self.bn2 = BatchNorm2d(64) 93 | self.relu2 = nn.ReLU(inplace=False) 94 | self.conv3 = conv3x3(64, 128) 95 | self.bn3 = BatchNorm2d(128) 96 | self.relu3 = nn.ReLU(inplace=False) 97 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 98 | 99 | self.relu = nn.ReLU(inplace=False) 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 101 | self.layer1 = self._make_layer(block, 64, layers[0]) 102 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 103 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 104 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1,1,1)) 105 | 106 | self.head = nn.Sequential(PSPModule(2048, 512), 107 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True)) 108 | 109 | self.dsn = nn.Sequential( 110 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1), 111 | BatchNorm2d(512), 112 | nn.ReLU(), 113 | nn.Dropout2d(0.1), 114 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True) 115 | ) 116 | 117 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1): 118 | downsample = None 119 | if stride != 1 or self.inplanes != planes * block.expansion: 120 | downsample = nn.Sequential( 121 | nn.Conv2d(self.inplanes, planes * block.expansion, 122 | kernel_size=1, stride=stride, bias=False), 123 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 124 | 125 | layers = [] 126 | generate_multi_grid = lambda index, grids: grids[index%len(grids)] if isinstance(grids, tuple) else 1 127 | layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample, multi_grid=generate_multi_grid(0, multi_grid))) 
128 | self.inplanes = planes * block.expansion 129 | for i in range(1, blocks): 130 | layers.append(block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid))) 131 | 132 | return nn.Sequential(*layers) 133 | 134 | def forward(self, x): 135 | x = self.relu1(self.bn1(self.conv1(x))) 136 | x = self.relu2(self.bn2(self.conv2(x))) 137 | x = self.relu3(self.bn3(self.conv3(x))) 138 | x = self.maxpool(x) 139 | x = self.layer1(x) 140 | x = self.layer2(x) 141 | x = self.layer3(x) 142 | x_dsn = None 143 | if self.training: 144 | x_dsn = self.dsn(x) 145 | x = self.layer4(x) 146 | x = self.head(x) 147 | if self.training: 148 | return [x, x_dsn] 149 | else: 150 | return [x] 151 | 152 | 153 | class PSPHead(nn.Module): 154 | """ 155 | Used for ICNet 156 | """ 157 | def __init__(self, block, layers): 158 | self.inplanes = 128 159 | super(PSPHead, self).__init__() 160 | self.conv1 = conv3x3(3, 64, stride=2) 161 | self.bn1 = BatchNorm2d(64) 162 | self.relu1 = nn.ReLU(inplace=False) 163 | self.conv2 = conv3x3(64, 64) 164 | self.bn2 = BatchNorm2d(64) 165 | self.relu2 = nn.ReLU(inplace=False) 166 | self.conv3 = conv3x3(64, 128) 167 | self.bn3 = BatchNorm2d(128) 168 | self.relu3 = nn.ReLU(inplace=False) 169 | 170 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 171 | 172 | self.layer1 = self._make_layer(block, 64, layers[0]) 173 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 174 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 175 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1)) 176 | self.head = PSPModule(2048, 512) 177 | 178 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1): 179 | downsample = None 180 | if stride != 1 or self.inplanes != planes * block.expansion: 181 | downsample = nn.Sequential( 182 | nn.Conv2d(self.inplanes, planes * block.expansion, 183 | kernel_size=1, stride=stride, bias=False), 184 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 185 | 186 | layers = [] 187 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 188 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 189 | multi_grid=generate_multi_grid(0, multi_grid))) 190 | self.inplanes = planes * block.expansion 191 | for i in range(1, blocks): 192 | layers.append( 193 | block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid))) 194 | 195 | return nn.Sequential(*layers) 196 | 197 | def forward(self, x): 198 | x = self.relu1(self.bn1(self.conv1(x))) 199 | x = self.relu2(self.bn2(self.conv2(x))) 200 | x = self.relu3(self.bn3(self.conv3(x))) 201 | x = self.maxpool(x) 202 | x = self.layer1(x) 203 | x = self.layer2(x) 204 | x = self.layer3(x) 205 | x = self.layer4(x) 206 | x = self.head(x) 207 | return x 208 | 209 | 210 | def PSPNet_res101(num_classes=21): 211 | model = ResNet(Bottleneck,[3, 4, 23, 3], num_classes) 212 | return model 213 | 214 | 215 | def PSPNet_res50(num_classes=21): 216 | model = ResNet(Bottleneck,[3, 4, 6, 3], num_classes) 217 | return model 218 | 219 | 220 | def PSPHead_res50(): 221 | model = PSPHead(Bottleneck,[3, 4, 6, 3]) 222 | return model 223 | 224 | 225 | if __name__ == '__main__': 226 | i = torch.Tensor(1,3,769,769).cuda() 227 | model = PSPNet_res101(19) 228 | model.eval() 229 | o = model(i) 230 | print(o[0].size()) 231 | print(o[1].size()) 
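Both PSPNet variants above return [main, dsn] logits at roughly 1/8 of the input resolution while training, and only the main logits in eval mode. The sketch below shows one way such outputs are typically combined; the 0.4 auxiliary weight, the ignore index of 255 and the plain cross-entropy loss are illustrative assumptions, not code taken from this repository.

import torch.nn.functional as F
from torch import nn

criterion = nn.CrossEntropyLoss(ignore_index=255)  # assumed loss and ignore index

def psp_loss(outputs, label, aux_weight=0.4):
    # outputs[0] is the main prediction, outputs[1] (training only) the dsn head;
    # both are at ~1/8 resolution, so upsample to the label size before the loss.
    h, w = label.shape[-2:]
    main = F.interpolate(outputs[0], size=(h, w), mode='bilinear', align_corners=True)
    loss = criterion(main, label)
    if len(outputs) > 1:
        aux = F.interpolate(outputs[1], size=(h, w), mode='bilinear', align_corners=True)
        loss = loss + aux_weight * criterion(aux, label)
    return loss

# usage sketch (labels must be LongTensor with 255 as the ignore value):
# model = PSPNet_res50(num_classes=19).train()
# loss = psp_loss(model(images), labels.long())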
-------------------------------------------------------------------------------- /libs/models/SwiftNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | """ 4 | SwiftNet is a little different 5 | 1. because it use the pre-activation input as lateral feature input. 6 | The backbone need writing for easier experiment 7 | 2. I also add dsn head for easier training during the decoder upsample process. 8 | 3. SwiftNet use torch pretrained backbone. 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | from libs.core.operators import dsn, upsample, conv3x3 17 | 18 | model_urls = { 19 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 20 | } 21 | 22 | 23 | class BasicBlock(nn.Module): 24 | expansion = 1 25 | 26 | def __init__(self, inplanes, planes, stride=1, downsample=None, efficient=True, use_bn=True): 27 | super(BasicBlock, self).__init__() 28 | self.use_bn = use_bn 29 | self.conv1 = conv3x3(inplanes, planes, stride) 30 | self.bn1 = nn.BatchNorm2d(planes) if self.use_bn else None 31 | self.relu = nn.ReLU(inplace=True) 32 | self.conv2 = conv3x3(planes, planes) 33 | self.bn2 = nn.BatchNorm2d(planes) if self.use_bn else None 34 | self.downsample = downsample 35 | self.stride = stride 36 | self.efficient = efficient 37 | 38 | def forward(self, x): 39 | residual = x 40 | out = self.conv1(x) 41 | out = self.bn1(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv2(out) 45 | out = self.bn2(out) 46 | 47 | if self.downsample is not None: 48 | residual = self.downsample(x) 49 | 50 | out = out + residual 51 | relu = self.relu(out) 52 | 53 | return relu, out 54 | 55 | 56 | class SwiftNetResNet(nn.Module): 57 | def __init__(self, block, layers, num_features=19, k_up=3, efficient=True, use_bn=True, 58 | spp_grids=(8, 4, 2, 1), spp_square_grid=False): 59 | super(SwiftNetResNet, self).__init__() 60 | self.inplanes = 64 61 | self.efficient = efficient 62 | self.nclass = num_features 63 | self.use_bn = use_bn 64 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 65 | bias=False) 66 | self.bn1 = nn.BatchNorm2d(64) if self.use_bn else lambda x: x 67 | self.relu = nn.ReLU(inplace=True) 68 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 69 | upsamples = [] 70 | self.layer1 = self._make_layer(block, 64, layers[0]) 71 | upsamples += [_Upsample(num_features, self.inplanes, num_features, use_bn=self.use_bn, k=k_up)] 72 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 73 | upsamples += [_Upsample(num_features, self.inplanes, num_features, use_bn=self.use_bn, k=k_up)] 74 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 75 | upsamples += [_Upsample(num_features, self.inplanes, num_features, use_bn=self.use_bn, k=k_up)] 76 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 77 | 78 | self.fine_tune = [self.conv1, self.maxpool, self.layer1, self.layer2, self.layer3, self.layer4] 79 | if self.use_bn: 80 | self.fine_tune += [self.bn1] 81 | 82 | num_levels = 3 83 | self.spp_size = num_features 84 | bt_size = self.spp_size 85 | 86 | level_size = self.spp_size // num_levels 87 | 88 | self.dsn = dsn(256, self.nclass) 89 | 90 | self.spp = SpatialPyramidPooling(self.inplanes, num_levels, bt_size=bt_size, level_size=level_size, 91 | out_size=self.spp_size, grids=spp_grids, square_grid=spp_square_grid, 92 | bn_momentum=0.01 / 2, 
use_bn=self.use_bn) 93 | self.upsample = nn.ModuleList(list(reversed(upsamples))) 94 | 95 | self.random_init = [self.spp, self.upsample] 96 | 97 | self.num_features = num_features 98 | 99 | for m in self.modules(): 100 | if isinstance(m, nn.Conv2d): 101 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 102 | elif isinstance(m, nn.BatchNorm2d): 103 | nn.init.constant_(m.weight, 1) 104 | nn.init.constant_(m.bias, 0) 105 | 106 | def _make_layer(self, block, planes, blocks, stride=1): 107 | downsample = None 108 | if stride != 1 or self.inplanes != planes * block.expansion: 109 | layers = [nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False)] 110 | if self.use_bn: 111 | layers += [nn.BatchNorm2d(planes * block.expansion)] 112 | downsample = nn.Sequential(*layers) 113 | layers = [block(self.inplanes, planes, stride, downsample, efficient=self.efficient, use_bn=self.use_bn)] 114 | self.inplanes = planes * block.expansion 115 | for i in range(1, blocks): 116 | layers += [block(self.inplanes, planes, efficient=self.efficient, use_bn=self.use_bn)] 117 | 118 | return nn.Sequential(*layers) 119 | 120 | 121 | def forward_resblock(self, x, layers): 122 | skip = None 123 | for l in layers: 124 | x = l(x) 125 | if isinstance(x, tuple): 126 | x, skip = x 127 | return x, skip 128 | 129 | def forward_down(self, image): 130 | x = self.conv1(image) 131 | x = self.bn1(x) 132 | x = self.relu(x) 133 | x = self.maxpool(x) 134 | 135 | features = [] 136 | x, skip = self.forward_resblock(x, self.layer1) 137 | features += [skip] 138 | x, skip = self.forward_resblock(x, self.layer2) 139 | features += [skip] 140 | x, skip = self.forward_resblock(x, self.layer3) 141 | features += [skip] 142 | 143 | dsn = None 144 | if self.training: 145 | dsn = self.dsn(x) 146 | x, skip = self.forward_resblock(x, self.layer4) 147 | 148 | features += [self.spp.forward(skip)] 149 | if self.training: 150 | return features, dsn 151 | else: 152 | return features 153 | 154 | def forward_up(self, features): 155 | features = features[::-1] 156 | 157 | x = features[0] 158 | 159 | upsamples = [] 160 | for skip, up in zip(features[1:], self.upsample): 161 | x = up(x, skip) 162 | upsamples += [x] 163 | return [x] 164 | 165 | def forward(self, x): 166 | dsn = None 167 | if self.training: 168 | features, dsn = self.forward_down(x) 169 | else: 170 | features = self.forward_down(x) 171 | 172 | res = self.forward_up(features) 173 | 174 | if self.training: 175 | res.append(dsn) 176 | return res 177 | 178 | 179 | class SpatialPyramidPooling(nn.Module): 180 | """ 181 | SPP module is little different from ppm by inserting middle level feature to save the computation and memory. 
182 | """ 183 | def __init__(self, num_maps_in, num_levels, bt_size=512, level_size=128, out_size=128, 184 | grids=(6, 3, 2, 1), square_grid=False, bn_momentum=0.1, use_bn=True): 185 | super(SpatialPyramidPooling, self).__init__() 186 | self.grids = grids 187 | self.square_grid = square_grid 188 | self.spp = nn.Sequential() 189 | self.spp.add_module('spp_bn', 190 | _BNReluConv(num_maps_in, bt_size, k=1, bn_momentum=bn_momentum, batch_norm=use_bn)) 191 | num_features = bt_size 192 | final_size = num_features 193 | for i in range(num_levels): 194 | final_size += level_size 195 | self.spp.add_module('spp' + str(i), 196 | _BNReluConv(num_features, level_size, k=1, bn_momentum=bn_momentum, batch_norm=use_bn)) 197 | self.spp.add_module('spp_fuse', 198 | _BNReluConv(final_size, out_size, k=1, bn_momentum=bn_momentum, batch_norm=use_bn)) 199 | 200 | def forward(self, x): 201 | levels = [] 202 | target_size = x.size()[2:4] 203 | 204 | ar = target_size[1] / target_size[0] 205 | 206 | x = self.spp[0].forward(x) 207 | levels.append(x) 208 | num = len(self.spp) - 1 209 | 210 | for i in range(1, num): 211 | if not self.square_grid: 212 | grid_size = (self.grids[i - 1], max(1, round(ar * self.grids[i - 1]))) 213 | x_pooled = F.adaptive_avg_pool2d(x, grid_size) 214 | else: 215 | x_pooled = F.adaptive_avg_pool2d(x, self.grids[i - 1]) 216 | level = self.spp[i].forward(x_pooled) 217 | 218 | level = upsample(level, target_size) 219 | levels.append(level) 220 | x = torch.cat(levels, 1) 221 | x = self.spp[-1].forward(x) 222 | return x 223 | 224 | 225 | class _BNReluConv(nn.Sequential): 226 | def __init__(self, num_maps_in, num_maps_out, k=3, batch_norm=True, bn_momentum=0.1, bias=False, dilation=1): 227 | super(_BNReluConv, self).__init__() 228 | if batch_norm: 229 | self.add_module('norm', nn.BatchNorm2d(num_maps_in, momentum=bn_momentum)) 230 | self.add_module('relu', nn.ReLU(inplace=batch_norm is True)) 231 | padding = k // 2 232 | self.add_module('conv', nn.Conv2d(num_maps_in, num_maps_out, 233 | kernel_size=k, padding=padding, bias=bias, dilation=dilation)) 234 | 235 | 236 | class _Upsample(nn.Module): 237 | def __init__(self, num_maps_in, skip_maps_in, num_maps_out, use_bn=True, k=3): 238 | super(_Upsample, self).__init__() 239 | print(f'Upsample layer: in = {num_maps_in}, skip = {skip_maps_in}, out = {num_maps_out}') 240 | self.bottleneck = _BNReluConv(skip_maps_in, num_maps_in, k=1, batch_norm=use_bn) 241 | self.blend_conv = _BNReluConv(num_maps_in, num_maps_out, k=k, batch_norm=use_bn) 242 | 243 | def forward(self, x, skip): 244 | skip = self.bottleneck.forward(skip) 245 | skip_size = skip.size()[2:4] 246 | x = upsample(x, skip_size) 247 | x = x + skip 248 | x = self.blend_conv.forward(x) 249 | return x 250 | 251 | 252 | def SwiftNetRes18(nclass=19, pretrained=True, **kwargs): 253 | """Constructs a ResNet-18 model. 
254 | Args: 255 | pretrained (bool): If True, returns a model pre-trained on ImageNet 256 | """ 257 | model = SwiftNetResNet(BasicBlock, [2, 2, 2, 2], nclass, **kwargs) 258 | if pretrained: 259 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), strict=False) 260 | return model 261 | 262 | 263 | if __name__ == '__main__': 264 | i = torch.Tensor(1, 3, 512, 512).cuda() 265 | m = SwiftNetRes18(pretrained=False).cuda() 266 | m.eval() 267 | o = m(i) 268 | print(o[0].size()) 269 | print("output length: ", len(o)) -------------------------------------------------------------------------------- /libs/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .PSPNet import * 2 | from .DFSegNet import * 3 | from .ICNet import * 4 | from .FastSCNN import * 5 | from .SwiftNet import * 6 | from .ESPNet import * -------------------------------------------------------------------------------- /libs/models/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/models/backbone/__init__.py -------------------------------------------------------------------------------- /libs/models/backbone/dfnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import math 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import BatchNorm2d 8 | 9 | __all__ = ["dfnetv1", "dfnetv2"] 10 | 11 | 12 | def conv3x3(in_planes, out_planes, stride=1): 13 | "3x3 convolution with padding" 14 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 15 | padding=1, bias=False) 16 | 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class dfnetv1(nn.Module): 51 | def __init__(self, num_classes=1000): 52 | super(dfnetv1, self).__init__() 53 | self.inplanes = 64 54 | self.stage1 = nn.Sequential( 55 | nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=2, bias=False), 56 | BatchNorm2d(32), 57 | nn.ReLU(inplace=True), 58 | nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2, bias=False), 59 | BatchNorm2d(64), 60 | nn.ReLU(inplace=True) 61 | ) 62 | 63 | self.stage2 = self._make_layer(64, 3, stride=2) 64 | self.stage3 = self._make_layer(128, 3, stride=2) 65 | self.stage4 = self._make_layer(256, 3, stride=2) 66 | self.stage5 = self._make_layer(512, 1, stride=1) 67 | self.avgpool = nn.AvgPool2d(7, stride=1) 68 | self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes) 69 | 70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 73 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 74 | elif isinstance(m, BatchNorm2d): 75 | m.weight.data.fill_(1) 76 | m.bias.data.zero_() 77 | 78 | def _make_layer(self, planes, blocks, stride=1): 79 | downsample = None 80 | if stride != 1 or self.inplanes != planes * BasicBlock.expansion: 81 | 82 | downsample = nn.Sequential( 83 | nn.Conv2d(self.inplanes, planes * BasicBlock.expansion, 84 | kernel_size=1, stride=stride, bias=False), 85 | BatchNorm2d(planes * BasicBlock.expansion), 86 | ) 87 | 88 | layers = [] 89 | layers.append(BasicBlock(self.inplanes, planes, stride, downsample)) 90 | self.inplanes = planes * BasicBlock.expansion 91 | for i in range(1, blocks): 92 | layers.append(BasicBlock(self.inplanes, planes)) 93 | 94 | return nn.Sequential(*layers) 95 | 96 | def forward(self, x): 97 | x = self.stage1(x) # 4x32 98 | x = self.stage2(x) # 8x64 99 | x3 = self.stage3(x) # 16x128 100 | x4 = self.stage4(x3) # 32x256 101 | x5 = self.stage5(x4) # 32x512 102 | 103 | return x3, x4, x5 104 | 105 | 106 | class dfnetv2(nn.Module): 107 | def __init__(self, num_classes=1000): 108 | super(dfnetv2, self).__init__() 109 | self.inplanes = 64 110 | self.stage1 = nn.Sequential( 111 | nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=2, bias=False), 112 | BatchNorm2d(32), 113 | nn.ReLU(inplace=True), 114 | nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2, bias=False), 115 | BatchNorm2d(64), 116 | nn.ReLU(inplace=True) 117 | ) 118 | 119 | self.stage2_1 = self._make_layer(64, 2, stride=2) 120 | self.stage2_2 = self._make_layer(128, 1, stride=1) 121 | self.stage3_1 = self._make_layer(128, 10, stride=2) 122 | self.stage3_2 = self._make_layer(256, 1, stride=1) 123 | self.stage4_1 = self._make_layer(256, 4, stride=2) 124 | self.stage4_2 = self._make_layer(512, 2, stride=1) 125 | self.avgpool = nn.AvgPool2d(7, stride=1) 126 | self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes) 127 | 128 | for m in self.modules(): 129 | if isinstance(m, nn.Conv2d): 130 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 131 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 132 | elif isinstance(m, BatchNorm2d): 133 | m.weight.data.fill_(1) 134 | m.bias.data.zero_() 135 | 136 | def _make_layer(self, planes, blocks, stride=1): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * BasicBlock.expansion: 139 | 140 | downsample = nn.Sequential( 141 | nn.Conv2d(self.inplanes, planes * BasicBlock.expansion, 142 | kernel_size=1, stride=stride, bias=False), 143 | BatchNorm2d(planes * BasicBlock.expansion), 144 | ) 145 | 146 | layers = [] 147 | layers.append(BasicBlock(self.inplanes, planes, stride, downsample)) 148 | self.inplanes = planes * BasicBlock.expansion 149 | for i in range(1, blocks): 150 | layers.append(BasicBlock(self.inplanes, planes)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def forward(self, x): 155 | x = self.stage1(x) # 4x32 156 | x = self.stage2_1(x) # 8x64 157 | x3 = self.stage2_2(x) # 8x64 158 | x4 = self.stage3_1(x3) # 16x128 159 | x4 = self.stage3_2(x4) # 16x128 160 | x5 = self.stage4_1(x4) # 32x256 161 | x5 = self.stage4_2(x5) # 32x256 162 | return x3,x4,x5 163 | 164 | 165 | if __name__ == '__main__': 166 | i = torch.Tensor(1,3,512,512).cuda() 167 | m = dfnetv2().cuda() 168 | m(i) 169 | 170 | -------------------------------------------------------------------------------- /libs/models/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import torch.nn as nn 4 | 5 | from libs.utils.tools import load_model 6 | 7 | 8 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101'] 9 | 10 | 11 | def conv3x3(in_planes, out_planes, stride=1): 12 | """3x3 convolution with padding""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 14 | padding=1, bias=False) 15 | 16 | 17 | class BasicBlock(nn.Module): 18 | expansion = 1 19 | 20 | def __init__(self, inplanes, planes, stride=1, norm_layer=None, 21 | bn_eps=1e-5, bn_momentum=0.1, downsample=None, inplace=True): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 25 | self.relu = nn.ReLU(inplace=inplace) 26 | self.relu_inplace = nn.ReLU(inplace=True) 27 | self.conv2 = conv3x3(planes, planes) 28 | self.bn2 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 29 | self.downsample = downsample 30 | self.stride = stride 31 | self.inplace = inplace 32 | 33 | def forward(self, x): 34 | residual = x 35 | 36 | out = self.conv1(x) 37 | out = self.bn1(out) 38 | out = self.relu(out) 39 | 40 | out = self.conv2(out) 41 | out = self.bn2(out) 42 | 43 | if self.downsample is not None: 44 | residual = self.downsample(x) 45 | 46 | if self.inplace: 47 | out += residual 48 | else: 49 | out = out + residual 50 | 51 | out = self.relu_inplace(out) 52 | 53 | return out 54 | 55 | 56 | class Bottleneck(nn.Module): 57 | expansion = 4 58 | 59 | def __init__(self, inplanes, planes, stride=1, 60 | norm_layer=None, bn_eps=1e-5, bn_momentum=0.1, 61 | downsample=None, inplace=True): 62 | super(Bottleneck, self).__init__() 63 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 64 | self.bn1 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 66 | padding=1, bias=False) 67 | self.bn2 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 68 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 69 | bias=False) 70 | self.bn3 = 
norm_layer(planes * self.expansion, eps=bn_eps, 71 | momentum=bn_momentum) 72 | self.relu = nn.ReLU(inplace=inplace) 73 | self.relu_inplace = nn.ReLU(inplace=True) 74 | self.downsample = downsample 75 | self.stride = stride 76 | self.inplace = inplace 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | if self.inplace: 96 | out += residual 97 | else: 98 | out = out + residual 99 | out = self.relu_inplace(out) 100 | 101 | return out 102 | 103 | 104 | class ResNet(nn.Module): 105 | 106 | def __init__(self, block, layers, norm_layer=nn.BatchNorm2d, bn_eps=1e-5, 107 | bn_momentum=0.1, deep_stem=False, stem_width=32, inplace=True): 108 | self.inplanes = stem_width * 2 if deep_stem else 64 109 | super(ResNet, self).__init__() 110 | if deep_stem: 111 | self.conv1 = nn.Sequential( 112 | nn.Conv2d(3, stem_width, kernel_size=3, stride=2, padding=1, 113 | bias=False), 114 | norm_layer(stem_width, eps=bn_eps, momentum=bn_momentum), 115 | nn.ReLU(inplace=inplace), 116 | nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, 117 | padding=1, 118 | bias=False), 119 | norm_layer(stem_width, eps=bn_eps, momentum=bn_momentum), 120 | nn.ReLU(inplace=inplace), 121 | nn.Conv2d(stem_width, stem_width * 2, kernel_size=3, stride=1, 122 | padding=1, 123 | bias=False), 124 | ) 125 | else: 126 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 127 | bias=False) 128 | 129 | self.bn1 = norm_layer(stem_width * 2 if deep_stem else 64, eps=bn_eps, 130 | momentum=bn_momentum) 131 | self.relu = nn.ReLU(inplace=inplace) 132 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 133 | self.layer1 = self._make_layer(block, norm_layer, 64, layers[0], 134 | inplace, 135 | bn_eps=bn_eps, bn_momentum=bn_momentum) 136 | self.layer2 = self._make_layer(block, norm_layer, 128, layers[1], 137 | inplace, stride=2, 138 | bn_eps=bn_eps, bn_momentum=bn_momentum) 139 | self.layer3 = self._make_layer(block, norm_layer, 256, layers[2], 140 | inplace, stride=2, 141 | bn_eps=bn_eps, bn_momentum=bn_momentum) 142 | self.layer4 = self._make_layer(block, norm_layer, 512, layers[3], 143 | inplace, stride=2, 144 | bn_eps=bn_eps, bn_momentum=bn_momentum) 145 | 146 | def _make_layer(self, block, norm_layer, planes, blocks, inplace=True, 147 | stride=1, bn_eps=1e-5, bn_momentum=0.1): 148 | downsample = None 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion, 152 | kernel_size=1, stride=stride, bias=False), 153 | norm_layer(planes * block.expansion, eps=bn_eps, 154 | momentum=bn_momentum), 155 | ) 156 | 157 | layers = [] 158 | layers.append(block(self.inplanes, planes, stride, norm_layer, bn_eps, 159 | bn_momentum, downsample, inplace)) 160 | self.inplanes = planes * block.expansion 161 | for i in range(1, blocks): 162 | layers.append(block(self.inplanes, planes, 163 | norm_layer=norm_layer, bn_eps=bn_eps, 164 | bn_momentum=bn_momentum, inplace=inplace)) 165 | 166 | return nn.Sequential(*layers) 167 | 168 | def forward(self, x): 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | x = self.maxpool(x) 173 | 174 | layers = [] 175 | x = self.layer1(x) 176 | layers.append(x) 177 | x = self.layer2(x) 
178 | layers.append(x) 179 | x = self.layer3(x) 180 | layers.append(x) 181 | x = self.layer4(x) 182 | layers.append(x) 183 | 184 | return layers 185 | 186 | 187 | def resnet18(pretrained_model=None, **kwargs): 188 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 189 | 190 | if pretrained_model is not None: 191 | model = load_model(model, pretrained_model) 192 | return model 193 | 194 | 195 | def resnet34(pretrained_model=None, **kwargs): 196 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 197 | 198 | if pretrained_model is not None: 199 | model = load_model(model, pretrained_model) 200 | return model 201 | 202 | 203 | def resnet50(pretrained_model=None, **kwargs): 204 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 205 | 206 | if pretrained_model is not None: 207 | model = load_model(model, pretrained_model) 208 | return model 209 | 210 | 211 | def resnet101(pretrained_model=None, **kwargs): 212 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 213 | 214 | if pretrained_model is not None: 215 | model = load_model(model, pretrained_model) 216 | return model 217 | 218 | -------------------------------------------------------------------------------- /libs/models/backbone/xception.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import torch.nn as nn 3 | 4 | from libs.core.operators import ConvBnRelu, SeparableConv2d 5 | from libs.utils.tools import load_model 6 | 7 | __all__ = ['Xception', 'Xception39','XceptionA'] 8 | 9 | 10 | class SeparableConvBnRelu(nn.Module): 11 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, 12 | padding=0, dilation=1, 13 | has_relu=True, norm_layer=nn.BatchNorm2d): 14 | super(SeparableConvBnRelu, self).__init__() 15 | 16 | self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, 17 | padding, dilation, groups=in_channels, 18 | bias=False) 19 | self.point_wise_cbr = ConvBnRelu(in_channels, out_channels, 1, 1, 0, 20 | has_bn=True, norm_layer=norm_layer, 21 | has_relu=has_relu, has_bias=False) 22 | 23 | def forward(self, x): 24 | x = self.conv1(x) 25 | x = self.point_wise_cbr(x) 26 | return x 27 | 28 | 29 | class Block(nn.Module): 30 | expansion = 4 31 | 32 | def __init__(self, in_channels, mid_out_channels, has_proj, stride, 33 | dilation=1, norm_layer=nn.BatchNorm2d): 34 | super(Block, self).__init__() 35 | self.has_proj = has_proj 36 | 37 | if has_proj: 38 | self.proj = SeparableConvBnRelu(in_channels, 39 | mid_out_channels * self.expansion, 40 | 3, stride, 1, 41 | has_relu=False, 42 | norm_layer=norm_layer) 43 | 44 | self.residual_branch = nn.Sequential( 45 | SeparableConvBnRelu(in_channels, mid_out_channels, 46 | 3, stride, dilation, dilation, 47 | has_relu=True, norm_layer=norm_layer), 48 | SeparableConvBnRelu(mid_out_channels, mid_out_channels, 3, 1, 1, 49 | has_relu=True, norm_layer=norm_layer), 50 | SeparableConvBnRelu(mid_out_channels, 51 | mid_out_channels * self.expansion, 3, 1, 1, 52 | has_relu=False, norm_layer=norm_layer)) 53 | self.relu = nn.ReLU(inplace=True) 54 | 55 | def forward(self, x): 56 | shortcut = x 57 | if self.has_proj: 58 | shortcut = self.proj(x) 59 | 60 | residual = self.residual_branch(x) 61 | output = self.relu(shortcut + residual) 62 | 63 | return output 64 | 65 | 66 | class Xception(nn.Module): 67 | def __init__(self, block, layers, channels, norm_layer=nn.BatchNorm2d): 68 | super(Xception, self).__init__() 69 | 70 | self.in_channels = 8 71 | self.conv1 = ConvBnRelu(3, 
self.in_channels, 3, 2, 1, 72 | has_bn=True, norm_layer=norm_layer, 73 | has_relu=True, has_bias=False) 74 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 75 | 76 | self.layer1 = self._make_layer(block, norm_layer, 77 | layers[0], channels[0], stride=2) 78 | self.layer2 = self._make_layer(block, norm_layer, 79 | layers[1], channels[1], stride=2) 80 | self.layer3 = self._make_layer(block, norm_layer, 81 | layers[2], channels[2], stride=2) 82 | 83 | def _make_layer(self, block, norm_layer, blocks, 84 | mid_out_channels, stride=1): 85 | layers = [] 86 | has_proj = True if stride > 1 else False 87 | layers.append(block(self.in_channels, mid_out_channels, has_proj, 88 | stride=stride, norm_layer=norm_layer)) 89 | self.in_channels = mid_out_channels * block.expansion 90 | for i in range(1, blocks): 91 | layers.append(block(self.in_channels, mid_out_channels, 92 | has_proj=False, stride=1, 93 | norm_layer=norm_layer)) 94 | 95 | return nn.Sequential(*layers) 96 | 97 | def forward(self, x): 98 | x = self.conv1(x) 99 | x = self.maxpool(x) 100 | 101 | blocks = [] 102 | x = self.layer1(x) 103 | blocks.append(x) 104 | x = self.layer2(x) 105 | blocks.append(x) 106 | x = self.layer3(x) 107 | blocks.append(x) 108 | 109 | return blocks 110 | 111 | 112 | """ 113 | Xception39 is used for BiSeg Network 114 | """ 115 | def Xception39(pretrained_model=None, **kwargs): 116 | model = Xception(Block, [4, 8, 4], [16, 32, 64], **kwargs) 117 | 118 | if pretrained_model is not None: 119 | model = load_model(model, pretrained_model) 120 | return model 121 | 122 | 123 | class BlockA(nn.Module): 124 | def __init__(self, in_channels, out_channels, stride=1, dilation=1, norm_layer=nn.BatchNorm2d, start_with_relu=True): 125 | super(BlockA, self).__init__() 126 | if out_channels != in_channels or stride != 1: 127 | self.skip = nn.Conv2d(in_channels, out_channels, 1, stride, bias=False) 128 | self.skipbn = norm_layer(out_channels) 129 | else: 130 | self.skip = None 131 | self.relu = nn.ReLU() 132 | rep = list() 133 | inter_channels = out_channels // 4 134 | 135 | if start_with_relu: 136 | rep.append(self.relu) 137 | rep.append(SeparableConv2d(in_channels, inter_channels, 3, 1, dilation, norm_layer=norm_layer)) 138 | rep.append(norm_layer(inter_channels)) 139 | 140 | rep.append(self.relu) 141 | rep.append(SeparableConv2d(inter_channels, inter_channels, 3, 1, dilation, norm_layer=norm_layer)) 142 | rep.append(norm_layer(inter_channels)) 143 | 144 | if stride != 1: 145 | rep.append(self.relu) 146 | rep.append(SeparableConv2d(inter_channels, out_channels, 3, stride, norm_layer=norm_layer)) 147 | rep.append(norm_layer(out_channels)) 148 | else: 149 | rep.append(self.relu) 150 | rep.append(SeparableConv2d(inter_channels, out_channels, 3, 1, norm_layer=norm_layer)) 151 | rep.append(norm_layer(out_channels)) 152 | self.rep = nn.Sequential(*rep) 153 | 154 | def forward(self, x): 155 | out = self.rep(x) 156 | if self.skip is not None: 157 | skip = self.skipbn(self.skip(x)) 158 | else: 159 | skip = x 160 | out = out + skip 161 | return out 162 | 163 | 164 | class Enc(nn.Module): 165 | def __init__(self, in_channels, out_channels, blocks, norm_layer=nn.BatchNorm2d): 166 | super(Enc, self).__init__() 167 | block = list() 168 | block.append(BlockA(in_channels, out_channels, 2, norm_layer=norm_layer)) 169 | for i in range(blocks - 1): 170 | block.append(BlockA(out_channels, out_channels, 1, norm_layer=norm_layer)) 171 | self.block = nn.Sequential(*block) 172 | 173 | def forward(self, x): 174 | return self.block(x) 175 | 
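The Xception39 factory above assembles Xception(Block, [4, 8, 4], [16, 32, 64]) as the lightweight backbone used by BiSegNet, and its forward pass returns the three stage outputs instead of classification logits. The probe below is a small sketch for inspecting those feature maps; the commented shapes are what the stride-2 stem, the max-pool, and the three stride-2 stages imply (about 1/8, 1/16, and 1/32 of the input, with 64, 128, and 256 channels after Block.expansion = 4), so treat them as expectations to verify rather than guarantees.

```python
import torch

from libs.models.backbone.xception import Xception39

# Probe the backbone with a random image just to look at the returned feature maps.
net = Xception39()
net.eval()
with torch.no_grad():
    feats = net(torch.randn(1, 3, 512, 512))

for i, f in enumerate(feats, start=1):
    print('stage {}: {}'.format(i, tuple(f.shape)))
# Expected for a 512x512 input (if the stride arithmetic above is right):
# stage 1: (1, 64, 64, 64), stage 2: (1, 128, 32, 32), stage 3: (1, 256, 16, 16)
```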
176 | 177 | class FCAttention(nn.Module): 178 | def __init__(self, in_channels, norm_layer=nn.BatchNorm2d): 179 | super(FCAttention, self).__init__() 180 | self.avgpool = nn.AdaptiveAvgPool2d(1) 181 | self.fc = nn.Linear(in_channels, 1000) 182 | self.conv = nn.Sequential( 183 | nn.Conv2d(1000, in_channels, 1, bias=False), 184 | norm_layer(in_channels), 185 | nn.ReLU()) 186 | 187 | def forward(self, x): 188 | n, c, _, _ = x.size() 189 | att = self.avgpool(x).view(n, c) 190 | att = self.fc(att).view(n, 1000, 1, 1) 191 | att = self.conv(att) 192 | return x * att.expand_as(x) 193 | 194 | 195 | """ 196 | XceptionA is used for DFANet 197 | """ 198 | 199 | class XceptionA(nn.Module): 200 | def __init__(self, num_classes=1000, norm_layer=nn.BatchNorm2d): 201 | super(XceptionA, self).__init__() 202 | self.conv1 = nn.Sequential(nn.Conv2d(3, 8, 3, 2, 1, bias=False), 203 | norm_layer(8), 204 | nn.ReLU()) 205 | 206 | self.enc2 = Enc(8, 48, 4, norm_layer=norm_layer) 207 | self.enc3 = Enc(48, 96, 6, norm_layer=norm_layer) 208 | self.enc4 = Enc(96, 192, 4, norm_layer=norm_layer) 209 | 210 | self.fca = FCAttention(192, norm_layer=norm_layer) 211 | self.avgpool = nn.AdaptiveAvgPool2d(1) 212 | self.fc = nn.Linear(192, num_classes) 213 | 214 | def forward(self, x): 215 | x = self.conv1(x) 216 | 217 | x = self.enc2(x) 218 | x = self.enc3(x) 219 | x = self.enc4(x) 220 | x = self.fca(x) 221 | 222 | x = self.avgpool(x) 223 | x = x.view(x.size(0), -1) 224 | x = self.fc(x) 225 | 226 | return x -------------------------------------------------------------------------------- /libs/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/utils/__init__.py -------------------------------------------------------------------------------- /libs/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import numbers 4 | import random 5 | import collections 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | 11 | def get_2dshape(shape, *, zero=True): 12 | if not isinstance(shape, collections.Iterable): 13 | shape = int(shape) 14 | shape = (shape, shape) 15 | else: 16 | h, w = map(int, shape) 17 | shape = (h, w) 18 | if zero: 19 | minv = 0 20 | else: 21 | minv = 1 22 | 23 | assert min(shape) >= minv, 'invalid shape: {}'.format(shape) 24 | return shape 25 | 26 | 27 | def random_crop_pad_to_shape(img, crop_pos, crop_size, pad_label_value): 28 | h, w = img.shape[:2] 29 | start_crop_h, start_crop_w = crop_pos 30 | assert ((start_crop_h < h) and (start_crop_h >= 0)) 31 | assert ((start_crop_w < w) and (start_crop_w >= 0)) 32 | 33 | crop_size = get_2dshape(crop_size) 34 | crop_h, crop_w = crop_size 35 | 36 | img_crop = img[start_crop_h:start_crop_h + crop_h, 37 | start_crop_w:start_crop_w + crop_w, ...] 
38 | 39 | img_, margin = pad_image_to_shape(img_crop, crop_size, cv2.BORDER_CONSTANT, 40 | pad_label_value) 41 | 42 | return img_, margin 43 | 44 | 45 | def generate_random_crop_pos(ori_size, crop_size): 46 | ori_size = get_2dshape(ori_size) 47 | h, w = ori_size 48 | 49 | crop_size = get_2dshape(crop_size) 50 | crop_h, crop_w = crop_size 51 | 52 | pos_h, pos_w = 0, 0 53 | 54 | if h > crop_h: 55 | pos_h = random.randint(0, h - crop_h + 1) 56 | 57 | if w > crop_w: 58 | pos_w = random.randint(0, w - crop_w + 1) 59 | 60 | return pos_h, pos_w 61 | 62 | 63 | def pad_image_to_shape(img, shape, border_mode, value): 64 | margin = np.zeros(4, np.uint32) 65 | shape = get_2dshape(shape) 66 | pad_height = shape[0] - img.shape[0] if shape[0] - img.shape[0] > 0 else 0 67 | pad_width = shape[1] - img.shape[1] if shape[1] - img.shape[1] > 0 else 0 68 | 69 | margin[0] = pad_height // 2 70 | margin[1] = pad_height // 2 + pad_height % 2 71 | margin[2] = pad_width // 2 72 | margin[3] = pad_width // 2 + pad_width % 2 73 | 74 | img = cv2.copyMakeBorder(img, margin[0], margin[1], margin[2], margin[3], 75 | border_mode, value=value) 76 | 77 | return img, margin 78 | 79 | 80 | def pad_image_size_to_multiples_of(img, multiple, pad_value): 81 | h, w = img.shape[:2] 82 | d = multiple 83 | 84 | def canonicalize(s): 85 | v = s // d 86 | return (v + (v * d != s)) * d 87 | 88 | th, tw = map(canonicalize, (h, w)) 89 | 90 | return pad_image_to_shape(img, (th, tw), cv2.BORDER_CONSTANT, pad_value) 91 | 92 | 93 | def resize_ensure_shortest_edge(img, edge_length, 94 | interpolation_mode=cv2.INTER_LINEAR): 95 | assert isinstance(edge_length, int) and edge_length > 0, edge_length 96 | h, w = img.shape[:2] 97 | if h < w: 98 | ratio = float(edge_length) / h 99 | th, tw = edge_length, max(1, int(ratio * w)) 100 | else: 101 | ratio = float(edge_length) / w 102 | th, tw = max(1, int(ratio * h)), edge_length 103 | img = cv2.resize(img, (tw, th), interpolation_mode) 104 | 105 | return img 106 | 107 | 108 | def random_scale(img, gt, scales): 109 | scale = random.choice(scales) 110 | sh = int(img.shape[0] * scale) 111 | sw = int(img.shape[1] * scale) 112 | img = cv2.resize(img, (sw, sh), interpolation=cv2.INTER_LINEAR) 113 | gt = cv2.resize(gt, (sw, sh), interpolation=cv2.INTER_NEAREST) 114 | 115 | return img, gt, scale 116 | 117 | 118 | def random_scale_with_length(img, gt, length): 119 | size = random.choice(length) 120 | sh = size 121 | sw = size 122 | img = cv2.resize(img, (sw, sh), interpolation=cv2.INTER_LINEAR) 123 | gt = cv2.resize(gt, (sw, sh), interpolation=cv2.INTER_NEAREST) 124 | 125 | return img, gt, size 126 | 127 | 128 | def random_mirror(img, gt): 129 | if random.random() >= 0.5: 130 | img = cv2.flip(img, 1) 131 | gt = cv2.flip(gt, 1) 132 | 133 | return img, gt, 134 | 135 | 136 | def random_rotation(img, gt): 137 | angle = random.random() * 20 - 10 138 | h, w = img.shape[:2] 139 | rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1) 140 | img = cv2.warpAffine(img, rotation_matrix, (w, h), flags=cv2.INTER_LINEAR) 141 | gt = cv2.warpAffine(gt, rotation_matrix, (w, h), flags=cv2.INTER_NEAREST) 142 | 143 | return img, gt 144 | 145 | 146 | def random_gaussian_blur(img): 147 | gauss_size = random.choice([1, 3, 5, 7]) 148 | if gauss_size > 1: 149 | # do the gaussian blur 150 | img = cv2.GaussianBlur(img, (gauss_size, gauss_size), 0) 151 | 152 | return img 153 | 154 | 155 | def center_crop(img, shape): 156 | h, w = shape[0], shape[1] 157 | y = (img.shape[0] - h) // 2 158 | x = (img.shape[1] - w) // 2 159 | return 
img[y:y + h, x:x + w] 160 | 161 | 162 | def random_crop(img, gt, size): 163 | if isinstance(size, numbers.Number): 164 | size = (int(size), int(size)) 165 | else: 166 | size = size 167 | 168 | h, w = img.shape[:2] 169 | crop_h, crop_w = size[0], size[1] 170 | 171 | if h > crop_h: 172 | x = random.randint(0, h - crop_h + 1) 173 | img = img[x:x + crop_h, :, :] 174 | gt = gt[x:x + crop_h, :] 175 | 176 | if w > crop_w: 177 | x = random.randint(0, w - crop_w + 1) 178 | img = img[:, x:x + crop_w, :] 179 | gt = gt[:, x:x + crop_w] 180 | 181 | return img, gt 182 | 183 | 184 | 185 | def normalize(img, mean, std): 186 | # pytorch pretrained model need the input range: 0-1 187 | img = img.astype(np.float32) / 255.0 188 | img = img - mean 189 | img = img / std 190 | 191 | return img 192 | def resize_image(img, h, w, **up_kwargs): 193 | return F.upsample(img, (h, w), **up_kwargs) 194 | 195 | 196 | def pad_image(img, mean, std, crop_size): 197 | b,c,h,w = img.size() 198 | assert(c==3) 199 | padh = crop_size - h if h < crop_size else 0 200 | padw = crop_size - w if w < crop_size else 0 201 | pad_values = -np.array(mean) / np.array(std) 202 | img_pad = img.new().resize_(b,c,h+padh,w+padw) 203 | for i in range(c): 204 | # note that pytorch pad params is in reversed orders 205 | img_pad[:,i,:,:] = F.pad(img[:,i,:,:], (0, padw, 0, padh), value=pad_values[i]) 206 | assert(img_pad.size(2)>=crop_size and img_pad.size(3)>=crop_size) 207 | return img_pad 208 | 209 | 210 | def crop_image(img, h0, h1, w0, w1): 211 | return img[:,:,h0:h1,w0:w1] 212 | 213 | 214 | def flip_image(img): 215 | assert(img.dim()==4) 216 | with torch.cuda.device_of(img): 217 | idx = torch.arange(img.size(3)-1, -1, -1).type_as(img).long() 218 | return img.index_select(3, idx) 219 | -------------------------------------------------------------------------------- /libs/utils/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Donny You(youansheng@gmail.com) 4 | # Logging tool implemented with the python Package logging. 5 | 6 | 7 | import argparse 8 | import logging 9 | import os 10 | import sys 11 | 12 | DEFAULT_LOG_LEVEL = 'info' 13 | DEFAULT_LOG_FILE = './default.log' 14 | DEFAULT_LOG_FORMAT = '%(asctime)s %(levelname)-7s %(message)s' 15 | 16 | LOG_LEVEL_DICT = { 17 | 'debug': logging.DEBUG, 18 | 'info': logging.INFO, 19 | 'warning': logging.WARNING, 20 | 'error': logging.ERROR, 21 | 'critical': logging.CRITICAL 22 | } 23 | 24 | 25 | class Logger(object): 26 | """ 27 | Args: 28 | Log level: CRITICAL>ERROR>WARNING>INFO>DEBUG. 29 | Log file: The file that stores the logging info. 30 | rewrite: Clear the log file. 31 | log format: The format of log messages. 32 | stdout level: The log level to print on the screen. 
33 | """ 34 | log_level = None 35 | log_file = None 36 | log_format = None 37 | rewrite = None 38 | stdout_level = None 39 | logger = None 40 | 41 | @staticmethod 42 | def init(log_level = DEFAULT_LOG_LEVEL, 43 | log_file = DEFAULT_LOG_FILE, 44 | log_format = DEFAULT_LOG_FORMAT, 45 | rewrite = False, 46 | stdout_level = None): 47 | Logger.log_level = log_level 48 | Logger.log_file = log_file 49 | Logger.log_format = log_format 50 | Logger.rewrite = rewrite 51 | Logger.stdout_level = stdout_level 52 | 53 | filemode = 'w' 54 | if not Logger.rewrite: 55 | filemode = 'a' 56 | 57 | dir_name = os.path.dirname(os.path.abspath(Logger.log_file)) 58 | if not os.path.exists(dir_name): 59 | os.makedirs(dir_name) 60 | 61 | Logger.logger = logging.getLogger() 62 | 63 | if not Logger.log_level in LOG_LEVEL_DICT: 64 | print('Invalid logging level: {}'.format(Logger.log_level)) 65 | Logger.log_level = DEFAULT_LOG_LEVEL 66 | 67 | Logger.logger.setLevel(LOG_LEVEL_DICT[Logger.log_level]) 68 | 69 | fmt = logging.Formatter(Logger.log_format) 70 | fh = logging.FileHandler(Logger.log_file, mode=filemode) 71 | fh.setFormatter(fmt) 72 | fh.setLevel(LOG_LEVEL_DICT[Logger.log_level]) 73 | 74 | Logger.logger.addHandler(fh) 75 | 76 | if stdout_level is not None: 77 | console = logging.StreamHandler() 78 | if not Logger.stdout_level in LOG_LEVEL_DICT: 79 | print('Invalid logging level: {}'.format(Logger.stdout_level)) 80 | return 81 | 82 | console.setLevel(LOG_LEVEL_DICT[Logger.stdout_level]) 83 | console.setFormatter(fmt) 84 | Logger.logger.addHandler(console) 85 | 86 | @staticmethod 87 | def set_log_file(file_path): 88 | Logger.log_file = file_path 89 | Logger.init() 90 | 91 | @staticmethod 92 | def set_log_level(log_level): 93 | if not LOG_LEVEL_DICT.has_key(log_level): 94 | print('Invalid logging level: {}'.format(Logger.log_level)) 95 | return 96 | 97 | Logger.log_level = log_level 98 | Logger.init() 99 | 100 | @staticmethod 101 | def clear_log_file(): 102 | Logger.rewrite = True 103 | Logger.init() 104 | 105 | @staticmethod 106 | def set_stdout_level(log_level): 107 | if not LOG_LEVEL_DICT.has_key(log_level): 108 | print('Invalid logging level: {}'.format(Logger.log_level)) 109 | return 110 | 111 | Logger.stdout_level = log_level 112 | Logger.init() 113 | 114 | @staticmethod 115 | def debug(message): 116 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 117 | lineno = sys._getframe().f_back.f_lineno 118 | prefix = '[{}, {}]'.format(filename,lineno) 119 | Logger.logger.debug('{} {}'.format(prefix, message)) 120 | 121 | @staticmethod 122 | def info(message): 123 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 124 | lineno = sys._getframe().f_back.f_lineno 125 | prefix = '[{}, {}]'.format(filename,lineno) 126 | Logger.logger.info('{} {}'.format(prefix, message)) 127 | 128 | @staticmethod 129 | def warn(message): 130 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 131 | lineno = sys._getframe().f_back.f_lineno 132 | prefix = '[{}, {}]'.format(filename,lineno) 133 | Logger.logger.warn('{} {}'.format(prefix, message)) 134 | 135 | @staticmethod 136 | def error(message): 137 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 138 | lineno = sys._getframe().f_back.f_lineno 139 | prefix = '[{}, {}]'.format(filename,lineno) 140 | Logger.logger.error('{} {}'.format(prefix, message)) 141 | 142 | @staticmethod 143 | def critical(message): 144 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 145 | lineno = 
sys._getframe().f_back.f_lineno 146 | prefix = '[{}, {}]'.format(filename,lineno) 147 | Logger.logger.critical('{} {}'.format(prefix, message)) 148 | 149 | 150 | if __name__ == "__main__": 151 | parser = argparse.ArgumentParser() 152 | parser.add_argument('--log_level', default="info", type=str, 153 | dest='log_level', help='To set the log level to files.') 154 | parser.add_argument('--stdout_level', default=None, type=str, 155 | dest='stdout_level', help='To set the level to print to screen.') 156 | parser.add_argument('--log_file', default="./default.log", type=str, 157 | dest='log_file', help='The path of log files.') 158 | parser.add_argument('--log_format', default="%(asctime)s %(levelname)-7s %(message)s", 159 | type=str, dest='log_format', help='The format of log messages.') 160 | parser.add_argument('--rewrite', default=False, type=bool, 161 | dest='rewrite', help='Clear the log files existed.') 162 | 163 | args = parser.parse_args() 164 | Logger.init(log_level = args.log_level, 165 | stdout_level = args.stdout_level, 166 | log_file = args.log_file, 167 | log_format = args.log_format, 168 | rewrite = args.rewrite) 169 | 170 | Logger.info("info test.") 171 | Logger.debug("debug test.") 172 | Logger.warn("warn test.") 173 | Logger.error("error test.") 174 | -------------------------------------------------------------------------------- /libs/utils/tools.py: -------------------------------------------------------------------------------- 1 | # some tools for network training 2 | 3 | import argparse 4 | import time 5 | from collections import OrderedDict 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | def all_reduce_tensor(tensor, op=dist.ReduceOp.SUM, world_size=1): 12 | tensor = tensor.clone() 13 | dist.all_reduce(tensor, op) 14 | tensor.div_(world_size) 15 | return tensor 16 | 17 | 18 | def str2bool(v): 19 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 20 | return True 21 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 22 | return False 23 | else: 24 | raise argparse.ArgumentTypeError('Boolean value expected.') 25 | 26 | 27 | def lr_poly(base_lr, iter, max_iter, power): 28 | return base_lr * ((1 - float(iter) / max_iter) ** (power)) 29 | 30 | 31 | 32 | def adjust_learning_rate(optimizer, args, i_iter, total_steps): 33 | lr = lr_poly(args.learning_rate, i_iter, total_steps, args.power) 34 | optimizer.param_groups[0]['lr'] = lr 35 | return lr 36 | 37 | 38 | 39 | def set_bn_momentum(m): 40 | classname = m.__class__.__name__ 41 | if classname.find('BatchNorm') != -1: 42 | 43 | m.momentum = 0.0003 44 | 45 | def fixModelBN(m): 46 | pass 47 | 48 | 49 | def load_model(model, model_file, is_restore=False): 50 | t_start = time.time() 51 | if isinstance(model_file, str): 52 | state_dict = torch.load(model_file, map_location=torch.device('cpu')) 53 | if 'model' in state_dict.keys(): 54 | state_dict = state_dict['model'] 55 | else: 56 | state_dict = model_file 57 | t_ioend = time.time() 58 | 59 | if is_restore: 60 | new_state_dict = OrderedDict() 61 | for k, v in state_dict.items(): 62 | name = 'module.' 
+ k 63 | new_state_dict[name] = v 64 | state_dict = new_state_dict 65 | 66 | model.load_state_dict(state_dict, strict=False) 67 | ckpt_keys = set(state_dict.keys()) 68 | own_keys = set(model.state_dict().keys()) 69 | missing_keys = own_keys - ckpt_keys 70 | unexpected_keys = ckpt_keys - own_keys 71 | 72 | if len(missing_keys) > 0: 73 | print('Missing key(s) in state_dict: {}'.format( 74 | ', '.join('{}'.format(k) for k in missing_keys))) 75 | 76 | if len(unexpected_keys) > 0: 77 | print('Unexpected key(s) in state_dict: {}'.format( 78 | ', '.join('{}'.format(k) for k in unexpected_keys))) 79 | 80 | del state_dict 81 | t_end = time.time() 82 | print( 83 | "Load model, Time usage:\n\tIO: {}, initialize parameters: {}".format( 84 | t_ioend - t_start, t_end - t_ioend)) 85 | 86 | return model -------------------------------------------------------------------------------- /prediction_test_different_size.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import torch.nn.functional as F 5 | import cv2 6 | import numpy as np 7 | import datetime 8 | 9 | 10 | import libs.models as models 11 | 12 | 13 | N_CLASS = 19 14 | color_list = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] 15 | color_map = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153), 16 | (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), 17 | (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)] 18 | up_kwargs = {'mode': 'bilinear', 'align_corners': True} 19 | 20 | 21 | def transform(img): 22 | img = cv2.imread(img) 23 | IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) 24 | img = img - IMG_MEAN 25 | img = img.transpose((2, 0, 1)) 26 | img = torch.from_numpy(img).unsqueeze(0).cuda() 27 | return img 28 | 29 | def transform_rgb(img): 30 | img = cv2.imread(img, cv2.IMREAD_COLOR)[:, :, ::-1].astype(np.float32) 31 | 32 | img /= 255 33 | IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) 34 | IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) 35 | 36 | img -= IMG_MEAN 37 | img /= IMG_VARS 38 | 39 | img = img.transpose((2, 0, 1)) 40 | img = torch.from_numpy(img).unsqueeze(0).cuda() 41 | return img 42 | 43 | 44 | 45 | def makeTestlist(dir,start=0,end=1525): 46 | out = [] 47 | floder = os.listdir(dir) 48 | for f in floder: 49 | floder_dir = os.path.join(dir, f) 50 | for i in os.listdir(floder_dir): 51 | out.append(os.path.join(floder_dir, i)) 52 | out.sort() 53 | return out[start:end] 54 | 55 | 56 | def WholeTest(args, model, size=1.0): 57 | net = model.cuda() 58 | net.eval() 59 | saved_state_dict = torch.load(args.resume) 60 | net.load_state_dict(saved_state_dict) 61 | img_list = makeTestlist(args.input_dir) 62 | out_dir = args.output_dir 63 | for i in img_list: 64 | name = i 65 | with torch.no_grad(): 66 | if args.rgb: 67 | img = transform_rgb(i) 68 | else: 69 | img = transform(i) 70 | _, _, origin_h, origin_w = img.size() 71 | h, w = int(origin_h*size), int(origin_w*size) 72 | img = F.upsample(img, size=(h, w), mode="bilinear", align_corners=True) 73 | out = net(img)[0] 74 | out = F.upsample(out, size=(origin_h, origin_w), mode='bilinear', align_corners=True) 75 | result = out.argmax(dim=1)[0] 76 | result = result.data.cpu().squeeze().numpy() 77 | row, col = result.shape 78 | dst = np.ones((row, col), dtype=np.uint8) * 255 79 | for i in range(19): 80 | dst[result 
== i] = color_list[i] 81 | print(name, " done!") 82 | save_name = os.path.join(out_dir, "/".join(name.split('/')[4:])) 83 | save_dir = "/".join(save_name.split("/")[:-1]) 84 | if not os.path.exists(save_dir): 85 | os.makedirs(save_dir) 86 | cv2.imwrite(save_name, dst) 87 | 88 | if __name__ == '__main__': 89 | parser = argparse.ArgumentParser(description='PyTorch \ 90 | Segmentation Crop Prediction') 91 | parser.add_argument('--input_dir', type=str, 92 | default="/home/lxt/data/Cityscapes/leftImg8bit/test", 93 | help='input image folder (default: \ 94 | $(HOME)/data)') 95 | parser.add_argument("--input_disp_dir", type=str, default=None) 96 | parser.add_argument('--output_dir', type=str, default="/home/lxt/debug/cgnl_ohem_crop_ms", 97 | help='output directory for saving the predicted segmentation maps') 98 | parser.add_argument("--resume", type=str, default="/home/lxt/Desktop/Seg_model_ZOO/CNL_net_4w_ohem/CS_scenes_40000.pth") 99 | parser.add_argument("--start", type=int, default=0, help="start index of the test split") 100 | parser.add_argument("--end", type=int, default=1525, help="end index of the test split") 101 | parser.add_argument("--gpu", type=str, default="0", help="which gpu to use") 102 | parser.add_argument("--arch", type=str, default=None, help="which network is used") 103 | parser.add_argument("--size", type=float, default=1.0, help="scale ratio applied to the input images") 104 | parser.add_argument("--rgb", type=int, default=0) 105 | args = parser.parse_args() 106 | os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu) 107 | test_list = makeTestlist(args.input_dir, args.start, args.end) 108 | model = models.__dict__[args.arch](num_classes=19, data_set="cityscapes") 109 | WholeTest(args, model=model, size=args.size) -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # :zap:Fast_Seg:zap: 4 | 5 | This repo tries to implement **state-of-the-art fast semantic segmentation models** on **road scene datasets** (Cityscapes, 6 | Mapillary, CamVid). 7 | 8 | 9 | ## News!! 10 | 11 | Check out our Fast Segmentation Framework in [SFSegNets](https://github.com/lxtGH/SFSegNets): SFNet (ECCV-2020) and SFNet-Lite (IJCV-2023). 12 | 13 | # What is the purpose of this repo? 14 | This repo is for experimenting with and verifying ideas in fast semantic segmentation, and it also provides several fast models. 15 | 16 | Our ICNet implementation achieves **74.5% mIoU**, which is **5 points** higher than the original paper! Checkpoint: [model](https://drive.google.com/open?id=1A6z87_GCHEuKeZfbGpEvnkZ0POdW2Q_U) 17 | 18 | # Another Link For Accurate Seg: 19 | [GALD-Net](https://github.com/lxtGH/GALD-Net) provides implementations of some state-of-the-art accurate methods. 20 | 21 | # Model Zoo (Updating) 22 | 1. ICNet: ICNet for real-time semantic segmentation on high-resolution images. ECCV-2018, [paper](https://arxiv.org/abs/1704.08545) 23 | 2. DF-Net: Partial Order Pruning: for Best Speed/Accuracy Trade-off in Neural Architecture Search. CVPR-2019, [paper](https://arxiv.org/abs/1903.03777) 24 | 3. Bi-Seg: Bilateral segmentation network for real-time semantic segmentation. ECCV-2018, [paper](https://arxiv.org/pdf/1808.00897.pdf) 25 | 4. DFA-Net: Deep feature aggregation for real-time semantic segmentation. CVPR-2019, [paper](https://arxiv.org/abs/1904.02216) 26 | 5. ESP-Net: Efficient Spatial Pyramid of Dilated Convolutions for Semantic Segmentation. ECCV-2018, [paper](https://arxiv.org/abs/1803.06815) 27 | 6. 
SwiftNet: In defense of pre-trained ImageNet architectures for real-time semantic segmentation of road-driving images. CVPR-2019, [paper](http://openaccess.thecvf.com/content_CVPR_2019/papers/Orsic_In_Defense_of_Pre-Trained_ImageNet_Architectures_for_Real-Time_Semantic_Segmentation_CVPR_2019_paper.pdf) 28 | 7. MSFNet: Real-Time Semantic Segmentation via Multiply Spatial Fusion Network (Face++). arXiv, [paper](https://arxiv.org/abs/1911.07217) 29 | 8. Fast-SCNN: Fast Semantic Segmentation Network. BMVC-2019, [paper](https://arxiv.org/abs/1902.04502) 30 | 31 | 32 | 33 | 34 | # Usage 35 | 1. Use train_distribute.py for training. For example, use the scripts in the exp folder for training and evaluation. 36 | 2. Use prediction_test_different_size.py for prediction with different input sizes (a minimal restore-and-evaluate sketch is included below). 37 | 38 | 39 | ## Dataset Preparation 40 | - You can download the [Cityscapes](https://www.cityscapes-dataset.com/) dataset from [here](https://www.cityscapes-dataset.com/downloads/). Note: please download [leftImg8bit_trainvaltest.zip (11GB)](https://www.cityscapes-dataset.com/file-handling/?packageID=4) and [gtFine_trainvaltest (241MB)](https://www.cityscapes-dataset.com/file-handling/?packageID=1). 41 | - You can download the CamVid dataset from [here](https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid). 42 | - You can download pretrained XceptionA (RGB input), ResNet18 (BGR input), and ResNet50 (BGR input) weights from this 43 | [link](https://pan.baidu.com/s/1mM_Lc44iX9CT1nPq6tjOAA) (password: bnfv), 44 | or from Google Drive: [resnet50-deep.pth](https://drive.google.com/file/d/166ANLmlV5cQTkmzD0pngc8leOQUR_32n/view?usp=sharing), [icnet_final.pth](https://drive.google.com/file/d/1A6z87_GCHEuKeZfbGpEvnkZ0POdW2Q_U/view?usp=sharing), [resnet18-deep-caffe.pth](https://drive.google.com/file/d/1P_d9T__kTKIEFK8ElQFq0cZ1XKx1gMGn/view?usp=sharing), [xceptiona_imagenet.pth](https://drive.google.com/file/d/1y4TuRod_F9NEeBQ1fo9GI-WLETS-b1jF/view?usp=sharing) 45 | 46 | 47 | # Some Advice on Training 48 | 1. Use synchronized BN (apex). 49 | 2. Use a batch size >= 8. 50 | 3. Use a deeply supervised loss for easier optimization. 51 | 4. Use a large crop size during training. 52 | 5. Train small models longer (60,000 iterations or more). 53 | 6. Use Mapillary data for pretraining to boost performance. 54 | 7. The deep-stem ResNet runs slower than the torchvision pretrained ResNet but gives higher accuracy. 55 | 8. Small networks do not need ImageNet pretraining if trained long enough on Cityscapes (Fast-SCNN paper). 56 | 57 | |(a) test image|(b) ground truth|(c) predicted result| 58 | |:--:|:--:|:--:| 59 | |![a](data/fig/frankfurt_000000_002196_leftImg8bit.png)|![b](data/fig/frankfurt_000000_002196_gtFine_color.png)|![c](data/fig/frankfurt_000000_002196_leftImg8bit_pred.png)| 60 | 61 | # License 62 | This project is released under the Apache 2.0 license. 
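To make the Usage notes above concrete, here is a minimal sketch of restoring a trained checkpoint and running a single forward pass, mirroring what prediction_test_different_size.py does internally. PSPNet_res50 and the checkpoint path are placeholders for whichever architecture you actually trained; the sketch assumes a GPU is available and that the checkpoint is a plain state_dict, which is how the prediction script loads its --resume file.

```python
import torch

import libs.models as models

# Build a model from the zoo and restore its weights for evaluation.
net = models.PSPNet_res50(num_classes=19).cuda()
net.load_state_dict(torch.load("path/to/checkpoint.pth", map_location="cpu"))
net.eval()

with torch.no_grad():
    # In eval mode the model returns [main_logits] at roughly 1/8 of the input resolution.
    logits = net(torch.randn(1, 3, 769, 769).cuda())[0]
print(logits.shape)  # upsample to the original size before taking the per-pixel argmax
```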
63 | 64 | 65 | # Acknowledgement 66 | 67 | Thanks to the previous open-sourced repo: 68 | [Encoding](https://github.com/zhanghang1989/PyTorch-Encoding) 69 | [CCNet](https://github.com/speedinghzl/CCNet) 70 | [TorchSeg](https://github.com/ycszen/TorchSeg) 71 | [pytorchseg](https://github.com/meetshah1995/pytorch-semseg) 72 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | apex 2 | opencv-python 3 | torch>=1.1.0 4 | torchvision 5 | -------------------------------------------------------------------------------- /train_distribute.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | """ 4 | Distribute Training Code For Fast training. 5 | """ 6 | 7 | import argparse 8 | import os 9 | import os.path as osp 10 | import timeit 11 | import numpy as np 12 | 13 | 14 | import torch 15 | from torch.utils import data 16 | import torch.optim as optim 17 | import torch.backends.cudnn as cudnn 18 | 19 | from libs.utils.logger import Logger as Log 20 | from libs.utils.tools import adjust_learning_rate, all_reduce_tensor 21 | from libs.datasets.cityscapes import Cityscapes 22 | from libs.datasets.camvid import CamVidDataSet 23 | 24 | from libs.core.loss import CriterionOhemDSN, CriterionDSN, CriterionICNet, CriterionDFANet 25 | 26 | 27 | try: 28 | import apex 29 | from apex import amp 30 | from apex.parallel import DistributedDataParallel, SyncBatchNorm 31 | except ImportError: 32 | raise ImportError( 33 | "Please install apex from https://www.github.com/nvidia/apex.") 34 | 35 | 36 | def str2bool(v): 37 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 38 | return True 39 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 40 | return False 41 | else: 42 | raise argparse.ArgumentTypeError('Boolean value expected.') 43 | 44 | 45 | def get_arguments(): 46 | """ 47 | Parse all the arguments 48 | Returns: args 49 | A list of parsed arguments. 
50 | """ 51 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network") 52 | parser.add_argument("--batch_size_per_gpu", type=int, default=1, 53 | help="Number of images sent to the network in one step.") 54 | parser.add_argument("--batch_size", type=int, default=8, 55 | help="Number of images sent to the network in one step.") 56 | parser.add_argument('--gpu_num',type=int, default=8) 57 | parser.add_argument("--data_dir", type=str, default="./data", 58 | help="Path to the directory containing the Cityscapes dataset.") 59 | parser.add_argument("--data_list", type=str, default="./data/cityscapes/train.txt", 60 | help="Path to the file listing the images in the dataset.") 61 | parser.add_argument("--data_set", type=str, default="cityscapes", help="dataset to train") 62 | parser.add_argument("--arch", type=str, default="ICNet", help="network architecture") 63 | parser.add_argument("--ignore_label", type=int, default=255, 64 | help="The index of the label to ignore during the training.") 65 | parser.add_argument("--input_size", type=int, default=832 , 66 | help="Comma-separated string with height and width of images.") 67 | parser.add_argument("--learning_rate", type=float, default=1e-2, 68 | help="Base learning rate for training with polynomial decay.") 69 | parser.add_argument("--momentum", type=float, default=0.9, 70 | help="Momentum component of the optimiser.") 71 | parser.add_argument("--num_classes", type=int, default=19, 72 | help="Number of classes to predict (including background).") 73 | parser.add_argument("--num_steps", type=int, default=50000, 74 | help="Number of training steps.") 75 | parser.add_argument("--power", type=float, default=0.9, 76 | help="Decay parameter to compute the learning rate.") 77 | parser.add_argument("--weight_decay", type=float, default=5e-4, 78 | help="Regularisation parameter for L2-loss.") 79 | parser.add_argument("--num_workers", type=int, default=8) 80 | parser.add_argument("--random_mirror", action="store_true", default=True, 81 | help="Whether to randomly mirror the inputs during the training.") 82 | parser.add_argument("--random_scale", action="store_true", default=True, 83 | help="Whether to randomly scale the inputs during the training.") 84 | parser.add_argument("--random_seed", type=int, default=1234, 85 | help="Random seed to have reproducible results.") 86 | 87 | # ***** Params for save and load ****** 88 | parser.add_argument("--restore_from", type=str, default="./pretrained", 89 | help="Where restore models parameters from.") 90 | parser.add_argument("--save_pred_every", type=int, default=5000, 91 | help="Save summaries and checkpoint every often.") 92 | parser.add_argument("--save_dir", type=str, default=None, 93 | help="Where to save snapshots of the models.") 94 | parser.add_argument("--save_start",type=int, default=40000) 95 | parser.add_argument("--gpu", type=str, default=None, 96 | help="choose gpu device.") 97 | parser.add_argument("--ft", type=bool, default=False, 98 | help="fine-tune the models with large input size.") 99 | # **** Params for OHEM **** # 100 | parser.add_argument("--ohem", type=str2bool, default='False', 101 | help="use hard negative mining") 102 | parser.add_argument("--ohem_thres", type=float, default=0.7, 103 | help="choose the samples with correct probability underthe threshold.") 104 | parser.add_argument("--ohem_keep", type=int, default=100000, 105 | help="choose the samples with correct probability underthe threshold.") 106 | # ***** Params for logging ***** # 107 | 
parser.add_argument('--log_level', default="info", type=str, 108 | dest='log_level', help='Log level for the log file.') 109 | parser.add_argument('--log_file', default="./log/train.log", type=str, 110 | dest='log_file', help='The path of the log file.') 111 | parser.add_argument("--log_format", default="%(asctime)s %(levelname)-7s %(message)s", type=str, 112 | dest="log_format", help="format of log records" 113 | ) 114 | parser.add_argument('--stdout_level', default="info", type=str, 115 | dest='stdout_level', help='Log level for console output.') 116 | parser.add_argument("--rewrite", default=False, type=bool, 117 | dest="rewrite", help="whether to rewrite the log file when logging" 118 | ) 119 | parser.add_argument("--rgb", type=str2bool, default='False') 120 | # ***** Params for Distributed Training ***** # 121 | parser.add_argument('--apex', action='store_true', default=False, 122 | help='Use Nvidia Apex Distributed Data Parallel') 123 | parser.add_argument("--local_rank", default=0, type=int, help="local process rank, set by the distributed launcher") 124 | args = parser.parse_args() 125 | return args 126 | 127 | 128 | start = timeit.default_timer() 129 | 130 | args = get_arguments() 131 | 132 | 133 | def main(): 134 | 135 | # make save dir 136 | if args.local_rank == 0: 137 | if not os.path.exists(args.save_dir): 138 | os.makedirs(args.save_dir) 139 | # launch the logger 140 | Log.init( 141 | log_level=args.log_level, 142 | log_file=osp.join(args.save_dir, args.log_file), 143 | log_format=args.log_format, 144 | rewrite=args.rewrite, 145 | stdout_level=args.stdout_level 146 | ) 147 | # RGB or BGR input (RGB for ImageNet-pretrained models, BGR for Caffe-pretrained models) 148 | if args.rgb: 149 | IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) 150 | IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) 151 | else: 152 | IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) 153 | IMG_VARS = np.array((1, 1, 1), dtype=np.float32) 154 | 155 | # set models 156 | import libs.models as models 157 | deeplab = models.__dict__[args.arch](num_classes=args.num_classes, data_set=args.data_set) 158 | if args.restore_from is not None: 159 | saved_state_dict = torch.load(args.restore_from, map_location=torch.device('cpu')) 160 | new_params = deeplab.state_dict().copy() 161 | for i in saved_state_dict: 162 | i_parts = i.split('.') 163 | if i_parts[0] != 'fc': 164 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 165 | Log.info("load pretrained model") 166 | if deeplab.backbone is not None: 167 | deeplab.backbone.load_state_dict(new_params, strict=False) 168 | else: 169 | deeplab.load_state_dict(new_params, strict=False) 170 | else: 171 | Log.info("train from scratch") 172 | 173 | 174 | args.world_size = 1 175 | 176 | if 'WORLD_SIZE' in os.environ and args.apex: 177 | args.apex = int(os.environ['WORLD_SIZE']) > 1 178 | args.world_size = int(os.environ['WORLD_SIZE']) 179 | print("Total world size: ", int(os.environ['WORLD_SIZE'])) 180 | 181 | if args.gpu is not None: 182 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 183 | h, w = args.input_size, args.input_size 184 | input_size = (h, w) 185 | 186 | 187 | # Set the device according to local_rank.
188 | torch.cuda.set_device(args.local_rank) 189 | Log.info("Local Rank: {}".format(args.local_rank)) 190 | torch.distributed.init_process_group(backend='nccl', 191 | init_method='env://') 192 | # set optimizer 193 | optimizer = optim.SGD( 194 | [{'params': filter(lambda p: p.requires_grad, deeplab.parameters()), 'lr': args.learning_rate}], 195 | lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 196 | optimizer.zero_grad() 197 | 198 | # set on cuda 199 | deeplab.cuda() 200 | 201 | # models transformation 202 | model = DistributedDataParallel(deeplab) 203 | model = apex.parallel.convert_syncbn_model(model) 204 | model.train() 205 | model.float() 206 | model.cuda() 207 | 208 | # set loss function 209 | if args.ohem: 210 | criterion = CriterionOhemDSN(thresh=args.ohem_thres, min_kept=args.ohem_keep) # OHEM cross-entropy 211 | if "ic" in args.arch: 212 | criterion = CriterionICNet(thresh=args.ohem_thres, min_kept=args.ohem_keep) 213 | if "dfa" in args.arch: 214 | criterion = CriterionDFANet(thresh=args.ohem_thres, min_kept=args.ohem_keep) 215 | else: 216 | criterion = CriterionDSN() # plain cross-entropy 217 | criterion.cuda() 218 | 219 | cudnn.benchmark = True 220 | 221 | if args.world_size == 1: 222 | print(model) 223 | 224 | # this is a little different from the usual multi-GPU training setting: in distributed training 225 | # each trainloader belongs to one process that samples from the dataset class on its own. 226 | batch_size = args.gpu_num * args.batch_size_per_gpu 227 | max_iters = args.num_steps * batch_size / args.gpu_num 228 | # set data loader 229 | if args.data_set == "cityscapes": 230 | data_set = Cityscapes(args.data_dir, args.data_list, max_iters=max_iters, crop_size=input_size, 231 | scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN, vars=IMG_VARS, RGB=args.rgb) 232 | elif args.data_set == "camvid": 233 | data_set = CamVidDataSet(args.data_dir, args.data_list, max_iters=max_iters, crop_size=input_size, 234 | scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN, vars=IMG_VARS, RGB=args.rgb) 235 | else: 236 | raise ValueError("Unsupported dataset: {}".format(args.data_set))
237 | 238 | trainloader = data.DataLoader( 239 | data_set, 240 | batch_size=args.batch_size_per_gpu, shuffle=True, num_workers=args.num_workers, pin_memory=True) 241 | 242 | print("trainloader", len(trainloader)) 243 | 244 | torch.cuda.empty_cache() 245 | 246 | # start training: 247 | for i_iter, batch in enumerate(trainloader): 248 | images, labels = batch 249 | images = images.cuda() 250 | labels = labels.long().cuda() 251 | optimizer.zero_grad() 252 | lr = adjust_learning_rate(optimizer, args, i_iter, len(trainloader)) 253 | preds = model(images) 254 | 255 | loss = criterion(preds, labels) 256 | loss.backward() 257 | optimizer.step() 258 | reduce_loss = all_reduce_tensor(loss, 259 | world_size=args.gpu_num) 260 | if args.local_rank == 0: 261 | Log.info('iter = {} of {} completed, lr={}, loss = {}'.format(i_iter, 262 | len(trainloader), lr, reduce_loss.data.cpu().numpy())) 263 | if i_iter % args.save_pred_every == 0 and i_iter > args.save_start: 264 | print('save model ...') 265 | torch.save(deeplab.state_dict(), osp.join(args.save_dir, str(args.arch) + str(i_iter) + '.pth')) 266 | 267 | end = timeit.default_timer() 268 | 269 | if args.local_rank == 0: 270 | Log.info("Training cost: " + str(end - start) + ' seconds') 271 | Log.info("Save final model") 272 | torch.save(deeplab.state_dict(), osp.join(args.save_dir, str(args.arch) + '_final' + '.pth')) 273 | 274 | 275 | if __name__ == '__main__': 276 | main() 277 | -------------------------------------------------------------------------------- /val.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from scipy import ndimage 3 | import numpy as np 4 | import json 5 | 6 | import torch 7 | from torch.utils import data 8 | import torch.nn as nn 9 | 10 | import os 11 | from math import ceil 12 | from PIL import Image as PILImage 13 | 14 | from libs.datasets.cityscapes import Cityscapes 15 | from libs.datasets.camvid import CamVidDataSet 16 | 17 | DATA_DIRECTORY = 'cityscapes' 18 | DATA_LIST_PATH = './data/cityscapes/val.txt' 19 | IGNORE_LABEL = 255 20 | NUM_CLASSES = 19 21 | NUM_STEPS = 500 # Number of images in the validation set. 22 | INPUT_SIZE = 832 23 | RESTORE_FROM = './deeplab_resnet.pth' 24 | 25 | 26 | def str2bool(v): 27 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 28 | return True 29 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 30 | return False 31 | else: 32 | raise argparse.ArgumentTypeError('Boolean value expected.') 33 | 34 | 35 | def get_arguments(): 36 | """Parse all the arguments provided from the CLI. 37 | Returns: 38 | The parsed arguments.
39 | """ 40 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network") 41 | parser.add_argument("--data_dir", type=str, default=DATA_DIRECTORY, 42 | help="Path to the directory containing the dataset.") 43 | parser.add_argument("--data_list", type=str, default=DATA_LIST_PATH, 44 | help="Path to the file listing the images in the dataset.") 45 | parser.add_argument("--data_set", type=str, default="cityscapes", help="dataset to evaluate") 46 | parser.add_argument("--arch", type=str, default="CascadeRelatioNet_res50") 47 | parser.add_argument("--ignore_label", type=int, default=IGNORE_LABEL, 48 | help="The index of the label to ignore during the training.") 49 | parser.add_argument("--num_classes", type=int, default=19, 50 | help="Number of classes to predict (including background).") 51 | parser.add_argument("--restore_from", type=str, default=RESTORE_FROM, 52 | help="Where to restore model parameters from.") 53 | parser.add_argument("--gpu", type=str, default='0', 54 | help="choose gpu device.") 55 | parser.add_argument("--input_size", type=int, default=INPUT_SIZE, 56 | help="Height and width of the square crop used for sliding-window evaluation.") 57 | parser.add_argument("--whole", type=bool, default=False, 58 | help="evaluate on the whole input image.") 59 | parser.add_argument("--output_dir", type=str, default="outputs", 60 | help="output directory for predictions") 61 | parser.add_argument("--rgb", type=str2bool, default='False') 62 | return parser.parse_args() 63 | 64 | 65 | def get_palette(num_cls): 66 | """ Returns the color map for visualizing the segmentation mask. 67 | Args: 68 | num_cls: Number of classes 69 | Returns: 70 | The color map 71 | """ 72 | n = num_cls 73 | palette = [0] * (n * 3) 74 | for j in range(0, n): 75 | lab = j 76 | palette[j * 3 + 0] = 0 77 | palette[j * 3 + 1] = 0 78 | palette[j * 3 + 2] = 0 79 | i = 0 80 | while lab: 81 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 82 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 83 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 84 | i += 1 85 | lab >>= 3 86 | return palette 87 | 88 | 89 | def pad_image(img, target_size): 90 | """Pad an image up to the target size.""" 91 | rows_missing = target_size[0] - img.shape[2] 92 | cols_missing = target_size[1] - img.shape[3] 93 | padded_img = np.pad(img, ((0, 0), (0, 0), (0, rows_missing), (0, cols_missing)), 'constant') 94 | return padded_img 95 | 96 | 97 | def predict_sliding(net, image, tile_size, classes, flip_evaluation): 98 | interp = nn.Upsample(size=tile_size, mode='bilinear', align_corners=True) 99 | image_size = image.shape 100 | overlap = 1.0 / 3.0 101 | 102 | stride = ceil(tile_size[0] * (1 - overlap)) 103 | tile_rows = int(ceil((image_size[2] - tile_size[0]) / stride) + 1) # strided convolution formula 104 | tile_cols = int(ceil((image_size[3] - tile_size[1]) / stride) + 1) 105 | print("Need %i x %i prediction tiles @ stride %i px" % (tile_cols, tile_rows, stride)) 106 | full_probs = np.zeros((image_size[2], image_size[3], classes)) 107 | count_predictions = np.zeros((image_size[2], image_size[3], classes)) 108 | tile_counter = 0 109 | 110 | for row in range(tile_rows): 111 | for col in range(tile_cols): 112 | x1 = int(col * stride) 113 | y1 = int(row * stride) 114 | x2 = min(x1 + tile_size[1], image_size[3]) 115 | y2 = min(y1 + tile_size[0], image_size[2]) 116 | x1 = max(int(x2 - tile_size[1]), 0) # for portrait images the x1 underflows sometimes 117 | y1 = max(int(y2 - tile_size[0]), 0) # for very few rows y1 underflows 118 | 119 | img = image[:, :, y1:y2, x1:x2]
120 | padded_img = pad_image(img, tile_size) 121 | tile_counter += 1 122 | # print("Predicting tile %i" % tile_counter) 123 | # print(padded_img.shape) 124 | padded_img = torch.from_numpy(padded_img) 125 | padded_img = padded_img.cuda() 126 | # print(len(padded_img)) 127 | # print(padded_img) 128 | padded_prediction = net(padded_img) 129 | if isinstance(padded_prediction, list): 130 | padded_prediction = padded_prediction[0] 131 | padded_prediction = interp(padded_prediction).cpu().data[0].numpy().transpose(1, 2, 0) 132 | prediction = padded_prediction[0:img.shape[2], 0:img.shape[3], :] 133 | count_predictions[y1:y2, x1:x2] += 1 134 | full_probs[y1:y2, x1:x2] += prediction # accumulate the predictions also in the overlapping regions 135 | 136 | # average the predictions in the overlapping regions 137 | full_probs /= count_predictions 138 | return full_probs 139 | 140 | 141 | def predict_whole(net, image, tile_size): 142 | image = torch.from_numpy(image) 143 | interp = nn.Upsample(size=tile_size, mode='bilinear', align_corners=True) 144 | prediction = net(image.cuda()) 145 | if isinstance(prediction, list): 146 | prediction = prediction[0] 147 | prediction = interp(prediction).cpu().data[0].numpy().transpose(1, 2, 0) 148 | return prediction 149 | 150 | 151 | def predict_multiscale(net, image, tile_size, scales, classes, flip_evaluation): 152 | """ 153 | Predict an image by evaluating it at several scales. 154 | Whole-image prediction ("predict_whole") is used for inputs no larger than the original input size; 155 | for larger inputs, the sliding-window (cropping) method is used so that GPU memory is sufficient. 156 | """ 157 | image = image.data 158 | N_, C_, H_, W_ = image.shape 159 | full_probs = np.zeros((H_, W_, classes)) 160 | for scale in scales: 161 | scale = float(scale) 162 | print("Predicting image scaled by %f" % scale) 163 | scale_image = ndimage.zoom(image, (1.0, 1.0, scale, scale), order=1, prefilter=False) 164 | scaled_probs = predict_whole(net, scale_image, tile_size) 165 | if flip_evaluation: 166 | flip_scaled_probs = predict_whole(net, scale_image[:, :, :, ::-1].copy(), tile_size) 167 | scaled_probs = 0.5 * (scaled_probs + flip_scaled_probs[:, ::-1, :]) 168 | full_probs += scaled_probs 169 | full_probs /= len(scales) 170 | return full_probs 171 | 172 | 173 | def get_confusion_matrix(gt_label, pred_label, class_num): 174 | """ 175 | Calculate the confusion matrix for the given ground truth and prediction 176 | :param gt_label: the ground truth label 177 | :param pred_label: the predicted label 178 | :param class_num: the number of classes 179 | :return: the confusion matrix 180 | """ 181 | index = (gt_label * class_num + pred_label).astype('int32') 182 | label_count = np.bincount(index) 183 | confusion_matrix = np.zeros((class_num, class_num)) 184 | 185 | for i_label in range(class_num): 186 | for i_pred_label in range(class_num): 187 | cur_index = i_label * class_num + i_pred_label 188 | if cur_index < len(label_count): 189 | confusion_matrix[i_label, i_pred_label] = label_count[cur_index] 190 | 191 | return confusion_matrix 192 | 193 | 194 | def val(): 195 | """Create the model and start the evaluation process.""" 196 | args = get_arguments() 197 | 198 | h, w = args.input_size, args.input_size 199 | if args.whole: 200 | input_size = (1024, 2048) 201 | else: 202 | input_size = (h, w) 203 | import libs.models as models 204 | model = models.__dict__[args.arch](num_classes=args.num_classes, data_set=args.data_set) 205 | saved_state_dict = torch.load(args.restore_from) 206 |
model.load_state_dict(saved_state_dict, strict=False) 207 | 208 | model.eval() 209 | model.cuda() 210 | if args.rgb: 211 | IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) 212 | IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) 213 | else: 214 | IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) 215 | IMG_VARS = np.array((1, 1, 1), dtype=np.float32) 216 | 217 | # dataset = Cityscapes(args.data_dir, args.data_list, crop_size=(1024, 2048), mean=IMG_MEAN, vars=IMG_VARS, 218 | # scale=False, mirror=False, RGB=args.rgb) 219 | # set data loader 220 | if args.data_set == "cityscapes": 221 | data_set = Cityscapes(args.data_dir, args.data_list, crop_size=(1024, 2048), mean=IMG_MEAN, vars=IMG_VARS, 222 | scale=False, mirror=False, RGB=args.rgb) 223 | elif args.data_set == "camvid": 224 | data_set = CamVidDataSet(args.data_dir, args.data_list, crop_size=(360, 480), 225 | mean=IMG_MEAN, vars=IMG_VARS, scale=False, mirror=False, RGB=args.rgb) 226 | else: 227 | return 228 | 229 | testloader = data.DataLoader(data_set, batch_size=1, shuffle=False, pin_memory=True) 230 | 231 | confusion_matrix = np.zeros((args.num_classes, args.num_classes)) 232 | 233 | output_images = os.path.join(args.output_dir, "images") 234 | output_results = os.path.join(args.output_dir, "result") 235 | if not os.path.exists(args.output_dir): 236 | os.makedirs(args.output_dir) 237 | if not os.path.exists(output_images): 238 | os.makedirs(output_images) 239 | if not os.path.exists(output_results): 240 | os.makedirs(output_results) 241 | 242 | for index, batch in enumerate(testloader): 243 | if index % 100 == 0: 244 | print('%d processed' % index) 245 | image, label = batch 246 | size = image[0].size()[-2:] 247 | with torch.no_grad(): 248 | if args.whole: 249 | output = predict_multiscale(model, image, input_size, [1.0], args.num_classes, False) 250 | else: 251 | output = predict_sliding(model, image.numpy(), input_size, args.num_classes, True) 252 | 253 | seg_pred = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) 254 | 255 | seg_gt = np.asarray(label[0].numpy()[:size[0], :size[1]], dtype=int) 256 | 257 | ignore_index = seg_gt != args.ignore_label 258 | seg_gt = seg_gt[ignore_index] 259 | seg_pred = seg_pred[ignore_index] 260 | confusion_matrix += get_confusion_matrix(seg_gt, seg_pred, args.num_classes) 261 | 262 | pos = confusion_matrix.sum(1) 263 | res = confusion_matrix.sum(0) 264 | tp = np.diag(confusion_matrix) 265 | 266 | IU_array = (tp / np.maximum(1.0, pos + res - tp)) 267 | mean_IU = IU_array.mean() 268 | 269 | print({'meanIU': mean_IU, 'IU_array': IU_array}) 270 | with open(os.path.join(args.output_dir, "result", "result.txt"), 'w') as f: 271 | f.write(json.dumps({'meanIU': mean_IU, 'IU_array': IU_array.tolist()})) 272 | 273 | 274 | if __name__ == '__main__': 275 | val() 276 | --------------------------------------------------------------------------------
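Note on the learning-rate schedule: `train_distribute.py` calls `adjust_learning_rate(optimizer, args, i_iter, len(trainloader))` and exposes `--learning_rate` ("polynomial decay") and `--power`, but `libs/utils/tools.py` is not part of this listing. The sketch below is therefore an assumption based on the conventional "poly" schedule used by DeepLab-style training code, not the repository's verified implementation; `poly_lr` and `adjust_learning_rate_sketch` are illustrative names.

```python
# Minimal sketch of a poly learning-rate schedule (assumed behaviour of
# libs/utils/tools.adjust_learning_rate; names here are illustrative only).
def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    """Polynomial decay: lr = base_lr * (1 - cur_iter / max_iter) ** power."""
    return base_lr * (1 - cur_iter / float(max_iter)) ** power


def adjust_learning_rate_sketch(optimizer, base_lr, cur_iter, max_iter, power=0.9):
    """Apply the decayed rate to every parameter group and return it."""
    lr = poly_lr(base_lr, cur_iter, max_iter, power)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr


# Example with the defaults above (base lr 1e-2, power 0.9, 50000 steps):
# poly_lr(1e-2, 25000, 50000) is roughly 5.4e-3 halfway through training.
```

This would match how the training loop logs the value returned by `adjust_learning_rate` each iteration, though the real helper may differ in detail.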