├── .gitignore ├── LICENSE ├── data ├── camvid │ ├── camvid_test_list.txt │ ├── camvid_train_list.txt │ ├── camvid_trainval_list.txt │ └── camvid_val_list.txt ├── cityscapes │ ├── test.txt │ ├── train++.txt │ ├── train+.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt └── fig │ ├── frankfurt_000000_002196_gtFine_color.png │ ├── frankfurt_000000_002196_leftImg8bit.png │ └── frankfurt_000000_002196_leftImg8bit_pred.png ├── exp ├── train_dfanet.sh ├── train_dfsegv1.sh ├── train_dfsegv2.sh ├── train_icnet.sh └── train_pspnet.sh ├── libs ├── __init__.py ├── core │ ├── __init__.py │ ├── loss.py │ └── operators.py ├── datasets │ ├── __init__.py │ ├── camvid.py │ ├── cityscapes.py │ └── mapillary.py ├── models │ ├── BiSegNet.py │ ├── DFANet.py │ ├── DFSegNet.py │ ├── ESPNet.py │ ├── FastSCNN.py │ ├── ICNet.py │ ├── MSFNet.py │ ├── PSPNet.py │ ├── SwiftNet.py │ ├── __init__.py │ └── backbone │ │ ├── __init__.py │ │ ├── dfnet.py │ │ ├── resnet.py │ │ └── xception.py └── utils │ ├── __init__.py │ ├── image_utils.py │ ├── logger.py │ └── tools.py ├── prediction_test_different_size.py ├── readme.md ├── requirement.txt ├── train_distribute.py └── val.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.jpg 8 | 9 | # compilation and distribution 10 | __pycache__ 11 | _ext 12 | *.pyc 13 | *.so 14 | detectron2.egg-info/ 15 | build/ 16 | dist/ 17 | 18 | # pytorch/python/numpy formats 19 | *.pth 20 | *.pkl 21 | *.npy 22 | 23 | # ipython/jupyter notebooks 24 | *.ipynb 25 | **/.ipynb_checkpoints/ 26 | 27 | # Editor temporaries 28 | *.swn 29 | *.swo 30 | *.swp 31 | *~ 32 | 33 | # Pycharm editor settings 34 | .idea 35 | 36 | # project dirs 37 | /datasets 38 | /models 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /data/camvid/camvid_test_list.txt: -------------------------------------------------------------------------------- 1 | test/0001TP_008550.png testannot/0001TP_008550.png 2 | test/0001TP_008580.png testannot/0001TP_008580.png 3 | test/0001TP_008610.png testannot/0001TP_008610.png 4 | test/0001TP_008640.png testannot/0001TP_008640.png 5 | test/0001TP_008670.png testannot/0001TP_008670.png 6 | test/0001TP_008700.png testannot/0001TP_008700.png 7 | test/0001TP_008730.png testannot/0001TP_008730.png 8 | test/0001TP_008760.png testannot/0001TP_008760.png 9 | test/0001TP_008790.png testannot/0001TP_008790.png 10 | test/0001TP_008820.png testannot/0001TP_008820.png 11 | test/0001TP_008850.png testannot/0001TP_008850.png 12 | test/0001TP_008880.png testannot/0001TP_008880.png 13 | test/0001TP_008910.png testannot/0001TP_008910.png 14 | test/0001TP_008940.png testannot/0001TP_008940.png 15 | test/0001TP_008970.png testannot/0001TP_008970.png 16 | test/0001TP_009000.png testannot/0001TP_009000.png 17 | test/0001TP_009030.png testannot/0001TP_009030.png 18 | test/0001TP_009060.png testannot/0001TP_009060.png 19 | test/0001TP_009090.png testannot/0001TP_009090.png 20 | test/0001TP_009120.png testannot/0001TP_009120.png 21 | test/0001TP_009150.png testannot/0001TP_009150.png 22 | test/0001TP_009180.png testannot/0001TP_009180.png 23 | test/0001TP_009210.png testannot/0001TP_009210.png 24 | test/0001TP_009240.png testannot/0001TP_009240.png 25 | test/0001TP_009270.png testannot/0001TP_009270.png 26 | test/0001TP_009300.png testannot/0001TP_009300.png 27 | test/0001TP_009330.png testannot/0001TP_009330.png 28 | test/0001TP_009360.png testannot/0001TP_009360.png 29 | test/0001TP_009390.png testannot/0001TP_009390.png 30 | test/0001TP_009420.png testannot/0001TP_009420.png 31 | test/0001TP_009450.png testannot/0001TP_009450.png 32 | test/0001TP_009480.png testannot/0001TP_009480.png 33 | test/0001TP_009510.png testannot/0001TP_009510.png 34 | test/0001TP_009540.png testannot/0001TP_009540.png 35 | test/0001TP_009570.png testannot/0001TP_009570.png 36 | test/0001TP_009600.png testannot/0001TP_009600.png 37 | test/0001TP_009630.png testannot/0001TP_009630.png 38 | test/0001TP_009660.png testannot/0001TP_009660.png 39 | test/0001TP_009690.png testannot/0001TP_009690.png 40 | test/0001TP_009720.png testannot/0001TP_009720.png 41 | test/0001TP_009750.png testannot/0001TP_009750.png 42 | test/0001TP_009780.png testannot/0001TP_009780.png 43 | test/0001TP_009810.png testannot/0001TP_009810.png 44 | test/0001TP_009840.png testannot/0001TP_009840.png 45 | test/0001TP_009870.png testannot/0001TP_009870.png 46 | test/0001TP_009900.png testannot/0001TP_009900.png 47 | test/0001TP_009930.png testannot/0001TP_009930.png 48 | test/0001TP_009960.png testannot/0001TP_009960.png 49 | test/0001TP_009990.png testannot/0001TP_009990.png 50 | test/0001TP_010020.png testannot/0001TP_010020.png 51 | test/0001TP_010050.png testannot/0001TP_010050.png 52 | test/0001TP_010080.png testannot/0001TP_010080.png 53 | test/0001TP_010110.png testannot/0001TP_010110.png 54 | test/0001TP_010140.png testannot/0001TP_010140.png 55 | test/0001TP_010170.png testannot/0001TP_010170.png 56 | test/0001TP_010200.png testannot/0001TP_010200.png 57 | test/0001TP_010230.png testannot/0001TP_010230.png 58 | test/0001TP_010260.png testannot/0001TP_010260.png 59 | test/0001TP_010290.png testannot/0001TP_010290.png 60 | test/0001TP_010320.png testannot/0001TP_010320.png 
61 | test/0001TP_010350.png testannot/0001TP_010350.png 62 | test/0001TP_010380.png testannot/0001TP_010380.png 63 | test/Seq05VD_f00000.png testannot/Seq05VD_f00000.png 64 | test/Seq05VD_f00030.png testannot/Seq05VD_f00030.png 65 | test/Seq05VD_f00060.png testannot/Seq05VD_f00060.png 66 | test/Seq05VD_f00090.png testannot/Seq05VD_f00090.png 67 | test/Seq05VD_f00120.png testannot/Seq05VD_f00120.png 68 | test/Seq05VD_f00150.png testannot/Seq05VD_f00150.png 69 | test/Seq05VD_f00180.png testannot/Seq05VD_f00180.png 70 | test/Seq05VD_f00210.png testannot/Seq05VD_f00210.png 71 | test/Seq05VD_f00240.png testannot/Seq05VD_f00240.png 72 | test/Seq05VD_f00270.png testannot/Seq05VD_f00270.png 73 | test/Seq05VD_f00300.png testannot/Seq05VD_f00300.png 74 | test/Seq05VD_f00330.png testannot/Seq05VD_f00330.png 75 | test/Seq05VD_f00360.png testannot/Seq05VD_f00360.png 76 | test/Seq05VD_f00390.png testannot/Seq05VD_f00390.png 77 | test/Seq05VD_f00420.png testannot/Seq05VD_f00420.png 78 | test/Seq05VD_f00450.png testannot/Seq05VD_f00450.png 79 | test/Seq05VD_f00480.png testannot/Seq05VD_f00480.png 80 | test/Seq05VD_f00510.png testannot/Seq05VD_f00510.png 81 | test/Seq05VD_f00540.png testannot/Seq05VD_f00540.png 82 | test/Seq05VD_f00570.png testannot/Seq05VD_f00570.png 83 | test/Seq05VD_f00600.png testannot/Seq05VD_f00600.png 84 | test/Seq05VD_f00630.png testannot/Seq05VD_f00630.png 85 | test/Seq05VD_f00660.png testannot/Seq05VD_f00660.png 86 | test/Seq05VD_f00690.png testannot/Seq05VD_f00690.png 87 | test/Seq05VD_f00720.png testannot/Seq05VD_f00720.png 88 | test/Seq05VD_f00750.png testannot/Seq05VD_f00750.png 89 | test/Seq05VD_f00780.png testannot/Seq05VD_f00780.png 90 | test/Seq05VD_f00810.png testannot/Seq05VD_f00810.png 91 | test/Seq05VD_f00840.png testannot/Seq05VD_f00840.png 92 | test/Seq05VD_f00870.png testannot/Seq05VD_f00870.png 93 | test/Seq05VD_f00900.png testannot/Seq05VD_f00900.png 94 | test/Seq05VD_f00930.png testannot/Seq05VD_f00930.png 95 | test/Seq05VD_f00960.png testannot/Seq05VD_f00960.png 96 | test/Seq05VD_f00990.png testannot/Seq05VD_f00990.png 97 | test/Seq05VD_f01020.png testannot/Seq05VD_f01020.png 98 | test/Seq05VD_f01050.png testannot/Seq05VD_f01050.png 99 | test/Seq05VD_f01080.png testannot/Seq05VD_f01080.png 100 | test/Seq05VD_f01110.png testannot/Seq05VD_f01110.png 101 | test/Seq05VD_f01140.png testannot/Seq05VD_f01140.png 102 | test/Seq05VD_f01170.png testannot/Seq05VD_f01170.png 103 | test/Seq05VD_f01200.png testannot/Seq05VD_f01200.png 104 | test/Seq05VD_f01230.png testannot/Seq05VD_f01230.png 105 | test/Seq05VD_f01260.png testannot/Seq05VD_f01260.png 106 | test/Seq05VD_f01290.png testannot/Seq05VD_f01290.png 107 | test/Seq05VD_f01320.png testannot/Seq05VD_f01320.png 108 | test/Seq05VD_f01350.png testannot/Seq05VD_f01350.png 109 | test/Seq05VD_f01380.png testannot/Seq05VD_f01380.png 110 | test/Seq05VD_f01410.png testannot/Seq05VD_f01410.png 111 | test/Seq05VD_f01440.png testannot/Seq05VD_f01440.png 112 | test/Seq05VD_f01470.png testannot/Seq05VD_f01470.png 113 | test/Seq05VD_f01500.png testannot/Seq05VD_f01500.png 114 | test/Seq05VD_f01530.png testannot/Seq05VD_f01530.png 115 | test/Seq05VD_f01560.png testannot/Seq05VD_f01560.png 116 | test/Seq05VD_f01590.png testannot/Seq05VD_f01590.png 117 | test/Seq05VD_f01620.png testannot/Seq05VD_f01620.png 118 | test/Seq05VD_f01650.png testannot/Seq05VD_f01650.png 119 | test/Seq05VD_f01680.png testannot/Seq05VD_f01680.png 120 | test/Seq05VD_f01710.png testannot/Seq05VD_f01710.png 121 | test/Seq05VD_f01740.png 
testannot/Seq05VD_f01740.png 122 | test/Seq05VD_f01770.png testannot/Seq05VD_f01770.png 123 | test/Seq05VD_f01800.png testannot/Seq05VD_f01800.png 124 | test/Seq05VD_f01830.png testannot/Seq05VD_f01830.png 125 | test/Seq05VD_f01860.png testannot/Seq05VD_f01860.png 126 | test/Seq05VD_f01890.png testannot/Seq05VD_f01890.png 127 | test/Seq05VD_f01920.png testannot/Seq05VD_f01920.png 128 | test/Seq05VD_f01950.png testannot/Seq05VD_f01950.png 129 | test/Seq05VD_f01980.png testannot/Seq05VD_f01980.png 130 | test/Seq05VD_f02010.png testannot/Seq05VD_f02010.png 131 | test/Seq05VD_f02040.png testannot/Seq05VD_f02040.png 132 | test/Seq05VD_f02070.png testannot/Seq05VD_f02070.png 133 | test/Seq05VD_f02100.png testannot/Seq05VD_f02100.png 134 | test/Seq05VD_f02130.png testannot/Seq05VD_f02130.png 135 | test/Seq05VD_f02160.png testannot/Seq05VD_f02160.png 136 | test/Seq05VD_f02190.png testannot/Seq05VD_f02190.png 137 | test/Seq05VD_f02220.png testannot/Seq05VD_f02220.png 138 | test/Seq05VD_f02250.png testannot/Seq05VD_f02250.png 139 | test/Seq05VD_f02280.png testannot/Seq05VD_f02280.png 140 | test/Seq05VD_f02310.png testannot/Seq05VD_f02310.png 141 | test/Seq05VD_f02340.png testannot/Seq05VD_f02340.png 142 | test/Seq05VD_f02370.png testannot/Seq05VD_f02370.png 143 | test/Seq05VD_f02400.png testannot/Seq05VD_f02400.png 144 | test/Seq05VD_f02430.png testannot/Seq05VD_f02430.png 145 | test/Seq05VD_f02460.png testannot/Seq05VD_f02460.png 146 | test/Seq05VD_f02490.png testannot/Seq05VD_f02490.png 147 | test/Seq05VD_f02520.png testannot/Seq05VD_f02520.png 148 | test/Seq05VD_f02550.png testannot/Seq05VD_f02550.png 149 | test/Seq05VD_f02580.png testannot/Seq05VD_f02580.png 150 | test/Seq05VD_f02610.png testannot/Seq05VD_f02610.png 151 | test/Seq05VD_f02640.png testannot/Seq05VD_f02640.png 152 | test/Seq05VD_f02670.png testannot/Seq05VD_f02670.png 153 | test/Seq05VD_f02700.png testannot/Seq05VD_f02700.png 154 | test/Seq05VD_f02730.png testannot/Seq05VD_f02730.png 155 | test/Seq05VD_f02760.png testannot/Seq05VD_f02760.png 156 | test/Seq05VD_f02790.png testannot/Seq05VD_f02790.png 157 | test/Seq05VD_f02820.png testannot/Seq05VD_f02820.png 158 | test/Seq05VD_f02850.png testannot/Seq05VD_f02850.png 159 | test/Seq05VD_f02880.png testannot/Seq05VD_f02880.png 160 | test/Seq05VD_f02910.png testannot/Seq05VD_f02910.png 161 | test/Seq05VD_f02940.png testannot/Seq05VD_f02940.png 162 | test/Seq05VD_f02970.png testannot/Seq05VD_f02970.png 163 | test/Seq05VD_f03000.png testannot/Seq05VD_f03000.png 164 | test/Seq05VD_f03030.png testannot/Seq05VD_f03030.png 165 | test/Seq05VD_f03060.png testannot/Seq05VD_f03060.png 166 | test/Seq05VD_f03090.png testannot/Seq05VD_f03090.png 167 | test/Seq05VD_f03120.png testannot/Seq05VD_f03120.png 168 | test/Seq05VD_f03150.png testannot/Seq05VD_f03150.png 169 | test/Seq05VD_f03180.png testannot/Seq05VD_f03180.png 170 | test/Seq05VD_f03210.png testannot/Seq05VD_f03210.png 171 | test/Seq05VD_f03240.png testannot/Seq05VD_f03240.png 172 | test/Seq05VD_f03270.png testannot/Seq05VD_f03270.png 173 | test/Seq05VD_f03300.png testannot/Seq05VD_f03300.png 174 | test/Seq05VD_f03330.png testannot/Seq05VD_f03330.png 175 | test/Seq05VD_f03360.png testannot/Seq05VD_f03360.png 176 | test/Seq05VD_f03390.png testannot/Seq05VD_f03390.png 177 | test/Seq05VD_f03420.png testannot/Seq05VD_f03420.png 178 | test/Seq05VD_f03450.png testannot/Seq05VD_f03450.png 179 | test/Seq05VD_f03480.png testannot/Seq05VD_f03480.png 180 | test/Seq05VD_f03510.png testannot/Seq05VD_f03510.png 181 | test/Seq05VD_f03540.png 
testannot/Seq05VD_f03540.png 182 | test/Seq05VD_f03570.png testannot/Seq05VD_f03570.png 183 | test/Seq05VD_f03600.png testannot/Seq05VD_f03600.png 184 | test/Seq05VD_f03630.png testannot/Seq05VD_f03630.png 185 | test/Seq05VD_f03660.png testannot/Seq05VD_f03660.png 186 | test/Seq05VD_f03690.png testannot/Seq05VD_f03690.png 187 | test/Seq05VD_f03720.png testannot/Seq05VD_f03720.png 188 | test/Seq05VD_f03750.png testannot/Seq05VD_f03750.png 189 | test/Seq05VD_f03780.png testannot/Seq05VD_f03780.png 190 | test/Seq05VD_f03810.png testannot/Seq05VD_f03810.png 191 | test/Seq05VD_f03840.png testannot/Seq05VD_f03840.png 192 | test/Seq05VD_f03870.png testannot/Seq05VD_f03870.png 193 | test/Seq05VD_f03900.png testannot/Seq05VD_f03900.png 194 | test/Seq05VD_f03930.png testannot/Seq05VD_f03930.png 195 | test/Seq05VD_f03960.png testannot/Seq05VD_f03960.png 196 | test/Seq05VD_f03990.png testannot/Seq05VD_f03990.png 197 | test/Seq05VD_f04020.png testannot/Seq05VD_f04020.png 198 | test/Seq05VD_f04050.png testannot/Seq05VD_f04050.png 199 | test/Seq05VD_f04080.png testannot/Seq05VD_f04080.png 200 | test/Seq05VD_f04110.png testannot/Seq05VD_f04110.png 201 | test/Seq05VD_f04140.png testannot/Seq05VD_f04140.png 202 | test/Seq05VD_f04170.png testannot/Seq05VD_f04170.png 203 | test/Seq05VD_f04200.png testannot/Seq05VD_f04200.png 204 | test/Seq05VD_f04230.png testannot/Seq05VD_f04230.png 205 | test/Seq05VD_f04260.png testannot/Seq05VD_f04260.png 206 | test/Seq05VD_f04290.png testannot/Seq05VD_f04290.png 207 | test/Seq05VD_f04320.png testannot/Seq05VD_f04320.png 208 | test/Seq05VD_f04350.png testannot/Seq05VD_f04350.png 209 | test/Seq05VD_f04380.png testannot/Seq05VD_f04380.png 210 | test/Seq05VD_f04410.png testannot/Seq05VD_f04410.png 211 | test/Seq05VD_f04440.png testannot/Seq05VD_f04440.png 212 | test/Seq05VD_f04470.png testannot/Seq05VD_f04470.png 213 | test/Seq05VD_f04500.png testannot/Seq05VD_f04500.png 214 | test/Seq05VD_f04530.png testannot/Seq05VD_f04530.png 215 | test/Seq05VD_f04560.png testannot/Seq05VD_f04560.png 216 | test/Seq05VD_f04590.png testannot/Seq05VD_f04590.png 217 | test/Seq05VD_f04620.png testannot/Seq05VD_f04620.png 218 | test/Seq05VD_f04650.png testannot/Seq05VD_f04650.png 219 | test/Seq05VD_f04680.png testannot/Seq05VD_f04680.png 220 | test/Seq05VD_f04710.png testannot/Seq05VD_f04710.png 221 | test/Seq05VD_f04740.png testannot/Seq05VD_f04740.png 222 | test/Seq05VD_f04770.png testannot/Seq05VD_f04770.png 223 | test/Seq05VD_f04800.png testannot/Seq05VD_f04800.png 224 | test/Seq05VD_f04830.png testannot/Seq05VD_f04830.png 225 | test/Seq05VD_f04860.png testannot/Seq05VD_f04860.png 226 | test/Seq05VD_f04890.png testannot/Seq05VD_f04890.png 227 | test/Seq05VD_f04920.png testannot/Seq05VD_f04920.png 228 | test/Seq05VD_f04950.png testannot/Seq05VD_f04950.png 229 | test/Seq05VD_f04980.png testannot/Seq05VD_f04980.png 230 | test/Seq05VD_f05010.png testannot/Seq05VD_f05010.png 231 | test/Seq05VD_f05040.png testannot/Seq05VD_f05040.png 232 | test/Seq05VD_f05070.png testannot/Seq05VD_f05070.png 233 | test/Seq05VD_f05100.png testannot/Seq05VD_f05100.png 234 | -------------------------------------------------------------------------------- /data/camvid/camvid_val_list.txt: -------------------------------------------------------------------------------- 1 | val/0016E5_07959.png valannot/0016E5_07959.png 2 | val/0016E5_07961.png valannot/0016E5_07961.png 3 | val/0016E5_07963.png valannot/0016E5_07963.png 4 | val/0016E5_07965.png valannot/0016E5_07965.png 5 | val/0016E5_07967.png valannot/0016E5_07967.png 
6 | val/0016E5_07969.png valannot/0016E5_07969.png 7 | val/0016E5_07971.png valannot/0016E5_07971.png 8 | val/0016E5_07973.png valannot/0016E5_07973.png 9 | val/0016E5_07975.png valannot/0016E5_07975.png 10 | val/0016E5_07977.png valannot/0016E5_07977.png 11 | val/0016E5_07979.png valannot/0016E5_07979.png 12 | val/0016E5_07981.png valannot/0016E5_07981.png 13 | val/0016E5_07983.png valannot/0016E5_07983.png 14 | val/0016E5_07985.png valannot/0016E5_07985.png 15 | val/0016E5_07987.png valannot/0016E5_07987.png 16 | val/0016E5_07989.png valannot/0016E5_07989.png 17 | val/0016E5_07991.png valannot/0016E5_07991.png 18 | val/0016E5_07993.png valannot/0016E5_07993.png 19 | val/0016E5_07995.png valannot/0016E5_07995.png 20 | val/0016E5_07997.png valannot/0016E5_07997.png 21 | val/0016E5_07999.png valannot/0016E5_07999.png 22 | val/0016E5_08001.png valannot/0016E5_08001.png 23 | val/0016E5_08003.png valannot/0016E5_08003.png 24 | val/0016E5_08005.png valannot/0016E5_08005.png 25 | val/0016E5_08007.png valannot/0016E5_08007.png 26 | val/0016E5_08009.png valannot/0016E5_08009.png 27 | val/0016E5_08011.png valannot/0016E5_08011.png 28 | val/0016E5_08013.png valannot/0016E5_08013.png 29 | val/0016E5_08015.png valannot/0016E5_08015.png 30 | val/0016E5_08017.png valannot/0016E5_08017.png 31 | val/0016E5_08019.png valannot/0016E5_08019.png 32 | val/0016E5_08021.png valannot/0016E5_08021.png 33 | val/0016E5_08023.png valannot/0016E5_08023.png 34 | val/0016E5_08025.png valannot/0016E5_08025.png 35 | val/0016E5_08027.png valannot/0016E5_08027.png 36 | val/0016E5_08029.png valannot/0016E5_08029.png 37 | val/0016E5_08031.png valannot/0016E5_08031.png 38 | val/0016E5_08033.png valannot/0016E5_08033.png 39 | val/0016E5_08035.png valannot/0016E5_08035.png 40 | val/0016E5_08037.png valannot/0016E5_08037.png 41 | val/0016E5_08039.png valannot/0016E5_08039.png 42 | val/0016E5_08041.png valannot/0016E5_08041.png 43 | val/0016E5_08043.png valannot/0016E5_08043.png 44 | val/0016E5_08045.png valannot/0016E5_08045.png 45 | val/0016E5_08047.png valannot/0016E5_08047.png 46 | val/0016E5_08049.png valannot/0016E5_08049.png 47 | val/0016E5_08051.png valannot/0016E5_08051.png 48 | val/0016E5_08053.png valannot/0016E5_08053.png 49 | val/0016E5_08055.png valannot/0016E5_08055.png 50 | val/0016E5_08057.png valannot/0016E5_08057.png 51 | val/0016E5_08059.png valannot/0016E5_08059.png 52 | val/0016E5_08061.png valannot/0016E5_08061.png 53 | val/0016E5_08063.png valannot/0016E5_08063.png 54 | val/0016E5_08065.png valannot/0016E5_08065.png 55 | val/0016E5_08067.png valannot/0016E5_08067.png 56 | val/0016E5_08069.png valannot/0016E5_08069.png 57 | val/0016E5_08071.png valannot/0016E5_08071.png 58 | val/0016E5_08073.png valannot/0016E5_08073.png 59 | val/0016E5_08075.png valannot/0016E5_08075.png 60 | val/0016E5_08077.png valannot/0016E5_08077.png 61 | val/0016E5_08079.png valannot/0016E5_08079.png 62 | val/0016E5_08081.png valannot/0016E5_08081.png 63 | val/0016E5_08083.png valannot/0016E5_08083.png 64 | val/0016E5_08085.png valannot/0016E5_08085.png 65 | val/0016E5_08087.png valannot/0016E5_08087.png 66 | val/0016E5_08089.png valannot/0016E5_08089.png 67 | val/0016E5_08091.png valannot/0016E5_08091.png 68 | val/0016E5_08093.png valannot/0016E5_08093.png 69 | val/0016E5_08095.png valannot/0016E5_08095.png 70 | val/0016E5_08097.png valannot/0016E5_08097.png 71 | val/0016E5_08099.png valannot/0016E5_08099.png 72 | val/0016E5_08101.png valannot/0016E5_08101.png 73 | val/0016E5_08103.png valannot/0016E5_08103.png 74 | 
val/0016E5_08105.png valannot/0016E5_08105.png 75 | val/0016E5_08107.png valannot/0016E5_08107.png 76 | val/0016E5_08109.png valannot/0016E5_08109.png 77 | val/0016E5_08111.png valannot/0016E5_08111.png 78 | val/0016E5_08113.png valannot/0016E5_08113.png 79 | val/0016E5_08115.png valannot/0016E5_08115.png 80 | val/0016E5_08117.png valannot/0016E5_08117.png 81 | val/0016E5_08119.png valannot/0016E5_08119.png 82 | val/0016E5_08121.png valannot/0016E5_08121.png 83 | val/0016E5_08123.png valannot/0016E5_08123.png 84 | val/0016E5_08125.png valannot/0016E5_08125.png 85 | val/0016E5_08127.png valannot/0016E5_08127.png 86 | val/0016E5_08129.png valannot/0016E5_08129.png 87 | val/0016E5_08131.png valannot/0016E5_08131.png 88 | val/0016E5_08133.png valannot/0016E5_08133.png 89 | val/0016E5_08135.png valannot/0016E5_08135.png 90 | val/0016E5_08137.png valannot/0016E5_08137.png 91 | val/0016E5_08139.png valannot/0016E5_08139.png 92 | val/0016E5_08141.png valannot/0016E5_08141.png 93 | val/0016E5_08143.png valannot/0016E5_08143.png 94 | val/0016E5_08145.png valannot/0016E5_08145.png 95 | val/0016E5_08147.png valannot/0016E5_08147.png 96 | val/0016E5_08149.png valannot/0016E5_08149.png 97 | val/0016E5_08151.png valannot/0016E5_08151.png 98 | val/0016E5_08153.png valannot/0016E5_08153.png 99 | val/0016E5_08155.png valannot/0016E5_08155.png 100 | val/0016E5_08157.png valannot/0016E5_08157.png 101 | val/0016E5_08159.png valannot/0016E5_08159.png 102 | -------------------------------------------------------------------------------- /data/fig/frankfurt_000000_002196_gtFine_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/data/fig/frankfurt_000000_002196_gtFine_color.png -------------------------------------------------------------------------------- /data/fig/frankfurt_000000_002196_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/data/fig/frankfurt_000000_002196_leftImg8bit.png -------------------------------------------------------------------------------- /data/fig/frankfurt_000000_002196_leftImg8bit_pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/data/fig/frankfurt_000000_002196_leftImg8bit_pred.png -------------------------------------------------------------------------------- /exp/train_dfanet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch dfanet \ 9 | --restore_from "/nas/dataset/pretrained/xceptiona_imagenet.pth" \ 10 | --input_size 1024 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 60000 \ 14 | --save_dir "./save/dfanet" \ 15 | --rgb 1 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./save/dfanet.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch dfanet \ 25 | --rgb 1 \ 26 | 
--restore_from "./save/dfnetv1seg/dfanet_final.pth" \ 27 | --whole True \ 28 | --output_dir "./dfanet_out" -------------------------------------------------------------------------------- /exp/train_dfsegv1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch dfnetv1seg \ 9 | --restore_from "/nas/dataset/pretrained/df1_imagenet.pth" \ 10 | --input_size 832 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 50000 \ 14 | --save_dir "./save/dfnetv1seg" \ 15 | --rgb 1 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./save/dfnetv1seg.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch dfnetv1seg \ 25 | --rgb 1 \ 26 | --restore_from "./save/dfnetv1seg/dfnetv1seg_final.pth" \ 27 | --whole True \ 28 | --output_dir "./ICNet_vis" -------------------------------------------------------------------------------- /exp/train_dfsegv2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch dfnetv2seg \ 9 | --restore_from "/nas/dataset/pretrained/df2_imagenet.pth" \ 10 | --input_size 832 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 50000 \ 14 | --save_dir "./saveDFnetv2" \ 15 | --rgb 1 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./log/saveDFnetv2.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch dfnetv2seg \ 25 | --rgb 1 \ 26 | --restore_from "./saveICNet/dfnetv2seg_final.pth" \ 27 | --whole True \ 28 | --output_dir "./dfnetv2seg" -------------------------------------------------------------------------------- /exp/train_icnet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # train the net (suppose 4 gpus) 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \ 5 | --nproc_per_node=4 train_distribute.py --data_set cityscapes \ 6 | --data_dir "/nas/dataset/CityScapes" \ 7 | --data_list "./data/cityscapes/train.txt" \ 8 | --arch icnet \ 9 | --restore_from "/nas/dataset/pretrained/resnet50-deep.pth" \ 10 | --input_size 832 \ 11 | --batch_size_per_gpu 4 \ 12 | --learning_rate 0.01 \ 13 | --num_steps 50000 \ 14 | --save_dir "./saveICNet" \ 15 | --rgb 0 \ 16 | --ohem 1 --ohem_thres 0.7 --ohem_keep 100000 \ 17 | --log_file "./log/ICNet.log" 18 | 19 | 20 | # whole evaluation 21 | python val.py --data_set cityscapes \ 22 | --data_dir "/nas/dataset/CityScapes" \ 23 | --data_list "./data/cityscapes/val.txt" \ 24 | --arch ICNet \ 25 | --rgb 0 \ 26 | --restore_from "./saveICNet/icnet_final.pth" \ 27 | --whole True \ 28 | --output_dir "./ICNet_vis" 
-------------------------------------------------------------------------------- /exp/train_pspnet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/__init__.py -------------------------------------------------------------------------------- /libs/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/core/__init__.py -------------------------------------------------------------------------------- /libs/core/loss.py: -------------------------------------------------------------------------------- 1 | # CE-loss 2 | import torch.nn as nn 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | class OhemCrossEntropy2dTensor(nn.Module): 8 | def __init__(self, ignore_label, reduction='elementwise_mean', thresh=0.6, min_kept=256, 9 | down_ratio=1, use_weight=False): 10 | super(OhemCrossEntropy2dTensor, self).__init__() 11 | self.ignore_label = ignore_label 12 | self.thresh = float(thresh) 13 | self.min_kept = int(min_kept) 14 | self.down_ratio = down_ratio 15 | if use_weight: 16 | weight = torch.FloatTensor( 17 | [0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 18 | 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 19 | 1.0865, 1.1529, 1.0507]) 20 | self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction, 21 | weight=weight, 22 | ignore_index=ignore_label) 23 | else: 24 | self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction, 25 | ignore_index=ignore_label) 26 | 27 | def forward(self, pred, target): 28 | b, c, h, w = pred.size() 29 | target = target.view(-1) 30 | valid_mask = target.ne(self.ignore_label) 31 | target = target * valid_mask.long() 32 | num_valid = valid_mask.sum() 33 | 34 | prob = F.softmax(pred, dim=1) 35 | prob = (prob.transpose(0, 1)).reshape(c, -1) 36 | 37 | if self.min_kept > num_valid: 38 | print('Labels: {}'.format(num_valid)) 39 | elif num_valid > 0: 40 | prob = prob.masked_fill_(1 - valid_mask, 1) 41 | mask_prob = prob[ 42 | target, torch.arange(len(target), dtype=torch.long)] 43 | threshold = self.thresh 44 | if self.min_kept > 0: 45 | _, index = mask_prob.sort() 46 | threshold_index = index[min(len(index), self.min_kept) - 1] 47 | if mask_prob[threshold_index] > self.thresh: 48 | threshold = mask_prob[threshold_index] 49 | kept_mask = mask_prob.le(threshold) 50 | target = target * kept_mask.long() 51 | valid_mask = valid_mask * kept_mask 52 | 53 | target = target.masked_fill_(1 - valid_mask, self.ignore_label) 54 | target = target.view(b, h, w) 55 | 56 | return self.criterion(pred, target) 57 | 58 | 59 | class CriterionDSN(nn.CrossEntropyLoss): 60 | def __init__(self, ignore_index=255,reduce=True): 61 | super(CriterionDSN, self).__init__() 62 | 63 | self.ignore_index = ignore_index 64 | self.reduce = reduce 65 | def forward(self, preds, target): 66 | scale_pred = preds[0] 67 | loss1 = super(CriterionDSN, self).forward(scale_pred, target) 68 | scale_pred = preds[1] 69 | loss2 = super(CriterionDSN, self).forward(scale_pred, target) 70 | 71 | return loss1 + loss2 * 0.4 72 | 73 | 74 | class CriterionOhemDSN(nn.Module): 75 | ''' 76 | DSN : We need to consider 
two supervision for the models. 77 | ''' 78 | def __init__(self, ignore_index=255, thresh=0.7, min_kept=100000, reduce=True): 79 | super(CriterionOhemDSN, self).__init__() 80 | self.ignore_index = ignore_index 81 | self.criterion1 = OhemCrossEntropy2dTensor(ignore_index, thresh=thresh, min_kept=min_kept) 82 | self.criterion2 = torch.nn.CrossEntropyLoss(ignore_index=ignore_index, reduce=reduce) 83 | if not reduce: 84 | print("disabled the reduce.") 85 | 86 | def forward(self, preds, target): 87 | h, w = target.size(1), target.size(2) 88 | 89 | scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) 90 | loss1 = self.criterion1(scale_pred, target) 91 | 92 | scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) 93 | loss2 = self.criterion2(scale_pred, target) 94 | 95 | return loss1 + loss2 * 0.4 96 | 97 | 98 | 99 | class CriterionICNet(nn.Module): 100 | """ 101 | ICNet loss 102 | """ 103 | 104 | def __init__(self, ignore_index=255, thresh=0.7, min_kept=100000, reduce=True): 105 | super(CriterionICNet, self).__init__() 106 | self.ignore_index = ignore_index 107 | self.criterion1 = OhemCrossEntropy2dTensor(ignore_index, thresh=thresh, min_kept=min_kept) 108 | 109 | if not reduce: 110 | print("disabled the reduce.") 111 | 112 | def forward(self, preds, target): 113 | h, w = target.size(1), target.size(2) 114 | 115 | scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) 116 | loss1 = self.criterion1(scale_pred, target) 117 | 118 | scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) 119 | loss2 = self.criterion1(scale_pred, target) 120 | 121 | scale_pred = F.upsample(input=preds[2], size=(h, w), mode='bilinear', align_corners=True) 122 | loss3 = self.criterion1(scale_pred, target) 123 | 124 | scale_pred = F.upsample(input=preds[3], size=(h, w), mode='bilinear', align_corners=True) 125 | loss4 = self.criterion1(scale_pred, target) 126 | 127 | return loss1 + 0.4 * loss2 + 0.4 * loss3 + 0.4 * loss4 128 | 129 | 130 | class CriterionDFANet(nn.Module): 131 | """ 132 | ICNet loss 133 | """ 134 | 135 | def __init__(self, ignore_index=255, thresh=0.7, min_kept=100000, reduce=True): 136 | super(CriterionDFANet, self).__init__() 137 | self.ignore_index = ignore_index 138 | self.criterion1 = OhemCrossEntropy2dTensor(ignore_index, thresh=thresh, min_kept=min_kept) 139 | self.criterion2 = torch.nn.CrossEntropyLoss(ignore_index=ignore_index, reduce=reduce) 140 | 141 | if not reduce: 142 | print("disabled the reduce.") 143 | 144 | def forward(self, preds, target): 145 | h, w = target.size(1), target.size(2) 146 | 147 | scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) 148 | loss1 = self.criterion1(scale_pred, target) 149 | 150 | scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) 151 | loss2 = self.criterion1(scale_pred, target) 152 | 153 | scale_pred = F.upsample(input=preds[2], size=(h, w), mode='bilinear', align_corners=True) 154 | loss3 = self.criterion1(scale_pred, target) 155 | 156 | return loss1 + 0.4 * loss2 + 0.4 * loss3 157 | -------------------------------------------------------------------------------- /libs/core/operators.py: -------------------------------------------------------------------------------- 1 | # Common Segmentation Operator implemented by Pytorch 2 | # XiangtaiLi(lxtpku@pku.edu.cn) 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 
| from torch.nn import BatchNorm2d 8 | 9 | 10 | upsample = lambda x, size: F.interpolate(x, size, mode='bilinear', align_corners=True) 11 | 12 | 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | """3x3 convolution with padding""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class GlobalAvgPool2d(nn.Module): 19 | def __init__(self): 20 | """Global average pooling over the input's spatial dimensions""" 21 | super(GlobalAvgPool2d, self).__init__() 22 | 23 | def forward(self, inputs): 24 | in_size = inputs.size() 25 | inputs = inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 26 | inputs = inputs.view(in_size[0], in_size[1], 1, 1) 27 | 28 | return inputs 29 | 30 | 31 | class SELayer(nn.Module): 32 | def __init__(self, in_planes, out_planes, reduction=16): 33 | super(SELayer, self).__init__() 34 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 35 | self.fc = nn.Sequential( 36 | nn.Linear(in_planes, out_planes // reduction), 37 | nn.ReLU(inplace=True), 38 | nn.Linear(out_planes // reduction, out_planes), 39 | nn.Sigmoid() 40 | ) 41 | self.out_planes = out_planes 42 | 43 | def forward(self, x): 44 | b, c, _, _ = x.size() 45 | y = self.avg_pool(x).view(b, c) 46 | y = self.fc(y).view(b, self.out_planes, 1, 1) 47 | return y 48 | 49 | 50 | class ConvBnRelu(nn.Module): 51 | def __init__(self, in_planes, out_planes, ksize, stride=1, pad=0, dilation=1, 52 | groups=1, has_bn=True, norm_layer=nn.BatchNorm2d, bn_eps=1e-5, 53 | has_relu=True, inplace=True, has_bias=False): 54 | super(ConvBnRelu, self).__init__() 55 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=ksize, 56 | stride=stride, padding=pad, 57 | dilation=dilation, groups=groups, bias=has_bias) 58 | self.has_bn = has_bn 59 | if self.has_bn: 60 | self.bn = norm_layer(out_planes, eps=bn_eps) 61 | self.has_relu = has_relu 62 | if self.has_relu: 63 | self.relu = nn.ReLU(inplace=inplace) 64 | 65 | def forward(self, x): 66 | x = self.conv(x) 67 | if self.has_bn: 68 | x = self.bn(x) 69 | if self.has_relu: 70 | x = self.relu(x) 71 | 72 | return x 73 | 74 | def dsn(in_channels, nclass, norm_layer=nn.BatchNorm2d): 75 | return nn.Sequential( 76 | nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), 77 | norm_layer(in_channels), 78 | nn.ReLU(), 79 | nn.Dropout2d(0.1), 80 | nn.Conv2d(in_channels, nclass, kernel_size=1, stride=1, padding=0, bias=True) 81 | ) 82 | 83 | 84 | class SeparableConv2d(nn.Module): 85 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, bias=False, norm_layer=None): 86 | super(SeparableConv2d, self).__init__() 87 | self.kernel_size = kernel_size 88 | self.dilation = dilation 89 | 90 | self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, 0, dilation, groups=in_channels, 91 | bias=bias) 92 | self.bn = norm_layer(in_channels) 93 | self.pointwise = nn.Conv2d(in_channels, out_channels, 1, bias=bias) 94 | 95 | def forward(self, x): 96 | x = self.fix_padding(x, self.kernel_size, self.dilation) 97 | x = self.conv1(x) 98 | x = self.bn(x) 99 | x = self.pointwise(x) 100 | 101 | return x 102 | 103 | def fix_padding(self, x, kernel_size, dilation): 104 | kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1) 105 | pad_total = kernel_size_effective - 1 106 | pad_beg = pad_total // 2 107 | pad_end = pad_total - pad_beg 108 | padded_inputs = F.pad(x, (pad_beg, pad_end, pad_beg, pad_end)) 109 | return padded_inputs 110 | 111 | 112 | class ASPPModule(nn.Module): 113 | """ 114 | Reference: 
115 | Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."* 116 | """ 117 | 118 | def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36), norm_layer=nn.BatchNorm2d): 119 | super(ASPPModule, self).__init__() 120 | 121 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 122 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 123 | bias=False), 124 | norm_layer(inner_features), 125 | nn.ReLU() 126 | ) 127 | self.conv2 = nn.Sequential( 128 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 129 | norm_layer(inner_features), nn.ReLU()) 130 | self.conv3 = nn.Sequential( 131 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 132 | norm_layer(inner_features), nn.ReLU()) 133 | self.conv4 = nn.Sequential( 134 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 135 | norm_layer(inner_features), nn.ReLU()) 136 | self.conv5 = nn.Sequential( 137 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 138 | norm_layer(inner_features), nn.ReLU()) 139 | 140 | self.bottleneck = nn.Sequential( 141 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 142 | norm_layer(out_features), 143 | nn.ReLU(), 144 | nn.Dropout2d(0.1) 145 | ) 146 | 147 | def forward(self, x): 148 | _, _, h, w = x.size() 149 | 150 | feat1 = F.upsample(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 151 | 152 | feat2 = self.conv2(x) 153 | feat3 = self.conv3(x) 154 | feat4 = self.conv4(x) 155 | feat5 = self.conv5(x) 156 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 157 | 158 | bottle = self.bottleneck(out) 159 | return bottle 160 | 161 | 162 | class A2Block(nn.Module): 163 | """ 164 | Implementation of A2Block(NIPS 2018) 165 | """ 166 | def __init__(self, inplane, plane): 167 | super(A2Block, self).__init__() 168 | self.down = nn.Conv2d(inplane, plane, 1) 169 | self.up = nn.Conv2d(plane, inplane, 1) 170 | self.gather_down = nn.Conv2d(inplane, plane, 1) 171 | self.distribue_down = nn.Conv2d(inplane, plane, 1) 172 | self.softmax = nn.Softmax(dim=-1) 173 | 174 | def forward(self, x): 175 | res = x 176 | A = self.down(res) 177 | B = self.gather_down(res) 178 | b, c, h, w = A.size() 179 | A = A.view(b, c, -1) # (b, c, h*w) 180 | B = B.view(b, c, -1) # (b, c, h*w) 181 | B = self.softmax(B) 182 | B = B.permute(0, 2, 1) # (b, h*w, c) 183 | 184 | G = torch.bmm(A, B) # (b,c,c) 185 | 186 | C = self.distribue_down(res) 187 | C = C.view(b, c, -1) # (b, c, h*w) 188 | C = self.softmax(C) 189 | C = C.permute(0, 2, 1) # (b, h*w, c) 190 | 191 | atten = torch.bmm(C, G) # (b, h*w, c) 192 | atten = atten.permute(0, 2, 1).view(b, c, h, -1) 193 | atten = self.up(atten) 194 | 195 | out = res + atten 196 | return out 197 | 198 | 199 | class PSPModule(nn.Module): 200 | """ 201 | Reference: 202 | Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* 203 | """ 204 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6), norm_layer=BatchNorm2d): 205 | super(PSPModule, self).__init__() 206 | self.stages = [] 207 | self.stages = nn.ModuleList([self._make_stage(features, out_features, size, norm_layer) for size in sizes]) 208 | self.bottleneck = nn.Sequential( 209 | nn.Conv2d(features+len(sizes)*out_features, out_features, kernel_size=1, padding=1, dilation=1, bias=False), 210 | norm_layer(out_features), 211 | nn.ReLU(), 212 | nn.Dropout2d(0.1) 213 | ) 214 | 215 | def _make_stage(self, features, out_features, size, norm_layer): 216 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 217 | conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) 218 | bn = norm_layer(out_features) 219 | return nn.Sequential(prior, conv, bn) 220 | 221 | def forward(self, feats): 222 | h, w = feats.size(2), feats.size(3) 223 | priors = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] 224 | bottle = self.bottleneck(torch.cat(priors, 1)) 225 | return bottle 226 | 227 | 228 | 229 | 230 | # For BiSeNet 231 | class AttentionRefinement(nn.Module): 232 | def __init__(self, in_planes, out_planes, 233 | norm_layer=nn.BatchNorm2d): 234 | super(AttentionRefinement, self).__init__() 235 | self.conv_3x3 = ConvBnRelu(in_planes, out_planes, 3, 1, 1, 236 | has_bn=True, norm_layer=norm_layer, 237 | has_relu=True, has_bias=False) 238 | self.channel_attention = nn.Sequential( 239 | nn.AdaptiveAvgPool2d(1), 240 | ConvBnRelu(out_planes, out_planes, 1, 1, 0, 241 | has_bn=True, norm_layer=norm_layer, 242 | has_relu=False, has_bias=False), 243 | nn.Sigmoid() 244 | ) 245 | 246 | def forward(self, x): 247 | fm = self.conv_3x3(x) 248 | fm_se = self.channel_attention(fm) 249 | fm = fm * fm_se 250 | 251 | return fm 252 | 253 | # For BiSeNet 254 | class FeatureFusion(nn.Module): 255 | def __init__(self, in_planes, out_planes, 256 | reduction=1, norm_layer=nn.BatchNorm2d): 257 | super(FeatureFusion, self).__init__() 258 | self.conv_1x1 = ConvBnRelu(in_planes, out_planes, 1, 1, 0, 259 | has_bn=True, norm_layer=norm_layer, 260 | has_relu=True, has_bias=False) 261 | self.channel_attention = nn.Sequential( 262 | nn.AdaptiveAvgPool2d(1), 263 | ConvBnRelu(out_planes, out_planes // reduction, 1, 1, 0, 264 | has_bn=False, norm_layer=norm_layer, 265 | has_relu=True, has_bias=False), 266 | ConvBnRelu(out_planes // reduction, out_planes, 1, 1, 0, 267 | has_bn=False, norm_layer=norm_layer, 268 | has_relu=False, has_bias=False), 269 | nn.Sigmoid() 270 | ) 271 | 272 | def forward(self, x1, x2): 273 | fm = torch.cat([x1, x2], dim=1) 274 | fm = self.conv_1x1(fm) 275 | fm_se = self.channel_attention(fm) 276 | output = fm + fm * fm_se 277 | return output 278 | 279 | 280 | -------------------------------------------------------------------------------- /libs/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/datasets/__init__.py -------------------------------------------------------------------------------- /libs/datasets/camvid.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | 4 | import os.path as osp 5 | import numpy as np 6 | import random 7 | import cv2 8 | from torch.utils import data 9 | 10 | 11 | """ 12 | CamVid is a road scene 
understanding dataset with 367 training images and 233 testing images of day and dusk scenes. 13 | The challenge is to segment 11 classes such as road, building, cars, pedestrians, signs, poles, side-walk etc. We 14 | resize images to 360x480 pixels for training and testing. 15 | """ 16 | 17 | CAMVID_CLASSES = ['Sky', 18 | 'Building', 19 | 'Column-Pole', 20 | 'Road', 21 | 'Sidewalk', 22 | 'Tree', 23 | 'Sign-Symbol', 24 | 'Fence', 25 | 'Car', 26 | 'Pedestrain', 27 | 'Bicyclist', 28 | 'Void'] 29 | 30 | CAMVID_CLASS_COLORS = [ 31 | (128, 128, 128), 32 | (128, 0, 0), 33 | (192, 192, 128), 34 | (128, 64, 128), 35 | (0, 0, 192), 36 | (128, 128, 0), 37 | (192, 128, 128), 38 | (64, 64, 128), 39 | (64, 0, 128), 40 | (64, 64, 0), 41 | (0, 128, 192), 42 | (0, 0, 0), 43 | ] 44 | 45 | 46 | class CamVidDataSet(data.Dataset): 47 | """ 48 | CamVidDataSet is employed to load train set 49 | Args: 50 | root: the CamVid dataset path, 51 | list_path: camvid_train_list.txt, include partial path 52 | 53 | """ 54 | def __init__(self, root=None, list_path='./dataset/list/CamVid/camvid_train_list.txt', 55 | max_iters=None, crop_size=(360, 360), 56 | mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255, vars=(1,1,1), RGB=False): 57 | self.root = root 58 | self.list_path = list_path 59 | self.crop_h, self.crop_w = crop_size 60 | self.scale = scale 61 | self.ignore_label = ignore_label 62 | self.mean = mean 63 | self.vars = vars 64 | self.is_mirror = mirror 65 | self.rgb = RGB 66 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 67 | if not max_iters == None: 68 | self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) 69 | self.files = [] 70 | 71 | for name in self.img_ids: 72 | img_file = osp.join(self.root, name.split()[0]) 73 | label_file = osp.join(self.root, name.split()[1]) 74 | self.files.append({ 75 | "img": img_file, 76 | "label": label_file, 77 | "name": name 78 | }) 79 | 80 | print("length of train set: ", len(self.files)) 81 | 82 | def __len__(self): 83 | return len(self.files) 84 | 85 | def __getitem__(self, index): 86 | datafiles = self.files[index] 87 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 88 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 89 | label[label==11] = self.ignore_label 90 | size = image.shape 91 | name = datafiles["name"] 92 | if self.scale: 93 | f_scale = 0.5 + random.randint(0, 15) / 10.0 # random resize between 0.5 and 2 94 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR) 95 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST) 96 | 97 | image = np.asarray(image, np.float32) 98 | 99 | if self.rgb: 100 | image = image[:,:, ::-1] ## BGR -> RGB 101 | image /= 255 ## using pytorch pretrained models 102 | 103 | image -= self.mean 104 | image /= self.vars 105 | 106 | img_h, img_w = label.shape 107 | pad_h = max(self.crop_h - img_h, 0) 108 | pad_w = max(self.crop_w - img_w, 0) 109 | if pad_h > 0 or pad_w > 0: 110 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 111 | pad_w, cv2.BORDER_CONSTANT, 112 | value=(0.0, 0.0, 0.0)) 113 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 114 | pad_w, cv2.BORDER_CONSTANT, 115 | value=(self.ignore_label,)) 116 | else: 117 | img_pad, label_pad = image, label 118 | 119 | img_h, img_w = label_pad.shape 120 | h_off = random.randint(0, img_h - self.crop_h) 121 | w_off = random.randint(0, img_w - self.crop_w) 122 | 123 | image = np.asarray(img_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], 
np.float32) 124 | label = np.asarray(label_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32) 125 | 126 | image = image.transpose((2, 0, 1)) # NHWC -> NCHW 127 | 128 | if self.is_mirror: 129 | flip = np.random.choice(2) * 2 - 1 130 | image = image[:, :, ::flip] 131 | label = label[:, ::flip] 132 | 133 | return image.copy(), label.copy(), np.array(size), name 134 | 135 | 136 | class CamVidTestDataSet(data.Dataset): 137 | """ 138 | CamVidValDataSet is employed to load val set 139 | Args: 140 | root: the CamVid dataset path, 141 | list_path: camvid_val_list.txt, include partial path 142 | 143 | """ 144 | 145 | def __init__(self, root='/home/DataSet/CamVid', list_path='./dataset/list/CamVid/camvid_val_list.txt', 146 | f_scale=1, mean=(128, 128, 128), ignore_label=255, vars=(1,1,1), RGB=False): 147 | self.root = root 148 | self.list_path = list_path 149 | self.ignore_label = ignore_label 150 | self.mean = mean 151 | self.vars = vars 152 | self.rgb = RGB 153 | self.f_scale = f_scale 154 | self.img_ids = [i_id.strip() for i_id in open(list_path)] 155 | self.files = [] 156 | for name in self.img_ids: 157 | img_file = osp.join(self.root, name.split()[0]) 158 | label_file = osp.join(self.root, name.split()[1]) 159 | image_name = name.strip().split()[0].strip().split('/', 1)[1].split('.')[0] 160 | self.files.append({ 161 | "img": img_file, 162 | "label": label_file, 163 | "name": image_name 164 | }) 165 | 166 | print("length of Test Set: ", len(self.files)) 167 | 168 | def __len__(self): 169 | return len(self.files) 170 | 171 | def __getitem__(self, index): 172 | datafiles = self.files[index] 173 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 174 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 175 | size = image.shape 176 | name = datafiles["name"] 177 | if self.f_scale != 1: 178 | image = cv2.resize(image, None, fx=self.f_scale, fy=self.f_scale, interpolation=cv2.INTER_LINEAR) 179 | label = cv2.resize(label, None, fx=self.f_scale, fy=self.f_scale, interpolation = cv2.INTER_NEAREST) 180 | 181 | label[label == 11] = self.ignore_label 182 | 183 | image = np.asarray(image, np.float32) 184 | 185 | if self.rgb: 186 | image = image[:, :, ::-1] ## BGR -> RGB 187 | image /= 255 ## using pytorch pretrained models 188 | 189 | image -= self.mean 190 | image /= self.vars 191 | 192 | image = image.transpose((2, 0, 1)) # HWC -> CHW 193 | 194 | # print('image.shape:',image.shape) 195 | return image.copy(), label.copy(), np.array(size), name 196 | -------------------------------------------------------------------------------- /libs/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | 4 | 5 | import os.path as osp 6 | import numpy as np 7 | import random 8 | import cv2 9 | 10 | from torch.utils import data 11 | 12 | 13 | class Cityscapes(data.Dataset): 14 | def __init__(self, root, list_path="./list/cityscapes/train.txt", max_iters=None, crop_size=(321, 321), 15 | mean=(128, 128, 128), vars=(1,1,1), scale=True, mirror=True, ignore_label=255, RGB=False): 16 | self.root = root 17 | self.list_path = list_path 18 | self.crop_h, self.crop_w = crop_size 19 | self.scale = scale 20 | self.ignore_label = ignore_label 21 | self.mean = mean 22 | self.vars = vars 23 | self.is_mirror = mirror 24 | self.rgb = RGB 25 | self.img_ids = [i_id.strip().split() for i_id in open(list_path)] 26 | if not max_iters==None: 27 | self.img_ids = self.img_ids * 
int(np.ceil(float(max_iters) / len(self.img_ids))) 28 | self.files = [] 29 | for item in self.img_ids: 30 | image_path, label_path = item 31 | name = osp.splitext(osp.basename(label_path))[0] 32 | img_file = osp.join(self.root, image_path) 33 | label_file = osp.join(self.root, label_path) 34 | self.files.append({ 35 | "img": img_file, 36 | "label": label_file, 37 | "name": name 38 | }) 39 | self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, 40 | 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, 41 | 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, 42 | 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, 43 | 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, 44 | 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} 45 | print('{} images are loaded!'.format(len(self.files))) 46 | 47 | def __len__(self): 48 | return len(self.files) 49 | 50 | def generate_scale_label(self, image, label): 51 | f_scale = 0.7 + random.randint(0, 14) / 10.0 52 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) 53 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) 54 | return image, label 55 | 56 | def id2trainId(self, label, reverse=False): 57 | label_copy = label.copy() 58 | if reverse: 59 | for v, k in self.id_to_trainid.items(): 60 | label_copy[label == k] = v 61 | else: 62 | for k, v in self.id_to_trainid.items(): 63 | label_copy[label == k] = v 64 | return label_copy 65 | 66 | def __getitem__(self, index): 67 | datafiles = self.files[index] 68 | image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) 69 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 70 | label = self.id2trainId(label) 71 | 72 | if self.scale: 73 | image, label = self.generate_scale_label(image, label) 74 | image = np.asarray(image, np.float32) 75 | 76 | if self.rgb: 77 | image = image[:,:, ::-1] ## BGR -> RGB 78 | image /= 255 ## using pytorch pretrained models 79 | 80 | image -= self.mean 81 | image /= self.vars 82 | 83 | img_h, img_w = label.shape 84 | pad_h = max(self.crop_h - img_h, 0) 85 | pad_w = max(self.crop_w - img_w, 0) 86 | if pad_h > 0 or pad_w > 0: 87 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 88 | pad_w, cv2.BORDER_CONSTANT, 89 | value=(0.0, 0.0, 0.0)) 90 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 91 | pad_w, cv2.BORDER_CONSTANT, 92 | value=(self.ignore_label,)) 93 | else: 94 | img_pad, label_pad = image, label 95 | 96 | img_h, img_w = label_pad.shape 97 | h_off = random.randint(0, img_h - self.crop_h) 98 | w_off = random.randint(0, img_w - self.crop_w) 99 | 100 | image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 101 | label = np.asarray(label_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) 102 | 103 | image = image.transpose((2, 0, 1)) 104 | if self.is_mirror: 105 | flip = np.random.choice(2) * 2 - 1 106 | image = image[:, :, ::flip] 107 | label = label[:, ::flip] 108 | 109 | return image.copy(), label.copy() -------------------------------------------------------------------------------- /libs/datasets/mapillary.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | 4 | import os 5 | import numpy as np 6 | import random 7 | import cv2 8 | from torch.utils import data 9 | 10 | 11 | class MapDataSet(data.Dataset): 12 | def 
__init__(self, root, split="train", max_iters=80000, crop_size=(321, 321), mean=(128, 128, 128), vars=(1, 1, 1), scale=True, 13 | mirror=True, ignore_label=255, RGB=False): 14 | self.root = root 15 | self.crop_h, self.crop_w = crop_size 16 | self.scale = scale 17 | self.ignore_label = ignore_label 18 | self.mean = mean 19 | self.vars = vars 20 | self.is_mirror = mirror 21 | self.rgb = RGB 22 | self.img_list, self.label_list = self._make_dataset(root, split) 23 | assert len(self.label_list) == len(self.img_list) 24 | print("Found dataset {} images".format(len(self.img_list))) 25 | if not max_iters == None: 26 | self.img_total = self.img_list * int(np.ceil(float(max_iters) / len(self.img_list))) 27 | self.label_total = self.label_list * int(np.ceil(float(max_iters) / len(self.label_list))) 28 | self.pair_list = [] 29 | for i, img in enumerate(self.img_total): 30 | self.pair_list.append({ 31 | "image": img, 32 | "label": self.label_total[i] 33 | }) 34 | print('Total {} images are loaded!'.format(len(self.pair_list))) 35 | 36 | self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, 37 | 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, 38 | 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, 39 | 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, 40 | 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, 41 | 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} 42 | 43 | def __len__(self): 44 | return len(self.pair_list) 45 | 46 | def generate_scale_label(self, image, label): 47 | f_scale = 0.7 + random.randint(0, 14) / 10.0 48 | image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR) 49 | label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST) 50 | return image, label 51 | 52 | def id2trainId(self, label, reverse=False): 53 | label_copy = label.copy() 54 | if reverse: 55 | for v, k in self.id_to_trainid.items(): 56 | label_copy[label == k] = v 57 | else: 58 | for k, v in self.id_to_trainid.items(): 59 | label_copy[label == k] = v 60 | return label_copy 61 | 62 | def _make_dataset(self, root, split="train"): 63 | image_list = [] 64 | label_list = [] 65 | if split == "train": 66 | floder = os.path.join(root, "training") 67 | image_floder = os.path.join(floder, "images") 68 | label_floder = os.path.join(floder, "seg19_lbl") 69 | for sub_file in os.listdir(image_floder): 70 | image_list.append(os.path.join(image_floder, sub_file)) 71 | for sub_file in os.listdir(label_floder): 72 | label_list.append(os.path.join(label_floder, sub_file)) 73 | if split == "trainval": 74 | train_floder = os.path.join(root, "training") 75 | val_floder = os.path.join(root, "validation") 76 | 77 | image_floder = os.path.join(train_floder, "images") 78 | label_floder = os.path.join(train_floder, "seg19_lbl") 79 | for sub_file in os.listdir(image_floder): 80 | image_list.append(os.path.join(image_floder, sub_file)) 81 | for sub_file in os.listdir(label_floder): 82 | label_list.append(os.path.join(label_floder, sub_file)) 83 | 84 | image_floder = os.path.join(val_floder, "images") 85 | label_floder = os.path.join(val_floder, "seg19_lbl") 86 | for sub_file in os.listdir(image_floder): 87 | image_list.append(os.path.join(image_floder, sub_file)) 88 | for sub_file in os.listdir(label_floder): 89 | label_list.append(os.path.join(label_floder, sub_file)) 90 | 91 | image_list.sort() 92 | label_list.sort() 93 | return image_list, label_list 
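# Note: the image/label pairing built above relies on os.listdir() returning
# matching file names for images/ and seg19_lbl/ once both lists are sorted;
# os.listdir() itself gives no ordering guarantee, which is why both lists are
# sorted before being returned and why the two folders must contain files with
# identical base names.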
94 | 95 | def __getitem__(self, index): 96 | datafiles = self.pair_list[index] 97 | image = cv2.imread(datafiles["image"], cv2.IMREAD_COLOR) 98 | label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) 99 | label = self.id2trainId(label) 100 | size = image.shape 101 | 102 | if self.scale: 103 | image, label = self.generate_scale_label(image, label) 104 | image = np.asarray(image, np.float32) 105 | 106 | if self.rgb: 107 | image = image[:, :, ::-1] ## BGR -> RGB 108 | image /= 255 ## using pytorch pretrained models 109 | 110 | image -= self.mean 111 | image /= self.vars 112 | 113 | img_h, img_w = label.shape 114 | pad_h = max(self.crop_h - img_h, 0) 115 | pad_w = max(self.crop_w - img_w, 0) 116 | if pad_h > 0 or pad_w > 0: 117 | img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, 118 | pad_w, cv2.BORDER_CONSTANT, 119 | value=(0.0, 0.0, 0.0)) 120 | label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, 121 | pad_w, cv2.BORDER_CONSTANT, 122 | value=(self.ignore_label,)) 123 | else: 124 | img_pad, label_pad = image, label 125 | 126 | img_h, img_w = label_pad.shape 127 | h_off = random.randint(0, img_h - self.crop_h) 128 | w_off = random.randint(0, img_w - self.crop_w) 129 | image = np.asarray(img_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32) 130 | label = np.asarray(label_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32) 131 | image = image.transpose((2, 0, 1)) 132 | if self.is_mirror: 133 | flip = np.random.choice(2) * 2 - 1 134 | image = image[:, :, ::flip] 135 | label = label[:, ::flip] 136 | 137 | return image.copy(), label.copy(), np.array(size) -------------------------------------------------------------------------------- /libs/models/BiSegNet.py: -------------------------------------------------------------------------------- 1 | # @Author: yuchangqian 2 | # Modified: XiangtaiLi 3 | # BiSeg uses deeply based backbone. 
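# Architecture overview: a shallow SpatialPath (three stride-2 convolutions,
# 1/8 resolution, 128 channels) runs in parallel with a ResNet-18 context path
# whose two deepest stages are reweighted by AttentionRefinement modules and a
# global-average context vector; the two paths are merged by FeatureFusion, and
# extra BiSeNetHeads are built for auxiliary supervision during training.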
4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from libs.models.backbone.resnet import resnet18 10 | from libs.core.operators import ConvBnRelu, FeatureFusion, AttentionRefinement 11 | 12 | 13 | class SpatialPath(nn.Module): 14 | def __init__(self, in_planes, out_planes, norm_layer=nn.BatchNorm2d): 15 | super(SpatialPath, self).__init__() 16 | inner_channel = 64 17 | self.conv_7x7 = ConvBnRelu(in_planes, inner_channel, 7, 2, 3, 18 | has_bn=True, norm_layer=norm_layer, 19 | has_relu=True, has_bias=False) 20 | self.conv_3x3_1 = ConvBnRelu(inner_channel, inner_channel, 3, 2, 1, 21 | has_bn=True, norm_layer=norm_layer, 22 | has_relu=True, has_bias=False) 23 | self.conv_3x3_2 = ConvBnRelu(inner_channel, inner_channel, 3, 2, 1, 24 | has_bn=True, norm_layer=norm_layer, 25 | has_relu=True, has_bias=False) 26 | self.conv_1x1 = ConvBnRelu(inner_channel, out_planes, 1, 1, 0, 27 | has_bn=True, norm_layer=norm_layer, 28 | has_relu=True, has_bias=False) 29 | 30 | def forward(self, x): 31 | x = self.conv_7x7(x) 32 | x = self.conv_3x3_1(x) 33 | x = self.conv_3x3_2(x) 34 | output = self.conv_1x1(x) 35 | 36 | return output 37 | 38 | 39 | class BiSeNetHead(nn.Module): 40 | def __init__(self, in_planes, out_planes, scale, 41 | is_aux=False, norm_layer=nn.BatchNorm2d): 42 | super(BiSeNetHead, self).__init__() 43 | if is_aux: 44 | self.conv_3x3 = ConvBnRelu(in_planes, 128, 3, 1, 1, 45 | has_bn=True, norm_layer=norm_layer, 46 | has_relu=True, has_bias=False) 47 | else: 48 | self.conv_3x3 = ConvBnRelu(in_planes, 64, 3, 1, 1, 49 | has_bn=True, norm_layer=norm_layer, 50 | has_relu=True, has_bias=False) 51 | # self.dropout = nn.Dropout(0.1) 52 | if is_aux: 53 | self.conv_1x1 = nn.Conv2d(128, out_planes, kernel_size=1, 54 | stride=1, padding=0) 55 | else: 56 | self.conv_1x1 = nn.Conv2d(64, out_planes, kernel_size=1, 57 | stride=1, padding=0) 58 | self.scale = scale 59 | 60 | def forward(self, x): 61 | fm = self.conv_3x3(x) 62 | # fm = self.dropout(fm) 63 | output = self.conv_1x1(fm) 64 | if self.scale > 1: 65 | output = F.interpolate(output, scale_factor=self.scale, 66 | mode='bilinear', 67 | align_corners=True) 68 | 69 | return output 70 | 71 | 72 | class BiSeNet(nn.Module): 73 | def __init__(self, out_planes, is_training=False, 74 | pretrained_model=None, 75 | norm_layer=nn.BatchNorm2d): 76 | super(BiSeNet, self).__init__() 77 | self.backbone = resnet18(pretrained_model, norm_layer=norm_layer, 78 | bn_eps=1e-5, 79 | bn_momentum=0.1, 80 | deep_stem=True, stem_width=64) 81 | 82 | self.business_layer = [] 83 | self.is_training = is_training 84 | 85 | self.spatial_path = SpatialPath(3, 128, norm_layer) 86 | 87 | conv_channel = 128 88 | self.global_context = nn.Sequential( 89 | nn.AdaptiveAvgPool2d(1), 90 | ConvBnRelu(512, conv_channel, 1, 1, 0, 91 | has_bn=True, 92 | has_relu=True, has_bias=False, norm_layer=norm_layer) 93 | ) 94 | 95 | # stage = [512, 256, 128, 64] 96 | arms = [AttentionRefinement(512, conv_channel, norm_layer), 97 | AttentionRefinement(256, conv_channel, norm_layer)] 98 | refines = [ConvBnRelu(conv_channel, conv_channel, 3, 1, 1, 99 | has_bn=True, norm_layer=norm_layer, 100 | has_relu=True, has_bias=False), 101 | ConvBnRelu(conv_channel, conv_channel, 3, 1, 1, 102 | has_bn=True, norm_layer=norm_layer, 103 | has_relu=True, has_bias=False)] 104 | 105 | if is_training: 106 | heads = [BiSeNetHead(conv_channel, out_planes, 2, 107 | True, norm_layer), 108 | BiSeNetHead(conv_channel, out_planes, 1, 109 | True, norm_layer), 110 | BiSeNetHead(conv_channel * 2, 
out_planes, 1, 111 | False, norm_layer)] 112 | else: 113 | heads = [None, None, 114 | BiSeNetHead(conv_channel * 2, out_planes, 1, 115 | False, norm_layer)] 116 | 117 | self.ffm = FeatureFusion(conv_channel * 2, conv_channel * 2, 118 | 1, norm_layer) 119 | 120 | self.arms = nn.ModuleList(arms) 121 | self.refines = nn.ModuleList(refines) 122 | self.heads = nn.ModuleList(heads) 123 | 124 | self.business_layer.append(self.spatial_path) 125 | self.business_layer.append(self.global_context) 126 | self.business_layer.append(self.arms) 127 | self.business_layer.append(self.refines) 128 | self.business_layer.append(self.heads) 129 | self.business_layer.append(self.ffm) 130 | 131 | 132 | def forward(self, data, label=None): 133 | spatial_out = self.spatial_path(data) 134 | 135 | context_blocks = self.backbone(data) 136 | context_blocks.reverse() 137 | 138 | global_context = self.global_context(context_blocks[0]) 139 | global_context = F.interpolate(global_context, 140 | size=context_blocks[0].size()[2:], 141 | mode='bilinear', align_corners=True) 142 | 143 | last_fm = global_context 144 | pred_out = [] 145 | 146 | for i, (fm, arm, refine) in enumerate(zip(context_blocks[:2], self.arms, 147 | self.refines)): 148 | fm = arm(fm) 149 | fm += last_fm 150 | last_fm = F.interpolate(fm, size=(context_blocks[i + 1].size()[2:]), 151 | mode='bilinear', align_corners=True) 152 | last_fm = refine(last_fm) 153 | pred_out.append(last_fm) 154 | context_out = last_fm 155 | 156 | concate_fm = self.ffm(spatial_out, context_out) 157 | pred_out.append(concate_fm) 158 | 159 | if self.is_training: 160 | return pred_out 161 | 162 | return F.log_softmax(self.heads[-1](pred_out[-1]), dim=1) 163 | 164 | 165 | if __name__ == '__main__': 166 | i = torch.Tensor(1,3,512,512).cuda() 167 | m = BiSeNet(19).cuda() 168 | m.eval() 169 | o = m(i) 170 | print(o.size()) -------------------------------------------------------------------------------- /libs/models/DFANet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | """ 4 | Implementation of DFANet: a little different from the origin paper, I add more dsn loss for training. 5 | DFANet uses modified Xception backbone pretrained on ImageNet. 
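The encoder is applied three times: in each later pass the enc2/enc3/enc4 features of the
previous pass are concatenated with the current ones, and the two dsn heads supervise the
stage-1 and stage-2 outputs in addition to the fused prediction.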
6 | """ 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from libs.models.backbone.xception import Enc, FCAttention, XceptionA 12 | from libs.core.operators import ConvBnRelu, dsn 13 | 14 | 15 | class DFANet(nn.Module): 16 | def __init__(self, nclass, **kwargs): 17 | super(DFANet, self).__init__() 18 | self.backbone = XceptionA() 19 | 20 | self.enc2_2 = Enc(240, 48, 4, **kwargs) 21 | self.enc3_2 = Enc(144, 96, 6, **kwargs) 22 | self.enc4_2 = Enc(288, 192, 4, **kwargs) 23 | self.fca_2 = FCAttention(192, **kwargs) 24 | 25 | self.enc2_3 = Enc(240, 48, 4, **kwargs) 26 | self.enc3_3 = Enc(144, 96, 6, **kwargs) 27 | self.enc3_4 = Enc(288, 192, 4, **kwargs) 28 | self.fca_3 = FCAttention(192, **kwargs) 29 | 30 | self.enc2_1_reduce = ConvBnRelu(48, 32, 1, **kwargs) 31 | self.enc2_2_reduce = ConvBnRelu(48, 32, 1, **kwargs) 32 | self.enc2_3_reduce = ConvBnRelu(48, 32, 1, **kwargs) 33 | self.conv_fusion = ConvBnRelu(32, 32, 1, **kwargs) 34 | 35 | self.fca_1_reduce = ConvBnRelu(192, 32, 1, **kwargs) 36 | self.fca_2_reduce = ConvBnRelu(192, 32, 1, **kwargs) 37 | self.fca_3_reduce = ConvBnRelu(192, 32, 1, **kwargs) 38 | self.conv_out = nn.Conv2d(32, nclass, 1) 39 | 40 | self.dsn1 = dsn(192, nclass) 41 | self.dsn2 = dsn(192, nclass) 42 | 43 | self.__setattr__('exclusive', ['enc2_2', 'enc3_2', 'enc4_2', 'fca_2', 'enc2_3', 'enc3_3', 'enc3_4', 'fca_3', 44 | 'enc2_1_reduce', 'enc2_2_reduce', 'enc2_3_reduce', 'conv_fusion', 'fca_1_reduce', 45 | 'fca_2_reduce', 'fca_3_reduce', 'conv_out']) 46 | 47 | def forward(self, x): 48 | # backbone 49 | stage1_conv1 = self.backbone.conv1(x) 50 | stage1_enc2 = self.backbone.enc2(stage1_conv1) 51 | stage1_enc3 = self.backbone.enc3(stage1_enc2) 52 | stage1_enc4 = self.backbone.enc4(stage1_enc3) 53 | stage1_fca = self.backbone.fca(stage1_enc4) 54 | stage1_out = F.interpolate(stage1_fca, scale_factor=4, mode='bilinear', align_corners=True) 55 | 56 | dsn1 = self.dsn1(stage1_out) 57 | # stage2 58 | stage2_enc2 = self.enc2_2(torch.cat([stage1_enc2, stage1_out], dim=1)) 59 | stage2_enc3 = self.enc3_2(torch.cat([stage1_enc3, stage2_enc2], dim=1)) 60 | stage2_enc4 = self.enc4_2(torch.cat([stage1_enc4, stage2_enc3], dim=1)) 61 | stage2_fca = self.fca_2(stage2_enc4) 62 | stage2_out = F.interpolate(stage2_fca, scale_factor=4, mode='bilinear', align_corners=True) 63 | 64 | dsn2 = self.dsn2(stage2_out) 65 | 66 | # stage3 67 | stage3_enc2 = self.enc2_3(torch.cat([stage2_enc2, stage2_out], dim=1)) 68 | stage3_enc3 = self.enc3_3(torch.cat([stage2_enc3, stage3_enc2], dim=1)) 69 | stage3_enc4 = self.enc3_4(torch.cat([stage2_enc4, stage3_enc3], dim=1)) 70 | stage3_fca = self.fca_3(stage3_enc4) 71 | 72 | 73 | stage1_enc2_decoder = self.enc2_1_reduce(stage1_enc2) 74 | stage2_enc2_docoder = F.interpolate(self.enc2_2_reduce(stage2_enc2), scale_factor=2, 75 | mode='bilinear', align_corners=True) 76 | stage3_enc2_decoder = F.interpolate(self.enc2_3_reduce(stage3_enc2), scale_factor=4, 77 | mode='bilinear', align_corners=True) 78 | fusion = stage1_enc2_decoder + stage2_enc2_docoder + stage3_enc2_decoder 79 | fusion = self.conv_fusion(fusion) 80 | 81 | stage1_fca_decoder = F.interpolate(self.fca_1_reduce(stage1_fca), scale_factor=4, 82 | mode='bilinear', align_corners=True) 83 | stage2_fca_decoder = F.interpolate(self.fca_2_reduce(stage2_fca), scale_factor=8, 84 | mode='bilinear', align_corners=True) 85 | stage3_fca_decoder = F.interpolate(self.fca_3_reduce(stage3_fca), scale_factor=16, 86 | mode='bilinear', align_corners=True) 87 | fusion = fusion + 
stage1_fca_decoder + stage2_fca_decoder + stage3_fca_decoder 88 | 89 | outputs = list() 90 | out = self.conv_out(fusion) 91 | outputs.append(out) 92 | outputs.append(dsn1) 93 | outputs.append(dsn2) 94 | return outputs 95 | 96 | def dfanet(num_classes=19, data_set="cityscapes"): 97 | return DFANet(num_classes) 98 | 99 | 100 | if __name__ == '__main__': 101 | i = torch.Tensor(1,3,512,512).cuda() 102 | m = DFANet(19).cuda() 103 | m.eval() 104 | o = m(i) 105 | print(o[0].size()) 106 | print("output length: ", len(o)) -------------------------------------------------------------------------------- /libs/models/DFSegNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | # Pytorch Implementation of DongFeng SegNet: 4 | # Partial Order Pruning: for Best Speed/Accuracy Trade-off in Neural Architecture Search. 5 | # The backbone is pretrained on ImageNet 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from libs.core.operators import PSPModule, conv3x3, dsn 12 | from libs.models.backbone.dfnet import dfnetv2, dfnetv1 13 | 14 | 15 | class FusionNode(nn.Module): 16 | def __init__(self, inplane): 17 | super(FusionNode, self).__init__() 18 | self.fusion = conv3x3(inplane*2, inplane) 19 | 20 | def forward(self, x): 21 | x_h, x_l = x 22 | size = x_l.size()[2:] 23 | x_h = F.upsample(x_h, size, mode="bilinear", align_corners=True) 24 | res = self.fusion(torch.cat([x_h,x_l],dim=1)) 25 | return res 26 | 27 | 28 | class DFSeg(nn.Module): 29 | def __init__(self, nclass, type="dfv1"): 30 | super(DFSeg, self).__init__() 31 | 32 | if type == "dfv1": 33 | self.backbone = dfnetv1() 34 | else: 35 | self.backbone = dfnetv2() 36 | 37 | self.cc5 = nn.Conv2d(128,128,1) 38 | self.cc4 = nn.Conv2d(256,128,1) 39 | self.cc3 = nn.Conv2d(128,128,1) 40 | 41 | self.ppm = PSPModule(512,128) 42 | 43 | self.fn4 = FusionNode(128) 44 | self.fn3 = FusionNode(128) 45 | 46 | self.fc = dsn(128, nclass) 47 | 48 | def forward(self, x): 49 | x3,x4,x5 = self.backbone(x) 50 | x5 = self.ppm(x5) 51 | x5 = self.cc5(x5) 52 | x4 = self.cc4(x4) 53 | f4 = self.fn4([x5, x4]) 54 | x3 = self.cc3(x3) 55 | out = self.fn3([f4, x3]) 56 | out = self.fc(out) 57 | 58 | return [out] 59 | 60 | 61 | def dfnetv1seg(num_classes=19, data_set="cityscapes"): 62 | return DFSeg(num_classes,type="dfv1") 63 | 64 | 65 | def dfnetv2seg(num_classes=19, data_set="cityscapes"): 66 | return DFSeg(num_classes,type="dfv2") 67 | 68 | 69 | if __name__ == '__main__': 70 | i = torch.Tensor(1,3,512,512).cuda() 71 | m = DFSeg(19,"dfv2").cuda() 72 | m.eval() 73 | o = m(i) 74 | print(o[0].size()) -------------------------------------------------------------------------------- /libs/models/ESPNet.py: -------------------------------------------------------------------------------- 1 | # Author: "Sachin Mehta" 2 | # ESPNet doesn't use pretrained backbone network while usually takes longer training time. 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class CBR(nn.Module): 8 | ''' 9 | This class defines the convolution layer with batch normalization and PReLU activation 10 | ''' 11 | 12 | def __init__(self, nIn, nOut, kSize, stride=1): 13 | ''' 14 | 15 | :param nIn: number of input channels 16 | :param nOut: number of output channels 17 | :param kSize: kernel size 18 | :param stride: stride rate for down-sampling. 
Default is 1 19 | ''' 20 | super().__init__() 21 | padding = int((kSize - 1) / 2) 22 | # self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False) 23 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False) 24 | # self.conv1 = nn.Conv2d(nOut, nOut, (1, kSize), stride=1, padding=(0, padding), bias=False) 25 | self.bn = nn.BatchNorm2d(nOut, eps=1e-03) 26 | self.act = nn.PReLU(nOut) 27 | 28 | def forward(self, input): 29 | ''' 30 | :param input: input feature map 31 | :return: transformed feature map 32 | ''' 33 | output = self.conv(input) 34 | # output = self.conv1(output) 35 | output = self.bn(output) 36 | output = self.act(output) 37 | return output 38 | 39 | 40 | class BR(nn.Module): 41 | ''' 42 | This class groups the batch normalization and PReLU activation 43 | ''' 44 | 45 | def __init__(self, nOut): 46 | ''' 47 | :param nOut: output feature maps 48 | ''' 49 | super().__init__() 50 | self.bn = nn.BatchNorm2d(nOut, eps=1e-03) 51 | self.act = nn.PReLU(nOut) 52 | 53 | def forward(self, input): 54 | ''' 55 | :param input: input feature map 56 | :return: normalized and thresholded feature map 57 | ''' 58 | output = self.bn(input) 59 | output = self.act(output) 60 | return output 61 | 62 | 63 | class CB(nn.Module): 64 | ''' 65 | This class groups the convolution and batch normalization 66 | ''' 67 | 68 | def __init__(self, nIn, nOut, kSize, stride=1): 69 | ''' 70 | :param nIn: number of input channels 71 | :param nOut: number of output channels 72 | :param kSize: kernel size 73 | :param stride: optinal stide for down-sampling 74 | ''' 75 | super().__init__() 76 | padding = int((kSize - 1) / 2) 77 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False) 78 | self.bn = nn.BatchNorm2d(nOut, eps=1e-03) 79 | 80 | def forward(self, input): 81 | ''' 82 | 83 | :param input: input feature map 84 | :return: transformed feature map 85 | ''' 86 | output = self.conv(input) 87 | output = self.bn(output) 88 | return output 89 | 90 | 91 | class C(nn.Module): 92 | ''' 93 | This class is for a convolutional layer. 94 | ''' 95 | 96 | def __init__(self, nIn, nOut, kSize, stride=1): 97 | ''' 98 | 99 | :param nIn: number of input channels 100 | :param nOut: number of output channels 101 | :param kSize: kernel size 102 | :param stride: optional stride rate for down-sampling 103 | ''' 104 | super().__init__() 105 | padding = int((kSize - 1) / 2) 106 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False) 107 | 108 | def forward(self, input): 109 | ''' 110 | :param input: input feature map 111 | :return: transformed feature map 112 | ''' 113 | output = self.conv(input) 114 | return output 115 | 116 | 117 | class CDilated(nn.Module): 118 | ''' 119 | This class defines the dilated convolution. 
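The padding is scaled by the dilation rate d, so for an odd kernel size and stride 1 the
spatial resolution of the input is preserved.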
120 | ''' 121 | 122 | def __init__(self, nIn, nOut, kSize, stride=1, d=1): 123 | ''' 124 | :param nIn: number of input channels 125 | :param nOut: number of output channels 126 | :param kSize: kernel size 127 | :param stride: optional stride rate for down-sampling 128 | :param d: optional dilation rate 129 | ''' 130 | super().__init__() 131 | padding = int((kSize - 1) / 2) * d 132 | self.conv = nn.Conv2d(nIn, nOut, (kSize, kSize), stride=stride, padding=(padding, padding), bias=False, 133 | dilation=d) 134 | 135 | def forward(self, input): 136 | ''' 137 | :param input: input feature map 138 | :return: transformed feature map 139 | ''' 140 | output = self.conv(input) 141 | return output 142 | 143 | 144 | class DownSamplerB(nn.Module): 145 | def __init__(self, nIn, nOut): 146 | super().__init__() 147 | n = int(nOut / 5) 148 | n1 = nOut - 4 * n 149 | self.c1 = C(nIn, n, 3, 2) 150 | self.d1 = CDilated(n, n1, 3, 1, 1) 151 | self.d2 = CDilated(n, n, 3, 1, 2) 152 | self.d4 = CDilated(n, n, 3, 1, 4) 153 | self.d8 = CDilated(n, n, 3, 1, 8) 154 | self.d16 = CDilated(n, n, 3, 1, 16) 155 | self.bn = nn.BatchNorm2d(nOut, eps=1e-3) 156 | self.act = nn.PReLU(nOut) 157 | 158 | def forward(self, input): 159 | output1 = self.c1(input) 160 | d1 = self.d1(output1) 161 | d2 = self.d2(output1) 162 | d4 = self.d4(output1) 163 | d8 = self.d8(output1) 164 | d16 = self.d16(output1) 165 | 166 | add1 = d2 167 | add2 = add1 + d4 168 | add3 = add2 + d8 169 | add4 = add3 + d16 170 | 171 | combine = torch.cat([d1, add1, add2, add3, add4], 1) 172 | # combine_in_out = input + combine 173 | output = self.bn(combine) 174 | output = self.act(output) 175 | return output 176 | 177 | 178 | class DilatedParllelResidualBlockB(nn.Module): 179 | ''' 180 | This class defines the ESP block, which is based on the following principle 181 | Reduce ---> Split ---> Transform --> Merge 182 | ''' 183 | 184 | def __init__(self, nIn, nOut, add=True): 185 | ''' 186 | :param nIn: number of input channels 187 | :param nOut: number of output channels 188 | :param add: if true, add a residual connection through identity operation. 
You can use projection too as 189 | in ResNet paper, but we avoid to use it if the dimensions are not the same because we do not want to 190 | increase the module complexity 191 | ''' 192 | super().__init__() 193 | n = int(nOut / 5) 194 | n1 = nOut - 4 * n 195 | self.c1 = C(nIn, n, 1, 1) 196 | self.d1 = CDilated(n, n1, 3, 1, 1) # dilation rate of 2^0 197 | self.d2 = CDilated(n, n, 3, 1, 2) # dilation rate of 2^1 198 | self.d4 = CDilated(n, n, 3, 1, 4) # dilation rate of 2^2 199 | self.d8 = CDilated(n, n, 3, 1, 8) # dilation rate of 2^3 200 | self.d16 = CDilated(n, n, 3, 1, 16) # dilation rate of 2^4 201 | self.bn = BR(nOut) 202 | self.add = add 203 | 204 | def forward(self, input): 205 | ''' 206 | :param input: input feature map 207 | :return: transformed feature map 208 | ''' 209 | # reduce 210 | output1 = self.c1(input) 211 | # split and transform 212 | d1 = self.d1(output1) 213 | d2 = self.d2(output1) 214 | d4 = self.d4(output1) 215 | d8 = self.d8(output1) 216 | d16 = self.d16(output1) 217 | 218 | # heirarchical fusion for de-gridding 219 | add1 = d2 220 | add2 = add1 + d4 221 | add3 = add2 + d8 222 | add4 = add3 + d16 223 | 224 | # merge 225 | combine = torch.cat([d1, add1, add2, add3, add4], 1) 226 | 227 | # if residual version 228 | if self.add: 229 | combine = input + combine 230 | output = self.bn(combine) 231 | return output 232 | 233 | 234 | class InputProjectionA(nn.Module): 235 | ''' 236 | This class projects the input image to the same spatial dimensions as the feature map. 237 | For example, if the input image is 512 x512 x3 and spatial dimensions of feature map size are 56x56xF, then 238 | this class will generate an output of 56x56x3 239 | ''' 240 | 241 | def __init__(self, samplingTimes): 242 | ''' 243 | :param samplingTimes: The rate at which you want to down-sample the image 244 | ''' 245 | super().__init__() 246 | self.pool = nn.ModuleList() 247 | for i in range(0, samplingTimes): 248 | # pyramid-based approach for down-sampling 249 | self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) 250 | 251 | def forward(self, input): 252 | ''' 253 | :param input: Input RGB Image 254 | :return: down-sampled image (pyramid-based approach) 255 | ''' 256 | for pool in self.pool: 257 | input = pool(input) 258 | return input 259 | 260 | 261 | class ESPNet_Encoder(nn.Module): 262 | ''' 263 | This class defines the ESPNet-C network in the paper 264 | ''' 265 | 266 | def __init__(self, classes=20, p=5, q=3): 267 | ''' 268 | :param classes: number of classes in the dataset. 
Default is 20 for the cityscapes 269 | :param p: depth multiplier 270 | :param q: depth multiplier 271 | ''' 272 | super().__init__() 273 | self.level1 = CBR(3, 16, 3, 2) 274 | self.sample1 = InputProjectionA(1) 275 | self.sample2 = InputProjectionA(2) 276 | 277 | self.b1 = BR(16 + 3) 278 | self.level2_0 = DownSamplerB(16 + 3, 64) 279 | 280 | self.level2 = nn.ModuleList() 281 | for i in range(0, p): 282 | self.level2.append(DilatedParllelResidualBlockB(64, 64)) 283 | self.b2 = BR(128 + 3) 284 | 285 | self.level3_0 = DownSamplerB(128 + 3, 128) 286 | self.level3 = nn.ModuleList() 287 | for i in range(0, q): 288 | self.level3.append(DilatedParllelResidualBlockB(128, 128)) 289 | self.b3 = BR(256) 290 | 291 | self.classifier = C(256, classes, 1, 1) 292 | 293 | def forward(self, input): 294 | ''' 295 | :param input: Receives the input RGB image 296 | :return: the transformed feature map with spatial dimensions 1/8th of the input image 297 | ''' 298 | output0 = self.level1(input) 299 | inp1 = self.sample1(input) 300 | inp2 = self.sample2(input) 301 | 302 | output0_cat = self.b1(torch.cat([output0, inp1], 1)) 303 | output1_0 = self.level2_0(output0_cat) # down-sampled 304 | 305 | for i, layer in enumerate(self.level2): 306 | if i == 0: 307 | output1 = layer(output1_0) 308 | else: 309 | output1 = layer(output1) 310 | 311 | output1_cat = self.b2(torch.cat([output1, output1_0, inp2], 1)) 312 | 313 | output2_0 = self.level3_0(output1_cat) # down-sampled 314 | for i, layer in enumerate(self.level3): 315 | if i == 0: 316 | output2 = layer(output2_0) 317 | else: 318 | output2 = layer(output2) 319 | 320 | output2_cat = self.b3(torch.cat([output2_0, output2], 1)) 321 | 322 | classifier = self.classifier(output2_cat) 323 | 324 | return classifier 325 | 326 | 327 | class ESPNet(nn.Module): 328 | ''' 329 | This class defines the ESPNet network 330 | ''' 331 | 332 | def __init__(self, classes=20, p=2, q=3, encoderFile=None): 333 | ''' 334 | :param classes: number of classes in the dataset. Default is 20 for the cityscapes 335 | :param p: depth multiplier 336 | :param q: depth multiplier 337 | :param encoderFile: pretrained encoder weights. Recall that we first trained the ESPNet-C and then attached the 338 | RUM-based light weight decoder. See paper for more details. 
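If encoderFile is None, the encoder is trained from scratch together with the decoder.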
339 | ''' 340 | super().__init__() 341 | self.encoder = ESPNet_Encoder(classes, p, q) 342 | if encoderFile != None: 343 | self.encoder.load_state_dict(torch.load(encoderFile)) 344 | print('Encoder loaded!') 345 | # load the encoder modules 346 | self.modules = [] 347 | for i, m in enumerate(self.encoder.children()): 348 | self.modules.append(m) 349 | 350 | # light-weight decoder 351 | self.level3_C = C(128 + 3, classes, 1, 1) 352 | self.br = nn.BatchNorm2d(classes, eps=1e-03) 353 | self.conv = CBR(19 + classes, classes, 3, 1) 354 | 355 | self.up_l3 = nn.Sequential( 356 | nn.ConvTranspose2d(classes, classes, 2, stride=2, padding=0, output_padding=0, bias=False)) 357 | self.combine_l2_l3 = nn.Sequential(BR(2 * classes), 358 | DilatedParllelResidualBlockB(2 * classes, classes, add=False)) 359 | 360 | self.up_l2 = nn.Sequential( 361 | nn.ConvTranspose2d(classes, classes, 2, stride=2, padding=0, output_padding=0, bias=False), BR(classes)) 362 | 363 | self.classifier = nn.ConvTranspose2d(classes, classes, 2, stride=2, padding=0, output_padding=0, bias=False) 364 | 365 | def forward(self, input): 366 | ''' 367 | :param input: RGB image 368 | :return: transformed feature map 369 | ''' 370 | output0 = self.modules[0](input) 371 | inp1 = self.modules[1](input) 372 | inp2 = self.modules[2](input) 373 | 374 | output0_cat = self.modules[3](torch.cat([output0, inp1], 1)) 375 | output1_0 = self.modules[4](output0_cat) # down-sampled 376 | 377 | for i, layer in enumerate(self.modules[5]): 378 | if i == 0: 379 | output1 = layer(output1_0) 380 | else: 381 | output1 = layer(output1) 382 | 383 | output1_cat = self.modules[6](torch.cat([output1, output1_0, inp2], 1)) 384 | 385 | output2_0 = self.modules[7](output1_cat) # down-sampled 386 | for i, layer in enumerate(self.modules[8]): 387 | if i == 0: 388 | output2 = layer(output2_0) 389 | else: 390 | output2 = layer(output2) 391 | 392 | output2_cat = self.modules[9](torch.cat([output2_0, output2], 1)) # concatenate for feature map width expansion 393 | 394 | output2_c = self.up_l3(self.br(self.modules[10](output2_cat))) # RUM 395 | 396 | output1_C = self.level3_C(output1_cat) # project to C-dimensional space 397 | comb_l2_l3 = self.up_l2(self.combine_l2_l3(torch.cat([output1_C, output2_c], 1))) # RUM 398 | 399 | concat_features = self.conv(torch.cat([comb_l2_l3, output0_cat], 1)) 400 | 401 | classifier = self.classifier(concat_features) 402 | 403 | out = [] 404 | out.append(classifier) 405 | return out 406 | 407 | 408 | if __name__ == '__main__': 409 | i = torch.Tensor(1,3,512,512).cuda() 410 | m = ESPNet(19).cuda() 411 | m.eval() 412 | o = m(i) 413 | print(o[0].size()) -------------------------------------------------------------------------------- /libs/models/FastSCNN.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | # FastSCNN doesn't use pretrained backbone network while usually takes longer training time. 
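# Pipeline: LearningToDownsample (one conv + two depthwise-separable convs, each
# stride 2 -> 1/8 resolution, 64 channels), GlobalFeatureExtractor (three groups of
# MobileNetV2-style LinearBottlenecks plus pyramid pooling at 1/32),
# FeatureFusionModule (upsamples the 1/32 branch by 4x and adds it to the 1/8 branch),
# and a depthwise-separable Classifer head; the logits stay at 1/8 resolution, and
# aux=True adds an auxiliary head on the 1/8 features.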
4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class FastSCNN(nn.Module): 10 | def __init__(self, num_classes, aux=False): 11 | super(FastSCNN, self).__init__() 12 | self.aux = aux 13 | self.learning_to_downsample = LearningToDownsample(32, 48, 64) 14 | self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3]) 15 | self.feature_fusion = FeatureFusionModule(64, 128, 128) 16 | self.classifier = Classifer(128, num_classes) 17 | if self.aux: 18 | self.auxlayer = nn.Sequential( 19 | nn.Conv2d(64, 64, 3, padding=1, bias=False), 20 | nn.BatchNorm2d(64), 21 | nn.ReLU(True), 22 | nn.Dropout(0.1), 23 | nn.Conv2d(64, num_classes, 1) 24 | ) 25 | 26 | def forward(self, x): 27 | higher_res_features = self.learning_to_downsample(x) 28 | x = self.global_feature_extractor(higher_res_features) 29 | x = self.feature_fusion(higher_res_features, x) 30 | x = self.classifier(x) 31 | outputs = [] 32 | outputs.append(x) 33 | if self.aux: 34 | auxout = self.auxlayer(higher_res_features) 35 | outputs.append(auxout) 36 | return tuple(outputs) 37 | 38 | 39 | class _ConvBNReLU(nn.Module): 40 | """Conv-BN-ReLU""" 41 | 42 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, **kwargs): 43 | super(_ConvBNReLU, self).__init__() 44 | self.conv = nn.Sequential( 45 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False), 46 | nn.BatchNorm2d(out_channels), 47 | nn.ReLU(True) 48 | ) 49 | 50 | def forward(self, x): 51 | return self.conv(x) 52 | 53 | 54 | class _DSConv(nn.Module): 55 | """Depthwise Separable Convolutions""" 56 | 57 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 58 | super(_DSConv, self).__init__() 59 | self.conv = nn.Sequential( 60 | nn.Conv2d(dw_channels, dw_channels, 3, stride, 1, groups=dw_channels, bias=False), 61 | nn.BatchNorm2d(dw_channels), 62 | nn.ReLU(True), 63 | nn.Conv2d(dw_channels, out_channels, 1, bias=False), 64 | nn.BatchNorm2d(out_channels), 65 | nn.ReLU(True) 66 | ) 67 | 68 | def forward(self, x): 69 | return self.conv(x) 70 | 71 | 72 | class _DWConv(nn.Module): 73 | def __init__(self, dw_channels, out_channels, stride=1, **kwargs): 74 | super(_DWConv, self).__init__() 75 | self.conv = nn.Sequential( 76 | nn.Conv2d(dw_channels, out_channels, 3, stride, 1, groups=dw_channels, bias=False), 77 | nn.BatchNorm2d(out_channels), 78 | nn.ReLU(True) 79 | ) 80 | 81 | def forward(self, x): 82 | return self.conv(x) 83 | 84 | 85 | class LinearBottleneck(nn.Module): 86 | """LinearBottleneck used in MobileNetV2""" 87 | 88 | def __init__(self, in_channels, out_channels, t=6, stride=2, **kwargs): 89 | super(LinearBottleneck, self).__init__() 90 | self.use_shortcut = stride == 1 and in_channels == out_channels 91 | self.block = nn.Sequential( 92 | # pw 93 | _ConvBNReLU(in_channels, in_channels * t, 1), 94 | # dw 95 | _DWConv(in_channels * t, in_channels * t, stride), 96 | # pw-linear 97 | nn.Conv2d(in_channels * t, out_channels, 1, bias=False), 98 | nn.BatchNorm2d(out_channels) 99 | ) 100 | 101 | def forward(self, x): 102 | out = self.block(x) 103 | if self.use_shortcut: 104 | out = x + out 105 | return out 106 | 107 | 108 | class PyramidPooling(nn.Module): 109 | """Pyramid pooling module""" 110 | 111 | def __init__(self, in_channels, out_channels, **kwargs): 112 | super(PyramidPooling, self).__init__() 113 | inter_channels = int(in_channels / 4) 114 | self.conv1 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 115 | self.conv2 = 
_ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 116 | self.conv3 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 117 | self.conv4 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs) 118 | self.out = _ConvBNReLU(in_channels * 2, out_channels, 1) 119 | 120 | def pool(self, x, size): 121 | avgpool = nn.AdaptiveAvgPool2d(size) 122 | return avgpool(x) 123 | 124 | def upsample(self, x, size): 125 | return F.interpolate(x, size, mode='bilinear', align_corners=True) 126 | 127 | def forward(self, x): 128 | size = x.size()[2:] 129 | feat1 = self.upsample(self.conv1(self.pool(x, 1)), size) 130 | feat2 = self.upsample(self.conv2(self.pool(x, 2)), size) 131 | feat3 = self.upsample(self.conv3(self.pool(x, 3)), size) 132 | feat4 = self.upsample(self.conv4(self.pool(x, 6)), size) 133 | x = torch.cat([x, feat1, feat2, feat3, feat4], dim=1) 134 | x = self.out(x) 135 | return x 136 | 137 | 138 | class LearningToDownsample(nn.Module): 139 | """Learning to downsample module""" 140 | 141 | def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64, **kwargs): 142 | super(LearningToDownsample, self).__init__() 143 | self.conv = _ConvBNReLU(3, dw_channels1, 3, 2) 144 | self.dsconv1 = _DSConv(dw_channels1, dw_channels2, 2) 145 | self.dsconv2 = _DSConv(dw_channels2, out_channels, 2) 146 | 147 | def forward(self, x): 148 | x = self.conv(x) 149 | x = self.dsconv1(x) 150 | x = self.dsconv2(x) 151 | return x 152 | 153 | 154 | class GlobalFeatureExtractor(nn.Module): 155 | """Global feature extractor module""" 156 | 157 | def __init__(self, in_channels=64, block_channels=(64, 96, 128), 158 | out_channels=128, t=6, num_blocks=(3, 3, 3)): 159 | super(GlobalFeatureExtractor, self).__init__() 160 | self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], t, 2) 161 | self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], t, 2) 162 | self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], t, 1) 163 | self.ppm = PyramidPooling(block_channels[2], out_channels) 164 | 165 | def _make_layer(self, block, inplanes, planes, blocks, t=6, stride=1): 166 | layers = [] 167 | layers.append(block(inplanes, planes, t, stride)) 168 | for i in range(1, blocks): 169 | layers.append(block(planes, planes, t, 1)) 170 | return nn.Sequential(*layers) 171 | 172 | def forward(self, x): 173 | x = self.bottleneck1(x) 174 | x = self.bottleneck2(x) 175 | x = self.bottleneck3(x) 176 | x = self.ppm(x) 177 | return x 178 | 179 | 180 | class FeatureFusionModule(nn.Module): 181 | """Feature fusion module""" 182 | 183 | def __init__(self, highter_in_channels, lower_in_channels, out_channels, scale_factor=4, **kwargs): 184 | super(FeatureFusionModule, self).__init__() 185 | self.scale_factor = scale_factor 186 | self.dwconv = _DWConv(lower_in_channels, out_channels, 1) 187 | self.conv_lower_res = nn.Sequential( 188 | nn.Conv2d(out_channels, out_channels, 1), 189 | nn.BatchNorm2d(out_channels) 190 | ) 191 | self.conv_higher_res = nn.Sequential( 192 | nn.Conv2d(highter_in_channels, out_channels, 1), 193 | nn.BatchNorm2d(out_channels) 194 | ) 195 | self.relu = nn.ReLU(True) 196 | 197 | def forward(self, higher_res_feature, lower_res_feature): 198 | lower_res_feature = F.interpolate(lower_res_feature, scale_factor=4, mode='bilinear', align_corners=True) 199 | lower_res_feature = self.dwconv(lower_res_feature) 200 | lower_res_feature = self.conv_lower_res(lower_res_feature) 201 | 202 | 
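# high-resolution branch: 1x1 conv + BN projection; the two branches are summed and
# passed through ReLU (note the upsample above hard-codes scale_factor=4 instead of
# using self.scale_factor).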
higher_res_feature = self.conv_higher_res(higher_res_feature) 203 | out = higher_res_feature + lower_res_feature 204 | return self.relu(out) 205 | 206 | 207 | class Classifer(nn.Module): 208 | """Classifer""" 209 | def __init__(self, dw_channels, num_classes, stride=1, **kwargs): 210 | super(Classifer, self).__init__() 211 | self.dsconv1 = _DSConv(dw_channels, dw_channels, stride) 212 | self.dsconv2 = _DSConv(dw_channels, dw_channels, stride) 213 | self.conv = nn.Sequential( 214 | nn.Dropout(0.1), 215 | nn.Conv2d(dw_channels, num_classes, 1) 216 | ) 217 | 218 | def forward(self, x): 219 | x = self.dsconv1(x) 220 | x = self.dsconv2(x) 221 | x = self.conv(x) 222 | return x 223 | 224 | 225 | if __name__ == '__main__': 226 | i = torch.Tensor(1,3,512,512).cuda() 227 | m = FastSCNN(19).cuda() 228 | m.eval() 229 | o = m(i) 230 | print(o[0].size()) -------------------------------------------------------------------------------- /libs/models/ICNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from libs.core.operators import ConvBnRelu 8 | from libs.models.PSPNet import PSPHead_res50 9 | 10 | 11 | class CascadeFeatureFusion(nn.Module): 12 | """CFF Unit""" 13 | def __init__(self, low_channels, high_channels, out_channels, nclass, norm_layer=nn.BatchNorm2d): 14 | super(CascadeFeatureFusion, self).__init__() 15 | self.conv_low = nn.Sequential( 16 | nn.Conv2d(low_channels, out_channels, 3, padding=2, dilation=2, bias=False), 17 | norm_layer(out_channels) 18 | ) 19 | self.conv_high = nn.Sequential( 20 | nn.Conv2d(high_channels, out_channels, 1, bias=False), 21 | norm_layer(out_channels) 22 | ) 23 | self.conv_low_cls = nn.Conv2d(out_channels, nclass, 1, bias=False) 24 | 25 | def forward(self, x_low, x_high): 26 | x_low = F.interpolate(x_low, size=x_high.size()[2:], mode='bilinear', align_corners=True) 27 | x_low = self.conv_low(x_low) 28 | x_high = self.conv_high(x_high) 29 | x = x_low + x_high 30 | x = F.relu(x, inplace=True) 31 | x_low_cls = self.conv_low_cls(x_low) 32 | 33 | return x, x_low_cls 34 | 35 | 36 | class _ICHead(nn.Module): 37 | def __init__(self, nclass, norm_layer=nn.BatchNorm2d): 38 | super(_ICHead, self).__init__() 39 | self.cff_12 = CascadeFeatureFusion(128, 64, 128, nclass, norm_layer) 40 | self.cff_24 = CascadeFeatureFusion(256, 256, 128, nclass, norm_layer) 41 | 42 | self.conv_cls = nn.Conv2d(128, nclass, 1, bias=False) 43 | 44 | def forward(self, x_sub1, x_sub2, x_sub4): 45 | outputs = list() 46 | x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2) 47 | outputs.append(x_24_cls) 48 | x_cff_12, x_12_cls = self.cff_12(x_cff_24, x_sub1) 49 | outputs.append(x_12_cls) 50 | 51 | up_x2 = F.interpolate(x_cff_12, scale_factor=2, mode='bilinear', align_corners=True) 52 | up_x2 = self.conv_cls(up_x2) 53 | outputs.append(up_x2) 54 | up_x8 = F.interpolate(up_x2, scale_factor=4, mode='bilinear', align_corners=True) 55 | outputs.append(up_x8) 56 | # 1 -> 1/4 -> 1/8 -> 1/16 57 | outputs.reverse() 58 | return outputs 59 | 60 | 61 | class ICNet(nn.Module): 62 | def __init__(self, nclass): 63 | super(ICNet, self).__init__() 64 | self.conv_sub1 = nn.Sequential( 65 | ConvBnRelu(3, 32, 3, 2, 1), 66 | ConvBnRelu(32, 32, 3, 2, 1), 67 | ConvBnRelu(32, 64, 3, 2, 1) 68 | ) 69 | self.backbone = PSPHead_res50() 70 | self.head = _ICHead(nclass) 71 | 72 | self.conv_sub4 = ConvBnRelu(512, 256, 1) 73 | self.conv_sub2 = ConvBnRelu(512, 256, 
1) 74 | 75 | def forward(self, x): 76 | 77 | # sub 1 78 | x_sub1_out = self.conv_sub1(x) 79 | 80 | # sub 2 81 | x_sub2 = F.interpolate(x, scale_factor=0.5, mode='bilinear', align_corners=True) 82 | 83 | x = self.backbone.relu1(self.backbone.bn1(self.backbone.conv1(x_sub2))) 84 | x = self.backbone.relu2(self.backbone.bn2(self.backbone.conv2(x))) 85 | x = self.backbone.relu3(self.backbone.bn3(self.backbone.conv3(x))) 86 | x = self.backbone.maxpool(x) 87 | 88 | x = self.backbone.layer1(x) 89 | x_sub2_out = self.backbone.layer2(x) 90 | 91 | # sub 4 92 | x_sub4 = F.interpolate(x_sub2_out, scale_factor=0.5, mode='bilinear', align_corners=True) 93 | 94 | x = self.backbone.layer3(x_sub4) 95 | x = self.backbone.layer4(x) 96 | x_sub4_out = self.backbone.head(x) 97 | 98 | x_sub4_out = self.conv_sub4(x_sub4_out) 99 | x_sub2_out = self.conv_sub2(x_sub2_out) 100 | 101 | res = self.head(x_sub1_out, x_sub2_out, x_sub4_out) 102 | 103 | return res 104 | 105 | 106 | def icnet(num_classes=19, data_set="cityscape"): 107 | return ICNet(num_classes) 108 | 109 | 110 | 111 | if __name__ == '__main__': 112 | i = torch.Tensor(1,3,512,512).cuda() 113 | m = ICNet(19).cuda() 114 | m.eval() 115 | res= m(i) 116 | print("ICnet output length: ", len(res)) 117 | for i in res: 118 | print(i.size()) -------------------------------------------------------------------------------- /libs/models/MSFNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | # Pytorch Implementation Of MSFNet: Real-Time Semantic Segmentation via Multiply Spatial Fusion Network(face++) 4 | # I didn't include the boundaries information 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | 11 | class MSFNet(nn.Module): 12 | def __init__(self): 13 | super(MSFNet, self).__init__() 14 | 15 | 16 | def forward(self, x): 17 | pass 18 | 19 | 20 | 21 | if __name__ == '__main__': 22 | i = torch.Tensor(1, 3, 512, 512).cuda() 23 | m = MSFNet().cuda() 24 | m.eval() 25 | o = m(i) 26 | print(o[0].size()) 27 | print("output length: ", len(o)) -------------------------------------------------------------------------------- /libs/models/PSPNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import functional as F 3 | import torch 4 | affine_par = True 5 | 6 | from torch.nn import BatchNorm2d 7 | 8 | 9 | def conv3x3(in_planes, out_planes, stride=1): 10 | "3x3 convolution with padding" 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 12 | padding=1, bias=False) 13 | 14 | 15 | class Bottleneck(nn.Module): 16 | expansion = 4 17 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1): 18 | super(Bottleneck, self).__init__() 19 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 20 | self.bn1 = BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 22 | padding=dilation*multi_grid, dilation=dilation*multi_grid, bias=False) 23 | self.bn2 = BatchNorm2d(planes) 24 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 25 | self.bn3 = BatchNorm2d(planes * 4) 26 | self.relu = nn.ReLU(inplace=False) 27 | self.relu_inplace = nn.ReLU(inplace=True) 28 | self.downsample = downsample 29 | self.dilation = dilation 30 | self.stride = stride 31 | 32 | def forward(self, x): 33 | residual = x 34 | 35 | out = self.conv1(x) 36 | out = self.bn1(out) 37 | out = 
self.relu(out) 38 | 39 | out = self.conv2(out) 40 | out = self.bn2(out) 41 | out = self.relu(out) 42 | 43 | out = self.conv3(out) 44 | out = self.bn3(out) 45 | 46 | if self.downsample is not None: 47 | residual = self.downsample(x) 48 | 49 | out = out + residual 50 | out = self.relu_inplace(out) 51 | 52 | return out 53 | 54 | class PSPModule(nn.Module): 55 | """ 56 | Reference: 57 | Zhao, Hengshuang, et al. *"Pyramid scene parsing network."* 58 | """ 59 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): 60 | super(PSPModule, self).__init__() 61 | 62 | self.stages = [] 63 | self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) 64 | self.bottleneck = nn.Sequential( 65 | nn.Conv2d(features+len(sizes)*out_features, out_features, kernel_size=3, padding=1, dilation=1, bias=False), 66 | BatchNorm2d(out_features), 67 | nn.ReLU(), 68 | nn.Dropout2d(0.1) 69 | ) 70 | 71 | def _make_stage(self, features, out_features, size): 72 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 73 | conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) 74 | bn = BatchNorm2d(out_features) 75 | return nn.Sequential(prior, conv, bn) 76 | 77 | def forward(self, feats): 78 | h, w = feats.size(2), feats.size(3) 79 | priors = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] 80 | bottle = self.bottleneck(torch.cat(priors, 1)) 81 | return bottle 82 | 83 | 84 | class ResNet(nn.Module): 85 | def __init__(self, block, layers, num_classes): 86 | self.inplanes = 128 87 | super(ResNet, self).__init__() 88 | self.conv1 = conv3x3(3, 64, stride=2) 89 | self.bn1 = BatchNorm2d(64) 90 | self.relu1 = nn.ReLU(inplace=False) 91 | self.conv2 = conv3x3(64, 64) 92 | self.bn2 = BatchNorm2d(64) 93 | self.relu2 = nn.ReLU(inplace=False) 94 | self.conv3 = conv3x3(64, 128) 95 | self.bn3 = BatchNorm2d(128) 96 | self.relu3 = nn.ReLU(inplace=False) 97 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 98 | 99 | self.relu = nn.ReLU(inplace=False) 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 101 | self.layer1 = self._make_layer(block, 64, layers[0]) 102 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 103 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 104 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1,1,1)) 105 | 106 | self.head = nn.Sequential(PSPModule(2048, 512), 107 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True)) 108 | 109 | self.dsn = nn.Sequential( 110 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1), 111 | BatchNorm2d(512), 112 | nn.ReLU(), 113 | nn.Dropout2d(0.1), 114 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True) 115 | ) 116 | 117 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1): 118 | downsample = None 119 | if stride != 1 or self.inplanes != planes * block.expansion: 120 | downsample = nn.Sequential( 121 | nn.Conv2d(self.inplanes, planes * block.expansion, 122 | kernel_size=1, stride=stride, bias=False), 123 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 124 | 125 | layers = [] 126 | generate_multi_grid = lambda index, grids: grids[index%len(grids)] if isinstance(grids, tuple) else 1 127 | layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample, multi_grid=generate_multi_grid(0, multi_grid))) 
128 | self.inplanes = planes * block.expansion 129 | for i in range(1, blocks): 130 | layers.append(block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid))) 131 | 132 | return nn.Sequential(*layers) 133 | 134 | def forward(self, x): 135 | x = self.relu1(self.bn1(self.conv1(x))) 136 | x = self.relu2(self.bn2(self.conv2(x))) 137 | x = self.relu3(self.bn3(self.conv3(x))) 138 | x = self.maxpool(x) 139 | x = self.layer1(x) 140 | x = self.layer2(x) 141 | x = self.layer3(x) 142 | x_dsn = None 143 | if self.training: 144 | x_dsn = self.dsn(x) 145 | x = self.layer4(x) 146 | x = self.head(x) 147 | if self.training: 148 | return [x, x_dsn] 149 | else: 150 | return [x] 151 | 152 | 153 | class PSPHead(nn.Module): 154 | """ 155 | Used for ICNet 156 | """ 157 | def __init__(self, block, layers): 158 | self.inplanes = 128 159 | super(PSPHead, self).__init__() 160 | self.conv1 = conv3x3(3, 64, stride=2) 161 | self.bn1 = BatchNorm2d(64) 162 | self.relu1 = nn.ReLU(inplace=False) 163 | self.conv2 = conv3x3(64, 64) 164 | self.bn2 = BatchNorm2d(64) 165 | self.relu2 = nn.ReLU(inplace=False) 166 | self.conv3 = conv3x3(64, 128) 167 | self.bn3 = BatchNorm2d(128) 168 | self.relu3 = nn.ReLU(inplace=False) 169 | 170 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 171 | 172 | self.layer1 = self._make_layer(block, 64, layers[0]) 173 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 174 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 175 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1)) 176 | self.head = PSPModule(2048, 512) 177 | 178 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1): 179 | downsample = None 180 | if stride != 1 or self.inplanes != planes * block.expansion: 181 | downsample = nn.Sequential( 182 | nn.Conv2d(self.inplanes, planes * block.expansion, 183 | kernel_size=1, stride=stride, bias=False), 184 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 185 | 186 | layers = [] 187 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 188 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 189 | multi_grid=generate_multi_grid(0, multi_grid))) 190 | self.inplanes = planes * block.expansion 191 | for i in range(1, blocks): 192 | layers.append( 193 | block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid))) 194 | 195 | return nn.Sequential(*layers) 196 | 197 | def forward(self, x): 198 | x = self.relu1(self.bn1(self.conv1(x))) 199 | x = self.relu2(self.bn2(self.conv2(x))) 200 | x = self.relu3(self.bn3(self.conv3(x))) 201 | x = self.maxpool(x) 202 | x = self.layer1(x) 203 | x = self.layer2(x) 204 | x = self.layer3(x) 205 | x = self.layer4(x) 206 | x = self.head(x) 207 | return x 208 | 209 | 210 | def PSPNet_res101(num_classes=21): 211 | model = ResNet(Bottleneck,[3, 4, 23, 3], num_classes) 212 | return model 213 | 214 | 215 | def PSPNet_res50(num_classes=21): 216 | model = ResNet(Bottleneck,[3, 4, 6, 3], num_classes) 217 | return model 218 | 219 | 220 | def PSPHead_res50(): 221 | model = PSPHead(Bottleneck,[3, 4, 6, 3]) 222 | return model 223 | 224 | 225 | if __name__ == '__main__': 226 | i = torch.Tensor(1,3,769,769).cuda() 227 | model = PSPNet_res101(19) 228 | model.eval() 229 | o = model(i) 230 | print(o[0].size()) 231 | print(o[1].size()) 
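Both PSPNet variants above return [main, dsn] logits at roughly 1/8 of the input resolution while training, and only the main logits in eval mode. The sketch below shows one way such outputs are typically combined; the 0.4 auxiliary weight, the ignore index of 255 and the plain cross-entropy loss are illustrative assumptions, not code taken from this repository.

import torch.nn.functional as F
from torch import nn

criterion = nn.CrossEntropyLoss(ignore_index=255)  # assumed loss and ignore index

def psp_loss(outputs, label, aux_weight=0.4):
    # outputs[0] is the main prediction, outputs[1] (training only) the dsn head;
    # both are at ~1/8 resolution, so upsample to the label size before the loss.
    h, w = label.shape[-2:]
    main = F.interpolate(outputs[0], size=(h, w), mode='bilinear', align_corners=True)
    loss = criterion(main, label)
    if len(outputs) > 1:
        aux = F.interpolate(outputs[1], size=(h, w), mode='bilinear', align_corners=True)
        loss = loss + aux_weight * criterion(aux, label)
    return loss

# usage sketch (labels must be LongTensor with 255 as the ignore value):
# model = PSPNet_res50(num_classes=19).train()
# loss = psp_loss(model(images), labels.long())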
-------------------------------------------------------------------------------- /libs/models/SwiftNet.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | """ 4 | SwiftNet is a little different 5 | 1. because it use the pre-activation input as lateral feature input. 6 | The backbone need writing for easier experiment 7 | 2. I also add dsn head for easier training during the decoder upsample process. 8 | 3. SwiftNet use torch pretrained backbone. 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | from libs.core.operators import dsn, upsample, conv3x3 17 | 18 | model_urls = { 19 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 20 | } 21 | 22 | 23 | class BasicBlock(nn.Module): 24 | expansion = 1 25 | 26 | def __init__(self, inplanes, planes, stride=1, downsample=None, efficient=True, use_bn=True): 27 | super(BasicBlock, self).__init__() 28 | self.use_bn = use_bn 29 | self.conv1 = conv3x3(inplanes, planes, stride) 30 | self.bn1 = nn.BatchNorm2d(planes) if self.use_bn else None 31 | self.relu = nn.ReLU(inplace=True) 32 | self.conv2 = conv3x3(planes, planes) 33 | self.bn2 = nn.BatchNorm2d(planes) if self.use_bn else None 34 | self.downsample = downsample 35 | self.stride = stride 36 | self.efficient = efficient 37 | 38 | def forward(self, x): 39 | residual = x 40 | out = self.conv1(x) 41 | out = self.bn1(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv2(out) 45 | out = self.bn2(out) 46 | 47 | if self.downsample is not None: 48 | residual = self.downsample(x) 49 | 50 | out = out + residual 51 | relu = self.relu(out) 52 | 53 | return relu, out 54 | 55 | 56 | class SwiftNetResNet(nn.Module): 57 | def __init__(self, block, layers, num_features=19, k_up=3, efficient=True, use_bn=True, 58 | spp_grids=(8, 4, 2, 1), spp_square_grid=False): 59 | super(SwiftNetResNet, self).__init__() 60 | self.inplanes = 64 61 | self.efficient = efficient 62 | self.nclass = num_features 63 | self.use_bn = use_bn 64 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 65 | bias=False) 66 | self.bn1 = nn.BatchNorm2d(64) if self.use_bn else lambda x: x 67 | self.relu = nn.ReLU(inplace=True) 68 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 69 | upsamples = [] 70 | self.layer1 = self._make_layer(block, 64, layers[0]) 71 | upsamples += [_Upsample(num_features, self.inplanes, num_features, use_bn=self.use_bn, k=k_up)] 72 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 73 | upsamples += [_Upsample(num_features, self.inplanes, num_features, use_bn=self.use_bn, k=k_up)] 74 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 75 | upsamples += [_Upsample(num_features, self.inplanes, num_features, use_bn=self.use_bn, k=k_up)] 76 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 77 | 78 | self.fine_tune = [self.conv1, self.maxpool, self.layer1, self.layer2, self.layer3, self.layer4] 79 | if self.use_bn: 80 | self.fine_tune += [self.bn1] 81 | 82 | num_levels = 3 83 | self.spp_size = num_features 84 | bt_size = self.spp_size 85 | 86 | level_size = self.spp_size // num_levels 87 | 88 | self.dsn = dsn(256, self.nclass) 89 | 90 | self.spp = SpatialPyramidPooling(self.inplanes, num_levels, bt_size=bt_size, level_size=level_size, 91 | out_size=self.spp_size, grids=spp_grids, square_grid=spp_square_grid, 92 | bn_momentum=0.01 / 2, 
use_bn=self.use_bn) 93 | self.upsample = nn.ModuleList(list(reversed(upsamples))) 94 | 95 | self.random_init = [self.spp, self.upsample] 96 | 97 | self.num_features = num_features 98 | 99 | for m in self.modules(): 100 | if isinstance(m, nn.Conv2d): 101 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 102 | elif isinstance(m, nn.BatchNorm2d): 103 | nn.init.constant_(m.weight, 1) 104 | nn.init.constant_(m.bias, 0) 105 | 106 | def _make_layer(self, block, planes, blocks, stride=1): 107 | downsample = None 108 | if stride != 1 or self.inplanes != planes * block.expansion: 109 | layers = [nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False)] 110 | if self.use_bn: 111 | layers += [nn.BatchNorm2d(planes * block.expansion)] 112 | downsample = nn.Sequential(*layers) 113 | layers = [block(self.inplanes, planes, stride, downsample, efficient=self.efficient, use_bn=self.use_bn)] 114 | self.inplanes = planes * block.expansion 115 | for i in range(1, blocks): 116 | layers += [block(self.inplanes, planes, efficient=self.efficient, use_bn=self.use_bn)] 117 | 118 | return nn.Sequential(*layers) 119 | 120 | 121 | def forward_resblock(self, x, layers): 122 | skip = None 123 | for l in layers: 124 | x = l(x) 125 | if isinstance(x, tuple): 126 | x, skip = x 127 | return x, skip 128 | 129 | def forward_down(self, image): 130 | x = self.conv1(image) 131 | x = self.bn1(x) 132 | x = self.relu(x) 133 | x = self.maxpool(x) 134 | 135 | features = [] 136 | x, skip = self.forward_resblock(x, self.layer1) 137 | features += [skip] 138 | x, skip = self.forward_resblock(x, self.layer2) 139 | features += [skip] 140 | x, skip = self.forward_resblock(x, self.layer3) 141 | features += [skip] 142 | 143 | dsn = None 144 | if self.training: 145 | dsn = self.dsn(x) 146 | x, skip = self.forward_resblock(x, self.layer4) 147 | 148 | features += [self.spp.forward(skip)] 149 | if self.training: 150 | return features, dsn 151 | else: 152 | return features 153 | 154 | def forward_up(self, features): 155 | features = features[::-1] 156 | 157 | x = features[0] 158 | 159 | upsamples = [] 160 | for skip, up in zip(features[1:], self.upsample): 161 | x = up(x, skip) 162 | upsamples += [x] 163 | return [x] 164 | 165 | def forward(self, x): 166 | dsn = None 167 | if self.training: 168 | features, dsn = self.forward_down(x) 169 | else: 170 | features = self.forward_down(x) 171 | 172 | res = self.forward_up(features) 173 | 174 | if self.training: 175 | res.append(dsn) 176 | return res 177 | 178 | 179 | class SpatialPyramidPooling(nn.Module): 180 | """ 181 | SPP module is little different from ppm by inserting middle level feature to save the computation and memory. 
182 | """ 183 | def __init__(self, num_maps_in, num_levels, bt_size=512, level_size=128, out_size=128, 184 | grids=(6, 3, 2, 1), square_grid=False, bn_momentum=0.1, use_bn=True): 185 | super(SpatialPyramidPooling, self).__init__() 186 | self.grids = grids 187 | self.square_grid = square_grid 188 | self.spp = nn.Sequential() 189 | self.spp.add_module('spp_bn', 190 | _BNReluConv(num_maps_in, bt_size, k=1, bn_momentum=bn_momentum, batch_norm=use_bn)) 191 | num_features = bt_size 192 | final_size = num_features 193 | for i in range(num_levels): 194 | final_size += level_size 195 | self.spp.add_module('spp' + str(i), 196 | _BNReluConv(num_features, level_size, k=1, bn_momentum=bn_momentum, batch_norm=use_bn)) 197 | self.spp.add_module('spp_fuse', 198 | _BNReluConv(final_size, out_size, k=1, bn_momentum=bn_momentum, batch_norm=use_bn)) 199 | 200 | def forward(self, x): 201 | levels = [] 202 | target_size = x.size()[2:4] 203 | 204 | ar = target_size[1] / target_size[0] 205 | 206 | x = self.spp[0].forward(x) 207 | levels.append(x) 208 | num = len(self.spp) - 1 209 | 210 | for i in range(1, num): 211 | if not self.square_grid: 212 | grid_size = (self.grids[i - 1], max(1, round(ar * self.grids[i - 1]))) 213 | x_pooled = F.adaptive_avg_pool2d(x, grid_size) 214 | else: 215 | x_pooled = F.adaptive_avg_pool2d(x, self.grids[i - 1]) 216 | level = self.spp[i].forward(x_pooled) 217 | 218 | level = upsample(level, target_size) 219 | levels.append(level) 220 | x = torch.cat(levels, 1) 221 | x = self.spp[-1].forward(x) 222 | return x 223 | 224 | 225 | class _BNReluConv(nn.Sequential): 226 | def __init__(self, num_maps_in, num_maps_out, k=3, batch_norm=True, bn_momentum=0.1, bias=False, dilation=1): 227 | super(_BNReluConv, self).__init__() 228 | if batch_norm: 229 | self.add_module('norm', nn.BatchNorm2d(num_maps_in, momentum=bn_momentum)) 230 | self.add_module('relu', nn.ReLU(inplace=batch_norm is True)) 231 | padding = k // 2 232 | self.add_module('conv', nn.Conv2d(num_maps_in, num_maps_out, 233 | kernel_size=k, padding=padding, bias=bias, dilation=dilation)) 234 | 235 | 236 | class _Upsample(nn.Module): 237 | def __init__(self, num_maps_in, skip_maps_in, num_maps_out, use_bn=True, k=3): 238 | super(_Upsample, self).__init__() 239 | print(f'Upsample layer: in = {num_maps_in}, skip = {skip_maps_in}, out = {num_maps_out}') 240 | self.bottleneck = _BNReluConv(skip_maps_in, num_maps_in, k=1, batch_norm=use_bn) 241 | self.blend_conv = _BNReluConv(num_maps_in, num_maps_out, k=k, batch_norm=use_bn) 242 | 243 | def forward(self, x, skip): 244 | skip = self.bottleneck.forward(skip) 245 | skip_size = skip.size()[2:4] 246 | x = upsample(x, skip_size) 247 | x = x + skip 248 | x = self.blend_conv.forward(x) 249 | return x 250 | 251 | 252 | def SwiftNetRes18(nclass=19, pretrained=True, **kwargs): 253 | """Constructs a ResNet-18 model. 
254 | Args: 255 | pretrained (bool): If True, returns a model pre-trained on ImageNet 256 | """ 257 | model = SwiftNetResNet(BasicBlock, [2, 2, 2, 2], nclass, **kwargs) 258 | if pretrained: 259 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), strict=False) 260 | return model 261 | 262 | 263 | if __name__ == '__main__': 264 | i = torch.Tensor(1, 3, 512, 512).cuda() 265 | m = SwiftNetRes18(pretrained=False).cuda() 266 | m.eval() 267 | o = m(i) 268 | print(o[0].size()) 269 | print("output length: ", len(o)) -------------------------------------------------------------------------------- /libs/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .PSPNet import * 2 | from .DFSegNet import * 3 | from .ICNet import * 4 | from .FastSCNN import * 5 | from .SwiftNet import * 6 | from .ESPNet import * -------------------------------------------------------------------------------- /libs/models/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/models/backbone/__init__.py -------------------------------------------------------------------------------- /libs/models/backbone/dfnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import math 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import BatchNorm2d 8 | 9 | __all__ = ["dfnetv1", "dfnetv2"] 10 | 11 | 12 | def conv3x3(in_planes, out_planes, stride=1): 13 | "3x3 convolution with padding" 14 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 15 | padding=1, bias=False) 16 | 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class dfnetv1(nn.Module): 51 | def __init__(self, num_classes=1000): 52 | super(dfnetv1, self).__init__() 53 | self.inplanes = 64 54 | self.stage1 = nn.Sequential( 55 | nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=2, bias=False), 56 | BatchNorm2d(32), 57 | nn.ReLU(inplace=True), 58 | nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2, bias=False), 59 | BatchNorm2d(64), 60 | nn.ReLU(inplace=True) 61 | ) 62 | 63 | self.stage2 = self._make_layer(64, 3, stride=2) 64 | self.stage3 = self._make_layer(128, 3, stride=2) 65 | self.stage4 = self._make_layer(256, 3, stride=2) 66 | self.stage5 = self._make_layer(512, 1, stride=1) 67 | self.avgpool = nn.AvgPool2d(7, stride=1) 68 | self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes) 69 | 70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 73 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 74 | elif isinstance(m, BatchNorm2d): 75 | m.weight.data.fill_(1) 76 | m.bias.data.zero_() 77 | 78 | def _make_layer(self, planes, blocks, stride=1): 79 | downsample = None 80 | if stride != 1 or self.inplanes != planes * BasicBlock.expansion: 81 | 82 | downsample = nn.Sequential( 83 | nn.Conv2d(self.inplanes, planes * BasicBlock.expansion, 84 | kernel_size=1, stride=stride, bias=False), 85 | BatchNorm2d(planes * BasicBlock.expansion), 86 | ) 87 | 88 | layers = [] 89 | layers.append(BasicBlock(self.inplanes, planes, stride, downsample)) 90 | self.inplanes = planes * BasicBlock.expansion 91 | for i in range(1, blocks): 92 | layers.append(BasicBlock(self.inplanes, planes)) 93 | 94 | return nn.Sequential(*layers) 95 | 96 | def forward(self, x): 97 | x = self.stage1(x) # 4x32 98 | x = self.stage2(x) # 8x64 99 | x3 = self.stage3(x) # 16x128 100 | x4 = self.stage4(x3) # 32x256 101 | x5 = self.stage5(x4) # 32x512 102 | 103 | return x3, x4, x5 104 | 105 | 106 | class dfnetv2(nn.Module): 107 | def __init__(self, num_classes=1000): 108 | super(dfnetv2, self).__init__() 109 | self.inplanes = 64 110 | self.stage1 = nn.Sequential( 111 | nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=2, bias=False), 112 | BatchNorm2d(32), 113 | nn.ReLU(inplace=True), 114 | nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2, bias=False), 115 | BatchNorm2d(64), 116 | nn.ReLU(inplace=True) 117 | ) 118 | 119 | self.stage2_1 = self._make_layer(64, 2, stride=2) 120 | self.stage2_2 = self._make_layer(128, 1, stride=1) 121 | self.stage3_1 = self._make_layer(128, 10, stride=2) 122 | self.stage3_2 = self._make_layer(256, 1, stride=1) 123 | self.stage4_1 = self._make_layer(256, 4, stride=2) 124 | self.stage4_2 = self._make_layer(512, 2, stride=1) 125 | self.avgpool = nn.AvgPool2d(7, stride=1) 126 | self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes) 127 | 128 | for m in self.modules(): 129 | if isinstance(m, nn.Conv2d): 130 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 131 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 132 | elif isinstance(m, BatchNorm2d): 133 | m.weight.data.fill_(1) 134 | m.bias.data.zero_() 135 | 136 | def _make_layer(self, planes, blocks, stride=1): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * BasicBlock.expansion: 139 | 140 | downsample = nn.Sequential( 141 | nn.Conv2d(self.inplanes, planes * BasicBlock.expansion, 142 | kernel_size=1, stride=stride, bias=False), 143 | BatchNorm2d(planes * BasicBlock.expansion), 144 | ) 145 | 146 | layers = [] 147 | layers.append(BasicBlock(self.inplanes, planes, stride, downsample)) 148 | self.inplanes = planes * BasicBlock.expansion 149 | for i in range(1, blocks): 150 | layers.append(BasicBlock(self.inplanes, planes)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def forward(self, x): 155 | x = self.stage1(x) # 4x32 156 | x = self.stage2_1(x) # 8x64 157 | x3 = self.stage2_2(x) # 8x64 158 | x4 = self.stage3_1(x3) # 16x128 159 | x4 = self.stage3_2(x4) # 16x128 160 | x5 = self.stage4_1(x4) # 32x256 161 | x5 = self.stage4_2(x5) # 32x256 162 | return x3,x4,x5 163 | 164 | 165 | if __name__ == '__main__': 166 | i = torch.Tensor(1,3,512,512).cuda() 167 | m = dfnetv2().cuda() 168 | m(i) 169 | 170 | -------------------------------------------------------------------------------- /libs/models/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import torch.nn as nn 4 | 5 | from libs.utils.tools import load_model 6 | 7 | 8 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101'] 9 | 10 | 11 | def conv3x3(in_planes, out_planes, stride=1): 12 | """3x3 convolution with padding""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 14 | padding=1, bias=False) 15 | 16 | 17 | class BasicBlock(nn.Module): 18 | expansion = 1 19 | 20 | def __init__(self, inplanes, planes, stride=1, norm_layer=None, 21 | bn_eps=1e-5, bn_momentum=0.1, downsample=None, inplace=True): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 25 | self.relu = nn.ReLU(inplace=inplace) 26 | self.relu_inplace = nn.ReLU(inplace=True) 27 | self.conv2 = conv3x3(planes, planes) 28 | self.bn2 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 29 | self.downsample = downsample 30 | self.stride = stride 31 | self.inplace = inplace 32 | 33 | def forward(self, x): 34 | residual = x 35 | 36 | out = self.conv1(x) 37 | out = self.bn1(out) 38 | out = self.relu(out) 39 | 40 | out = self.conv2(out) 41 | out = self.bn2(out) 42 | 43 | if self.downsample is not None: 44 | residual = self.downsample(x) 45 | 46 | if self.inplace: 47 | out += residual 48 | else: 49 | out = out + residual 50 | 51 | out = self.relu_inplace(out) 52 | 53 | return out 54 | 55 | 56 | class Bottleneck(nn.Module): 57 | expansion = 4 58 | 59 | def __init__(self, inplanes, planes, stride=1, 60 | norm_layer=None, bn_eps=1e-5, bn_momentum=0.1, 61 | downsample=None, inplace=True): 62 | super(Bottleneck, self).__init__() 63 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 64 | self.bn1 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 66 | padding=1, bias=False) 67 | self.bn2 = norm_layer(planes, eps=bn_eps, momentum=bn_momentum) 68 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 69 | bias=False) 70 | self.bn3 = 
norm_layer(planes * self.expansion, eps=bn_eps, 71 | momentum=bn_momentum) 72 | self.relu = nn.ReLU(inplace=inplace) 73 | self.relu_inplace = nn.ReLU(inplace=True) 74 | self.downsample = downsample 75 | self.stride = stride 76 | self.inplace = inplace 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | if self.inplace: 96 | out += residual 97 | else: 98 | out = out + residual 99 | out = self.relu_inplace(out) 100 | 101 | return out 102 | 103 | 104 | class ResNet(nn.Module): 105 | 106 | def __init__(self, block, layers, norm_layer=nn.BatchNorm2d, bn_eps=1e-5, 107 | bn_momentum=0.1, deep_stem=False, stem_width=32, inplace=True): 108 | self.inplanes = stem_width * 2 if deep_stem else 64 109 | super(ResNet, self).__init__() 110 | if deep_stem: 111 | self.conv1 = nn.Sequential( 112 | nn.Conv2d(3, stem_width, kernel_size=3, stride=2, padding=1, 113 | bias=False), 114 | norm_layer(stem_width, eps=bn_eps, momentum=bn_momentum), 115 | nn.ReLU(inplace=inplace), 116 | nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, 117 | padding=1, 118 | bias=False), 119 | norm_layer(stem_width, eps=bn_eps, momentum=bn_momentum), 120 | nn.ReLU(inplace=inplace), 121 | nn.Conv2d(stem_width, stem_width * 2, kernel_size=3, stride=1, 122 | padding=1, 123 | bias=False), 124 | ) 125 | else: 126 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 127 | bias=False) 128 | 129 | self.bn1 = norm_layer(stem_width * 2 if deep_stem else 64, eps=bn_eps, 130 | momentum=bn_momentum) 131 | self.relu = nn.ReLU(inplace=inplace) 132 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 133 | self.layer1 = self._make_layer(block, norm_layer, 64, layers[0], 134 | inplace, 135 | bn_eps=bn_eps, bn_momentum=bn_momentum) 136 | self.layer2 = self._make_layer(block, norm_layer, 128, layers[1], 137 | inplace, stride=2, 138 | bn_eps=bn_eps, bn_momentum=bn_momentum) 139 | self.layer3 = self._make_layer(block, norm_layer, 256, layers[2], 140 | inplace, stride=2, 141 | bn_eps=bn_eps, bn_momentum=bn_momentum) 142 | self.layer4 = self._make_layer(block, norm_layer, 512, layers[3], 143 | inplace, stride=2, 144 | bn_eps=bn_eps, bn_momentum=bn_momentum) 145 | 146 | def _make_layer(self, block, norm_layer, planes, blocks, inplace=True, 147 | stride=1, bn_eps=1e-5, bn_momentum=0.1): 148 | downsample = None 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion, 152 | kernel_size=1, stride=stride, bias=False), 153 | norm_layer(planes * block.expansion, eps=bn_eps, 154 | momentum=bn_momentum), 155 | ) 156 | 157 | layers = [] 158 | layers.append(block(self.inplanes, planes, stride, norm_layer, bn_eps, 159 | bn_momentum, downsample, inplace)) 160 | self.inplanes = planes * block.expansion 161 | for i in range(1, blocks): 162 | layers.append(block(self.inplanes, planes, 163 | norm_layer=norm_layer, bn_eps=bn_eps, 164 | bn_momentum=bn_momentum, inplace=inplace)) 165 | 166 | return nn.Sequential(*layers) 167 | 168 | def forward(self, x): 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | x = self.maxpool(x) 173 | 174 | layers = [] 175 | x = self.layer1(x) 176 | layers.append(x) 177 | x = self.layer2(x) 
178 | layers.append(x) 179 | x = self.layer3(x) 180 | layers.append(x) 181 | x = self.layer4(x) 182 | layers.append(x) 183 | 184 | return layers 185 | 186 | 187 | def resnet18(pretrained_model=None, **kwargs): 188 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 189 | 190 | if pretrained_model is not None: 191 | model = load_model(model, pretrained_model) 192 | return model 193 | 194 | 195 | def resnet34(pretrained_model=None, **kwargs): 196 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 197 | 198 | if pretrained_model is not None: 199 | model = load_model(model, pretrained_model) 200 | return model 201 | 202 | 203 | def resnet50(pretrained_model=None, **kwargs): 204 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 205 | 206 | if pretrained_model is not None: 207 | model = load_model(model, pretrained_model) 208 | return model 209 | 210 | 211 | def resnet101(pretrained_model=None, **kwargs): 212 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 213 | 214 | if pretrained_model is not None: 215 | model = load_model(model, pretrained_model) 216 | return model 217 | 218 | -------------------------------------------------------------------------------- /libs/models/backbone/xception.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import torch.nn as nn 3 | 4 | from libs.core.operators import ConvBnRelu, SeparableConv2d 5 | from libs.utils.tools import load_model 6 | 7 | __all__ = ['Xception', 'Xception39','XceptionA'] 8 | 9 | 10 | class SeparableConvBnRelu(nn.Module): 11 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, 12 | padding=0, dilation=1, 13 | has_relu=True, norm_layer=nn.BatchNorm2d): 14 | super(SeparableConvBnRelu, self).__init__() 15 | 16 | self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, 17 | padding, dilation, groups=in_channels, 18 | bias=False) 19 | self.point_wise_cbr = ConvBnRelu(in_channels, out_channels, 1, 1, 0, 20 | has_bn=True, norm_layer=norm_layer, 21 | has_relu=has_relu, has_bias=False) 22 | 23 | def forward(self, x): 24 | x = self.conv1(x) 25 | x = self.point_wise_cbr(x) 26 | return x 27 | 28 | 29 | class Block(nn.Module): 30 | expansion = 4 31 | 32 | def __init__(self, in_channels, mid_out_channels, has_proj, stride, 33 | dilation=1, norm_layer=nn.BatchNorm2d): 34 | super(Block, self).__init__() 35 | self.has_proj = has_proj 36 | 37 | if has_proj: 38 | self.proj = SeparableConvBnRelu(in_channels, 39 | mid_out_channels * self.expansion, 40 | 3, stride, 1, 41 | has_relu=False, 42 | norm_layer=norm_layer) 43 | 44 | self.residual_branch = nn.Sequential( 45 | SeparableConvBnRelu(in_channels, mid_out_channels, 46 | 3, stride, dilation, dilation, 47 | has_relu=True, norm_layer=norm_layer), 48 | SeparableConvBnRelu(mid_out_channels, mid_out_channels, 3, 1, 1, 49 | has_relu=True, norm_layer=norm_layer), 50 | SeparableConvBnRelu(mid_out_channels, 51 | mid_out_channels * self.expansion, 3, 1, 1, 52 | has_relu=False, norm_layer=norm_layer)) 53 | self.relu = nn.ReLU(inplace=True) 54 | 55 | def forward(self, x): 56 | shortcut = x 57 | if self.has_proj: 58 | shortcut = self.proj(x) 59 | 60 | residual = self.residual_branch(x) 61 | output = self.relu(shortcut + residual) 62 | 63 | return output 64 | 65 | 66 | class Xception(nn.Module): 67 | def __init__(self, block, layers, channels, norm_layer=nn.BatchNorm2d): 68 | super(Xception, self).__init__() 69 | 70 | self.in_channels = 8 71 | self.conv1 = ConvBnRelu(3, 
self.in_channels, 3, 2, 1, 72 | has_bn=True, norm_layer=norm_layer, 73 | has_relu=True, has_bias=False) 74 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 75 | 76 | self.layer1 = self._make_layer(block, norm_layer, 77 | layers[0], channels[0], stride=2) 78 | self.layer2 = self._make_layer(block, norm_layer, 79 | layers[1], channels[1], stride=2) 80 | self.layer3 = self._make_layer(block, norm_layer, 81 | layers[2], channels[2], stride=2) 82 | 83 | def _make_layer(self, block, norm_layer, blocks, 84 | mid_out_channels, stride=1): 85 | layers = [] 86 | has_proj = True if stride > 1 else False 87 | layers.append(block(self.in_channels, mid_out_channels, has_proj, 88 | stride=stride, norm_layer=norm_layer)) 89 | self.in_channels = mid_out_channels * block.expansion 90 | for i in range(1, blocks): 91 | layers.append(block(self.in_channels, mid_out_channels, 92 | has_proj=False, stride=1, 93 | norm_layer=norm_layer)) 94 | 95 | return nn.Sequential(*layers) 96 | 97 | def forward(self, x): 98 | x = self.conv1(x) 99 | x = self.maxpool(x) 100 | 101 | blocks = [] 102 | x = self.layer1(x) 103 | blocks.append(x) 104 | x = self.layer2(x) 105 | blocks.append(x) 106 | x = self.layer3(x) 107 | blocks.append(x) 108 | 109 | return blocks 110 | 111 | 112 | """ 113 | Xception39 is used for BiSeg Network 114 | """ 115 | def Xception39(pretrained_model=None, **kwargs): 116 | model = Xception(Block, [4, 8, 4], [16, 32, 64], **kwargs) 117 | 118 | if pretrained_model is not None: 119 | model = load_model(model, pretrained_model) 120 | return model 121 | 122 | 123 | class BlockA(nn.Module): 124 | def __init__(self, in_channels, out_channels, stride=1, dilation=1, norm_layer=nn.BatchNorm2d, start_with_relu=True): 125 | super(BlockA, self).__init__() 126 | if out_channels != in_channels or stride != 1: 127 | self.skip = nn.Conv2d(in_channels, out_channels, 1, stride, bias=False) 128 | self.skipbn = norm_layer(out_channels) 129 | else: 130 | self.skip = None 131 | self.relu = nn.ReLU() 132 | rep = list() 133 | inter_channels = out_channels // 4 134 | 135 | if start_with_relu: 136 | rep.append(self.relu) 137 | rep.append(SeparableConv2d(in_channels, inter_channels, 3, 1, dilation, norm_layer=norm_layer)) 138 | rep.append(norm_layer(inter_channels)) 139 | 140 | rep.append(self.relu) 141 | rep.append(SeparableConv2d(inter_channels, inter_channels, 3, 1, dilation, norm_layer=norm_layer)) 142 | rep.append(norm_layer(inter_channels)) 143 | 144 | if stride != 1: 145 | rep.append(self.relu) 146 | rep.append(SeparableConv2d(inter_channels, out_channels, 3, stride, norm_layer=norm_layer)) 147 | rep.append(norm_layer(out_channels)) 148 | else: 149 | rep.append(self.relu) 150 | rep.append(SeparableConv2d(inter_channels, out_channels, 3, 1, norm_layer=norm_layer)) 151 | rep.append(norm_layer(out_channels)) 152 | self.rep = nn.Sequential(*rep) 153 | 154 | def forward(self, x): 155 | out = self.rep(x) 156 | if self.skip is not None: 157 | skip = self.skipbn(self.skip(x)) 158 | else: 159 | skip = x 160 | out = out + skip 161 | return out 162 | 163 | 164 | class Enc(nn.Module): 165 | def __init__(self, in_channels, out_channels, blocks, norm_layer=nn.BatchNorm2d): 166 | super(Enc, self).__init__() 167 | block = list() 168 | block.append(BlockA(in_channels, out_channels, 2, norm_layer=norm_layer)) 169 | for i in range(blocks - 1): 170 | block.append(BlockA(out_channels, out_channels, 1, norm_layer=norm_layer)) 171 | self.block = nn.Sequential(*block) 172 | 173 | def forward(self, x): 174 | return self.block(x) 175 | 
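The Xception39 factory above assembles Xception(Block, [4, 8, 4], [16, 32, 64]) as the lightweight backbone used by BiSegNet, and its forward pass returns the three stage outputs instead of classification logits. The probe below is a small sketch for inspecting those feature maps; the commented shapes are what the stride-2 stem, the max-pool, and the three stride-2 stages imply (about 1/8, 1/16, and 1/32 of the input, with 64, 128, and 256 channels after Block.expansion = 4), so treat them as expectations to verify rather than guarantees.

```python
import torch

from libs.models.backbone.xception import Xception39

# Probe the backbone with a random image just to look at the returned feature maps.
net = Xception39()
net.eval()
with torch.no_grad():
    feats = net(torch.randn(1, 3, 512, 512))

for i, f in enumerate(feats, start=1):
    print('stage {}: {}'.format(i, tuple(f.shape)))
# Expected for a 512x512 input (if the stride arithmetic above is right):
# stage 1: (1, 64, 64, 64), stage 2: (1, 128, 32, 32), stage 3: (1, 256, 16, 16)
```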
176 | 177 | class FCAttention(nn.Module): 178 | def __init__(self, in_channels, norm_layer=nn.BatchNorm2d): 179 | super(FCAttention, self).__init__() 180 | self.avgpool = nn.AdaptiveAvgPool2d(1) 181 | self.fc = nn.Linear(in_channels, 1000) 182 | self.conv = nn.Sequential( 183 | nn.Conv2d(1000, in_channels, 1, bias=False), 184 | norm_layer(in_channels), 185 | nn.ReLU()) 186 | 187 | def forward(self, x): 188 | n, c, _, _ = x.size() 189 | att = self.avgpool(x).view(n, c) 190 | att = self.fc(att).view(n, 1000, 1, 1) 191 | att = self.conv(att) 192 | return x * att.expand_as(x) 193 | 194 | 195 | """ 196 | XceptionA is used for DFANet 197 | """ 198 | 199 | class XceptionA(nn.Module): 200 | def __init__(self, num_classes=1000, norm_layer=nn.BatchNorm2d): 201 | super(XceptionA, self).__init__() 202 | self.conv1 = nn.Sequential(nn.Conv2d(3, 8, 3, 2, 1, bias=False), 203 | norm_layer(8), 204 | nn.ReLU()) 205 | 206 | self.enc2 = Enc(8, 48, 4, norm_layer=norm_layer) 207 | self.enc3 = Enc(48, 96, 6, norm_layer=norm_layer) 208 | self.enc4 = Enc(96, 192, 4, norm_layer=norm_layer) 209 | 210 | self.fca = FCAttention(192, norm_layer=norm_layer) 211 | self.avgpool = nn.AdaptiveAvgPool2d(1) 212 | self.fc = nn.Linear(192, num_classes) 213 | 214 | def forward(self, x): 215 | x = self.conv1(x) 216 | 217 | x = self.enc2(x) 218 | x = self.enc3(x) 219 | x = self.enc4(x) 220 | x = self.fca(x) 221 | 222 | x = self.avgpool(x) 223 | x = x.view(x.size(0), -1) 224 | x = self.fc(x) 225 | 226 | return x -------------------------------------------------------------------------------- /libs/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/Fast_Seg/7895738fda6170837dd508389bf3ee9561eff28c/libs/utils/__init__.py -------------------------------------------------------------------------------- /libs/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import numbers 4 | import random 5 | import collections 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | 11 | def get_2dshape(shape, *, zero=True): 12 | if not isinstance(shape, collections.Iterable): 13 | shape = int(shape) 14 | shape = (shape, shape) 15 | else: 16 | h, w = map(int, shape) 17 | shape = (h, w) 18 | if zero: 19 | minv = 0 20 | else: 21 | minv = 1 22 | 23 | assert min(shape) >= minv, 'invalid shape: {}'.format(shape) 24 | return shape 25 | 26 | 27 | def random_crop_pad_to_shape(img, crop_pos, crop_size, pad_label_value): 28 | h, w = img.shape[:2] 29 | start_crop_h, start_crop_w = crop_pos 30 | assert ((start_crop_h < h) and (start_crop_h >= 0)) 31 | assert ((start_crop_w < w) and (start_crop_w >= 0)) 32 | 33 | crop_size = get_2dshape(crop_size) 34 | crop_h, crop_w = crop_size 35 | 36 | img_crop = img[start_crop_h:start_crop_h + crop_h, 37 | start_crop_w:start_crop_w + crop_w, ...] 
38 | 39 | img_, margin = pad_image_to_shape(img_crop, crop_size, cv2.BORDER_CONSTANT, 40 | pad_label_value) 41 | 42 | return img_, margin 43 | 44 | 45 | def generate_random_crop_pos(ori_size, crop_size): 46 | ori_size = get_2dshape(ori_size) 47 | h, w = ori_size 48 | 49 | crop_size = get_2dshape(crop_size) 50 | crop_h, crop_w = crop_size 51 | 52 | pos_h, pos_w = 0, 0 53 | 54 | if h > crop_h: 55 | pos_h = random.randint(0, h - crop_h + 1) 56 | 57 | if w > crop_w: 58 | pos_w = random.randint(0, w - crop_w + 1) 59 | 60 | return pos_h, pos_w 61 | 62 | 63 | def pad_image_to_shape(img, shape, border_mode, value): 64 | margin = np.zeros(4, np.uint32) 65 | shape = get_2dshape(shape) 66 | pad_height = shape[0] - img.shape[0] if shape[0] - img.shape[0] > 0 else 0 67 | pad_width = shape[1] - img.shape[1] if shape[1] - img.shape[1] > 0 else 0 68 | 69 | margin[0] = pad_height // 2 70 | margin[1] = pad_height // 2 + pad_height % 2 71 | margin[2] = pad_width // 2 72 | margin[3] = pad_width // 2 + pad_width % 2 73 | 74 | img = cv2.copyMakeBorder(img, margin[0], margin[1], margin[2], margin[3], 75 | border_mode, value=value) 76 | 77 | return img, margin 78 | 79 | 80 | def pad_image_size_to_multiples_of(img, multiple, pad_value): 81 | h, w = img.shape[:2] 82 | d = multiple 83 | 84 | def canonicalize(s): 85 | v = s // d 86 | return (v + (v * d != s)) * d 87 | 88 | th, tw = map(canonicalize, (h, w)) 89 | 90 | return pad_image_to_shape(img, (th, tw), cv2.BORDER_CONSTANT, pad_value) 91 | 92 | 93 | def resize_ensure_shortest_edge(img, edge_length, 94 | interpolation_mode=cv2.INTER_LINEAR): 95 | assert isinstance(edge_length, int) and edge_length > 0, edge_length 96 | h, w = img.shape[:2] 97 | if h < w: 98 | ratio = float(edge_length) / h 99 | th, tw = edge_length, max(1, int(ratio * w)) 100 | else: 101 | ratio = float(edge_length) / w 102 | th, tw = max(1, int(ratio * h)), edge_length 103 | img = cv2.resize(img, (tw, th), interpolation_mode) 104 | 105 | return img 106 | 107 | 108 | def random_scale(img, gt, scales): 109 | scale = random.choice(scales) 110 | sh = int(img.shape[0] * scale) 111 | sw = int(img.shape[1] * scale) 112 | img = cv2.resize(img, (sw, sh), interpolation=cv2.INTER_LINEAR) 113 | gt = cv2.resize(gt, (sw, sh), interpolation=cv2.INTER_NEAREST) 114 | 115 | return img, gt, scale 116 | 117 | 118 | def random_scale_with_length(img, gt, length): 119 | size = random.choice(length) 120 | sh = size 121 | sw = size 122 | img = cv2.resize(img, (sw, sh), interpolation=cv2.INTER_LINEAR) 123 | gt = cv2.resize(gt, (sw, sh), interpolation=cv2.INTER_NEAREST) 124 | 125 | return img, gt, size 126 | 127 | 128 | def random_mirror(img, gt): 129 | if random.random() >= 0.5: 130 | img = cv2.flip(img, 1) 131 | gt = cv2.flip(gt, 1) 132 | 133 | return img, gt, 134 | 135 | 136 | def random_rotation(img, gt): 137 | angle = random.random() * 20 - 10 138 | h, w = img.shape[:2] 139 | rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1) 140 | img = cv2.warpAffine(img, rotation_matrix, (w, h), flags=cv2.INTER_LINEAR) 141 | gt = cv2.warpAffine(gt, rotation_matrix, (w, h), flags=cv2.INTER_NEAREST) 142 | 143 | return img, gt 144 | 145 | 146 | def random_gaussian_blur(img): 147 | gauss_size = random.choice([1, 3, 5, 7]) 148 | if gauss_size > 1: 149 | # do the gaussian blur 150 | img = cv2.GaussianBlur(img, (gauss_size, gauss_size), 0) 151 | 152 | return img 153 | 154 | 155 | def center_crop(img, shape): 156 | h, w = shape[0], shape[1] 157 | y = (img.shape[0] - h) // 2 158 | x = (img.shape[1] - w) // 2 159 | return 
img[y:y + h, x:x + w] 160 | 161 | 162 | def random_crop(img, gt, size): 163 | if isinstance(size, numbers.Number): 164 | size = (int(size), int(size)) 165 | else: 166 | size = size 167 | 168 | h, w = img.shape[:2] 169 | crop_h, crop_w = size[0], size[1] 170 | 171 | if h > crop_h: 172 | x = random.randint(0, h - crop_h + 1) 173 | img = img[x:x + crop_h, :, :] 174 | gt = gt[x:x + crop_h, :] 175 | 176 | if w > crop_w: 177 | x = random.randint(0, w - crop_w + 1) 178 | img = img[:, x:x + crop_w, :] 179 | gt = gt[:, x:x + crop_w] 180 | 181 | return img, gt 182 | 183 | 184 | 185 | def normalize(img, mean, std): 186 | # pytorch pretrained model need the input range: 0-1 187 | img = img.astype(np.float32) / 255.0 188 | img = img - mean 189 | img = img / std 190 | 191 | return img 192 | def resize_image(img, h, w, **up_kwargs): 193 | return F.upsample(img, (h, w), **up_kwargs) 194 | 195 | 196 | def pad_image(img, mean, std, crop_size): 197 | b,c,h,w = img.size() 198 | assert(c==3) 199 | padh = crop_size - h if h < crop_size else 0 200 | padw = crop_size - w if w < crop_size else 0 201 | pad_values = -np.array(mean) / np.array(std) 202 | img_pad = img.new().resize_(b,c,h+padh,w+padw) 203 | for i in range(c): 204 | # note that pytorch pad params is in reversed orders 205 | img_pad[:,i,:,:] = F.pad(img[:,i,:,:], (0, padw, 0, padh), value=pad_values[i]) 206 | assert(img_pad.size(2)>=crop_size and img_pad.size(3)>=crop_size) 207 | return img_pad 208 | 209 | 210 | def crop_image(img, h0, h1, w0, w1): 211 | return img[:,:,h0:h1,w0:w1] 212 | 213 | 214 | def flip_image(img): 215 | assert(img.dim()==4) 216 | with torch.cuda.device_of(img): 217 | idx = torch.arange(img.size(3)-1, -1, -1).type_as(img).long() 218 | return img.index_select(3, idx) 219 | -------------------------------------------------------------------------------- /libs/utils/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Donny You(youansheng@gmail.com) 4 | # Logging tool implemented with the python Package logging. 5 | 6 | 7 | import argparse 8 | import logging 9 | import os 10 | import sys 11 | 12 | DEFAULT_LOG_LEVEL = 'info' 13 | DEFAULT_LOG_FILE = './default.log' 14 | DEFAULT_LOG_FORMAT = '%(asctime)s %(levelname)-7s %(message)s' 15 | 16 | LOG_LEVEL_DICT = { 17 | 'debug': logging.DEBUG, 18 | 'info': logging.INFO, 19 | 'warning': logging.WARNING, 20 | 'error': logging.ERROR, 21 | 'critical': logging.CRITICAL 22 | } 23 | 24 | 25 | class Logger(object): 26 | """ 27 | Args: 28 | Log level: CRITICAL>ERROR>WARNING>INFO>DEBUG. 29 | Log file: The file that stores the logging info. 30 | rewrite: Clear the log file. 31 | log format: The format of log messages. 32 | stdout level: The log level to print on the screen. 
33 | """ 34 | log_level = None 35 | log_file = None 36 | log_format = None 37 | rewrite = None 38 | stdout_level = None 39 | logger = None 40 | 41 | @staticmethod 42 | def init(log_level = DEFAULT_LOG_LEVEL, 43 | log_file = DEFAULT_LOG_FILE, 44 | log_format = DEFAULT_LOG_FORMAT, 45 | rewrite = False, 46 | stdout_level = None): 47 | Logger.log_level = log_level 48 | Logger.log_file = log_file 49 | Logger.log_format = log_format 50 | Logger.rewrite = rewrite 51 | Logger.stdout_level = stdout_level 52 | 53 | filemode = 'w' 54 | if not Logger.rewrite: 55 | filemode = 'a' 56 | 57 | dir_name = os.path.dirname(os.path.abspath(Logger.log_file)) 58 | if not os.path.exists(dir_name): 59 | os.makedirs(dir_name) 60 | 61 | Logger.logger = logging.getLogger() 62 | 63 | if not Logger.log_level in LOG_LEVEL_DICT: 64 | print('Invalid logging level: {}'.format(Logger.log_level)) 65 | Logger.log_level = DEFAULT_LOG_LEVEL 66 | 67 | Logger.logger.setLevel(LOG_LEVEL_DICT[Logger.log_level]) 68 | 69 | fmt = logging.Formatter(Logger.log_format) 70 | fh = logging.FileHandler(Logger.log_file, mode=filemode) 71 | fh.setFormatter(fmt) 72 | fh.setLevel(LOG_LEVEL_DICT[Logger.log_level]) 73 | 74 | Logger.logger.addHandler(fh) 75 | 76 | if stdout_level is not None: 77 | console = logging.StreamHandler() 78 | if not Logger.stdout_level in LOG_LEVEL_DICT: 79 | print('Invalid logging level: {}'.format(Logger.stdout_level)) 80 | return 81 | 82 | console.setLevel(LOG_LEVEL_DICT[Logger.stdout_level]) 83 | console.setFormatter(fmt) 84 | Logger.logger.addHandler(console) 85 | 86 | @staticmethod 87 | def set_log_file(file_path): 88 | Logger.log_file = file_path 89 | Logger.init() 90 | 91 | @staticmethod 92 | def set_log_level(log_level): 93 | if not LOG_LEVEL_DICT.has_key(log_level): 94 | print('Invalid logging level: {}'.format(Logger.log_level)) 95 | return 96 | 97 | Logger.log_level = log_level 98 | Logger.init() 99 | 100 | @staticmethod 101 | def clear_log_file(): 102 | Logger.rewrite = True 103 | Logger.init() 104 | 105 | @staticmethod 106 | def set_stdout_level(log_level): 107 | if not LOG_LEVEL_DICT.has_key(log_level): 108 | print('Invalid logging level: {}'.format(Logger.log_level)) 109 | return 110 | 111 | Logger.stdout_level = log_level 112 | Logger.init() 113 | 114 | @staticmethod 115 | def debug(message): 116 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 117 | lineno = sys._getframe().f_back.f_lineno 118 | prefix = '[{}, {}]'.format(filename,lineno) 119 | Logger.logger.debug('{} {}'.format(prefix, message)) 120 | 121 | @staticmethod 122 | def info(message): 123 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 124 | lineno = sys._getframe().f_back.f_lineno 125 | prefix = '[{}, {}]'.format(filename,lineno) 126 | Logger.logger.info('{} {}'.format(prefix, message)) 127 | 128 | @staticmethod 129 | def warn(message): 130 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 131 | lineno = sys._getframe().f_back.f_lineno 132 | prefix = '[{}, {}]'.format(filename,lineno) 133 | Logger.logger.warn('{} {}'.format(prefix, message)) 134 | 135 | @staticmethod 136 | def error(message): 137 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 138 | lineno = sys._getframe().f_back.f_lineno 139 | prefix = '[{}, {}]'.format(filename,lineno) 140 | Logger.logger.error('{} {}'.format(prefix, message)) 141 | 142 | @staticmethod 143 | def critical(message): 144 | filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) 145 | lineno = 
sys._getframe().f_back.f_lineno 146 | prefix = '[{}, {}]'.format(filename,lineno) 147 | Logger.logger.critical('{} {}'.format(prefix, message)) 148 | 149 | 150 | if __name__ == "__main__": 151 | parser = argparse.ArgumentParser() 152 | parser.add_argument('--log_level', default="info", type=str, 153 | dest='log_level', help='To set the log level to files.') 154 | parser.add_argument('--stdout_level', default=None, type=str, 155 | dest='stdout_level', help='To set the level to print to screen.') 156 | parser.add_argument('--log_file', default="./default.log", type=str, 157 | dest='log_file', help='The path of log files.') 158 | parser.add_argument('--log_format', default="%(asctime)s %(levelname)-7s %(message)s", 159 | type=str, dest='log_format', help='The format of log messages.') 160 | parser.add_argument('--rewrite', default=False, type=bool, 161 | dest='rewrite', help='Clear the log files existed.') 162 | 163 | args = parser.parse_args() 164 | Logger.init(log_level = args.log_level, 165 | stdout_level = args.stdout_level, 166 | log_file = args.log_file, 167 | log_format = args.log_format, 168 | rewrite = args.rewrite) 169 | 170 | Logger.info("info test.") 171 | Logger.debug("debug test.") 172 | Logger.warn("warn test.") 173 | Logger.error("error test.") 174 | -------------------------------------------------------------------------------- /libs/utils/tools.py: -------------------------------------------------------------------------------- 1 | # some tools for network training 2 | 3 | import argparse 4 | import time 5 | from collections import OrderedDict 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | def all_reduce_tensor(tensor, op=dist.ReduceOp.SUM, world_size=1): 12 | tensor = tensor.clone() 13 | dist.all_reduce(tensor, op) 14 | tensor.div_(world_size) 15 | return tensor 16 | 17 | 18 | def str2bool(v): 19 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 20 | return True 21 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 22 | return False 23 | else: 24 | raise argparse.ArgumentTypeError('Boolean value expected.') 25 | 26 | 27 | def lr_poly(base_lr, iter, max_iter, power): 28 | return base_lr * ((1 - float(iter) / max_iter) ** (power)) 29 | 30 | 31 | 32 | def adjust_learning_rate(optimizer, args, i_iter, total_steps): 33 | lr = lr_poly(args.learning_rate, i_iter, total_steps, args.power) 34 | optimizer.param_groups[0]['lr'] = lr 35 | return lr 36 | 37 | 38 | 39 | def set_bn_momentum(m): 40 | classname = m.__class__.__name__ 41 | if classname.find('BatchNorm') != -1: 42 | 43 | m.momentum = 0.0003 44 | 45 | def fixModelBN(m): 46 | pass 47 | 48 | 49 | def load_model(model, model_file, is_restore=False): 50 | t_start = time.time() 51 | if isinstance(model_file, str): 52 | state_dict = torch.load(model_file, map_location=torch.device('cpu')) 53 | if 'model' in state_dict.keys(): 54 | state_dict = state_dict['model'] 55 | else: 56 | state_dict = model_file 57 | t_ioend = time.time() 58 | 59 | if is_restore: 60 | new_state_dict = OrderedDict() 61 | for k, v in state_dict.items(): 62 | name = 'module.' 
+ k 63 | new_state_dict[name] = v 64 | state_dict = new_state_dict 65 | 66 | model.load_state_dict(state_dict, strict=False) 67 | ckpt_keys = set(state_dict.keys()) 68 | own_keys = set(model.state_dict().keys()) 69 | missing_keys = own_keys - ckpt_keys 70 | unexpected_keys = ckpt_keys - own_keys 71 | 72 | if len(missing_keys) > 0: 73 | print('Missing key(s) in state_dict: {}'.format( 74 | ', '.join('{}'.format(k) for k in missing_keys))) 75 | 76 | if len(unexpected_keys) > 0: 77 | print('Unexpected key(s) in state_dict: {}'.format( 78 | ', '.join('{}'.format(k) for k in unexpected_keys))) 79 | 80 | del state_dict 81 | t_end = time.time() 82 | print( 83 | "Load model, Time usage:\n\tIO: {}, initialize parameters: {}".format( 84 | t_ioend - t_start, t_end - t_ioend)) 85 | 86 | return model -------------------------------------------------------------------------------- /prediction_test_different_size.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import torch.nn.functional as F 5 | import cv2 6 | import numpy as np 7 | import datetime 8 | 9 | 10 | import libs.models as models 11 | 12 | 13 | N_CLASS = 19 14 | color_list = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] 15 | color_map = [(128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153), 16 | (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), 17 | (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)] 18 | up_kwargs = {'mode': 'bilinear', 'align_corners': True} 19 | 20 | 21 | def transform(img): 22 | img = cv2.imread(img) 23 | IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) 24 | img = img - IMG_MEAN 25 | img = img.transpose((2, 0, 1)) 26 | img = torch.from_numpy(img).unsqueeze(0).cuda() 27 | return img 28 | 29 | def transform_rgb(img): 30 | img = cv2.imread(img, cv2.IMREAD_COLOR)[:, :, ::-1].astype(np.float32) 31 | 32 | img /= 255 33 | IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) 34 | IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) 35 | 36 | img -= IMG_MEAN 37 | img /= IMG_VARS 38 | 39 | img = img.transpose((2, 0, 1)) 40 | img = torch.from_numpy(img).unsqueeze(0).cuda() 41 | return img 42 | 43 | 44 | 45 | def makeTestlist(dir,start=0,end=1525): 46 | out = [] 47 | floder = os.listdir(dir) 48 | for f in floder: 49 | floder_dir = os.path.join(dir, f) 50 | for i in os.listdir(floder_dir): 51 | out.append(os.path.join(floder_dir, i)) 52 | out.sort() 53 | return out[start:end] 54 | 55 | 56 | def WholeTest(args, model, size=1.0): 57 | net = model.cuda() 58 | net.eval() 59 | saved_state_dict = torch.load(args.resume) 60 | net.load_state_dict(saved_state_dict) 61 | img_list = makeTestlist(args.input_dir) 62 | out_dir = args.output_dir 63 | for i in img_list: 64 | name = i 65 | with torch.no_grad(): 66 | if args.rgb: 67 | img = transform_rgb(i) 68 | else: 69 | img = transform(i) 70 | _, _, origin_h, origin_w = img.size() 71 | h, w = int(origin_h*size), int(origin_w*size) 72 | img = F.upsample(img, size=(h, w), mode="bilinear", align_corners=True) 73 | out = net(img)[0] 74 | out = F.upsample(out, size=(origin_h, origin_w), mode='bilinear', align_corners=True) 75 | result = out.argmax(dim=1)[0] 76 | result = result.data.cpu().squeeze().numpy() 77 | row, col = result.shape 78 | dst = np.ones((row, col), dtype=np.uint8) * 255 79 | for i in range(19): 80 | dst[result 
== i] = color_list[i] 81 | print(name, " done!") 82 | save_name = os.path.join(out_dir, "/".join(name.split('/')[4:])) 83 | save_dir = "/".join(save_name.split("/")[:-1]) 84 | if not os.path.exists(save_dir): 85 | os.makedirs(save_dir) 86 | cv2.imwrite(save_name, dst) 87 | 88 | if __name__ == '__main__': 89 | parser = argparse.ArgumentParser(description='PyTorch \ 90 | Segmentation Crop Prediction') 91 | parser.add_argument('--input_dir', type=str, 92 | default="/home/lxt/data/Cityscapes/leftImg8bit/test", 93 | help='input image folder (default: \ 94 | $(HOME)/data)') 95 | parser.add_argument("--input_disp_dir", type=str, default=None) 96 | parser.add_argument('--output_dir', type=str, default="/home/lxt/debug/cgnl_ohem_crop_ms", 97 | help='output directory for saving the predicted segmentation maps') 98 | parser.add_argument("--resume", type=str, default="/home/lxt/Desktop/Seg_model_ZOO/CNL_net_4w_ohem/CS_scenes_40000.pth") 99 | parser.add_argument("--start", type=int, default=0, help="start index of the test split") 100 | parser.add_argument("--end", type=int, default=1525, help="end index of the test split") 101 | parser.add_argument("--gpu", type=str, default="0", help="which gpu to use") 102 | parser.add_argument("--arch", type=str, default=None, help="which network is used") 103 | parser.add_argument("--size", type=float, default=1.0, help="scale ratio applied to the input images") 104 | parser.add_argument("--rgb", type=int, default=0) 105 | args = parser.parse_args() 106 | os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu) 107 | test_list = makeTestlist(args.input_dir, args.start, args.end) 108 | model = models.__dict__[args.arch](num_classes=19, data_set="cityscapes") 109 | WholeTest(args, model=model, size=args.size) -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # :zap:Fast_Seg:zap: 4 | 5 | This repo tries to implement **state-of-the-art fast semantic segmentation models** on **road scene datasets** (Cityscapes, 6 | Mapillary, CamVid). 7 | 8 | 9 | ## News!! 10 | 11 | Check out our Fast Segmentation Framework in [SFSegNets](https://github.com/lxtGH/SFSegNets): SFNet (ECCV-2020) and SFNet-Lite (IJCV-2023). 12 | 13 | # What is the purpose of this repo? 14 | This repo is for experimenting with and verifying ideas in fast semantic segmentation, and it also provides several fast models. 15 | 16 | Our ICNet implementation achieves **74.5% mIoU**, which is **5 points** higher than the original paper! Checkpoint: [model](https://drive.google.com/open?id=1A6z87_GCHEuKeZfbGpEvnkZ0POdW2Q_U) 17 | 18 | # Another Link For Accurate Seg: 19 | [GALD-Net](https://github.com/lxtGH/GALD-Net) provides implementations of some state-of-the-art accurate methods. 20 | 21 | # Model Zoo (Updating) 22 | 1. ICNet: ICNet for real-time semantic segmentation on high-resolution images. ECCV-2018, [paper](https://arxiv.org/abs/1704.08545) 23 | 2. DF-Net: Partial Order Pruning: for Best Speed/Accuracy Trade-off in Neural Architecture Search. CVPR-2019, [paper](https://arxiv.org/abs/1903.03777) 24 | 3. Bi-Seg: Bilateral segmentation network for real-time semantic segmentation. ECCV-2018, [paper](https://arxiv.org/pdf/1808.00897.pdf) 25 | 4. DFA-Net: Deep feature aggregation for real-time semantic segmentation. CVPR-2019, [paper](https://arxiv.org/abs/1904.02216) 26 | 5. ESP-Net: Efficient Spatial Pyramid of Dilated Convolutions for Semantic Segmentation. ECCV-2018, [paper](https://arxiv.org/abs/1803.06815) 27 | 6. 
SwiftNet: In defense of pre-trained ImageNet architectures for real-time semantic segmentation of road-driving images. CVPR-2019, [paper](http://openaccess.thecvf.com/content_CVPR_2019/papers/Orsic_In_Defense_of_Pre-Trained_ImageNet_Architectures_for_Real-Time_Semantic_Segmentation_CVPR_2019_paper.pdf) 28 | 7. MSFNet: Real-Time Semantic Segmentation via Multiply Spatial Fusion Network (Face++). arXiv, [paper](https://arxiv.org/abs/1911.07217) 29 | 8. Fast-SCNN: Fast Semantic Segmentation Network. BMVC-2019, [paper](https://arxiv.org/abs/1902.04502) 30 | 31 | 32 | 33 | 34 | # Usage 35 | 1. Use train_distribute.py for training. For example, use the scripts in the exp folder for training and evaluation. 36 | 2. Use prediction_test_different_size.py for prediction with different input sizes (a minimal restore-and-evaluate sketch is included below). 37 | 38 | 39 | ## Dataset Preparation 40 | - You can download the [Cityscapes](https://www.cityscapes-dataset.com/) dataset from [here](https://www.cityscapes-dataset.com/downloads/). Note: please download [leftImg8bit_trainvaltest.zip (11GB)](https://www.cityscapes-dataset.com/file-handling/?packageID=4) and [gtFine_trainvaltest (241MB)](https://www.cityscapes-dataset.com/file-handling/?packageID=1). 41 | - You can download the CamVid dataset from [here](https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid). 42 | - You can download pretrained XceptionA (RGB input), ResNet18 (BGR input), and ResNet50 (BGR input) weights from this 43 | [link](https://pan.baidu.com/s/1mM_Lc44iX9CT1nPq6tjOAA) (password: bnfv), 44 | or from Google Drive: [resnet50-deep.pth](https://drive.google.com/file/d/166ANLmlV5cQTkmzD0pngc8leOQUR_32n/view?usp=sharing), [icnet_final.pth](https://drive.google.com/file/d/1A6z87_GCHEuKeZfbGpEvnkZ0POdW2Q_U/view?usp=sharing), [resnet18-deep-caffe.pth](https://drive.google.com/file/d/1P_d9T__kTKIEFK8ElQFq0cZ1XKx1gMGn/view?usp=sharing), [xceptiona_imagenet.pth](https://drive.google.com/file/d/1y4TuRod_F9NEeBQ1fo9GI-WLETS-b1jF/view?usp=sharing) 45 | 46 | 47 | # Some Advice on Training 48 | 1. Use synchronized BN (apex). 49 | 2. Use a batch size >= 8. 50 | 3. Use a deeply supervised loss for easier optimization. 51 | 4. Use a large crop size during training. 52 | 5. Train small models longer (60,000 iterations or more). 53 | 6. Use Mapillary data for pretraining to boost performance. 54 | 7. The deep-stem ResNet runs slower than the torchvision pretrained ResNet but gives higher accuracy. 55 | 8. Small networks do not need ImageNet pretraining if trained long enough on Cityscapes (Fast-SCNN paper). 56 | 57 | |(a) test image|(b) ground truth|(c) predicted result| 58 | |:--:|:--:|:--:| 59 | |![a](data/fig/frankfurt_000000_002196_leftImg8bit.png)|![b](data/fig/frankfurt_000000_002196_gtFine_color.png)|![c](data/fig/frankfurt_000000_002196_leftImg8bit_pred.png)| 60 | 61 | # License 62 | This project is released under the Apache 2.0 license. 
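To make the Usage notes above concrete, here is a minimal sketch of restoring a trained checkpoint and running a single forward pass, mirroring what prediction_test_different_size.py does internally. PSPNet_res50 and the checkpoint path are placeholders for whichever architecture you actually trained; the sketch assumes a GPU is available and that the checkpoint is a plain state_dict, which is how the prediction script loads its --resume file.

```python
import torch

import libs.models as models

# Build a model from the zoo and restore its weights for evaluation.
net = models.PSPNet_res50(num_classes=19).cuda()
net.load_state_dict(torch.load("path/to/checkpoint.pth", map_location="cpu"))
net.eval()

with torch.no_grad():
    # In eval mode the model returns [main_logits] at roughly 1/8 of the input resolution.
    logits = net(torch.randn(1, 3, 769, 769).cuda())[0]
print(logits.shape)  # upsample to the original size before taking the per-pixel argmax
```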
63 | 64 | 65 | # Acknowledgement 66 | 67 | Thanks to the previous open-sourced repo: 68 | [Encoding](https://github.com/zhanghang1989/PyTorch-Encoding) 69 | [CCNet](https://github.com/speedinghzl/CCNet) 70 | [TorchSeg](https://github.com/ycszen/TorchSeg) 71 | [pytorchseg](https://github.com/meetshah1995/pytorch-semseg) 72 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | apex 2 | opencv-python 3 | torch>=1.1.0 4 | torchvision 5 | -------------------------------------------------------------------------------- /train_distribute.py: -------------------------------------------------------------------------------- 1 | # Author: Xiangtai Li 2 | # Email: lxtpku@pku.edu.cn 3 | """ 4 | Distribute Training Code For Fast training. 5 | """ 6 | 7 | import argparse 8 | import os 9 | import os.path as osp 10 | import timeit 11 | import numpy as np 12 | 13 | 14 | import torch 15 | from torch.utils import data 16 | import torch.optim as optim 17 | import torch.backends.cudnn as cudnn 18 | 19 | from libs.utils.logger import Logger as Log 20 | from libs.utils.tools import adjust_learning_rate, all_reduce_tensor 21 | from libs.datasets.cityscapes import Cityscapes 22 | from libs.datasets.camvid import CamVidDataSet 23 | 24 | from libs.core.loss import CriterionOhemDSN, CriterionDSN, CriterionICNet, CriterionDFANet 25 | 26 | 27 | try: 28 | import apex 29 | from apex import amp 30 | from apex.parallel import DistributedDataParallel, SyncBatchNorm 31 | except ImportError: 32 | raise ImportError( 33 | "Please install apex from https://www.github.com/nvidia/apex.") 34 | 35 | 36 | def str2bool(v): 37 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 38 | return True 39 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 40 | return False 41 | else: 42 | raise argparse.ArgumentTypeError('Boolean value expected.') 43 | 44 | 45 | def get_arguments(): 46 | """ 47 | Parse all the arguments 48 | Returns: args 49 | A list of parsed arguments. 
50 | """ 51 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network") 52 | parser.add_argument("--batch_size_per_gpu", type=int, default=1, 53 | help="Number of images sent to the network in one step.") 54 | parser.add_argument("--batch_size", type=int, default=8, 55 | help="Number of images sent to the network in one step.") 56 | parser.add_argument('--gpu_num',type=int, default=8) 57 | parser.add_argument("--data_dir", type=str, default="./data", 58 | help="Path to the directory containing the Cityscapes dataset.") 59 | parser.add_argument("--data_list", type=str, default="./data/cityscapes/train.txt", 60 | help="Path to the file listing the images in the dataset.") 61 | parser.add_argument("--data_set", type=str, default="cityscapes", help="dataset to train") 62 | parser.add_argument("--arch", type=str, default="ICNet", help="network architecture") 63 | parser.add_argument("--ignore_label", type=int, default=255, 64 | help="The index of the label to ignore during the training.") 65 | parser.add_argument("--input_size", type=int, default=832 , 66 | help="Comma-separated string with height and width of images.") 67 | parser.add_argument("--learning_rate", type=float, default=1e-2, 68 | help="Base learning rate for training with polynomial decay.") 69 | parser.add_argument("--momentum", type=float, default=0.9, 70 | help="Momentum component of the optimiser.") 71 | parser.add_argument("--num_classes", type=int, default=19, 72 | help="Number of classes to predict (including background).") 73 | parser.add_argument("--num_steps", type=int, default=50000, 74 | help="Number of training steps.") 75 | parser.add_argument("--power", type=float, default=0.9, 76 | help="Decay parameter to compute the learning rate.") 77 | parser.add_argument("--weight_decay", type=float, default=5e-4, 78 | help="Regularisation parameter for L2-loss.") 79 | parser.add_argument("--num_workers", type=int, default=8) 80 | parser.add_argument("--random_mirror", action="store_true", default=True, 81 | help="Whether to randomly mirror the inputs during the training.") 82 | parser.add_argument("--random_scale", action="store_true", default=True, 83 | help="Whether to randomly scale the inputs during the training.") 84 | parser.add_argument("--random_seed", type=int, default=1234, 85 | help="Random seed to have reproducible results.") 86 | 87 | # ***** Params for save and load ****** 88 | parser.add_argument("--restore_from", type=str, default="./pretrained", 89 | help="Where restore models parameters from.") 90 | parser.add_argument("--save_pred_every", type=int, default=5000, 91 | help="Save summaries and checkpoint every often.") 92 | parser.add_argument("--save_dir", type=str, default=None, 93 | help="Where to save snapshots of the models.") 94 | parser.add_argument("--save_start",type=int, default=40000) 95 | parser.add_argument("--gpu", type=str, default=None, 96 | help="choose gpu device.") 97 | parser.add_argument("--ft", type=bool, default=False, 98 | help="fine-tune the models with large input size.") 99 | # **** Params for OHEM **** # 100 | parser.add_argument("--ohem", type=str2bool, default='False', 101 | help="use hard negative mining") 102 | parser.add_argument("--ohem_thres", type=float, default=0.7, 103 | help="choose the samples with correct probability underthe threshold.") 104 | parser.add_argument("--ohem_keep", type=int, default=100000, 105 | help="choose the samples with correct probability underthe threshold.") 106 | # ***** Params for logging ***** # 107 | 
parser.add_argument('--log_level', default="info", type=str, 108 | dest='log_level', help='Log level for the log file.') 109 | parser.add_argument('--log_file', default="./log/train.log", type=str, 110 | dest='log_file', help='The path of the log file.') 111 | parser.add_argument("--log_format", default="%(asctime)s %(levelname)-7s %(message)s", type=str, 112 | dest="log_format", help="format of log records" 113 | ) 114 | parser.add_argument('--stdout_level', default="info", type=str, 115 | dest='stdout_level', help='Log level for console output.') 116 | parser.add_argument("--rewrite", default=False, type=bool, 117 | dest="rewrite", help="whether to rewrite the log file when logging" 118 | ) 119 | parser.add_argument("--rgb", type=str2bool, default='False') 120 | # ***** Params for Distributed Training ***** # 121 | parser.add_argument('--apex', action='store_true', default=False, 122 | help='Use Nvidia Apex Distributed Data Parallel') 123 | parser.add_argument("--local_rank", default=0, type=int, help="local process rank, set by the distributed launcher") 124 | args = parser.parse_args() 125 | return args 126 | 127 | 128 | start = timeit.default_timer() 129 | 130 | args = get_arguments() 131 | 132 | 133 | def main(): 134 | 135 | # make save dir 136 | if args.local_rank == 0: 137 | if not os.path.exists(args.save_dir): 138 | os.makedirs(args.save_dir) 139 | # launch the logger 140 | Log.init( 141 | log_level=args.log_level, 142 | log_file=osp.join(args.save_dir, args.log_file), 143 | log_format=args.log_format, 144 | rewrite=args.rewrite, 145 | stdout_level=args.stdout_level 146 | ) 147 | # RGB or BGR input (RGB for ImageNet-pretrained models, BGR for Caffe-pretrained models) 148 | if args.rgb: 149 | IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) 150 | IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) 151 | else: 152 | IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) 153 | IMG_VARS = np.array((1, 1, 1), dtype=np.float32) 154 | 155 | # set models 156 | import libs.models as models 157 | deeplab = models.__dict__[args.arch](num_classes=args.num_classes, data_set=args.data_set) 158 | if args.restore_from is not None: 159 | saved_state_dict = torch.load(args.restore_from, map_location=torch.device('cpu')) 160 | new_params = deeplab.state_dict().copy() 161 | for i in saved_state_dict: 162 | i_parts = i.split('.') 163 | if i_parts[0] != 'fc': 164 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 165 | Log.info("load pretrained model") 166 | if deeplab.backbone is not None: 167 | deeplab.backbone.load_state_dict(new_params, strict=False) 168 | else: 169 | deeplab.load_state_dict(new_params, strict=False) 170 | else: 171 | Log.info("train from scratch") 172 | 173 | 174 | args.world_size = 1 175 | 176 | if 'WORLD_SIZE' in os.environ and args.apex: 177 | args.apex = int(os.environ['WORLD_SIZE']) > 1 178 | args.world_size = int(os.environ['WORLD_SIZE']) 179 | print("Total world size: ", int(os.environ['WORLD_SIZE'])) 180 | 181 | if args.gpu is not None: 182 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 183 | h, w = args.input_size, args.input_size 184 | input_size = (h, w) 185 | 186 | 187 | # Set the device according to local_rank.
188 | torch.cuda.set_device(args.local_rank) 189 | Log.info("Local Rank: {}".format(args.local_rank)) 190 | torch.distributed.init_process_group(backend='nccl', 191 | init_method='env://') 192 | # set optimizer 193 | optimizer = optim.SGD( 194 | [{'params': filter(lambda p: p.requires_grad, deeplab.parameters()), 'lr': args.learning_rate}], 195 | lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 196 | optimizer.zero_grad() 197 | 198 | # set on cuda 199 | deeplab.cuda() 200 | 201 | # models transformation 202 | model = DistributedDataParallel(deeplab) 203 | model = apex.parallel.convert_syncbn_model(model) 204 | model.train() 205 | model.float() 206 | model.cuda() 207 | 208 | # set loss function 209 | if args.ohem: 210 | criterion = CriterionOhemDSN(thresh=args.ohem_thres, min_kept=args.ohem_keep) # OHEM cross-entropy 211 | if "ic" in args.arch: 212 | criterion = CriterionICNet(thresh=args.ohem_thres, min_kept=args.ohem_keep) 213 | if "dfa" in args.arch: 214 | criterion = CriterionDFANet(thresh=args.ohem_thres, min_kept=args.ohem_keep) 215 | else: 216 | criterion = CriterionDSN() # plain cross-entropy 217 | criterion.cuda() 218 | 219 | cudnn.benchmark = True 220 | 221 | if args.world_size == 1: 222 | print(model) 223 | 224 | # this is a little different from the usual multi-GPU training setting: in distributed training 225 | # each trainloader belongs to one process that samples from the dataset class on its own. 226 | batch_size = args.gpu_num * args.batch_size_per_gpu 227 | max_iters = args.num_steps * batch_size / args.gpu_num 228 | # set data loader 229 | if args.data_set == "cityscapes": 230 | data_set = Cityscapes(args.data_dir, args.data_list, max_iters=max_iters, crop_size=input_size, 231 | scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN, vars=IMG_VARS, RGB=args.rgb) 232 | elif args.data_set == "camvid": 233 | data_set = CamVidDataSet(args.data_dir, args.data_list, max_iters=max_iters, crop_size=input_size, 234 | scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN, vars=IMG_VARS, RGB=args.rgb) 235 | else: 236 | raise ValueError("Unsupported dataset: {}".format(args.data_set))
237 | 238 | trainloader = data.DataLoader( 239 | data_set, 240 | batch_size=args.batch_size_per_gpu, shuffle=True, num_workers=args.num_workers, pin_memory=True) 241 | 242 | print("trainloader", len(trainloader)) 243 | 244 | torch.cuda.empty_cache() 245 | 246 | # start training: 247 | for i_iter, batch in enumerate(trainloader): 248 | images, labels = batch 249 | images = images.cuda() 250 | labels = labels.long().cuda() 251 | optimizer.zero_grad() 252 | lr = adjust_learning_rate(optimizer, args, i_iter, len(trainloader)) 253 | preds = model(images) 254 | 255 | loss = criterion(preds, labels) 256 | loss.backward() 257 | optimizer.step() 258 | reduce_loss = all_reduce_tensor(loss, 259 | world_size=args.gpu_num) 260 | if args.local_rank == 0: 261 | Log.info('iter = {} of {} completed, lr={}, loss = {}'.format(i_iter, 262 | len(trainloader), lr, reduce_loss.data.cpu().numpy())) 263 | if i_iter % args.save_pred_every == 0 and i_iter > args.save_start: 264 | print('save model ...') 265 | torch.save(deeplab.state_dict(), osp.join(args.save_dir, str(args.arch) + str(i_iter) + '.pth')) 266 | 267 | end = timeit.default_timer() 268 | 269 | if args.local_rank == 0: 270 | Log.info("Training cost: " + str(end - start) + ' seconds') 271 | Log.info("Save final model") 272 | torch.save(deeplab.state_dict(), osp.join(args.save_dir, str(args.arch) + '_final' + '.pth')) 273 | 274 | 275 | if __name__ == '__main__': 276 | main() 277 | -------------------------------------------------------------------------------- /val.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from scipy import ndimage 3 | import numpy as np 4 | import json 5 | 6 | import torch 7 | from torch.utils import data 8 | import torch.nn as nn 9 | 10 | import os 11 | from math import ceil 12 | from PIL import Image as PILImage 13 | 14 | from libs.datasets.cityscapes import Cityscapes 15 | from libs.datasets.camvid import CamVidDataSet 16 | 17 | DATA_DIRECTORY = 'cityscapes' 18 | DATA_LIST_PATH = './data/cityscapes/val.txt' 19 | IGNORE_LABEL = 255 20 | NUM_CLASSES = 19 21 | NUM_STEPS = 500 # Number of images in the validation set. 22 | INPUT_SIZE = 832 23 | RESTORE_FROM = './deeplab_resnet.pth' 24 | 25 | 26 | def str2bool(v): 27 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 28 | return True 29 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 30 | return False 31 | else: 32 | raise argparse.ArgumentTypeError('Boolean value expected.') 33 | 34 | 35 | def get_arguments(): 36 | """Parse all the arguments provided from the CLI. 37 | Returns: 38 | The parsed arguments.
39 | """ 40 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network") 41 | parser.add_argument("--data_dir", type=str, default=DATA_DIRECTORY, 42 | help="Path to the directory containing the dataset.") 43 | parser.add_argument("--data_list", type=str, default=DATA_LIST_PATH, 44 | help="Path to the file listing the images in the dataset.") 45 | parser.add_argument("--data_set", type=str, default="cityscapes", help="dataset to evaluate") 46 | parser.add_argument("--arch", type=str, default="CascadeRelatioNet_res50") 47 | parser.add_argument("--ignore_label", type=int, default=IGNORE_LABEL, 48 | help="The index of the label to ignore during the training.") 49 | parser.add_argument("--num_classes", type=int, default=19, 50 | help="Number of classes to predict (including background).") 51 | parser.add_argument("--restore_from", type=str, default=RESTORE_FROM, 52 | help="Where to restore model parameters from.") 53 | parser.add_argument("--gpu", type=str, default='0', 54 | help="choose gpu device.") 55 | parser.add_argument("--input_size", type=int, default=INPUT_SIZE, 56 | help="Height and width of the square crop used for sliding-window evaluation.") 57 | parser.add_argument("--whole", type=bool, default=False, 58 | help="evaluate on the whole input image.") 59 | parser.add_argument("--output_dir", type=str, default="outputs", 60 | help="output directory for predictions") 61 | parser.add_argument("--rgb", type=str2bool, default='False') 62 | return parser.parse_args() 63 | 64 | 65 | def get_palette(num_cls): 66 | """ Returns the color map for visualizing the segmentation mask. 67 | Args: 68 | num_cls: Number of classes 69 | Returns: 70 | The color map 71 | """ 72 | n = num_cls 73 | palette = [0] * (n * 3) 74 | for j in range(0, n): 75 | lab = j 76 | palette[j * 3 + 0] = 0 77 | palette[j * 3 + 1] = 0 78 | palette[j * 3 + 2] = 0 79 | i = 0 80 | while lab: 81 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 82 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 83 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 84 | i += 1 85 | lab >>= 3 86 | return palette 87 | 88 | 89 | def pad_image(img, target_size): 90 | """Pad an image up to the target size.""" 91 | rows_missing = target_size[0] - img.shape[2] 92 | cols_missing = target_size[1] - img.shape[3] 93 | padded_img = np.pad(img, ((0, 0), (0, 0), (0, rows_missing), (0, cols_missing)), 'constant') 94 | return padded_img 95 | 96 | 97 | def predict_sliding(net, image, tile_size, classes, flip_evaluation): 98 | interp = nn.Upsample(size=tile_size, mode='bilinear', align_corners=True) 99 | image_size = image.shape 100 | overlap = 1.0 / 3.0 101 | 102 | stride = ceil(tile_size[0] * (1 - overlap)) 103 | tile_rows = int(ceil((image_size[2] - tile_size[0]) / stride) + 1) # strided convolution formula 104 | tile_cols = int(ceil((image_size[3] - tile_size[1]) / stride) + 1) 105 | print("Need %i x %i prediction tiles @ stride %i px" % (tile_cols, tile_rows, stride)) 106 | full_probs = np.zeros((image_size[2], image_size[3], classes)) 107 | count_predictions = np.zeros((image_size[2], image_size[3], classes)) 108 | tile_counter = 0 109 | 110 | for row in range(tile_rows): 111 | for col in range(tile_cols): 112 | x1 = int(col * stride) 113 | y1 = int(row * stride) 114 | x2 = min(x1 + tile_size[1], image_size[3]) 115 | y2 = min(y1 + tile_size[0], image_size[2]) 116 | x1 = max(int(x2 - tile_size[1]), 0) # for portrait images the x1 underflows sometimes 117 | y1 = max(int(y2 - tile_size[0]), 0) # for very few rows y1 underflows 118 | 119 | img = image[:, :, y1:y2, x1:x2]
120 | padded_img = pad_image(img, tile_size) 121 | tile_counter += 1 122 | # print("Predicting tile %i" % tile_counter) 123 | # print(padded_img.shape) 124 | padded_img = torch.from_numpy(padded_img) 125 | padded_img = padded_img.cuda() 126 | # print(len(padded_img)) 127 | # print(padded_img) 128 | padded_prediction = net(padded_img) 129 | if isinstance(padded_prediction, list): 130 | padded_prediction = padded_prediction[0] 131 | padded_prediction = interp(padded_prediction).cpu().data[0].numpy().transpose(1, 2, 0) 132 | prediction = padded_prediction[0:img.shape[2], 0:img.shape[3], :] 133 | count_predictions[y1:y2, x1:x2] += 1 134 | full_probs[y1:y2, x1:x2] += prediction # accumulate the predictions also in the overlapping regions 135 | 136 | # average the predictions in the overlapping regions 137 | full_probs /= count_predictions 138 | return full_probs 139 | 140 | 141 | def predict_whole(net, image, tile_size): 142 | image = torch.from_numpy(image) 143 | interp = nn.Upsample(size=tile_size, mode='bilinear', align_corners=True) 144 | prediction = net(image.cuda()) 145 | if isinstance(prediction, list): 146 | prediction = prediction[0] 147 | prediction = interp(prediction).cpu().data[0].numpy().transpose(1, 2, 0) 148 | return prediction 149 | 150 | 151 | def predict_multiscale(net, image, tile_size, scales, classes, flip_evaluation): 152 | """ 153 | Predict an image by evaluating it at several scales. 154 | Whole-image prediction ("predict_whole") is used for inputs no larger than the original input size; 155 | for larger inputs, the sliding-window (cropping) method is used so that GPU memory is sufficient. 156 | """ 157 | image = image.data 158 | N_, C_, H_, W_ = image.shape 159 | full_probs = np.zeros((H_, W_, classes)) 160 | for scale in scales: 161 | scale = float(scale) 162 | print("Predicting image scaled by %f" % scale) 163 | scale_image = ndimage.zoom(image, (1.0, 1.0, scale, scale), order=1, prefilter=False) 164 | scaled_probs = predict_whole(net, scale_image, tile_size) 165 | if flip_evaluation: 166 | flip_scaled_probs = predict_whole(net, scale_image[:, :, :, ::-1].copy(), tile_size) 167 | scaled_probs = 0.5 * (scaled_probs + flip_scaled_probs[:, ::-1, :]) 168 | full_probs += scaled_probs 169 | full_probs /= len(scales) 170 | return full_probs 171 | 172 | 173 | def get_confusion_matrix(gt_label, pred_label, class_num): 174 | """ 175 | Calculate the confusion matrix for the given ground truth and prediction 176 | :param gt_label: the ground truth label 177 | :param pred_label: the predicted label 178 | :param class_num: the number of classes 179 | :return: the confusion matrix 180 | """ 181 | index = (gt_label * class_num + pred_label).astype('int32') 182 | label_count = np.bincount(index) 183 | confusion_matrix = np.zeros((class_num, class_num)) 184 | 185 | for i_label in range(class_num): 186 | for i_pred_label in range(class_num): 187 | cur_index = i_label * class_num + i_pred_label 188 | if cur_index < len(label_count): 189 | confusion_matrix[i_label, i_pred_label] = label_count[cur_index] 190 | 191 | return confusion_matrix 192 | 193 | 194 | def val(): 195 | """Create the model and start the evaluation process.""" 196 | args = get_arguments() 197 | 198 | h, w = args.input_size, args.input_size 199 | if args.whole: 200 | input_size = (1024, 2048) 201 | else: 202 | input_size = (h, w) 203 | import libs.models as models 204 | model = models.__dict__[args.arch](num_classes=args.num_classes, data_set=args.data_set) 205 | saved_state_dict = torch.load(args.restore_from) 206 |
model.load_state_dict(saved_state_dict, strict=False) 207 | 208 | model.eval() 209 | model.cuda() 210 | if args.rgb: 211 | IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) 212 | IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) 213 | else: 214 | IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) 215 | IMG_VARS = np.array((1, 1, 1), dtype=np.float32) 216 | 217 | # dataset = Cityscapes(args.data_dir, args.data_list, crop_size=(1024, 2048), mean=IMG_MEAN, vars=IMG_VARS, 218 | # scale=False, mirror=False, RGB=args.rgb) 219 | # set data loader 220 | if args.data_set == "cityscapes": 221 | data_set = Cityscapes(args.data_dir, args.data_list, crop_size=(1024, 2048), mean=IMG_MEAN, vars=IMG_VARS, 222 | scale=False, mirror=False, RGB=args.rgb) 223 | elif args.data_set == "camvid": 224 | data_set = CamVidDataSet(args.data_dir, args.data_list, crop_size=(360, 480), 225 | mean=IMG_MEAN, vars=IMG_VARS, scale=False, mirror=False, RGB=args.rgb) 226 | else: 227 | return 228 | 229 | testloader = data.DataLoader(data_set, batch_size=1, shuffle=False, pin_memory=True) 230 | 231 | confusion_matrix = np.zeros((args.num_classes, args.num_classes)) 232 | 233 | output_images = os.path.join(args.output_dir, "images") 234 | output_results = os.path.join(args.output_dir, "result") 235 | if not os.path.exists(args.output_dir): 236 | os.makedirs(args.output_dir) 237 | if not os.path.exists(output_images): 238 | os.makedirs(output_images) 239 | if not os.path.exists(output_results): 240 | os.makedirs(output_results) 241 | 242 | for index, batch in enumerate(testloader): 243 | if index % 100 == 0: 244 | print('%d processed' % index) 245 | image, label = batch 246 | size = image[0].size()[-2:] 247 | with torch.no_grad(): 248 | if args.whole: 249 | output = predict_multiscale(model, image, input_size, [1.0], args.num_classes, False) 250 | else: 251 | output = predict_sliding(model, image.numpy(), input_size, args.num_classes, True) 252 | 253 | seg_pred = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) 254 | 255 | seg_gt = np.asarray(label[0].numpy()[:size[0], :size[1]], dtype=int) 256 | 257 | ignore_index = seg_gt != args.ignore_label 258 | seg_gt = seg_gt[ignore_index] 259 | seg_pred = seg_pred[ignore_index] 260 | confusion_matrix += get_confusion_matrix(seg_gt, seg_pred, args.num_classes) 261 | 262 | pos = confusion_matrix.sum(1) 263 | res = confusion_matrix.sum(0) 264 | tp = np.diag(confusion_matrix) 265 | 266 | IU_array = (tp / np.maximum(1.0, pos + res - tp)) 267 | mean_IU = IU_array.mean() 268 | 269 | print({'meanIU': mean_IU, 'IU_array': IU_array}) 270 | with open(os.path.join(args.output_dir, "result", "result.txt"), 'w') as f: 271 | f.write(json.dumps({'meanIU': mean_IU, 'IU_array': IU_array.tolist()})) 272 | 273 | 274 | if __name__ == '__main__': 275 | val() 276 | --------------------------------------------------------------------------------
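Note on the learning-rate schedule: `train_distribute.py` calls `adjust_learning_rate(optimizer, args, i_iter, len(trainloader))` and exposes `--learning_rate` ("polynomial decay") and `--power`, but `libs/utils/tools.py` is not part of this listing. The sketch below is therefore an assumption based on the conventional "poly" schedule used by DeepLab-style training code, not the repository's verified implementation; `poly_lr` and `adjust_learning_rate_sketch` are illustrative names.

```python
# Minimal sketch of a poly learning-rate schedule (assumed behaviour of
# libs/utils/tools.adjust_learning_rate; names here are illustrative only).
def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    """Polynomial decay: lr = base_lr * (1 - cur_iter / max_iter) ** power."""
    return base_lr * (1 - cur_iter / float(max_iter)) ** power


def adjust_learning_rate_sketch(optimizer, base_lr, cur_iter, max_iter, power=0.9):
    """Apply the decayed rate to every parameter group and return it."""
    lr = poly_lr(base_lr, cur_iter, max_iter, power)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr


# Example with the defaults above (base lr 1e-2, power 0.9, 50000 steps):
# poly_lr(1e-2, 25000, 50000) is roughly 5.4e-3 halfway through training.
```

This would match how the training loop logs the value returned by `adjust_learning_rate` each iteration, though the real helper may differ in detail.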