├── LICENSE ├── README.md ├── config ├── Config_Crnn.yaml ├── Config_DB.yaml ├── Config_ICT.yaml ├── Config_Maskformer.yaml ├── Config_ReBiSe.yaml ├── Config_Seg.yaml ├── Config_Solo.yaml └── Config_Yolox.yaml ├── data ├── __init__.py ├── boxes.py ├── cityscapes │ └── cityscapes_info.json ├── coco │ ├── coco_classes.py │ └── coco_stuff_10k_classes.py ├── data_utils.py ├── dataloader.py └── dataset.py ├── model ├── __init__.py ├── backbone.py ├── head.py ├── model_factory.py ├── models.py ├── neck.py └── utils │ ├── __init__.py │ ├── csp_utils.py │ ├── maskformer_utils.py │ ├── mobilenetv3_utils.py │ ├── mobilevit_utils.py │ ├── ops.py │ ├── rebise_utils.py │ ├── res_utils.py │ ├── swin_utils.py │ └── transformer_utils.py ├── requirements.txt ├── setup.py ├── tools ├── __init__.py ├── augmentation.py ├── boxes.py ├── evaluation_tools.py ├── loss │ ├── SigmoidFocalLoss_cuda.cpython-38-x86_64-linux-gnu.so │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── detr_criterion.cpython-38.pyc │ │ ├── detr_matcher.cpython-38.pyc │ │ ├── loss.cpython-38.pyc │ │ └── loss_utils.cpython-38.pyc │ ├── detr_criterion.py │ ├── detr_matcher.py │ ├── loss.py │ ├── loss_utils.py │ ├── sigmoid_focal_loss_cuda.cpython-38-x86_64-linux-gnu.so │ └── src │ │ ├── SigmoidFocalLoss.cpp │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── sigmoid_focal_loss.cpp │ │ └── sigmoid_focal_loss_cuda.cu ├── misc.py ├── nms.py └── nninit.py ├── train_ddp.py ├── trainer_ddp.py └── utils ├── __init__.py ├── chars_v1_p.txt ├── common.py ├── standard_tools.py └── visualize.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytorch-Devkit 2 | 3 | This repository contains related algorithms in many fields: 4 | * object detection 5 | * segmentation 6 | * OCR 7 | * image translation 8 | 9 | ## Table of Contents 10 | - [Pytorch-Devkit](#pytorch-devkit) 11 | - [Table of Contents](#table-of-contents) 12 | - [About Details](#about-details) 13 | - [How to Use](#how-to-use) 14 | - [Requirements](#requirements) 15 | 16 | ## About Details 17 | - Support distributed training 18 | - Support mixed precision training 19 | - Support multiple augments 20 | - Backbone: Mobilenetv3 MobileViT Resnet Swintransformer DarkNet StdcNet 21 | - NECK:FPN、PAFPN 22 | - Character Recognition:CRNN 23 | - Character detection:DBNET 24 | - Object detection:YOLOX 25 | - Segmentation:ReBiSegNet MaskFormer SOLOV2 26 | - Image translation:ICT 27 | ## How to Use 28 | 29 | ```bash 30 | $ python setup.py develop 31 | $ python train_ddp.py -f ./config/Config_Yolox.yaml 32 | ``` 33 | 34 | For details about how to configure related algorithms, see examples. 35 | 36 | 37 | ## Requirements 38 | 39 | * `requirements.txt` -------------------------------------------------------------------------------- /config/Config_Crnn.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: CRNN 2 | config_name: 'basic' 3 | backbone: 'basic' 4 | 5 | optimizer: 'adamW' 6 | scheduler: 'OneCycleLR' 7 | 8 | lr: 0.001 9 | batch_size: 2 10 | epoch: 10 11 | num_workers: 0 12 | 13 | aug: True 14 | augmentation_type: 'cls' 15 | aug_dicts: 16 | affine: 17 | translate_percent: [-0.02, 0.02] 18 | scale: [0.8, 1.2] 19 | rotate: [-3, 3] 20 | mode: 'constant' 21 | cval: [200, 255] 22 | addgaussiannoise: 23 | loc: 0 24 | scale: [0.0, 8.0] 25 | per_channel: 0.5 26 | multiply: 27 | mul: [0.4, 1.5] 28 | grayscale: 29 | alpha: [0.5, 1.0] 30 | gaussianblur: 31 | sigma: [0.6,1.4] 32 | 33 | input_size: (32, 256) 34 | num_classes: 6624 35 | 36 | dataset_name: 'OcrRec' 37 | train_list: ['/home/shaoran/github_source/OCR/text_renderer-master/example_data/output/chn_data/images', '/home/shaoran/github_source/OCR/text_renderer-master/example_data/output/chn_data/chn_data.txt','/home/shaoran/git/Pytorch_Projectization_Tools/utils/chars_v1_p.txt'] 38 | test_list: ['/home/shaoran/github_source/OCR/text_renderer-master/example_data/output/chn_data/images', '/home/shaoran/github_source/OCR/text_renderer-master/example_data/output/chn_data/chn_data.txt','/home/shaoran/git/Pytorch_Projectization_Tools/utils/chars_v1_p.txt'] -------------------------------------------------------------------------------- /config/Config_DB.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: DB 2 | config_name: 'mobilevit_xxs-PAFPN-640x640' 3 | backbone: 'mobilevit_xxs' 4 | neck: 'PAFPN' 5 | 6 | optimizer: 'adamW' 7 | scheduler: 'OneCycleLR' 8 | 9 | lr: 0.001 10 | batch_size: 2 11 | epoch: 10 12 | num_workers: 1 13 | 14 | aug: True 15 | augmentation_type: 'polygon' 16 | aug_dicts: 17 | affine: 18 | translate_percent: [-0.05, 0.05] 19 | scale: [0.5, 1.5] 20 | rotate: [-5, 5] 21 | mode: 'constant' 22 | cval: [200, 255] 23 | fliplr: 24 | p: 0.5 25 | flipud: 26 | p: 0.5 27 | addgaussiannoise: 28 | loc: 0 29 | scale: [0.0, 8.0] 30 | per_channel: 0.5 31 | multiply: 32 | mul: [0.4, 1.5] 33 | grayscale: 34 | alpha: [0.5, 1.0] 35 | gaussianblur: 
36 | sigma: [0.6,1.4] 37 | 38 | 39 | input_size: (640, 640) 40 | 41 | dataset_name: 'OcrDet' 42 | train_list: ['/home/shaoran/datasets/OCR/DET/datasets/test/img', '/home/shaoran/datasets/OCR/DET/datasets/test/gt'] 43 | test_list: ['/home/shaoran/datasets/OCR/DET/datasets/test/img', '/home/shaoran/datasets/OCR/DET/datasets/test/gt'] -------------------------------------------------------------------------------- /config/Config_ICT.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: ICT 2 | backbone: 'res_18' 3 | d_model: 512 4 | num_heads: 8 5 | dff: 1024 6 | num_layers: 2 7 | target_vocab_size: 1426 8 | max_length: 50 9 | pad_index: 1 10 | 11 | optimizer: 'adamW' 12 | scheduler: 'OneCycleLR' 13 | 14 | lr: 0.001 15 | batch_size: 2 16 | epoch: 10 17 | num_workers: 0 18 | 19 | aug: False 20 | augmentation_type: 'det' 21 | 22 | input_size: (64, 320) 23 | num_channel: 3 24 | 25 | dataset_name: 'IC' 26 | 27 | images_dir: '/home/shaoran/datasets/starsee/formula/rec/files/latex10k_test_v1_songti/images' 28 | labels_dir: '/home/shaoran/datasets/starsee/formula/rec/files/latex10k_test_v1_songti/matching.txt' 29 | chars_file: '/home/shaoran/datasets/starsee/formula/rec/files/vocab.txt' 30 | 31 | images_dir_val: '/home/shaoran/datasets/starsee/formula/rec/files/latex10k_test_v1_songti/images' 32 | labels_dir_val: '/home/shaoran/datasets/starsee/formula/rec/files/latex10k_test_v1_songti/matching.txt' 33 | 34 | 35 | -------------------------------------------------------------------------------- /config/Config_Maskformer.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: MaskFormer 2 | config_name: 'mobilenetv3_s' 3 | backbone: 'mobilenetv3_s' 4 | 5 | optimizer: 'adamW' 6 | scheduler: 'OneCycleLR' 7 | 8 | lr: 0.0001 9 | batch_size: 2 10 | epoch: 10 11 | num_workers: 2 12 | 13 | aug: True 14 | augmentation_type: 'seg' 15 | aug_dicts: 16 | affine: 17 | translate_percent: [-0.2, 0.2] 18 | scale: [0.5, 1.5] 19 | rotate: [-5, 5] 20 | mode: 'constant' 21 | cval: [200, 255] 22 | fliplr: 23 | p: 0.5 24 | flipud: 25 | p: 0.5 26 | multiply: 27 | mul: [0.9, 1.1] 28 | gaussianblur: 29 | sigma: [0.9,1.1] 30 | 31 | 32 | input_size: (512, 512) 33 | 34 | dataset_name: 'Coco_stff_10k' 35 | data_dir: '/home/shaoran/datasets/coco/coco_stuff_10k' 36 | num_classes: 171 37 | 38 | loss_criteria: True -------------------------------------------------------------------------------- /config/Config_ReBiSe.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: ReBiSe 2 | backbone: 'stdc_l' 3 | 4 | optimizer: 'adamW' 5 | scheduler: 'OneCycleLR' 6 | 7 | lr: 0.001 8 | batch_size: 4 9 | epoch: 10 10 | num_workers: 1 11 | 12 | aug: False 13 | augmentation_type: 'seg' 14 | input_size: (512, 1024) 15 | ignore_lb: 255 16 | 17 | dataset_name: 'Cityscapes' 18 | num_classes: 19 19 | images_dir: '/home/shaoran/datasets/cityscapes' 20 | json_file: '/home/shaoran/git/Pytorch_Projectization_Tools/data/cityscapes/cityscapes_info.json' 21 | 22 | name: 'val' 23 | 24 | name_val: 'val' 25 | loss_criteria: False -------------------------------------------------------------------------------- /config/Config_Seg.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: Seg 2 | encoder: 'se_resnext50_32x4d' 3 | weights: 'imagenet' 4 | activation: 'sigmoid' 5 | 6 | optimizer: 'adamW' 7 | scheduler: 'OneCycleLR' 8 | 9 | lr: 0.001 10 | 
batch_size: 2 11 | epoch: 10 12 | num_workers: 2 13 | 14 | aug: True 15 | augmentation_type: 'seg' 16 | input_size: (512, 512) 17 | 18 | classes: ['visible_row', 'visible_column', 'unvisible_row', 'unvisible_column'] 19 | dataset_name: 'test_seg' 20 | train_list: ['./torch_data/seg_test/images', './torch_data/seg_test/masks_vr', './torch_data/seg_test/masks_vc', './torch_data/seg_test/masks_ur', './torch_data/seg_test/masks_uc'] 21 | test_list: ['./torch_data/seg_test/images', './torch_data/seg_test/masks_vr', './torch_data/seg_test/masks_vc', './torch_data/seg_test/masks_ur', './torch_data/seg_test/masks_uc'] -------------------------------------------------------------------------------- /config/Config_Solo.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: SOLO 2 | config_name: 'res_18-768x512' 3 | backbone: 'res_18' 4 | 5 | optimizer: 'adamW' 6 | scheduler: 'OneCycleLR' 7 | 8 | lr: 0.0001 9 | batch_size: 1 10 | epoch: 50 11 | num_workers: 0 12 | 13 | aug: False 14 | augmentation_type: 'seg' 15 | aug_dicts: 16 | affine: 17 | translate_percent: [-0.1, 0.1] 18 | scale: [0.5, 1.5] 19 | rotate: [-5, 5] 20 | mode: 'constant' 21 | cval: [200, 255] 22 | fliplr: 23 | p: 0.5 24 | flipud: 25 | p: 0.5 26 | addgaussiannoise: 27 | loc: 0 28 | scale: [0.0, 8.0] 29 | per_channel: 0.5 30 | multiply: 31 | mul: [0.8, 1.2] 32 | 33 | 34 | # (768, 512):scale ((1, 56), (28, 112), (56, 224), (112, 448), (224, 896)) 35 | # (852, 512)): scale ((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), 36 | # (1333, 800):scale ((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)) 37 | input_size: (768, 512) 38 | 39 | dataset_name: 'Coco' 40 | training_mission: 'instance' 41 | num_classes: 80 42 | data_dir: '/home/shaoran/datasets/coco2017' 43 | # 'val_annotation.json' 'instances_val2017.json' 44 | json_file: 'instances_val2017.json' 45 | # 'val2017' '' 46 | name: 'val2017' 47 | 48 | # 'val_annotation.json' 'instances_val2017.json' 49 | json_file_val: 'instances_val2017.json' 50 | # 'val2017' '' 51 | name_val: 'val2017' -------------------------------------------------------------------------------- /config/Config_Yolox.yaml: -------------------------------------------------------------------------------- 1 | experiment_name: YOLOX 2 | config_name: 'dark_s-FPN' 3 | backbone: 'dark_s' 4 | neck: 'FPN' 5 | act: 'lrelu' 6 | 7 | optimizer: 'adamW' 8 | scheduler: 'OneCycleLR' 9 | 10 | lr: 0.0001 11 | batch_size: 2 12 | epoch: 10 13 | num_workers: 1 14 | 15 | aug: True 16 | augmentation_type: 'det' 17 | aug_dicts: 18 | affine: 19 | translate_percent: [-0.05, 0.05] 20 | scale: [0.5, 1.5] 21 | rotate: [-5, 5] 22 | mode: 'constant' 23 | cval: [200, 255] 24 | fliplr: 25 | p: 0.5 26 | flipud: 27 | p: 0.5 28 | addgaussiannoise: 29 | loc: 0 30 | scale: [0.0, 8.0] 31 | per_channel: 0.5 32 | multiply: 33 | mul: [0.4, 1.5] 34 | cutout: 35 | nb_iterations: [2, 6] 36 | size: 0.05 37 | squared: False 38 | fill_mode: "constant" 39 | cval: [0, 255] 40 | fill_per_channel: 0.5 41 | add: 42 | value: [-40, 40] 43 | per_channel: 0.5 44 | grayscale: 45 | alpha: [0.5, 1.0] 46 | gaussianblur: 47 | sigma: [0.6,1.4] 48 | 49 | input_size: (512, 512) 50 | 51 | dataset_name: 'Coco' 52 | training_mission: 'det' 53 | num_classes: 80 54 | data_dir: '/home/shaoran/Datasets/coco2017/' 55 | 56 | json_file: 'instances_val2017.json' 57 | name: 'val2017' 58 | 59 | json_file_val: 'instances_val2017.json' 60 | name_val: 'val2017' 61 | loss_criteria: True 
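The YAML configs above are consumed as plain dictionaries: `data/dataloader.py` (further down in this dump) reads keys such as `config['batch_size']` and turns the string-valued `input_size` into a tuple with `eval`. Below is a minimal sketch of loading one of these files, assuming PyYAML; `train_ddp.py` itself is not reproduced in this section, so the `-f` argument handling is only an illustration based on the README usage, not the repository's actual parsing code.

```python
# Hedged sketch: loading a config such as ./config/Config_Yolox.yaml.
# Assumption: train_ddp.py is not shown here, so its real argument parsing may differ;
# the dict-style access and the eval() of 'input_size' mirror data/dataloader.py.
import argparse
import yaml

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--config_file", default="./config/Config_Yolox.yaml")
args = parser.parse_args()

with open(args.config_file, "r") as fp:
    config = yaml.safe_load(fp)              # plain dict, e.g. config['backbone'] == 'dark_s'

input_size = eval(config["input_size"])      # YAML keeps "(512, 512)" as a string; eval -> (512, 512)
print(config["experiment_name"], config["backbone"], input_size, config["batch_size"])
```

Note that `input_size` is written as a tuple-like string (e.g. `(512, 512)`), which YAML parses as a plain scalar; the code base relies on `eval` to convert it into a Python tuple before building datasets.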
-------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/data/__init__.py -------------------------------------------------------------------------------- /data/boxes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import numpy as np 12 | 13 | import torch 14 | import torchvision 15 | 16 | __all__ = [ 17 | "filter_box", 18 | "postprocess", 19 | "bboxes_iou", 20 | "matrix_iou", 21 | "adjust_box_anns", 22 | "xyxy2xywh", 23 | "xyxy2cxcywh", 24 | ] 25 | 26 | 27 | def filter_box(output, scale_range): 28 | """ 29 | output: (N, 5+class) shape 30 | """ 31 | min_scale, max_scale = scale_range 32 | w = output[:, 2] - output[:, 0] 33 | h = output[:, 3] - output[:, 1] 34 | keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale) 35 | return output[keep] 36 | 37 | 38 | def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): 39 | box_corner = prediction.new(prediction.shape) 40 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 41 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 42 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 43 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 44 | prediction[:, :, :4] = box_corner[:, :, :4] 45 | 46 | output = [None for _ in range(len(prediction))] 47 | for i, image_pred in enumerate(prediction): 48 | 49 | if not image_pred.size(0): 50 | continue 51 | class_conf, class_pred = torch.max( 52 | image_pred[:, 5 : 5 + num_classes], 1, keepdim=True 53 | ) 54 | 55 | conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() 56 | detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) 57 | detections = detections[conf_mask] 58 | if not detections.size(0): 59 | continue 60 | 61 | nms_out_index = torchvision.ops.batched_nms( 62 | detections[:, :4], 63 | detections[:, 4] * detections[:, 5], 64 | detections[:, 6], 65 | nms_thre, 66 | ) 67 | detections = detections[nms_out_index] 68 | if output[i] is None: 69 | output[i] = detections 70 | else: 71 | output[i] = torch.cat((output[i], detections)) 72 | 73 | return output 74 | 75 | 76 | def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): 77 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: 78 | raise IndexError 79 | 80 | if xyxy: 81 | tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) 82 | br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) 83 | area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 84 | area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 85 | else: 86 | tl = torch.max( 87 | (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), 88 | (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), 89 | ) 90 | br = torch.min( 91 | (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), 92 | (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), 93 | ) 94 | 95 | area_a = torch.prod(bboxes_a[:, 2:], 1) 96 | area_b = torch.prod(bboxes_b[:, 2:], 1) 97 | en = (tl < br).type(tl.type()).prod(dim=2) 98 | area_i = torch.prod(br - tl, 2) * en 99 | return area_i / (area_a[:, None] + area_b - area_i) 100 | 101 | 102 | def matrix_iou(a, b): 103 | """ 104 | return iou of a and b, numpy version for data 
augenmentation 105 | """ 106 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 107 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 108 | 109 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 110 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 111 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 112 | return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12) 113 | 114 | 115 | def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max): 116 | bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max) 117 | bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max) 118 | return bbox 119 | 120 | 121 | def xyxy2xywh(bboxes): 122 | bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] 123 | bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] 124 | return bboxes 125 | 126 | 127 | def xyxy2cxcywh(bboxes): 128 | bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] 129 | bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] 130 | bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5 131 | bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5 132 | return bboxes 133 | -------------------------------------------------------------------------------- /data/cityscapes/cityscapes_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "hasInstances": false, 4 | "category": "void", 5 | "catid": 0, 6 | "name": "unlabeled", 7 | "ignoreInEval": true, 8 | "id": 0, 9 | "color": [ 10 | 0, 11 | 0, 12 | 0 13 | ], 14 | "trainId": 255 15 | }, 16 | { 17 | "hasInstances": false, 18 | "category": "void", 19 | "catid": 0, 20 | "name": "ego vehicle", 21 | "ignoreInEval": true, 22 | "id": 1, 23 | "color": [ 24 | 0, 25 | 0, 26 | 0 27 | ], 28 | "trainId": 255 29 | }, 30 | { 31 | "hasInstances": false, 32 | "category": "void", 33 | "catid": 0, 34 | "name": "rectification border", 35 | "ignoreInEval": true, 36 | "id": 2, 37 | "color": [ 38 | 0, 39 | 0, 40 | 0 41 | ], 42 | "trainId": 255 43 | }, 44 | { 45 | "hasInstances": false, 46 | "category": "void", 47 | "catid": 0, 48 | "name": "out of roi", 49 | "ignoreInEval": true, 50 | "id": 3, 51 | "color": [ 52 | 0, 53 | 0, 54 | 0 55 | ], 56 | "trainId": 255 57 | }, 58 | { 59 | "hasInstances": false, 60 | "category": "void", 61 | "catid": 0, 62 | "name": "static", 63 | "ignoreInEval": true, 64 | "id": 4, 65 | "color": [ 66 | 0, 67 | 0, 68 | 0 69 | ], 70 | "trainId": 255 71 | }, 72 | { 73 | "hasInstances": false, 74 | "category": "void", 75 | "catid": 0, 76 | "name": "dynamic", 77 | "ignoreInEval": true, 78 | "id": 5, 79 | "color": [ 80 | 111, 81 | 74, 82 | 0 83 | ], 84 | "trainId": 255 85 | }, 86 | { 87 | "hasInstances": false, 88 | "category": "void", 89 | "catid": 0, 90 | "name": "ground", 91 | "ignoreInEval": true, 92 | "id": 6, 93 | "color": [ 94 | 81, 95 | 0, 96 | 81 97 | ], 98 | "trainId": 255 99 | }, 100 | { 101 | "hasInstances": false, 102 | "category": "flat", 103 | "catid": 1, 104 | "name": "road", 105 | "ignoreInEval": false, 106 | "id": 7, 107 | "color": [ 108 | 128, 109 | 64, 110 | 128 111 | ], 112 | "trainId": 0 113 | }, 114 | { 115 | "hasInstances": false, 116 | "category": "flat", 117 | "catid": 1, 118 | "name": "sidewalk", 119 | "ignoreInEval": false, 120 | "id": 8, 121 | "color": [ 122 | 244, 123 | 35, 124 | 232 125 | ], 126 | "trainId": 1 127 | }, 128 | { 129 | "hasInstances": false, 130 | "category": "flat", 131 | "catid": 1, 132 | "name": "parking", 133 | "ignoreInEval": true, 134 | "id": 9, 135 | "color": [ 136 | 250, 137 | 170, 138 | 160 139 | ], 140 | "trainId": 255 141 | }, 142 | { 143 | "hasInstances": false, 
144 | "category": "flat", 145 | "catid": 1, 146 | "name": "rail track", 147 | "ignoreInEval": true, 148 | "id": 10, 149 | "color": [ 150 | 230, 151 | 150, 152 | 140 153 | ], 154 | "trainId": 255 155 | }, 156 | { 157 | "hasInstances": false, 158 | "category": "construction", 159 | "catid": 2, 160 | "name": "building", 161 | "ignoreInEval": false, 162 | "id": 11, 163 | "color": [ 164 | 70, 165 | 70, 166 | 70 167 | ], 168 | "trainId": 2 169 | }, 170 | { 171 | "hasInstances": false, 172 | "category": "construction", 173 | "catid": 2, 174 | "name": "wall", 175 | "ignoreInEval": false, 176 | "id": 12, 177 | "color": [ 178 | 102, 179 | 102, 180 | 156 181 | ], 182 | "trainId": 3 183 | }, 184 | { 185 | "hasInstances": false, 186 | "category": "construction", 187 | "catid": 2, 188 | "name": "fence", 189 | "ignoreInEval": false, 190 | "id": 13, 191 | "color": [ 192 | 190, 193 | 153, 194 | 153 195 | ], 196 | "trainId": 4 197 | }, 198 | { 199 | "hasInstances": false, 200 | "category": "construction", 201 | "catid": 2, 202 | "name": "guard rail", 203 | "ignoreInEval": true, 204 | "id": 14, 205 | "color": [ 206 | 180, 207 | 165, 208 | 180 209 | ], 210 | "trainId": 255 211 | }, 212 | { 213 | "hasInstances": false, 214 | "category": "construction", 215 | "catid": 2, 216 | "name": "bridge", 217 | "ignoreInEval": true, 218 | "id": 15, 219 | "color": [ 220 | 150, 221 | 100, 222 | 100 223 | ], 224 | "trainId": 255 225 | }, 226 | { 227 | "hasInstances": false, 228 | "category": "construction", 229 | "catid": 2, 230 | "name": "tunnel", 231 | "ignoreInEval": true, 232 | "id": 16, 233 | "color": [ 234 | 150, 235 | 120, 236 | 90 237 | ], 238 | "trainId": 255 239 | }, 240 | { 241 | "hasInstances": false, 242 | "category": "object", 243 | "catid": 3, 244 | "name": "pole", 245 | "ignoreInEval": false, 246 | "id": 17, 247 | "color": [ 248 | 153, 249 | 153, 250 | 153 251 | ], 252 | "trainId": 5 253 | }, 254 | { 255 | "hasInstances": false, 256 | "category": "object", 257 | "catid": 3, 258 | "name": "polegroup", 259 | "ignoreInEval": true, 260 | "id": 18, 261 | "color": [ 262 | 153, 263 | 153, 264 | 153 265 | ], 266 | "trainId": 255 267 | }, 268 | { 269 | "hasInstances": false, 270 | "category": "object", 271 | "catid": 3, 272 | "name": "traffic light", 273 | "ignoreInEval": false, 274 | "id": 19, 275 | "color": [ 276 | 250, 277 | 170, 278 | 30 279 | ], 280 | "trainId": 6 281 | }, 282 | { 283 | "hasInstances": false, 284 | "category": "object", 285 | "catid": 3, 286 | "name": "traffic sign", 287 | "ignoreInEval": false, 288 | "id": 20, 289 | "color": [ 290 | 220, 291 | 220, 292 | 0 293 | ], 294 | "trainId": 7 295 | }, 296 | { 297 | "hasInstances": false, 298 | "category": "nature", 299 | "catid": 4, 300 | "name": "vegetation", 301 | "ignoreInEval": false, 302 | "id": 21, 303 | "color": [ 304 | 107, 305 | 142, 306 | 35 307 | ], 308 | "trainId": 8 309 | }, 310 | { 311 | "hasInstances": false, 312 | "category": "nature", 313 | "catid": 4, 314 | "name": "terrain", 315 | "ignoreInEval": false, 316 | "id": 22, 317 | "color": [ 318 | 152, 319 | 251, 320 | 152 321 | ], 322 | "trainId": 9 323 | }, 324 | { 325 | "hasInstances": false, 326 | "category": "sky", 327 | "catid": 5, 328 | "name": "sky", 329 | "ignoreInEval": false, 330 | "id": 23, 331 | "color": [ 332 | 70, 333 | 130, 334 | 180 335 | ], 336 | "trainId": 10 337 | }, 338 | { 339 | "hasInstances": true, 340 | "category": "human", 341 | "catid": 6, 342 | "name": "person", 343 | "ignoreInEval": false, 344 | "id": 24, 345 | "color": [ 346 | 220, 347 | 20, 348 | 60 349 | ], 
350 | "trainId": 11 351 | }, 352 | { 353 | "hasInstances": true, 354 | "category": "human", 355 | "catid": 6, 356 | "name": "rider", 357 | "ignoreInEval": false, 358 | "id": 25, 359 | "color": [ 360 | 255, 361 | 0, 362 | 0 363 | ], 364 | "trainId": 12 365 | }, 366 | { 367 | "hasInstances": true, 368 | "category": "vehicle", 369 | "catid": 7, 370 | "name": "car", 371 | "ignoreInEval": false, 372 | "id": 26, 373 | "color": [ 374 | 0, 375 | 0, 376 | 142 377 | ], 378 | "trainId": 13 379 | }, 380 | { 381 | "hasInstances": true, 382 | "category": "vehicle", 383 | "catid": 7, 384 | "name": "truck", 385 | "ignoreInEval": false, 386 | "id": 27, 387 | "color": [ 388 | 0, 389 | 0, 390 | 70 391 | ], 392 | "trainId": 14 393 | }, 394 | { 395 | "hasInstances": true, 396 | "category": "vehicle", 397 | "catid": 7, 398 | "name": "bus", 399 | "ignoreInEval": false, 400 | "id": 28, 401 | "color": [ 402 | 0, 403 | 60, 404 | 100 405 | ], 406 | "trainId": 15 407 | }, 408 | { 409 | "hasInstances": true, 410 | "category": "vehicle", 411 | "catid": 7, 412 | "name": "caravan", 413 | "ignoreInEval": true, 414 | "id": 29, 415 | "color": [ 416 | 0, 417 | 0, 418 | 90 419 | ], 420 | "trainId": 255 421 | }, 422 | { 423 | "hasInstances": true, 424 | "category": "vehicle", 425 | "catid": 7, 426 | "name": "trailer", 427 | "ignoreInEval": true, 428 | "id": 30, 429 | "color": [ 430 | 0, 431 | 0, 432 | 110 433 | ], 434 | "trainId": 255 435 | }, 436 | { 437 | "hasInstances": true, 438 | "category": "vehicle", 439 | "catid": 7, 440 | "name": "train", 441 | "ignoreInEval": false, 442 | "id": 31, 443 | "color": [ 444 | 0, 445 | 80, 446 | 100 447 | ], 448 | "trainId": 16 449 | }, 450 | { 451 | "hasInstances": true, 452 | "category": "vehicle", 453 | "catid": 7, 454 | "name": "motorcycle", 455 | "ignoreInEval": false, 456 | "id": 32, 457 | "color": [ 458 | 0, 459 | 0, 460 | 230 461 | ], 462 | "trainId": 17 463 | }, 464 | { 465 | "hasInstances": true, 466 | "category": "vehicle", 467 | "catid": 7, 468 | "name": "bicycle", 469 | "ignoreInEval": false, 470 | "id": 33, 471 | "color": [ 472 | 119, 473 | 11, 474 | 32 475 | ], 476 | "trainId": 18 477 | }, 478 | { 479 | "hasInstances": false, 480 | "category": "vehicle", 481 | "catid": 7, 482 | "name": "license plate", 483 | "ignoreInEval": true, 484 | "id": -1, 485 | "color": [ 486 | 0, 487 | 0, 488 | 142 489 | ], 490 | "trainId": -1 491 | } 492 | ] -------------------------------------------------------------------------------- /data/coco/coco_classes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | # coding: utf-8 11 | 12 | COCO_CLASSES = ( 13 | "person", 14 | "bicycle", 15 | "car", 16 | "motorcycle", 17 | "airplane", 18 | "bus", 19 | "train", 20 | "truck", 21 | "boat", 22 | "traffic light", 23 | "fire hydrant", 24 | "stop sign", 25 | "parking meter", 26 | "bench", 27 | "bird", 28 | "cat", 29 | "dog", 30 | "horse", 31 | "sheep", 32 | "cow", 33 | "elephant", 34 | "bear", 35 | "zebra", 36 | "giraffe", 37 | "backpack", 38 | "umbrella", 39 | "handbag", 40 | "tie", 41 | "suitcase", 42 | "frisbee", 43 | "skis", 44 | "snowboard", 45 | "sports ball", 46 | "kite", 47 | "baseball bat", 48 | "baseball glove", 49 | "skateboard", 50 | "surfboard", 51 | "tennis racket", 52 | "bottle", 53 | "wine glass", 54 | "cup", 55 | "fork", 56 | "knife", 57 | "spoon", 58 | "bowl", 59 | "banana", 60 | "apple", 61 | "sandwich", 62 | 
"orange", 63 | "broccoli", 64 | "carrot", 65 | "hot dog", 66 | "pizza", 67 | "donut", 68 | "cake", 69 | "chair", 70 | "couch", 71 | "potted plant", 72 | "bed", 73 | "dining table", 74 | "toilet", 75 | "tv", 76 | "laptop", 77 | "mouse", 78 | "remote", 79 | "keyboard", 80 | "cell phone", 81 | "microwave", 82 | "oven", 83 | "toaster", 84 | "sink", 85 | "refrigerator", 86 | "book", 87 | "clock", 88 | "vase", 89 | "scissors", 90 | "teddy bear", 91 | "hair drier", 92 | "toothbrush", 93 | ) 94 | 95 | COCO_LABEL = [1, 2, 3, 4, 5, 6, 7, 8, 96 | 9, 10, 11, 13, 14, 15, 16, 17, 97 | 18, 19, 20, 21, 22, 23, 24, 25, 98 | 27, 28, 31, 32, 33, 34, 35, 36, 99 | 37, 38, 39, 40, 41, 42, 43, 44, 100 | 46, 47, 48, 49, 50, 51, 52, 53, 101 | 54, 55, 56, 57, 58, 59, 60, 61, 102 | 62, 63, 64, 65, 67, 70, 72, 73, 103 | 74, 75, 76, 77, 78, 79, 80, 81, 104 | 82, 84, 85, 86, 87, 88, 89, 90] 105 | 106 | COCO_LABEL_MAP = { 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 107 | 9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 108 | 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 109 | 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, 36: 32, 110 | 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, 111 | 46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, 112 | 54: 49, 55: 50, 56: 51, 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, 113 | 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, 114 | 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, 81: 72, 115 | 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80} -------------------------------------------------------------------------------- /data/coco/coco_stuff_10k_classes.py: -------------------------------------------------------------------------------- 1 | COCO_STUFF_10k_CATEGORIES = [ 2 | {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, 3 | {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, 4 | {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, 5 | {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, 6 | {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, 7 | {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, 8 | {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, 9 | {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, 10 | {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, 11 | {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, 12 | {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, 13 | {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, 14 | {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, 15 | {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, 16 | {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, 17 | {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, 18 | {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, 19 | {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, 20 | {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, 21 | {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, 22 | {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, 23 | {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, 24 | {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, 25 | {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, 26 | 
{"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, 27 | {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, 28 | {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, 29 | {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, 30 | {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, 31 | {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, 32 | {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, 33 | {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, 34 | {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, 35 | {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, 36 | {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, 37 | {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, 38 | {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, 39 | {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, 40 | {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, 41 | {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, 42 | {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, 43 | {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, 44 | {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, 45 | {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, 46 | {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, 47 | {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, 48 | {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, 49 | {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, 50 | {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, 51 | {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, 52 | {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, 53 | {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, 54 | {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, 55 | {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, 56 | {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, 57 | {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, 58 | {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, 59 | {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, 60 | {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, 61 | {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, 62 | {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, 63 | {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, 64 | {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, 65 | {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, 66 | {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, 67 | {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, 68 | {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, 69 | {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, 70 | {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, 71 | {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, 72 | {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, 73 | {"color": [127, 167, 115], "isthing": 1, "id": 81, 
"name": "sink"}, 74 | {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, 75 | {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, 76 | {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, 77 | {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, 78 | {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, 79 | {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, 80 | {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, 81 | {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, 82 | {"id": 92, "name": "banner", "supercategory": "textile"}, 83 | {"id": 93, "name": "blanket", "supercategory": "textile"}, 84 | {"id": 94, "name": "branch", "supercategory": "plant"}, 85 | {"id": 95, "name": "bridge", "supercategory": "building"}, 86 | {"id": 96, "name": "building-other", "supercategory": "building"}, 87 | {"id": 97, "name": "bush", "supercategory": "plant"}, 88 | {"id": 98, "name": "cabinet", "supercategory": "furniture-stuff"}, 89 | {"id": 99, "name": "cage", "supercategory": "structural"}, 90 | {"id": 100, "name": "cardboard", "supercategory": "raw-material"}, 91 | {"id": 101, "name": "carpet", "supercategory": "floor"}, 92 | {"id": 102, "name": "ceiling-other", "supercategory": "ceiling"}, 93 | {"id": 103, "name": "ceiling-tile", "supercategory": "ceiling"}, 94 | {"id": 104, "name": "cloth", "supercategory": "textile"}, 95 | {"id": 105, "name": "clothes", "supercategory": "textile"}, 96 | {"id": 106, "name": "clouds", "supercategory": "sky"}, 97 | {"id": 107, "name": "counter", "supercategory": "furniture-stuff"}, 98 | {"id": 108, "name": "cupboard", "supercategory": "furniture-stuff"}, 99 | {"id": 109, "name": "curtain", "supercategory": "textile"}, 100 | {"id": 110, "name": "desk-stuff", "supercategory": "furniture-stuff"}, 101 | {"id": 111, "name": "dirt", "supercategory": "ground"}, 102 | {"id": 112, "name": "door-stuff", "supercategory": "furniture-stuff"}, 103 | {"id": 113, "name": "fence", "supercategory": "structural"}, 104 | {"id": 114, "name": "floor-marble", "supercategory": "floor"}, 105 | {"id": 115, "name": "floor-other", "supercategory": "floor"}, 106 | {"id": 116, "name": "floor-stone", "supercategory": "floor"}, 107 | {"id": 117, "name": "floor-tile", "supercategory": "floor"}, 108 | {"id": 118, "name": "floor-wood", "supercategory": "floor"}, 109 | {"id": 119, "name": "flower", "supercategory": "plant"}, 110 | {"id": 120, "name": "fog", "supercategory": "water"}, 111 | {"id": 121, "name": "food-other", "supercategory": "food-stuff"}, 112 | {"id": 122, "name": "fruit", "supercategory": "food-stuff"}, 113 | {"id": 123, "name": "furniture-other", "supercategory": "furniture-stuff"}, 114 | {"id": 124, "name": "grass", "supercategory": "plant"}, 115 | {"id": 125, "name": "gravel", "supercategory": "ground"}, 116 | {"id": 126, "name": "ground-other", "supercategory": "ground"}, 117 | {"id": 127, "name": "hill", "supercategory": "solid"}, 118 | {"id": 128, "name": "house", "supercategory": "building"}, 119 | {"id": 129, "name": "leaves", "supercategory": "plant"}, 120 | {"id": 130, "name": "light", "supercategory": "furniture-stuff"}, 121 | {"id": 131, "name": "mat", "supercategory": "textile"}, 122 | {"id": 132, "name": "metal", "supercategory": "raw-material"}, 123 | {"id": 133, "name": "mirror-stuff", "supercategory": "furniture-stuff"}, 124 | {"id": 134, "name": "moss", "supercategory": "plant"}, 125 | {"id": 135, "name": "mountain", 
"supercategory": "solid"}, 126 | {"id": 136, "name": "mud", "supercategory": "ground"}, 127 | {"id": 137, "name": "napkin", "supercategory": "textile"}, 128 | {"id": 138, "name": "net", "supercategory": "structural"}, 129 | {"id": 139, "name": "paper", "supercategory": "raw-material"}, 130 | {"id": 140, "name": "pavement", "supercategory": "ground"}, 131 | {"id": 141, "name": "pillow", "supercategory": "textile"}, 132 | {"id": 142, "name": "plant-other", "supercategory": "plant"}, 133 | {"id": 143, "name": "plastic", "supercategory": "raw-material"}, 134 | {"id": 144, "name": "platform", "supercategory": "ground"}, 135 | {"id": 145, "name": "playingfield", "supercategory": "ground"}, 136 | {"id": 146, "name": "railing", "supercategory": "structural"}, 137 | {"id": 147, "name": "railroad", "supercategory": "ground"}, 138 | {"id": 148, "name": "river", "supercategory": "water"}, 139 | {"id": 149, "name": "road", "supercategory": "ground"}, 140 | {"id": 150, "name": "rock", "supercategory": "solid"}, 141 | {"id": 151, "name": "roof", "supercategory": "building"}, 142 | {"id": 152, "name": "rug", "supercategory": "textile"}, 143 | {"id": 153, "name": "salad", "supercategory": "food-stuff"}, 144 | {"id": 154, "name": "sand", "supercategory": "ground"}, 145 | {"id": 155, "name": "sea", "supercategory": "water"}, 146 | {"id": 156, "name": "shelf", "supercategory": "furniture-stuff"}, 147 | {"id": 157, "name": "sky-other", "supercategory": "sky"}, 148 | {"id": 158, "name": "skyscraper", "supercategory": "building"}, 149 | {"id": 159, "name": "snow", "supercategory": "ground"}, 150 | {"id": 160, "name": "solid-other", "supercategory": "solid"}, 151 | {"id": 161, "name": "stairs", "supercategory": "furniture-stuff"}, 152 | {"id": 162, "name": "stone", "supercategory": "solid"}, 153 | {"id": 163, "name": "straw", "supercategory": "plant"}, 154 | {"id": 164, "name": "structural-other", "supercategory": "structural"}, 155 | {"id": 165, "name": "table", "supercategory": "furniture-stuff"}, 156 | {"id": 166, "name": "tent", "supercategory": "building"}, 157 | {"id": 167, "name": "textile-other", "supercategory": "textile"}, 158 | {"id": 168, "name": "towel", "supercategory": "textile"}, 159 | {"id": 169, "name": "tree", "supercategory": "plant"}, 160 | {"id": 170, "name": "vegetable", "supercategory": "food-stuff"}, 161 | {"id": 171, "name": "wall-brick", "supercategory": "wall"}, 162 | {"id": 172, "name": "wall-concrete", "supercategory": "wall"}, 163 | {"id": 173, "name": "wall-other", "supercategory": "wall"}, 164 | {"id": 174, "name": "wall-panel", "supercategory": "wall"}, 165 | {"id": 175, "name": "wall-stone", "supercategory": "wall"}, 166 | {"id": 176, "name": "wall-tile", "supercategory": "wall"}, 167 | {"id": 177, "name": "wall-wood", "supercategory": "wall"}, 168 | {"id": 178, "name": "water-other", "supercategory": "water"}, 169 | {"id": 179, "name": "waterdrops", "supercategory": "water"}, 170 | {"id": 180, "name": "window-blind", "supercategory": "window"}, 171 | {"id": 181, "name": "window-other", "supercategory": "window"}, 172 | {"id": 182, "name": "wood", "supercategory": "solid"}, 173 | ] 174 | -------------------------------------------------------------------------------- /data/dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | import torch 11 | from torch.utils.data import DataLoader 12 | 13 | 
import torchvision 14 | from torchvision import datasets 15 | from torchvision.transforms import ToTensor, Resize 16 | 17 | from loguru import logger 18 | import os 19 | import sys 20 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 21 | sys.path.append(__dir__) 22 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 23 | 24 | from tools.augmentation import Augmentation, BaseAugmentation 25 | from data.dataset import CustomDataset, CustomDataset_seg, OcrDetDataset, OcrRecDataset, CocoDataset,\ 26 | ImageCaptionDataset, CityscapesDataset, CocoStuff_10kDataset 27 | from data.data_utils import TrainTransform, TrainTransform_Instance 28 | 29 | trans = torchvision.transforms.Compose([ ToTensor(), Resize(size=(224, 224)) ]) 30 | 31 | class Data_loader(): 32 | def __init__(self, config, args): 33 | self.is_main_process = True if args.rank== 0 else False 34 | self.config = config 35 | self.train_list = config.get('train_list', None) 36 | self.test_list = config.get('test_list', None) 37 | self.dataset_name = self.config['dataset_name'] 38 | augmentation_type = self.config.get('augmentation_type', None) 39 | aug_dicts = config.get('aug_dicts', None) 40 | if aug_dicts is not None: 41 | aug=BaseAugmentation(config['aug_dicts'])() 42 | if self.is_main_process: 43 | print('Using config augmentation') 44 | else: 45 | aug = None 46 | if self.is_main_process: 47 | print('Using default augmentation') 48 | self.augmentation = Augmentation(task_type='cls' if augmentation_type is None else augmentation_type, aug=aug) 49 | 50 | 51 | def get_train(self, distributed=False, nprocs=1): 52 | if self.is_main_process: 53 | logger.info("Trian Dataset name: {}".format(self.dataset_name)) 54 | if self.dataset_name == 'custom': 55 | train_dataset = CustomDataset(lists=self.train_list, shape=eval(self.config['input_size']), 56 | augmentation=self.augmentation if self.config['aug'] else None, dtype='train' 57 | ) 58 | elif self.dataset_name == 'test_cls': 59 | train_dataset = datasets.CIFAR10( 60 | root="../torch_data", 61 | train=True, 62 | download=True, 63 | transform=trans 64 | ) 65 | elif self.dataset_name == 'test_seg': 66 | train_dataset = CustomDataset_seg(lists=self.train_list, shape=eval(self.config['input_size']), 67 | augmentation=self.augmentation if self.config['aug'] else None, dtype='train' 68 | ) 69 | elif self.dataset_name == 'OcrDet': 70 | train_dataset = OcrDetDataset(lists=self.train_list, shape=eval(self.config['input_size']), 71 | augmentation=self.augmentation if self.config['aug'] else None, dtype='train' 72 | ) 73 | elif self.dataset_name == 'OcrRec': 74 | train_dataset = OcrRecDataset( lists=self.train_list, shape=eval(self.config['input_size']), 75 | augmentation=self.augmentation if self.config['aug'] else None, dtype='train' 76 | ) 77 | elif self.dataset_name == 'Coco': 78 | train_dataset = CocoDataset(data_dir=self.config['data_dir'], json_file=self.config['json_file'], name=self.config['name'], 79 | shape=eval(self.config['input_size']), augmentation=self.augmentation if self.config['aug'] else None, 80 | preproc=TrainTransform(rgb_means=(0.485, 0.456, 0.406), 81 | std=(0.229, 0.224, 0.225), 82 | max_labels=50 83 | ) if self.config.get('training_mission', 'det') != 'instance' else TrainTransform_Instance(rgb_means=(0.485, 0.456, 0.406), 84 | std=(0.229, 0.224, 0.225)), 85 | mode= self.config.get('training_mission', 'det'), 86 | dtype='train' 87 | ) 88 | elif self.dataset_name == 'IC': 89 | train_dataset = ImageCaptionDataset(images_dir=self.config['images_dir'], 
labels_dir=self.config['labels_dir'], chars_file=self.config['chars_file'], 90 | shape=eval(self.config['input_size']), augmentation=self.augmentation if self.config['aug'] else None, 91 | max_length = int(self.config.get('max_length', 50)), 92 | num_channel= int(self.config.get('num_channel', 3)), 93 | dtype='train' 94 | ) 95 | elif self.dataset_name == 'Cityscapes': 96 | train_dataset = CityscapesDataset(data_dir=self.config['images_dir'], json_file=self.config.get('json_file', None), shape=eval(self.config['input_size']), 97 | augmentation=self.augmentation if self.config['aug'] else None, name=self.config['name'], 98 | dtype='train', ignore_lb=int(self.config.get('ignore_lb', 255)), 99 | scales=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5) 100 | ) 101 | 102 | elif self.dataset_name == 'Coco_stff_10k': 103 | train_dataset = CocoStuff_10kDataset(data_dir=self.config['data_dir'], shape=eval(self.config['input_size']), 104 | dtype='train', mean=None, std=None 105 | ) 106 | 107 | else: 108 | raise NotImplementedError('{} dataset_name not supported.'.format(self.dataset_name)) 109 | 110 | if self.is_main_process: 111 | logger.info("Train Dataset samples: {}".format(len(train_dataset))) 112 | 113 | if not distributed: 114 | return DataLoader(train_dataset, batch_size=self.config['batch_size'], shuffle=self.config.get('shuffle', True), 115 | num_workers=self.config['num_workers'], collate_fn=train_dataset.get_collate_fn()) 116 | 117 | else: 118 | assert self.config['batch_size'] % nprocs == 0 119 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 120 | sampler_flag = self.config.get('sampler', True) 121 | train_loader = DataLoader(train_dataset, 122 | batch_size=self.config['batch_size'] // nprocs, 123 | num_workers=max(self.config['num_workers'] // nprocs, 1), 124 | pin_memory=True, 125 | sampler=train_sampler if sampler_flag else None, 126 | collate_fn=train_dataset.get_collate_fn()) 127 | return train_loader, train_sampler 128 | 129 | 130 | def get_test(self, distributed=False, nprocs=1): 131 | if self.is_main_process: 132 | logger.info("Test Dataset name: {}".format(self.dataset_name)) 133 | 134 | if self.dataset_name == 'custom': 135 | test_dataset = CustomDataset(lists=self.test_list, shape=eval(self.config['input_size']), 136 | augmentation=None, dtype='val' 137 | ) 138 | elif self.dataset_name == 'test_cls': 139 | test_dataset = datasets.CIFAR10( 140 | root="../torch_data", 141 | train=False, 142 | download=True, 143 | transform=trans 144 | ) 145 | elif self.dataset_name == 'test_seg': 146 | test_dataset = CustomDataset_seg(lists=self.test_list, shape=eval(self.config['input_size']), 147 | augmentation=None, dtype='val' 148 | ) 149 | elif self.dataset_name == 'OcrDet': 150 | test_dataset = OcrDetDataset(lists=self.test_list, shape=eval(self.config['input_size']), 151 | augmentation=None, dtype='val' 152 | ) 153 | elif self.dataset_name == 'OcrRec': 154 | test_dataset = OcrRecDataset( lists=self.test_list, shape=eval(self.config['input_size']), 155 | augmentation=None, dtype='val' 156 | ) 157 | elif self.dataset_name == 'Coco': 158 | test_dataset = CocoDataset(data_dir=self.config['data_dir'], json_file=self.config['json_file_val'], name=self.config['name_val'], 159 | shape=eval(self.config['input_size']), augmentation=None, 160 | preproc=TrainTransform(rgb_means=(0.485, 0.456, 0.406), 161 | std=(0.229, 0.224, 0.225), 162 | max_labels=50 163 | ) if self.config.get('training_mission', 'det') != 'instance' else 
TrainTransform_Instance(rgb_means=(0.485, 0.456, 0.406), 164 | std=(0.229, 0.224, 0.225), with_box=True), 165 | mode= self.config.get('training_mission', 'det'), 166 | dtype='val' 167 | ) 168 | elif self.dataset_name == 'IC': 169 | test_dataset = ImageCaptionDataset(images_dir=self.config['images_dir_val'], labels_dir=self.config['labels_dir_val'], chars_file=self.config['chars_file'], 170 | shape=eval(self.config['input_size']), augmentation= None, 171 | max_length = int(self.config.get('max_length', 50)), 172 | num_channel= int(self.config.get('num_channel', 3)), 173 | dtype='val' 174 | ) 175 | 176 | elif self.dataset_name == 'Cityscapes': 177 | test_dataset = CityscapesDataset(data_dir=self.config['images_dir'], json_file=self.config.get('json_file', None), shape=eval(self.config['input_size']), 178 | augmentation=None, name=self.config['name_val'], 179 | dtype='val', ignore_lb=int(self.config.get('ignore_lb', 255)), 180 | scales=(0.125, 0.25, 0.375, 0.5, 0.675, 0.75, 0.875, 1.0, 1.25, 1.5) 181 | ) 182 | elif self.dataset_name == 'Coco_stff_10k': 183 | test_dataset = CocoStuff_10kDataset(data_dir=self.config['data_dir'], shape=eval(self.config['input_size']), 184 | dtype='test', mean=self.config.get('mean', None), std=self.config.get('std', None) 185 | ) 186 | 187 | else: 188 | raise NotImplementedError('{} dataset_name not supported.'.format(self.dataset_name)) 189 | if self.is_main_process: 190 | logger.info("Test Dataset samples: {}".format(len(test_dataset))) 191 | 192 | if not distributed: 193 | return DataLoader(test_dataset, batch_size=self.config['batch_size'], shuffle=False, 194 | num_workers=self.config['num_workers'], collate_fn=test_dataset.get_collate_fn()) 195 | else: 196 | test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) 197 | sampler_flag = self.config.get('sampler', True) 198 | test_loader = DataLoader(test_dataset, 199 | batch_size=self.config['batch_size'] // nprocs, 200 | num_workers=max(self.config['num_workers'] // nprocs, 1), 201 | pin_memory=True, 202 | sampler=test_sampler if sampler_flag else None, 203 | collate_fn=test_dataset.get_collate_fn() 204 | ) 205 | return test_loader, test_sampler 206 | 207 | 208 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | # __all__ = ['DB_Model', 'Segmentation_Model', 'Classify_Model', 'Yolox_Model', 'Crnn_Model', 'Solo_Model', 'ICTransformer', 'ReBiSeNet_Model', 'MaskFormer_Model'] -------------------------------------------------------------------------------- /model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/model/utils/__init__.py -------------------------------------------------------------------------------- /model/utils/csp_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import numpy as np 15 | 16 | import os 17 | import sys 18 | 19 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 20 | sys.path.append(__dir__) 21 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 22 | 23 | from model.utils.ops import CBA, 
get_activation 24 | 25 | class ResLayer(nn.Module): 26 | "Residual layer with `in_channels` inputs." 27 | 28 | def __init__(self, in_channels: int): 29 | super().__init__() 30 | mid_channels = in_channels // 2 31 | self.layer1 = CBA( 32 | in_channels, mid_channels, ksize=1, stride=1, act="lrelu" 33 | ) 34 | self.layer2 = CBA( 35 | mid_channels, in_channels, ksize=3, stride=1, act="lrelu" 36 | ) 37 | 38 | def forward(self, x): 39 | out = self.layer2(self.layer1(x)) 40 | return x + out 41 | 42 | class Focus(nn.Module): 43 | """Focus width and height information into channel space.""" 44 | 45 | def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"): 46 | super().__init__() 47 | self.conv = CBA(in_channels * 4, out_channels, ksize, stride, act=act) 48 | 49 | def forward(self, x): 50 | patch_top_left = x[..., ::2, ::2] 51 | patch_top_right = x[..., ::2, 1::2] 52 | patch_bot_left = x[..., 1::2, ::2] 53 | patch_bot_right = x[..., 1::2, 1::2] 54 | x = torch.cat( 55 | ( 56 | patch_top_left, 57 | patch_bot_left, 58 | patch_top_right, 59 | patch_bot_right, 60 | ), 61 | dim=1, 62 | ) 63 | return self.conv(x) 64 | 65 | class Bottleneck(nn.Module): 66 | def __init__( 67 | self, 68 | in_channels, 69 | out_channels, 70 | shortcut=True, 71 | expansion=0.5, 72 | act="silu", 73 | ): 74 | super().__init__() 75 | hidden_channels = int(out_channels * expansion) 76 | self.conv1 = CBA(in_channels, hidden_channels, 1, stride=1, act=act) 77 | self.conv2 = CBA(hidden_channels, out_channels, 3, stride=1, act=act) 78 | self.use_add = shortcut and in_channels == out_channels 79 | 80 | def forward(self, x): 81 | y = self.conv2(self.conv1(x)) 82 | if self.use_add: 83 | y = y + x 84 | return y 85 | 86 | class CSPLayer(nn.Module): 87 | """C3 in yolov5, CSP Bottleneck with 3 convolutions 88 | x: c,w,h 89 | CBA(): o//2,w,h CBA():o//2,w,h 90 | Bottelneck():o//2,w,h 91 | cat(): o,w,h 92 | CBA():o,w,h 93 | 94 | """ 95 | 96 | def __init__( 97 | self, 98 | in_channels, 99 | out_channels, 100 | n=1, 101 | shortcut=False, 102 | expansion=0.5, 103 | act="silu", 104 | ): 105 | """ 106 | Args: 107 | in_channels (int): input channels. 108 | out_channels (int): output channels. 109 | n (int): number of Bottlenecks. Default value: 1. 
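                shortcut (bool): whether the stacked Bottlenecks use residual additions. Default value: False.
                expansion (float): ratio of hidden channels to out_channels. Default value: 0.5.
                act (str): activation passed to CBA ("silu", "relu" or "lrelu"). Default value: "silu".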
110 | """ 111 | super().__init__() 112 | hidden_channels = int(out_channels * expansion) 113 | self.conv1 = CBA(in_channels, hidden_channels, 1, stride=1, act=act) 114 | self.conv2 = CBA(in_channels, hidden_channels, 1, stride=1, act=act) 115 | self.conv3 = CBA(2 * hidden_channels, out_channels, 1, stride=1, act=act) 116 | module_list = [ 117 | Bottleneck( 118 | hidden_channels, hidden_channels, shortcut, 1.0, act=act 119 | ) 120 | for _ in range(n) 121 | ] 122 | self.m = nn.Sequential(*module_list) 123 | 124 | def forward(self, x): 125 | x_1 = self.conv1(x) 126 | x_2 = self.conv2(x) 127 | x_1 = self.m(x_1) 128 | x = torch.cat((x_1, x_2), dim=1) 129 | return self.conv3(x) 130 | 131 | class SPPBottleneck(nn.Module): 132 | """Spatial pyramid pooling layer used in YOLOv3-SPP 133 | 134 | x:c,w,h 135 | CBA():c//2,w,h 136 | maxpool2d(5):c//2,w,h *:c//2,w,h maxpool2d(9):c//2,w,h maxpool2d(13):c//2,w,h 137 | cat(): c*2, w, h 138 | CBA():o,w,h 139 | 140 | """ 141 | 142 | def __init__( 143 | self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu" 144 | ): 145 | super().__init__() 146 | hidden_channels = in_channels // 2 147 | self.conv1 = CBA(in_channels, hidden_channels, 1, stride=1, act=activation) 148 | self.m = nn.ModuleList( 149 | [ 150 | nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) 151 | for ks in kernel_sizes 152 | ] 153 | ) 154 | conv2_channels = hidden_channels * (len(kernel_sizes) + 1) 155 | self.conv2 = CBA(conv2_channels, out_channels, 1, stride=1, act=activation) 156 | 157 | def forward(self, x): 158 | x = self.conv1(x) 159 | x = torch.cat([x] + [m(x) for m in self.m], dim=1) 160 | x = self.conv2(x) 161 | return x 162 | 163 | class SPPBottleneck_1D(nn.Module): 164 | """Spatial pyramid pooling layer used in YOLOv3-SPP 165 | 166 | x:c,w 167 | CBA():c//2,w 168 | maxpool2d(5):c//2,w *:c//2,w maxpool2d(9):c//2,w maxpool2d(13):c//2,w 169 | cat(): c*2, w 170 | CBA():o,w 171 | 172 | """ 173 | 174 | def __init__( 175 | self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu" 176 | ): 177 | super().__init__() 178 | hidden_channels = in_channels // 2 179 | self.conv1 = nn.Sequential( 180 | nn.Conv1d( 181 | in_channels, 182 | hidden_channels, 183 | kernel_size=3, 184 | stride=1, 185 | padding=1, 186 | groups=1, 187 | bias=False, 188 | ), 189 | nn.BatchNorm1d(hidden_channels), 190 | get_activation(activation, inplace=True) 191 | ) 192 | self.m = nn.ModuleList( 193 | [ 194 | nn.MaxPool1d(kernel_size=ks, stride=1, padding=ks // 2) 195 | for ks in kernel_sizes 196 | ] 197 | ) 198 | conv2_channels = hidden_channels * (len(kernel_sizes) + 1) 199 | self.conv2 = nn.Sequential( 200 | nn.Conv1d( 201 | conv2_channels, 202 | out_channels, 203 | kernel_size=3, 204 | stride=1, 205 | padding=1, 206 | groups=1, 207 | bias=False, 208 | ), 209 | nn.BatchNorm1d(out_channels), 210 | get_activation(activation, inplace=True) 211 | ) 212 | 213 | def forward(self, x): 214 | x = self.conv1(x) 215 | x = torch.cat([x] + [m(x) for m in self.m], dim=1) 216 | x = self.conv2(x) 217 | return x -------------------------------------------------------------------------------- /model/utils/maskformer_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | from typing import List, Optional 12 | 13 | import torch 14 | import torch.nn as nn 15 | from torch import Tensor 16 | 17 | import math 18 | 19 | 
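# Transformer pieces for the MaskFormer head, mirroring the DETR-style design:
# TransformerEncoder/TransformerDecoder stacks (post-norm or pre-norm via `normalize_before`)
# plus a fixed sine/cosine positional embedding (PositionEmbeddingSine).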
import os 20 | import sys 21 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 22 | sys.path.append(__dir__) 23 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 24 | 25 | from model.utils.ops import clones, get_activation 26 | 27 | class MaskTransformer(nn.Module): 28 | def __init__( 29 | self, 30 | d_model=512, 31 | nhead=8, 32 | num_encoder_layers=6, 33 | num_decoder_layers=6, 34 | dim_feedforward=2048, 35 | dropout=0.1, 36 | activation="relu", 37 | normalize_before=False, 38 | return_intermediate_dec=False, 39 | ): 40 | super().__init__() 41 | 42 | encoder_layer = TransformerEncoderLayer( 43 | d_model, nhead, dim_feedforward, dropout, activation, normalize_before 44 | ) 45 | encoder_norm = nn.LayerNorm(d_model) if normalize_before else None 46 | self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) 47 | 48 | decoder_layer = TransformerDecoderLayer( 49 | d_model, nhead, dim_feedforward, dropout, activation, normalize_before 50 | ) 51 | decoder_norm = nn.LayerNorm(d_model) 52 | self.decoder = TransformerDecoder( 53 | decoder_layer, 54 | num_decoder_layers, 55 | decoder_norm, 56 | return_intermediate=return_intermediate_dec, 57 | ) 58 | 59 | self._reset_parameters() 60 | 61 | self.d_model = d_model 62 | self.nhead = nhead 63 | 64 | def _reset_parameters(self): 65 | for p in self.parameters(): 66 | if p.dim() > 1: 67 | nn.init.xavier_uniform_(p) 68 | 69 | def forward(self, src, mask, query_embed, pos_embed): 70 | bs, c, h, w = src.shape 71 | src = src.flatten(2).permute(2, 0, 1) 72 | pos_embed = pos_embed.flatten(2).permute(2, 0, 1) 73 | query_embed = query_embed.unsqueeze(1).repeat(1, bs, 1) 74 | if mask is not None: 75 | mask = mask.flatten(1) 76 | 77 | tgt = torch.zeros_like(query_embed) 78 | memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed) 79 | hs = self.decoder( 80 | tgt, memory, memory_key_padding_mask=mask, pos=pos_embed, query_pos=query_embed 81 | ) 82 | return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w) 83 | 84 | 85 | class TransformerEncoder(nn.Module): 86 | def __init__(self, encoder_layer, num_layers, norm=None): 87 | super().__init__() 88 | self.layers = clones(encoder_layer, num_layers) 89 | self.num_layers = num_layers 90 | self.norm = norm 91 | 92 | def forward( 93 | self, 94 | src, 95 | mask: Optional[Tensor] = None, 96 | src_key_padding_mask: Optional[Tensor] = None, 97 | pos: Optional[Tensor] = None, 98 | ): 99 | output = src 100 | 101 | for layer in self.layers: 102 | output = layer( 103 | output, src_mask=mask, src_key_padding_mask=src_key_padding_mask, pos=pos 104 | ) 105 | 106 | if self.norm is not None: 107 | output = self.norm(output) 108 | 109 | return output 110 | 111 | 112 | class TransformerDecoder(nn.Module): 113 | def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False): 114 | super().__init__() 115 | self.layers = clones(decoder_layer, num_layers) 116 | self.num_layers = num_layers 117 | self.norm = norm 118 | self.return_intermediate = return_intermediate 119 | 120 | def forward( 121 | self, 122 | tgt, 123 | memory, 124 | tgt_mask: Optional[Tensor] = None, 125 | memory_mask: Optional[Tensor] = None, 126 | tgt_key_padding_mask: Optional[Tensor] = None, 127 | memory_key_padding_mask: Optional[Tensor] = None, 128 | pos: Optional[Tensor] = None, 129 | query_pos: Optional[Tensor] = None, 130 | ): 131 | output = tgt 132 | 133 | intermediate = [] 134 | 135 | for layer in self.layers: 136 | output = layer( 137 | output, 138 | memory, 139 | 
tgt_mask=tgt_mask, 140 | memory_mask=memory_mask, 141 | tgt_key_padding_mask=tgt_key_padding_mask, 142 | memory_key_padding_mask=memory_key_padding_mask, 143 | pos=pos, 144 | query_pos=query_pos, 145 | ) 146 | if self.return_intermediate: 147 | intermediate.append(self.norm(output)) 148 | 149 | if self.norm is not None: 150 | output = self.norm(output) 151 | if self.return_intermediate: 152 | intermediate.pop() 153 | intermediate.append(output) 154 | 155 | if self.return_intermediate: 156 | return torch.stack(intermediate) 157 | 158 | return output.unsqueeze(0) 159 | 160 | 161 | class TransformerEncoderLayer(nn.Module): 162 | def __init__( 163 | self, 164 | d_model, 165 | nhead, 166 | dim_feedforward=2048, 167 | dropout=0.1, 168 | activation="relu", 169 | normalize_before=False, 170 | ): 171 | super().__init__() 172 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 173 | self.linear1 = nn.Linear(d_model, dim_feedforward) 174 | self.dropout = nn.Dropout(dropout) 175 | self.linear2 = nn.Linear(dim_feedforward, d_model) 176 | 177 | self.norm1 = nn.LayerNorm(d_model) 178 | self.norm2 = nn.LayerNorm(d_model) 179 | self.dropout1 = nn.Dropout(dropout) 180 | self.dropout2 = nn.Dropout(dropout) 181 | 182 | self.activation = get_activation(activation) 183 | self.normalize_before = normalize_before 184 | 185 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 186 | return tensor if pos is None else tensor + pos 187 | 188 | def forward_post( 189 | self, 190 | src, 191 | src_mask: Optional[Tensor] = None, 192 | src_key_padding_mask: Optional[Tensor] = None, 193 | pos: Optional[Tensor] = None, 194 | ): 195 | q = k = self.with_pos_embed(src, pos) 196 | src2 = self.self_attn( 197 | q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask 198 | )[0] 199 | src = src + self.dropout1(src2) 200 | src = self.norm1(src) 201 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 202 | src = src + self.dropout2(src2) 203 | src = self.norm2(src) 204 | return src 205 | 206 | def forward_pre( 207 | self, 208 | src, 209 | src_mask: Optional[Tensor] = None, 210 | src_key_padding_mask: Optional[Tensor] = None, 211 | pos: Optional[Tensor] = None, 212 | ): 213 | src2 = self.norm1(src) 214 | q = k = self.with_pos_embed(src2, pos) 215 | src2 = self.self_attn( 216 | q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask 217 | )[0] 218 | src = src + self.dropout1(src2) 219 | src2 = self.norm2(src) 220 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src2)))) 221 | src = src + self.dropout2(src2) 222 | return src 223 | 224 | def forward( 225 | self, 226 | src, 227 | src_mask: Optional[Tensor] = None, 228 | src_key_padding_mask: Optional[Tensor] = None, 229 | pos: Optional[Tensor] = None, 230 | ): 231 | if self.normalize_before: 232 | return self.forward_pre(src, src_mask, src_key_padding_mask, pos) 233 | return self.forward_post(src, src_mask, src_key_padding_mask, pos) 234 | 235 | 236 | class TransformerDecoderLayer(nn.Module): 237 | def __init__( 238 | self, 239 | d_model, 240 | nhead, 241 | dim_feedforward=2048, 242 | dropout=0.1, 243 | activation="relu", 244 | normalize_before=False, 245 | ): 246 | super().__init__() 247 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 248 | self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 249 | self.linear1 = nn.Linear(d_model, dim_feedforward) 250 | self.dropout = nn.Dropout(dropout) 251 | self.linear2 = nn.Linear(dim_feedforward, 
d_model) 252 | 253 | self.norm1 = nn.LayerNorm(d_model) 254 | self.norm2 = nn.LayerNorm(d_model) 255 | self.norm3 = nn.LayerNorm(d_model) 256 | self.dropout1 = nn.Dropout(dropout) 257 | self.dropout2 = nn.Dropout(dropout) 258 | self.dropout3 = nn.Dropout(dropout) 259 | 260 | self.activation = get_activation(activation) 261 | self.normalize_before = normalize_before 262 | 263 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 264 | return tensor if pos is None else tensor + pos 265 | 266 | def forward_post( 267 | self, 268 | tgt, 269 | memory, 270 | tgt_mask: Optional[Tensor] = None, 271 | memory_mask: Optional[Tensor] = None, 272 | tgt_key_padding_mask: Optional[Tensor] = None, 273 | memory_key_padding_mask: Optional[Tensor] = None, 274 | pos: Optional[Tensor] = None, 275 | query_pos: Optional[Tensor] = None, 276 | ): 277 | q = k = self.with_pos_embed(tgt, query_pos) 278 | tgt2 = self.self_attn( 279 | q, k, value=tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask 280 | )[0] 281 | tgt = tgt + self.dropout1(tgt2) 282 | tgt = self.norm1(tgt) 283 | tgt2 = self.multihead_attn( 284 | query=self.with_pos_embed(tgt, query_pos), 285 | key=self.with_pos_embed(memory, pos), 286 | value=memory, 287 | attn_mask=memory_mask, 288 | key_padding_mask=memory_key_padding_mask, 289 | )[0] 290 | tgt = tgt + self.dropout2(tgt2) 291 | tgt = self.norm2(tgt) 292 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) 293 | tgt = tgt + self.dropout3(tgt2) 294 | tgt = self.norm3(tgt) 295 | return tgt 296 | 297 | def forward_pre( 298 | self, 299 | tgt, 300 | memory, 301 | tgt_mask: Optional[Tensor] = None, 302 | memory_mask: Optional[Tensor] = None, 303 | tgt_key_padding_mask: Optional[Tensor] = None, 304 | memory_key_padding_mask: Optional[Tensor] = None, 305 | pos: Optional[Tensor] = None, 306 | query_pos: Optional[Tensor] = None, 307 | ): 308 | tgt2 = self.norm1(tgt) 309 | q = k = self.with_pos_embed(tgt2, query_pos) 310 | tgt2 = self.self_attn( 311 | q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask 312 | )[0] 313 | tgt = tgt + self.dropout1(tgt2) 314 | tgt2 = self.norm2(tgt) 315 | tgt2 = self.multihead_attn( 316 | query=self.with_pos_embed(tgt2, query_pos), 317 | key=self.with_pos_embed(memory, pos), 318 | value=memory, 319 | attn_mask=memory_mask, 320 | key_padding_mask=memory_key_padding_mask, 321 | )[0] 322 | tgt = tgt + self.dropout2(tgt2) 323 | tgt2 = self.norm3(tgt) 324 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2)))) 325 | tgt = tgt + self.dropout3(tgt2) 326 | return tgt 327 | 328 | def forward( 329 | self, 330 | tgt, 331 | memory, 332 | tgt_mask: Optional[Tensor] = None, 333 | memory_mask: Optional[Tensor] = None, 334 | tgt_key_padding_mask: Optional[Tensor] = None, 335 | memory_key_padding_mask: Optional[Tensor] = None, 336 | pos: Optional[Tensor] = None, 337 | query_pos: Optional[Tensor] = None, 338 | ): 339 | if self.normalize_before: 340 | return self.forward_pre( 341 | tgt, 342 | memory, 343 | tgt_mask, 344 | memory_mask, 345 | tgt_key_padding_mask, 346 | memory_key_padding_mask, 347 | pos, 348 | query_pos, 349 | ) 350 | return self.forward_post( 351 | tgt, 352 | memory, 353 | tgt_mask, 354 | memory_mask, 355 | tgt_key_padding_mask, 356 | memory_key_padding_mask, 357 | pos, 358 | query_pos, 359 | ) 360 | 361 | 362 | class PositionEmbeddingSine(nn.Module): 363 | """ 364 | This is a more standard version of the position embedding, very similar to the one 365 | used by the Attention is all you need paper, generalized 
to work on images. 366 | """ 367 | 368 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 369 | super().__init__() 370 | self.num_pos_feats = num_pos_feats 371 | self.temperature = temperature 372 | self.normalize = normalize 373 | if scale is not None and normalize is False: 374 | raise ValueError("normalize should be True if scale is passed") 375 | if scale is None: 376 | scale = 2 * math.pi 377 | self.scale = scale 378 | 379 | def forward(self, x, mask=None): 380 | if mask is None: 381 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) 382 | not_mask = ~mask 383 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 384 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 385 | if self.normalize: 386 | eps = 1e-6 387 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 388 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 389 | 390 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 391 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 392 | 393 | pos_x = x_embed[:, :, :, None] / dim_t 394 | pos_y = y_embed[:, :, :, None] / dim_t 395 | pos_x = torch.stack( 396 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 397 | ).flatten(3) 398 | pos_y = torch.stack( 399 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 400 | ).flatten(3) 401 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 402 | return pos -------------------------------------------------------------------------------- /model/utils/mobilenetv3_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn import init 5 | from torchsummary import summary 6 | import os 7 | import sys 8 | 9 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(__dir__) 11 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 12 | 13 | from model.utils.ops import hsigmoid, hswish 14 | 15 | class SeModule(nn.Module): 16 | def __init__(self, in_size, reduction=4): 17 | super(SeModule, self).__init__() 18 | self.se = nn.Sequential( 19 | nn.AdaptiveAvgPool2d(1), 20 | nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False), 21 | nn.BatchNorm2d(in_size // reduction), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False), 24 | nn.BatchNorm2d(in_size), 25 | hsigmoid() 26 | ) 27 | 28 | def forward(self, x): 29 | return x * self.se(x) 30 | 31 | 32 | class MobilenetBlock(nn.Module): 33 | '''expand + depthwise + pointwise''' 34 | def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride): 35 | super(MobilenetBlock, self).__init__() 36 | self.stride = stride 37 | self.se = semodule 38 | 39 | self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False) 40 | self.bn1 = nn.BatchNorm2d(expand_size) 41 | self.nolinear1 = nolinear 42 | self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False) 43 | self.bn2 = nn.BatchNorm2d(expand_size) 44 | self.nolinear2 = nolinear 45 | self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False) 46 | self.bn3 = nn.BatchNorm2d(out_size) 47 | 48 | self.shortcut = nn.Sequential() 49 | if stride == 1 and in_size != 
out_size: 50 | self.shortcut = nn.Sequential( 51 | nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False), 52 | nn.BatchNorm2d(out_size), 53 | ) 54 | 55 | def forward(self, x): 56 | out = self.nolinear1(self.bn1(self.conv1(x))) 57 | out = self.nolinear2(self.bn2(self.conv2(out))) 58 | out = self.bn3(self.conv3(out)) 59 | if self.se != None: 60 | out = self.se(out) 61 | out = out + self.shortcut(x) if self.stride==1 else out 62 | return out 63 | 64 | class MobileNetV3_Large_(nn.Module): 65 | def __init__(self, num_classes=1000): 66 | super(MobileNetV3_Large_, self).__init__() 67 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(16) 69 | self.hs1 = hswish() 70 | 71 | self.bneck = nn.ModuleList([]) 72 | self.bneck.append(MobilenetBlock(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1)) 73 | self.bneck.append(MobilenetBlock(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2)) 74 | self.bneck.append(MobilenetBlock(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1)) 75 | self.bneck.append(MobilenetBlock(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2)) 76 | self.bneck.append(MobilenetBlock(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1)) 77 | self.bneck.append(MobilenetBlock(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1)) 78 | self.bneck.append(MobilenetBlock(3, 40, 240, 80, hswish(), None, 2)) 79 | self.bneck.append(MobilenetBlock(3, 80, 200, 80, hswish(), None, 1)) 80 | self.bneck.append(MobilenetBlock(3, 80, 184, 80, hswish(), None, 1)) 81 | self.bneck.append(MobilenetBlock(3, 80, 184, 80, hswish(), None, 1)) 82 | self.bneck.append(MobilenetBlock(3, 80, 480, 112, hswish(), SeModule(112), 1)) 83 | self.bneck.append(MobilenetBlock(3, 112, 672, 112, hswish(), SeModule(112), 1)) 84 | self.bneck.append(MobilenetBlock(5, 112, 672, 160, hswish(), SeModule(160), 1)) 85 | self.bneck.append(MobilenetBlock(5, 160, 672, 160, hswish(), SeModule(160), 2)) 86 | self.bneck.append(MobilenetBlock(5, 160, 960, 160, hswish(), SeModule(160), 1)) 87 | 88 | self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False) 89 | self.bn2 = nn.BatchNorm2d(960) 90 | self.hs2 = hswish() 91 | self.linear3 = nn.Linear(960, 1280) 92 | self.bn3 = nn.BatchNorm1d(1280) 93 | self.hs3 = hswish() 94 | self.linear4 = nn.Linear(1280, num_classes) 95 | self.init_params() 96 | 97 | def init_params(self): 98 | for m in self.modules(): 99 | if isinstance(m, nn.Conv2d): 100 | init.kaiming_normal_(m.weight, mode='fan_out') 101 | if m.bias is not None: 102 | init.constant_(m.bias, 0) 103 | elif isinstance(m, nn.BatchNorm2d): 104 | init.constant_(m.weight, 1) 105 | init.constant_(m.bias, 0) 106 | elif isinstance(m, nn.Linear): 107 | init.normal_(m.weight, std=0.001) 108 | if m.bias is not None: 109 | init.constant_(m.bias, 0) 110 | 111 | def forward(self, x): 112 | out = self.hs1(self.bn1(self.conv1(x))) 113 | for layer in self.bneck: 114 | out = layer(out) 115 | print(out.size()) 116 | out = self.hs2(self.bn2(self.conv2(out))) 117 | out = F.avg_pool2d(out, 7) 118 | out = out.view(out.size(0), -1) 119 | out = self.hs3(self.bn3(self.linear3(out))) 120 | out = self.linear4(out) 121 | return out 122 | 123 | class MobileNetV3_Small_(nn.Module): 124 | def __init__(self, num_classes=1000): 125 | super(MobileNetV3_Small_, self).__init__() 126 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) 127 | self.bn1 = nn.BatchNorm2d(16) 128 | self.hs1 = hswish() 129 | 130 | self.bneck = nn.ModuleList([]) 131 | 
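        # Each entry below is MobilenetBlock(kernel_size, in_size, expand_size, out_size,
        # nonlinearity, SE module or None, stride), i.e. the bneck rows of the
        # MobileNetV3-Small configuration.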
self.bneck.append(MobilenetBlock(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2)) 132 | self.bneck.append(MobilenetBlock(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2)) 133 | self.bneck.append(MobilenetBlock(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1)) 134 | self.bneck.append(MobilenetBlock(5, 24, 96, 40, hswish(), SeModule(40), 2)) 135 | self.bneck.append(MobilenetBlock(5, 40, 240, 40, hswish(), SeModule(40), 1)) 136 | self.bneck.append(MobilenetBlock(5, 40, 240, 40, hswish(), SeModule(40), 1)) 137 | self.bneck.append(MobilenetBlock(5, 40, 120, 48, hswish(), SeModule(48), 1)) 138 | self.bneck.append(MobilenetBlock(5, 48, 144, 48, hswish(), SeModule(48), 1)) 139 | self.bneck.append(MobilenetBlock(5, 48, 288, 96, hswish(), SeModule(96), 2)) 140 | self.bneck.append(MobilenetBlock(5, 96, 576, 96, hswish(), SeModule(96), 1)) 141 | self.bneck.append(MobilenetBlock(5, 96, 576, 96, hswish(), SeModule(96), 1)) 142 | 143 | self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False) 144 | self.bn2 = nn.BatchNorm2d(576) 145 | self.hs2 = hswish() 146 | self.linear3 = nn.Linear(576, 1280) 147 | self.bn3 = nn.BatchNorm1d(1280) 148 | self.hs3 = hswish() 149 | self.linear4 = nn.Linear(1280, num_classes) 150 | self.init_params() 151 | 152 | def init_params(self): 153 | for m in self.modules(): 154 | if isinstance(m, nn.Conv2d): 155 | init.kaiming_normal_(m.weight, mode='fan_out') 156 | if m.bias is not None: 157 | init.constant_(m.bias, 0) 158 | elif isinstance(m, nn.BatchNorm2d): 159 | init.constant_(m.weight, 1) 160 | init.constant_(m.bias, 0) 161 | elif isinstance(m, nn.Linear): 162 | init.normal_(m.weight, std=0.001) 163 | if m.bias is not None: 164 | init.constant_(m.bias, 0) 165 | 166 | def forward(self, x): 167 | out = self.hs1(self.bn1(self.conv1(x))) 168 | for layer in self.bneck: 169 | out = layer(out) 170 | print(out.size()) 171 | out = self.hs2(self.bn2(self.conv2(out))) 172 | out = F.avg_pool2d(out, 7) 173 | out = out.view(out.size(0), -1) 174 | out = self.hs3(self.bn3(self.linear3(out))) 175 | out = self.linear4(out) 176 | return out 177 | 178 | 179 | if __name__ == '__main__': 180 | 181 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 182 | 183 | mobilenetv3_l = MobileNetV3_Large_().to(device) 184 | summary(mobilenetv3_l, (3, 256, 256)) 185 | 186 | mobilenetv3_s = MobileNetV3_Small_().to(device) 187 | summary(mobilenetv3_s, (3, 256, 256)) 188 | 189 | -------------------------------------------------------------------------------- /model/utils/mobilevit_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchsummary import summary 4 | 5 | from einops import rearrange 6 | import os 7 | import sys 8 | 9 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(__dir__) 11 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 12 | 13 | from model.utils.ops import CBA 14 | 15 | class PreNorm(nn.Module): 16 | def __init__(self, dim, fn): 17 | super().__init__() 18 | self.norm = nn.LayerNorm(dim) 19 | self.fn = fn 20 | 21 | def forward(self, x, **kwargs): 22 | return self.fn(self.norm(x), **kwargs) 23 | 24 | 25 | class FeedForward(nn.Module): 26 | def __init__(self, dim, hidden_dim, dropout=0.): 27 | super().__init__() 28 | self.net = nn.Sequential( 29 | nn.Linear(dim, hidden_dim), 30 | nn.SiLU(), 31 | nn.Dropout(dropout), 32 | nn.Linear(hidden_dim, dim), 33 | nn.Dropout(dropout) 34 | ) 35 | 36 | def forward(self, x): 37 | 
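        # Position-wise MLP: Linear -> SiLU -> Dropout -> Linear -> Dropout, applied over the last dim.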
return self.net(x) 38 | 39 | 40 | class Attention(nn.Module): 41 | def __init__(self, dim, heads=8, dim_head=64, dropout=0.): 42 | super().__init__() 43 | inner_dim = dim_head * heads 44 | project_out = not (heads == 1 and dim_head == dim) 45 | 46 | self.heads = heads 47 | self.scale = dim_head ** -0.5 48 | 49 | self.attend = nn.Softmax(dim = -1) 50 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False) 51 | 52 | self.to_out = nn.Sequential( 53 | nn.Linear(inner_dim, dim), 54 | nn.Dropout(dropout) 55 | ) if project_out else nn.Identity() 56 | 57 | def forward(self, x): 58 | qkv = self.to_qkv(x).chunk(3, dim=-1) 59 | q, k, v = map(lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h = self.heads), qkv) 60 | 61 | dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale 62 | attn = self.attend(dots) 63 | out = torch.matmul(attn, v) 64 | out = rearrange(out, 'b p h n d -> b p n (h d)') 65 | return self.to_out(out) 66 | 67 | 68 | class Transformer(nn.Module): 69 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): 70 | super().__init__() 71 | self.layers = nn.ModuleList([]) 72 | for _ in range(depth): 73 | self.layers.append(nn.ModuleList([ 74 | PreNorm(dim, Attention(dim, heads, dim_head, dropout)), 75 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout)) 76 | ])) 77 | 78 | def forward(self, x): 79 | for attn, ff in self.layers: 80 | x = attn(x) + x 81 | x = ff(x) + x 82 | return x 83 | 84 | 85 | class MV2Block(nn.Module): 86 | def __init__(self, inp, oup, stride=1, expansion=4): 87 | super().__init__() 88 | self.stride = stride 89 | assert stride in [1, 2] 90 | 91 | hidden_dim = int(inp * expansion) 92 | self.use_res_connect = self.stride == 1 and inp == oup 93 | 94 | if expansion == 1: 95 | self.conv = nn.Sequential( 96 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 97 | nn.BatchNorm2d(hidden_dim), 98 | nn.SiLU(), 99 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 100 | nn.BatchNorm2d(oup), 101 | ) 102 | else: 103 | self.conv = nn.Sequential( 104 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 105 | nn.BatchNorm2d(hidden_dim), 106 | nn.SiLU(), 107 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 108 | nn.BatchNorm2d(hidden_dim), 109 | nn.SiLU(), 110 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 111 | nn.BatchNorm2d(oup), 112 | ) 113 | 114 | def forward(self, x): 115 | if self.use_res_connect: 116 | return x + self.conv(x) 117 | else: 118 | return self.conv(x) 119 | 120 | 121 | class MobileViTBlock(nn.Module): 122 | def __init__(self, dim, depth, channel, kernel_size, patch_size, mlp_dim, dropout=0.): 123 | super().__init__() 124 | self.ph, self.pw = patch_size 125 | 126 | self.conv1 = CBA(channel, channel, ksize=kernel_size, stride=1, pad=1, bias=False, act="silu") 127 | self.conv2 = CBA(channel, dim, ksize=1, stride=1, pad=0, bias=False, act="silu") 128 | 129 | self.transformer = Transformer(dim, depth, 4, 8, mlp_dim, dropout) 130 | 131 | self.conv3 = CBA(dim, channel, ksize=1, stride=1, pad=0, bias=False, act="silu") 132 | self.conv4 = CBA(2 * channel, channel, ksize=kernel_size, stride=1, pad=1, bias=False, act="silu") 133 | 134 | def forward(self, x): 135 | y = x.clone() 136 | 137 | x = self.conv1(x) 138 | x = self.conv2(x) 139 | 140 | _, _, h, w = x.shape 141 | x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw) 142 | x = self.transformer(x) 143 | x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)', h=h//self.ph, w=w//self.pw, ph=self.ph, pw=self.pw) 
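        # Patch tokens are folded back to a (b, dim, h, w) feature map; conv3 projects dim -> channel,
        # then conv4 fuses the global (transformer) branch with the local input y after concatenation.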
144 | 145 | x = self.conv3(x) 146 | x = torch.cat((x, y), 1) 147 | x = self.conv4(x) 148 | return x 149 | 150 | 151 | class MobileViT_(nn.Module): 152 | def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)): 153 | super().__init__() 154 | ih, iw = image_size 155 | ph, pw = patch_size 156 | assert ih % ph == 0 and iw % pw == 0 157 | 158 | L = [2, 4, 3] 159 | 160 | self.conv1 = CBA(3, channels[0], ksize=3, stride=2, pad=1, bias=False, act="silu") 161 | 162 | self.mv2 = nn.ModuleList([]) 163 | self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion)) 164 | self.mv2.append(MV2Block(channels[1], channels[2], 2, expansion)) 165 | self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion)) 166 | self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion)) 167 | self.mv2.append(MV2Block(channels[3], channels[4], 2, expansion)) 168 | self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion)) 169 | self.mv2.append(MV2Block(channels[7], channels[8], 2, expansion)) 170 | 171 | self.mvit = nn.ModuleList([]) 172 | self.mvit.append(MobileViTBlock(dims[0], L[0], channels[5], kernel_size, patch_size, int(dims[0]*2))) 173 | self.mvit.append(MobileViTBlock(dims[1], L[1], channels[7], kernel_size, patch_size, int(dims[1]*4))) 174 | self.mvit.append(MobileViTBlock(dims[2], L[2], channels[9], kernel_size, patch_size, int(dims[2]*4))) 175 | 176 | self.conv2 = CBA(channels[-2], channels[-1], ksize=1, stride=1, pad=0, bias=False, act="silu") 177 | 178 | self.pool = nn.AvgPool2d(ih//32, 1) 179 | self.fc = nn.Linear(channels[-1], num_classes, bias=False) 180 | 181 | def forward(self, x): 182 | x = self.conv1(x) 183 | x = self.mv2[0](x) 184 | print(x.size()) 185 | 186 | x = self.mv2[1](x) 187 | x = self.mv2[2](x) 188 | x = self.mv2[3](x) 189 | print(x.size()) 190 | 191 | x = self.mv2[4](x) 192 | x = self.mvit[0](x) 193 | print(x.size()) 194 | 195 | x = self.mv2[5](x) 196 | x = self.mvit[1](x) 197 | print(x.size()) 198 | 199 | x = self.mv2[6](x) 200 | x = self.mvit[2](x) 201 | x = self.conv2(x) 202 | print(x.size()) 203 | 204 | x = self.pool(x).view(-1, x.shape[1]) 205 | x = self.fc(x) 206 | return x 207 | 208 | 209 | def mobilevit_xxs(): 210 | dims = [64, 80, 96] 211 | channels = [16, 16, 24, 24, 48, 48, 64, 64, 80, 80, 320] 212 | return MobileViT_((256, 256), dims, channels, num_classes=1000, expansion=2) 213 | 214 | 215 | def mobilevit_xs(): 216 | dims = [96, 120, 144] 217 | channels = [16, 32, 48, 48, 64, 64, 80, 80, 96, 96, 384] 218 | return MobileViT_((256, 256), dims, channels, num_classes=1000) 219 | 220 | 221 | def mobilevit_s(): 222 | dims = [144, 192, 240] 223 | channels = [16, 32, 64, 64, 96, 96, 128, 128, 160, 160, 640] 224 | return MobileViT_((256, 256), dims, channels, num_classes=1000) 225 | 226 | 227 | def count_parameters(model): 228 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 229 | 230 | 231 | if __name__ == '__main__': 232 | img = torch.randn(5, 3, 256, 256) 233 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 234 | img = img.to(device) 235 | 236 | vit = mobilevit_xxs() 237 | vit = vit.to(device) 238 | summary(vit, (3, 256, 256)) 239 | print(count_parameters(vit)) 240 | 241 | vit = mobilevit_xs() 242 | vit = vit.to(device) 243 | summary(vit, (3, 256, 256)) 244 | print(count_parameters(vit)) 245 | 246 | vit = mobilevit_s() 247 | vit = vit.to(device) 248 | summary(vit, (3, 256, 256)) 249 | print(count_parameters(vit)) 
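    # Minimal shape check for a single MobileViTBlock (illustrative only; dim=64,
    # channel=32 and 2x2 patches are assumed values, not taken from any config above).
    block = MobileViTBlock(dim=64, depth=2, channel=32, kernel_size=3, patch_size=(2, 2), mlp_dim=128).to(device)
    feat = torch.randn(2, 32, 64, 64, device=device)
    print(block(feat).shape)  # torch.Size([2, 32, 64, 64]): channels and spatial size are preserved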
-------------------------------------------------------------------------------- /model/utils/ops.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | from torch.nn.utils import clip_grad 15 | 16 | import math 17 | from fractions import gcd 18 | 19 | import copy 20 | 21 | CONV_SELECT = {'conv1d': nn.Conv1d, 'conv2d': nn.Conv2d, 'conv3d': nn.Conv3d} 22 | BN_SELECT = {'conv1d': nn.BatchNorm1d, 'conv2d': nn.BatchNorm2d, 'LN': nn.LayerNorm} 23 | 24 | def clones(_to_clone_module, _clone_times): 25 | """Produce N identical layers.""" 26 | return nn.ModuleList([copy.deepcopy(_to_clone_module) for _ in range(_clone_times)]) 27 | 28 | def clip_grads(params, clip_norm_val=35): 29 | params = list( 30 | filter(lambda p: p.requires_grad and p.grad is not None, params)) 31 | if len(params) > 0: 32 | return clip_grad.clip_grad_norm_(params, max_norm=clip_norm_val, norm_type=2) 33 | 34 | def drop_path(x, drop_prob: float = 0., training: bool = False): 35 | if drop_prob == 0. or not training: 36 | return x 37 | keep_prob = 1 - drop_prob 38 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) 39 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 40 | random_tensor.floor_() 41 | output = x.div(keep_prob) * random_tensor 42 | return output 43 | 44 | def get_activation(name="silu", inplace=True): 45 | if name == "silu": 46 | module = nn.SiLU(inplace=inplace) 47 | elif name == "relu": 48 | module = nn.ReLU(inplace=inplace) 49 | elif name == "lrelu": 50 | module = nn.LeakyReLU(0.1, inplace=inplace) 51 | elif name is None: 52 | module = None 53 | else: 54 | raise AttributeError("Unsupported act type: {}".format(name)) 55 | return module 56 | 57 | class hswish(nn.Module): 58 | def forward(self, x): 59 | out = x * F.relu6(x + 3, inplace=True) / 6 60 | return out 61 | 62 | class hsigmoid(nn.Module): 63 | def forward(self, x): 64 | out = F.relu6(x + 3, inplace=True) / 6 65 | return out 66 | 67 | class DropPath(nn.Module): 68 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
69 | """ 70 | def __init__(self, drop_prob=None): 71 | super(DropPath, self).__init__() 72 | self.drop_prob = drop_prob 73 | 74 | def forward(self, x): 75 | return drop_path(x, self.drop_prob, self.training) 76 | 77 | class SiLU(nn.Module): 78 | """export-friendly version of nn.SiLU()""" 79 | @staticmethod 80 | def forward(x): 81 | return x * torch.sigmoid(x) 82 | 83 | """ 84 | class CBA(nn.Module): 85 | # A Conv2d -> Batchnorm -> silu/leaky relu block 86 | 87 | def __init__( 88 | self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu", use_bn=True, pad=None, norm='BN'): 89 | super().__init__() 90 | if pad is None: 91 | # same padding 92 | pad = (ksize - 1) // 2 93 | self.conv = nn.Conv2d( 94 | in_channels, 95 | out_channels, 96 | kernel_size=ksize, 97 | stride=stride, 98 | padding=pad, 99 | groups=groups, 100 | bias=bias, 101 | ) 102 | 103 | if norm == 'GN': 104 | self.bn = nn.GroupNorm(32, out_channels) 105 | else: 106 | self.bn = nn.BatchNorm2d(out_channels) 107 | 108 | self.act = get_activation(act, inplace=True) 109 | self.use_bn = use_bn 110 | 111 | def forward(self, x): 112 | if self.use_bn: 113 | return self.act(self.bn(self.conv(x))) 114 | else: 115 | return self.act((self.conv(x))) 116 | """ 117 | 118 | class CBA(nn.Module): 119 | def __init__( 120 | self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu", use_bn=True, pad=None, norm='BN', group_num=None, conv='conv2d', res_type=False): 121 | super().__init__() 122 | self.res_type = res_type 123 | 124 | if pad is None: 125 | pad = (ksize - 1) // 2 126 | self.conv = CONV_SELECT[conv]( 127 | in_channels, 128 | out_channels, 129 | kernel_size=ksize, 130 | stride=stride, 131 | padding=pad, 132 | groups=groups, 133 | bias=bias, 134 | ) 135 | 136 | if norm == 'GN': 137 | if group_num is None: 138 | self.bn = nn.GroupNorm(gcd(32, out_channels), out_channels) 139 | else: 140 | self.bn = nn.GroupNorm(gcd(group_num, out_channels), out_channels) 141 | else: 142 | self.bn = BN_SELECT[conv](out_channels) 143 | 144 | self.act = get_activation(act, inplace=True) 145 | if not use_bn: 146 | self.bn = None 147 | 148 | if self.res_type: 149 | assert norm is not None 150 | self.conv2 = CONV_SELECT[conv]( 151 | out_channels, 152 | out_channels, 153 | kernel_size=3, 154 | stride=1, 155 | padding=1, 156 | groups=1, 157 | bias=False, 158 | ) 159 | if norm == 'GN': 160 | self.bn2 = nn.GroupNorm(gcd(32, out_channels), out_channels) if group_num is None else nn.GroupNorm(gcd(group_num, out_channels), out_channels) 161 | else: 162 | self.bn2 = BN_SELECT[conv](out_channels) 163 | 164 | if in_channels != out_channels or stride != 1: 165 | if norm == 'GN': 166 | self.transform = nn.Sequential( 167 | CONV_SELECT[conv](in_channels, out_channels, kernel_size=3 if stride!=1 else 1, stride=stride, padding=1 if stride!=1 else 0, groups=1, bias=False), 168 | nn.GroupNorm(gcd(32, out_channels), out_channels) if group_num is None else nn.GroupNorm(gcd(group_num, out_channels), out_channels)) 169 | elif norm == 'BN': 170 | self.transform = nn.Sequential( 171 | CONV_SELECT[conv](in_channels, out_channels, kernel_size=3 if stride!=1 else 1, stride=stride, padding=1 if stride!=1 else 0, groups=1, bias=False), 172 | BN_SELECT[conv](out_channels)) 173 | else: 174 | raise NotImplementedError('Type {} not supported.'.format(norm)) 175 | else: 176 | self.transform = None 177 | 178 | def forward(self, x): 179 | 180 | out = self.conv(x) 181 | if self.bn is not None: 182 | out = self.bn(out) 183 | if self.act is not None: 184 | out 
= self.act(out) 185 | 186 | if self.res_type: 187 | out = self.conv2(out) 188 | out = self.bn2(out) 189 | if self.transform is not None: 190 | out += self.transform(x) 191 | else: 192 | out += x 193 | if self.act is not None: 194 | out = self.act(out) 195 | 196 | return out 197 | 198 | class MLP(nn.Module): 199 | """A Linear -> norm -> activation block""" 200 | 201 | def __init__( 202 | self, num_in, num_out=None, bias=True, act="relu", norm='GN', group_num=None, res_type=False): 203 | super().__init__() 204 | if num_out is None: 205 | num_out = num_in 206 | 207 | self.linear = nn.Linear(num_in, num_out, bias=bias) 208 | self.res_type = res_type 209 | 210 | if norm is not None: 211 | if norm == 'GN': 212 | self.norm = nn.GroupNorm(gcd(32, num_out), num_out) if group_num is None else nn.GroupNorm(gcd(group_num, num_out), num_out) 213 | elif norm == 'LN': 214 | self.norm = nn.LayerNorm(num_out) 215 | elif norm == 'BN': 216 | self.norm = nn.BatchNorm1d(num_out) 217 | else: 218 | raise NotImplementedError('Type {} not supported.'.format(norm)) 219 | else: 220 | self.norm = None 221 | 222 | if act is not None: 223 | self.act = get_activation(act, inplace=True) 224 | else: 225 | self.act = None 226 | 227 | if self.res_type: 228 | assert norm is not None 229 | self.linear2 = nn.Linear(num_out, num_out, bias=bias) 230 | if norm == 'GN': 231 | self.norm2 = nn.GroupNorm(gcd(32, num_out), num_out) if group_num is None else nn.GroupNorm(gcd(group_num, num_out), num_out) 232 | elif norm == 'LN': 233 | self.norm2 = nn.LayerNorm(num_out) 234 | elif norm == 'BN': 235 | self.norm2 = nn.BatchNorm1d(num_out) 236 | else: 237 | raise NotImplementedError('Type {} not supported.'.format(norm)) 238 | 239 | if num_in != num_out: 240 | if norm == 'GN': 241 | self.transform = nn.Sequential( 242 | nn.Linear(num_in, num_out, bias=bias), 243 | nn.GroupNorm(gcd(32, num_out), num_out) if group_num is None else nn.GroupNorm(gcd(group_num, num_out), num_out)) 244 | elif norm == 'LN': 245 | self.transform = nn.Sequential( 246 | nn.Linear(num_in, num_out, bias=bias), 247 | nn.LayerNorm(num_out)) 248 | elif norm == 'BN': 249 | self.transform = nn.Sequential( 250 | nn.Linear(num_in, num_out, bias=bias), 251 | nn.BatchNorm1d(num_out)) 252 | else: 253 | raise NotImplementedError('Type {} not supported.'.format(norm)) 254 | else: 255 | self.transform = None 256 | 257 | def forward(self, x): 258 | out = self.linear(x) 259 | if self.norm is not None: 260 | out = self.norm(out) 261 | if self.act is not None: 262 | out = self.act(out) 263 | 264 | if self.res_type: 265 | out = self.linear2(out) 266 | out = self.norm2(out) 267 | if self.transform is not None: 268 | out += self.transform(x) 269 | else: 270 | out += x 271 | if self.act is not None: 272 | out = self.act(out) 273 | 274 | return out 275 | 276 | class SeparableConv(nn.Module): 277 | def __init__(self, in_channels, out_channels=None, act="silu", use_bn=True, norm='BN'): 278 | super(SeparableConv, self).__init__() 279 | self.use_bn = use_bn 280 | if out_channels is None: 281 | out_channels = in_channels 282 | 283 | 284 | self.depthwise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding='same', groups=in_channels, bias=False) 285 | self.pointwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding='same', bias=True) 286 | 287 | if norm == 'GN': 288 | self.bn = nn.GroupNorm(32, out_channels) 289 | else: 290 | self.bn = nn.BatchNorm2d(num_features=out_channels, momentum=0.01, eps=1e-3) 291 | 292 | if act is not None: 293 | 
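            # Optional activation; when `act` is None the block reduces to
            # depthwise 3x3 -> pointwise 1x1 -> norm only.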
self.act = get_activation(act, inplace=True) 294 | else: 295 | self.act = None 296 | 297 | def forward(self, x): 298 | x = self.depthwise_conv(x) 299 | x = self.pointwise_conv(x) 300 | 301 | if self.use_bn: 302 | x = self.bn(x) 303 | 304 | if self.act is not None: 305 | x = self.act(x) 306 | 307 | return x 308 | 309 | class MaxPool2dStaticSamePadding(nn.Module): 310 | """ 311 | created by Zylo117 312 | The real keras/tensorflow MaxPool2d with same padding 313 | """ 314 | 315 | def __init__(self, *args, **kwargs): 316 | super().__init__() 317 | self.pool = nn.MaxPool2d(*args, **kwargs) 318 | self.stride = self.pool.stride 319 | self.kernel_size = self.pool.kernel_size 320 | 321 | if isinstance(self.stride, int): 322 | self.stride = [self.stride] * 2 323 | elif len(self.stride) == 1: 324 | self.stride = [self.stride[0]] * 2 325 | 326 | if isinstance(self.kernel_size, int): 327 | self.kernel_size = [self.kernel_size] * 2 328 | elif len(self.kernel_size) == 1: 329 | self.kernel_size = [self.kernel_size[0]] * 2 330 | 331 | def forward(self, x): 332 | h, w = x.shape[-2:] 333 | 334 | extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1] 335 | extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0] 336 | 337 | left = extra_h // 2 338 | right = extra_h - left 339 | top = extra_v // 2 340 | bottom = extra_v - top 341 | 342 | x = F.pad(x, [left, right, top, bottom]) 343 | 344 | x = self.pool(x) 345 | return x 346 | 347 | class FFN(nn.Module): 348 | """Very simple multi-layer perceptron (also called FFN)""" 349 | 350 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers): 351 | super().__init__() 352 | self.num_layers = num_layers 353 | h = [hidden_dim] * (num_layers - 1) 354 | self.layers = nn.ModuleList( 355 | nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]) 356 | ) 357 | 358 | def forward(self, x): 359 | for i, layer in enumerate(self.layers): 360 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 361 | return x 362 | 363 | class BidirectionalLSTM(nn.Module): 364 | def __init__(self, nIn, nHidden, nOut): 365 | super(BidirectionalLSTM, self).__init__() 366 | 367 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 368 | self.FC = nn.Linear(nHidden * 2, nOut) 369 | 370 | def forward(self, input): 371 | recurrent, _ = self.rnn(input) 372 | T, b, h = recurrent.size() 373 | t_rec = recurrent.view(T * b, h) 374 | 375 | output = self.FC(t_rec) 376 | output = output.view(T, b, -1) 377 | 378 | return output -------------------------------------------------------------------------------- /model/utils/rebise_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import math 15 | 16 | import os 17 | import sys 18 | 19 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 20 | sys.path.append(__dir__) 21 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 22 | 23 | from model.utils.ops import CBA 24 | from tools.nninit import common_init 25 | 26 | class CatBottleneck(nn.Module): 27 | def __init__(self, in_planes, out_planes, block_num=3, stride=1): 28 | super(CatBottleneck, self).__init__() 29 | self.conv_list = nn.ModuleList() 30 | self.stride = stride 31 | if stride == 2: 32 | self.avd_layer = nn.Sequential( 33 | 
nn.Conv2d(out_planes//2, out_planes//2, kernel_size=3, stride=2, padding=1, groups=out_planes//2, bias=False), 34 | nn.BatchNorm2d(out_planes//2), 35 | ) 36 | self.skip = nn.AvgPool2d(kernel_size=3, stride=2, padding=1) 37 | stride = 1 38 | 39 | for idx in range(block_num): 40 | if idx == 0: 41 | self.conv_list.append(CBA(in_planes, out_planes//2, ksize=1, stride=1, act="relu")) 42 | elif idx == 1 and block_num == 2: 43 | self.conv_list.append(CBA(out_planes//2, out_planes//2, ksize=3, stride=stride, act="relu")) 44 | elif idx == 1 and block_num > 2: 45 | self.conv_list.append(CBA(out_planes//2, out_planes//4, ksize=3, stride=stride, act="relu")) 46 | elif idx < block_num - 1: 47 | self.conv_list.append(CBA(out_planes//int(math.pow(2, idx)), out_planes//int(math.pow(2, idx+1)), ksize=3, stride=1)) 48 | else: 49 | self.conv_list.append(CBA(out_planes//int(math.pow(2, idx)), out_planes//int(math.pow(2, idx)), ksize=3, stride=1)) 50 | 51 | def forward(self, x): 52 | out_list = [] 53 | out1 = self.conv_list[0](x) 54 | 55 | for idx, conv in enumerate(self.conv_list[1:]): 56 | if idx == 0: 57 | if self.stride == 2: 58 | out = conv(self.avd_layer(out1)) 59 | else: 60 | out = conv(out1) 61 | else: 62 | out = conv(out) 63 | out_list.append(out) 64 | 65 | if self.stride == 2: 66 | out1 = self.skip(out1) 67 | out_list.insert(0, out1) 68 | 69 | out = torch.cat(out_list, dim=1) 70 | return out 71 | 72 | class DetailHead(nn.Module): 73 | def __init__(self, in_chan, mid_chan, n_classes): 74 | super(DetailHead, self).__init__() 75 | self.conv = CBA(in_chan, mid_chan, ksize=3, stride=1, act="relu", pad=1) 76 | self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False) 77 | self.apply(self._init_weights) 78 | 79 | def forward(self, x): 80 | x = self.conv(x) 81 | x = self.conv_out(x) 82 | return x 83 | 84 | def _init_weights(self, m): 85 | common_init(m) 86 | 87 | class AttentionRefinementModule(nn.Module): 88 | def __init__(self, in_chan, out_chan, ksize=3): 89 | super(AttentionRefinementModule, self).__init__() 90 | self.conv = CBA(in_chan, out_chan, ksize=ksize, stride=1, act="relu", pad=1) 91 | self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False) 92 | self.bn_atten = nn.BatchNorm2d(out_chan) 93 | self.sigmoid_atten = nn.Sigmoid() 94 | self.apply(self._init_weights) 95 | 96 | def forward(self, x): 97 | feat = self.conv(x) 98 | atten = F.avg_pool2d(feat, feat.size()[2:]) 99 | atten = self.conv_atten(atten) 100 | atten = self.bn_atten(atten) 101 | atten = self.sigmoid_atten(atten) 102 | out = torch.mul(feat, atten) 103 | return out 104 | 105 | def _init_weights(self, m): 106 | common_init(m) -------------------------------------------------------------------------------- /model/utils/res_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import torch.nn as nn 12 | 13 | model_urls = { 14 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 15 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 16 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 17 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 18 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 19 | } 20 | 21 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 22 | """3x3 convolution with 
padding""" 23 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 24 | padding=dilation, groups=groups, bias=False, dilation=dilation) 25 | 26 | 27 | def conv1x1(in_planes, out_planes, stride=1): 28 | """1x1 convolution""" 29 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 30 | 31 | 32 | class BasicBlock(nn.Module): 33 | expansion = 1 34 | 35 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 36 | base_width=64, dilation=1, norm_layer=None): 37 | super(BasicBlock, self).__init__() 38 | if norm_layer is None: 39 | norm_layer = nn.BatchNorm2d 40 | if groups != 1 or base_width != 64: 41 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 42 | if dilation > 1: 43 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 44 | self.conv1 = conv3x3(inplanes, planes, stride) 45 | self.bn1 = norm_layer(planes) 46 | self.conv2 = conv3x3(planes, planes) 47 | self.bn2 = norm_layer(planes) 48 | self.relu = nn.ReLU(inplace=True) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | identity = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | 62 | if self.downsample is not None: 63 | identity = self.downsample(x) 64 | 65 | out += identity 66 | out = self.relu(out) 67 | 68 | return out 69 | 70 | class Bottleneck(nn.Module): 71 | 72 | expansion = 4 73 | 74 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 75 | base_width=64, dilation=1, norm_layer=None): 76 | super(Bottleneck, self).__init__() 77 | if norm_layer is None: 78 | norm_layer = nn.BatchNorm2d 79 | width = int(planes * (base_width / 64.)) * groups 80 | self.conv1 = conv1x1(inplanes, width) 81 | self.bn1 = norm_layer(width) 82 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 83 | self.bn2 = norm_layer(width) 84 | self.conv3 = conv1x1(width, planes * self.expansion) 85 | self.bn3 = norm_layer(planes * self.expansion) 86 | self.relu = nn.ReLU(inplace=True) 87 | self.downsample = downsample 88 | self.stride = stride 89 | 90 | def forward(self, x): 91 | identity = x 92 | 93 | out = self.conv1(x) 94 | out = self.bn1(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv2(out) 98 | out = self.bn2(out) 99 | out = self.relu(out) 100 | 101 | out = self.conv3(out) 102 | out = self.bn3(out) 103 | 104 | if self.downsample is not None: 105 | identity = self.downsample(x) 106 | 107 | out += identity 108 | out = self.relu(out) 109 | 110 | return out 111 | -------------------------------------------------------------------------------- /model/utils/transformer_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | from torch import Tensor 14 | import torch.nn.functional as F 15 | from torch.nn.parameter import Parameter 16 | 17 | import numpy as np 18 | import math 19 | 20 | import os 21 | import sys 22 | __dir__ = os.path.dirname(os.path.abspath(__file__)) 23 | sys.path.append(__dir__) 24 | sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) 25 | 26 | from model.utils.ops import clones 27 | 28 | class MultiHeadAttention(nn.Module): 29 | def __init__(self, multi_attention_heads, dimensions, dropout=0.1): 30 | """ 31 | 32 | :param 
_multi_attention_heads: number of self attention head 33 | :param _dimensions: dimension of model 34 | :param _dropout: 35 | """ 36 | super(MultiHeadAttention, self).__init__() 37 | 38 | assert dimensions % multi_attention_heads == 0 39 | self.d_k = int(dimensions / multi_attention_heads) 40 | self.h = multi_attention_heads 41 | self.linears = clones(nn.Linear(dimensions, dimensions), 4) 42 | self.attention = None 43 | self.dropout = nn.Dropout(p=dropout) 44 | 45 | def dot_product_attention(self, query, key, value, mask): 46 | """ 47 | Compute 'Scaled Dot Product Attention 48 | 49 | :param _query: (N, h, seq_len, d_q), h is multi-head 50 | :param _key: (N, h, seq_len, d_k) 51 | :param _value: (N, h, seq_len, d_v) 52 | :param _mask: None or (N, 1, seq_len, seq_len), 0 will be replaced with -1e9 53 | :return: 54 | """ 55 | 56 | d_k = value.size(-1) 57 | score = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k) 58 | if mask is not None: 59 | score = score.masked_fill(mask == 0, -1e9) 60 | p_attn = F.softmax(score, dim=-1) 61 | return torch.matmul(p_attn, value), p_attn 62 | 63 | def forward(self, query, key, value, mask): 64 | batch_size = query.size(0) 65 | 66 | query, key, value = \ 67 | [l(x).view(batch_size, -1, self.h, self.d_k).transpose(1, 2) 68 | for l, x in zip(self.linears, (query, key, value))] 69 | 70 | product_and_attention = self.dot_product_attention(query, key, value, mask=mask) 71 | x = product_and_attention[0] 72 | 73 | x = x.transpose(1, 2).contiguous() \ 74 | .view(batch_size, -1, self.h * self.d_k) 75 | 76 | return self.linears[-1](x) 77 | 78 | class FeedForwarding(nn.Module): 79 | def __init__(self, _dimensions, _feed_forward_dimensions, _dropout=0.1): 80 | super(FeedForwarding, self).__init__() 81 | self.w_1 = nn.Linear(_dimensions, _feed_forward_dimensions) 82 | self.w_2 = nn.Linear(_feed_forward_dimensions, _dimensions) 83 | self.dropout = nn.Dropout(p=_dropout) 84 | 85 | def forward(self, _input_tensor): 86 | return self.w_2(self.dropout(F.relu(self.w_1(_input_tensor)))) 87 | 88 | class PositionalEncoding(nn.Module): 89 | def __init__(self, emb_size, dropout=0.1, maxlen=5000): 90 | super(PositionalEncoding, self).__init__() 91 | 92 | 93 | den = torch.exp(- torch.arange(0, emb_size, 2) * math.log(10000) / emb_size) 94 | pos = torch.arange(0, maxlen).reshape(maxlen, 1) 95 | pos_embedding = torch.zeros((maxlen, emb_size)) 96 | pos_embedding[:, 0::2] = torch.sin(pos * den) 97 | pos_embedding[:, 1::2] = torch.cos(pos * den) 98 | self.pos_embedding = pos_embedding.unsqueeze(0) 99 | self.dropout = nn.Dropout(dropout) 100 | 101 | def forward(self, x): 102 | """Forward pass. 
103 | Args: 104 | x: (B, len, d_model) 105 | Returns: 106 | (B, len, d_model) 107 | """ 108 | return self.dropout(x + self.pos_embedding[:, :x.size(1), :].to(x.device)) 109 | 110 | class PositionalEncoding2D(nn.Module): 111 | def __init__(self, emb_size, dropout=0.1, max_h=1000, max_w=1000): 112 | super(PositionalEncoding2D, self).__init__() 113 | 114 | 115 | self.emb_size = emb_size 116 | assert emb_size % 2 == 0, f"Embedding depth {emb_size} is not even" 117 | pe_h = self.make_pe(emb_size // 2, maxlen=max_h) 118 | pe_w = self.make_pe(emb_size // 2, maxlen=max_w) 119 | 120 | pe_h = pe_h.permute(2, 1, 0).expand(-1, -1, max_w) 121 | pe_w = pe_w.permute(2, 0, 1).expand(-1, max_h, -1) 122 | 123 | pe = torch.cat([pe_h, pe_w], dim=0) 124 | self.pe = pe.unsqueeze(0) 125 | self.dropout = nn.Dropout(dropout) 126 | 127 | def make_pe(self, emb_size, maxlen=2000): 128 | den = torch.exp(- torch.arange(0, emb_size, 2) * math.log(10000) / emb_size) 129 | pos = torch.arange(0, maxlen).reshape(maxlen, 1) 130 | pos_embedding = torch.zeros((maxlen, emb_size)) 131 | pos_embedding[:, 0::2] = torch.sin(pos * den) 132 | pos_embedding[:, 1::2] = torch.cos(pos * den) 133 | pos_embedding = pos_embedding.unsqueeze(0) 134 | return pos_embedding 135 | 136 | def forward(self, x): 137 | """Forward pass. 138 | Args: 139 | x: (B, d_model, H, W) 140 | Returns: 141 | (B, d_model, H, W) 142 | """ 143 | assert x.shape[1] == self.pe.shape[1] 144 | return self.dropout(x + self.pe[:, :, : x.size(2), : x.size(3)].to(x.device)) 145 | 146 | class TransformerDecoderLayer(nn.Module): 147 | def __init__(self, nhead, d_model, n_layers, dropout, dim_feedforward, n_classes, PAD_IDX=1): 148 | 149 | super(TransformerDecoderLayer, self).__init__() 150 | self.attention = MultiHeadAttention(nhead, d_model, dropout) 151 | self.source_attention = MultiHeadAttention(nhead, d_model, dropout) 152 | self.position_feed_forward = FeedForwarding(d_model, dim_feedforward, dropout) 153 | self.position = PositionalEncoding(d_model, dropout) 154 | self.stacks = n_layers 155 | self.dropout = torch.nn.Dropout(dropout) 156 | self.layer_norm = torch.nn.LayerNorm(d_model, eps=1e-6) 157 | self.embedding = nn.Embedding(n_classes, d_model) 158 | self.sqrt_model_size = math.sqrt(d_model) 159 | self.padding_symbol = PAD_IDX 160 | 161 | def generate_target_mask(self, source, target): 162 | target_pad_mask = (target != self.padding_symbol).unsqueeze(1).unsqueeze(3) 163 | target_length = target.size(1) 164 | target_sub_mask = torch.tril( 165 | torch.ones((target_length, target_length), dtype=torch.uint8, device=source.device) 166 | ) 167 | source_mask = torch.ones((target_length, source.size(1)), dtype=torch.uint8, device=source.device) 168 | target_mask = target_pad_mask & target_sub_mask.bool() 169 | return source_mask, target_mask 170 | 171 | def eval(self): 172 | self.attention.eval() 173 | self.source_attention.eval() 174 | self.position_feed_forward.eval() 175 | self.position.eval() 176 | self.dropout.eval() 177 | self.layer_norm.eval() 178 | self.embedding.eval() 179 | 180 | def forward(self, target_result, memory): 181 | target = self.embedding(target_result) * self.sqrt_model_size 182 | target = self.position(target) 183 | 184 | if self.padding_symbol is None: 185 | source_mask, target_mask = None, None 186 | else: 187 | source_mask, target_mask = self.generate_target_mask(memory, target_result) 188 | output = target 189 | for i in range(self.stacks): 190 | normed_output = self.layer_norm(output) 191 | output = output + self.dropout( 192 | 
self.attention(normed_output, normed_output, normed_output, target_mask) 193 | ) 194 | normed_output = self.layer_norm(output) 195 | output = output + self.dropout(self.source_attention(normed_output, memory, memory, source_mask)) 196 | normed_output = self.layer_norm(output) 197 | output = output + self.dropout(self.position_feed_forward(normed_output)) 198 | return self.layer_norm(output) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | Cython 3 | loguru 4 | h5py 5 | einops 6 | pyclipper 7 | pycocotools 8 | Shapely 9 | timm 10 | segmentation-models-pytorch 11 | torchsummary 12 | imgaug 13 | opencv-python 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | import torch 4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 5 | 6 | def make_cuda_ext(name, module, sources): 7 | define_macros = [] 8 | 9 | if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1': 10 | define_macros += [("WITH_CUDA", None)] 11 | else: 12 | raise EnvironmentError('CUDA is required to compile!') 13 | 14 | return CUDAExtension( 15 | name='{}.{}'.format(module, name), 16 | sources=[os.path.join(*module.split('.'), p) for p in sources], 17 | define_macros=define_macros, 18 | extra_compile_args={ 19 | 'cxx': ['-std=c++14'], 20 | 'nvcc': [ 21 | '-D__CUDA_NO_HALF_OPERATORS__', 22 | '-D__CUDA_NO_HALF_CONVERSIONS__', 23 | '-D__CUDA_NO_HALF2_OPERATORS__', 24 | ] 25 | }) 26 | 27 | # python setup.py develop 28 | # python setup.py build_ext --inplace 29 | if __name__ == '__main__': 30 | 31 | setup( 32 | name='focalloss', 33 | version='1.0.0', 34 | package_data={'tools/loss': ['*/*.so']}, 35 | classifiers=[ 36 | 'Development Status :: 4 - Beta', 37 | 'License :: OSI Approved :: Apache Software License', 38 | 'Operating System :: OS Independent', 39 | 'Programming Language :: Python :: 3', 40 | 'Programming Language :: Python :: 3.8' 41 | ], 42 | 43 | ext_modules=[ 44 | make_cuda_ext(name='sigmoid_focal_loss_cuda', module='tools.loss', 45 | sources=[ 46 | 'src/sigmoid_focal_loss.cpp', 47 | 'src/sigmoid_focal_loss_cuda.cu' 48 | ]), 49 | make_cuda_ext(name='SigmoidFocalLoss_cuda', module='tools.loss', 50 | sources=[ 51 | 'src/SigmoidFocalLoss.cpp', 52 | 'src/SigmoidFocalLoss_cuda.cu' 53 | ]) 54 | 55 | ], 56 | 57 | cmdclass={'build_ext': BuildExtension}, 58 | zip_safe=False) 59 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/__init__.py -------------------------------------------------------------------------------- /tools/augmentation.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 数据增强 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import numpy as np 12 | from loguru import logger 13 | 14 | import imgaug as ia 15 | from imgaug import augmenters as iaa 16 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage 17 | from imgaug.augmentables.polys import PolygonsOnImage 18 | from 
imgaug.augmentables.segmaps import SegmentationMapsOnImage 19 | 20 | aug_func = { 21 | 'affine': iaa.Affine, 22 | 'fliplr': iaa.Fliplr, 23 | 'flipud': iaa.Flipud, 24 | 'addgaussiannoise': iaa.AdditiveGaussianNoise, 25 | 'multiply': iaa.Multiply, 26 | 'cutout': iaa.Cutout, 27 | 'add': iaa.Add, 28 | 'grayscale': iaa.Grayscale, 29 | 'clouds': iaa.Clouds, 30 | 'fog': iaa.Fog, 31 | 'snowflakes': iaa.Snowflakes, 32 | 'rain': iaa.Rain, 33 | 'gaussianblur': iaa.GaussianBlur 34 | } 35 | 36 | class BaseAugmentation(): 37 | def __init__(self, aug_dicts, mode='some'): 38 | assert isinstance(aug_dicts, dict) 39 | self.aug_dicts = aug_dicts 40 | self.mode = mode 41 | 42 | def __call__(self): 43 | augment_func =[aug_func[f](**self.aug_dicts[f]) for f in self.aug_dicts] 44 | if self.mode == 'some': 45 | return iaa.SomeOf((0, len(augment_func)), augment_func) 46 | else: 47 | return iaa.Sequential(augment_func) 48 | 49 | class Augmentation(): 50 | def __init__(self, use_aug=True, task_type='cls', aug=None): 51 | 52 | assert task_type in ['cls', 'det', 'seg', 'polygon', 'custom'] 53 | self.use_aug = use_aug 54 | self.aug = iaa.SomeOf((0, 13),[ 55 | iaa.Affine(translate_percent=[-0.05, 0.05], scale=[0.8, 1.2], rotate=(-5, 5), mode='constant', cval=[240, 255]), 56 | iaa.Fliplr(0.5), 57 | iaa.Flipud(0.5), 58 | iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 12.0), per_channel=0.5), 59 | iaa.Multiply((0.5, 1.5)), 60 | iaa.Cutout(nb_iterations=(1, 4), size=0.1, squared=False, fill_mode="constant", cval=(0, 255), fill_per_channel=0.5), 61 | iaa.Add((-40, 40), per_channel=0.5), 62 | iaa.Grayscale(alpha=(0.0, 1.0)), 63 | iaa.GaussianBlur(sigma=(0.0,1.4)) 64 | ]) if aug is None else aug 65 | 66 | self.task_type = task_type 67 | logger.info("Augmentation_type: {}".format(self.task_type)) 68 | def make_aug(self, img, label, box_label=None): 69 | if self.task_type == 'cls': 70 | img = self.aug(image=img) 71 | return img, label 72 | elif self.task_type == 'det': 73 | boxes = BoundingBoxesOnImage([BoundingBox(x1=float(ii[0]), y1=float(ii[1]), x2=float(ii[2]), y2=float(ii[3]), 74 | label=ii[4]) for ii in label], shape=img.shape) 75 | new_img, new_boxes = self.aug(image=img, bounding_boxes=boxes) 76 | new_boxes = new_boxes.remove_out_of_image().clip_out_of_image() 77 | boxes_ = [[float(new_boxes.bounding_boxes[j].x1), float(new_boxes.bounding_boxes[j].y1), 78 | float(new_boxes.bounding_boxes[j].x2), float(new_boxes.bounding_boxes[j].y2), new_boxes.bounding_boxes[j].label] for j in range(len(new_boxes.bounding_boxes))] 79 | 80 | return new_img, boxes_ 81 | 82 | elif self.task_type == 'polygon': 83 | polygons = PolygonsOnImage([ia.Polygon(p[:-1], label=p[-1]) for p in label], shape=img.shape) 84 | new_img, new_polygons = self.aug(image=img, polygons=polygons) 85 | new_polygons = new_polygons.remove_out_of_image().clip_out_of_image() 86 | polygons_ = [new_polygons.polygons[j].coords.tolist()+[new_polygons.polygons[j].label] for j in range(len(new_polygons.polygons))] 87 | 88 | return new_img, polygons_ 89 | 90 | elif self.task_type == 'seg': 91 | label = np.array(label) 92 | if box_label is not None: 93 | box_label = np.array(box_label) 94 | box_label = BoundingBoxesOnImage([BoundingBox(x1=float(ii[0]), y1=float(ii[1]), x2=float(ii[2]), y2=float(ii[3]), 95 | label=ii[4]) for ii in box_label], shape=img.shape) 96 | seg_map = SegmentationMapsOnImage(label, shape=img.shape) 97 | new_img, seg_map, new_boxes = self.aug(image=img, segmentation_maps=seg_map, bounding_boxes=box_label) 98 | new_boxes = 
[[float(new_boxes.bounding_boxes[j].x1), float(new_boxes.bounding_boxes[j].y1), 99 | float(new_boxes.bounding_boxes[j].x2), float(new_boxes.bounding_boxes[j].y2), new_boxes.bounding_boxes[j].label] for j in range(len(new_boxes.bounding_boxes))] 100 | seg_map = seg_map.get_arr() 101 | return new_img, seg_map, new_boxes 102 | 103 | else: 104 | seg_map = SegmentationMapsOnImage(label, shape=img.shape) 105 | new_img, seg_map = self.aug(image=img, segmentation_maps=seg_map) 106 | seg_map = seg_map.get_arr() 107 | 108 | return new_img, seg_map 109 | 110 | else: 111 | return self.custom_label_type(img, label) 112 | 113 | def custom_label_type(self, img, label): 114 | raise NotImplementedError('Custom label type not supported.') 115 | 116 | def reorder_vertexes(self, pts): 117 | pts = np.array(pts) 118 | rect = np.zeros((4, 2), dtype = "float32") 119 | 120 | s = pts.sum(axis = 1) 121 | rect[0] = pts[np.argmin(s)] 122 | rect[2] = pts[np.argmax(s)] 123 | 124 | diff = np.diff(pts, axis = 1) 125 | rect[1] = pts[np.argmin(diff)] 126 | rect[3] = pts[np.argmax(diff)] 127 | 128 | return rect.tolist() -------------------------------------------------------------------------------- /tools/boxes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import numpy as np 12 | 13 | import torch 14 | import torchvision 15 | 16 | 17 | def filter_box(output, scale_range): 18 | """ 19 | output: (N, 5+class) shape 20 | """ 21 | min_scale, max_scale = scale_range 22 | w = output[:, 2] - output[:, 0] 23 | h = output[:, 3] - output[:, 1] 24 | keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale) 25 | return output[keep] 26 | 27 | """ 28 | def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): 29 | box_corner = prediction.new(prediction.shape) 30 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 31 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 32 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 33 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 34 | prediction[:, :, :4] = box_corner[:, :, :4] 35 | 36 | output = [None for _ in range(len(prediction))] 37 | for i, image_pred in enumerate(prediction): 38 | 39 | # If none are remaining => process next image 40 | if not image_pred.size(0): 41 | continue 42 | # Get score and class with highest confidence 43 | class_conf, class_pred = torch.max( 44 | image_pred[:, 5 : 5 + num_classes], 1, keepdim=True 45 | ) 46 | 47 | conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() 48 | # _, conf_mask = torch.topk((image_pred[:, 4] * class_conf.squeeze()), 1000) 49 | # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) 50 | detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) 51 | detections = detections[conf_mask] 52 | if not detections.size(0): 53 | continue 54 | 55 | nms_out_index = torchvision.ops.batched_nms( 56 | detections[:, :4], 57 | detections[:, 4] * detections[:, 5], 58 | detections[:, 6], 59 | nms_thre, 60 | ) 61 | detections = detections[nms_out_index] 62 | if output[i] is None: 63 | output[i] = detections 64 | else: 65 | output[i] = torch.cat((output[i], detections)) 66 | 67 | return output 68 | """ 69 | 70 | def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): 71 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 
4: 72 | raise IndexError 73 | 74 | if xyxy: 75 | tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) 76 | br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) 77 | area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 78 | area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 79 | else: 80 | tl = torch.max( 81 | (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), 82 | (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), 83 | ) 84 | br = torch.min( 85 | (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), 86 | (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), 87 | ) 88 | 89 | area_a = torch.prod(bboxes_a[:, 2:], 1) 90 | area_b = torch.prod(bboxes_b[:, 2:], 1) 91 | en = (tl < br).type(tl.type()).prod(dim=2) 92 | area_i = torch.prod(br - tl, 2) * en 93 | return area_i / (area_a[:, None] + area_b - area_i) 94 | 95 | 96 | def matrix_iou(a, b): 97 | """ 98 | return iou of a and b, numpy version for data augenmentation 99 | """ 100 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 101 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 102 | 103 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 104 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 105 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 106 | return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12) 107 | 108 | 109 | def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max): 110 | bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max) 111 | bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max) 112 | return bbox 113 | 114 | 115 | def xyxy2xywh(bboxes): 116 | bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] 117 | bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] 118 | return bboxes 119 | 120 | 121 | def xyxy2cxcywh(bboxes): 122 | bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] 123 | bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] 124 | bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5 125 | bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5 126 | return bboxes 127 | -------------------------------------------------------------------------------- /tools/evaluation_tools.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | 11 | from tqdm import tqdm 12 | import numpy as np 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | 18 | import json 19 | import tempfile 20 | 21 | from pycocotools.coco import COCO 22 | from pycocotools.cocoeval import COCOeval 23 | 24 | from tools.boxes import xyxy2xywh 25 | 26 | class Coco_eval(): 27 | def __init__(self, eval_bbox=False, eval_mask=False, jsonfile=None): 28 | self.eval_bbox = eval_bbox 29 | self.eval_mask = eval_mask 30 | self.jsonfile = jsonfile 31 | 32 | def __call__(self, data_list, ann_file): 33 | if self.jsonfile is not None: 34 | json.dump(data_list, open(self.jsonfile, "w")) 35 | else: 36 | _, self.jsonfile = tempfile.mkstemp() 37 | json.dump(data_list, open(self.jsonfile, "w")) 38 | print('Loading annotations...') 39 | gt_annotations = COCO(ann_file) 40 | test_res = gt_annotations.loadRes(self.jsonfile) 41 | 42 | if self.eval_bbox: 43 | print('\nEvaluating BBoxes:') 44 | bbox_eval = COCOeval(gt_annotations, test_res, 'bbox') 45 | bbox_eval.evaluate() 46 | bbox_eval.accumulate() 47 | bbox_eval.summarize() 48 | 49 | if self.eval_mask: 50 | print('\nEvaluating Masks:') 51 | bbox_eval = COCOeval(gt_annotations, test_res, 'segm') 52 | bbox_eval.evaluate() 53 | 
bbox_eval.accumulate() 54 | bbox_eval.summarize() 55 | 56 | return bbox_eval.stats[0] 57 | 58 | class ConvertCocoFormat(): 59 | 60 | def __init__(self, id2cat, mode='bbox'): 61 | self.id2cat = id2cat 62 | self.mode = mode 63 | 64 | def __call__(self, b_bboxes, b_cls, b_scores, ids): 65 | data_list = [] 66 | for (bboxes, cls, scores, img_id) in zip(b_bboxes, b_cls, b_scores, ids): 67 | bboxes, cls, scores = bboxes.cpu(), cls.cpu(), scores.cpu() 68 | 69 | if bboxes is None: 70 | continue 71 | if self.mode == 'bbox': 72 | for ind in range(bboxes.shape[0]): 73 | label = self.id2cat[int(cls[ind])] 74 | pred_data = { 75 | "image_id": int(img_id.numpy().item()), 76 | "category_id": label, 77 | "bbox": bboxes[ind].numpy().tolist(), 78 | "score": scores[ind].numpy().item(), 79 | "segmentation": [], 80 | } 81 | data_list.append(pred_data) 82 | else: 83 | raise NotImplementedError 84 | return data_list 85 | 86 | class SemanticSegmIOU(): 87 | def __init__(self, scale=0.5, ignore_label=255): 88 | super().__init__() 89 | self.scale = scale 90 | self.ignore_label = ignore_label 91 | 92 | def __call__(self, model, dataset, n_classes): 93 | hist = torch.zeros(n_classes, n_classes).cuda().detach() 94 | 95 | for inps, targets in tqdm(dataset): 96 | 97 | N, H, W = targets.shape 98 | targets = targets.cuda() 99 | size = targets.size()[-2:] 100 | 101 | inps = inps.cuda() 102 | N, C, H, W = inps.size() 103 | 104 | new_hw = [int(H*self.scale), int(W*self.scale)] 105 | inps = F.interpolate(inps, new_hw, mode='bilinear', align_corners=True) 106 | 107 | logits = model(inps)[0] 108 | logits = F.interpolate(logits, size=size, mode='bilinear', align_corners=True) 109 | probs = torch.softmax(logits, dim=1) 110 | preds = torch.argmax(probs, dim=1) 111 | keep = targets != self.ignore_label 112 | 113 | hist += torch.bincount(targets[keep] * n_classes + preds[keep], minlength=n_classes ** 2).view(n_classes, n_classes).float() 114 | 115 | ious = hist.diag() / (hist.sum(dim=0) + hist.sum(dim=1) - hist.diag()) 116 | miou = ious.mean() 117 | return miou.item() -------------------------------------------------------------------------------- /tools/loss/SigmoidFocalLoss_cuda.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/SigmoidFocalLoss_cuda.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /tools/loss/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tools/loss/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tools/loss/__pycache__/detr_criterion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/__pycache__/detr_criterion.cpython-38.pyc -------------------------------------------------------------------------------- /tools/loss/__pycache__/detr_matcher.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/__pycache__/detr_matcher.cpython-38.pyc -------------------------------------------------------------------------------- /tools/loss/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /tools/loss/__pycache__/loss_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/__pycache__/loss_utils.cpython-38.pyc -------------------------------------------------------------------------------- /tools/loss/detr_criterion.py: -------------------------------------------------------------------------------- 1 | """ 2 | MaskFormer criterion. 3 | """ 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import nn 7 | 8 | 9 | from ..misc import nested_tensor_from_tensor_list 10 | 11 | 12 | def dice_loss(inputs, targets, num_masks): 13 | """ 14 | Compute the DICE loss, similar to generalized IOU for masks 15 | Args: 16 | inputs: A float tensor of arbitrary shape. 17 | The predictions for each example. 18 | targets: A float tensor with the same shape as inputs. Stores the binary 19 | classification label for each element in inputs 20 | (0 for the negative class and 1 for the positive class). 21 | """ 22 | inputs = inputs.sigmoid() 23 | inputs = inputs.flatten(1) 24 | numerator = 2 * (inputs * targets).sum(-1) 25 | denominator = inputs.sum(-1) + targets.sum(-1) 26 | loss = 1 - (numerator + 1) / (denominator + 1) 27 | return loss.sum() / num_masks 28 | 29 | 30 | def sigmoid_focal_loss(inputs, targets, num_masks, alpha: float = 0.25, gamma: float = 2): 31 | """ 32 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 33 | Args: 34 | inputs: A float tensor of arbitrary shape. 35 | The predictions for each example. 36 | targets: A float tensor with the same shape as inputs. Stores the binary 37 | classification label for each element in inputs 38 | (0 for the negative class and 1 for the positive class). 39 | alpha: (optional) Weighting factor in range (0,1) to balance 40 | positive vs negative examples. Default = -1 (no weighting). 41 | gamma: Exponent of the modulating factor (1 - p_t) to 42 | balance easy vs hard examples. 43 | Returns: 44 | Loss tensor 45 | """ 46 | prob = inputs.sigmoid() 47 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 48 | p_t = prob * targets + (1 - prob) * (1 - targets) 49 | loss = ce_loss * ((1 - p_t) ** gamma) 50 | 51 | if alpha >= 0: 52 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 53 | loss = alpha_t * loss 54 | return loss.mean(1).sum() / num_masks 55 | 56 | 57 | class SetCriterion(nn.Module): 58 | """This class computes the loss for DETR. 
59 | The process happens in two steps: 60 | 1) we compute hungarian assignment between ground truth boxes and the outputs of the model 61 | 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) 62 | """ 63 | 64 | def __init__(self, num_classes, matcher, weight_dict, eos_coef, losses): 65 | """Create the criterion. 66 | Parameters: 67 | num_classes: number of object categories, omitting the special no-object category 68 | matcher: module able to compute a matching between targets and proposals 69 | weight_dict: dict containing as key the names of the losses and as values their relative weight. 70 | eos_coef: relative classification weight applied to the no-object category 71 | losses: list of all the losses to be applied. See get_loss for list of available losses. 72 | """ 73 | super().__init__() 74 | self.num_classes = num_classes 75 | self.matcher = matcher 76 | self.weight_dict = weight_dict 77 | self.eos_coef = eos_coef 78 | self.losses = losses 79 | empty_weight = torch.ones(self.num_classes + 1) 80 | empty_weight[-1] = self.eos_coef 81 | self.register_buffer("empty_weight", empty_weight) 82 | 83 | def loss_labels(self, outputs, targets, indices, num_masks): 84 | """Classification loss (NLL) 85 | targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] 86 | """ 87 | assert "pred_logits" in outputs 88 | src_logits = outputs["pred_logits"] 89 | 90 | idx = self._get_src_permutation_idx(indices) 91 | target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) 92 | target_classes = torch.full( 93 | src_logits.shape[:2], self.num_classes, dtype=torch.int64, device=src_logits.device 94 | ) 95 | target_classes[idx] = target_classes_o 96 | 97 | loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight) 98 | losses = {"loss_ce": loss_ce} 99 | return losses 100 | 101 | def loss_masks(self, outputs, targets, indices, num_masks): 102 | """Compute the losses related to the masks: the focal loss and the dice loss. 
103 | targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w] 104 | """ 105 | assert "pred_masks" in outputs 106 | 107 | src_idx = self._get_src_permutation_idx(indices) 108 | tgt_idx = self._get_tgt_permutation_idx(indices) 109 | src_masks = outputs["pred_masks"] 110 | src_masks = src_masks[src_idx] 111 | masks = [t["masks"] for t in targets] 112 | target_masks, valid = nested_tensor_from_tensor_list(masks).decompose() 113 | target_masks = target_masks.to(src_masks) 114 | target_masks = target_masks[tgt_idx] 115 | 116 | src_masks = F.interpolate( 117 | src_masks[:, None], size=target_masks.shape[-2:], mode="bilinear", align_corners=False 118 | ) 119 | src_masks = src_masks[:, 0].flatten(1) 120 | 121 | target_masks = target_masks.flatten(1) 122 | target_masks = target_masks.view(src_masks.shape) 123 | losses = { 124 | "loss_mask": sigmoid_focal_loss(src_masks, target_masks, num_masks), 125 | "loss_dice": dice_loss(src_masks, target_masks, num_masks), 126 | } 127 | return losses 128 | 129 | def _get_src_permutation_idx(self, indices): 130 | batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) 131 | src_idx = torch.cat([src for (src, _) in indices]) 132 | return batch_idx, src_idx 133 | 134 | def _get_tgt_permutation_idx(self, indices): 135 | batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) 136 | tgt_idx = torch.cat([tgt for (_, tgt) in indices]) 137 | return batch_idx, tgt_idx 138 | def get_loss(self, loss, outputs, targets, indices, num_masks): 139 | loss_map = {"labels": self.loss_labels, "masks": self.loss_masks} 140 | assert loss in loss_map, f"do you really want to compute {loss} loss?" 141 | return loss_map[loss](outputs, targets, indices, num_masks) 142 | 143 | def forward(self, outputs, targets): 144 | """This performs the loss computation. 145 | Parameters: 146 | outputs: dict of tensors, see the output specification of the model for the format 147 | targets: list of dicts, such that len(targets) == batch_size. 
148 | The expected keys in each dict depends on the losses applied, see each loss' doc 149 | """ 150 | 151 | for i in range(len(targets)): 152 | targets[i]['labels'] = targets[i]['labels'].cuda() 153 | targets[i]['masks'] = targets[i]['masks'].cuda() 154 | 155 | outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} 156 | 157 | indices = self.matcher(outputs_without_aux, targets) 158 | 159 | num_masks = sum(len(t["labels"]) for t in targets) 160 | num_masks = torch.as_tensor( 161 | [num_masks], dtype=torch.float, device=next(iter(outputs.values())).device 162 | ) 163 | 164 | losses = {} 165 | for loss in self.losses: 166 | losses.update(self.get_loss(loss, outputs, targets, indices, num_masks)) 167 | 168 | if "aux_outputs" in outputs: 169 | for i, aux_outputs in enumerate(outputs["aux_outputs"]): 170 | indices = self.matcher(aux_outputs, targets) 171 | for loss in self.losses: 172 | l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_masks) 173 | l_dict = {k + f"_{i}": v for k, v in l_dict.items()} 174 | losses.update(l_dict) 175 | 176 | for k in list(losses.keys()): 177 | if k in self.weight_dict: 178 | losses[k] *= self.weight_dict[k] 179 | else: 180 | losses.pop(k) 181 | 182 | losses = sum(losses.values()) 183 | return losses 184 | -------------------------------------------------------------------------------- /tools/loss/detr_matcher.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modules to compute the matching cost and solve the corresponding LSAP. 3 | """ 4 | import torch 5 | import torch.nn.functional as F 6 | from scipy.optimize import linear_sum_assignment 7 | from torch import nn 8 | 9 | 10 | def batch_dice_loss(inputs, targets): 11 | """ 12 | Compute the DICE loss, similar to generalized IOU for masks 13 | Args: 14 | inputs: A float tensor of arbitrary shape. 15 | The predictions for each example. 16 | targets: A float tensor with the same shape as inputs. Stores the binary 17 | classification label for each element in inputs 18 | (0 for the negative class and 1 for the positive class). 19 | """ 20 | inputs = inputs.sigmoid() 21 | inputs = inputs.flatten(1) 22 | numerator = 2 * torch.einsum("nc,mc->nm", inputs, targets) 23 | denominator = inputs.sum(-1)[:, None] + targets.sum(-1)[None, :] 24 | loss = 1 - (numerator + 1) / (denominator + 1) 25 | return loss 26 | 27 | 28 | def batch_sigmoid_focal_loss(inputs, targets, alpha: float = 0.25, gamma: float = 2): 29 | """ 30 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 31 | Args: 32 | inputs: A float tensor of arbitrary shape. 33 | The predictions for each example. 34 | targets: A float tensor with the same shape as inputs. Stores the binary 35 | classification label for each element in inputs 36 | (0 for the negative class and 1 for the positive class). 37 | alpha: (optional) Weighting factor in range (0,1) to balance 38 | positive vs negative examples. Default = -1 (no weighting). 39 | gamma: Exponent of the modulating factor (1 - p_t) to 40 | balance easy vs hard examples. 
41 | Returns: 42 | Loss tensor 43 | """ 44 | hw = inputs.shape[1] 45 | 46 | prob = inputs.sigmoid() 47 | focal_pos = ((1 - prob) ** gamma) * F.binary_cross_entropy_with_logits( 48 | inputs, torch.ones_like(inputs), reduction="none" 49 | ) 50 | focal_neg = (prob ** gamma) * F.binary_cross_entropy_with_logits( 51 | inputs, torch.zeros_like(inputs), reduction="none" 52 | ) 53 | if alpha >= 0: 54 | focal_pos = focal_pos * alpha 55 | focal_neg = focal_neg * (1 - alpha) 56 | 57 | loss = torch.einsum("nc,mc->nm", focal_pos, targets) + torch.einsum( 58 | "nc,mc->nm", focal_neg, (1 - targets) 59 | ) 60 | 61 | return loss / hw 62 | 63 | 64 | class HungarianMatcher(nn.Module): 65 | """This class computes an assignment between the targets and the predictions of the network 66 | 67 | For efficiency reasons, the targets don't include the no_object. Because of this, in general, 68 | there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, 69 | while the others are un-matched (and thus treated as non-objects). 70 | """ 71 | 72 | def __init__(self, cost_class: float = 1, cost_mask: float = 1, cost_dice: float = 1): 73 | """Creates the matcher 74 | 75 | Params: 76 | cost_class: This is the relative weight of the classification error in the matching cost 77 | cost_mask: This is the relative weight of the focal loss of the binary mask in the matching cost 78 | cost_dice: This is the relative weight of the dice loss of the binary mask in the matching cost 79 | """ 80 | super().__init__() 81 | self.cost_class = cost_class 82 | self.cost_mask = cost_mask 83 | self.cost_dice = cost_dice 84 | assert cost_class != 0 or cost_mask != 0 or cost_dice != 0, "all costs cant be 0" 85 | 86 | @torch.no_grad() 87 | def memory_efficient_forward(self, outputs, targets): 88 | """More memory-friendly matching""" 89 | bs, num_queries = outputs["pred_logits"].shape[:2] 90 | 91 | masks = [v["masks"] for v in targets] 92 | h_max = max([m.shape[1] for m in masks]) 93 | w_max = max([m.shape[2] for m in masks]) 94 | 95 | indices = [] 96 | 97 | for b in range(bs): 98 | 99 | out_prob = outputs["pred_logits"][b].softmax(-1) 100 | out_mask = outputs["pred_masks"][b] 101 | 102 | tgt_ids = targets[b]["labels"] 103 | tgt_mask = targets[b]["masks"].to(out_mask) 104 | 105 | cost_class = -out_prob[:, tgt_ids] 106 | 107 | tgt_mask = F.interpolate(tgt_mask[:, None], size=out_mask.shape[-2:], mode="nearest") 108 | 109 | out_mask = out_mask.flatten(1) 110 | tgt_mask = tgt_mask[:, 0].flatten(1) 111 | 112 | cost_mask = batch_sigmoid_focal_loss(out_mask, tgt_mask) 113 | 114 | cost_dice = batch_dice_loss(out_mask, tgt_mask) 115 | 116 | C = ( 117 | self.cost_mask * cost_mask 118 | + self.cost_class * cost_class 119 | + self.cost_dice * cost_dice 120 | ) 121 | C = C.reshape(num_queries, -1).cpu() 122 | indices.append(linear_sum_assignment(C)) 123 | return [ 124 | (torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) 125 | for i, j in indices 126 | ] 127 | 128 | @torch.no_grad() 129 | def forward(self, outputs, targets): 130 | """Performs the matching 131 | 132 | Params: 133 | outputs: This is a dict that contains at least these entries: 134 | "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits 135 | "pred_masks": Tensor of dim [batch_size, num_queries, H_pred, W_pred] with the predicted masks 136 | 137 | targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: 138 | "labels": Tensor of dim 
[num_target_boxes] (where num_target_boxes is the number of ground-truth 139 | objects in the target) containing the class labels 140 | "masks": Tensor of dim [num_target_boxes, H_gt, W_gt] containing the target masks 141 | 142 | Returns: 143 | A list of size batch_size, containing tuples of (index_i, index_j) where: 144 | - index_i is the indices of the selected predictions (in order) 145 | - index_j is the indices of the corresponding selected targets (in order) 146 | For each batch element, it holds: 147 | len(index_i) = len(index_j) = min(num_queries, num_target_boxes) 148 | """ 149 | return self.memory_efficient_forward(outputs, targets) 150 | 151 | def __repr__(self): 152 | head = "Matcher " + self.__class__.__name__ 153 | body = [ 154 | "cost_class: {}".format(self.cost_class), 155 | "cost_mask: {}".format(self.cost_mask), 156 | "cost_dice: {}".format(self.cost_dice), 157 | ] 158 | _repr_indent = 4 159 | lines = [head] + [" " * _repr_indent + line for line in body] 160 | return "\n".join(lines) 161 | -------------------------------------------------------------------------------- /tools/loss/loss_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | from torch.autograd import Function 11 | from torch.autograd.function import once_differentiable 12 | import torch.nn.functional as F 13 | 14 | def norm_add_multi_loss(loss_dict): 15 | for i, key in enumerate(loss_dict.keys()): 16 | if i == 0: 17 | loss = loss_dict[key] 18 | n = loss_dict[key].detach() 19 | else: 20 | loss = loss + loss_dict[key]/n 21 | 22 | return loss 23 | 24 | def reduce_loss(loss, reduction): 25 | """Reduce loss as specified. 26 | 27 | Args: 28 | loss (Tensor): Elementwise loss tensor. 29 | reduction (str): Options are "none", "mean" and "sum". 30 | 31 | Return: 32 | Tensor: Reduced loss tensor. 33 | """ 34 | reduction_enum = F._Reduction.get_enum(reduction) 35 | if reduction_enum == 0: 36 | return loss 37 | elif reduction_enum == 1: 38 | return loss.mean() 39 | elif reduction_enum == 2: 40 | return loss.sum() 41 | 42 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 43 | """Apply element-wise weight and reduce loss. 44 | 45 | Args: 46 | loss (Tensor): Element-wise loss. 47 | weight (Tensor): Element-wise weights. 48 | reduction (str): Same as built-in losses of PyTorch. 49 | avg_factor (float): Avarage factor when computing the mean of losses. 50 | 51 | Returns: 52 | Tensor: Processed loss values. 53 | """ 54 | if weight is not None: 55 | loss = loss * weight 56 | 57 | if avg_factor is None: 58 | loss = reduce_loss(loss, reduction) 59 | else: 60 | if reduction == 'mean': 61 | loss = loss.sum() / avg_factor 62 | elif reduction != 'none': 63 | raise ValueError('avg_factor can not be used with reduction="sum"') 64 | return loss 65 | 66 | from . 
import sigmoid_focal_loss_cuda, SigmoidFocalLoss_cuda 67 | 68 | class SigmoidFocalLossFunction(Function): 69 | 70 | @staticmethod 71 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 72 | ctx.save_for_backward(input, target) 73 | num_classes = input.shape[1] 74 | ctx.num_classes = num_classes 75 | ctx.gamma = gamma 76 | ctx.alpha = alpha 77 | 78 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, gamma, alpha) 79 | return loss 80 | 81 | @staticmethod 82 | @once_differentiable 83 | def backward(ctx, d_loss): 84 | input, target = ctx.saved_tensors 85 | num_classes = ctx.num_classes 86 | gamma = ctx.gamma 87 | alpha = ctx.alpha 88 | d_loss = d_loss.contiguous() 89 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, num_classes, gamma, alpha) 90 | return d_input, None, None, None, None 91 | 92 | sigmoid_focal_loss_ = SigmoidFocalLossFunction.apply 93 | 94 | def sigmoid_focal_loss(pred, 95 | target, 96 | weight=None, 97 | gamma=2.0, 98 | alpha=0.25, 99 | reduction='mean', 100 | avg_factor=None): 101 | loss = sigmoid_focal_loss_(pred, target, gamma, alpha) 102 | 103 | if weight is not None: 104 | weight = weight.view(-1, 1) 105 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 106 | return loss 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /tools/loss/sigmoid_focal_loss_cuda.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/tools/loss/sigmoid_focal_loss_cuda.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /tools/loss/src/SigmoidFocalLoss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 | 47 | 
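// ---------------------------------------------------------------------------
// Illustrative usage sketch (added note, not part of the original source).
// Once this extension is built via setup.py (module tools.loss.SigmoidFocalLoss_cuda),
// the two exported functions are expected to be driven from Python much like the
// autograd wrapper in tools/loss/loss_utils.py does for the lower-case variant:
//
//   from tools.loss import SigmoidFocalLoss_cuda
//   # logits: float CUDA tensor of shape (N, num_classes)
//   # targets: int64 CUDA tensor of shape (N,) with per-sample class indices
//   losses   = SigmoidFocalLoss_cuda.forward(logits, targets, num_classes, gamma, alpha)
//   d_logits = SigmoidFocalLoss_cuda.backward(logits, targets, d_losses, num_classes, gamma, alpha)
//
// Shapes and the CUDA-tensor requirement follow the AT_ASSERTM checks in the
// companion SigmoidFocalLoss_cuda.cu kernel file; treat this as a hedged sketch
// of the binding's intended call pattern, not a guaranteed public API.
// ---------------------------------------------------------------------------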
-------------------------------------------------------------------------------- /tools/loss/src/SigmoidFocalLoss_cuda.cu: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu 3 | 4 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 5 | // This file is modified from 6 | // https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu 7 | // Cheng-Yang Fu 8 | // cyfu@cs.unc.edu 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | // TODO make it in a common file 19 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 20 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 21 | i += blockDim.x * gridDim.x) 22 | 23 | template 24 | __global__ void SigmoidFocalLossForward(const int nthreads, 25 | const scalar_t *logits, 26 | const int64_t *targets, 27 | const int num_classes, 28 | const float gamma, const float alpha, 29 | const int num, scalar_t *losses) { 30 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 31 | int n = i / num_classes; 32 | int d = i % num_classes; // current class[0~79]; 33 | int t = targets[n]; // target class [0~79]; 34 | 35 | // Decide it is positive or negative case. 36 | scalar_t c1 = (t == d); 37 | scalar_t c2 = (t >= 0 & t != d); 38 | 39 | scalar_t zn = (1.0 - alpha); 40 | scalar_t zp = (alpha); 41 | 42 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 43 | scalar_t p = 1. / (1. + expf(-logits[i])); 44 | 45 | // (1-p)**gamma * log(p) where 46 | scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 47 | 48 | // p**gamma * log(1-p) 49 | scalar_t term2 = 50 | powf(p, gamma) * 51 | (-1. * logits[i] * (logits[i] >= 0) - 52 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); 53 | 54 | losses[i] = 0.0; 55 | losses[i] += -c1 * term1 * zp; 56 | losses[i] += -c2 * term2 * zn; 57 | 58 | } // CUDA_1D_KERNEL_LOOP 59 | } // SigmoidFocalLossForward 60 | 61 | template 62 | __global__ void SigmoidFocalLossBackward( 63 | const int nthreads, const scalar_t *logits, const int64_t *targets, 64 | const scalar_t *d_losses, const int num_classes, const float gamma, 65 | const float alpha, const int num, scalar_t *d_logits) { 66 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 67 | int n = i / num_classes; 68 | int d = i % num_classes; // current class[0~79]; 69 | int t = targets[n]; // target class [1~80], 0 is background; 70 | 71 | // Decide it is positive or negative case. 72 | scalar_t c1 = (t == d); 73 | scalar_t c2 = (t >= 0 & t != d); 74 | 75 | scalar_t zn = (1.0 - alpha); 76 | scalar_t zp = (alpha); 77 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 78 | scalar_t p = 1. / (1. + expf(-logits[i])); 79 | 80 | // (1-p)**g * (1 - p - g*p*log(p) 81 | scalar_t term1 = 82 | powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); 83 | 84 | // (p**g) * (g*(1-p)*log(1-p) - p) 85 | scalar_t term2 = 86 | powf(p, gamma) * 87 | ((-1. * logits[i] * (logits[i] >= 0) - 88 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * 89 | (1. 
- p) * gamma - 90 | p); 91 | d_logits[i] = 0.0; 92 | d_logits[i] += -c1 * term1 * zp; 93 | d_logits[i] += -c2 * term2 * zn; 94 | d_logits[i] = d_logits[i] * d_losses[i]; 95 | 96 | } // CUDA_1D_KERNEL_LOOP 97 | } // SigmoidFocalLossBackward 98 | 99 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 100 | const at::Tensor &targets, 101 | const int num_classes, 102 | const float gamma, const float alpha) { 103 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 104 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 105 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 106 | 107 | const int num_samples = logits.size(0); 108 | 109 | auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); 110 | auto losses_size = num_samples * logits.size(1); 111 | 112 | dim3 grid( 113 | std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096)); 114 | dim3 block(512); 115 | 116 | if (losses.numel() == 0) { 117 | THCudaCheck(cudaGetLastError()); 118 | return losses; 119 | } 120 | 121 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 122 | logits.scalar_type(), "SigmoidFocalLoss_forward", [&] { 123 | SigmoidFocalLossForward<<>>( 124 | losses_size, logits.contiguous().data(), 125 | targets.contiguous().data(), num_classes, gamma, alpha, 126 | num_samples, losses.data()); 127 | }); 128 | THCudaCheck(cudaGetLastError()); 129 | return losses; 130 | } 131 | 132 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 133 | const at::Tensor &targets, 134 | const at::Tensor &d_losses, 135 | const int num_classes, 136 | const float gamma, 137 | const float alpha) { 138 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 139 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 140 | AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); 141 | 142 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 143 | 144 | const int num_samples = logits.size(0); 145 | AT_ASSERTM(logits.size(1) == num_classes, 146 | "logits.size(1) should be num_classes"); 147 | 148 | auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); 149 | auto d_logits_size = num_samples * logits.size(1); 150 | 151 | dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512), 152 | (int64_t)4096)); 153 | dim3 block(512); 154 | 155 | if (d_logits.numel() == 0) { 156 | THCudaCheck(cudaGetLastError()); 157 | return d_logits; 158 | } 159 | 160 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 161 | logits.scalar_type(), "SigmoidFocalLoss_backward", [&] { 162 | SigmoidFocalLossBackward<<>>( 163 | d_logits_size, logits.contiguous().data(), 164 | targets.contiguous().data(), 165 | d_losses.contiguous().data(), num_classes, gamma, alpha, 166 | num_samples, d_logits.data()); 167 | }); 168 | 169 | THCudaCheck(cudaGetLastError()); 170 | return d_logits; 171 | } 172 | -------------------------------------------------------------------------------- /tools/loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 
12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 | -------------------------------------------------------------------------------- /tools/loss/src/sigmoid_focal_loss_cuda.cu: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu 3 | 4 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 5 | // This file is modified from 6 | // https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu 7 | // Cheng-Yang Fu 8 | // cyfu@cs.unc.edu 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | // TODO make it in a common file 19 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 20 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 21 | i += blockDim.x * gridDim.x) 22 | 23 | template 24 | __global__ void SigmoidFocalLossForward(const int nthreads, 25 | const scalar_t *logits, 26 | const int64_t *targets, 27 | const int num_classes, 28 | const float gamma, const float alpha, 29 | const int num, scalar_t *losses) { 30 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 31 | int n = i / num_classes; 32 | int d = i % num_classes; // current class[0~79]; 33 | int t = targets[n]; // target class [1~80]; 34 | 35 | // Decide it is positive or negative case. 36 | scalar_t c1 = (t == (d + 1)); 37 | scalar_t c2 = (t >= 0 & t != (d + 1)); 38 | 39 | scalar_t zn = (1.0 - alpha); 40 | scalar_t zp = (alpha); 41 | 42 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 43 | scalar_t p = 1. / (1. + expf(-logits[i])); 44 | 45 | // (1-p)**gamma * log(p) where 46 | scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 47 | 48 | // p**gamma * log(1-p) 49 | scalar_t term2 = 50 | powf(p, gamma) * 51 | (-1. * logits[i] * (logits[i] >= 0) - 52 | logf(1. + expf(logits[i] - 2. 
* logits[i] * (logits[i] >= 0)))); 53 | 54 | losses[i] = 0.0; 55 | losses[i] += -c1 * term1 * zp; 56 | losses[i] += -c2 * term2 * zn; 57 | 58 | } // CUDA_1D_KERNEL_LOOP 59 | } // SigmoidFocalLossForward 60 | 61 | template 62 | __global__ void SigmoidFocalLossBackward( 63 | const int nthreads, const scalar_t *logits, const int64_t *targets, 64 | const scalar_t *d_losses, const int num_classes, const float gamma, 65 | const float alpha, const int num, scalar_t *d_logits) { 66 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 67 | int n = i / num_classes; 68 | int d = i % num_classes; // current class[0~79]; 69 | int t = targets[n]; // target class [1~80], 0 is background; 70 | 71 | // Decide it is positive or negative case. 72 | scalar_t c1 = (t == (d + 1)); 73 | scalar_t c2 = (t >= 0 & t != (d + 1)); 74 | 75 | scalar_t zn = (1.0 - alpha); 76 | scalar_t zp = (alpha); 77 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 78 | scalar_t p = 1. / (1. + expf(-logits[i])); 79 | 80 | // (1-p)**g * (1 - p - g*p*log(p) 81 | scalar_t term1 = 82 | powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); 83 | 84 | // (p**g) * (g*(1-p)*log(1-p) - p) 85 | scalar_t term2 = 86 | powf(p, gamma) * 87 | ((-1. * logits[i] * (logits[i] >= 0) - 88 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * 89 | (1. - p) * gamma - 90 | p); 91 | d_logits[i] = 0.0; 92 | d_logits[i] += -c1 * term1 * zp; 93 | d_logits[i] += -c2 * term2 * zn; 94 | d_logits[i] = d_logits[i] * d_losses[i]; 95 | 96 | } // CUDA_1D_KERNEL_LOOP 97 | } // SigmoidFocalLossBackward 98 | 99 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 100 | const at::Tensor &targets, 101 | const int num_classes, 102 | const float gamma, const float alpha) { 103 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 104 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 105 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 106 | 107 | const int num_samples = logits.size(0); 108 | 109 | auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); 110 | auto losses_size = num_samples * logits.size(1); 111 | 112 | dim3 grid( 113 | std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096)); 114 | dim3 block(512); 115 | 116 | if (losses.numel() == 0) { 117 | THCudaCheck(cudaGetLastError()); 118 | return losses; 119 | } 120 | 121 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 122 | logits.scalar_type(), "SigmoidFocalLoss_forward", [&] { 123 | SigmoidFocalLossForward<<>>( 124 | losses_size, logits.contiguous().data(), 125 | targets.contiguous().data(), num_classes, gamma, alpha, 126 | num_samples, losses.data()); 127 | }); 128 | THCudaCheck(cudaGetLastError()); 129 | return losses; 130 | } 131 | 132 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 133 | const at::Tensor &targets, 134 | const at::Tensor &d_losses, 135 | const int num_classes, 136 | const float gamma, 137 | const float alpha) { 138 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 139 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 140 | AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); 141 | 142 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 143 | 144 | const int num_samples = logits.size(0); 145 | AT_ASSERTM(logits.size(1) == num_classes, 146 | "logits.size(1) should be num_classes"); 147 | 148 | auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); 149 | auto d_logits_size = num_samples * 
logits.size(1); 150 | 151 | dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512), 152 | (int64_t)4096)); 153 | dim3 block(512); 154 | 155 | if (d_logits.numel() == 0) { 156 | THCudaCheck(cudaGetLastError()); 157 | return d_logits; 158 | } 159 | 160 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 161 | logits.scalar_type(), "SigmoidFocalLoss_backward", [&] { 162 | SigmoidFocalLossBackward<<>>( 163 | d_logits_size, logits.contiguous().data(), 164 | targets.contiguous().data(), 165 | d_losses.contiguous().data(), num_classes, gamma, alpha, 166 | num_samples, d_logits.data()); 167 | }); 168 | 169 | THCudaCheck(cudaGetLastError()); 170 | return d_logits; 171 | } 172 | -------------------------------------------------------------------------------- /tools/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Misc functions, including distributed helpers. 3 | 4 | Mostly copy-paste from torchvision references. 5 | """ 6 | from typing import List, Optional 7 | 8 | import torch 9 | import torch.distributed as dist 10 | import torchvision 11 | from torch import Tensor 12 | 13 | 14 | def _max_by_axis(the_list): 15 | maxes = the_list[0] 16 | for sublist in the_list[1:]: 17 | for index, item in enumerate(sublist): 18 | maxes[index] = max(maxes[index], item) 19 | return maxes 20 | 21 | 22 | class NestedTensor(object): 23 | def __init__(self, tensors, mask: Optional[Tensor]): 24 | self.tensors = tensors 25 | self.mask = mask 26 | 27 | def to(self, device): 28 | cast_tensor = self.tensors.to(device) 29 | mask = self.mask 30 | if mask is not None: 31 | assert mask is not None 32 | cast_mask = mask.to(device) 33 | else: 34 | cast_mask = None 35 | return NestedTensor(cast_tensor, cast_mask) 36 | 37 | def decompose(self): 38 | return self.tensors, self.mask 39 | 40 | def __repr__(self): 41 | return str(self.tensors) 42 | 43 | 44 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 45 | if tensor_list[0].ndim == 3: 46 | if torchvision._is_tracing(): 47 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 48 | 49 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 50 | batch_shape = [len(tensor_list)] + max_size 51 | b, c, h, w = batch_shape 52 | dtype = tensor_list[0].dtype 53 | device = tensor_list[0].device 54 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 55 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 56 | for img, pad_img, m in zip(tensor_list, tensor, mask): 57 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 58 | m[: img.shape[1], : img.shape[2]] = False 59 | else: 60 | raise ValueError("not supported") 61 | return NestedTensor(tensor, mask) 62 | 63 | 64 | @torch.jit.unused 65 | def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: 66 | max_size = [] 67 | for i in range(tensor_list[0].dim()): 68 | max_size_i = torch.max( 69 | torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) 70 | ).to(torch.int64) 71 | max_size.append(max_size_i) 72 | max_size = tuple(max_size) 73 | 74 | padded_imgs = [] 75 | padded_masks = [] 76 | for img in tensor_list: 77 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 78 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 79 | padded_imgs.append(padded_img) 80 | 81 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 82 | padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) 83 | 
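        # The padded region of the mask is filled with 1 (True after the bool cast
        # below), matching the NestedTensor convention that True marks padding pixels.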
padded_masks.append(padded_mask.to(torch.bool)) 84 | 85 | tensor = torch.stack(padded_imgs) 86 | mask = torch.stack(padded_masks) 87 | 88 | return NestedTensor(tensor, mask=mask) 89 | 90 | 91 | def is_dist_avail_and_initialized(): 92 | if not dist.is_available(): 93 | return False 94 | if not dist.is_initialized(): 95 | return False 96 | return True 97 | -------------------------------------------------------------------------------- /tools/nms.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: NMS tools 6 | 7 | example: 8 | 9 | ''' 10 | 11 | import numpy as np 12 | import torch.nn as nn 13 | import torch 14 | import torch.nn.functional as F 15 | 16 | def nms(boxes, scores, nms_thr): 17 | x1 = boxes[:, 0] 18 | y1 = boxes[:, 1] 19 | x2 = boxes[:, 2] 20 | y2 = boxes[:, 3] 21 | 22 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 23 | order = scores.argsort()[::-1] 24 | 25 | keep = [] 26 | while order.size > 0: 27 | i = order[0] 28 | keep.append(i) 29 | xx1 = np.maximum(x1[i], x1[order[1:]]) 30 | yy1 = np.maximum(y1[i], y1[order[1:]]) 31 | xx2 = np.minimum(x2[i], x2[order[1:]]) 32 | yy2 = np.minimum(y2[i], y2[order[1:]]) 33 | 34 | w = np.maximum(0.0, xx2 - xx1 + 1) 35 | h = np.maximum(0.0, yy2 - yy1 + 1) 36 | inter = w * h 37 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 38 | 39 | inds = np.where(ovr <= nms_thr)[0] 40 | order = order[inds + 1] 41 | 42 | return keep 43 | 44 | def multiclass_nms(boxes, scores, nms_thr, score_thr): 45 | final_dets = [] 46 | num_classes = scores.shape[1] 47 | for cls_ind in range(num_classes): 48 | cls_scores = scores[:, cls_ind] 49 | valid_score_mask = cls_scores > score_thr 50 | if valid_score_mask.sum() == 0: 51 | continue 52 | else: 53 | valid_scores = cls_scores[valid_score_mask] 54 | valid_boxes = boxes[valid_score_mask] 55 | keep = nms(valid_boxes, valid_scores, nms_thr) 56 | if len(keep) > 0: 57 | cls_inds = np.ones((len(keep), 1)) * cls_ind 58 | dets = np.concatenate( 59 | [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 60 | ) 61 | final_dets.append(dets) 62 | if len(final_dets) == 0: 63 | return None 64 | return np.concatenate(final_dets, 0) 65 | 66 | def points_nms(heat, kernel=2): 67 | hmax = nn.functional.max_pool2d( 68 | heat, (kernel, kernel), stride=1, padding=1) 69 | keep = (hmax[:, :, :-1, :-1] == heat).float() 70 | return heat * keep 71 | 72 | def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None): 73 | """Matrix NMS for multi-class masks. 
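    Instead of hard suppression, each mask's score is multiplied by a decay
    coefficient computed from its IoU with higher-scoring masks of the same
    class: exp(-sigma * iou^2) for the 'gaussian' kernel and
    (1 - iou) / (1 - compensate_iou) for the 'linear' kernel.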
74 | 75 | Args: 76 | seg_masks (Tensor): shape (n, h, w) 77 | cate_labels (Tensor): shape (n), mask labels in descending order 78 | cate_scores (Tensor): shape (n), mask scores in descending order 79 | kernel (str): 'linear' or 'gauss' 80 | sigma (float): std in gaussian method 81 | sum_masks (Tensor): The sum of seg_masks 82 | 83 | Returns: 84 | Tensor: cate_scores_update, tensors of shape (n) 85 | """ 86 | n_samples = len(cate_labels) 87 | if n_samples == 0: 88 | return [] 89 | if sum_masks is None: 90 | sum_masks = seg_masks.sum((1, 2)).float() 91 | seg_masks = seg_masks.reshape(n_samples, -1).float() 92 | inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0)) 93 | sum_masks_x = sum_masks.expand(n_samples, n_samples) 94 | iou_matrix = (inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix)).triu(diagonal=1) 95 | cate_labels_x = cate_labels.expand(n_samples, n_samples) 96 | label_matrix = (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1) 97 | 98 | compensate_iou, _ = (iou_matrix * label_matrix).max(0) 99 | compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0) 100 | 101 | decay_iou = iou_matrix * label_matrix 102 | 103 | if kernel == 'gaussian': 104 | decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2)) 105 | compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2)) 106 | decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0) 107 | elif kernel == 'linear': 108 | decay_matrix = (1-decay_iou)/(1-compensate_iou) 109 | decay_coefficient, _ = decay_matrix.min(0) 110 | else: 111 | raise NotImplementedError 112 | 113 | cate_scores_update = cate_scores * decay_coefficient 114 | return cate_scores_update -------------------------------------------------------------------------------- /tools/nninit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | 6 | import warnings 7 | 8 | 9 | def constant_init(module, val, bias=0): 10 | if hasattr(module, 'weight') and module.weight is not None: 11 | nn.init.constant_(module.weight, val) 12 | if hasattr(module, 'bias') and module.bias is not None: 13 | nn.init.constant_(module.bias, bias) 14 | 15 | 16 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 17 | assert distribution in ['uniform', 'normal'] 18 | if distribution == 'uniform': 19 | nn.init.xavier_uniform_(module.weight, gain=gain) 20 | else: 21 | nn.init.xavier_normal_(module.weight, gain=gain) 22 | if hasattr(module, 'bias') and module.bias is not None: 23 | nn.init.constant_(module.bias, bias) 24 | 25 | 26 | def normal_init(module, mean=0, std=1, bias=0): 27 | nn.init.normal_(module.weight, mean, std) 28 | if hasattr(module, 'bias') and module.bias is not None: 29 | nn.init.constant_(module.bias, bias) 30 | 31 | 32 | def uniform_init(module, a=0, b=1, bias=0): 33 | nn.init.uniform_(module.weight, a, b) 34 | if hasattr(module, 'bias') and module.bias is not None: 35 | nn.init.constant_(module.bias, bias) 36 | 37 | 38 | def kaiming_init(module, 39 | a=0, 40 | mode='fan_out', 41 | nonlinearity='relu', 42 | bias=0, 43 | distribution='normal'): 44 | assert distribution in ['uniform', 'normal'] 45 | if distribution == 'uniform': 46 | nn.init.kaiming_uniform_( 47 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 48 | else: 49 | nn.init.kaiming_normal_( 50 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 51 | if hasattr(module, 'bias') and module.bias is not None: 
52 | nn.init.constant_(module.bias, bias) 53 | 54 | 55 | def bias_init_with_prob(prior_prob): 56 | """initialize conv/fc bias value according to giving probablity.""" 57 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 58 | return bias_init 59 | 60 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 61 | def norm_cdf(x): 62 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 63 | 64 | if (mean < a - 2 * std) or (mean > b + 2 * std): 65 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 66 | "The distribution of values may be incorrect.", stacklevel=2) 67 | with torch.no_grad(): 68 | l = norm_cdf((a - mean) / std) 69 | u = norm_cdf((b - mean) / std) 70 | tensor.uniform_(2 * l - 1, 2 * u - 1) 71 | tensor.erfinv_() 72 | tensor.mul_(std * math.sqrt(2.)) 73 | tensor.add_(mean) 74 | tensor.clamp_(min=a, max=b) 75 | return tensor 76 | 77 | def common_init(m): 78 | if isinstance(m, (nn.Conv2d, nn.Conv1d)): 79 | kaiming_init(m) 80 | elif isinstance(m, nn.Linear): 81 | trunc_normal_(m.weight, std=.02) 82 | if m.bias is not None: 83 | nn.init.constant_(m.bias, 0) 84 | elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d, nn.GroupNorm, nn.LayerNorm)): 85 | nn.init.constant_(m.weight, 1) 86 | nn.init.constant_(m.bias, 0) 87 | -------------------------------------------------------------------------------- /train_ddp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: PPT训练框架入口 6 | 7 | example: 8 | 9 | ''' 10 | 11 | 12 | import torch 13 | import torch.backends.cudnn as cudnn 14 | import torch.distributed as dist 15 | import torch.multiprocessing as mp 16 | from torch.nn.parallel import DistributedDataParallel as DDP 17 | 18 | 19 | import argparse 20 | import numpy as np 21 | import random 22 | 23 | import warnings 24 | import yaml 25 | 26 | from data.dataloader import Data_loader 27 | from trainer_ddp import Trainer 28 | from model.model_factory import Classify_Model, DB_Model, Segmentation_Model, Yolox_Model, \ 29 | Crnn_Model, Solo_Model, ICTransformer, ReBiSeNet_Model, MaskFormer_Model 30 | from utils.common import find_free_port 31 | 32 | MODEL_SELECT = {'Classify': Classify_Model, 'DB': DB_Model, 'Seg': Segmentation_Model, 'YOLOX':Yolox_Model, 33 | 'CRNN':Crnn_Model, 'SOLO':Solo_Model, 'ICT': ICTransformer, 'ReBiSe': ReBiSeNet_Model, 'MaskFormer': MaskFormer_Model} 34 | 35 | def arg_parser(): 36 | parser = argparse.ArgumentParser("train parser") 37 | parser.add_argument( 38 | "-e", "--eval_interval", type=int, default=1, help="eval interval" 39 | ) 40 | parser.add_argument( 41 | "-s", "--save_interval", type=int, default=1, help="save interval" 42 | ) 43 | parser.add_argument( 44 | "-v", "--visual_batch_interval", type=int, default=10, help="save interval" 45 | ) 46 | parser.add_argument( 47 | "-ste", "--start_eval", type=int, default=0, help="save interval" 48 | ) 49 | parser.add_argument( 50 | "-se", "--seed", type=int, default=None, help="random seed" 51 | ) 52 | parser.add_argument( 53 | "--local_rank", default=0, type=int, help="GPU device for training" 54 | ) 55 | parser.add_argument( 56 | "--nprocs", default=1, type=int, help="GPU device for training" 57 | ) 58 | parser.add_argument( 59 | "--syncBN", default=False, action="store_true", help="syncBN" 60 | ) 61 | parser.add_argument( 62 | "-c", "--ckpt", default=None, type=str, help="checkpoint file" 63 | ) 64 | parser.add_argument( 65 | "--resume", 
default=False, action="store_true", help="resume training" 66 | ) 67 | parser.add_argument( 68 | "-pre", "--pretrained", default=None, type=str, help="pretrained file" 69 | ) 70 | parser.add_argument( 71 | "-f", 72 | "--exp_file", 73 | default='./config/Config.yaml', 74 | type=str, 75 | help="training description file", 76 | ) 77 | parser.add_argument( 78 | "-o", 79 | "--output_dir", 80 | default='./checkpoints', 81 | type=str, 82 | help="save dir", 83 | ) 84 | parser.add_argument( 85 | "--fp16", 86 | dest="fp16", 87 | default=False, 88 | action="store_true", 89 | help="Adopting mix precision training.", 90 | ) 91 | 92 | return parser 93 | 94 | def init_seeds(seed=0, cuda_deterministic=True): 95 | random.seed(seed) 96 | np.random.seed(seed) 97 | torch.manual_seed(seed) 98 | if cuda_deterministic: 99 | cudnn.deterministic = True 100 | cudnn.benchmark = False 101 | else: 102 | cudnn.deterministic = False 103 | cudnn.benchmark = True 104 | 105 | def main(): 106 | args = arg_parser().parse_args() 107 | args.nprocs = torch.cuda.device_count() 108 | 109 | args.distributed = True if args.nprocs > 1 else False 110 | args.dis_backend = 'nccl' 111 | 112 | dist_url = "tcp://127.0.0.1" 113 | port = find_free_port() 114 | args.dist_url = "{}:{}".format(dist_url, str(port) ) 115 | 116 | with open(args.exp_file, mode='r') as fr: 117 | cfg = yaml.load(fr, Loader=yaml.FullLoader) 118 | 119 | if args.distributed: 120 | mp.spawn(main_worker, nprocs=args.nprocs, args=(args.nprocs, args, cfg)) 121 | else: 122 | main_worker(args.local_rank, args.nprocs, args, cfg) 123 | 124 | def main_worker(local_rank,nprocs, args, cfg): 125 | assert ( torch.cuda.is_available()), "cuda is not available. Please check your installation." 126 | args.rank = local_rank 127 | cfg['distributed'] = args.distributed 128 | init_seeds(local_rank+1) 129 | 130 | cudnn.benchmark = True 131 | if args.distributed: 132 | dist.init_process_group(backend=args.dis_backend, 133 | init_method=args.dist_url, 134 | world_size=nprocs, 135 | rank=local_rank) 136 | 137 | Model = MODEL_SELECT[cfg['experiment_name']](config=cfg, amp_training=args.fp16) 138 | DATA_Loader = Data_loader(config=cfg, args=args) 139 | 140 | trainer = Trainer(cfg, args, Model, DATA_Loader, step_update=True) 141 | trainer.train() 142 | 143 | if __name__ == '__main__': 144 | main() -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lsr12345/Pytorch-Devkit/97f4ca88b9dbecc43cd97be2537ddc601715cbc9/utils/__init__.py -------------------------------------------------------------------------------- /utils/common.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: PPT common functions 6 | 7 | example: 8 | 9 | ''' 10 | # coding: utf-8 11 | 12 | import os 13 | from functools import partial 14 | 15 | import torch 16 | import torch.distributed as dist 17 | 18 | from loguru import logger 19 | 20 | # # distribute function and config ## 21 | # _LOCAL_PROCESS_GROUP = None 22 | 23 | def synchronize(): 24 | """ 25 | Helper function to synchronize (barrier) among all processes when using distributed training 26 | """ 27 | if not dist.is_available(): 28 | return 29 | if not dist.is_initialized(): 30 | return 31 | world_size = dist.get_world_size() 32 | if world_size == 1: 33 | return 34 | 
dist.barrier() 35 | 36 | def find_free_port(): 37 | """ 38 | Find an available port of current machine / node. 39 | """ 40 | import socket 41 | 42 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 43 | # Binding to port 0 will cause the OS to find an available port for us 44 | sock.bind(("", 0)) 45 | port = sock.getsockname()[1] 46 | sock.close() 47 | # NOTE: there is still a chance the port could be taken by other processes. 48 | return port 49 | 50 | def get_rank(): 51 | if not dist.is_available(): 52 | return 0 53 | if not dist.is_initialized(): 54 | return 0 55 | return dist.get_rank() 56 | 57 | def reduce_mean(tensor, nprocs): 58 | rt = tensor.clone() 59 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 60 | rt /= nprocs 61 | return rt 62 | 63 | def reduce_sum(tensor): 64 | rt = tensor.clone() 65 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 66 | return rt 67 | 68 | def remove_file(file_dir, key_words=''): 69 | assert key_words != '' 70 | for fn in os.listdir(file_dir): 71 | if key_words in fn: 72 | os.remove(os.path.join(file_dir, fn)) 73 | return True 74 | else: 75 | return False 76 | 77 | def prepare_device(local_rank, local_world_size, distributed=False): 78 | ''' 79 | setup GPU device if available, move model into configured device 80 | :param local_rank: 81 | :param local_world_size: 82 | :return: 83 | ''' 84 | if distributed: 85 | ngpu_per_process = torch.cuda.device_count() // local_world_size 86 | device_ids = list(range(local_rank * ngpu_per_process, (local_rank + 1) * ngpu_per_process)) 87 | 88 | if torch.cuda.is_available() and local_rank != -1: 89 | torch.cuda.set_device(device_ids[0]) # device_ids[0] =local_rank if local_world_size = n_gpu per node 90 | device = 'cuda' 91 | else: 92 | device = 'cpu' 93 | device = torch.device(device) 94 | return device, device_ids 95 | else: 96 | n_gpu = torch.cuda.device_count() 97 | n_gpu_use = local_world_size 98 | if n_gpu_use > 0 and n_gpu == 0: 99 | n_gpu_use = 0 100 | if n_gpu_use > n_gpu: 101 | n_gpu_use = n_gpu 102 | 103 | list_ids = list(range(n_gpu_use)) 104 | if n_gpu_use > 0: 105 | torch.cuda.set_device(list_ids[0]) # only use first available gpu as devices 106 | device = 'cuda' 107 | else: 108 | device = 'cpu' 109 | device = torch.device(device) 110 | return device, list_ids 111 | 112 | def multi_apply(func, *args, **kwargs): 113 | """Apply function to a list of arguments. 114 | 115 | Note: 116 | This function applies the ``func`` to multiple inputs and 117 | map the multiple outputs of the ``func`` into different 118 | list. Each list contains the same type of outputs corresponding 119 | to different inputs. 
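        For example, if ``func`` returns a 2-tuple, ``multi_apply(func, [a, b], [c, d])``
        returns ``([func(a, c)[0], func(b, d)[0]], [func(a, c)[1], func(b, d)[1]])``.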
120 | 121 | Args: 122 | func (Function): A function that will be applied to a list of 123 | arguments 124 | 125 | Returns: 126 | tuple(list): A tuple containing multiple list, each list contains 127 | a kind of returned results by the function 128 | """ 129 | pfunc = partial(func, **kwargs) if kwargs else func 130 | map_results = map(pfunc, *args) 131 | return tuple(map(list, zip(*map_results))) 132 | 133 | # def togpu(data, requires_grad=True): 134 | # """ 135 | # Transfer tensor in `data` to gpu recursively 136 | # `data` can be dict, list or tuple 137 | # """ 138 | # if isinstance(data, list) or isinstance(data, tuple): 139 | # data = [togpu(x) for x in data] 140 | # elif isinstance(data, dict): 141 | # data = {key:togpu(_data) for key,_data in data.items()} 142 | # elif isinstance(data, torch.Tensor): 143 | # data = data.contiguous().cuda(non_blocking=True).requires_grad = requires_grad 144 | # return data 145 | 146 | # def togpu(data): 147 | # """ 148 | # Transfer tensor in `data` to gpu recursively 149 | # `data` can be dict, list or tuple 150 | # """ 151 | # if isinstance(data, list) or isinstance(data, tuple): 152 | # data = [togpu(x) for x in data] 153 | # elif isinstance(data, dict): 154 | # data = {key:togpu(_data) for key,_data in data.items()} 155 | # # else: 156 | # # data = torch.tensor(data) 157 | # else: 158 | # if not torch.is_tensor(data): 159 | # data = torch.tensor(data) 160 | # data = data.contiguous().cuda(non_blocking=True) 161 | # return data 162 | 163 | # def tolong(data): 164 | # if isinstance(data, dict): 165 | # for key in data.keys(): 166 | # data[key] = tolong(data[key]) 167 | # if isinstance(data, list) or isinstance(data, tuple): 168 | # data = [tolong(x) for x in data] 169 | # if torch.is_tensor(data) and data.dtype == torch.int16: 170 | # data = data.long() 171 | # return data 172 | ############################### -------------------------------------------------------------------------------- /utils/standard_tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | """ 5 | def recursiveToTensor(data): 6 | if isinstance(data, dict): 7 | for key in data.keys(): 8 | data[key] = recursiveToTensor(data[key]) 9 | if isinstance(data, list) or isinstance(data, tuple): 10 | data = [recursiveToTensor(x) for x in data] 11 | if isinstance(data, np.ndarray): 12 | data = torch.from_numpy(data) 13 | if isinstance(data, bool): 14 | data = torch.tensor(data) 15 | return data 16 | """ 17 | 18 | def recursiveToTensor(data): 19 | """Recursively transform numpy.ndarray to torch.Tensor. 
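    Note that ndarrays are converted with torch.from_numpy(...).float(), so the
    resulting tensors are always float32 regardless of the source dtype.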
20 | """ 21 | if isinstance(data, dict): 22 | for key in data.keys(): 23 | data[key] = recursiveToTensor(data[key]) 24 | elif isinstance(data, list) or isinstance(data, tuple): 25 | data = [recursiveToTensor(x) for x in data] 26 | # data = torch.tensor(data) 27 | elif isinstance(data, np.ndarray): 28 | """Pytorch now has bool type.""" 29 | data = torch.from_numpy(data).float() 30 | # if isinstance(data, bool): 31 | # data = torch.tensor(data) 32 | elif torch.is_tensor(data): 33 | return data 34 | # else: 35 | # data = torch.tensor(data) 36 | return data 37 | 38 | def togpu(data): 39 | """ 40 | Transfer tensor in `data` to gpu recursively 41 | `data` can be dict, list or tuple 42 | """ 43 | if isinstance(data, list) or isinstance(data, tuple): 44 | data = [togpu(x) for x in data] 45 | elif isinstance(data, dict): 46 | data = {key:togpu(_data) for key,_data in data.items()} 47 | # else: 48 | # data = torch.tensor(data) 49 | else: 50 | if not torch.is_tensor(data): 51 | data = torch.tensor(data) 52 | data = data.contiguous().cuda(non_blocking=True) 53 | return data 54 | 55 | def tolong(data): 56 | if isinstance(data, dict): 57 | for key in data.keys(): 58 | data[key] = tolong(data[key]) 59 | if isinstance(data, list) or isinstance(data, tuple): 60 | data = [tolong(x) for x in data] 61 | if torch.is_tensor(data) and data.dtype == torch.int16: 62 | data = data.long() 63 | return data 64 | 65 | def recursiveToNumpy(data): 66 | """Recursively transform numpy.ndarray to torch.Tensor. 67 | """ 68 | if isinstance(data, dict): 69 | for key in data.keys(): 70 | data[key] = recursiveToTensor(data[key]) 71 | if isinstance(data, list) or isinstance(data, tuple): 72 | data = np.array(data) 73 | return data -------------------------------------------------------------------------------- /utils/visualize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | # Author: Shaoran Lu 3 | # Date: 2021/10/04 4 | # Email: lushaoran92@gmail.com 5 | # Description: 6 | 7 | example: 8 | 9 | ''' 10 | # coding: utf-8 11 | 12 | import cv2 13 | import numpy as np 14 | from scipy import ndimage 15 | from data.coco.coco_classes import COCO_CLASSES, COCO_LABEL, COCO_LABEL_MAP 16 | 17 | def box_vis(img, cfg, boxes, scores, cls_ids): 18 | class_names = cfg.get('class_names', None) 19 | conf = cfg.get('test_conf', 0.1) 20 | 21 | if class_names is None: 22 | class_names = COCO_CLASSES 23 | 24 | if boxes is None: 25 | return img 26 | 27 | for i in range(len(boxes)): 28 | box = boxes[i] 29 | cls_id = int(cls_ids[i]) 30 | score = scores[i] 31 | if score < conf: 32 | continue 33 | x0 = int(box[0]) 34 | y0 = int(box[1]) 35 | x1 = int(box[2]) 36 | y1 = int(box[3]) 37 | 38 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() 39 | text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) 40 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) 41 | font = cv2.FONT_HERSHEY_SIMPLEX 42 | 43 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] 44 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) 45 | 46 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() 47 | cv2.rectangle( 48 | img, 49 | (x0, y0 + 1), 50 | (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), 51 | txt_bk_color, 52 | -1 53 | ) 54 | cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) 55 | 56 | return img 57 | 58 | 59 | def instance_vis(img, cfg, seg_label, cate_label, score, sort_by_density=False): 60 | score_thr = cfg.get('test_score_thr', 0.1) 61 | 
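    # Keep only masks scoring above score_thr, optionally sort them by pixel area,
    # then alpha-blend each mask onto the image with a random color and draw its
    # class|score label at the mask's center of mass.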
img_show = img.copy() 62 | h, w, _ = img.shape 63 | seg_label = seg_label[:, :h, :w] 64 | 65 | seg_label = seg_label.cpu().numpy().astype(np.uint8) 66 | # print(seg_label.sum()) 67 | cate_label = cate_label.cpu().numpy() 68 | score = score.cpu().numpy() 69 | 70 | vis_inds = score > score_thr 71 | seg_label = seg_label[vis_inds] 72 | num_mask = seg_label.shape[0] 73 | cate_label = cate_label[vis_inds] 74 | cate_score = score[vis_inds] 75 | 76 | if sort_by_density: 77 | mask_density = [] 78 | for idx in range(num_mask): 79 | cur_mask = seg_label[idx, :, :] 80 | cur_mask = cv2.resize(cur_mask, (w, h), interpolation= cv2.INTER_LINEAR) 81 | cur_mask = (cur_mask > 0.5).astype(np.int32) 82 | mask_density.append(cur_mask.sum()) 83 | orders = np.argsort(mask_density) 84 | seg_label = seg_label[orders] 85 | cate_label = cate_label[orders] 86 | cate_score = cate_score[orders] 87 | 88 | np.random.seed(42) 89 | color_masks = [ 90 | np.random.randint(0, 256, (1, 3), dtype=np.uint8) 91 | for _ in range(num_mask) 92 | ] 93 | #img_show = None 94 | font = cv2.FONT_HERSHEY_SIMPLEX 95 | for idx in range(num_mask): 96 | idx = -(idx+1) 97 | cur_mask = seg_label[idx, :, :] 98 | # cur_mask = cv2.resize(cur_mask, (w, h), interpolation= cv2.INTER_LINEAR) 99 | cur_mask = (cur_mask > 0.5).astype(np.uint8) 100 | if cur_mask.sum() == 0: 101 | # print('*') 102 | continue 103 | color_mask = color_masks[idx] 104 | cur_mask_bool = cur_mask.astype(np.bool) 105 | img_show[cur_mask_bool] = img[cur_mask_bool] * 0.5 + color_mask * 0.5 106 | 107 | #当前实例的类别 108 | cur_cate = cate_label[idx] # 1-80 109 | realclass = COCO_LABEL[cur_cate] 110 | # realclass = COCO_LABEL[cur_cate-1] 111 | cur_score = cate_score[idx] 112 | 113 | name_idx = COCO_LABEL_MAP[realclass] 114 | label_text = COCO_CLASSES[name_idx-1] 115 | # label_text = COCO_CLASSES[name_idx] 116 | label_text += '|{:.02f}'.format(cur_score) 117 | center_y, center_x = ndimage.measurements.center_of_mass(cur_mask) 118 | vis_pos = (max(int(center_x) - 10, 0), int(center_y)) 119 | cv2.putText(img_show, label_text, vis_pos, font, 0.4, (255, 255, 255)) # green 120 | 121 | return img_show 122 | 123 | _COLORS = np.array( 124 | [ 125 | 0.000, 0.447, 0.741, 126 | 0.850, 0.325, 0.098, 127 | 0.929, 0.694, 0.125, 128 | 0.494, 0.184, 0.556, 129 | 0.466, 0.674, 0.188, 130 | 0.301, 0.745, 0.933, 131 | 0.635, 0.078, 0.184, 132 | 0.300, 0.300, 0.300, 133 | 0.600, 0.600, 0.600, 134 | 1.000, 0.000, 0.000, 135 | 1.000, 0.500, 0.000, 136 | 0.749, 0.749, 0.000, 137 | 0.000, 1.000, 0.000, 138 | 0.000, 0.000, 1.000, 139 | 0.667, 0.000, 1.000, 140 | 0.333, 0.333, 0.000, 141 | 0.333, 0.667, 0.000, 142 | 0.333, 1.000, 0.000, 143 | 0.667, 0.333, 0.000, 144 | 0.667, 0.667, 0.000, 145 | 0.667, 1.000, 0.000, 146 | 1.000, 0.333, 0.000, 147 | 1.000, 0.667, 0.000, 148 | 1.000, 1.000, 0.000, 149 | 0.000, 0.333, 0.500, 150 | 0.000, 0.667, 0.500, 151 | 0.000, 1.000, 0.500, 152 | 0.333, 0.000, 0.500, 153 | 0.333, 0.333, 0.500, 154 | 0.333, 0.667, 0.500, 155 | 0.333, 1.000, 0.500, 156 | 0.667, 0.000, 0.500, 157 | 0.667, 0.333, 0.500, 158 | 0.667, 0.667, 0.500, 159 | 0.667, 1.000, 0.500, 160 | 1.000, 0.000, 0.500, 161 | 1.000, 0.333, 0.500, 162 | 1.000, 0.667, 0.500, 163 | 1.000, 1.000, 0.500, 164 | 0.000, 0.333, 1.000, 165 | 0.000, 0.667, 1.000, 166 | 0.000, 1.000, 1.000, 167 | 0.333, 0.000, 1.000, 168 | 0.333, 0.333, 1.000, 169 | 0.333, 0.667, 1.000, 170 | 0.333, 1.000, 1.000, 171 | 0.667, 0.000, 1.000, 172 | 0.667, 0.333, 1.000, 173 | 0.667, 0.667, 1.000, 174 | 0.667, 1.000, 1.000, 175 | 1.000, 0.000, 1.000, 176 | 
1.000, 0.333, 1.000, 177 | 1.000, 0.667, 1.000, 178 | 0.333, 0.000, 0.000, 179 | 0.500, 0.000, 0.000, 180 | 0.667, 0.000, 0.000, 181 | 0.833, 0.000, 0.000, 182 | 1.000, 0.000, 0.000, 183 | 0.000, 0.167, 0.000, 184 | 0.000, 0.333, 0.000, 185 | 0.000, 0.500, 0.000, 186 | 0.000, 0.667, 0.000, 187 | 0.000, 0.833, 0.000, 188 | 0.000, 1.000, 0.000, 189 | 0.000, 0.000, 0.167, 190 | 0.000, 0.000, 0.333, 191 | 0.000, 0.000, 0.500, 192 | 0.000, 0.000, 0.667, 193 | 0.000, 0.000, 0.833, 194 | 0.000, 0.000, 1.000, 195 | 0.000, 0.000, 0.000, 196 | 0.143, 0.143, 0.143, 197 | 0.286, 0.286, 0.286, 198 | 0.429, 0.429, 0.429, 199 | 0.571, 0.571, 0.571, 200 | 0.714, 0.714, 0.714, 201 | 0.857, 0.857, 0.857, 202 | 0.000, 0.447, 0.741, 203 | 0.314, 0.717, 0.741, 204 | 0.50, 0.5, 0 205 | ] 206 | ).astype(np.float32).reshape(-1, 3) 207 | 208 | --------------------------------------------------------------------------------